Spaces: Running on Zero
| model_name: EzAudio-XL | |
| model: | |
| mae: True | |
| mae_prob: 0.25 | |
| mask_ratio: [0.25, 1.0] | |
| mask_span: 10 | |
| img_size: 500 | |
| patch_size: 1 | |
| in_chans: 257 | |
| out_chans: 128 | |
| input_type: '1d' | |
| embed_dim: 1152 | |
| depth: 28 | |
| num_heads: 16 | |
| mlp_ratio: 4.0 | |
| qkv_bias: false | |
| qk_scale: null | |
| qk_norm: layernorm | |
| norm_layer: layernorm | |
| act_layer: geglu | |
| context_norm: true | |
| use_checkpoint: true | |
| time_fusion: 'ada_lora_bias' | |
| ada_lora_rank: 36 | |
| ada_lora_alpha: 36 | |
| cls_dim: null | |
| context_dim: 2048 | |
| context_fusion: 'cross' | |
| context_max_length: null | |
| context_pe_method: 'none' | |
| pe_method: 'none' | |
| rope_mode: 'shared' | |
| use_conv: true | |
| skip: true | |
| skip_norm: true | |
| autoencoder: | |
| name: stable_vae | |
| dim: 128 | |
| sr: 24000 | |
| latent_sr: 50 | |
| q_first: true | |
| scale: 1.0 | |
| shift: 0.0 | |
| text_encoder: | |
| model: google/flan-t5-xl | |
| max_length: 100 | |
| cfg: 0.1 | |
| diff: | |
| num_train_timesteps: 1000 | |
| beta_schedule: 'scaled_linear' | |
| beta_start: 0.00085 | |
| beta_end: 0.012 | |
| prediction_type: 'v_prediction' | |
| rescale_betas_zero_snr: true | |
| timestep_spacing: 'trailing' | |
| clip_sample: false | |