{
  "exp_name": "facodec",
  "model_type": "FAcodec",
  "log_dir": "./runs/",
  "log_interval": 10,
  "save_interval": 1000,
  "device": "cuda",
  "epochs": 1000,
  "batch_size": 4,
  "batch_length": 100,
  "max_len": 80,
  "pretrained_model": "",
  "load_only_params": false,
  "F0_path": "modules/JDC/bst.t7",
  "dataset": "/path/to/dataset",
  "preprocess_params": {
    "sr": 24000,
    "frame_rate": 80,
    "duration_range": [1.0, 25.0],
    "spect_params": {
      "n_fft": 2048,
      "win_length": 1200,
      "hop_length": 300,
      "n_mels": 80
    }
  },
  "train": {
    "gradient_accumulation_step": 1,
    "batch_size": 1,
    "save_checkpoint_stride": [
      20
    ],
    "random_seed": 1234,
    "max_epoch": -1,
    "max_frame_len": 80,
    "tracker": [
      "tensorboard"
    ],
    "run_eval": [
      false
    ],
    "sampler": {
      "holistic_shuffle": true,
      "drop_last": true
    },
    "dataloader": {
      "num_worker": 0,
      "pin_memory": true
    }
  },
  "model_params": {
    "causal": true,
    "lstm": 2,
    "norm_f0": true,
    "use_gr_content_f0": false,
    "use_gr_prosody_phone": false,
    "use_gr_timbre_prosody": false,
    "separate_prosody_encoder": true,
    "n_c_codebooks": 2,
    "timbre_norm": true,
    "use_gr_content_global_f0": true,
    "DAC": {
      "encoder_dim": 64,
      "encoder_rates": [2, 5, 5, 6],
      "decoder_dim": 1536,
      "decoder_rates": [6, 5, 5, 2],
      "sr": 24000
    }
  },
  "loss_params": {
    "base_lr": 0.0001,
    "warmup_steps": 200,
    "discriminator_iter_start": 2000,
    "lambda_spk": 1.0,
    "lambda_mel": 45,
    "lambda_f0": 1.0,
    "lambda_uv": 1.0
  }
}