---
# Experiment configuration.
#
# NOTE(review): this file previously had every key on a single line, which is
# not loadable YAML. The nesting below is reconstructed from the key order
# (keys without a value are treated as section headers) -- confirm section
# boundaries against the consuming config schema, in particular that
# `aggregator`, `personalization`, `trainer`, and `grad` are top-level and
# that `optimizer` / `scheduler` belong under `train`.

use_gpu: true
device: 0
seed: 12345
outdir: exp/

federate:
  mode: standalone
  method: fedavg
  total_round_num: 100
  client_num: 18

data:
  type: hetero_nlp_tasks
  root: datasets/
  hetero_data_name: ['imdb', 'agnews', 'squad', 'newsqa', 'cnndm', 'msqg']
  # Six entries summing to 18 (= federate.client_num); presumably one client
  # count per entry of hetero_data_name, in the same order -- TODO confirm.
  num_of_client_for_data: [1, 3, 3, 2, 5, 4]
  num_workers: 0
  cache_dir: cache/
  hetero_synth_batch_size: 32
  hetero_synth_prim_weight: 0.5
  hetero_synth_feat_dim: 128
  num_contrast: 20000

model:
  type: atc_model
  model_type: google/bert_uncased_L-2_H-128_A-2
  stage: contrast
  use_contrastive_loss: true
  contrast_topk: 20
  contrast_temp: 1.0

aggregator:
  # Six entries, same length as data.hetero_data_name; presumably aligned
  # with it position by position -- TODO confirm.
  num_agg_topk: [16, 16, 16, 16, 8, 8]
  inside_weight: 1.0
  outside_weight: 0.0

personalization:
  local_param: ['classifier', 'encoder.pooler', 'decoder', 'contrast_head']

trainer:
  type: atc_trainer

train:
  batch_or_epoch: batch
  optimizer:
    type: AdamW
    # Written with a decimal point so YAML 1.1 loaders (e.g. PyYAML) resolve
    # it as a float; a bare `5e-4` is loaded as a string by those parsers.
    lr: 5.0e-4
    weight_decay: 0.01
  scheduler:
    type: warmup_step
    warmup_ratio: 0.1

grad:
  grad_clip: 1.0

eval:
  split: ['test']
  report: ['group_avg']
  freq: 100000000  # eval freq across rounds