---
# Experiment configuration: STAEFormer on the AirQuality dataset.
# Four sections: basic (run-level), data, model, train.

basic:
  dataset: AirQuality
  # Quoted so the embedded colon can never be read as a mapping indicator.
  device: 'cuda:0'
  mode: train
  model: STAEFormer
  seed: 2023

data:
  batch_size: 16
  column_wise: false
  days_per_week: 7
  horizon: 24
  input_dim: 6
  lag: 24
  normalizer: std
  num_nodes: 35
  # NOTE(review): this is 288 while model.steps_per_day is 24 — confirm
  # which value is intended for this dataset.
  steps_per_day: 288
  test_ratio: 0.2
  val_ratio: 0.2

model:
  adaptive_embedding_dim: 80
  dow_embedding_dim: 24
  dropout: 0.1
  feed_forward_dim: 256
  # Same value as data.lag (24).
  in_steps: 24
  # Same value as data.input_dim (6).
  input_dim: 6
  input_embedding_dim: 24
  num_heads: 4
  num_layers: 3
  # Same value as data.num_nodes (35).
  num_nodes: 35
  # Same value as data.horizon (24).
  out_steps: 24
  output_dim: 6
  spatial_embedding_dim: 0
  steps_per_day: 24
  tod_embedding_dim: 24
  use_mixed_proj: true

train:
  # Same value as data.batch_size (16).
  batch_size: 16
  debug: false
  early_stop: true
  early_stop_patience: 15
  epochs: 100
  grad_norm: false
  log_step: 20000
  loss_func: mae
  lr_decay: false
  lr_decay_rate: 0.3
  # Comma-separated string, not a YAML list; quoted so it stays a string.
  # Presumably split on ',' by the consumer — verify against the loader.
  lr_decay_step: '5,20,40,70'
  lr_init: 0.003
  mae_thresh: 0.0
  mape_thresh: 0.0
  max_grad_norm: 5
  # Same value as model.output_dim (6).
  output_dim: 6
  plot: false
  real_value: true
  weight_decay: 0