---
# Experiment configuration in four sections: basic, data, model, train.
# NOTE(review): several settings are repeated across sections (batch_size,
# input_dim, num_nodes, output_dim, steps_per_day) - presumably they must be
# kept in sync; confirm against the code that loads this file.

# Run-level settings.
basic:
  dataset: PEMSD4
  # Quoted so the embedded colon can never be read as a mapping indicator.
  device: 'cuda:0'
  mode: train
  model: STAEFormer
  # NOTE(review): train.seed below is 10 while this is 2023 - confirm which
  # of the two the consumer actually applies.
  seed: 2023

# Dataset loading and splitting settings.
data:
  batch_size: 64
  column_wise: false
  days_per_week: 7
  horizon: 12
  input_dim: 1
  lag: 12
  normalizer: std
  num_nodes: 307
  steps_per_day: 288
  test_ratio: 0.2
  val_ratio: 0.1

# Model hyperparameters.
model:
  adaptive_embedding_dim: 80
  dow_embedding_dim: 24
  dropout: 0.1
  feed_forward_dim: 256
  in_steps: 12
  input_dim: 1
  input_embedding_dim: 24
  num_heads: 4
  num_layers: 3
  num_nodes: 307
  out_steps: 12
  output_dim: 1
  spatial_embedding_dim: 0
  steps_per_day: 288
  tod_embedding_dim: 24
  use_mixed_proj: true

# Training-loop settings.
train:
  batch_size: 64
  debug: false
  early_stop: true
  early_stop_patience: 30
  epochs: 200
  grad_norm: false
  log_step: 2000
  loss_func: Huber
  lr_decay: true
  lr_decay_rate: 0.1
  # Quoted so the comma-separated list is explicitly a string rather than
  # left to scalar-type inference. Presumably split on ',' by the consumer -
  # verify before converting this to a YAML sequence.
  lr_decay_step: '5,20,40,70'
  lr_init: 0.001
  # Explicit null; the original bare `mae_thresh:` parsed to the same value.
  mae_thresh: null
  mape_thresh: 0.0
  output_dim: 1
  plot: false
  real_value: true
  # NOTE(review): differs from basic.seed (2023) - confirm this is intended.
  seed: 10
  weight_decay: 0.0003