---
# Experiment configuration: model "STAEFormer" on dataset "PEMSD4".
#
# NOTE(review): the section nesting below was reconstructed from a
# whitespace-collapsed original using key order; confirm it against the
# schema expected by the code that loads this file.

# Run-level selection: dataset, run mode, compute device, and model name.
basic:
  dataset: "PEMSD4"
  mode: "train"
  device: "cuda:0"
  model: "STAEFormer"

# Dataset and preprocessing settings.
data:
  num_nodes: 307
  lag: 12
  horizon: 12
  val_ratio: 0.1
  test_ratio: 0.2
  tod: false
  normalizer: std
  column_wise: false
  default_graph: true
  add_time_in_day: true
  add_day_in_week: true
  steps_per_day: 288
  days_per_week: 7

# Model hyperparameters.
# NOTE(review): num_nodes and steps_per_day repeat the values in `data`, and
# in_steps/out_steps equal data.lag/data.horizon; presumably these must be
# kept in sync when editing - verify against the loader.
model:
  num_nodes: 307
  in_steps: 12
  out_steps: 12
  steps_per_day: 288
  input_dim: 1
  output_dim: 1
  input_embedding_dim: 24
  tod_embedding_dim: 24
  dow_embedding_dim: 24
  spatial_embedding_dim: 0
  adaptive_embedding_dim: 80
  feed_forward_dim: 256
  num_heads: 4
  num_layers: 3
  dropout: 0.1
  use_mixed_proj: true

# Optimisation and training-loop settings.
train:
  loss_func: Huber
  seed: 10
  batch_size: 16
  epochs: 200
  lr_init: 0.001
  weight_decay: 0.0003
  lr_decay: true
  lr_decay_rate: 0.1
  # Kept as a quoted comma-separated string, exactly as in the original.
  lr_decay_step: "5,20,40,70"
  early_stop: true
  early_stop_patience: 30
  grad_norm: false
  real_value: true

# Evaluation thresholds.
test:
  mae_thresh: null
  mape_thresh: 0.0

# Logging settings.
log:
  log_step: 2000
  plot: false