diff --git a/.vscode/launch.json b/.vscode/launch.json
index 6193b75..fb16dc0 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -6,233 +6,12 @@
     "configurations": [
         {
-            "name": "DDGCRN: METR-LA",
+            "name": "train",
             "type": "debugpy",
             "request": "launch",
-            "program": "run.py",
+            "program": "train.py",
             "console": "integratedTerminal",
-            "args": "--config ./config/DDGCRN/METR-LA.yaml"
-        },
-        // STID model group
-        {
-            "name": "STID: PEMS-BAY",
-            "type": "debugpy",
-            "request": "launch",
-            "program": "run.py",
-            "console": "integratedTerminal",
-            "args": "--config ./config/STID/PEMS-BAY.yaml"
-        },
-        {
-            "name": "STID: METR-LA",
-            "type": "debugpy",
-            "request": "launch",
-            "program": "run.py",
-            "console": "integratedTerminal",
-            "args": "--config ./config/STID/METR-LA.yaml"
-        },
-        {
-            "name": "STID: PEMSD4",
-            "type": "debugpy",
-            "request": "launch",
-            "program": "run.py",
-            "console": "integratedTerminal",
-            "args": "--config ./config/STID/PEMSD4.yaml"
-        },
-        {
-            "name": "STID: BJTaxi-InFlow",
-            "type": "debugpy",
-            "request": "launch",
-            "program": "run.py",
-            "console": "integratedTerminal",
-            "args": "--config ./config/STID/BJTaxi_Inflow.yaml"
-        },
-        {
-            "name": "STID: BJTaxi-OutFlow",
-            "type": "debugpy",
-            "request": "launch",
-            "program": "run.py",
-            "console": "integratedTerminal",
-            "args": "--config ./config/STID/BJTaxi_Outflow.yaml"
-        },
-        {
-            "name": "STID: NYCBike-InFlow",
-            "type": "debugpy",
-            "request": "launch",
-            "program": "run.py",
-            "console": "integratedTerminal",
-            "args": "--config ./config/STID/NYCBike_Inflow.yaml"
-        },
-        {
-            "name": "STID: NYCBike-OutFlow",
-            "type": "debugpy",
-            "request": "launch",
-            "program": "run.py",
-            "console": "integratedTerminal",
-            "args": "--config ./config/STID/NYCBike_Outflow.yaml"
-        },
-        {
-            "name": "STID: SolarEnergy",
-            "type": "debugpy",
-            "request": "launch",
-            "program": "run.py",
-            "console": "integratedTerminal",
-            "args": "--config ./config/STID/SolarEnergy.yaml"
-        },
-
-        // REPST model group
-        {
-            "name": "REPST: PEMSD8",
-            "type": "debugpy",
-            "request": "launch",
-            "program": "run.py",
-            "console": "integratedTerminal",
-            "args": "--config ./config/REPST/PEMSD8.yaml"
-        },
-        {
-            "name": "REPST: BJTaxi-InFlow",
-            "type": "debugpy",
-            "request": "launch",
-            "program": "run.py",
-            "console": "integratedTerminal",
-            "args": "--config ./config/REPST/BJTaxi-Inflow.yaml"
-        },
-        {
-            "name": "REPST: NYCBike-outflow",
-            "type": "debugpy",
-            "request": "launch",
-            "program": "run.py",
-            "console": "integratedTerminal",
-            "args": "--config ./config/REPST/NYCBike-outflow.yaml"
-        },
-        {
-            "name": "REPST: NYCBike-inflow",
-            "type": "debugpy",
-            "request": "launch",
-            "program": "run.py",
-            "console": "integratedTerminal",
-            "args": "--config ./config/REPST/NYCBike-inflow.yaml"
-        },
-        {
-            "name": "REPST: PEMS-BAY",
-            "type": "debugpy",
-            "request": "launch",
-            "program": "run.py",
-            "console": "integratedTerminal",
-            "args": "--config ./config/REPST/PEMS-BAY.yaml"
-        },
-        {
-            "name": "REPST: METR-LA",
-            "type": "debugpy",
-            "request": "launch",
-            "program": "run.py",
-            "console": "integratedTerminal",
-            "args": "--config ./config/REPST/METR-LA.yaml"
-        },
-        {
-            "name": "REPST: SolarEnergy",
-            "type": "debugpy",
-            "request": "launch",
-            "program": "run.py",
-            "console": "integratedTerminal",
-            "args": "--config ./config/REPST/SolarEnergy.yaml"
-        },
-        {
-            "name": "REPST: BeijingAirQuality",
-            "type": "debugpy",
-            "request": "launch",
-            "program": "run.py",
-            "console": "integratedTerminal",
-            "args": "--config ./config/REPST/BeijingAirQuality.yaml"
-        },
-        {
-            "name": "REPST: AirQuality",
-            "type": "debugpy",
-            "request": "launch",
-            "program": "run.py",
-            "console": "integratedTerminal",
-            "args": "--config ./config/REPST/AirQuality.yaml"
-        },
-
-        // AEPSA model group
-        {
-            "name": "AEPSA: PEMS-BAY",
-            "type": "debugpy",
-            "request": "launch",
-            "program": "run.py",
-            "console": "integratedTerminal",
-            "args": "--config ./config/AEPSA/PEMS-BAY.yaml"
-        },
-        {
-            "name": "AEPSA: METR-LA",
-            "type": "debugpy",
-            "request": "launch",
-            "program": "run.py",
-            "console": "integratedTerminal",
-            "args": "--config ./config/AEPSA/METR-LA.yaml"
-        },
-        {
-            "name": "AEPSA: AirQuality",
-            "type": "debugpy",
-            "request": "launch",
-            "program": "run.py",
-            "console": "integratedTerminal",
-            "args": "--config ./config/AEPSA/AirQuality.yaml"
-        },
-        {
-            "name": "AEPSA: BJTaxi-Inflow",
-            "type": "debugpy",
-            "request": "launch",
-            "program": "run.py",
-            "console": "integratedTerminal",
-            "args": "--config ./config/AEPSA/BJTaxi-Inflow.yaml"
-        },
-        {
-            "name": "AEPSA: BJTaxi-outflow",
-            "type": "debugpy",
-            "request": "launch",
-            "program": "run.py",
-            "console": "integratedTerminal",
-            "args": "--config ./config/AEPSA/BJTaxi-outflow.yaml"
-        },
-        {
-            "name": "AEPSA: NYCBike-inflow",
-            "type": "debugpy",
-            "request": "launch",
-            "program": "run.py",
-            "console": "integratedTerminal",
-            "args": "--config ./config/AEPSA/NYCBike-inflow.yaml"
-        },
-        {
-            "name": "AEPSA: NYCBike-outflow",
-            "type": "debugpy",
-            "request": "launch",
-            "program": "run.py",
-            "console": "integratedTerminal",
-            "args": "--config ./config/AEPSA/NYCBike-outflow.yaml"
-        },
-        {
-            "name": "AEPSA: SolarEnergy",
-            "type": "debugpy",
-            "request": "launch",
-            "program": "run.py",
-            "console": "integratedTerminal",
-            "args": "--config ./config/AEPSA/SolarEnergy.yaml"
-        },
-        {
-            "name": "AEPSA_v2: METR-LA",
-            "type": "debugpy",
-            "request": "launch",
-            "program": "run.py",
-            "console": "integratedTerminal",
-            "args": "--config ./config/AEPSA/v2_METR-LA.yaml"
-        },
-        {
-            "name": "AEPSA_v2: SolarEnergy",
-            "type": "debugpy",
-            "request": "launch",
-            "program": "run.py",
-            "console": "integratedTerminal",
-            "args": "--config ./config/AEPSA/v2_SolarEnergy.yaml"
-        },
+            "justMyCode": false
+        }
     ]
 }
\ No newline at end of file
diff --git a/config/AGCRN/AirQuality.yaml b/config/AGCRN/AirQuality.yaml
index b1b904b..e400582 100644
--- a/config/AGCRN/AirQuality.yaml
+++ b/config/AGCRN/AirQuality.yaml
@@ -13,7 +13,7 @@ data:
   input_dim: 6
   lag: 24
   normalizer: std
-  num_nodes: 35
+  num_nodes: 12
   steps_per_day: 24
   test_ratio: 0.2
   val_ratio: 0.2
@@ -24,6 +24,7 @@ model:
   embed_dim: 10
   input_dim: 6
   num_layers: 2
+  num_nodes: 12
   output_dim: 6
   rnn_units: 64
@@ -38,13 +39,16 @@ train:
   loss_func: mae
   lr_decay: false
   lr_decay_rate: 0.3
-  lr_decay_step: [5, 20, 40, 70]
+  lr_decay_step:
+  - 5
+  - 20
+  - 40
+  - 70
   lr_init: 0.003
-  mae_thresh:
+  mae_thresh: 0.0
   mape_thresh: 0.0
   max_grad_norm: 5
   output_dim: 6
   plot: false
   real_value: true
-  seed: 10
-  weight_decay: 0
\ No newline at end of file
+  weight_decay: 0
diff --git a/config/AGCRN/BJTaxi-InFlow.yaml b/config/AGCRN/BJTaxi-InFlow.yaml
index 5206e36..b01c8bf 100644
--- a/config/AGCRN/BJTaxi-InFlow.yaml
+++ b/config/AGCRN/BJTaxi-InFlow.yaml
@@ -24,6 +24,7 @@ model:
   embed_dim: 10
   input_dim: 1
   num_layers: 2
+  num_nodes: 1024
   output_dim: 1
   rnn_units: 64
@@ -38,13 +39,16 @@ train:
   loss_func: mae
   lr_decay: false
   lr_decay_rate: 0.3
-  lr_decay_step: [5, 20, 40, 70]
+  lr_decay_step:
+  - 5
+  - 20
+  - 40
+  - 70
   lr_init: 0.003
-  mae_thresh:
+  mae_thresh: 0.0
   mape_thresh: 0.0
   max_grad_norm: 5
   output_dim: 1
   plot: false
   real_value: true
-  seed: 10
-  weight_decay: 0
\ No newline at end of file
+  weight_decay: 0
diff --git a/config/AGCRN/BJTaxi-OutFlow.yaml b/config/AGCRN/BJTaxi-OutFlow.yaml
index 7b5dc61..c7b687a 100644
--- a/config/AGCRN/BJTaxi-OutFlow.yaml
+++ b/config/AGCRN/BJTaxi-OutFlow.yaml
@@ -24,6 +24,7 @@ model:
   embed_dim: 10
   input_dim: 1
   num_layers: 2
+  num_nodes: 1024
   output_dim: 1
   rnn_units: 64
@@ -38,13 +39,16 @@ train:
   loss_func: mae
   lr_decay: false
   lr_decay_rate: 0.3
-  lr_decay_step: [5, 20, 40, 70]
+  lr_decay_step:
+  - 5
+  - 20
+  - 40
+  - 70
   lr_init: 0.003
-  mae_thresh:
+  mae_thresh: 0.0
   mape_thresh: 0.0
   max_grad_norm: 5
   output_dim: 1
   plot: false
   real_value: true
-  seed: 10
-  weight_decay: 0
\ No newline at end of file
+  weight_decay: 0
diff --git a/config/AGCRN/METR-LA.yaml b/config/AGCRN/METR-LA.yaml
index b24e57e..20eb587 100644
--- a/config/AGCRN/METR-LA.yaml
+++ b/config/AGCRN/METR-LA.yaml
@@ -24,6 +24,7 @@ model:
   embed_dim: 10
   input_dim: 1
   num_layers: 2
+  num_nodes: 207
   output_dim: 1
   rnn_units: 64
@@ -38,13 +39,16 @@ train:
   loss_func: mae
   lr_decay: false
   lr_decay_rate: 0.3
-  lr_decay_step: [5, 20, 40, 70]
+  lr_decay_step:
+  - 5
+  - 20
+  - 40
+  - 70
   lr_init: 0.003
-  mae_thresh:
+  mae_thresh: 0.0
   mape_thresh: 0.0
   max_grad_norm: 5
   output_dim: 1
   plot: false
   real_value: true
-  seed: 10
-  weight_decay: 0
\ No newline at end of file
+  weight_decay: 0
diff --git a/config/AGCRN/NYCBike-InFlow.yaml b/config/AGCRN/NYCBike-InFlow.yaml
index c1abc45..d33cab3 100644
--- a/config/AGCRN/NYCBike-InFlow.yaml
+++ b/config/AGCRN/NYCBike-InFlow.yaml
@@ -24,6 +24,7 @@ model:
   embed_dim: 10
   input_dim: 1
   num_layers: 2
+  num_nodes: 128
   output_dim: 1
   rnn_units: 64
@@ -38,13 +39,16 @@ train:
   loss_func: mae
   lr_decay: false
   lr_decay_rate: 0.3
-  lr_decay_step: [5, 20, 40, 70]
+  lr_decay_step:
+  - 5
+  - 20
+  - 40
+  - 70
   lr_init: 0.003
-  mae_thresh:
+  mae_thresh: 0.0
   mape_thresh: 0.0
   max_grad_norm: 5
   output_dim: 1
   plot: false
   real_value: true
-  seed: 10
-  weight_decay: 0
\ No newline at end of file
+  weight_decay: 0
diff --git a/config/AGCRN/NYCBike-OutFlow.yaml b/config/AGCRN/NYCBike-OutFlow.yaml
index 9a5a846..1e1044f 100644
--- a/config/AGCRN/NYCBike-OutFlow.yaml
+++ b/config/AGCRN/NYCBike-OutFlow.yaml
@@ -24,6 +24,7 @@ model:
   embed_dim: 10
   input_dim: 1
   num_layers: 2
+  num_nodes: 128
   output_dim: 1
   rnn_units: 64
@@ -38,13 +39,16 @@ train:
   loss_func: mae
   lr_decay: false
   lr_decay_rate: 0.3
-  lr_decay_step: [5, 20, 40, 70]
+  lr_decay_step:
+  - 5
+  - 20
+  - 40
+  - 70
   lr_init: 0.003
-  mae_thresh:
+  mae_thresh: 0.0
   mape_thresh: 0.0
   max_grad_norm: 5
   output_dim: 1
   plot: false
   real_value: true
-  seed: 10
-  weight_decay: 0
\ No newline at end of file
+  weight_decay: 0
diff --git a/config/AGCRN/PEMSD3.yaml b/config/AGCRN/PEMSD3.yaml
index aa68819..53621fd 100755
--- a/config/AGCRN/PEMSD3.yaml
+++ b/config/AGCRN/PEMSD3.yaml
@@ -24,6 +24,7 @@ model:
   embed_dim: 10
   input_dim: 1
   num_layers: 2
+  num_nodes: 358
   output_dim: 1
   rnn_units: 64
@@ -38,13 +39,16 @@ train:
   loss_func: mae
   lr_decay: false
   lr_decay_rate: 0.3
-  lr_decay_step: [5, 20, 40, 70]
+  lr_decay_step:
+  - 5
+  - 20
+  - 40
+  - 70
   lr_init: 0.003
-  mae_thresh:
+  mae_thresh: 0.0
   mape_thresh: 0.0
   max_grad_norm: 5
   output_dim: 1
   plot: false
   real_value: true
-  seed: 10
   weight_decay: 0
diff --git a/config/AGCRN/PEMSD4.yaml b/config/AGCRN/PEMSD4.yaml
index 317b767..3aa586c 100755
--- a/config/AGCRN/PEMSD4.yaml
+++ b/config/AGCRN/PEMSD4.yaml
@@ -24,6 +24,7 @@ model:
   embed_dim: 10
   input_dim: 1
   num_layers: 2
+  num_nodes: 307
   output_dim: 1
   rnn_units: 64
@@ -38,13 +39,16 @@ train:
   loss_func: mae
   lr_decay: false
   lr_decay_rate: 0.3
-  lr_decay_step: [5, 20, 40, 70]
+  lr_decay_step:
+  - 5
+  - 20
+  - 40
+  - 70
   lr_init: 0.003
-  mae_thresh:
+  mae_thresh: 0.0
   mape_thresh: 0.0
   max_grad_norm: 5
   output_dim: 1
   plot: false
   real_value: true
-  seed: 10
   weight_decay: 0
diff --git a/config/AGCRN/PEMSD7.yaml b/config/AGCRN/PEMSD7.yaml
index 0c0d96f..5859001 100755
--- a/config/AGCRN/PEMSD7.yaml
+++ b/config/AGCRN/PEMSD7.yaml
@@ -24,6 +24,7 @@ model:
   embed_dim: 10
   input_dim: 1
   num_layers: 2
+  num_nodes: 883
   output_dim: 1
   rnn_units: 64
@@ -38,13 +39,16 @@ train:
   loss_func: mae
   lr_decay: false
   lr_decay_rate: 0.3
-  lr_decay_step: [5, 20, 40, 70]
+  lr_decay_step:
+  - 5
+  - 20
+  - 40
+  - 70
   lr_init: 0.003
-  mae_thresh:
+  mae_thresh: 0.0
   mape_thresh: 0.0
   max_grad_norm: 5
   output_dim: 1
   plot: false
   real_value: true
-  seed: 10
   weight_decay: 0
diff --git a/config/AGCRN/PEMSD8.yaml b/config/AGCRN/PEMSD8.yaml
index 7725af7..93a4250 100755
--- a/config/AGCRN/PEMSD8.yaml
+++ b/config/AGCRN/PEMSD8.yaml
@@ -24,6 +24,7 @@ model:
   embed_dim: 2
   input_dim: 1
   num_layers: 2
+  num_nodes: 170
   output_dim: 1
   rnn_units: 64
@@ -38,13 +39,16 @@ train:
   loss_func: mae
   lr_decay: false
   lr_decay_rate: 0.3
-  lr_decay_step: [5, 20, 40, 70]
+  lr_decay_step:
+  - 5
+  - 20
+  - 40
+  - 70
   lr_init: 0.003
-  mae_thresh:
+  mae_thresh: 0.0
   mape_thresh: 0.0
   max_grad_norm: 5
   output_dim: 1
   plot: false
   real_value: true
-  seed: 12
   weight_decay: 0
diff --git a/config/AGCRN/SolarEnergy.yaml b/config/AGCRN/SolarEnergy.yaml
index 094aec9..c6a666a 100644
--- a/config/AGCRN/SolarEnergy.yaml
+++ b/config/AGCRN/SolarEnergy.yaml
@@ -46,11 +46,10 @@ train:
   lr_decay_rate: 0.3
   lr_decay_step: 5,20,40,70
   lr_init: 0.003
-  mae_thresh:
+  mae_thresh: 0.0
   mape_thresh: 0.001
   max_grad_norm: 5
   output_dim: 1
   plot: false
   real_value: true
-  seed: 10
-  weight_decay: 0
\ No newline at end of file
+  weight_decay: 0
diff --git a/config/ARIMA/AirQuality.yaml b/config/ARIMA/AirQuality.yaml
index cc2885e..45ce496 100644
--- a/config/ARIMA/AirQuality.yaml
+++ b/config/ARIMA/AirQuality.yaml
@@ -4,6 +4,7 @@ basic:
   mode: train
   model: ARIMA
   seed: 2023
+
 data:
   batch_size: 16
   column_wise: false
@@ -12,17 +13,20 @@ data:
   input_dim: 6
   lag: 24
   normalizer: std
-  num_nodes: 35
+  num_nodes: 12
   steps_per_day: 24
   test_ratio: 0.2
   val_ratio: 0.2
+
 model:
   d: 1
   drift: true
   input_dim: 6
+  num_nodes: 12
   output_dim: 6
   p: 2
   q: 1
+
 train:
   batch_size: 16
   debug: false
@@ -36,11 +40,10 @@ train:
   lr_decay_rate: 0.3
   lr_decay_step: 5,20,40,70
   lr_init: 0.003
-  mae_thresh: ''
+  mae_thresh: 0.0
   mape_thresh: 0.0
   max_grad_norm: 5
   output_dim: 6
   plot: false
   real_value: true
-  seed: 10
   weight_decay: 0
diff --git a/config/ARIMA/BJTaxi-InFlow.yaml b/config/ARIMA/BJTaxi-InFlow.yaml
index 0be9d12..85254a9 100644
--- a/config/ARIMA/BJTaxi-InFlow.yaml
+++ b/config/ARIMA/BJTaxi-InFlow.yaml
@@ -4,6 +4,7 @@ basic:
   mode: train
   model: ARIMA
   seed: 2023
+
 data:
   batch_size: 32
   column_wise: false
@@ -16,13 +17,16 @@ data:
   steps_per_day: 48
   test_ratio: 0.2
   val_ratio: 0.2
+
 model:
   d: 1
   drift: true
   input_dim: 1
+  num_nodes: 1024
   output_dim: 1
   p: 2
   q: 1
+
 train:
   batch_size: 32
   debug: false
@@ -36,11 +40,10 @@ train:
   lr_decay_rate: 0.3
   lr_decay_step: 5,20,40,70
   lr_init: 0.003
-  mae_thresh: ''
+  mae_thresh: 0.0
   mape_thresh: 0.0
   max_grad_norm: 5
   output_dim: 1
   plot: false
   real_value: true
-  seed: 10
   weight_decay: 0
diff --git a/config/ARIMA/BJTaxi-OutFlow.yaml b/config/ARIMA/BJTaxi-OutFlow.yaml
index 14f9578..3c1d233 100644
--- a/config/ARIMA/BJTaxi-OutFlow.yaml
+++ b/config/ARIMA/BJTaxi-OutFlow.yaml
@@ -4,6 +4,7 @@ basic:
   mode: train
   model: ARIMA
   seed: 2023
+
 data:
   batch_size: 32
   column_wise: false
@@ -16,13 +17,16 @@ data:
   steps_per_day: 48
   test_ratio: 0.2
   val_ratio: 0.2
+
 model:
   d: 1
   drift: true
   input_dim: 1
+  num_nodes: 1024
   output_dim: 1
   p: 2
   q: 1
+
 train:
   batch_size: 32
   debug: false
@@ -36,11 +40,10 @@ train:
   lr_decay_rate: 0.3
   lr_decay_step: 5,20,40,70
   lr_init: 0.003
-  mae_thresh: ''
+  mae_thresh: 0.0
   mape_thresh: 0.0
   max_grad_norm: 5
   output_dim: 1
   plot: false
   real_value: true
-  seed: 10
   weight_decay: 0
diff --git a/config/ARIMA/Hainan.yaml b/config/ARIMA/Hainan.yaml
index 90cfda2..ed1a12a 100755
--- a/config/ARIMA/Hainan.yaml
+++ b/config/ARIMA/Hainan.yaml
@@ -13,13 +13,14 @@ data:
   input_dim: 1
   lag: 12
   normalizer: std
-  num_nodes: 13
+  num_nodes: 200
   steps_per_day: 288
   test_ratio: 0.2
   val_ratio: 0.2
 
 model:
   input_dim: 1
+  num_nodes: 200
   output_dim: 1
 
 train:
@@ -39,7 +40,7 @@ train:
   - 40
   - 70
   lr_init: 0.003
-  mae_thresh:
+  mae_thresh: null
   mape_thresh: 0.0
   max_grad_norm: 5
   output_dim: 1
diff --git a/config/ARIMA/METR-LA.yaml b/config/ARIMA/METR-LA.yaml
index 2b8598a..084e20e 100644
--- a/config/ARIMA/METR-LA.yaml
+++ b/config/ARIMA/METR-LA.yaml
@@ -4,6 +4,7 @@ basic:
   mode: train
   model: ARIMA
   seed: 2023
+
 data:
   batch_size: 16
   column_wise: false
@@ -16,13 +17,16 @@ data:
   steps_per_day: 288
   test_ratio: 0.2
   val_ratio: 0.2
+
 model:
   d: 1
   drift: true
   input_dim: 1
+  num_nodes: 207
   output_dim: 1
   p: 2
   q: 1
+
 train:
   batch_size: 16
   debug: false
@@ -36,11 +40,10 @@ train:
   lr_decay_rate: 0.3
   lr_decay_step: 5,20,40,70
   lr_init: 0.003
-  mae_thresh: ''
+  mae_thresh: 0.0
   mape_thresh: 0.0
   max_grad_norm: 5
   output_dim: 1
   plot: false
   real_value: true
-  seed: 10
   weight_decay: 0
diff --git a/config/ARIMA/NYCBike-InFlow.yaml b/config/ARIMA/NYCBike-InFlow.yaml
index 127493b..0da5634 100644
--- a/config/ARIMA/NYCBike-InFlow.yaml
+++ b/config/ARIMA/NYCBike-InFlow.yaml
@@ -4,6 +4,7 @@ basic:
   mode: train
   model: ARIMA
   seed: 2023
+
 data:
   batch_size: 32
   column_wise: false
@@ -12,17 +13,20 @@ data:
   input_dim: 1
   lag: 24
   normalizer: std
-  num_nodes: 1024
+  num_nodes: 128
   steps_per_day: 48
   test_ratio: 0.2
   val_ratio: 0.2
+
 model:
   d: 1
   drift: true
   input_dim: 1
+  num_nodes: 128
   output_dim: 1
   p: 2
   q: 1
+
 train:
   batch_size: 32
   debug: false
@@ -36,11 +40,10 @@ train:
   lr_decay_rate: 0.3
   lr_decay_step: 5,20,40,70
   lr_init: 0.003
-  mae_thresh: ''
+  mae_thresh: 0.0
   mape_thresh: 0.0
   max_grad_norm: 5
   output_dim: 1
   plot: false
   real_value: true
-  seed: 10
   weight_decay: 0
diff --git a/config/ARIMA/NYCBike-OutFlow.yaml b/config/ARIMA/NYCBike-OutFlow.yaml
index a1e3819..ddb85a2 100644
--- a/config/ARIMA/NYCBike-OutFlow.yaml
+++ b/config/ARIMA/NYCBike-OutFlow.yaml
@@ -4,6 +4,7 @@ basic:
   mode: train
   model: ARIMA
   seed: 2023
+
 data:
   batch_size: 32
   column_wise: false
@@ -12,17 +13,20 @@ data:
   input_dim: 1
   lag: 24
   normalizer: std
-  num_nodes: 1024
+  num_nodes: 128
   steps_per_day: 48
   test_ratio: 0.2
   val_ratio: 0.2
+
 model:
   d: 1
   drift: true
   input_dim: 1
+  num_nodes: 128
   output_dim: 1
   p: 2
   q: 1
+
 train:
   batch_size: 32
   debug: false
@@ -36,11 +40,10 @@ train:
   lr_decay_rate: 0.3
   lr_decay_step: 5,20,40,70
   lr_init: 0.003
-  mae_thresh: ''
+  mae_thresh: 0.0
   mape_thresh: 0.0
   max_grad_norm: 5
   output_dim: 1
   plot: false
   real_value: true
-  seed: 10
   weight_decay: 0
diff --git a/config/ARIMA/PEMSD3.yaml b/config/ARIMA/PEMSD3.yaml
index 37bae7a..27b8605 100755
--- a/config/ARIMA/PEMSD3.yaml
+++ b/config/ARIMA/PEMSD3.yaml
@@ -22,6 +22,7 @@ model:
   d: 1
   drift: true
   input_dim: 1
+  num_nodes: 358
   output_dim: 1
   p: 2
   q: 1
@@ -39,11 +40,10 @@ train:
   lr_decay_rate: 0.3
   lr_decay_step: 5,20,40,70
   lr_init: 0.003
-  mae_thresh:
+  mae_thresh: 0.0
   mape_thresh: 0.0
   max_grad_norm: 5
   output_dim: 1
   plot: false
   real_value: true
-  seed: 10
   weight_decay: 0
diff --git a/config/ARIMA/PEMSD4.yaml b/config/ARIMA/PEMSD4.yaml
index 714973b..b4f0439 100755
--- a/config/ARIMA/PEMSD4.yaml
+++ b/config/ARIMA/PEMSD4.yaml
@@ -22,6 +22,7 @@ model:
   d: 1
   drift: true
   input_dim: 1
+  num_nodes: 307
   output_dim: 1
   p: 2
   q: 1
@@ -37,13 +38,16 @@ train:
   loss_func: mae
   lr_decay: false
   lr_decay_rate: 0.3
-  lr_decay_step: [5, 20, 40, 70]
+  lr_decay_step:
+  - 5
+  - 20
+  - 40
+  - 70
   lr_init: 0.003
-  mae_thresh:
+  mae_thresh: 0.0
   mape_thresh: 0.0
   max_grad_norm: 5
   output_dim: 1
   plot: false
   real_value: true
-  seed: 10
   weight_decay: 0
diff --git a/config/ARIMA/PEMSD7(L).yaml b/config/ARIMA/PEMSD7(L).yaml
index 3eedd0d..25653c3 100755
--- a/config/ARIMA/PEMSD7(L).yaml
+++ b/config/ARIMA/PEMSD7(L).yaml
@@ -51,5 +51,4 @@ train:
   output_dim: 1
   plot: true
   real_value: true
-  seed: 12
   weight_decay: 0
diff --git a/config/ARIMA/PEMSD7(M).yaml b/config/ARIMA/PEMSD7(M).yaml
index 5992f68..24ef88e 100755
--- a/config/ARIMA/PEMSD7(M).yaml
+++ b/config/ARIMA/PEMSD7(M).yaml
@@ -51,5 +51,4 @@ train:
   output_dim: 1
   plot: true
   real_value: true
-  seed: 12
   weight_decay: 0
diff --git a/config/ARIMA/PEMSD7.yaml b/config/ARIMA/PEMSD7.yaml
index 6c027c6..f5a5255 100755
--- a/config/ARIMA/PEMSD7.yaml
+++ b/config/ARIMA/PEMSD7.yaml
@@ -22,6 +22,7 @@ model:
   d: 1
   drift: true
   input_dim: 1
+  num_nodes: 883
   output_dim: 1
   p: 2
   q: 1
@@ -49,5 +50,4 @@
   output_dim: 1
   plot: false
   real_value: true
-  seed: 10
   weight_decay: 0
diff --git a/config/ARIMA/PEMSD8.yaml b/config/ARIMA/PEMSD8.yaml
index cbe3ed3..16e0339 100755
--- a/config/ARIMA/PEMSD8.yaml
+++ b/config/ARIMA/PEMSD8.yaml
@@ -22,6 +22,7 @@ model:
   d: 1
   drift: true
   input_dim: 1
+  num_nodes: 170
   output_dim: 1
   p: 2
   q: 1
@@ -37,7 +38,11 @@ train:
   loss_func: mae
   lr_decay: false
   lr_decay_rate: 0.3
-  lr_decay_step: [5, 20, 40, 70]
+  lr_decay_step:
+  - 5
+  - 20
+  - 40
+  - 70
   lr_init: 0.003
   mae_thresh: None
   mape_thresh: 0.001
@@ -45,5 +50,4 @@
   output_dim: 1
   plot: false
   real_value: true
-  seed: 10
   weight_decay: 0
diff --git a/config/ARIMA/SolarEnergy.yaml b/config/ARIMA/SolarEnergy.yaml
index f1aeb63..9e921f0 100644
--- a/config/ARIMA/SolarEnergy.yaml
+++ b/config/ARIMA/SolarEnergy.yaml
@@ -4,6 +4,7 @@ basic:
   mode: train
   model: ARIMA
   seed: 2023
+
 data:
   batch_size: 16
   column_wise: false
@@ -16,13 +17,16 @@ data:
   steps_per_day: 24
   test_ratio: 0.2
   val_ratio: 0.2
+
 model:
   d: 1
   drift: true
   input_dim: 137
+  num_nodes: 137
   output_dim: 137
   p: 2
   q: 1
+
 train:
   batch_size: 16
   debug: false
@@ -36,11 +40,10 @@ train:
   lr_decay_rate: 0.3
   lr_decay_step: 5,20,40,70
   lr_init: 0.003
-  mae_thresh: ''
+  mae_thresh: 0.0
   mape_thresh: 0.0
   max_grad_norm: 5
   output_dim: 137
   plot: false
   real_value: true
-  seed: 10
   weight_decay: 0
diff --git a/config/AEPSA/AirQuality.yaml b/config/ASTRA/AirQuality.yaml
similarity index 96%
rename from config/AEPSA/AirQuality.yaml
rename to config/ASTRA/AirQuality.yaml
index d6061d9..7d4868e 100644
--- a/config/AEPSA/AirQuality.yaml
+++ b/config/ASTRA/AirQuality.yaml
@@ -2,7 +2,7 @@ basic:
   dataset: AirQuality
   device: cuda:0
   mode: train
-  model: AEPSA
+  model: ASTRA
   seed: 2023
 
 data:
@@ -32,6 +32,7 @@ model:
   seq_len: 24
   stride: 7
   word_num: 1000
+  output_dim: 6
 
 train:
   batch_size: 16
diff --git a/config/AEPSA/BJTaxi-Inflow.yaml b/config/ASTRA/BJTaxi-InFlow.yaml
similarity index 96%
rename from config/AEPSA/BJTaxi-Inflow.yaml
rename to config/ASTRA/BJTaxi-InFlow.yaml
index a453b38..8569919 100644
--- a/config/AEPSA/BJTaxi-Inflow.yaml
+++ b/config/ASTRA/BJTaxi-InFlow.yaml
@@ -2,7 +2,7 @@ basic:
   dataset: BJTaxi-InFlow
   device: cuda:0
   mode: train
-  model: AEPSA
+  model: ASTRA
   seed: 2023
 
 data:
@@ -32,6 +32,7 @@ model:
   seq_len: 24
   stride: 7
   word_num: 1000
+  output_dim: 1
 
 train:
   batch_size: 32
diff --git a/config/AEPSA/BJTaxi-outflow.yaml b/config/ASTRA/BJTaxi-OutFlow.yaml
similarity index 95%
rename from config/AEPSA/BJTaxi-outflow.yaml
rename to config/ASTRA/BJTaxi-OutFlow.yaml
index 9fa0f5f..d8f0e5d 100644
--- a/config/AEPSA/BJTaxi-outflow.yaml
+++ b/config/ASTRA/BJTaxi-OutFlow.yaml
@@ -2,7 +2,7 @@ basic:
   dataset: BJTaxi-OutFlow
   device: cuda:0
   mode: train
-  model: AEPSA
+  model: ASTRA
   seed: 2023
 
 data:
@@ -17,7 +17,8 @@ data:
   steps_per_day: 48
   test_ratio: 0.2
   val_ratio: 0.2
-
+  output_dim: 1
+
 model:
   d_ff: 128
   d_model: 64
diff --git a/config/AEPSA/METR-LA.yaml b/config/ASTRA/METR-LA.yaml
similarity index 96%
rename from config/AEPSA/METR-LA.yaml
rename to config/ASTRA/METR-LA.yaml
index a623226..3ae73ec 100644
--- a/config/AEPSA/METR-LA.yaml
+++ b/config/ASTRA/METR-LA.yaml
@@ -2,7 +2,7 @@ basic:
   dataset: METR-LA
   device: cuda:0
   mode: train
-  model: AEPSA
+  model: ASTRA
   seed: 2023
 
 data:
@@ -32,6 +32,7 @@ model:
   seq_len: 24
   stride: 7
   word_num: 1000
+  output_dim: 1
 
 train:
   batch_size: 16
diff --git a/config/AEPSA/NYCBike-inflow.yaml b/config/ASTRA/NYCBike-InFlow.yaml
similarity index 93%
rename from config/AEPSA/NYCBike-inflow.yaml
rename to config/ASTRA/NYCBike-InFlow.yaml
index e4ba138..0099f8f 100644
--- a/config/AEPSA/NYCBike-inflow.yaml
+++ b/config/ASTRA/NYCBike-InFlow.yaml
@@ -2,7 +2,7 @@ basic:
   dataset: NYCBike-InFlow
   device: cuda:0
   mode: train
-  model: AEPSA
+  model: ASTRA
   seed: 2023
 
 data:
@@ -14,7 +14,7 @@ data:
   lag: 24
   normalizer: std
   num_nodes: 128
-  steps_per_day: 24
+  steps_per_day: 48
   test_ratio: 0.2
   val_ratio: 0.2
 
@@ -32,7 +32,8 @@ model:
   seq_len: 24
   stride: 7
   word_num: 1000
-
+  output_dim: 1
+
 train:
   batch_size: 32
   debug: false
diff --git a/config/AEPSA/NYCBike-outflow.yaml b/config/ASTRA/NYCBike-OutFlow.yaml
similarity index 93%
rename from config/AEPSA/NYCBike-outflow.yaml
rename to config/ASTRA/NYCBike-OutFlow.yaml
index 7cb6798..f46cece 100644
--- a/config/AEPSA/NYCBike-outflow.yaml
+++ b/config/ASTRA/NYCBike-OutFlow.yaml
@@ -2,7 +2,7 @@ basic:
   dataset: NYCBike-OutFlow
   device: cuda:0
   mode: train
-  model: AEPSA
+  model: ASTRA
   seed: 2023
 
 data:
@@ -14,7 +14,7 @@ data:
   lag: 24
   normalizer: std
   num_nodes: 128
-  steps_per_day: 24
+  steps_per_day: 48
   test_ratio: 0.2
   val_ratio: 0.2
 
@@ -32,6 +32,7 @@ model:
   seq_len: 24
   stride: 7
   word_num: 1000
+  output_dim: 1
 
 train:
   batch_size: 32
diff --git a/config/AEPSA/PEMS-BAY.yaml b/config/ASTRA/PEMS-BAY.yaml
similarity index 96%
rename from config/AEPSA/PEMS-BAY.yaml
rename to config/ASTRA/PEMS-BAY.yaml
index f75c63a..2b2384d 100755
--- a/config/AEPSA/PEMS-BAY.yaml
+++ b/config/ASTRA/PEMS-BAY.yaml
@@ -2,7 +2,7 @@ basic:
   dataset: PEMS-BAY
   device: cuda:0
   mode: train
-  model: AEPSA
+  model: ASTRA
   seed: 2023
 
 data:
@@ -32,6 +32,7 @@ model:
   seq_len: 24
   stride: 7
   word_num: 1000
+  output_dim: 1
 
 train:
   batch_size: 16
diff --git a/config/AEPSA/SolarEnergy.yaml b/config/ASTRA/SolarEnergy.yaml
similarity index 96%
rename from config/AEPSA/SolarEnergy.yaml
rename to config/ASTRA/SolarEnergy.yaml
index 669c9f4..dd64d64 100644
--- a/config/AEPSA/SolarEnergy.yaml
+++ b/config/ASTRA/SolarEnergy.yaml
@@ -2,7 +2,7 @@ basic:
   dataset: SolarEnergy
   device: cuda:0
   mode: train
-  model: AEPSA
+  model: ASTRA
   seed: 2023
 
 data:
@@ -32,6 +32,7 @@ model:
   seq_len: 24
   stride: 7
   word_num: 1000
+  output_dim: 1
 
 train:
   batch_size: 64
diff --git a/config/ASTRA_v2/AirQuality.yaml b/config/ASTRA_v2/AirQuality.yaml
new file mode 100644
index 0000000..9073676
--- /dev/null
+++ b/config/ASTRA_v2/AirQuality.yaml
@@ -0,0 +1,58 @@
+basic:
+  dataset: AirQuality
+  device: cuda:0
+  mode: train
+  model: ASTRA_v2
+  seed: 2023
+
+data:
+  batch_size: 16
+  column_wise: false
+  days_per_week: 7
+  horizon: 24
+  input_dim: 6
+  lag: 24
+  normalizer: std
+  num_nodes: 35
+  steps_per_day: 24
+  test_ratio: 0.2
+  val_ratio: 0.2
+
+model:
+  cheb: 3
+  d_ff: 128
+  d_model: 64
+  dropout: 0.2
+  gpt_layers: 9
+  gpt_path: ./GPT-2
+  input_dim: 6
+  n_heads: 1
+  num_nodes: 35
+  patch_len: 6
+  pred_len: 24
+  seq_len: 24
+  stride: 7
+  word_num: 1000
+  output_dim: 6
+  graph_dim: 64
+  graph_embed_dim: 10
+
+train:
+  batch_size: 16
+  debug: false
+  early_stop: true
+  early_stop_patience: 15
+  epochs: 100
+  grad_norm: false
+  log_step: 100
+  loss_func: mae
+  lr_decay: true
+  lr_decay_rate: 0.3
+  lr_decay_step: 5,20,40,70
+  lr_init: 0.003
+  mae_thresh: None
+  mape_thresh: 0.001
+  max_grad_norm: 5
+  output_dim: 6
+  plot: false
+  weight_decay: 0
diff --git a/config/ASTRA_v2/BJTaxi-InFlow.yaml b/config/ASTRA_v2/BJTaxi-InFlow.yaml
new file mode 100644
index 0000000..5968cca
--- /dev/null
+++ b/config/ASTRA_v2/BJTaxi-InFlow.yaml
@@ -0,0 +1,58 @@
+basic:
+  dataset: BJTaxi-InFlow
+  device: cuda:0
+  mode: train
+  model: ASTRA_v2
+  seed: 2023
+
+data:
+  batch_size: 32
+  column_wise: false
+  days_per_week: 7
+  horizon: 24
+  input_dim: 1
+  lag: 24
+  normalizer: std
+  num_nodes: 1024
+  steps_per_day: 48
+  test_ratio: 0.2
+  val_ratio: 0.2
+
+model:
+  cheb: 3
+  d_ff: 128
+  d_model: 64
+  dropout: 0.2
+  gpt_layers: 9
+  gpt_path: ./GPT-2
+  input_dim: 1
+  n_heads: 1
+  num_nodes: 1024
+  patch_len: 6
+  pred_len: 24
+  seq_len: 24
+  stride: 7
+  word_num: 1000
+  graph_dim: 64
+  graph_embed_dim: 10
+  output_dim: 1
+
+train:
+  batch_size: 32
+  debug: false
+  early_stop: true
+  early_stop_patience: 15
+  epochs: 100
+  grad_norm: false
+  log_step: 100
+  loss_func: mae
+  lr_decay: true
+  lr_decay_rate: 0.3
+  lr_decay_step: 5,20,40,70
+  lr_init: 0.003
+  mae_thresh: None
+  mape_thresh: 0.001
+  max_grad_norm: 5
+  output_dim: 1
+  plot: false
+  weight_decay: 0
diff --git a/config/ASTRA_v2/BJTaxi-OutFlow.yaml b/config/ASTRA_v2/BJTaxi-OutFlow.yaml
new file mode 100644
index 0000000..03859eb
--- /dev/null
+++ b/config/ASTRA_v2/BJTaxi-OutFlow.yaml
@@ -0,0 +1,58 @@
+basic:
+  dataset: BJTaxi-OutFlow
+  device: cuda:0
+  mode: train
+  model: ASTRA_v2
+  seed: 2023
+
+data:
+  batch_size: 32
+  column_wise: false
+  days_per_week: 7
+  horizon: 24
+  input_dim: 1
+  lag: 24
+  normalizer: std
+  num_nodes: 1024
+  steps_per_day: 48
+  test_ratio: 0.2
+  val_ratio: 0.2
+
+model:
+  cheb: 3
+  d_ff: 128
+  d_model: 64
+  dropout: 0.2
+  gpt_layers: 9
+  gpt_path: ./GPT-2
+  input_dim: 1
+  n_heads: 1
+  num_nodes: 1024
+  patch_len: 6
+  pred_len: 24
+  seq_len: 24
+  stride: 7
+  word_num: 1000
+  graph_dim: 64
+  graph_embed_dim: 10
+  output_dim: 1
+
+train:
+  batch_size: 32
+  debug: false
+  early_stop: true
+  early_stop_patience: 15
+  epochs: 100
+  grad_norm: false
+  log_step: 100
+  loss_func: mae
+  lr_decay: true
+  lr_decay_rate: 0.3
+  lr_decay_step: 5,20,40,70
+  lr_init: 0.003
+  mae_thresh: None
+  mape_thresh: 0.001
+  max_grad_norm: 5
+  output_dim: 1
+  plot: false
+  weight_decay: 0
diff --git a/config/ASTRA_v2/METR-LA.yaml b/config/ASTRA_v2/METR-LA.yaml
new file mode 100644
index 0000000..db6e3a8
--- /dev/null
+++ b/config/ASTRA_v2/METR-LA.yaml
@@ -0,0 +1,59 @@
+basic:
+  dataset: METR-LA
+  device: cuda:0
+  mode: train
+  model: ASTRA_v2
+  seed: 2023
+
+data:
+  batch_size: 16
+  column_wise: false
+  days_per_week: 7
+  horizon: 24
+  input_dim: 1
+  lag: 24
+  normalizer: std
+  num_nodes: 207
+  steps_per_day: 288
+  test_ratio: 0.2
+  val_ratio: 0.2
+
+model:
+  cheb: 3
+  d_ff: 128
+  d_model: 64
+  dropout: 0.2
+  gpt_layers: 9
+  gpt_path: ./GPT-2
+  input_dim: 1
+  n_heads: 1
+  num_nodes: 207
+  patch_len: 6
+  pred_len: 24
+  seq_len: 24
+  stride: 7
+  word_num: 1000
+  graph_dim: 64
+  graph_embed_dim: 10
+  output_dim: 1
+
+train:
+  batch_size: 16
+  debug: false
+  early_stop: true
+  early_stop_patience: 15
+  epochs: 100
+  grad_norm: false
+  log_step: 1000
+  loss_func: mae
+  lr_decay: true
+  lr_decay_rate: 0.3
+  lr_decay_step: 5,20,40,70
+  lr_init: 0.003
+  mae_thresh: None
+  mape_thresh: 0.001
+  max_grad_norm: 5
+  output_dim: 1
+  plot: false
+  real_value: true
+  weight_decay: 0
diff --git a/config/ASTRA_v2/NYCBike-InFlow.yaml b/config/ASTRA_v2/NYCBike-InFlow.yaml
new file mode 100644
index 0000000..caeccb7
--- /dev/null
+++ b/config/ASTRA_v2/NYCBike-InFlow.yaml
@@ -0,0 +1,58 @@
+basic:
+  dataset: NYCBike-InFlow
+  device: cuda:0
+  mode: train
+  model: ASTRA_v2
+  seed: 2023
+
+data:
+  batch_size: 32
+  column_wise: false
+  days_per_week: 7
+  horizon: 24
+  input_dim: 1
+  lag: 24
+  normalizer: std
+  num_nodes: 128
+  steps_per_day: 48
+  test_ratio: 0.2
+  val_ratio: 0.2
+
+model:
+  cheb: 3
+  d_ff: 128
+  d_model: 64
+  dropout: 0.2
+  gpt_layers: 9
+  gpt_path: ./GPT-2
+  input_dim: 1
+  n_heads: 1
+  num_nodes: 128
+  patch_len: 6
+  pred_len: 24
+  seq_len: 24
+  stride: 7
+  word_num: 1000
+  graph_dim: 64
+  graph_embed_dim: 10
+  output_dim: 1
+
+train:
+  batch_size: 32
+  debug: false
+  early_stop: true
+  early_stop_patience: 15
+  epochs: 100
+  grad_norm: false
+  log_step: 100
+  loss_func: mae
+  lr_decay: true
+  lr_decay_rate: 0.3
+  lr_decay_step: 5,20,40,70
+  lr_init: 0.003
+  mae_thresh: None
+  mape_thresh: 0.001
+  max_grad_norm: 5
+  output_dim: 1
+  plot: false
+  weight_decay: 0
diff --git a/config/ASTRA_v2/NYCBike-OutFlow.yaml b/config/ASTRA_v2/NYCBike-OutFlow.yaml
new file mode 100644
index 0000000..a586f9a
--- /dev/null
+++ b/config/ASTRA_v2/NYCBike-OutFlow.yaml
@@ -0,0 +1,58 @@
+basic:
+  dataset: NYCBike-OutFlow
+  device: cuda:0
+  mode: train
+  model: ASTRA_v2
+  seed: 2023
+
+data:
+  batch_size: 32
+  column_wise: false
+  days_per_week: 7
+  horizon: 24
+  input_dim: 1
+  lag: 24
+  normalizer: std
+  num_nodes: 128
+  steps_per_day: 48
+  test_ratio: 0.2
+  val_ratio: 0.2
+
+model:
+  cheb: 3
+  d_ff: 128
+  d_model: 64
+  dropout: 0.2
+  gpt_layers: 9
+  gpt_path: ./GPT-2
+  input_dim: 1
+  n_heads: 1
+  num_nodes: 128
+  patch_len: 6
+  pred_len: 24
+  seq_len: 24
+  stride: 7
+  word_num: 1000
+  graph_dim: 64
+  graph_embed_dim: 10
+  output_dim: 1
+
+train:
+  batch_size: 32
+  debug: false
+  early_stop: true
+  early_stop_patience: 15
+  epochs: 100
+  grad_norm: false
+  log_step: 100
+  loss_func: mae
+  lr_decay: true
+  lr_decay_rate: 0.3
+  lr_decay_step: 5,20,40,70
+  lr_init: 0.003
+  mae_thresh: None
+  mape_thresh: 0.001
+  max_grad_norm: 5
+  output_dim: 1
+  plot: false
+  weight_decay: 0
diff --git a/config/REPST/BJTaxi-Inflow.yaml b/config/ASTRA_v2/PEMS-BAY.yaml
similarity index 82%
rename from config/REPST/BJTaxi-Inflow.yaml
rename to config/ASTRA_v2/PEMS-BAY.yaml
index e8a17fc..2705006 100755
--- a/config/REPST/BJTaxi-Inflow.yaml
+++ b/config/ASTRA_v2/PEMS-BAY.yaml
@@ -1,8 +1,8 @@
 basic:
-  dataset: BJTaxi-InFlow
+  dataset: PEMS-BAY
   device: cuda:0
   mode: train
-  model: REPST
+  model: ASTRA_v2
   seed: 2023
 
 data:
@@ -13,12 +13,13 @@ data:
   input_dim: 1
   lag: 24
   normalizer: std
-  num_nodes: 1024
-  steps_per_day: 48
+  num_nodes: 325
+  steps_per_day: 288
   test_ratio: 0.2
   val_ratio: 0.2
 
 model:
+  cheb: 3
   d_ff: 128
   d_model: 64
   dropout: 0.2
@@ -26,12 +27,15 @@ model:
   gpt_path: ./GPT-2
   input_dim: 1
   n_heads: 1
-  num_nodes: 1024
+  num_nodes: 325
   patch_len: 6
   pred_len: 24
   seq_len: 24
   stride: 7
   word_num: 1000
+  graph_dim: 64
+  graph_embed_dim: 10
+  output_dim: 1
 
 train:
   batch_size: 16
@@ -51,5 +55,4 @@ train:
   max_grad_norm: 5
   output_dim: 1
   plot: false
-  real_value: true
   weight_decay: 0
diff --git a/config/AEPSA/v2_SolarEnergy.yaml b/config/ASTRA_v2/SolarEnergy.yaml
similarity index 87%
rename from config/AEPSA/v2_SolarEnergy.yaml
rename to config/ASTRA_v2/SolarEnergy.yaml
index a45ad73..f6405a5 100644
--- a/config/AEPSA/v2_SolarEnergy.yaml
+++ b/config/ASTRA_v2/SolarEnergy.yaml
@@ -2,11 +2,11 @@ basic:
   dataset: SolarEnergy
   device: cuda:0
   mode: train
-  model: AEPSA_v2
+  model: ASTRA_v2
   seed: 2023
 
 data:
-  batch_size: 64
+  batch_size: 16
   column_wise: false
   days_per_week: 7
   horizon: 24
@@ -19,6 +19,7 @@ data:
   val_ratio: 0.2
 
 model:
+  cheb: 3
   d_ff: 128
   d_model: 64
   dropout: 0.2
@@ -32,9 +33,12 @@ model:
   seq_len: 24
   stride: 7
   word_num: 1000
+  graph_dim: 64
+  graph_embed_dim: 10
+  output_dim: 1
 
 train:
-  batch_size: 64
+  batch_size: 16
   debug: false
   early_stop: true
   early_stop_patience: 15
diff --git a/config/ASTRA_v3/AirQuality.yaml b/config/ASTRA_v3/AirQuality.yaml
new file mode 100644
index 0000000..c4481c0
--- /dev/null
+++ b/config/ASTRA_v3/AirQuality.yaml
@@ -0,0 +1,58 @@
+basic:
+  dataset: AirQuality
+  device: cuda:0
+  mode: train
+  model: ASTRA_v3
+  seed: 2023
+
+data:
+  batch_size: 16
+  column_wise: false
+  days_per_week: 7
+  horizon: 24
+  input_dim: 6
+  lag: 24
+  normalizer: std
+  num_nodes: 35
+  steps_per_day: 24
+  test_ratio: 0.2
+  val_ratio: 0.2
+
+model:
+  cheb: 3
+  d_ff: 128
+  d_model: 64
+  dropout: 0.2
+  gpt_layers: 9
+  gpt_path: ./GPT-2
+  input_dim: 6
+  n_heads: 1
+  num_nodes: 35
+  patch_len: 6
+  pred_len: 24
+  seq_len: 24
+  stride: 7
+  word_num: 1000
+  graph_dim: 64
+  graph_embed_dim: 10
+  output_dim: 6
+
+train:
+  batch_size: 16
+  debug: false
+  early_stop: true
+  early_stop_patience: 15
+  epochs: 100
+  grad_norm: false
+  log_step: 100
+  loss_func: mae
+  lr_decay: true
+  lr_decay_rate: 0.3
+  lr_decay_step: 5,20,40,70
+  lr_init: 0.003
+  mae_thresh: None
+  mape_thresh: 0.001
+  max_grad_norm: 5
+  output_dim: 6
+  plot: false
+  weight_decay: 0
diff --git a/config/ASTRA_v3/BJTaxi-InFlow.yaml b/config/ASTRA_v3/BJTaxi-InFlow.yaml
new file mode 100644
index 0000000..bb09013
--- /dev/null
+++ b/config/ASTRA_v3/BJTaxi-InFlow.yaml
@@ -0,0 +1,58 @@
+basic:
+  dataset: BJTaxi-InFlow
+  device: cuda:0
+  mode: train
+  model: ASTRA_v3
+  seed: 2023
+
+data:
+  batch_size: 32
+  column_wise: false
+  days_per_week: 7
+  horizon: 24
+  input_dim: 1
+  lag: 24
+  normalizer: std
+  num_nodes: 1024
+  steps_per_day: 48
+  test_ratio: 0.2
+  val_ratio: 0.2
+
+model:
+  cheb: 3
+  d_ff: 128
+  d_model: 64
+  dropout: 0.2
+  gpt_layers: 9
+  gpt_path: ./GPT-2
+  input_dim: 1
+  n_heads: 1
+  num_nodes: 1024
+  patch_len: 6
+  pred_len: 24
+  seq_len: 24
+  stride: 7
+  word_num: 1000
+  graph_dim: 64
+  graph_embed_dim: 10
+  output_dim: 1
+
+train:
+  batch_size: 32
+  debug: false
+  early_stop: true
+  early_stop_patience: 15
+  epochs: 100
+  grad_norm: false
+  log_step: 100
+  loss_func: mae
+  lr_decay: true
+  lr_decay_rate: 0.3
+  lr_decay_step: 5,20,40,70
+  lr_init: 0.003
+  mae_thresh: None
+  mape_thresh: 0.001
+  max_grad_norm: 5
+  output_dim: 1
+  plot: false
+  weight_decay: 0
diff --git a/config/ASTRA_v3/BJTaxi-OutFlow.yaml b/config/ASTRA_v3/BJTaxi-OutFlow.yaml
new file mode 100644
index 0000000..0b4e8df
--- /dev/null
+++ b/config/ASTRA_v3/BJTaxi-OutFlow.yaml
@@ -0,0 +1,58 @@
+basic:
+  dataset: BJTaxi-OutFlow
+  device: cuda:0
+  mode: train
+  model: ASTRA_v3
+  seed: 2023
+
+data:
+  batch_size: 32
+  column_wise: false
+  days_per_week: 7
+  horizon: 24
+  input_dim: 1
+  lag: 24
+  normalizer: std
+  num_nodes: 1024
+  steps_per_day: 48
+  test_ratio: 0.2
+  val_ratio: 0.2
+
+model:
+  cheb: 3
+  d_ff: 128
+  d_model: 64
+  dropout: 0.2
+  gpt_layers: 9
+  gpt_path: ./GPT-2
+  input_dim: 1
+  n_heads: 1
+  num_nodes: 1024
+  patch_len: 6
+  pred_len: 24
+  seq_len: 24
+  stride: 7
+  word_num: 1000
+  graph_dim: 64
+  graph_embed_dim: 10
+  output_dim: 1
+
+train:
+  batch_size: 32
+  debug: false
+  early_stop: true
+  early_stop_patience: 15
+  epochs: 100
+  grad_norm: false
+  log_step: 100
+  loss_func: mae
+  lr_decay: true
+  lr_decay_rate: 0.3
+  lr_decay_step: 5,20,40,70
+  lr_init: 0.003
+  mae_thresh: None
+  mape_thresh: 0.001
+  max_grad_norm: 5
+  output_dim: 1
+  plot: false
+  weight_decay: 0
diff --git a/config/ASTRA_v3/METR-LA.yaml b/config/ASTRA_v3/METR-LA.yaml
new file mode 100644
index 0000000..5efa494
--- /dev/null
+++ b/config/ASTRA_v3/METR-LA.yaml
@@ -0,0 +1,59 @@
+basic:
+  dataset: METR-LA
+  device: cuda:0
+  mode: train
+  model: ASTRA_v3
+  seed: 2023
+
+data:
+  batch_size: 16
+  column_wise: false
+  days_per_week: 7
+  horizon: 24
+  input_dim: 1
+  lag: 24
+  normalizer: std
+  num_nodes: 207
+  steps_per_day: 288
+  test_ratio: 0.2
+  val_ratio: 0.2
+
+model:
+  cheb: 3
+  d_ff: 128
+  d_model: 64
+  dropout: 0.2
+  gpt_layers: 9
+  gpt_path: ./GPT-2
+  input_dim: 1
+  n_heads: 1
+  num_nodes: 207
+  patch_len: 6
+  pred_len: 24
+  seq_len: 24
+  stride: 7
+  word_num: 1000
+  graph_dim: 64
+  graph_embed_dim: 10
+  output_dim: 1
+
+train:
+  batch_size: 16
+  debug: false
+  early_stop: true
+  early_stop_patience: 15
+  epochs: 100
+  grad_norm: false
+  log_step: 1000
+  loss_func: mae
+  lr_decay: true
+  lr_decay_rate: 0.3
+  lr_decay_step: 5,20,40,70
+  lr_init: 0.003
+  mae_thresh: None
+  mape_thresh: 0.001
+  max_grad_norm: 5
+  output_dim: 1
+  plot: false
+  real_value: true
+  weight_decay: 0
diff --git a/config/ASTRA_v3/NYCBike-InFlow.yaml b/config/ASTRA_v3/NYCBike-InFlow.yaml
new file mode 100644
index 0000000..52008cc
--- /dev/null
+++ b/config/ASTRA_v3/NYCBike-InFlow.yaml
@@ -0,0 +1,58 @@
+basic:
+  dataset: NYCBike-InFlow
+  device: cuda:0
+  mode: train
+  model: ASTRA_v3
+  seed: 2023
+
+data:
+  batch_size: 32
+  column_wise: false
+  days_per_week: 7
+  horizon: 24
+  input_dim: 1
+  lag: 24
+  normalizer: std
+  num_nodes: 128
+  steps_per_day: 48
+  test_ratio: 0.2
+  val_ratio: 0.2
+
+model:
+  cheb: 3
+  d_ff: 128
+  d_model: 64
+  dropout: 0.2
+  gpt_layers: 9
+  gpt_path: ./GPT-2
+  input_dim: 1
+  n_heads: 1
+  num_nodes: 128
+  patch_len: 6
+  pred_len: 24
+  seq_len: 24
+  stride: 7
+  word_num: 1000
+  graph_dim: 64
+  graph_embed_dim: 10
+  output_dim: 1
+
+train:
+  batch_size: 32
+  debug: false
+  early_stop: true
+  early_stop_patience: 15
+  epochs: 100
+  grad_norm: false
+  log_step: 100
+  loss_func: mae
+  lr_decay: true
+  lr_decay_rate: 0.3
+  lr_decay_step: 5,20,40,70
+  lr_init: 0.003
+  mae_thresh: None
+  mape_thresh: 0.001
+  max_grad_norm: 5
+  output_dim: 1
+  plot: false
+  weight_decay: 0
diff --git a/config/ASTRA_v3/NYCBike-OutFlow.yaml b/config/ASTRA_v3/NYCBike-OutFlow.yaml
new file mode 100644
index 0000000..0977912
--- /dev/null
+++ b/config/ASTRA_v3/NYCBike-OutFlow.yaml
@@ -0,0 +1,58 @@
+basic:
+  dataset: NYCBike-OutFlow
+  device: cuda:0
+  mode: train
+  model: ASTRA_v3
+  seed: 2023
+
+data:
+  batch_size: 32
+  column_wise: false
+  days_per_week: 7
+  horizon: 24
+  input_dim: 1
+  lag: 24
+  normalizer: std
+  num_nodes: 128
+  steps_per_day: 48
+  test_ratio: 0.2
+  val_ratio: 0.2
+
+model:
+  cheb: 3
+  d_ff: 128
+  d_model: 64
+  dropout: 0.2
+  gpt_layers: 9
+  gpt_path: ./GPT-2
+  input_dim: 1
+  n_heads: 1
+  num_nodes: 128
+  patch_len: 6
+  pred_len: 24
+  seq_len: 24
+  stride: 7
+  word_num: 1000
+  graph_dim: 64
+  graph_embed_dim: 10
+  output_dim: 1
+
+train:
+  batch_size: 32
+  debug: false
+  early_stop: true
+  early_stop_patience: 15
+  epochs: 100
+  grad_norm: false
+  log_step: 100
+  loss_func: mae
+  lr_decay: true
+  lr_decay_rate: 0.3
+  lr_decay_step: 5,20,40,70
+  lr_init: 0.003
+  mae_thresh: None
+  mape_thresh: 0.001
+  max_grad_norm: 5
+  output_dim: 1
+  plot: false
+  weight_decay: 0
diff --git a/config/ASTRA_v3/PEMS-BAY.yaml b/config/ASTRA_v3/PEMS-BAY.yaml
new file mode 100755
index 0000000..9ff0fd0
--- /dev/null
+++ b/config/ASTRA_v3/PEMS-BAY.yaml
@@ -0,0 +1,58 @@
+basic:
+  dataset: PEMS-BAY
+  device: cuda:0
+  mode: train
+  model: ASTRA_v3
+  seed: 2023
+
+data:
+  batch_size: 16
+  column_wise: false
+  days_per_week: 7
+  horizon: 24
+  input_dim: 1
+  lag: 24
+  normalizer: std
+  num_nodes: 325
+  steps_per_day: 288
+  test_ratio: 0.2
+  val_ratio: 0.2
+
+model:
+  cheb: 3
+  d_ff: 128
+  d_model: 64
+  dropout: 0.2
+  gpt_layers: 9
+  gpt_path: ./GPT-2
+  input_dim: 1
+  n_heads: 1
+  num_nodes: 325
+  patch_len: 6
+  pred_len: 24
+  seq_len: 24
+  stride: 7
+  word_num: 1000
+  graph_dim: 64
+  graph_embed_dim: 10
+  output_dim: 1
+
+train:
+  batch_size: 16
+  debug: false
+  early_stop: true
+  early_stop_patience: 15
+  epochs: 100
+  grad_norm: false
+  log_step: 100
+  loss_func: mae
+  lr_decay: true
+  lr_decay_rate: 0.3
+  lr_decay_step: 5,20,40,70
+  lr_init: 0.003
+  mae_thresh: None
+  mape_thresh: 0.001
+  max_grad_norm: 5
+  output_dim: 1
+  plot: false
+  weight_decay: 0
diff --git a/config/ASTRA_v3/SolarEnergy.yaml b/config/ASTRA_v3/SolarEnergy.yaml
new file mode 100644
index 0000000..c3f8863
--- /dev/null
+++ b/config/ASTRA_v3/SolarEnergy.yaml
@@ -0,0 +1,58 @@
+basic:
+  dataset: SolarEnergy
+  device: cuda:0
+  mode: train
+  model: ASTRA_v3
+  seed: 2023
+
+data:
+  batch_size: 16
+  column_wise: false
+  days_per_week: 7
+  horizon: 24
+  input_dim: 1
+  lag: 24
+  normalizer: std
+  num_nodes: 137
+  steps_per_day: 24
+  test_ratio: 0.2
+  val_ratio: 0.2
+
+model:
+  cheb: 3
+  d_ff: 128
+  d_model: 64
+  dropout: 0.2
+  gpt_layers: 9
+  gpt_path: ./GPT-2
+  input_dim: 1
+  n_heads: 1
+  num_nodes: 137
+  patch_len: 6
+  pred_len: 24
+  seq_len: 24
+  stride: 7
+  word_num: 1000
+  graph_dim: 64
+  graph_embed_dim: 10
+  output_dim: 1
+
+train:
+  batch_size: 16
+  debug: false
+  early_stop: true
+  early_stop_patience: 15
+  epochs: 100
+  grad_norm: false
+  log_step: 100
+  loss_func: mae
+  lr_decay: true
+  lr_decay_rate: 0.3
+  lr_decay_step: 5,20,40,70
+  lr_init: 0.003
+  mae_thresh: None
+  mape_thresh: 0.001
+  max_grad_norm: 5
+  output_dim: 1
+  plot: false
+  weight_decay: 0
diff --git a/config/DCRNN/AirQuality.yaml b/config/DCRNN/AirQuality.yaml
index 4922365..387b89b 100644
--- a/config/DCRNN/AirQuality.yaml
+++ b/config/DCRNN/AirQuality.yaml
@@ -13,7 +13,7 @@ data:
   input_dim: 6
   lag: 24
   normalizer: std
-  num_nodes: 35
+  num_nodes: 12
   steps_per_day: 24
   test_ratio: 0.2
   val_ratio: 0.2
@@ -25,6 +25,7 @@ model:
   input_dim: 6
   l1_decay: 0
   max_diffusion_step: 2
+  num_nodes: 12
   num_rnn_layers: 2
   output_dim: 6
   rnn_units: 64
@@ -44,11 +45,10 @@ train:
   lr_decay_rate: 0.1
   lr_decay_step: 10,20,40,80
   lr_init: 0.001
-  mae_thresh:
+  mae_thresh: 0.0
   mape_thresh: 0.0
   max_grad_norm: 5
   output_dim: 6
   plot: false
   real_value: false
-  seed: 10
-  weight_decay: 0.0001
\ No newline at end of file
+  weight_decay: 0.0001
diff --git a/config/DCRNN/BJTaxi-InFlow.yaml b/config/DCRNN/BJTaxi-InFlow.yaml
index b81dc86..16a3f91 100644
--- a/config/DCRNN/BJTaxi-InFlow.yaml
+++ b/config/DCRNN/BJTaxi-InFlow.yaml
@@ -25,6 +25,7 @@ model:
   input_dim: 1
   l1_decay: 0
   max_diffusion_step: 2
+  num_nodes: 1024
   num_rnn_layers: 2
   output_dim: 1
   rnn_units: 64
@@ -44,11 +45,10 @@ train:
   lr_decay_rate: 0.1
   lr_decay_step: 10,20,40,80
   lr_init: 0.001
-  mae_thresh:
+  mae_thresh: 0.0
   mape_thresh: 0.0
   max_grad_norm: 5
   output_dim: 1
   plot: false
   real_value: false
-  seed: 10
-  weight_decay: 0.0001
\ No newline at end of file
+  weight_decay: 0.0001
diff --git a/config/DCRNN/BJTaxi-OutFlow.yaml b/config/DCRNN/BJTaxi-OutFlow.yaml
index dfffb51..339e1ec 100644
--- a/config/DCRNN/BJTaxi-OutFlow.yaml
+++ b/config/DCRNN/BJTaxi-OutFlow.yaml
@@ -25,6 +25,7 @@ model:
   input_dim: 1
   l1_decay: 0
   max_diffusion_step: 2
+  num_nodes: 1024
   num_rnn_layers: 2
   output_dim: 1
   rnn_units: 64
@@ -44,11 +45,10 @@ train:
   lr_decay_rate: 0.1
   lr_decay_step: 10,20,40,80
   lr_init: 0.001
-  mae_thresh:
+  mae_thresh: 0.0
   mape_thresh: 0.0
   max_grad_norm: 5
   output_dim: 1
   plot: false
   real_value: false
-  seed: 10
-  weight_decay: 0.0001
\ No newline at end of file
+  weight_decay: 0.0001
diff --git a/config/DCRNN/METR-LA.yaml b/config/DCRNN/METR-LA.yaml
index 18fb223..15fcb44 100644
--- a/config/DCRNN/METR-LA.yaml
+++ b/config/DCRNN/METR-LA.yaml
@@ -25,6 +25,7 @@ model:
   input_dim: 1
   l1_decay: 0
   max_diffusion_step: 2
+  num_nodes: 207
   num_rnn_layers: 2
   output_dim: 1
   rnn_units: 64
@@ -44,11 +45,10 @@ train:
   lr_decay_rate: 0.1
   lr_decay_step: 10,20,40,80
   lr_init: 0.001
-  mae_thresh:
+  mae_thresh: 0.0
   mape_thresh: 0.0
   max_grad_norm: 5
   output_dim: 1
   plot: false
   real_value: false
-  seed: 10
-  weight_decay: 0.0001
\ No newline at end of file
+  weight_decay: 0.0001
diff --git a/config/DCRNN/NYCBike-InFlow.yaml b/config/DCRNN/NYCBike-InFlow.yaml
index bf7d773..e53a839 100644
--- a/config/DCRNN/NYCBike-InFlow.yaml
+++ b/config/DCRNN/NYCBike-InFlow.yaml
@@ -25,6 +25,7 @@ model:
   input_dim: 1
   l1_decay: 0
   max_diffusion_step: 2
+  num_nodes: 128
   num_rnn_layers: 2
   output_dim: 1
   rnn_units: 64
@@ -44,11 +45,10 @@ train:
   lr_decay_rate: 0.1
   lr_decay_step: 10,20,40,80
   lr_init: 0.001
-  mae_thresh:
+  mae_thresh: 0.0
   mape_thresh: 0.0
   max_grad_norm: 5
   output_dim: 1
   plot: false
   real_value: false
-  seed: 10
-  weight_decay: 0.0001
\ No newline at end of file
+  weight_decay: 0.0001
diff --git a/config/DCRNN/NYCBike-OutFlow.yaml b/config/DCRNN/NYCBike-OutFlow.yaml
index 7472459..a9ba532 100644
--- a/config/DCRNN/NYCBike-OutFlow.yaml
+++ b/config/DCRNN/NYCBike-OutFlow.yaml
@@ -25,6 +25,7 @@ model:
   input_dim: 1
   l1_decay: 0
   max_diffusion_step: 2
+  num_nodes: 128
   num_rnn_layers: 2
   output_dim: 1
   rnn_units: 64
@@ -44,11 +45,10 @@ train:
   lr_decay_rate: 0.1
   lr_decay_step: 10,20,40,80
   lr_init: 0.001
-  mae_thresh:
+  mae_thresh: 0.0
   mape_thresh: 0.0
   max_grad_norm: 5
   output_dim: 1
   plot: false
   real_value: false
-  seed: 10
-  weight_decay: 0.0001
\ No newline at end of file
+  weight_decay: 0.0001
diff --git a/config/DCRNN/PEMSD3.yaml b/config/DCRNN/PEMSD3.yaml
index 75f7dde..7d0e4a8 100755
--- a/config/DCRNN/PEMSD3.yaml
+++ b/config/DCRNN/PEMSD3.yaml
@@ -25,6 +25,7 @@ model:
   input_dim: 1
   l1_decay: 0
   max_diffusion_step: 2
+  num_nodes: 358
   num_rnn_layers: 1
   output_dim: 1
   rnn_units: 64
@@ -44,11 +45,10 @@ train:
   lr_decay_rate: 0.3
   lr_decay_step: 5,20,40,70
   lr_init: 0.003
-  mae_thresh:
+  mae_thresh: 0.0
   mape_thresh: 0.0
   max_grad_norm: 5
   output_dim: 1
   plot: false
   real_value: true
-  seed: 10
   weight_decay: 0
diff --git a/config/DCRNN/PEMSD4.yaml b/config/DCRNN/PEMSD4.yaml
index 803d032..ddf3156 100755
--- a/config/DCRNN/PEMSD4.yaml
+++ b/config/DCRNN/PEMSD4.yaml
@@ -25,6 +25,7 @@ model:
   input_dim: 1
   l1_decay: 0
   max_diffusion_step: 2
+  num_nodes: 307
   num_rnn_layers: 2
   output_dim: 1
   rnn_units: 64
@@ -44,11 +45,10 @@ train:
   lr_decay_rate: 0.1
   lr_decay_step: 10,20,40,80
   lr_init: 0.001
-  mae_thresh:
+  mae_thresh: 0.0
   mape_thresh: 0.0
   max_grad_norm: 5
   output_dim: 1
   plot: false
   real_value: false
-  seed: 10
   weight_decay: 0.0001
diff --git a/config/DCRNN/PEMSD7.yaml b/config/DCRNN/PEMSD7.yaml
index e940611..8b8e43b 100755
--- a/config/DCRNN/PEMSD7.yaml
+++ b/config/DCRNN/PEMSD7.yaml
@@ -25,6 +25,7 @@ model:
   input_dim: 1
   l1_decay: 0
   max_diffusion_step: 2
+  num_nodes: 883
   num_rnn_layers: 1
   output_dim: 1
   rnn_units: 64
@@ -44,11 +45,10 @@ train:
   lr_decay_rate: 0.3
   lr_decay_step: 5,20,40,70
   lr_init: 0.003
-  mae_thresh:
+  mae_thresh: 0.0
   mape_thresh: 0.0
   max_grad_norm: 5
   output_dim: 1
   plot: false
   real_value: true
-  seed: 10
   weight_decay: 0
diff --git a/config/DCRNN/PEMSD8.yaml b/config/DCRNN/PEMSD8.yaml
index cde60d3..709b392 100755
--- a/config/DCRNN/PEMSD8.yaml
+++ b/config/DCRNN/PEMSD8.yaml
@@ -25,6 +25,7 @@ model:
   input_dim: 1
   l1_decay: 0
   max_diffusion_step: 2
+  num_nodes: 170
   num_rnn_layers: 1
   output_dim: 1
   rnn_units: 64
@@ -44,11 +45,10 @@ train:
   lr_decay_rate: 0.3
   lr_decay_step: 5,20,40,70
   lr_init: 0.003
-  mae_thresh:
+  mae_thresh: 0.0
   mape_thresh: 0.0
   max_grad_norm: 5
   output_dim: 1
   plot: false
   real_value: true
-  seed: 10
   weight_decay: 0
diff --git a/config/DCRNN/SolarEnergy.yaml b/config/DCRNN/SolarEnergy.yaml
index 3bc9fc2..434abdd 100644
--- a/config/DCRNN/SolarEnergy.yaml
+++ b/config/DCRNN/SolarEnergy.yaml
@@ -45,11 +45,10 @@ train:
   lr_decay_rate: 0.3
   lr_decay_step: 5,20,40,70
   lr_init: 0.003
-  mae_thresh:
+  mae_thresh: 0.0
   mape_thresh: 0.001
   max_grad_norm: 5
   output_dim: 1
   plot: false
   real_value: true
-  seed: 10
-  weight_decay: 0
\ No newline at end of file
+  weight_decay: 0
diff --git a/config/DDGCRN/AirQuality.yaml b/config/DDGCRN/AirQuality.yaml
index 954728b..7090253 100644
--- a/config/DDGCRN/AirQuality.yaml
+++ b/config/DDGCRN/AirQuality.yaml
@@ -4,6 +4,7 @@ basic:
   mode: train
   model: DDGCRN
   seed: 2023
+
 data:
   batch_size: 16
   column_wise: false
@@ -12,19 +13,22 @@ data:
   input_dim: 6
   lag: 24
   normalizer: std
-  num_nodes: 35
+  num_nodes: 12
   steps_per_day: 24
   test_ratio: 0.2
   val_ratio: 0.2
+
 model:
   cheb_order: 2
   embed_dim: 12
   input_dim: 6
   num_layers: 1
+  num_nodes: 12
   output_dim: 6
   rnn_units: 64
   use_day: true
   use_week: false
+
 train:
   batch_size: 16
   debug: false
@@ -38,11 +42,10 @@ train:
   lr_decay_rate: 0.3
   lr_decay_step: 5,20,40,70
   lr_init: 0.003
-  mae_thresh: ''
+  mae_thresh: 0.0
   mape_thresh: 0.0
   max_grad_norm: 5
   output_dim: 6
   plot: false
   real_value: true
-  seed: 10
   weight_decay: 0
diff --git a/config/DDGCRN/BJTaxi-InFlow.yaml b/config/DDGCRN/BJTaxi-InFlow.yaml
index ebd58a2..12dffa4 100644
--- a/config/DDGCRN/BJTaxi-InFlow.yaml
+++ b/config/DDGCRN/BJTaxi-InFlow.yaml
@@ -4,6 +4,7 @@ basic:
   mode: train
   model: DDGCRN
   seed: 2023
+
 data:
   batch_size: 32
   column_wise: false
@@ -16,15 +17,18 @@ data:
   steps_per_day: 48
   test_ratio: 0.2
   val_ratio: 0.2
+
 model:
   cheb_order: 2
   embed_dim: 12
   input_dim: 1
   num_layers: 1
+  num_nodes: 1024
   output_dim: 1
   rnn_units: 64
   use_day: true
   use_week: false
+
 train:
   batch_size: 32
   debug: false
@@ -38,11 +42,10 @@ train:
   lr_decay_rate: 0.3
   lr_decay_step: 5,20,40,70
   lr_init: 0.003
-  mae_thresh: ''
+  mae_thresh: 0.0
   mape_thresh: 0.0
   max_grad_norm: 5
   output_dim: 1
   plot: false
   real_value: true
-  seed: 10
   weight_decay: 0
diff --git a/config/DDGCRN/BJTaxi-OutFlow.yaml b/config/DDGCRN/BJTaxi-OutFlow.yaml
index 89a64b6..eb88c12 100644
--- a/config/DDGCRN/BJTaxi-OutFlow.yaml
+++ b/config/DDGCRN/BJTaxi-OutFlow.yaml
@@ -4,6 +4,7 @@ basic:
   mode: train
   model: DDGCRN
   seed: 2023
+
 data:
   batch_size: 32
   column_wise: false
@@ -16,15 +17,18 @@ data:
   steps_per_day: 48
   test_ratio: 0.2
   val_ratio: 0.2
+
 model:
   cheb_order: 2
   embed_dim: 12
   input_dim: 1
   num_layers: 1
+  num_nodes: 1024
   output_dim: 1
   rnn_units: 64
   use_day: true
   use_week: false
+
 train:
   batch_size: 32
   debug: false
@@ -38,11 +42,10 @@ train:
   lr_decay_rate: 0.3
   lr_decay_step: 5,20,40,70
   lr_init: 0.003
-  mae_thresh: ''
+  mae_thresh: 0.0
   mape_thresh: 0.0
   max_grad_norm: 5
   output_dim: 1
   plot: false
   real_value: true
-  seed: 10
   weight_decay: 0
diff --git a/config/DDGCRN/Hainan.yaml b/config/DDGCRN/Hainan.yaml
index e22e71f..c02cd76 100755
--- a/config/DDGCRN/Hainan.yaml
+++ b/config/DDGCRN/Hainan.yaml
@@ -13,7 +13,7 @@ data:
   input_dim: 1
   lag: 12
   normalizer: std
-  num_nodes: 13
+  num_nodes: 200
   steps_per_day: 288
   test_ratio: 0.2
   val_ratio: 0.2
@@ -25,6 +25,7 @@ model:
   horizon: 12
   input_dim: 1
   num_layers: 1
+  num_nodes: 200
   output_dim: 1
   rnn_units: 32
   use_day: true
@@ -47,7 +48,7 @@ train:
   - 40
   - 70
   lr_init: 0.003
-  mae_thresh:
+  mae_thresh: null
   mape_thresh: 0.0
   max_grad_norm: 5
   output_dim: 1
diff --git a/config/DDGCRN/METR-LA.yaml b/config/DDGCRN/METR-LA.yaml
index 013fb5e..c18ffb7 100755
--- a/config/DDGCRN/METR-LA.yaml
+++ b/config/DDGCRN/METR-LA.yaml
@@ -48,5 +48,4 @@ train:
   max_grad_norm: 5
   output_dim: 1
   plot: false
-  seed: 10
   weight_decay: 0
diff --git a/config/DDGCRN/NYCBike-InFlow.yaml b/config/DDGCRN/NYCBike-InFlow.yaml
index 30846fb..1e9f2fb 100644
--- a/config/DDGCRN/NYCBike-InFlow.yaml
+++ b/config/DDGCRN/NYCBike-InFlow.yaml
@@ -4,6 +4,7 @@ basic:
   mode: train
   model: DDGCRN
   seed: 2023
+
 data:
   batch_size: 32
   column_wise: false
@@ -12,19 +13,22 @@ data:
   input_dim: 1
   lag: 24
   normalizer: std
-  num_nodes: 1024
+  num_nodes: 128
   steps_per_day: 48
   test_ratio: 0.2
   val_ratio: 0.2
+
 model:
   cheb_order: 2
   embed_dim: 12
   input_dim: 1
   num_layers: 1
+  num_nodes: 128
   output_dim: 1
   rnn_units: 64
   use_day: true
   use_week: false
+
 train:
   batch_size: 32
   debug: false
@@ -38,11 +42,10 @@ train:
   lr_decay_rate: 0.3
   lr_decay_step: 5,20,40,70
   lr_init: 0.003
-  mae_thresh: ''
+  mae_thresh: 0.0
   mape_thresh: 0.0
   max_grad_norm: 5
   output_dim: 1
   plot: false
   real_value: true
-  seed: 10
   weight_decay: 0
diff --git a/config/DDGCRN/NYCBike-OutFlow.yaml b/config/DDGCRN/NYCBike-OutFlow.yaml
index b48986f..227d00e 100644
--- a/config/DDGCRN/NYCBike-OutFlow.yaml
+++ b/config/DDGCRN/NYCBike-OutFlow.yaml
@@ -4,6 +4,7 @@ basic:
   mode: train
   model: DDGCRN
   seed: 2023
+
 data:
   batch_size: 32
   column_wise: false
@@ -12,19 +13,22 @@ data:
   input_dim: 1
   lag: 24
   normalizer: std
-  num_nodes: 1024
+  num_nodes: 128
   steps_per_day: 48
   test_ratio: 0.2
   val_ratio: 0.2
+
 model:
   cheb_order: 2
   embed_dim: 12
   input_dim: 1
   num_layers: 1
+  num_nodes: 128
   output_dim: 1
   rnn_units: 64
   use_day: true
   use_week: false
+
 train:
   batch_size: 32
   debug: false
@@ -38,11 +42,10 @@ train:
   lr_decay_rate: 0.3
   lr_decay_step: 5,20,40,70
   lr_init: 0.003
-  mae_thresh: ''
+  mae_thresh: 0.0
   mape_thresh: 0.0
   max_grad_norm: 5
   output_dim: 1
   plot: false
   real_value: true
-  seed: 10
   weight_decay: 0
diff --git a/config/DDGCRN/PEMSD3.yaml b/config/DDGCRN/PEMSD3.yaml
index 98bebd0..3064f54 100755
--- a/config/DDGCRN/PEMSD3.yaml
+++ b/config/DDGCRN/PEMSD3.yaml
@@ -23,6 +23,7 @@ model:
   embed_dim: 12
   input_dim: 1
   num_layers: 1
+  num_nodes: 358
   output_dim: 1
   rnn_units: 64
   use_day: true
@@ -41,11 +42,10 @@ train:
   lr_decay_rate: 0.3
   lr_decay_step: 5,20,40,70
   lr_init: 0.003
-  mae_thresh:
+  mae_thresh: 0.0
   mape_thresh: 0.0
   max_grad_norm: 5
   output_dim: 1
   plot: false
   real_value: true
-  seed: 10
   weight_decay: 0
diff --git a/config/DDGCRN/PEMSD4.yaml b/config/DDGCRN/PEMSD4.yaml
index fcf818e..b6c4073 100755
--- a/config/DDGCRN/PEMSD4.yaml
+++ b/config/DDGCRN/PEMSD4.yaml
@@ -23,6 +23,7 @@ model:
   embed_dim: 10
   input_dim: 1
   num_layers: 1
+  num_nodes: 307
   output_dim: 1
   rnn_units: 64
   use_day: true
@@ -41,11 +42,10 @@ train:
   lr_decay_rate: 0.3
   lr_decay_step: 5,20,40,70
   lr_init: 0.003
-  mae_thresh:
+  mae_thresh: 0.0
   mape_thresh: 0.0
   max_grad_norm: 5
   output_dim: 1
   plot: false
   real_value: true
-  seed: 10
   weight_decay: 0
diff --git a/config/DDGCRN/PEMSD7(L).yaml b/config/DDGCRN/PEMSD7(L).yaml
index f9063ef..2ddc530 100755
--- a/config/DDGCRN/PEMSD7(L).yaml
+++ b/config/DDGCRN/PEMSD7(L).yaml
@@ -51,5 +51,4 @@ train:
   output_dim: 1
   plot: true
   real_value: true
-  seed: 12
   weight_decay: 0
diff --git a/config/DDGCRN/PEMSD7(M).yaml b/config/DDGCRN/PEMSD7(M).yaml
index e7d87c3..a907f41 100755
--- a/config/DDGCRN/PEMSD7(M).yaml
+++ b/config/DDGCRN/PEMSD7(M).yaml
@@ -51,5 +51,4 @@ train:
   output_dim: 1
   plot: true
   real_value: true
-  seed: 12
   weight_decay: 0
diff --git a/config/DDGCRN/PEMSD7.yaml b/config/DDGCRN/PEMSD7.yaml
index ef828cb..48c2129 100755
--- a/config/DDGCRN/PEMSD7.yaml
+++ b/config/DDGCRN/PEMSD7.yaml
@@ -23,6 +23,7 @@ model:
   embed_dim: 12
   input_dim: 1
   num_layers: 1
+  num_nodes: 883
   output_dim: 1
   rnn_units: 64
   use_day: true
@@ -51,5 +52,4 @@
   output_dim: 1
   plot: false
   real_value: true
-  seed: 10
   weight_decay: 0
diff --git a/config/DDGCRN/PEMSD8.yaml b/config/DDGCRN/PEMSD8.yaml
index d467cf8..05b469e 100755
--- a/config/DDGCRN/PEMSD8.yaml
+++ b/config/DDGCRN/PEMSD8.yaml
@@ -49,5 +49,4 @@
   output_dim: 1
   plot: false
   real_value: true
-  seed: 12
   weight_decay: 0
diff --git a/config/DDGCRN/SolarEnergy.yaml b/config/DDGCRN/SolarEnergy.yaml
index b23ea03..902aa98 100644
--- a/config/DDGCRN/SolarEnergy.yaml
+++ b/config/DDGCRN/SolarEnergy.yaml
@@ -4,6 +4,7 @@ basic:
   mode: train
   model: DDGCRN
   seed: 2023
+
 data:
   batch_size: 16
   column_wise: false
@@ -16,15 +17,18 @@ data:
   steps_per_day: 24
   test_ratio: 0.2
   val_ratio: 0.2
+
 model:
   cheb_order: 2
   embed_dim: 12
   input_dim: 137
   num_layers: 1
+  num_nodes: 137
   output_dim: 137
   rnn_units: 64
   use_day: true
   use_week: false
+
 train:
   batch_size: 16
   debug: false
@@ -38,11 +42,10 @@ train:
   lr_decay_rate: 0.3
   lr_decay_step: 5,20,40,70
   lr_init: 0.003
-  mae_thresh: ''
+  mae_thresh: 0.0
   mape_thresh: 0.0
   max_grad_norm: 5
   output_dim: 137
   plot: false
   real_value: true
-  seed: 10
   weight_decay: 0
diff --git a/config/DSANET/AirQuality.yaml b/config/DSANET/AirQuality.yaml
index 2147269..f5d0b7d 100644
--- a/config/DSANET/AirQuality.yaml
+++ b/config/DSANET/AirQuality.yaml
@@ -4,6 +4,7 @@ basic:
   mode: train
   model: DSANET
   seed: 2023
+
 data:
   batch_size: 16
   column_wise: false
@@ -12,10 +13,11 @@ data:
   input_dim: 6
   lag: 24
   normalizer: std
-  num_nodes: 35
+  num_nodes: 12
   steps_per_day: 24
   test_ratio: 0.2
   val_ratio: 0.2
+
 model:
   batch_size: 64
   d_inner: 2048
@@ -29,9 +31,11 @@ model:
   n_kernels: 32
   n_layers: 6
   n_multiv: 35
+  num_nodes: 12
   output_dim: 6
   w_kernel: 1
   window: 24
+
 train:
   batch_size: 16
   debug: false
@@ -45,11 +49,10 @@ train:
   lr_decay_rate: 0.3
   lr_decay_step: 5,20,40,70
   lr_init: 0.003
-  mae_thresh: ''
+  mae_thresh: 0.0
   mape_thresh: 0.0
   max_grad_norm: 5
   output_dim: 6
   plot: false
   real_value: true
-  seed: 10
   weight_decay: 0
diff --git a/config/DSANET/BJTaxi-InFlow.yaml b/config/DSANET/BJTaxi-InFlow.yaml
index 7d40eff..2f81ff6 100644
--- a/config/DSANET/BJTaxi-InFlow.yaml
+++ b/config/DSANET/BJTaxi-InFlow.yaml
@@ -4,6 +4,7 @@ basic:
   mode: train
   model: DSANET
   seed: 2023
+
 data:
   batch_size: 32
   column_wise: false
@@ -16,6 +17,7 @@ data:
   steps_per_day: 48
   test_ratio: 0.2
   val_ratio: 0.2
+
 model:
   batch_size: 64
   d_inner: 2048
@@ -29,9 +31,11 @@ model:
   n_kernels: 32
   n_layers: 6
   n_multiv: 1024
+  num_nodes: 1024
   output_dim: 1
   w_kernel: 1
   window: 24
+
 train:
   batch_size: 32
   debug: false
@@ -45,11 +49,10 @@ train:
   lr_decay_rate: 0.3
   lr_decay_step: 5,20,40,70
   lr_init: 0.003
-  mae_thresh: ''
+  mae_thresh: 0.0
   mape_thresh: 0.0
   max_grad_norm: 5
   output_dim: 1
   plot: false
   real_value: true
-  seed: 10
   weight_decay: 0
diff --git a/config/DSANET/BJTaxi-OutFlow.yaml b/config/DSANET/BJTaxi-OutFlow.yaml
index 38e1e4e..dc5c1bc 100644
--- a/config/DSANET/BJTaxi-OutFlow.yaml
+++ b/config/DSANET/BJTaxi-OutFlow.yaml
@@ -4,6 +4,7 @@ basic:
   mode: train
   model: DSANET
   seed: 2023
+
 data:
   batch_size: 32
   column_wise: false
@@ -16,6 +17,7 @@ data:
   steps_per_day: 48
   test_ratio: 0.2
   val_ratio: 0.2
+
 model:
   batch_size: 64
   d_inner: 2048
@@ -29,9 +31,11 @@ model:
   n_kernels: 32
   n_layers: 6
   n_multiv: 1024
+  num_nodes: 1024
   output_dim: 1
   w_kernel: 1
   window: 24
+
 train:
   batch_size: 32
   debug: false
@@ -45,11 +49,10 @@ train:
   lr_decay_rate: 0.3
   lr_decay_step: 5,20,40,70
   lr_init: 0.003
-  mae_thresh: ''
+  mae_thresh: 0.0
   mape_thresh: 0.0
   max_grad_norm: 5
   output_dim: 1
   plot: false
   real_value: true
-  seed: 10
   weight_decay: 0
diff --git a/config/DSANET/METR-LA.yaml b/config/DSANET/METR-LA.yaml
index 108931b..6e920c5 100644
--- a/config/DSANET/METR-LA.yaml
+++ b/config/DSANET/METR-LA.yaml
@@ -4,6 +4,7 @@ basic:
   mode: train
   model: DSANET
   seed: 2023
+
 data:
   batch_size: 16
   column_wise: false
@@ -16,6 +17,7 @@ data:
   steps_per_day: 288
   test_ratio: 0.2
   val_ratio: 0.2
+
 model:
   batch_size: 64
   d_inner: 2048
@@ -29,9 +31,11 @@ model:
   n_kernels: 32
   n_layers: 6
   n_multiv: 207
+  num_nodes: 207
   output_dim: 1
   w_kernel: 1
   window: 12
+
 train:
   batch_size: 16
   debug: false
@@ -45,11 +49,10 @@ train:
   lr_decay_rate: 0.3
   lr_decay_step: 5,20,40,70
   lr_init: 0.003
-  mae_thresh: ''
+  mae_thresh: 0.0
   mape_thresh: 0.0
   max_grad_norm: 5
   output_dim: 1
   plot: false
   real_value: true
-  seed: 10
   weight_decay: 0
diff --git a/config/DSANET/NYCBike-InFlow.yaml b/config/DSANET/NYCBike-InFlow.yaml
index 2534078..f3cc3f8 100644
--- a/config/DSANET/NYCBike-InFlow.yaml
+++ b/config/DSANET/NYCBike-InFlow.yaml
@@ -4,6 +4,7 @@ basic:
   mode: train
   model: DSANET
   seed: 2023
+
 data:
   batch_size: 32
   column_wise: false
@@ -12,10 +13,11 @@ data:
   input_dim: 1
   lag: 24
   normalizer: std
-  num_nodes: 1024
+  num_nodes: 128
   steps_per_day: 48
   test_ratio: 0.2
   val_ratio: 0.2
+
 model:
   batch_size: 64
   d_inner: 2048
@@ -29,9 +31,11 @@ model:
   n_kernels: 32
   n_layers: 6
   n_multiv: 1024
+  num_nodes: 128
   output_dim: 1
   w_kernel: 1
   window: 24
+
 train:
   batch_size: 32
   debug: false
@@ -45,11 +49,10 @@ train:
   lr_decay_rate: 0.3
   lr_decay_step: 5,20,40,70
   lr_init: 0.003
-  mae_thresh: ''
+  mae_thresh: 0.0
   mape_thresh: 0.0
   max_grad_norm: 5
   output_dim: 1
   plot: false
   real_value: true
-  seed: 10
   weight_decay: 0
diff --git a/config/DSANET/NYCBike-OutFlow.yaml b/config/DSANET/NYCBike-OutFlow.yaml
a/config/DSANET/NYCBike-OutFlow.yaml b/config/DSANET/NYCBike-OutFlow.yaml index 3131ccc..eb6c116 100644 --- a/config/DSANET/NYCBike-OutFlow.yaml +++ b/config/DSANET/NYCBike-OutFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: DSANET seed: 2023 + data: batch_size: 32 column_wise: false @@ -12,10 +13,11 @@ data: input_dim: 1 lag: 24 normalizer: std - num_nodes: 1024 + num_nodes: 128 steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: batch_size: 64 d_inner: 2048 @@ -29,9 +31,11 @@ model: n_kernels: 32 n_layers: 6 n_multiv: 1024 + num_nodes: 128 output_dim: 1 w_kernel: 1 window: 24 + train: batch_size: 32 debug: false @@ -45,11 +49,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/DSANET/PEMSD3.yaml b/config/DSANET/PEMSD3.yaml index 38dccec..c9cb07e 100755 --- a/config/DSANET/PEMSD3.yaml +++ b/config/DSANET/PEMSD3.yaml @@ -31,6 +31,7 @@ model: n_kernels: 32 n_layers: 6 n_multiv: 358 + num_nodes: 358 output_dim: 1 w_kernel: 1 window: 12 @@ -48,11 +49,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/DSANET/PEMSD4.yaml b/config/DSANET/PEMSD4.yaml index bba0aa4..6676526 100755 --- a/config/DSANET/PEMSD4.yaml +++ b/config/DSANET/PEMSD4.yaml @@ -31,6 +31,7 @@ model: n_kernels: 32 n_layers: 6 n_multiv: 307 + num_nodes: 307 output_dim: 1 w_kernel: 1 window: 12 @@ -48,11 +49,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/DSANET/PEMSD7.yaml b/config/DSANET/PEMSD7.yaml index a04d2de..8d51681 100755 --- a/config/DSANET/PEMSD7.yaml +++ b/config/DSANET/PEMSD7.yaml @@ -31,6 +31,7 @@ model: n_kernels: 32 n_layers: 3 n_multiv: 883 + num_nodes: 883 output_dim: 1 w_kernel: 1 window: 12 @@ -48,11 +49,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/DSANET/PEMSD8.yaml b/config/DSANET/PEMSD8.yaml index 02a46bd..5f5ce7e 100755 --- a/config/DSANET/PEMSD8.yaml +++ b/config/DSANET/PEMSD8.yaml @@ -31,6 +31,7 @@ model: n_kernels: 32 n_layers: 6 n_multiv: 170 + num_nodes: 170 output_dim: 1 w_kernel: 1 window: 12 @@ -48,11 +49,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/DSANET/SolarEnergy.yaml b/config/DSANET/SolarEnergy.yaml index c2dd6eb..cc44e42 100644 --- a/config/DSANET/SolarEnergy.yaml +++ b/config/DSANET/SolarEnergy.yaml @@ -4,6 +4,7 @@ basic: mode: train model: DSANET seed: 2023 + data: batch_size: 16 column_wise: false @@ -16,6 +17,7 @@ data: steps_per_day: 24 test_ratio: 0.2 val_ratio: 0.2 + model: batch_size: 64 d_inner: 2048 @@ -29,9 +31,11 @@ model: n_kernels: 32 n_layers: 6 n_multiv: 137 + num_nodes: 137 output_dim: 137 w_kernel: 1 window: 24 + train: batch_size: 16 debug: false @@ -45,11 +49,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 
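These train-section edits consistently normalize `mae_thresh` from an empty string (which YAML loads as `''`) or a bare key (which loads as `null`) to an explicit `0.0`. The usual convention for such thresholds is to mask ground-truth entries at or below the threshold out of the error average. A minimal sketch of that convention with hypothetical helper names; the repository's own metric code may differ:

```python
import numpy as np

def masked_mae(pred, true, thresh=0.0):
    # Ground-truth entries at or below the threshold are excluded from the
    # average; a None threshold (the old bare key) disables masking.
    mask = np.abs(true) > thresh if thresh is not None else np.ones(true.shape, bool)
    return float(np.mean(np.abs(pred[mask] - true[mask])))

def masked_mape(pred, true, thresh=0.0):
    # MAPE always needs the mask, otherwise near-zero readings explode it.
    mask = np.abs(true) > thresh
    return float(np.mean(np.abs((pred[mask] - true[mask]) / true[mask])))
```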
mape_thresh: 0.0 max_grad_norm: 5 output_dim: 137 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/EXP/AirQuality.yaml b/config/EXP/AirQuality.yaml index ff7cf8d..f8dbaba 100644 --- a/config/EXP/AirQuality.yaml +++ b/config/EXP/AirQuality.yaml @@ -4,6 +4,7 @@ basic: mode: train model: EXP seed: 2023 + data: batch_size: 16 column_wise: false @@ -12,10 +13,11 @@ data: input_dim: 6 lag: 24 normalizer: std - num_nodes: 35 + num_nodes: 12 steps_per_day: 24 test_ratio: 0.2 val_ratio: 0.2 + model: cheb_order: 2 embed_dim: 10 @@ -24,11 +26,13 @@ model: in_len: 24 input_dim: 6 num_layers: 1 + num_nodes: 12 output_dim: 6 rnn_units: 64 top_k: 2 use_day: true use_week: true + train: batch_size: 16 debug: false @@ -42,11 +46,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 6 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/EXP/BJTaxi-InFlow.yaml b/config/EXP/BJTaxi-InFlow.yaml index c924453..01f1e63 100644 --- a/config/EXP/BJTaxi-InFlow.yaml +++ b/config/EXP/BJTaxi-InFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: EXP seed: 2023 + data: batch_size: 32 column_wise: false @@ -16,6 +17,7 @@ data: steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: cheb_order: 2 embed_dim: 10 @@ -24,11 +26,13 @@ model: in_len: 24 input_dim: 1 num_layers: 1 + num_nodes: 1024 output_dim: 1 rnn_units: 64 top_k: 2 use_day: true use_week: true + train: batch_size: 32 debug: false @@ -42,11 +46,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/EXP/BJTaxi-OutFlow.yaml b/config/EXP/BJTaxi-OutFlow.yaml index 6377e0b..4acad90 100644 --- a/config/EXP/BJTaxi-OutFlow.yaml +++ b/config/EXP/BJTaxi-OutFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: EXP seed: 2023 + data: batch_size: 32 column_wise: false @@ -16,6 +17,7 @@ data: steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: cheb_order: 2 embed_dim: 10 @@ -24,11 +26,13 @@ model: in_len: 24 input_dim: 1 num_layers: 1 + num_nodes: 1024 output_dim: 1 rnn_units: 64 top_k: 2 use_day: true use_week: true + train: batch_size: 32 debug: false @@ -42,11 +46,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/EXP/METR-LA.yaml b/config/EXP/METR-LA.yaml index 28ef4c1..e0ecd55 100644 --- a/config/EXP/METR-LA.yaml +++ b/config/EXP/METR-LA.yaml @@ -4,6 +4,7 @@ basic: mode: train model: EXP seed: 2023 + data: batch_size: 16 column_wise: false @@ -16,6 +17,7 @@ data: steps_per_day: 288 test_ratio: 0.2 val_ratio: 0.2 + model: cheb_order: 2 embed_dim: 10 @@ -24,11 +26,13 @@ model: in_len: 12 input_dim: 1 num_layers: 1 + num_nodes: 207 output_dim: 1 rnn_units: 64 top_k: 2 use_day: true use_week: true + train: batch_size: 16 debug: false @@ -42,11 +46,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/EXP/NYCBike-InFlow.yaml b/config/EXP/NYCBike-InFlow.yaml index 34876bc..3210b53 100644 --- a/config/EXP/NYCBike-InFlow.yaml +++ b/config/EXP/NYCBike-InFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train 
model: EXP seed: 2023 + data: batch_size: 32 column_wise: false @@ -12,10 +13,11 @@ data: input_dim: 1 lag: 24 normalizer: std - num_nodes: 1024 + num_nodes: 128 steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: cheb_order: 2 embed_dim: 10 @@ -24,11 +26,13 @@ model: in_len: 24 input_dim: 1 num_layers: 1 + num_nodes: 128 output_dim: 1 rnn_units: 64 top_k: 2 use_day: true use_week: true + train: batch_size: 32 debug: false @@ -42,11 +46,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/EXP/NYCBike-OutFlow.yaml b/config/EXP/NYCBike-OutFlow.yaml index 79e06ac..28e74d2 100644 --- a/config/EXP/NYCBike-OutFlow.yaml +++ b/config/EXP/NYCBike-OutFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: EXP seed: 2023 + data: batch_size: 32 column_wise: false @@ -12,10 +13,11 @@ data: input_dim: 1 lag: 24 normalizer: std - num_nodes: 1024 + num_nodes: 128 steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: cheb_order: 2 embed_dim: 10 @@ -24,11 +26,13 @@ model: in_len: 24 input_dim: 1 num_layers: 1 + num_nodes: 128 output_dim: 1 rnn_units: 64 top_k: 2 use_day: true use_week: true + train: batch_size: 32 debug: false @@ -42,11 +46,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/EXP/PEMSD3.yaml b/config/EXP/PEMSD3.yaml index 7e00b5f..def7295 100755 --- a/config/EXP/PEMSD3.yaml +++ b/config/EXP/PEMSD3.yaml @@ -26,6 +26,7 @@ model: in_len: 12 input_dim: 1 num_layers: 1 + num_nodes: 358 output_dim: 1 rnn_units: 64 top_k: 2 @@ -45,11 +46,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/EXP/PEMSD4.yaml b/config/EXP/PEMSD4.yaml index 560d6da..cdb81aa 100755 --- a/config/EXP/PEMSD4.yaml +++ b/config/EXP/PEMSD4.yaml @@ -23,6 +23,7 @@ model: cycle_len: 288 in_len: 12 input_dim: 1 + num_nodes: 307 output_dim: 1 train: @@ -38,11 +39,10 @@ train: lr_decay_rate: 0.5 lr_decay_step: 5,20,40,65 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/EXP/PEMSD7.yaml b/config/EXP/PEMSD7.yaml index 029e356..9233f08 100755 --- a/config/EXP/PEMSD7.yaml +++ b/config/EXP/PEMSD7.yaml @@ -22,6 +22,7 @@ model: batch_size: 64 in_len: 12 input_dim: 1 + num_nodes: 883 output_dim: 1 train: @@ -37,11 +38,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/EXP/PEMSD8.yaml b/config/EXP/PEMSD8.yaml index 5061050..e8af5dc 100755 --- a/config/EXP/PEMSD8.yaml +++ b/config/EXP/PEMSD8.yaml @@ -43,11 +43,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/EXP/SD.yaml b/config/EXP/SD.yaml index f61120b..493b443 100755 --- a/config/EXP/SD.yaml +++ b/config/EXP/SD.yaml @@ -13,7 +13,7 @@ data: input_dim: 1 lag: 12 normalizer: std - num_nodes: 
716 + num_nodes: 307 steps_per_day: 288 test_ratio: 0.2 val_ratio: 0.2 @@ -22,6 +22,7 @@ model: batch_size: 64 in_len: 12 input_dim: 1 + num_nodes: 307 output_dim: 1 train: @@ -37,7 +38,7 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: null mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 diff --git a/config/EXP/SolarEnergy.yaml b/config/EXP/SolarEnergy.yaml index 79e1496..de5edfe 100644 --- a/config/EXP/SolarEnergy.yaml +++ b/config/EXP/SolarEnergy.yaml @@ -4,6 +4,7 @@ basic: mode: train model: EXP seed: 2023 + data: batch_size: 16 column_wise: false @@ -16,6 +17,7 @@ data: steps_per_day: 24 test_ratio: 0.2 val_ratio: 0.2 + model: cheb_order: 2 embed_dim: 10 @@ -24,11 +26,13 @@ model: in_len: 24 input_dim: 137 num_layers: 1 + num_nodes: 137 output_dim: 137 rnn_units: 64 top_k: 2 use_day: true use_week: true + train: batch_size: 16 debug: false @@ -42,11 +46,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 137 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/EXPB/AirQuality.yaml b/config/EXPB/AirQuality.yaml index 238f7f7..4b8082c 100644 --- a/config/EXPB/AirQuality.yaml +++ b/config/EXPB/AirQuality.yaml @@ -4,6 +4,7 @@ basic: mode: train model: EXPB seed: 2023 + data: batch_size: 16 column_wise: false @@ -12,20 +13,23 @@ data: input_dim: 6 lag: 24 normalizer: std - num_nodes: 35 + num_nodes: 12 steps_per_day: 24 test_ratio: 0.2 val_ratio: 0.2 + model: cheb_order: 2 embed_dim: 10 input_dim: 6 num_layers: 1 + num_nodes: 12 output_dim: 6 patch_size: 3 rnn_units: 64 use_day: true use_week: true + train: batch_size: 16 debug: false @@ -39,11 +43,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 6 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/EXPB/BJTaxi-InFlow.yaml b/config/EXPB/BJTaxi-InFlow.yaml index 1eb34f4..0fb7614 100644 --- a/config/EXPB/BJTaxi-InFlow.yaml +++ b/config/EXPB/BJTaxi-InFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: EXPB seed: 2023 + data: batch_size: 32 column_wise: false @@ -16,16 +17,19 @@ data: steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: cheb_order: 2 embed_dim: 10 input_dim: 1 num_layers: 1 + num_nodes: 1024 output_dim: 1 patch_size: 3 rnn_units: 64 use_day: true use_week: true + train: batch_size: 32 debug: false @@ -39,11 +43,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/EXPB/BJTaxi-OutFlow.yaml b/config/EXPB/BJTaxi-OutFlow.yaml index b913f8f..798f5ca 100644 --- a/config/EXPB/BJTaxi-OutFlow.yaml +++ b/config/EXPB/BJTaxi-OutFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: EXPB seed: 2023 + data: batch_size: 32 column_wise: false @@ -16,16 +17,19 @@ data: steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: cheb_order: 2 embed_dim: 10 input_dim: 1 num_layers: 1 + num_nodes: 1024 output_dim: 1 patch_size: 3 rnn_units: 64 use_day: true use_week: true + train: batch_size: 32 debug: false @@ -39,11 +43,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/EXPB/METR-LA.yaml 
b/config/EXPB/METR-LA.yaml index 3416970..343b252 100644 --- a/config/EXPB/METR-LA.yaml +++ b/config/EXPB/METR-LA.yaml @@ -4,6 +4,7 @@ basic: mode: train model: EXPB seed: 2023 + data: batch_size: 64 column_wise: false @@ -16,16 +17,19 @@ data: steps_per_day: 288 test_ratio: 0.2 val_ratio: 0.2 + model: cheb_order: 2 embed_dim: 10 input_dim: 1 num_layers: 1 + num_nodes: 207 output_dim: 1 patch_size: 3 rnn_units: 64 use_day: true use_week: true + train: batch_size: 64 debug: false @@ -39,11 +43,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/EXPB/NYCBike-InFlow.yaml b/config/EXPB/NYCBike-InFlow.yaml index 2642db3..ac7b0d9 100644 --- a/config/EXPB/NYCBike-InFlow.yaml +++ b/config/EXPB/NYCBike-InFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: EXPB seed: 2023 + data: batch_size: 32 column_wise: false @@ -12,20 +13,23 @@ data: input_dim: 1 lag: 24 normalizer: std - num_nodes: 1024 + num_nodes: 128 steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: cheb_order: 2 embed_dim: 10 input_dim: 1 num_layers: 1 + num_nodes: 128 output_dim: 1 patch_size: 3 rnn_units: 64 use_day: true use_week: true + train: batch_size: 32 debug: false @@ -39,11 +43,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/EXPB/NYCBike-OutFlow.yaml b/config/EXPB/NYCBike-OutFlow.yaml index 3501ece..a4d845e 100644 --- a/config/EXPB/NYCBike-OutFlow.yaml +++ b/config/EXPB/NYCBike-OutFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: EXPB seed: 2023 + data: batch_size: 32 column_wise: false @@ -12,20 +13,23 @@ data: input_dim: 1 lag: 24 normalizer: std - num_nodes: 1024 + num_nodes: 128 steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: cheb_order: 2 embed_dim: 10 input_dim: 1 num_layers: 1 + num_nodes: 128 output_dim: 1 patch_size: 3 rnn_units: 64 use_day: true use_week: true + train: batch_size: 32 debug: false @@ -39,11 +43,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/EXPB/PEMSD4.yaml b/config/EXPB/PEMSD4.yaml index 4e2c908..cf301c2 100755 --- a/config/EXPB/PEMSD4.yaml +++ b/config/EXPB/PEMSD4.yaml @@ -23,6 +23,7 @@ model: embed_dim: 10 input_dim: 1 num_layers: 1 + num_nodes: 307 output_dim: 1 patch_size: 3 rnn_units: 64 @@ -42,11 +43,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/EXPB/SolarEnergy.yaml b/config/EXPB/SolarEnergy.yaml index 8e1a595..2d1f64e 100644 --- a/config/EXPB/SolarEnergy.yaml +++ b/config/EXPB/SolarEnergy.yaml @@ -4,6 +4,7 @@ basic: mode: train model: EXPB seed: 2023 + data: batch_size: 16 column_wise: false @@ -16,16 +17,19 @@ data: steps_per_day: 24 test_ratio: 0.2 val_ratio: 0.2 + model: cheb_order: 2 embed_dim: 10 input_dim: 137 num_layers: 1 + num_nodes: 137 output_dim: 137 patch_size: 3 rnn_units: 64 use_day: true use_week: true + train: batch_size: 16 debug: false @@ -39,11 +43,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' 
+ mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 137 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/FPT/AirQuality.yaml b/config/FPT/AirQuality.yaml new file mode 100644 index 0000000..0604938 --- /dev/null +++ b/config/FPT/AirQuality.yaml @@ -0,0 +1,51 @@ +basic: + dataset: AirQuality + device: cuda:0 + mode: train + model: FPT + seed: 2023 + +data: + batch_size: 16 + column_wise: false + days_per_week: 7 + horizon: 24 + input_dim: 6 + lag: 24 + normalizer: std + num_nodes: 35 + steps_per_day: 24 + test_ratio: 0.2 + val_ratio: 0.2 + +model: + d_model: 768 + gpt_layers: 9 + gpt_path: ./GPT-2 + input_dim: 6 + n_heads: 1 + num_nodes: 35 + patch_len: 6 + pred_len: 24 + seq_len: 24 + stride: 7 + +train: + batch_size: 16 + debug: false + early_stop: true + early_stop_patience: 15 + epochs: 100 + grad_norm: false + log_step: 100 + loss_func: mae + lr_decay: true + lr_decay_rate: 0.3 + lr_decay_step: 5,20,40,70 + lr_init: 0.003 + mae_thresh: None + mape_thresh: 0.001 + max_grad_norm: 5 + output_dim: 6 + plot: false + weight_decay: 0 diff --git a/config/AEPSA/BJTaxi-InFlow.yaml b/config/FPT/BJTaxi-InFlow.yaml similarity index 91% rename from config/AEPSA/BJTaxi-InFlow.yaml rename to config/FPT/BJTaxi-InFlow.yaml index 64b53dc..18abb67 100644 --- a/config/AEPSA/BJTaxi-InFlow.yaml +++ b/config/FPT/BJTaxi-InFlow.yaml @@ -2,8 +2,9 @@ basic: dataset: BJTaxi-InFlow device: cuda:0 mode: train - model: AEPSA + model: FPT seed: 2023 + data: batch_size: 32 column_wise: false @@ -16,10 +17,9 @@ data: steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: - d_ff: 128 - d_model: 64 - dropout: 0.2 + d_model: 768 gpt_layers: 9 gpt_path: ./GPT-2 input_dim: 1 @@ -29,7 +29,7 @@ model: pred_len: 24 seq_len: 24 stride: 7 - word_num: 1000 + train: batch_size: 32 debug: false diff --git a/config/AEPSA/BJTaxi-OutFlow.yaml b/config/FPT/BJTaxi-OutFlow.yaml similarity index 91% rename from config/AEPSA/BJTaxi-OutFlow.yaml rename to config/FPT/BJTaxi-OutFlow.yaml index d0cf19d..3e6765a 100644 --- a/config/AEPSA/BJTaxi-OutFlow.yaml +++ b/config/FPT/BJTaxi-OutFlow.yaml @@ -2,8 +2,9 @@ basic: dataset: BJTaxi-OutFlow device: cuda:0 mode: train - model: AEPSA + model: FPT seed: 2023 + data: batch_size: 32 column_wise: false @@ -16,10 +17,9 @@ data: steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: - d_ff: 128 - d_model: 64 - dropout: 0.2 + d_model: 768 gpt_layers: 9 gpt_path: ./GPT-2 input_dim: 1 @@ -29,7 +29,7 @@ model: pred_len: 24 seq_len: 24 stride: 7 - word_num: 1000 + train: batch_size: 32 debug: false diff --git a/config/AEPSA/v2_METR-LA.yaml b/config/FPT/METR-LA.yaml similarity index 91% rename from config/AEPSA/v2_METR-LA.yaml rename to config/FPT/METR-LA.yaml index bf92089..0c22dcb 100644 --- a/config/AEPSA/v2_METR-LA.yaml +++ b/config/FPT/METR-LA.yaml @@ -2,7 +2,7 @@ basic: dataset: METR-LA device: cuda:0 mode: train - model: AEPSA_v2 + model: FPT seed: 2023 data: @@ -19,9 +19,7 @@ data: val_ratio: 0.2 model: - d_ff: 128 - d_model: 64 - dropout: 0.2 + d_model: 768 gpt_layers: 9 gpt_path: ./GPT-2 input_dim: 1 @@ -31,7 +29,6 @@ model: pred_len: 24 seq_len: 24 stride: 7 - word_num: 1000 train: batch_size: 16 diff --git a/config/AEPSA/NYCBike-InFlow.yaml b/config/FPT/NYCBike-InFlow.yaml similarity index 86% rename from config/AEPSA/NYCBike-InFlow.yaml rename to config/FPT/NYCBike-InFlow.yaml index 2384c58..41a8c8b 100644 --- a/config/AEPSA/NYCBike-InFlow.yaml +++ b/config/FPT/NYCBike-InFlow.yaml @@ -2,8 +2,9 @@ basic: dataset: NYCBike-InFlow device: cuda:0 mode: train 
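The AEPSA configs renamed to FPT above drop the model-specific keys (`d_ff`, `dropout`, `word_num`) and pin `d_model: 768`, GPT-2's hidden width. That matches the usual frozen-pretrained-transformer recipe: truncate GPT-2 to `gpt_layers` blocks and feed it linearly embedded patches of the input window. A sketch under those assumptions (requires the `transformers` package and a GPT-2 checkpoint at `gpt_path`; the repository's model code may differ):

```python
import torch
from transformers import GPT2Model

gpt_layers, d_model, seq_len, patch_len, stride = 9, 768, 24, 6, 7  # config values

gpt2 = GPT2Model.from_pretrained("./GPT-2")  # gpt_path from the yaml
gpt2.h = gpt2.h[:gpt_layers]                 # keep only the first 9 blocks
for p in gpt2.parameters():
    p.requires_grad = False                  # frozen backbone (recipes of this
                                             # kind usually re-enable layer norms
                                             # and positional embeddings only)

x = torch.randn(8, seq_len)                  # (batch * nodes, seq_len), univariate
patches = x.unfold(1, patch_len, stride)     # (8, 3, 6): three patches per window
to_token = torch.nn.Linear(patch_len, d_model)
out = gpt2(inputs_embeds=to_token(patches)).last_hidden_state  # (8, 3, 768)
```

With `seq_len: 24`, `patch_len: 6` and `stride: 7`, each window yields exactly three patches, so every node contributes three GPT-2 tokens.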
- model: AEPSA + model: FPT seed: 2023 + data: batch_size: 32 column_wise: false @@ -12,24 +13,23 @@ data: input_dim: 1 lag: 24 normalizer: std - num_nodes: 1024 + num_nodes: 128 steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: - d_ff: 128 - d_model: 64 - dropout: 0.2 + d_model: 768 gpt_layers: 9 gpt_path: ./GPT-2 input_dim: 1 n_heads: 1 - num_nodes: 1024 + num_nodes: 128 patch_len: 6 pred_len: 24 seq_len: 24 stride: 7 - word_num: 1000 + train: batch_size: 32 debug: false diff --git a/config/AEPSA/NYCBike-OutFlow.yaml b/config/FPT/NYCBike-OutFlow.yaml similarity index 86% rename from config/AEPSA/NYCBike-OutFlow.yaml rename to config/FPT/NYCBike-OutFlow.yaml index 0b3597f..cc52b1a 100644 --- a/config/AEPSA/NYCBike-OutFlow.yaml +++ b/config/FPT/NYCBike-OutFlow.yaml @@ -2,8 +2,9 @@ basic: dataset: NYCBike-OutFlow device: cuda:0 mode: train - model: AEPSA + model: FPT seed: 2023 + data: batch_size: 32 column_wise: false @@ -12,24 +13,23 @@ data: input_dim: 1 lag: 24 normalizer: std - num_nodes: 1024 + num_nodes: 128 steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: - d_ff: 128 - d_model: 64 - dropout: 0.2 + d_model: 768 gpt_layers: 9 gpt_path: ./GPT-2 input_dim: 1 n_heads: 1 - num_nodes: 1024 + num_nodes: 128 patch_len: 6 pred_len: 24 seq_len: 24 stride: 7 - word_num: 1000 + train: batch_size: 32 debug: false diff --git a/config/FPT/PEMS-BAY.yaml b/config/FPT/PEMS-BAY.yaml new file mode 100755 index 0000000..efe4d7c --- /dev/null +++ b/config/FPT/PEMS-BAY.yaml @@ -0,0 +1,51 @@ +basic: + dataset: PEMS-BAY + device: cuda:0 + mode: train + model: FPT + seed: 2023 + +data: + batch_size: 16 + column_wise: false + days_per_week: 7 + horizon: 24 + input_dim: 1 + lag: 24 + normalizer: std + num_nodes: 325 + steps_per_day: 288 + test_ratio: 0.2 + val_ratio: 0.2 + +model: + d_model: 768 + gpt_layers: 9 + gpt_path: ./GPT-2 + input_dim: 1 + n_heads: 1 + num_nodes: 325 + patch_len: 6 + pred_len: 24 + seq_len: 24 + stride: 7 + +train: + batch_size: 16 + debug: false + early_stop: true + early_stop_patience: 15 + epochs: 100 + grad_norm: false + log_step: 100 + loss_func: mae + lr_decay: true + lr_decay_rate: 0.3 + lr_decay_step: 5,20,40,70 + lr_init: 0.003 + mae_thresh: None + mape_thresh: 0.001 + max_grad_norm: 5 + output_dim: 1 + plot: false + weight_decay: 0 diff --git a/config/FPT/SolarEnergy.yaml b/config/FPT/SolarEnergy.yaml new file mode 100644 index 0000000..fe1ea22 --- /dev/null +++ b/config/FPT/SolarEnergy.yaml @@ -0,0 +1,51 @@ +basic: + dataset: SolarEnergy + device: cuda:0 + mode: train + model: FPT + seed: 2023 + +data: + batch_size: 16 + column_wise: false + days_per_week: 7 + horizon: 24 + input_dim: 1 + lag: 24 + normalizer: std + num_nodes: 137 + steps_per_day: 24 + test_ratio: 0.2 + val_ratio: 0.2 + +model: + d_model: 768 + gpt_layers: 9 + gpt_path: ./GPT-2 + input_dim: 1 + n_heads: 1 + num_nodes: 137 + patch_len: 6 + pred_len: 24 + seq_len: 24 + stride: 7 + +train: + batch_size: 16 + debug: false + early_stop: true + early_stop_patience: 15 + epochs: 100 + grad_norm: false + log_step: 100 + loss_func: mae + lr_decay: true + lr_decay_rate: 0.3 + lr_decay_step: 5,20,40,70 + lr_init: 0.003 + mae_thresh: None + mape_thresh: 0.001 + max_grad_norm: 5 + output_dim: 1 + plot: false + weight_decay: 0 diff --git a/config/GWN/AirQuality.yaml b/config/GWN/AirQuality.yaml index e1d8f4b..c8e57d5 100644 --- a/config/GWN/AirQuality.yaml +++ b/config/GWN/AirQuality.yaml @@ -6,7 +6,7 @@ basic: seed: 2023 data: - batch_size: 16 + batch_size: 64 column_wise: false days_per_week: 7 horizon: 
24 @@ -20,26 +20,27 @@ data: model: addaptadj: true - aptinit: - batch_size: 16 + apt_size: 10 + aptinit: null + batch_size: 64 blocks: 4 dilation_channels: 32 dropout: 0.3 + do_graph_conv: True end_channels: 512 gcn_bool: true - in_dim: 2 - input_dim: 6 + in_dim: 1 + input_dim: 1 kernel_size: 2 - layers: 2 - out_dim: 12 - output_dim: 6 + layers: 4 + num_nodes: 35 + out_dim: 24 residual_channels: 32 skip_channels: 256 - supports: - + supports: null train: - batch_size: 16 + batch_size: 64 debug: false early_stop: true early_stop_patience: 15 @@ -51,11 +52,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 - output_dim: 6 + output_dim: 1 plot: false real_value: true - seed: 10 - weight_decay: 0 \ No newline at end of file + weight_decay: 0 diff --git a/config/GWN/BJTaxi-InFlow.yaml b/config/GWN/BJTaxi-InFlow.yaml index 54a5631..8f4de85 100644 --- a/config/GWN/BJTaxi-InFlow.yaml +++ b/config/GWN/BJTaxi-InFlow.yaml @@ -20,22 +20,24 @@ data: model: addaptadj: true - aptinit: - batch_size: 32 + apt_size: 10 + aptinit: null + batch_size: 16 blocks: 4 dilation_channels: 32 dropout: 0.3 + do_graph_conv: True end_channels: 512 gcn_bool: true - in_dim: 2 + in_dim: 1 input_dim: 1 kernel_size: 2 - layers: 2 - out_dim: 12 - output_dim: 1 + layers: 4 + num_nodes: 1024 + out_dim: 24 residual_channels: 32 skip_channels: 256 - supports: + supports: null train: @@ -51,11 +53,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 - weight_decay: 0 \ No newline at end of file + weight_decay: 0 diff --git a/config/GWN/BJTaxi-OutFlow.yaml b/config/GWN/BJTaxi-OutFlow.yaml index ea133e8..f86270e 100644 --- a/config/GWN/BJTaxi-OutFlow.yaml +++ b/config/GWN/BJTaxi-OutFlow.yaml @@ -20,23 +20,24 @@ data: model: addaptadj: true - aptinit: + apt_size: 10 + aptinit: null batch_size: 32 blocks: 4 dilation_channels: 32 dropout: 0.3 + do_graph_conv: True end_channels: 512 gcn_bool: true - in_dim: 2 + in_dim: 1 input_dim: 1 kernel_size: 2 - layers: 2 - out_dim: 12 - output_dim: 1 + layers: 4 + num_nodes: 1024 + out_dim: 24 residual_channels: 32 skip_channels: 256 - supports: - + supports: null train: batch_size: 32 @@ -51,11 +52,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 - weight_decay: 0 \ No newline at end of file + weight_decay: 0 diff --git a/config/GWN/METR-LA.yaml b/config/GWN/METR-LA.yaml index 96faa45..fc93634 100644 --- a/config/GWN/METR-LA.yaml +++ b/config/GWN/METR-LA.yaml @@ -20,23 +20,24 @@ data: model: addaptadj: true - aptinit: - batch_size: 16 + apt_size: 10 + aptinit: null + batch_size: 64 blocks: 4 dilation_channels: 32 dropout: 0.3 + do_graph_conv: True end_channels: 512 gcn_bool: true - in_dim: 2 + in_dim: 1 input_dim: 1 kernel_size: 2 - layers: 2 - out_dim: 12 - output_dim: 1 + layers: 4 + num_nodes: 207 + out_dim: 24 residual_channels: 32 skip_channels: 256 - supports: - + supports: null train: batch_size: 16 @@ -51,11 +52,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 - weight_decay: 0 \ No newline at end of file + weight_decay: 0 diff --git a/config/GWN/NYCBike-InFlow.yaml 
b/config/GWN/NYCBike-InFlow.yaml index 1f4c646..a85e36c 100644 --- a/config/GWN/NYCBike-InFlow.yaml +++ b/config/GWN/NYCBike-InFlow.yaml @@ -20,23 +20,24 @@ data: model: addaptadj: true - aptinit: + apt_size: 10 + aptinit: null batch_size: 32 blocks: 4 dilation_channels: 32 dropout: 0.3 + do_graph_conv: True end_channels: 512 gcn_bool: true - in_dim: 2 + in_dim: 1 input_dim: 1 kernel_size: 2 - layers: 2 - out_dim: 12 - output_dim: 1 + layers: 4 + num_nodes: 128 + out_dim: 24 residual_channels: 32 skip_channels: 256 - supports: - + supports: null train: batch_size: 32 @@ -51,11 +52,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 - weight_decay: 0 \ No newline at end of file + weight_decay: 0 diff --git a/config/GWN/NYCBike-OutFlow.yaml b/config/GWN/NYCBike-OutFlow.yaml index a73d3fc..3ef3c8f 100644 --- a/config/GWN/NYCBike-OutFlow.yaml +++ b/config/GWN/NYCBike-OutFlow.yaml @@ -6,7 +6,7 @@ basic: seed: 2023 data: - batch_size: 32 + batch_size: 16 column_wise: false days_per_week: 7 horizon: 24 @@ -20,26 +20,27 @@ data: model: addaptadj: true - aptinit: - batch_size: 32 + apt_size: 10 + aptinit: null + batch_size: 16 blocks: 4 dilation_channels: 32 dropout: 0.3 + do_graph_conv: True end_channels: 512 gcn_bool: true - in_dim: 2 + in_dim: 1 input_dim: 1 kernel_size: 2 - layers: 2 - out_dim: 12 - output_dim: 1 + layers: 4 + num_nodes: 128 + out_dim: 24 residual_channels: 32 skip_channels: 256 - supports: - + supports: null train: - batch_size: 32 + batch_size: 16 debug: false early_stop: true early_stop_patience: 15 @@ -51,11 +52,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 - weight_decay: 0 \ No newline at end of file + weight_decay: 0 diff --git a/config/GWN/PEMS-BAY.yaml b/config/GWN/PEMS-BAY.yaml new file mode 100644 index 0000000..3dc7acd --- /dev/null +++ b/config/GWN/PEMS-BAY.yaml @@ -0,0 +1,61 @@ +basic: + dataset: PEMS-BAY + device: cuda:0 + mode: train + model: GWN + seed: 2023 + +data: + batch_size: 64 + column_wise: false + days_per_week: 7 + horizon: 24 + input_dim: 1 + lag: 24 + normalizer: std + num_nodes: 325 + steps_per_day: 288 + test_ratio: 0.2 + val_ratio: 0.2 + +model: + addaptadj: true + apt_size: 10 + aptinit: null + batch_size: 64 + blocks: 4 + dilation_channels: 32 + dropout: 0.3 + do_graph_conv: True + end_channels: 512 + gcn_bool: true + in_dim: 1 + input_dim: 1 + kernel_size: 2 + layers: 4 + num_nodes: 325 + out_dim: 24 + residual_channels: 32 + skip_channels: 256 + supports: null + +train: + batch_size: 64 + debug: false + early_stop: true + early_stop_patience: 15 + epochs: 300 + grad_norm: false + log_step: 1000 + loss_func: mae + lr_decay: false + lr_decay_rate: 0.3 + lr_decay_step: 5,20,40,70 + lr_init: 0.003 + mae_thresh: 0.0 + mape_thresh: 0.0 + max_grad_norm: 5 + output_dim: 1 + plot: false + real_value: true + weight_decay: 0 diff --git a/config/GWN/PEMSD3.yaml b/config/GWN/PEMSD3.yaml index f3d78ac..9194d3d 100755 --- a/config/GWN/PEMSD3.yaml +++ b/config/GWN/PEMSD3.yaml @@ -20,30 +20,23 @@ data: model: addaptadj: true - aptinit: + aptinit: null batch_size: 64 blocks: 4 dilation_channels: 32 dropout: 0.3 end_channels: 512 gcn_bool: true - in_dim: 2 + in_dim: 3 input_dim: 1 kernel_size: 2 layers: 2 + num_nodes: 358 out_dim: 12 output_dim: 1 residual_channels: 32 
skip_channels: 256 - supports: - - - - - - - - + supports: null train: batch_size: 16 @@ -58,11 +51,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/GWN/PEMSD4.yaml b/config/GWN/PEMSD4.yaml index ceccee3..ab6f18e 100755 --- a/config/GWN/PEMSD4.yaml +++ b/config/GWN/PEMSD4.yaml @@ -20,30 +20,23 @@ data: model: addaptadj: true - aptinit: + aptinit: null batch_size: 64 blocks: 4 dilation_channels: 32 dropout: 0.3 end_channels: 512 gcn_bool: true - in_dim: 2 + in_dim: 1 input_dim: 1 kernel_size: 2 layers: 2 + num_nodes: 307 out_dim: 12 output_dim: 1 residual_channels: 32 skip_channels: 256 - supports: - - - - - - - - + supports: null train: batch_size: 64 @@ -58,11 +51,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/GWN/PEMSD7.yaml b/config/GWN/PEMSD7.yaml index 2cbfc62..4d82415 100755 --- a/config/GWN/PEMSD7.yaml +++ b/config/GWN/PEMSD7.yaml @@ -20,30 +20,23 @@ data: model: addaptadj: true - aptinit: + aptinit: null batch_size: 64 blocks: 4 dilation_channels: 32 dropout: 0.3 end_channels: 512 gcn_bool: true - in_dim: 2 + in_dim: 3 input_dim: 1 kernel_size: 2 layers: 2 + num_nodes: 883 out_dim: 12 output_dim: 1 residual_channels: 32 skip_channels: 256 - supports: - - - - - - - - + supports: null train: batch_size: 16 @@ -58,11 +51,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/GWN/PEMSD8.yaml b/config/GWN/PEMSD8.yaml index 88a5090..26d0de8 100755 --- a/config/GWN/PEMSD8.yaml +++ b/config/GWN/PEMSD8.yaml @@ -20,30 +20,23 @@ data: model: addaptadj: true - aptinit: + aptinit: null batch_size: 64 blocks: 4 dilation_channels: 32 dropout: 0.3 end_channels: 512 gcn_bool: true - in_dim: 2 + in_dim: 3 input_dim: 1 kernel_size: 2 layers: 2 + num_nodes: 170 out_dim: 12 output_dim: 1 residual_channels: 32 skip_channels: 256 - supports: - - - - - - - - + supports: null train: batch_size: 64 @@ -58,11 +51,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/GWN/SolarEnergy.yaml b/config/GWN/SolarEnergy.yaml index 76110e1..4e572fa 100644 --- a/config/GWN/SolarEnergy.yaml +++ b/config/GWN/SolarEnergy.yaml @@ -6,7 +6,7 @@ basic: seed: 2023 data: - batch_size: 64 + batch_size: 16 column_wise: false days_per_week: 7 horizon: 24 @@ -20,25 +20,27 @@ data: model: addaptadj: true - aptinit: - batch_size: 64 + apt_size: 10 + aptinit: null + batch_size: 32 blocks: 4 dilation_channels: 32 dropout: 0.3 + do_graph_conv: True end_channels: 512 gcn_bool: true - in_dim: 2 + in_dim: 1 input_dim: 1 kernel_size: 2 - layers: 2 - out_dim: 12 - output_dim: 1 + layers: 4 + num_nodes: 137 + out_dim: 24 residual_channels: 32 skip_channels: 256 - supports: + supports: null train: - batch_size: 64 + batch_size: 16 debug: false early_stop: true early_stop_patience: 15 @@ -50,11 +52,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.001 max_grad_norm: 5 
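In the GWN files above, the empty `supports`/`aptinit` values (including the stray blank list entries after `supports:`) are normalized to explicit `null`, and the newer configs add `apt_size: 10`. With no predefined supports, Graph WaveNet relies entirely on its learned adaptive adjacency, for which `apt_size` is the embedding width. A sketch of the standard construction, not code from this repository:

```python
import torch
import torch.nn.functional as F

num_nodes, apt_size = 207, 10  # METR-LA values from the configs above

# aptinit: null means both embeddings start random and are learned end to end.
nodevec1 = torch.nn.Parameter(torch.randn(num_nodes, apt_size))
nodevec2 = torch.nn.Parameter(torch.randn(apt_size, num_nodes))

# With supports: null, gcn_bool: true and addaptadj: true, this learned,
# row-normalized matrix is the only graph the convolutions see.
adp = F.softmax(F.relu(nodevec1 @ nodevec2), dim=1)  # (num_nodes, num_nodes)
```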
output_dim: 1 plot: false real_value: true - seed: 10 - weight_decay: 0 \ No newline at end of file + weight_decay: 0 diff --git a/config/HI/AirQuality.yaml b/config/HI/AirQuality.yaml new file mode 100644 index 0000000..147da8a --- /dev/null +++ b/config/HI/AirQuality.yaml @@ -0,0 +1,48 @@ +basic: + dataset: AirQuality + device: cuda:0 + mode: train + model: HI + seed: 2023 + +data: + batch_size: 512 + column_wise: false + days_per_week: 7 + horizon: 24 + input_dim: 6 + lag: 24 + normalizer: std + num_nodes: 35 + steps_per_day: 24 + test_ratio: 0.2 + val_ratio: 0.2 + +model: + input_len: 24 + output_len: 24 + reverse: False + + +train: + batch_size: 512 + debug: false + early_stop: true + early_stop_patience: 15 + epochs: 1 + grad_norm: false + log_step: 1000 + loss_func: mae + lr_decay: true + lr_decay_rate: 0.3 + lr_decay_step: 5,20,40,70 + lr_init: 0.0001 + mae_thresh: None + mape_thresh: 0.001 + max_grad_norm: 5 + output_dim: 6 + optimizer: null + plot: false + real_value: true + scheduler: null + weight_decay: 0 \ No newline at end of file diff --git a/config/HI/BJTaxi-InFlow.yaml b/config/HI/BJTaxi-InFlow.yaml new file mode 100644 index 0000000..d3b39ea --- /dev/null +++ b/config/HI/BJTaxi-InFlow.yaml @@ -0,0 +1,48 @@ +basic: + dataset: BJTaxi-InFlow + device: cuda:0 + mode: train + model: HI + seed: 2023 + +data: + batch_size: 2048 + column_wise: false + days_per_week: 7 + horizon: 24 + input_dim: 1 + lag: 24 + normalizer: std + num_nodes: 1024 + steps_per_day: 48 + test_ratio: 0.2 + val_ratio: 0.2 + +model: + input_len: 24 + output_len: 24 + reverse: False + + +train: + batch_size: 2048 + debug: false + early_stop: true + early_stop_patience: 15 + epochs: 1 + grad_norm: false + log_step: 1000 + loss_func: mae + lr_decay: true + lr_decay_rate: 0.3 + lr_decay_step: 5,20,40,70 + lr_init: 0.0001 + mae_thresh: None + mape_thresh: 0.001 + max_grad_norm: 5 + output_dim: 1 + optimizer: null + plot: false + real_value: true + scheduler: null + weight_decay: 0 \ No newline at end of file diff --git a/config/HI/BJTaxi-OutFlow.yaml b/config/HI/BJTaxi-OutFlow.yaml new file mode 100644 index 0000000..96f4253 --- /dev/null +++ b/config/HI/BJTaxi-OutFlow.yaml @@ -0,0 +1,48 @@ +basic: + dataset: BJTaxi-OutFlow + device: cuda:0 + mode: train + model: HI + seed: 2023 + +data: + batch_size: 2048 + column_wise: false + days_per_week: 7 + horizon: 24 + input_dim: 1 + lag: 24 + normalizer: std + num_nodes: 1024 + steps_per_day: 48 + test_ratio: 0.2 + val_ratio: 0.2 + +model: + input_len: 24 + output_len: 24 + reverse: False + + +train: + batch_size: 2048 + debug: false + early_stop: true + early_stop_patience: 15 + epochs: 1 + grad_norm: false + log_step: 1000 + loss_func: mae + lr_decay: true + lr_decay_rate: 0.3 + lr_decay_step: 5,20,40,70 + lr_init: 0.0001 + mae_thresh: None + mape_thresh: 0.001 + max_grad_norm: 5 + output_dim: 1 + optimizer: null + plot: false + real_value: true + scheduler: null + weight_decay: 0 \ No newline at end of file diff --git a/config/HI/METR-LA.yaml b/config/HI/METR-LA.yaml new file mode 100644 index 0000000..203db0d --- /dev/null +++ b/config/HI/METR-LA.yaml @@ -0,0 +1,48 @@ +basic: + dataset: METR-LA + device: cuda:1 + mode: train + model: HI + seed: 2023 + +data: + batch_size: 512 + column_wise: false + days_per_week: 7 + horizon: 24 + input_dim: 1 + lag: 24 + normalizer: std + num_nodes: 207 + steps_per_day: 288 + test_ratio: 0.2 + val_ratio: 0.2 + +model: + input_len: 24 + output_len: 24 + reverse: False + + +train: + batch_size: 512 + debug: false + early_stop: true 
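The new HI configs are deliberately minimal: Historical Inertia is a copy baseline, which is why `epochs: 1` suffices and the model section carries only `input_len`, `output_len`, and `reverse`. What those three fields presumably do, as a sketch (the repository's implementation may differ in detail):

```python
import torch

def hi_forecast(x: torch.Tensor, output_len: int = 24, reverse: bool = False):
    """x: (batch, input_len, num_nodes, dim) history window.
    Replay the last `output_len` observed steps as the prediction;
    reverse=True would flip their temporal order."""
    y = x[:, -output_len:]
    return torch.flip(y, dims=[1]) if reverse else y
```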
+ early_stop_patience: 15 + epochs: 1 + grad_norm: false + log_step: 1000 + loss_func: mae + lr_decay: true + lr_decay_rate: 0.3 + lr_decay_step: 5,20,40,70 + lr_init: 0.0001 + mae_thresh: None + mape_thresh: 0.001 + max_grad_norm: 5 + output_dim: 1 + optimizer: null + plot: false + real_value: true + scheduler: null + weight_decay: 0 \ No newline at end of file diff --git a/config/HI/NYCBike-InFlow.yaml b/config/HI/NYCBike-InFlow.yaml new file mode 100644 index 0000000..a24a481 --- /dev/null +++ b/config/HI/NYCBike-InFlow.yaml @@ -0,0 +1,48 @@ +basic: + dataset: NYCBike-InFlow + device: cuda:0 + mode: train + model: HI + seed: 2023 + +data: + batch_size: 512 + column_wise: false + days_per_week: 7 + horizon: 24 + input_dim: 1 + lag: 24 + normalizer: std + num_nodes: 128 + steps_per_day: 48 + test_ratio: 0.2 + val_ratio: 0.2 + +model: + input_len: 24 + output_len: 24 + reverse: False + + +train: + batch_size: 512 + debug: false + early_stop: true + early_stop_patience: 15 + epochs: 1 + grad_norm: false + log_step: 1000 + loss_func: mae + lr_decay: true + lr_decay_rate: 0.3 + lr_decay_step: 5,20,40,70 + lr_init: 0.0001 + mae_thresh: None + mape_thresh: 0.001 + max_grad_norm: 5 + output_dim: 1 + optimizer: null + plot: false + real_value: true + scheduler: null + weight_decay: 0 \ No newline at end of file diff --git a/config/HI/NYCBike-OutFlow.yaml b/config/HI/NYCBike-OutFlow.yaml new file mode 100644 index 0000000..87d6156 --- /dev/null +++ b/config/HI/NYCBike-OutFlow.yaml @@ -0,0 +1,48 @@ +basic: + dataset: NYCBike-OutFlow + device: cuda:0 + mode: train + model: HI + seed: 2023 + +data: + batch_size: 512 + column_wise: false + days_per_week: 7 + horizon: 24 + input_dim: 1 + lag: 24 + normalizer: std + num_nodes: 128 + steps_per_day: 48 + test_ratio: 0.2 + val_ratio: 0.2 + +model: + input_len: 24 + output_len: 24 + reverse: False + + +train: + batch_size: 512 + debug: false + early_stop: true + early_stop_patience: 15 + epochs: 1 + grad_norm: false + log_step: 1000 + loss_func: mae + lr_decay: true + lr_decay_rate: 0.3 + lr_decay_step: 5,20,40,70 + lr_init: 0.0001 + mae_thresh: None + mape_thresh: 0.001 + max_grad_norm: 5 + output_dim: 1 + optimizer: null + plot: false + real_value: true + scheduler: null + weight_decay: 0 \ No newline at end of file diff --git a/config/HI/PEMS-BAY.yaml b/config/HI/PEMS-BAY.yaml new file mode 100644 index 0000000..e012772 --- /dev/null +++ b/config/HI/PEMS-BAY.yaml @@ -0,0 +1,48 @@ +basic: + dataset: PEMS-BAY + device: cuda:0 + mode: train + model: HI + seed: 2023 + +data: + batch_size: 512 + column_wise: false + days_per_week: 7 + horizon: 24 + input_dim: 1 + lag: 24 + normalizer: std + num_nodes: 325 + steps_per_day: 288 + test_ratio: 0.2 + val_ratio: 0.2 + +model: + input_len: 24 + output_len: 24 + reverse: False + + +train: + batch_size: 512 + debug: false + early_stop: true + early_stop_patience: 15 + epochs: 1 + grad_norm: false + log_step: 1000 + loss_func: mae + lr_decay: true + lr_decay_rate: 0.3 + lr_decay_step: 5,20,40,70 + lr_init: 0.0001 + mae_thresh: None + mape_thresh: 0.001 + max_grad_norm: 5 + output_dim: 1 + optimizer: null + plot: false + real_value: true + scheduler: null + weight_decay: 0 \ No newline at end of file diff --git a/config/HI/SolarEnergy.yaml b/config/HI/SolarEnergy.yaml new file mode 100644 index 0000000..aa07cf6 --- /dev/null +++ b/config/HI/SolarEnergy.yaml @@ -0,0 +1,48 @@ +basic: + dataset: SolarEnergy + device: cuda:0 + mode: train + model: HI + seed: 2023 + +data: + batch_size: 64 + column_wise: false + 
days_per_week: 7 + horizon: 24 + input_dim: 1 + lag: 24 + normalizer: std + num_nodes: 137 + steps_per_day: 24 + test_ratio: 0.2 + val_ratio: 0.2 + +model: + input_len: 24 + output_len: 24 + reverse: False + + +train: + batch_size: 64 + debug: false + early_stop: true + early_stop_patience: 15 + epochs: 1 + grad_norm: false + log_step: 1000 + loss_func: mae + lr_decay: + lr_decay_rate: + lr_decay_step: + lr_init: 0.0001 + mae_thresh: None + mape_thresh: 0.001 + max_grad_norm: 5 + output_dim: 1 + optimizer: null + plot: false + real_value: true + scheduler: null + weight_decay: 0 \ No newline at end of file diff --git a/config/Informer/AirQuality.yaml b/config/Informer/AirQuality.yaml new file mode 100644 index 0000000..4b1568a --- /dev/null +++ b/config/Informer/AirQuality.yaml @@ -0,0 +1,66 @@ +basic: + dataset: AirQuality + device: cuda:0 + mode: train + model: Informer + seed: 2023 + +data: + batch_size: 256 + column_wise: false + days_per_week: 7 + horizon: 24 + input_dim: 6 + lag: 24 + label_len: 24 + normalizer: std + num_nodes: 35 + steps_per_day: 24 + test_ratio: 0.2 + val_ratio: 0.2 + +model: + activation: gelu + seq_len: 24 + label_len: 24 + pred_len: 24 + d_model: 128 + d_ff: 2048 + dropout: 0.1 + e_layers: 2 + d_layers: 1 + n_heads: 8 + output_attention: False + factor: 5 + attn: prob + embed: fixed + freq: h + distil: true + mix: true + enc_in: 6 + dec_in: 6 + c_out: 6 + + +train: + batch_size: 256 + debug: false + early_stop: true + early_stop_patience: 15 + epochs: 100 + grad_norm: false + label_len: 24 + log_step: 1000 + loss_func: mae + lr_decay: true + lr_decay_rate: 0.3 + lr_decay_step: 5,20,40,70 + lr_init: 0.0001 + mae_thresh: None + mape_thresh: 0.001 + max_grad_norm: 5 + output_dim: 6 + plot: false + pred_len: 24 + real_value: true + weight_decay: 0 \ No newline at end of file diff --git a/config/Informer/BJTaxi-InFlow.yaml b/config/Informer/BJTaxi-InFlow.yaml new file mode 100644 index 0000000..56d089d --- /dev/null +++ b/config/Informer/BJTaxi-InFlow.yaml @@ -0,0 +1,66 @@ +basic: + dataset: BJTaxi-InFlow + device: cuda:0 + mode: train + model: Informer + seed: 2023 + +data: + batch_size: 2048 + column_wise: false + days_per_week: 7 + horizon: 24 + input_dim: 1 + lag: 24 + label_len: 24 + normalizer: std + num_nodes: 1024 + steps_per_day: 48 + test_ratio: 0.2 + val_ratio: 0.2 + +model: + activation: gelu + seq_len: 24 + label_len: 12 + pred_len: 24 + d_model: 128 + d_ff: 2048 + dropout: 0.1 + e_layers: 2 + d_layers: 1 + n_heads: 8 + output_attention: False + factor: 5 + attn: prob + embed: fixed + freq: h + distil: true + mix: true + enc_in: 1 + dec_in: 1 + c_out: 1 + + +train: + batch_size: 2048 + debug: false + early_stop: true + early_stop_patience: 15 + epochs: 100 + grad_norm: false + label_len: 24 + log_step: 1000 + loss_func: mae + lr_decay: true + lr_decay_rate: 0.3 + lr_decay_step: 5,20,40,70 + lr_init: 0.0001 + mae_thresh: None + mape_thresh: 0.001 + max_grad_norm: 5 + output_dim: 6 + plot: false + pred_len: 24 + real_value: true + weight_decay: 0 \ No newline at end of file diff --git a/config/Informer/BJTaxi-OutFlow.yaml b/config/Informer/BJTaxi-OutFlow.yaml new file mode 100644 index 0000000..875cce8 --- /dev/null +++ b/config/Informer/BJTaxi-OutFlow.yaml @@ -0,0 +1,66 @@ +basic: + dataset: BJTaxi-OutFlow + device: cuda:0 + mode: train + model: Informer + seed: 2023 + +data: + batch_size: 2048 + column_wise: false + days_per_week: 7 + horizon: 24 + input_dim: 1 + lag: 24 + label_len: 24 + normalizer: std + num_nodes: 1024 + steps_per_day: 48 + 
test_ratio: 0.2 + val_ratio: 0.2 + +model: + activation: gelu + seq_len: 24 + label_len: 12 + pred_len: 24 + d_model: 128 + d_ff: 2048 + dropout: 0.1 + e_layers: 2 + d_layers: 1 + n_heads: 8 + output_attention: False + factor: 5 + attn: prob + embed: fixed + freq: h + distil: true + mix: true + enc_in: 1 + dec_in: 1 + c_out: 1 + + +train: + batch_size: 2048 + debug: false + early_stop: true + early_stop_patience: 15 + epochs: 100 + grad_norm: false + label_len: 24 + log_step: 1000 + loss_func: mae + lr_decay: true + lr_decay_rate: 0.3 + lr_decay_step: 5,20,40,70 + lr_init: 0.0001 + mae_thresh: None + mape_thresh: 0.001 + max_grad_norm: 5 + output_dim: 1 + plot: false + pred_len: 24 + real_value: true + weight_decay: 0 \ No newline at end of file diff --git a/config/Informer/METR-LA.yaml b/config/Informer/METR-LA.yaml new file mode 100644 index 0000000..731fa3e --- /dev/null +++ b/config/Informer/METR-LA.yaml @@ -0,0 +1,66 @@ +basic: + dataset: METR-LA + device: cuda:0 + mode: train + model: Informer + seed: 2023 + +data: + batch_size: 256 + column_wise: false + days_per_week: 7 + horizon: 24 + input_dim: 1 + lag: 24 + label_len: 24 + normalizer: std + num_nodes: 207 + steps_per_day: 288 + test_ratio: 0.2 + val_ratio: 0.2 + +model: + activation: gelu + seq_len: 24 + label_len: 12 + pred_len: 24 + d_model: 128 + d_ff: 2048 + dropout: 0.1 + e_layers: 2 + d_layers: 1 + n_heads: 8 + output_attention: False + factor: 5 + attn: prob + embed: fixed + freq: h + distil: true + mix: true + enc_in: 1 + dec_in: 1 + c_out: 1 + + +train: + batch_size: 256 + debug: false + early_stop: true + early_stop_patience: 15 + epochs: 100 + grad_norm: false + label_len: 24 + log_step: 1000 + loss_func: mae + lr_decay: true + lr_decay_rate: 0.3 + lr_decay_step: 5,20,40,70 + lr_init: 0.0001 + mae_thresh: None + mape_thresh: 0.001 + max_grad_norm: 5 + output_dim: 1 + plot: false + pred_len: 24 + real_value: true + weight_decay: 0 \ No newline at end of file diff --git a/config/Informer/NYCBike-InFlow.yaml b/config/Informer/NYCBike-InFlow.yaml new file mode 100644 index 0000000..30ca485 --- /dev/null +++ b/config/Informer/NYCBike-InFlow.yaml @@ -0,0 +1,66 @@ +basic: + dataset: NYCBike-InFlow + device: cuda:0 + mode: train + model: Informer + seed: 2023 + +data: + batch_size: 256 + column_wise: false + days_per_week: 7 + horizon: 24 + input_dim: 1 + lag: 24 + label_len: 24 + normalizer: std + num_nodes: 128 + steps_per_day: 48 + test_ratio: 0.2 + val_ratio: 0.2 + +model: + activation: gelu + seq_len: 24 + label_len: 12 + pred_len: 24 + d_model: 128 + d_ff: 2048 + dropout: 0.1 + e_layers: 2 + d_layers: 1 + n_heads: 8 + output_attention: False + factor: 5 + attn: prob + embed: fixed + freq: h + distil: true + mix: true + enc_in: 1 + dec_in: 1 + c_out: 1 + + +train: + batch_size: 256 + debug: false + early_stop: true + early_stop_patience: 15 + epochs: 100 + grad_norm: false + label_len: 24 + log_step: 1000 + loss_func: mae + lr_decay: true + lr_decay_rate: 0.3 + lr_decay_step: 5,20,40,70 + lr_init: 0.0001 + mae_thresh: None + mape_thresh: 0.001 + max_grad_norm: 5 + output_dim: 1 + plot: false + pred_len: 24 + real_value: true + weight_decay: 0 \ No newline at end of file diff --git a/config/Informer/NYCBike-OutFlow.yaml b/config/Informer/NYCBike-OutFlow.yaml new file mode 100644 index 0000000..9fcfe6b --- /dev/null +++ b/config/Informer/NYCBike-OutFlow.yaml @@ -0,0 +1,66 @@ +basic: + dataset: NYCBike-OutFlow + device: cuda:0 + mode: train + model: Informer + seed: 2023 + +data: + batch_size: 256 + column_wise: false + 
days_per_week: 7 + horizon: 24 + input_dim: 1 + lag: 24 + label_len: 24 + normalizer: std + num_nodes: 128 + steps_per_day: 48 + test_ratio: 0.2 + val_ratio: 0.2 + +model: + activation: gelu + seq_len: 24 + label_len: 12 + pred_len: 24 + d_model: 128 + d_ff: 2048 + dropout: 0.1 + e_layers: 2 + d_layers: 1 + n_heads: 8 + output_attention: False + factor: 5 + attn: prob + embed: fixed + freq: h + distil: true + mix: true + enc_in: 1 + dec_in: 1 + c_out: 1 + + +train: + batch_size: 256 + debug: false + early_stop: true + early_stop_patience: 15 + epochs: 100 + grad_norm: false + label_len: 24 + log_step: 1000 + loss_func: mae + lr_decay: true + lr_decay_rate: 0.3 + lr_decay_step: 5,20,40,70 + lr_init: 0.0001 + mae_thresh: None + mape_thresh: 0.001 + max_grad_norm: 5 + output_dim: 1 + plot: false + pred_len: 24 + real_value: true + weight_decay: 0 \ No newline at end of file diff --git a/config/Informer/PEMS-BAY.yaml b/config/Informer/PEMS-BAY.yaml new file mode 100644 index 0000000..961bd6f --- /dev/null +++ b/config/Informer/PEMS-BAY.yaml @@ -0,0 +1,66 @@ +basic: + dataset: PEMS-BAY + device: cuda:0 + mode: train + model: Informer + seed: 2023 + +data: + batch_size: 2048 + column_wise: false + days_per_week: 7 + horizon: 24 + input_dim: 1 + lag: 24 + label_len: 24 + normalizer: std + num_nodes: 325 + steps_per_day: 288 + test_ratio: 0.2 + val_ratio: 0.2 + +model: + activation: gelu + seq_len: 24 + label_len: 12 + pred_len: 24 + d_model: 128 + d_ff: 2048 + dropout: 0.1 + e_layers: 2 + d_layers: 1 + n_heads: 8 + output_attention: False + factor: 5 + attn: prob + embed: fixed + freq: h + distil: true + mix: true + enc_in: 1 + dec_in: 1 + c_out: 1 + + +train: + batch_size: 2048 + debug: false + early_stop: true + early_stop_patience: 15 + epochs: 100 + grad_norm: false + label_len: 24 + log_step: 1000 + loss_func: mae + lr_decay: true + lr_decay_rate: 0.3 + lr_decay_step: 5,20,40,70 + lr_init: 0.0001 + mae_thresh: None + mape_thresh: 0.001 + max_grad_norm: 5 + output_dim: 6 + plot: false + pred_len: 24 + real_value: true + weight_decay: 0 \ No newline at end of file diff --git a/config/Informer/SolarEnergy.yaml b/config/Informer/SolarEnergy.yaml new file mode 100644 index 0000000..0d31425 --- /dev/null +++ b/config/Informer/SolarEnergy.yaml @@ -0,0 +1,66 @@ +basic: + dataset: SolarEnergy + device: cuda:0 + mode: train + model: Informer + seed: 2023 + +data: + batch_size: 1024 + column_wise: false + days_per_week: 7 + horizon: 24 + input_dim: 6 + lag: 24 + label_len: 24 + normalizer: std + num_nodes: 137 + steps_per_day: 24 + test_ratio: 0.2 + val_ratio: 0.2 + +model: + activation: gelu + seq_len: 24 + label_len: 12 + pred_len: 24 + d_model: 128 + d_ff: 2048 + dropout: 0.1 + e_layers: 2 + d_layers: 1 + n_heads: 8 + output_attention: False + factor: 5 + attn: prob + embed: fixed + freq: h + distil: true + mix: true + enc_in: 1 + dec_in: 1 + c_out: 1 + + +train: + batch_size: 1024 + debug: false + early_stop: true + early_stop_patience: 15 + epochs: 100 + grad_norm: false + label_len: 24 + log_step: 1000 + loss_func: mae + lr_decay: true + lr_decay_rate: 0.3 + lr_decay_step: 5,20,40,70 + lr_init: 0.0001 + mae_thresh: None + mape_thresh: 0.001 + max_grad_norm: 5 + output_dim: 1 + plot: false + pred_len: 24 + real_value: true + weight_decay: 0 \ No newline at end of file diff --git a/config/MTGNN/AirQuality.yaml b/config/MTGNN/AirQuality.yaml new file mode 100644 index 0000000..9846895 --- /dev/null +++ b/config/MTGNN/AirQuality.yaml @@ -0,0 +1,64 @@ +basic: + dataset: AirQuality + device: cuda:0 + 
mode: train + model: MTGNN + seed: 2023 + +data: + batch_size: 64 + column_wise: false + days_per_week: 7 + horizon: 24 + input_dim: 6 + lag: 24 + normalizer: std + num_nodes: 35 + steps_per_day: 24 + test_ratio: 0.2 + val_ratio: 0.2 + +model: + gcn_true: True # whether to use graph convolution (bool) + buildA_true: True # whether to build the adjacency matrix dynamically (bool) + subgraph_size: 20 # subgraph size (int) + num_nodes: 35 # number of nodes (int) + node_dim: 40 # node embedding dimension (int) + dilation_exponential: 1 # dilation exponential (int) + conv_channels: 32 # convolution channels (int) + residual_channels: 32 # residual channels (int) + skip_channels: 64 # skip-connection channels (int) + end_channels: 128 # output-layer channels (int) + seq_len: 24 # input sequence length (int) + in_dim: 6 # input feature dimension (int) + out_len: 24 # output sequence length (int) + out_dim: 6 # output dimension (int) + layers: 3 # number of layers (int) + propalpha: 0.05 # graph propagation alpha (float) + tanhalpha: 3 # tanh saturation alpha (float) + layer_norm_affline: True # whether layer norm uses an affine transform (bool) + gcn_depth: 2 # graph convolution depth (int) + dropout: 0.3 # dropout rate (float) + predefined_A: null # predefined adjacency matrix (optional, None) + static_feat: null # static node features (optional, None) + +train: + batch_size: 64 + debug: false + early_stop: true + early_stop_patience: 15 + epochs: 100 + grad_norm: false + log_step: 1000 + loss_func: mae + lr_decay: true + lr_decay_rate: 0.3 + lr_decay_step: 5,20,40,70 + lr_init: 0.003 + mae_thresh: None + mape_thresh: 0.001 + max_grad_norm: 5 + output_dim: 6 + plot: false + real_value: true + weight_decay: 0 \ No newline at end of file diff --git a/config/MTGNN/BJTaxi-InFlow.yaml b/config/MTGNN/BJTaxi-InFlow.yaml new file mode 100644 index 0000000..09e453a --- /dev/null +++ b/config/MTGNN/BJTaxi-InFlow.yaml @@ -0,0 +1,64 @@ +basic: + dataset: BJTaxi-InFlow + device: cuda:0 + mode: train + model: MTGNN + seed: 2023 + +data: + batch_size: 64 + column_wise: false + days_per_week: 7 + horizon: 24 + input_dim: 1 + lag: 24 + normalizer: std + num_nodes: 1024 + steps_per_day: 48 + test_ratio: 0.2 + val_ratio: 0.2 + +model: + gcn_true: True # whether to use graph convolution (bool) + buildA_true: True # whether to build the adjacency matrix dynamically (bool) + subgraph_size: 20 # subgraph size (int) + num_nodes: 1024 # number of nodes (int) + node_dim: 40 # node embedding dimension (int) + dilation_exponential: 1 # dilation exponential (int) + conv_channels: 32 # convolution channels (int) + residual_channels: 32 # residual channels (int) + skip_channels: 64 # skip-connection channels (int) + end_channels: 128 # output-layer channels (int) + seq_len: 24 # input sequence length (int) + in_dim: 1 # input feature dimension (int) + out_len: 24 # output sequence length (int) + out_dim: 1 # output dimension (int) + layers: 3 # number of layers (int) + propalpha: 0.05 # graph propagation alpha (float) + tanhalpha: 3 # tanh saturation alpha (float) + layer_norm_affline: True # whether layer norm uses an affine transform (bool) + gcn_depth: 2 # graph convolution depth (int) + dropout: 0.3 # dropout rate (float) + predefined_A: null # predefined adjacency matrix (optional, None) + static_feat: null # static node features (optional, None) + +train: + batch_size: 64 + debug: false + early_stop: true + early_stop_patience: 15 + epochs: 100 + grad_norm: false + log_step: 1000 + loss_func: mae + lr_decay: true + lr_decay_rate: 0.3 + lr_decay_step: 5,20,40,70 + lr_init: 0.003 + mae_thresh: None + mape_thresh: 0.001 + max_grad_norm: 5 + output_dim: 1 + plot: false + real_value: true + weight_decay: 0 \ No newline at end of file diff --git a/config/MTGNN/BJTaxi-OutFlow.yaml b/config/MTGNN/BJTaxi-OutFlow.yaml new file mode 100644 index 0000000..1b62a4e --- /dev/null +++ b/config/MTGNN/BJTaxi-OutFlow.yaml @@ -0,0 +1,64 @@ +basic: + dataset: BJTaxi-OutFlow + device: cuda:0 + mode: train + model: MTGNN + seed: 2023 + +data: + batch_size: 64 + column_wise: false + days_per_week: 7 + horizon: 24 + input_dim: 1 + lag: 24 + normalizer: std + num_nodes: 1024 + steps_per_day: 48 + test_ratio: 0.2 + val_ratio: 0.2
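Among the commented MTGNN fields above, `tanhalpha`, `node_dim`, and `subgraph_size` work together when `buildA_true` is set: node embeddings produce a directed similarity matrix whose rows are then sparsified to the top `subgraph_size` neighbours. A sketch of the published MTGNN graph-learning step (illustrative, and simplified relative to the real layer, which also applies linear projections to the embeddings):

```python
import torch

num_nodes, node_dim, subgraph_size, tanhalpha = 207, 40, 20, 3.0  # METR-LA values

e1 = torch.randn(num_nodes, node_dim)  # learned source-node embeddings
e2 = torch.randn(num_nodes, node_dim)  # learned target-node embeddings
m1 = torch.tanh(tanhalpha * e1)
m2 = torch.tanh(tanhalpha * e2)
a = torch.relu(torch.tanh(tanhalpha * (m1 @ m2.T - m2 @ m1.T)))  # directed weights

# subgraph_size is a top-k sparsifier: keep each node's 20 strongest edges.
mask = torch.zeros_like(a)
mask.scatter_(1, a.topk(subgraph_size, dim=1).indices, 1.0)
adj = a * mask
```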
+ +model: + gcn_true: True # whether to use graph convolution (bool) + buildA_true: True # whether to build the adjacency matrix dynamically (bool) + subgraph_size: 20 # subgraph size (int) + num_nodes: 1024 # number of nodes (int) + node_dim: 40 # node embedding dimension (int) + dilation_exponential: 1 # dilation exponential (int) + conv_channels: 32 # convolution channels (int) + residual_channels: 32 # residual channels (int) + skip_channels: 64 # skip-connection channels (int) + end_channels: 128 # output-layer channels (int) + seq_len: 24 # input sequence length (int) + in_dim: 1 # input feature dimension (int) + out_len: 24 # output sequence length (int) + out_dim: 1 # output dimension (int) + layers: 3 # number of layers (int) + propalpha: 0.05 # graph propagation alpha (float) + tanhalpha: 3 # tanh saturation alpha (float) + layer_norm_affline: True # whether layer norm uses an affine transform (bool) + gcn_depth: 2 # graph convolution depth (int) + dropout: 0.3 # dropout rate (float) + predefined_A: null # predefined adjacency matrix (optional, None) + static_feat: null # static node features (optional, None) + +train: + batch_size: 64 + debug: false + early_stop: true + early_stop_patience: 15 + epochs: 100 + grad_norm: false + log_step: 1000 + loss_func: mae + lr_decay: true + lr_decay_rate: 0.3 + lr_decay_step: 5,20,40,70 + lr_init: 0.003 + mae_thresh: None + mape_thresh: 0.001 + max_grad_norm: 5 + output_dim: 1 + plot: false + real_value: true + weight_decay: 0 \ No newline at end of file diff --git a/config/MTGNN/METR-LA.yaml b/config/MTGNN/METR-LA.yaml new file mode 100644 index 0000000..2518638 --- /dev/null +++ b/config/MTGNN/METR-LA.yaml @@ -0,0 +1,64 @@ +basic: + dataset: METR-LA + device: cuda:1 + mode: train + model: MTGNN + seed: 2023 + +data: + batch_size: 64 + column_wise: false + days_per_week: 7 + horizon: 24 + input_dim: 1 + lag: 24 + normalizer: std + num_nodes: 207 + steps_per_day: 288 + test_ratio: 0.2 + val_ratio: 0.2 + +model: + gcn_true: True # whether to use graph convolution (bool) + buildA_true: True # whether to build the adjacency matrix dynamically (bool) + subgraph_size: 20 # subgraph size (int) + num_nodes: 207 # number of nodes (int) + node_dim: 40 # node embedding dimension (int) + dilation_exponential: 1 # dilation exponential (int) + conv_channels: 32 # convolution channels (int) + residual_channels: 32 # residual channels (int) + skip_channels: 64 # skip-connection channels (int) + end_channels: 128 # output-layer channels (int) + seq_len: 24 # input sequence length (int) + in_dim: 1 # input feature dimension (int) + out_len: 24 # output sequence length (int) + out_dim: 1 # output dimension (int) + layers: 3 # number of layers (int) + propalpha: 0.05 # graph propagation alpha (float) + tanhalpha: 3 # tanh saturation alpha (float) + layer_norm_affline: True # whether layer norm uses an affine transform (bool) + gcn_depth: 2 # graph convolution depth (int) + dropout: 0.3 # dropout rate (float) + predefined_A: null # predefined adjacency matrix (optional, None) + static_feat: null # static node features (optional, None) + +train: + batch_size: 64 + debug: false + early_stop: true + early_stop_patience: 15 + epochs: 100 + grad_norm: false + log_step: 1000 + loss_func: mae + lr_decay: true + lr_decay_rate: 0.3 + lr_decay_step: 5,20,40,70 + lr_init: 0.003 + mae_thresh: None + mape_thresh: 0.001 + max_grad_norm: 5 + output_dim: 1 + plot: false + real_value: true + weight_decay: 0 \ No newline at end of file diff --git a/config/MTGNN/NYCBike-InFlow.yaml b/config/MTGNN/NYCBike-InFlow.yaml new file mode 100644 index 0000000..95ae41b --- /dev/null +++ b/config/MTGNN/NYCBike-InFlow.yaml @@ -0,0 +1,64 @@ +basic: + dataset: NYCBike-InFlow + device: cuda:0 + mode: train + model: MTGNN + seed: 2023 + +data: + batch_size: 64 + column_wise: false + days_per_week: 7 + horizon: 24 + input_dim: 1 + lag: 24 + normalizer: std + num_nodes: 128 + steps_per_day: 48 + test_ratio: 0.2 + val_ratio: 0.2 + +model: + gcn_true: True # whether to use graph convolution (bool) + buildA_true: True # whether to build the adjacency matrix dynamically (bool) + subgraph_size: 20 # subgraph size (int) + num_nodes: 128 # number of nodes (int) + node_dim: 40 # node embedding dimension (int) + dilation_exponential: 1 # dilation exponential (int) + conv_channels: 32 # convolution channels (int) +
residual_channels: 32 # residual channels (int) + skip_channels: 64 # skip connection channels (int) + end_channels: 128 # output layer channels (int) + seq_len: 24 # input sequence length (int) + in_dim: 1 # input feature dimension (int) + out_len: 24 # output sequence length (int) + out_dim: 1 # output prediction dimension (int) + layers: 3 # number of model layers (int) + propalpha: 0.05 # graph propagation parameter alpha (float) + tanhalpha: 3 # tanh activation parameter alpha (float) + layer_norm_affline: True # whether layer normalization uses an affine transform (bool) + gcn_depth: 2 # graph convolution depth (int) + dropout: 0.3 # dropout rate (float) + predefined_A: null # predefined adjacency matrix (optional, None) + static_feat: null # static features (optional, None) + +train: + batch_size: 64 + debug: false + early_stop: true + early_stop_patience: 15 + epochs: 100 + grad_norm: false + log_step: 1000 + loss_func: mae + lr_decay: true + lr_decay_rate: 0.3 + lr_decay_step: 5,20,40,70 + lr_init: 0.003 + mae_thresh: None + mape_thresh: 0.001 + max_grad_norm: 5 + output_dim: 1 + plot: false + real_value: true + weight_decay: 0 \ No newline at end of file diff --git a/config/MTGNN/NYCBike-OutFlow.yaml b/config/MTGNN/NYCBike-OutFlow.yaml new file mode 100644 index 0000000..b1646ea --- /dev/null +++ b/config/MTGNN/NYCBike-OutFlow.yaml @@ -0,0 +1,64 @@ +basic: + dataset: NYCBike-OutFlow + device: cuda:0 + mode: train + model: MTGNN + seed: 2023 + +data: + batch_size: 64 + column_wise: false + days_per_week: 7 + horizon: 24 + input_dim: 1 + lag: 24 + normalizer: std + num_nodes: 128 + steps_per_day: 48 + test_ratio: 0.2 + val_ratio: 0.2 + +model: + gcn_true: True # whether to use graph convolution (bool) + buildA_true: True # whether to build the adjacency matrix dynamically (bool) + subgraph_size: 20 # subgraph size (int) + num_nodes: 128 # number of nodes (int) + node_dim: 40 # node embedding dimension (int) + dilation_exponential: 1 # dilated convolution exponent (int) + conv_channels: 32 # convolution channels (int) + residual_channels: 32 # residual channels (int) + skip_channels: 64 # skip connection channels (int) + end_channels: 128 # output layer channels (int) + seq_len: 24 # input sequence length (int) + in_dim: 1 # input feature dimension (int) + out_len: 24 # output sequence length (int) + out_dim: 1 # output prediction dimension (int) + layers: 3 # number of model layers (int) + propalpha: 0.05 # graph propagation parameter alpha (float) + tanhalpha: 3 # tanh activation parameter alpha (float) + layer_norm_affline: True # whether layer normalization uses an affine transform (bool) + gcn_depth: 2 # graph convolution depth (int) + dropout: 0.3 # dropout rate (float) + predefined_A: null # predefined adjacency matrix (optional, None) + static_feat: null # static features (optional, None) + +train: + batch_size: 64 + debug: false + early_stop: true + early_stop_patience: 15 + epochs: 100 + grad_norm: false + log_step: 1000 + loss_func: mae + lr_decay: true + lr_decay_rate: 0.3 + lr_decay_step: 5,20,40,70 + lr_init: 0.003 + mae_thresh: None + mape_thresh: 0.001 + max_grad_norm: 5 + output_dim: 1 + plot: false + real_value: true + weight_decay: 0 \ No newline at end of file diff --git a/config/MTGNN/PEMS-BAY.yaml b/config/MTGNN/PEMS-BAY.yaml new file mode 100644 index 0000000..7f28aca --- /dev/null +++ b/config/MTGNN/PEMS-BAY.yaml @@ -0,0 +1,64 @@ +basic: + dataset: PEMS-BAY + device: cuda:0 + mode: train + model: MTGNN + seed: 2023 + +data: + batch_size: 64 + column_wise: false + days_per_week: 7 + horizon: 24 + input_dim: 1 + lag: 24 + normalizer: std + num_nodes: 325 + steps_per_day: 288 + test_ratio: 0.2 + val_ratio: 0.2 + +model: + gcn_true: True # whether to use graph convolution (bool) + buildA_true: True # whether to build the adjacency matrix dynamically (bool) + subgraph_size: 20 # subgraph size (int) + num_nodes: 325 # number of nodes (int) + node_dim: 40 # node embedding dimension (int) + dilation_exponential: 1 # dilated convolution exponent (int) + conv_channels: 32 # convolution channels (int) + residual_channels: 32 # residual channels (int) + skip_channels: 64 # skip connection channels (int) + end_channels: 128 # output layer channels (int) + seq_len: 24 # input sequence length (int) + in_dim: 1 # input feature dimension (int) + out_len: 24 # output sequence length (int) + out_dim: 1 # output prediction dimension (int) + layers: 3 # number of model layers (int) + 
propalpha: 0.05 # graph propagation parameter alpha (float) + tanhalpha: 3 # tanh activation parameter alpha (float) + layer_norm_affline: True # whether layer normalization uses an affine transform (bool) + gcn_depth: 2 # graph convolution depth (int) + dropout: 0.3 # dropout rate (float) + predefined_A: null # predefined adjacency matrix (optional, None) + static_feat: null # static features (optional, None) + +train: + batch_size: 64 + debug: false + early_stop: true + early_stop_patience: 15 + epochs: 100 + grad_norm: false + log_step: 1000 + loss_func: mae + lr_decay: true + lr_decay_rate: 0.3 + lr_decay_step: 5,20,40,70 + lr_init: 0.003 + mae_thresh: None + mape_thresh: 0.001 + max_grad_norm: 5 + output_dim: 1 + plot: false + real_value: true + weight_decay: 0 \ No newline at end of file diff --git a/config/MTGNN/SolarEnergy.yaml b/config/MTGNN/SolarEnergy.yaml new file mode 100644 index 0000000..57e17c8 --- /dev/null +++ b/config/MTGNN/SolarEnergy.yaml @@ -0,0 +1,64 @@ +basic: + dataset: SolarEnergy + device: cuda:0 + mode: train + model: MTGNN + seed: 2023 + +data: + batch_size: 64 + column_wise: false + days_per_week: 7 + horizon: 24 + input_dim: 1 + lag: 24 + normalizer: std + num_nodes: 137 + steps_per_day: 24 + test_ratio: 0.2 + val_ratio: 0.2 + +model: + gcn_true: True # whether to use graph convolution (bool) + buildA_true: True # whether to build the adjacency matrix dynamically (bool) + subgraph_size: 20 # subgraph size (int) + num_nodes: 137 # number of nodes (int) + node_dim: 40 # node embedding dimension (int) + dilation_exponential: 1 # dilated convolution exponent (int) + conv_channels: 32 # convolution channels (int) + residual_channels: 32 # residual channels (int) + skip_channels: 64 # skip connection channels (int) + end_channels: 128 # output layer channels (int) + seq_len: 24 # input sequence length (int) + in_dim: 1 # input feature dimension (int) + out_len: 24 # output sequence length (int) + out_dim: 1 # output prediction dimension (int) + layers: 3 # number of model layers (int) + propalpha: 0.05 # graph propagation parameter alpha (float) + tanhalpha: 3 # tanh activation parameter alpha (float) + layer_norm_affline: True # whether layer normalization uses an affine transform (bool) + gcn_depth: 2 # graph convolution depth (int) + dropout: 0.3 # dropout rate (float) + predefined_A: null # predefined adjacency matrix (optional, None) + static_feat: null # static features (optional, None) + +train: + batch_size: 64 + debug: false + early_stop: true + early_stop_patience: 15 + epochs: 100 + grad_norm: false + log_step: 1000 + loss_func: mae + lr_decay: true + lr_decay_rate: 0.3 + lr_decay_step: 5,20,40,70 + lr_init: 0.003 + mae_thresh: None + mape_thresh: 0.001 + max_grad_norm: 5 + output_dim: 1 + plot: false + real_value: true + weight_decay: 0 \ No newline at end of file diff --git a/config/MegaCRN/AirQuality.yaml b/config/MegaCRN/AirQuality.yaml index 66583fe..c7fdfe8 100644 --- a/config/MegaCRN/AirQuality.yaml +++ b/config/MegaCRN/AirQuality.yaml @@ -4,6 +4,7 @@ basic: mode: train model: MegaCRN seed: 2023 + data: batch_size: 16 column_wise: false @@ -12,10 +13,11 @@ data: input_dim: 6 lag: 24 normalizer: std - num_nodes: 35 + num_nodes: 12 steps_per_day: 24 test_ratio: 0.2 val_ratio: 0.2 + model: cheb_k: 3 cl_decay_steps: 2000 @@ -23,10 +25,12 @@ model: mem_dim: 64 mem_num: 20 num_layers: 1 + num_nodes: 12 output_dim: 6 rnn_units: 64 use_curriculum_learning: true ycov_dim: 1 + train: batch_size: 16 debug: false @@ -40,11 +44,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 6 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/MegaCRN/BJTaxi-InFlow.yaml b/config/MegaCRN/BJTaxi-InFlow.yaml index c1e5954..b6b0fd5 100644 --- a/config/MegaCRN/BJTaxi-InFlow.yaml +++ b/config/MegaCRN/BJTaxi-InFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: MegaCRN seed: 2023 + data: batch_size: 32 column_wise: false @@ -16,6 +17,7 @@ data: steps_per_day: 48
test_ratio: 0.2 val_ratio: 0.2 + model: cheb_k: 3 cl_decay_steps: 2000 @@ -23,10 +25,12 @@ model: mem_dim: 64 mem_num: 20 num_layers: 1 + num_nodes: 1024 output_dim: 1 rnn_units: 64 use_curriculum_learning: true ycov_dim: 1 + train: batch_size: 32 debug: false @@ -40,11 +44,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/MegaCRN/BJTaxi-OutFlow.yaml b/config/MegaCRN/BJTaxi-OutFlow.yaml index df43640..41602a5 100644 --- a/config/MegaCRN/BJTaxi-OutFlow.yaml +++ b/config/MegaCRN/BJTaxi-OutFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: MegaCRN seed: 2023 + data: batch_size: 32 column_wise: false @@ -16,6 +17,7 @@ data: steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: cheb_k: 3 cl_decay_steps: 2000 @@ -23,10 +25,12 @@ model: mem_dim: 64 mem_num: 20 num_layers: 1 + num_nodes: 1024 output_dim: 1 rnn_units: 64 use_curriculum_learning: true ycov_dim: 1 + train: batch_size: 32 debug: false @@ -40,11 +44,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/MegaCRN/METR-LA.yaml b/config/MegaCRN/METR-LA.yaml index 9be97b9..c3e7805 100644 --- a/config/MegaCRN/METR-LA.yaml +++ b/config/MegaCRN/METR-LA.yaml @@ -4,6 +4,7 @@ basic: mode: train model: MegaCRN seed: 2023 + data: batch_size: 16 column_wise: false @@ -16,6 +17,7 @@ data: steps_per_day: 288 test_ratio: 0.2 val_ratio: 0.2 + model: cheb_k: 3 cl_decay_steps: 2000 @@ -23,10 +25,12 @@ model: mem_dim: 64 mem_num: 20 num_layers: 1 + num_nodes: 207 output_dim: 1 rnn_units: 64 use_curriculum_learning: true ycov_dim: 1 + train: batch_size: 16 debug: false @@ -40,11 +44,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/MegaCRN/NYCBike-InFlow.yaml b/config/MegaCRN/NYCBike-InFlow.yaml index ef35650..de90784 100644 --- a/config/MegaCRN/NYCBike-InFlow.yaml +++ b/config/MegaCRN/NYCBike-InFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: MegaCRN seed: 2023 + data: batch_size: 32 column_wise: false @@ -12,10 +13,11 @@ data: input_dim: 1 lag: 24 normalizer: std - num_nodes: 1024 + num_nodes: 128 steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: cheb_k: 3 cl_decay_steps: 2000 @@ -23,10 +25,12 @@ model: mem_dim: 64 mem_num: 20 num_layers: 1 + num_nodes: 128 output_dim: 1 rnn_units: 64 use_curriculum_learning: true ycov_dim: 1 + train: batch_size: 32 debug: false @@ -40,11 +44,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/MegaCRN/NYCBike-OutFlow.yaml b/config/MegaCRN/NYCBike-OutFlow.yaml index 85465f7..ec0487b 100644 --- a/config/MegaCRN/NYCBike-OutFlow.yaml +++ b/config/MegaCRN/NYCBike-OutFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: MegaCRN seed: 2023 + data: batch_size: 32 column_wise: false @@ -12,10 +13,11 @@ data: input_dim: 1 lag: 24 normalizer: std - num_nodes: 1024 + num_nodes: 128 steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: cheb_k: 3 cl_decay_steps: 2000 @@ -23,10 +25,12 @@ model: mem_dim: 64 
mem_num: 20 num_layers: 1 + num_nodes: 128 output_dim: 1 rnn_units: 64 use_curriculum_learning: true ycov_dim: 1 + train: batch_size: 32 debug: false @@ -40,11 +44,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/MegaCRN/PEMSD3.yaml b/config/MegaCRN/PEMSD3.yaml index 2716192..5814af0 100644 --- a/config/MegaCRN/PEMSD3.yaml +++ b/config/MegaCRN/PEMSD3.yaml @@ -25,6 +25,7 @@ model: mem_dim: 64 mem_num: 20 num_layers: 1 + num_nodes: 358 output_dim: 1 rnn_units: 64 use_curriculum_learning: true @@ -43,11 +44,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/MegaCRN/PEMSD4.yaml b/config/MegaCRN/PEMSD4.yaml index 2ed68ca..d3c06c9 100644 --- a/config/MegaCRN/PEMSD4.yaml +++ b/config/MegaCRN/PEMSD4.yaml @@ -25,6 +25,7 @@ model: mem_dim: 64 mem_num: 20 num_layers: 1 + num_nodes: 307 output_dim: 1 rnn_units: 64 use_curriculum_learning: true @@ -43,11 +44,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/MegaCRN/PEMSD7.yaml b/config/MegaCRN/PEMSD7.yaml index 47e34f4..b83d7b3 100644 --- a/config/MegaCRN/PEMSD7.yaml +++ b/config/MegaCRN/PEMSD7.yaml @@ -25,6 +25,7 @@ model: mem_dim: 64 mem_num: 20 num_layers: 1 + num_nodes: 883 output_dim: 1 rnn_units: 64 use_curriculum_learning: true @@ -43,11 +44,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/MegaCRN/PEMSD8.yaml b/config/MegaCRN/PEMSD8.yaml index aeda484..ae40736 100644 --- a/config/MegaCRN/PEMSD8.yaml +++ b/config/MegaCRN/PEMSD8.yaml @@ -25,6 +25,7 @@ model: mem_dim: 64 mem_num: 20 num_layers: 1 + num_nodes: 170 output_dim: 1 rnn_units: 64 use_curriculum_learning: true @@ -43,11 +44,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/MegaCRN/SolarEnergy.yaml b/config/MegaCRN/SolarEnergy.yaml index ae10bdc..669c0c8 100644 --- a/config/MegaCRN/SolarEnergy.yaml +++ b/config/MegaCRN/SolarEnergy.yaml @@ -4,6 +4,7 @@ basic: mode: train model: MegaCRN seed: 2023 + data: batch_size: 16 column_wise: false @@ -16,6 +17,7 @@ data: steps_per_day: 24 test_ratio: 0.2 val_ratio: 0.2 + model: cheb_k: 3 cl_decay_steps: 2000 @@ -23,10 +25,12 @@ model: mem_dim: 64 mem_num: 20 num_layers: 1 + num_nodes: 137 output_dim: 137 rnn_units: 64 use_curriculum_learning: true ycov_dim: 1 + train: batch_size: 16 debug: false @@ -40,11 +44,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 137 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/NLT/AirQuality.yaml b/config/NLT/AirQuality.yaml index e5c6a67..c6fa211 100644 --- a/config/NLT/AirQuality.yaml +++ b/config/NLT/AirQuality.yaml @@ -4,6 +4,7 @@ basic: mode: train model: NLT seed: 2023 + data: batch_size: 16 
column_wise: false @@ -12,10 +13,11 @@ data: input_dim: 6 lag: 24 normalizer: std - num_nodes: 35 + num_nodes: 12 steps_per_day: 24 test_ratio: 0.2 val_ratio: 0.2 + model: embed_dim: 10 feature_dim: 6 @@ -26,10 +28,12 @@ model: natt_hops: 4 nfc: 256 num_layers: 2 + num_nodes: 12 output_dim: 6 output_window: 24 use_day: false use_week: false + train: batch_size: 16 debug: false @@ -43,11 +47,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 6 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/NLT/BJTaxi-InFlow.yaml b/config/NLT/BJTaxi-InFlow.yaml index 8f54e18..8c918ab 100644 --- a/config/NLT/BJTaxi-InFlow.yaml +++ b/config/NLT/BJTaxi-InFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: NLT seed: 2023 + data: batch_size: 32 column_wise: false @@ -16,6 +17,7 @@ data: steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: embed_dim: 10 feature_dim: 1 @@ -26,10 +28,12 @@ model: natt_hops: 4 nfc: 256 num_layers: 2 + num_nodes: 1024 output_dim: 1 output_window: 24 use_day: false use_week: false + train: batch_size: 32 debug: false @@ -43,11 +47,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/NLT/BJTaxi-OutFlow.yaml b/config/NLT/BJTaxi-OutFlow.yaml index 5e989ba..e537d52 100644 --- a/config/NLT/BJTaxi-OutFlow.yaml +++ b/config/NLT/BJTaxi-OutFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: NLT seed: 2023 + data: batch_size: 32 column_wise: false @@ -16,6 +17,7 @@ data: steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: embed_dim: 10 feature_dim: 1 @@ -26,10 +28,12 @@ model: natt_hops: 4 nfc: 256 num_layers: 2 + num_nodes: 1024 output_dim: 1 output_window: 24 use_day: false use_week: false + train: batch_size: 32 debug: false @@ -43,11 +47,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/NLT/METR-LA.yaml b/config/NLT/METR-LA.yaml index bcfc403..03601e9 100644 --- a/config/NLT/METR-LA.yaml +++ b/config/NLT/METR-LA.yaml @@ -4,6 +4,7 @@ basic: mode: train model: NLT seed: 2023 + data: batch_size: 16 column_wise: false @@ -16,6 +17,7 @@ data: steps_per_day: 288 test_ratio: 0.2 val_ratio: 0.2 + model: embed_dim: 10 feature_dim: 1 @@ -26,10 +28,12 @@ model: natt_hops: 4 nfc: 256 num_layers: 2 + num_nodes: 207 output_dim: 1 output_window: 12 use_day: false use_week: false + train: batch_size: 16 debug: false @@ -43,11 +47,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/NLT/NYCBike-InFlow.yaml b/config/NLT/NYCBike-InFlow.yaml index b6ac09a..bde93dc 100644 --- a/config/NLT/NYCBike-InFlow.yaml +++ b/config/NLT/NYCBike-InFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: NLT seed: 2023 + data: batch_size: 32 column_wise: false @@ -12,10 +13,11 @@ data: input_dim: 1 lag: 24 normalizer: std - num_nodes: 1024 + num_nodes: 128 steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: embed_dim: 10 feature_dim: 1 @@ -26,10 +28,12 @@ model: natt_hops: 4 nfc: 256 num_layers: 2 + num_nodes: 128 output_dim: 1 output_window: 24 use_day: 
false use_week: false + train: batch_size: 32 debug: false @@ -43,11 +47,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/NLT/NYCBike-OutFlow.yaml b/config/NLT/NYCBike-OutFlow.yaml index 5e801b2..8c24df4 100644 --- a/config/NLT/NYCBike-OutFlow.yaml +++ b/config/NLT/NYCBike-OutFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: NLT seed: 2023 + data: batch_size: 32 column_wise: false @@ -12,10 +13,11 @@ data: input_dim: 1 lag: 24 normalizer: std - num_nodes: 1024 + num_nodes: 128 steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: embed_dim: 10 feature_dim: 1 @@ -26,10 +28,12 @@ model: natt_hops: 4 nfc: 256 num_layers: 2 + num_nodes: 128 output_dim: 1 output_window: 24 use_day: false use_week: false + train: batch_size: 32 debug: false @@ -43,11 +47,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/NLT/PEMSD3.yaml b/config/NLT/PEMSD3.yaml index 7212f76..8086056 100755 --- a/config/NLT/PEMSD3.yaml +++ b/config/NLT/PEMSD3.yaml @@ -28,6 +28,7 @@ model: natt_hops: 4 nfc: 256 num_layers: 2 + num_nodes: 358 output_dim: 1 output_window: 12 use_day: false @@ -46,11 +47,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/NLT/PEMSD4.yaml b/config/NLT/PEMSD4.yaml index b924b9f..6d41a7c 100755 --- a/config/NLT/PEMSD4.yaml +++ b/config/NLT/PEMSD4.yaml @@ -28,6 +28,7 @@ model: natt_hops: 4 nfc: 256 num_layers: 2 + num_nodes: 307 output_dim: 1 output_window: 12 use_day: false @@ -46,11 +47,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/NLT/PEMSD7.yaml b/config/NLT/PEMSD7.yaml index c07708c..7a9783d 100755 --- a/config/NLT/PEMSD7.yaml +++ b/config/NLT/PEMSD7.yaml @@ -28,6 +28,7 @@ model: natt_hops: 4 nfc: 256 num_layers: 2 + num_nodes: 883 output_dim: 1 output_window: 12 use_day: false @@ -46,11 +47,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/NLT/PEMSD8.yaml b/config/NLT/PEMSD8.yaml index 9a3441f..de8494b 100755 --- a/config/NLT/PEMSD8.yaml +++ b/config/NLT/PEMSD8.yaml @@ -28,6 +28,7 @@ model: natt_hops: 4 nfc: 256 num_layers: 2 + num_nodes: 170 output_dim: 1 output_window: 12 use_day: false @@ -46,11 +47,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/NLT/SolarEnergy.yaml b/config/NLT/SolarEnergy.yaml index 9bcd7af..f9da4af 100644 --- a/config/NLT/SolarEnergy.yaml +++ b/config/NLT/SolarEnergy.yaml @@ -4,6 +4,7 @@ basic: mode: train model: NLT seed: 2023 + data: batch_size: 16 column_wise: false @@ -16,6 +17,7 @@ data: steps_per_day: 24 test_ratio: 0.2 val_ratio: 0.2 + model: embed_dim: 10 feature_dim: 137 @@ -26,10 +28,12 @@ 
model: natt_hops: 4 nfc: 256 num_layers: 2 + num_nodes: 137 output_dim: 137 output_window: 24 use_day: false use_week: false + train: batch_size: 16 debug: false @@ -43,11 +47,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 137 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/PDG2SEQ/AirQuality.yaml b/config/PDG2SEQ/AirQuality.yaml index 27ec4a2..a2ad31c 100644 --- a/config/PDG2SEQ/AirQuality.yaml +++ b/config/PDG2SEQ/AirQuality.yaml @@ -4,6 +4,7 @@ basic: mode: train model: PDG2SEQ seed: 2023 + data: batch_size: 16 column_wise: false @@ -12,10 +13,11 @@ data: input_dim: 6 lag: 24 normalizer: std - num_nodes: 35 + num_nodes: 12 steps_per_day: 24 test_ratio: 0.2 val_ratio: 0.2 + model: cheb_k: 2 embed_dim: 12 @@ -23,11 +25,13 @@ model: lr_decay_step: 10000 lr_decay_step1: 75,90,120 num_layers: 1 + num_nodes: 12 output_dim: 6 rnn_units: 64 time_dim: 8 use_day: true use_week: true + train: batch_size: 16 debug: false @@ -41,11 +45,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 6 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/PDG2SEQ/BJTaxi-InFlow.yaml b/config/PDG2SEQ/BJTaxi-InFlow.yaml index 5cbdf37..917b505 100644 --- a/config/PDG2SEQ/BJTaxi-InFlow.yaml +++ b/config/PDG2SEQ/BJTaxi-InFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: PDG2SEQ seed: 2023 + data: batch_size: 32 column_wise: false @@ -16,6 +17,7 @@ data: steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: cheb_k: 2 embed_dim: 12 @@ -23,11 +25,13 @@ model: lr_decay_step: 10000 lr_decay_step1: 75,90,120 num_layers: 1 + num_nodes: 1024 output_dim: 1 rnn_units: 64 time_dim: 8 use_day: true use_week: true + train: batch_size: 32 debug: false @@ -41,11 +45,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/PDG2SEQ/BJTaxi-OutFlow.yaml b/config/PDG2SEQ/BJTaxi-OutFlow.yaml index f50e98a..a5ccc47 100644 --- a/config/PDG2SEQ/BJTaxi-OutFlow.yaml +++ b/config/PDG2SEQ/BJTaxi-OutFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: PDG2SEQ seed: 2023 + data: batch_size: 32 column_wise: false @@ -16,6 +17,7 @@ data: steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: cheb_k: 2 embed_dim: 12 @@ -23,11 +25,13 @@ model: lr_decay_step: 10000 lr_decay_step1: 75,90,120 num_layers: 1 + num_nodes: 1024 output_dim: 1 rnn_units: 64 time_dim: 8 use_day: true use_week: true + train: batch_size: 32 debug: false @@ -41,11 +45,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/PDG2SEQ/METR-LA.yaml b/config/PDG2SEQ/METR-LA.yaml index 0a52a6c..6999a03 100644 --- a/config/PDG2SEQ/METR-LA.yaml +++ b/config/PDG2SEQ/METR-LA.yaml @@ -4,6 +4,7 @@ basic: mode: train model: PDG2SEQ seed: 2023 + data: batch_size: 16 column_wise: false @@ -16,6 +17,7 @@ data: steps_per_day: 288 test_ratio: 0.2 val_ratio: 0.2 + model: cheb_k: 2 embed_dim: 12 @@ -23,11 +25,13 @@ model: lr_decay_step: 10000 lr_decay_step1: 75,90,120 num_layers: 1 + num_nodes: 207 output_dim: 1 rnn_units: 64 time_dim: 8 use_day: true use_week: true + train: batch_size: 16 debug: 
false @@ -41,11 +45,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/PDG2SEQ/NYCBike-InFlow.yaml b/config/PDG2SEQ/NYCBike-InFlow.yaml index d898dcc..56c3abe 100644 --- a/config/PDG2SEQ/NYCBike-InFlow.yaml +++ b/config/PDG2SEQ/NYCBike-InFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: PDG2SEQ seed: 2023 + data: batch_size: 32 column_wise: false @@ -12,10 +13,11 @@ data: input_dim: 1 lag: 24 normalizer: std - num_nodes: 1024 + num_nodes: 128 steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: cheb_k: 2 embed_dim: 12 @@ -23,11 +25,13 @@ model: lr_decay_step: 10000 lr_decay_step1: 75,90,120 num_layers: 1 + num_nodes: 128 output_dim: 1 rnn_units: 64 time_dim: 8 use_day: true use_week: true + train: batch_size: 32 debug: false @@ -41,11 +45,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/PDG2SEQ/NYCBike-OutFlow.yaml b/config/PDG2SEQ/NYCBike-OutFlow.yaml index 52dee49..39dc207 100644 --- a/config/PDG2SEQ/NYCBike-OutFlow.yaml +++ b/config/PDG2SEQ/NYCBike-OutFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: PDG2SEQ seed: 2023 + data: batch_size: 32 column_wise: false @@ -12,10 +13,11 @@ data: input_dim: 1 lag: 24 normalizer: std - num_nodes: 1024 + num_nodes: 128 steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: cheb_k: 2 embed_dim: 12 @@ -23,11 +25,13 @@ model: lr_decay_step: 10000 lr_decay_step1: 75,90,120 num_layers: 1 + num_nodes: 128 output_dim: 1 rnn_units: 64 time_dim: 8 use_day: true use_week: true + train: batch_size: 32 debug: false @@ -41,11 +45,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/PDG2SEQ/PEMSD3.yaml b/config/PDG2SEQ/PEMSD3.yaml index 015116b..f0e6730 100755 --- a/config/PDG2SEQ/PEMSD3.yaml +++ b/config/PDG2SEQ/PEMSD3.yaml @@ -25,6 +25,7 @@ model: lr_decay_step: 10000 lr_decay_step1: 75,90,120 num_layers: 1 + num_nodes: 358 output_dim: 1 rnn_units: 64 time_dim: 8 @@ -44,11 +45,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/PDG2SEQ/PEMSD4.yaml b/config/PDG2SEQ/PEMSD4.yaml index a4cb033..3f28b2e 100755 --- a/config/PDG2SEQ/PEMSD4.yaml +++ b/config/PDG2SEQ/PEMSD4.yaml @@ -25,6 +25,7 @@ model: lr_decay_step: 1500 lr_decay_step1: 60,75,90,120 num_layers: 1 + num_nodes: 307 output_dim: 1 rnn_units: 64 time_dim: 16 @@ -44,11 +45,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/PDG2SEQ/PEMSD7.yaml b/config/PDG2SEQ/PEMSD7.yaml index 5cd0707..4922dfc 100755 --- a/config/PDG2SEQ/PEMSD7.yaml +++ b/config/PDG2SEQ/PEMSD7.yaml @@ -25,6 +25,7 @@ model: lr_decay_step: 12000 lr_decay_step1: 80,100,120 num_layers: 1 + num_nodes: 883 output_dim: 1 rnn_units: 64 time_dim: 20 @@ -54,5 +55,4 @@ train: output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git 
a/config/PDG2SEQ/PEMSD8.yaml b/config/PDG2SEQ/PEMSD8.yaml index f250216..3c25095 100755 --- a/config/PDG2SEQ/PEMSD8.yaml +++ b/config/PDG2SEQ/PEMSD8.yaml @@ -25,6 +25,7 @@ model: lr_decay_step: 2000 lr_decay_step1: 50,75 num_layers: 1 + num_nodes: 170 output_dim: 1 rnn_units: 64 time_dim: 16 @@ -50,5 +51,4 @@ train: output_dim: 1 plot: false real_value: true - seed: 12 weight_decay: 0 diff --git a/config/PDG2SEQ/SolarEnergy.yaml b/config/PDG2SEQ/SolarEnergy.yaml index 42c6d14..a04a56f 100644 --- a/config/PDG2SEQ/SolarEnergy.yaml +++ b/config/PDG2SEQ/SolarEnergy.yaml @@ -4,6 +4,7 @@ basic: mode: train model: PDG2SEQ seed: 2023 + data: batch_size: 16 column_wise: false @@ -16,6 +17,7 @@ data: steps_per_day: 24 test_ratio: 0.2 val_ratio: 0.2 + model: cheb_k: 2 embed_dim: 12 @@ -23,11 +25,13 @@ model: lr_decay_step: 10000 lr_decay_step1: 75,90,120 num_layers: 1 + num_nodes: 137 output_dim: 137 rnn_units: 64 time_dim: 8 use_day: true use_week: true + train: batch_size: 16 debug: false @@ -41,11 +45,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 137 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/PatchTST/AirQuality.yaml b/config/PatchTST/AirQuality.yaml new file mode 100644 index 0000000..91a497e --- /dev/null +++ b/config/PatchTST/AirQuality.yaml @@ -0,0 +1,55 @@ +basic: + dataset: AirQuality + device: cuda:0 + mode: train + model: PatchTST + seed: 2023 + +data: + batch_size: 64 + column_wise: false + days_per_week: 7 + horizon: 24 + input_dim: 6 + lag: 24 + normalizer: std + num_nodes: 35 + steps_per_day: 24 + test_ratio: 0.2 + val_ratio: 0.2 + +model: + activation: gelu + seq_len: 24 + pred_len: 24 + patch_len: 6 + enc_in: 6 + stride: 8 + d_model: 128 + d_ff: 2048 + dropout: 0.1 + e_layers: 2 + n_heads: 8 + output_attention: False + + +train: + batch_size: 64 + debug: false + early_stop: true + early_stop_patience: 15 + epochs: 100 + grad_norm: false + log_step: 1000 + loss_func: mae + lr_decay: true + lr_decay_rate: 0.3 + lr_decay_step: 5,20,40,70 + lr_init: 0.0001 + mae_thresh: None + mape_thresh: 0.001 + max_grad_norm: 5 + output_dim: 6 + plot: false + real_value: true + weight_decay: 0 \ No newline at end of file diff --git a/config/PatchTST/BJTaxi-InFlow.yaml b/config/PatchTST/BJTaxi-InFlow.yaml new file mode 100644 index 0000000..95ad0b1 --- /dev/null +++ b/config/PatchTST/BJTaxi-InFlow.yaml @@ -0,0 +1,55 @@ +basic: + dataset: BJTaxi-InFlow + device: cuda:0 + mode: train + model: PatchTST + seed: 2023 + +data: + batch_size: 16 + column_wise: false + days_per_week: 7 + horizon: 24 + input_dim: 1 + lag: 24 + normalizer: std + num_nodes: 1024 + steps_per_day: 48 + test_ratio: 0.2 + val_ratio: 0.2 + +model: + activation: gelu + seq_len: 24 + pred_len: 24 + patch_len: 6 + enc_in: 1 + stride: 8 + d_model: 128 + d_ff: 2048 + dropout: 0.1 + e_layers: 2 + n_heads: 8 + output_attention: False + + +train: + batch_size: 16 + debug: false + early_stop: true + early_stop_patience: 15 + epochs: 100 + grad_norm: false + log_step: 1000 + loss_func: mae + lr_decay: true + lr_decay_rate: 0.3 + lr_decay_step: 5,20,40,70 + lr_init: 0.0001 + mae_thresh: None + mape_thresh: 0.001 + max_grad_norm: 5 + output_dim: 1 + plot: false + real_value: true + weight_decay: 0 \ No newline at end of file diff --git a/config/PatchTST/BJTaxi-OutFlow.yaml b/config/PatchTST/BJTaxi-OutFlow.yaml new file mode 100644 index 0000000..f416372 --- /dev/null +++ b/config/PatchTST/BJTaxi-OutFlow.yaml 
@@ -0,0 +1,55 @@ +basic: + dataset: BJTaxi-OutFlow + device: cuda:0 + mode: train + model: PatchTST + seed: 2023 + +data: + batch_size: 16 + column_wise: false + days_per_week: 7 + horizon: 24 + input_dim: 1 + lag: 24 + normalizer: std + num_nodes: 1024 + steps_per_day: 48 + test_ratio: 0.2 + val_ratio: 0.2 + +model: + activation: gelu + seq_len: 24 + pred_len: 24 + patch_len: 6 + enc_in: 1 + stride: 8 + d_model: 128 + d_ff: 2048 + dropout: 0.1 + e_layers: 2 + n_heads: 8 + output_attention: False + + +train: + batch_size: 16 + debug: false + early_stop: true + early_stop_patience: 15 + epochs: 100 + grad_norm: false + log_step: 1000 + loss_func: mae + lr_decay: true + lr_decay_rate: 0.3 + lr_decay_step: 5,20,40,70 + lr_init: 0.0001 + mae_thresh: None + mape_thresh: 0.001 + max_grad_norm: 5 + output_dim: 1 + plot: false + real_value: true + weight_decay: 0 \ No newline at end of file diff --git a/config/PatchTST/METR-LA.yaml b/config/PatchTST/METR-LA.yaml new file mode 100644 index 0000000..3f88951 --- /dev/null +++ b/config/PatchTST/METR-LA.yaml @@ -0,0 +1,55 @@ +basic: + dataset: METR-LA + device: cuda:1 + mode: train + model: PatchTST + seed: 2023 + +data: + batch_size: 64 + column_wise: false + days_per_week: 7 + horizon: 24 + input_dim: 1 + lag: 24 + normalizer: std + num_nodes: 207 + steps_per_day: 288 + test_ratio: 0.2 + val_ratio: 0.2 + +model: + activation: gelu + seq_len: 24 + pred_len: 24 + patch_len: 6 + enc_in: 1 + stride: 8 + d_model: 128 + d_ff: 2048 + dropout: 0.1 + e_layers: 2 + n_heads: 8 + output_attention: False + + +train: + batch_size: 64 + debug: false + early_stop: true + early_stop_patience: 15 + epochs: 100 + grad_norm: false + log_step: 1000 + loss_func: mae + lr_decay: true + lr_decay_rate: 0.3 + lr_decay_step: 5,20,40,70 + lr_init: 0.0001 + mae_thresh: None + mape_thresh: 0.001 + max_grad_norm: 5 + output_dim: 1 + plot: false + real_value: true + weight_decay: 0 \ No newline at end of file diff --git a/config/PatchTST/NYCBike-InFlow.yaml b/config/PatchTST/NYCBike-InFlow.yaml new file mode 100644 index 0000000..0f7bc97 --- /dev/null +++ b/config/PatchTST/NYCBike-InFlow.yaml @@ -0,0 +1,55 @@ +basic: + dataset: NYCBike-InFlow + device: cuda:0 + mode: train + model: PatchTST + seed: 2023 + +data: + batch_size: 64 + column_wise: false + days_per_week: 7 + horizon: 24 + input_dim: 1 + lag: 24 + normalizer: std + num_nodes: 128 + steps_per_day: 48 + test_ratio: 0.2 + val_ratio: 0.2 + +model: + activation: gelu + seq_len: 24 + pred_len: 24 + patch_len: 6 + enc_in: 1 + stride: 8 + d_model: 128 + d_ff: 2048 + dropout: 0.1 + e_layers: 2 + n_heads: 8 + output_attention: False + + +train: + batch_size: 64 + debug: false + early_stop: true + early_stop_patience: 15 + epochs: 100 + grad_norm: false + log_step: 1000 + loss_func: mae + lr_decay: true + lr_decay_rate: 0.3 + lr_decay_step: 5,20,40,70 + lr_init: 0.0001 + mae_thresh: None + mape_thresh: 0.001 + max_grad_norm: 5 + output_dim: 1 + plot: false + real_value: true + weight_decay: 0 \ No newline at end of file diff --git a/config/PatchTST/NYCBike-OutFlow.yaml b/config/PatchTST/NYCBike-OutFlow.yaml new file mode 100644 index 0000000..516e1e1 --- /dev/null +++ b/config/PatchTST/NYCBike-OutFlow.yaml @@ -0,0 +1,55 @@ +basic: + dataset: NYCBike-OutFlow + device: cuda:0 + mode: train + model: PatchTST + seed: 2023 + +data: + batch_size: 64 + column_wise: false + days_per_week: 7 + horizon: 24 + input_dim: 1 + lag: 24 + normalizer: std + num_nodes: 128 + steps_per_day: 48 + test_ratio: 0.2 + val_ratio: 0.2 + +model: + 
activation: gelu + seq_len: 24 + pred_len: 24 + patch_len: 6 + enc_in: 1 + stride: 8 + d_model: 128 + d_ff: 2048 + dropout: 0.1 + e_layers: 2 + n_heads: 8 + output_attention: False + + +train: + batch_size: 64 + debug: false + early_stop: true + early_stop_patience: 15 + epochs: 100 + grad_norm: false + log_step: 1000 + loss_func: mae + lr_decay: true + lr_decay_rate: 0.3 + lr_decay_step: 5,20,40,70 + lr_init: 0.0001 + mae_thresh: None + mape_thresh: 0.001 + max_grad_norm: 5 + output_dim: 1 + plot: false + real_value: true + weight_decay: 0 \ No newline at end of file diff --git a/config/PatchTST/PEMS-BAY.yaml b/config/PatchTST/PEMS-BAY.yaml new file mode 100644 index 0000000..ba93575 --- /dev/null +++ b/config/PatchTST/PEMS-BAY.yaml @@ -0,0 +1,55 @@ +basic: + dataset: PEMS-BAY + device: cuda:0 + mode: train + model: PatchTST + seed: 2023 + +data: + batch_size: 64 + column_wise: false + days_per_week: 7 + horizon: 24 + input_dim: 1 + lag: 24 + normalizer: std + num_nodes: 325 + steps_per_day: 288 + test_ratio: 0.2 + val_ratio: 0.2 + +model: + activation: gelu + seq_len: 24 + pred_len: 24 + d_model: 128 + patch_len: 6 + enc_in: 1 + stride: 8 + d_ff: 2048 + dropout: 0.1 + e_layers: 2 + n_heads: 8 + output_attention: False + + +train: + batch_size: 64 + debug: false + early_stop: true + early_stop_patience: 15 + epochs: 100 + grad_norm: false + log_step: 1000 + loss_func: mae + lr_decay: true + lr_decay_rate: 0.3 + lr_decay_step: 5,20,40,70 + lr_init: 0.0001 + mae_thresh: None + mape_thresh: 0.001 + max_grad_norm: 5 + output_dim: 1 + plot: false + real_value: true + weight_decay: 0 \ No newline at end of file diff --git a/config/PatchTST/SolarEnergy.yaml b/config/PatchTST/SolarEnergy.yaml new file mode 100644 index 0000000..b6ca055 --- /dev/null +++ b/config/PatchTST/SolarEnergy.yaml @@ -0,0 +1,55 @@ +basic: + dataset: SolarEnergy + device: cuda:0 + mode: train + model: PatchTST + seed: 2023 + +data: + batch_size: 64 + column_wise: false + days_per_week: 7 + horizon: 24 + input_dim: 1 + lag: 24 + normalizer: std + num_nodes: 137 + steps_per_day: 24 + test_ratio: 0.2 + val_ratio: 0.2 + +model: + activation: gelu + seq_len: 24 + pred_len: 24 + d_model: 128 + patch_len: 6 + enc_in: 6 + stride: 8 + d_ff: 2048 + dropout: 0.1 + e_layers: 2 + n_heads: 8 + output_attention: False + + +train: + batch_size: 64 + debug: false + early_stop: true + early_stop_patience: 15 + epochs: 100 + grad_norm: false + log_step: 1000 + loss_func: mae + lr_decay: true + lr_decay_rate: 0.3 + lr_decay_step: 5,20,40,70 + lr_init: 0.0001 + mae_thresh: None + mape_thresh: 0.001 + max_grad_norm: 5 + output_dim: 1 + plot: false + real_value: true + weight_decay: 0 \ No newline at end of file diff --git a/config/REPST/AirQuality.yaml b/config/REPST/AirQuality.yaml index f192382..8eeba4f 100755 --- a/config/REPST/AirQuality.yaml +++ b/config/REPST/AirQuality.yaml @@ -14,7 +14,7 @@ data: lag: 24 normalizer: std num_nodes: 35 - steps_per_day: 288 + steps_per_day: 24 test_ratio: 0.2 val_ratio: 0.2 @@ -26,8 +26,8 @@ model: gpt_path: ./GPT-2 input_dim: 6 n_heads: 1 - num_nodes: 35 - output_dim: 3 + num_nodes: 12 + output_dim: 6 patch_len: 6 pred_len: 24 seq_len: 24 @@ -50,7 +50,7 @@ train: mae_thresh: None mape_thresh: 0.001 max_grad_norm: 5 - output_dim: 3 + output_dim: 6 plot: false real_value: true weight_decay: 0 diff --git a/config/REPST/BJTaxi-InFlow.yaml b/config/REPST/BJTaxi-InFlow.yaml old mode 100644 new mode 100755 index 56ccf66..e8a17fc --- a/config/REPST/BJTaxi-InFlow.yaml +++ b/config/REPST/BJTaxi-InFlow.yaml @@ -1,9 
+1,10 @@ basic: dataset: BJTaxi-InFlow - device: cuda:1 + device: cuda:0 mode: train model: REPST seed: 2023 + data: batch_size: 16 column_wise: false @@ -16,6 +17,7 @@ data: steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: d_ff: 128 d_model: 64 @@ -25,12 +27,12 @@ model: input_dim: 1 n_heads: 1 num_nodes: 1024 - output_dim: 1 patch_len: 6 pred_len: 24 seq_len: 24 stride: 7 word_num: 1000 + train: batch_size: 16 debug: false @@ -38,7 +40,7 @@ train: early_stop_patience: 15 epochs: 100 grad_norm: false - log_step: 1000 + log_step: 100 loss_func: mae lr_decay: true lr_decay_rate: 0.3 diff --git a/config/REPST/BJTaxi-OutFlow.yaml b/config/REPST/BJTaxi-OutFlow.yaml index 36dae39..2c251e6 100644 --- a/config/REPST/BJTaxi-OutFlow.yaml +++ b/config/REPST/BJTaxi-OutFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: REPST seed: 2023 + data: batch_size: 16 column_wise: false @@ -16,6 +17,7 @@ data: steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: d_ff: 128 d_model: 64 @@ -31,6 +33,7 @@ model: seq_len: 24 stride: 7 word_num: 1000 + train: batch_size: 16 debug: false diff --git a/config/REPST/NYCBike-InFlow.yaml b/config/REPST/NYCBike-InFlow.yaml index b63b151..3ed89c8 100644 --- a/config/REPST/NYCBike-InFlow.yaml +++ b/config/REPST/NYCBike-InFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: REPST seed: 2023 + data: batch_size: 16 column_wise: false @@ -12,10 +13,11 @@ data: input_dim: 1 lag: 24 normalizer: std - num_nodes: 1024 + num_nodes: 128 steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: d_ff: 128 d_model: 64 @@ -24,13 +26,14 @@ model: gpt_path: ./GPT-2 input_dim: 1 n_heads: 1 - num_nodes: 1024 + num_nodes: 128 output_dim: 1 patch_len: 6 pred_len: 24 seq_len: 24 stride: 7 word_num: 1000 + train: batch_size: 16 debug: false diff --git a/config/REPST/NYCBike-OutFlow.yaml b/config/REPST/NYCBike-OutFlow.yaml index 9ab3c6d..59d4364 100644 --- a/config/REPST/NYCBike-OutFlow.yaml +++ b/config/REPST/NYCBike-OutFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: REPST seed: 2023 + data: batch_size: 16 column_wise: false @@ -12,10 +13,11 @@ data: input_dim: 1 lag: 24 normalizer: std - num_nodes: 1024 + num_nodes: 128 steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: d_ff: 128 d_model: 64 @@ -24,13 +26,14 @@ model: gpt_path: ./GPT-2 input_dim: 1 n_heads: 1 - num_nodes: 1024 + num_nodes: 128 output_dim: 1 patch_len: 6 pred_len: 24 seq_len: 24 stride: 7 word_num: 1000 + train: batch_size: 16 debug: false diff --git a/config/REPST/SolarEnergy.yaml b/config/REPST/SolarEnergy.yaml index dd4579e..a96e58a 100755 --- a/config/REPST/SolarEnergy.yaml +++ b/config/REPST/SolarEnergy.yaml @@ -6,7 +6,7 @@ basic: seed: 2023 data: - batch_size: 64 + batch_size: 16 column_wise: false days_per_week: 7 horizon: 24 @@ -34,7 +34,7 @@ model: word_num: 1000 train: - batch_size: 64 + batch_size: 16 debug: false early_stop: true early_stop_patience: 15 diff --git a/config/STAEFormer/AirQuality.yaml b/config/STAEFormer/AirQuality.yaml index b622adc..b7956da 100644 --- a/config/STAEFormer/AirQuality.yaml +++ b/config/STAEFormer/AirQuality.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STAEFormer seed: 2023 + data: batch_size: 16 column_wise: false @@ -13,9 +14,10 @@ data: lag: 24 normalizer: std num_nodes: 35 - steps_per_day: 24 + steps_per_day: 288 test_ratio: 0.2 val_ratio: 0.2 + model: adaptive_embedding_dim: 80 dow_embedding_dim: 24 @@ -33,24 +35,24 @@ model: steps_per_day: 24 tod_embedding_dim: 24 use_mixed_proj: true + train: batch_size: 16 debug: false early_stop: true 
early_stop_patience: 15 - epochs: 300 + epochs: 100 grad_norm: false - log_step: 200 + log_step: 20000 loss_func: mae lr_decay: false lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 6 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STAEFormer/BJTaxi-InFlow.yaml b/config/STAEFormer/BJTaxi-InFlow.yaml index 3404e8a..7eb24c1 100644 --- a/config/STAEFormer/BJTaxi-InFlow.yaml +++ b/config/STAEFormer/BJTaxi-InFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STAEFormer seed: 2023 + data: batch_size: 32 column_wise: false @@ -16,6 +17,7 @@ data: steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: adaptive_embedding_dim: 80 dow_embedding_dim: 24 @@ -33,6 +35,7 @@ model: steps_per_day: 48 tod_embedding_dim: 24 use_mixed_proj: true + train: batch_size: 32 debug: false @@ -46,11 +49,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STAEFormer/BJTaxi-OutFlow.yaml b/config/STAEFormer/BJTaxi-OutFlow.yaml index 76c7369..fbc5d56 100644 --- a/config/STAEFormer/BJTaxi-OutFlow.yaml +++ b/config/STAEFormer/BJTaxi-OutFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STAEFormer seed: 2023 + data: batch_size: 32 column_wise: false @@ -16,6 +17,7 @@ data: steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: adaptive_embedding_dim: 80 dow_embedding_dim: 24 @@ -33,6 +35,7 @@ model: steps_per_day: 48 tod_embedding_dim: 24 use_mixed_proj: true + train: batch_size: 32 debug: false @@ -46,11 +49,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STAEFormer/METR-LA.yaml b/config/STAEFormer/METR-LA.yaml index e982b2b..6fa708e 100644 --- a/config/STAEFormer/METR-LA.yaml +++ b/config/STAEFormer/METR-LA.yaml @@ -4,35 +4,38 @@ basic: mode: train model: STAEFormer seed: 2023 + data: batch_size: 16 column_wise: false days_per_week: 7 - horizon: 12 + horizon: 24 input_dim: 1 - lag: 12 + lag: 24 normalizer: std num_nodes: 207 steps_per_day: 288 test_ratio: 0.2 val_ratio: 0.2 + model: adaptive_embedding_dim: 80 dow_embedding_dim: 24 dropout: 0.1 feed_forward_dim: 256 - in_steps: 12 + in_steps: 24 input_dim: 1 input_embedding_dim: 24 num_heads: 4 num_layers: 3 num_nodes: 207 - out_steps: 12 + out_steps: 24 output_dim: 1 spatial_embedding_dim: 0 steps_per_day: 288 tod_embedding_dim: 24 use_mixed_proj: true + train: batch_size: 16 debug: false @@ -46,11 +49,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STAEFormer/NYCBike-InFlow.yaml b/config/STAEFormer/NYCBike-InFlow.yaml index 4f88780..a96571c 100644 --- a/config/STAEFormer/NYCBike-InFlow.yaml +++ b/config/STAEFormer/NYCBike-InFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STAEFormer seed: 2023 + data: batch_size: 32 column_wise: false @@ -12,10 +13,11 @@ data: input_dim: 1 lag: 24 normalizer: std - num_nodes: 1024 + num_nodes: 128 steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: adaptive_embedding_dim: 80 dow_embedding_dim: 24 @@ -26,13 +28,14 @@ model: input_embedding_dim: 24 num_heads: 4 
num_layers: 3 - num_nodes: 1024 + num_nodes: 128 out_steps: 24 output_dim: 1 spatial_embedding_dim: 0 steps_per_day: 48 tod_embedding_dim: 24 use_mixed_proj: true + train: batch_size: 32 debug: false @@ -46,11 +49,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STAEFormer/NYCBike-OutFlow.yaml b/config/STAEFormer/NYCBike-OutFlow.yaml index ee13784..dc9d430 100644 --- a/config/STAEFormer/NYCBike-OutFlow.yaml +++ b/config/STAEFormer/NYCBike-OutFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STAEFormer seed: 2023 + data: batch_size: 32 column_wise: false @@ -12,10 +13,11 @@ data: input_dim: 1 lag: 24 normalizer: std - num_nodes: 1024 + num_nodes: 128 steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: adaptive_embedding_dim: 80 dow_embedding_dim: 24 @@ -26,13 +28,14 @@ model: input_embedding_dim: 24 num_heads: 4 num_layers: 3 - num_nodes: 1024 + num_nodes: 128 out_steps: 24 output_dim: 1 spatial_embedding_dim: 0 steps_per_day: 48 tod_embedding_dim: 24 use_mixed_proj: true + train: batch_size: 32 debug: false @@ -46,11 +49,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STAEFormer/PEMS-BAY.yaml b/config/STAEFormer/PEMS-BAY.yaml new file mode 100644 index 0000000..353da67 --- /dev/null +++ b/config/STAEFormer/PEMS-BAY.yaml @@ -0,0 +1,58 @@ +basic: + dataset: PEMS-BAY + device: cuda:0 + mode: train + model: STAEFormer + seed: 2023 + +data: + batch_size: 64 + column_wise: false + days_per_week: 7 + horizon: 24 + input_dim: 1 + lag: 24 + normalizer: std + num_nodes: 325 + steps_per_day: 288 + test_ratio: 0.2 + val_ratio: 0.2 + +model: + adaptive_embedding_dim: 80 + dow_embedding_dim: 24 + dropout: 0.1 + feed_forward_dim: 256 + in_steps: 24 + input_dim: 1 + input_embedding_dim: 24 + num_heads: 4 + num_layers: 3 + num_nodes: 325 + out_steps: 24 + output_dim: 1 + spatial_embedding_dim: 0 + steps_per_day: 288 + tod_embedding_dim: 24 + use_mixed_proj: true + +train: + batch_size: 64 + debug: false + early_stop: true + early_stop_patience: 15 + epochs: 300 + grad_norm: false + log_step: 200 + loss_func: mae + lr_decay: false + lr_decay_rate: 0.3 + lr_decay_step: 5,20,40,70 + lr_init: 0.003 + mae_thresh: 0.0 + mape_thresh: 0.0 + max_grad_norm: 5 + output_dim: 1 + plot: false + real_value: true + weight_decay: 0 diff --git a/config/STAEFormer/PEMSD3.yaml b/config/STAEFormer/PEMSD3.yaml index 79eb4de..7497a8b 100755 --- a/config/STAEFormer/PEMSD3.yaml +++ b/config/STAEFormer/PEMSD3.yaml @@ -49,11 +49,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STAEFormer/PEMSD4.yaml b/config/STAEFormer/PEMSD4.yaml index a832b53..b248ffc 100755 --- a/config/STAEFormer/PEMSD4.yaml +++ b/config/STAEFormer/PEMSD4.yaml @@ -49,10 +49,9 @@ train: lr_decay_rate: 0.1 lr_decay_step: 5,20,40,70 lr_init: 0.001 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0.0003 diff --git a/config/STAEFormer/PEMSD7.yaml b/config/STAEFormer/PEMSD7.yaml index e41e643..be99282 100755 --- a/config/STAEFormer/PEMSD7.yaml +++ 
b/config/STAEFormer/PEMSD7.yaml @@ -49,11 +49,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STAEFormer/PEMSD8.yaml b/config/STAEFormer/PEMSD8.yaml index dbee2c7..d9c91a9 100755 --- a/config/STAEFormer/PEMSD8.yaml +++ b/config/STAEFormer/PEMSD8.yaml @@ -49,11 +49,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STAEFormer/SolarEnergy.yaml b/config/STAEFormer/SolarEnergy.yaml index fafffd6..a3fed30 100644 --- a/config/STAEFormer/SolarEnergy.yaml +++ b/config/STAEFormer/SolarEnergy.yaml @@ -4,41 +4,44 @@ basic: mode: train model: STAEFormer seed: 2023 + data: batch_size: 16 column_wise: false days_per_week: 7 horizon: 24 - input_dim: 137 + input_dim: 1 lag: 24 normalizer: std num_nodes: 137 steps_per_day: 24 test_ratio: 0.2 val_ratio: 0.2 + model: adaptive_embedding_dim: 80 dow_embedding_dim: 24 dropout: 0.1 feed_forward_dim: 256 in_steps: 24 - input_dim: 137 + input_dim: 1 input_embedding_dim: 24 num_heads: 4 num_layers: 3 num_nodes: 137 out_steps: 24 - output_dim: 137 + output_dim: 1 spatial_embedding_dim: 0 steps_per_day: 24 tod_embedding_dim: 24 use_mixed_proj: true + train: batch_size: 16 debug: false early_stop: true early_stop_patience: 15 - epochs: 300 + epochs: 100 grad_norm: false log_step: 200 loss_func: mae @@ -46,11 +49,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 - output_dim: 137 + output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STAWnet/AirQuality.yaml b/config/STAWnet/AirQuality.yaml index 8f9f94f..6d3e0d0 100644 --- a/config/STAWnet/AirQuality.yaml +++ b/config/STAWnet/AirQuality.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STAWnet seed: 2023 + data: batch_size: 16 column_wise: false @@ -12,10 +13,11 @@ data: input_dim: 6 lag: 24 normalizer: std - num_nodes: 35 + num_nodes: 12 steps_per_day: 24 test_ratio: 0.2 val_ratio: 0.2 + model: addaptadj: true aptonly: false @@ -30,9 +32,11 @@ model: kernel_size: 2 layers: 2 noapt: false + num_nodes: 12 output_dim: 6 residual_channels: 32 skip_channels: 256 + train: batch_size: 16 debug: false @@ -46,11 +50,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 6 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STAWnet/BJTaxi-InFlow.yaml b/config/STAWnet/BJTaxi-InFlow.yaml index 029930a..edd919a 100644 --- a/config/STAWnet/BJTaxi-InFlow.yaml +++ b/config/STAWnet/BJTaxi-InFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STAWnet seed: 2023 + data: batch_size: 32 column_wise: false @@ -16,6 +17,7 @@ data: steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: addaptadj: true aptonly: false @@ -30,9 +32,11 @@ model: kernel_size: 2 layers: 2 noapt: false + num_nodes: 1024 output_dim: 1 residual_channels: 32 skip_channels: 256 + train: batch_size: 32 debug: false @@ -46,11 +50,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git 
a/config/STAWnet/BJTaxi-OutFlow.yaml b/config/STAWnet/BJTaxi-OutFlow.yaml index f3856e8..e40975b 100644 --- a/config/STAWnet/BJTaxi-OutFlow.yaml +++ b/config/STAWnet/BJTaxi-OutFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STAWnet seed: 2023 + data: batch_size: 32 column_wise: false @@ -16,6 +17,7 @@ data: steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: addaptadj: true aptonly: false @@ -30,9 +32,11 @@ model: kernel_size: 2 layers: 2 noapt: false + num_nodes: 1024 output_dim: 1 residual_channels: 32 skip_channels: 256 + train: batch_size: 32 debug: false @@ -46,11 +50,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STAWnet/METR-LA.yaml b/config/STAWnet/METR-LA.yaml index dc84df8..d0fc158 100644 --- a/config/STAWnet/METR-LA.yaml +++ b/config/STAWnet/METR-LA.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STAWnet seed: 2023 + data: batch_size: 16 column_wise: false @@ -16,6 +17,7 @@ data: steps_per_day: 288 test_ratio: 0.2 val_ratio: 0.2 + model: addaptadj: true aptonly: false @@ -30,9 +32,11 @@ model: kernel_size: 2 layers: 2 noapt: false + num_nodes: 207 output_dim: 1 residual_channels: 32 skip_channels: 256 + train: batch_size: 16 debug: false @@ -46,11 +50,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STAWnet/NYCBike-InFlow.yaml b/config/STAWnet/NYCBike-InFlow.yaml index caea941..563d80c 100644 --- a/config/STAWnet/NYCBike-InFlow.yaml +++ b/config/STAWnet/NYCBike-InFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STAWnet seed: 2023 + data: batch_size: 32 column_wise: false @@ -12,10 +13,11 @@ data: input_dim: 1 lag: 24 normalizer: std - num_nodes: 1024 + num_nodes: 128 steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: addaptadj: true aptonly: false @@ -30,9 +32,11 @@ model: kernel_size: 2 layers: 2 noapt: false + num_nodes: 128 output_dim: 1 residual_channels: 32 skip_channels: 256 + train: batch_size: 32 debug: false @@ -46,11 +50,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STAWnet/NYCBike-OutFlow.yaml b/config/STAWnet/NYCBike-OutFlow.yaml index 33a377e..38853aa 100644 --- a/config/STAWnet/NYCBike-OutFlow.yaml +++ b/config/STAWnet/NYCBike-OutFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STAWnet seed: 2023 + data: batch_size: 32 column_wise: false @@ -12,10 +13,11 @@ data: input_dim: 1 lag: 24 normalizer: std - num_nodes: 1024 + num_nodes: 128 steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: addaptadj: true aptonly: false @@ -30,9 +32,11 @@ model: kernel_size: 2 layers: 2 noapt: false + num_nodes: 128 output_dim: 1 residual_channels: 32 skip_channels: 256 + train: batch_size: 32 debug: false @@ -46,11 +50,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STAWnet/PEMSD3.yaml b/config/STAWnet/PEMSD3.yaml index 30aaddc..9b0f48f 100644 --- a/config/STAWnet/PEMSD3.yaml +++ b/config/STAWnet/PEMSD3.yaml @@ -32,6 +32,7 @@ model: 
kernel_size: 2 layers: 2 noapt: false + num_nodes: 358 output_dim: 1 residual_channels: 32 skip_channels: 256 @@ -49,11 +50,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STAWnet/PEMSD4.yaml b/config/STAWnet/PEMSD4.yaml index b89454f..d17a5ab 100644 --- a/config/STAWnet/PEMSD4.yaml +++ b/config/STAWnet/PEMSD4.yaml @@ -32,6 +32,7 @@ model: kernel_size: 2 layers: 2 noapt: false + num_nodes: 307 output_dim: 1 residual_channels: 32 skip_channels: 256 @@ -49,11 +50,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STAWnet/PEMSD7.yaml b/config/STAWnet/PEMSD7.yaml index 5c52d57..c018131 100644 --- a/config/STAWnet/PEMSD7.yaml +++ b/config/STAWnet/PEMSD7.yaml @@ -32,6 +32,7 @@ model: kernel_size: 2 layers: 2 noapt: false + num_nodes: 883 output_dim: 1 residual_channels: 32 skip_channels: 256 @@ -49,11 +50,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STAWnet/PEMSD8.yaml b/config/STAWnet/PEMSD8.yaml index 52fcddf..a0f9e0a 100644 --- a/config/STAWnet/PEMSD8.yaml +++ b/config/STAWnet/PEMSD8.yaml @@ -32,6 +32,7 @@ model: kernel_size: 2 layers: 2 noapt: false + num_nodes: 170 output_dim: 1 residual_channels: 32 skip_channels: 256 @@ -49,11 +50,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STAWnet/SolarEnergy.yaml b/config/STAWnet/SolarEnergy.yaml index d4e3b0a..6a2cfb3 100644 --- a/config/STAWnet/SolarEnergy.yaml +++ b/config/STAWnet/SolarEnergy.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STAWnet seed: 2023 + data: batch_size: 16 column_wise: false @@ -16,6 +17,7 @@ data: steps_per_day: 24 test_ratio: 0.2 val_ratio: 0.2 + model: addaptadj: true aptonly: false @@ -30,9 +32,11 @@ model: kernel_size: 2 layers: 2 noapt: false + num_nodes: 137 output_dim: 137 residual_channels: 32 skip_channels: 256 + train: batch_size: 16 debug: false @@ -46,11 +50,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 137 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STFGNN/AirQuality.yaml b/config/STFGNN/AirQuality.yaml index 34ab48b..d559c4e 100644 --- a/config/STFGNN/AirQuality.yaml +++ b/config/STFGNN/AirQuality.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STFGNN seed: 2023 + data: batch_size: 16 column_wise: false @@ -12,31 +13,34 @@ data: input_dim: 6 lag: 24 normalizer: std - num_nodes: 35 + num_nodes: 12 steps_per_day: 24 test_ratio: 0.2 val_ratio: 0.2 + model: activation: GLU first_layer_embedding_size: 64 hidden_dims: - - - 64 - - 64 - - 64 - - - 64 - - 64 - - 64 - - - 64 - - 64 - - 64 + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 horizon: 24 input_dim: 6 mask: None + num_nodes: 12 out_layer_dim: 128 output_dim: 6 spatial_emb: true temporal_emb: true window: 24 + train: batch_size: 16 debug: false @@ -50,11 +54,10 @@ train: lr_decay_rate: 0.3 
lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 6 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STFGNN/BJTaxi-InFlow.yaml b/config/STFGNN/BJTaxi-InFlow.yaml index ca1d078..0b9f284 100644 --- a/config/STFGNN/BJTaxi-InFlow.yaml +++ b/config/STFGNN/BJTaxi-InFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STFGNN seed: 2023 + data: batch_size: 32 column_wise: false @@ -16,27 +17,30 @@ data: steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: activation: GLU first_layer_embedding_size: 64 hidden_dims: - - - 64 - - 64 - - 64 - - - 64 - - 64 - - 64 - - - 64 - - 64 - - 64 + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 horizon: 24 input_dim: 1 mask: None + num_nodes: 1024 out_layer_dim: 128 output_dim: 1 spatial_emb: true temporal_emb: true window: 24 + train: batch_size: 32 debug: false @@ -50,11 +54,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STFGNN/BJTaxi-OutFlow.yaml b/config/STFGNN/BJTaxi-OutFlow.yaml index 32e5a5c..75916dd 100644 --- a/config/STFGNN/BJTaxi-OutFlow.yaml +++ b/config/STFGNN/BJTaxi-OutFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STFGNN seed: 2023 + data: batch_size: 32 column_wise: false @@ -16,27 +17,30 @@ data: steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: activation: GLU first_layer_embedding_size: 64 hidden_dims: - - - 64 - - 64 - - 64 - - - 64 - - 64 - - 64 - - - 64 - - 64 - - 64 + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 horizon: 24 input_dim: 1 mask: None + num_nodes: 1024 out_layer_dim: 128 output_dim: 1 spatial_emb: true temporal_emb: true window: 24 + train: batch_size: 32 debug: false @@ -50,11 +54,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STFGNN/METR-LA.yaml b/config/STFGNN/METR-LA.yaml index 2f39be2..5553fd0 100644 --- a/config/STFGNN/METR-LA.yaml +++ b/config/STFGNN/METR-LA.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STFGNN seed: 2023 + data: batch_size: 16 column_wise: false @@ -16,27 +17,30 @@ data: steps_per_day: 288 test_ratio: 0.2 val_ratio: 0.2 + model: activation: GLU first_layer_embedding_size: 64 hidden_dims: - - - 64 - - 64 - - 64 - - - 64 - - 64 - - 64 - - - 64 - - 64 - - 64 + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 horizon: 12 input_dim: 1 mask: None + num_nodes: 207 out_layer_dim: 128 output_dim: 1 spatial_emb: true temporal_emb: true window: 12 + train: batch_size: 16 debug: false @@ -50,11 +54,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STFGNN/NYCBike-InFlow.yaml b/config/STFGNN/NYCBike-InFlow.yaml index 7c123a3..0a903f9 100644 --- a/config/STFGNN/NYCBike-InFlow.yaml +++ b/config/STFGNN/NYCBike-InFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STFGNN seed: 2023 + data: batch_size: 32 column_wise: false @@ -12,31 +13,34 @@ data: input_dim: 1 lag: 24 normalizer: std - num_nodes: 1024 + num_nodes: 128 steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: activation: GLU 
first_layer_embedding_size: 64 hidden_dims: - - - 64 - - 64 - - 64 - - - 64 - - 64 - - 64 - - - 64 - - 64 - - 64 + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 horizon: 24 input_dim: 1 mask: None + num_nodes: 128 out_layer_dim: 128 output_dim: 1 spatial_emb: true temporal_emb: true window: 24 + train: batch_size: 32 debug: false @@ -50,11 +54,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STFGNN/NYCBike-OutFlow.yaml b/config/STFGNN/NYCBike-OutFlow.yaml index d170b59..199c36f 100644 --- a/config/STFGNN/NYCBike-OutFlow.yaml +++ b/config/STFGNN/NYCBike-OutFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STFGNN seed: 2023 + data: batch_size: 32 column_wise: false @@ -12,31 +13,34 @@ data: input_dim: 1 lag: 24 normalizer: std - num_nodes: 1024 + num_nodes: 128 steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: activation: GLU first_layer_embedding_size: 64 hidden_dims: - - - 64 - - 64 - - 64 - - - 64 - - 64 - - 64 - - - 64 - - 64 - - 64 + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 horizon: 24 input_dim: 1 mask: None + num_nodes: 128 out_layer_dim: 128 output_dim: 1 spatial_emb: true temporal_emb: true window: 24 + train: batch_size: 32 debug: false @@ -50,11 +54,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STFGNN/PEMSD3.yaml b/config/STFGNN/PEMSD3.yaml index 35312b4..0935f72 100755 --- a/config/STFGNN/PEMSD3.yaml +++ b/config/STFGNN/PEMSD3.yaml @@ -21,10 +21,20 @@ data: model: activation: GLU first_layer_embedding_size: 64 - hidden_dims: [[64, 64, 64], [64, 64, 64], [64, 64, 64]] + hidden_dims: + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 horizon: 12 input_dim: 1 mask: None + num_nodes: 358 out_layer_dim: 128 output_dim: 1 spatial_emb: true @@ -44,11 +54,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STFGNN/PEMSD4.yaml b/config/STFGNN/PEMSD4.yaml index 8df35b7..b47b851 100755 --- a/config/STFGNN/PEMSD4.yaml +++ b/config/STFGNN/PEMSD4.yaml @@ -21,10 +21,20 @@ data: model: activation: GLU first_layer_embedding_size: 64 - hidden_dims: [[64, 64, 64], [64, 64, 64], [64, 64, 64]] + hidden_dims: + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 horizon: 12 input_dim: 1 mask: None + num_nodes: 307 out_layer_dim: 128 output_dim: 1 spatial_emb: true @@ -44,11 +54,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STFGNN/PEMSD7.yaml b/config/STFGNN/PEMSD7.yaml index d5338f8..b7320f6 100755 --- a/config/STFGNN/PEMSD7.yaml +++ b/config/STFGNN/PEMSD7.yaml @@ -21,10 +21,20 @@ data: model: activation: GLU first_layer_embedding_size: 64 - hidden_dims: [[32, 32, 32], [32, 32, 32], [32, 32, 32]] + hidden_dims: + - - 32 + - 32 + - 32 + - - 32 + - 32 + - 32 + - - 32 + - 32 + - 32 horizon: 12 input_dim: 1 mask: None + num_nodes: 883 out_layer_dim: 64 output_dim: 1 spatial_emb: true @@ -45,11 +55,10 @@ 
train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STFGNN/PEMSD8.yaml b/config/STFGNN/PEMSD8.yaml index a1f49d4..08d4f72 100755 --- a/config/STFGNN/PEMSD8.yaml +++ b/config/STFGNN/PEMSD8.yaml @@ -21,10 +21,20 @@ data: model: activation: GLU first_layer_embedding_size: 64 - hidden_dims: [[64, 64, 64], [64, 64, 64], [64, 64, 64]] + hidden_dims: + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 horizon: 12 input_dim: 1 mask: None + num_nodes: 170 out_layer_dim: 128 output_dim: 1 spatial_emb: true @@ -44,11 +54,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STFGNN/SolarEnergy.yaml b/config/STFGNN/SolarEnergy.yaml index 5f5f052..2531a1a 100644 --- a/config/STFGNN/SolarEnergy.yaml +++ b/config/STFGNN/SolarEnergy.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STFGNN seed: 2023 + data: batch_size: 16 column_wise: false @@ -16,27 +17,30 @@ data: steps_per_day: 24 test_ratio: 0.2 val_ratio: 0.2 + model: activation: GLU first_layer_embedding_size: 64 hidden_dims: - - - 64 - - 64 - - 64 - - - 64 - - 64 - - 64 - - - 64 - - 64 - - 64 + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 horizon: 24 input_dim: 137 mask: None + num_nodes: 137 out_layer_dim: 128 output_dim: 137 spatial_emb: true temporal_emb: true window: 24 + train: batch_size: 16 debug: false @@ -50,11 +54,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 137 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STGCN/AirQuality.yaml b/config/STGCN/AirQuality.yaml index 4a5b272..4c684d3 100644 --- a/config/STGCN/AirQuality.yaml +++ b/config/STGCN/AirQuality.yaml @@ -13,7 +13,7 @@ data: input_dim: 6 lag: 24 normalizer: std - num_nodes: 35 + num_nodes: 12 steps_per_day: 24 test_ratio: 0.2 val_ratio: 0.2 @@ -28,6 +28,7 @@ model: gso_type: sym_norm_lap input_dim: 6 n_his: 24 + num_nodes: 12 output_dim: 6 stblock_num: 2 @@ -44,11 +45,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 6 plot: false real_value: true - seed: 10 - weight_decay: 0 \ No newline at end of file + weight_decay: 0 diff --git a/config/STGCN/BJTaxi-InFlow.yaml b/config/STGCN/BJTaxi-InFlow.yaml index 8a8cc89..6860b15 100644 --- a/config/STGCN/BJTaxi-InFlow.yaml +++ b/config/STGCN/BJTaxi-InFlow.yaml @@ -28,6 +28,7 @@ model: gso_type: sym_norm_lap input_dim: 1 n_his: 24 + num_nodes: 1024 output_dim: 1 stblock_num: 2 @@ -44,11 +45,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 - weight_decay: 0 \ No newline at end of file + weight_decay: 0 diff --git a/config/STGCN/BJTaxi-OutFlow.yaml b/config/STGCN/BJTaxi-OutFlow.yaml index b0641dc..8480e65 100644 --- a/config/STGCN/BJTaxi-OutFlow.yaml +++ b/config/STGCN/BJTaxi-OutFlow.yaml @@ -28,6 +28,7 @@ model: gso_type: sym_norm_lap input_dim: 1 n_his: 24 + num_nodes: 1024 output_dim: 1 stblock_num: 2 @@ -44,11 +45,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - 
mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 - weight_decay: 0 \ No newline at end of file + weight_decay: 0 diff --git a/config/STGCN/METR-LA.yaml b/config/STGCN/METR-LA.yaml index e73ecbf..fe24edc 100644 --- a/config/STGCN/METR-LA.yaml +++ b/config/STGCN/METR-LA.yaml @@ -28,6 +28,7 @@ model: gso_type: sym_norm_lap input_dim: 1 n_his: 24 + num_nodes: 207 output_dim: 1 stblock_num: 2 @@ -44,11 +45,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 - weight_decay: 0 \ No newline at end of file + weight_decay: 0 diff --git a/config/STGCN/NYCBike-InFlow.yaml b/config/STGCN/NYCBike-InFlow.yaml index b01cd36..29a07a6 100644 --- a/config/STGCN/NYCBike-InFlow.yaml +++ b/config/STGCN/NYCBike-InFlow.yaml @@ -28,6 +28,7 @@ model: gso_type: sym_norm_lap input_dim: 1 n_his: 24 + num_nodes: 128 output_dim: 1 stblock_num: 2 @@ -44,11 +45,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 - weight_decay: 0 \ No newline at end of file + weight_decay: 0 diff --git a/config/STGCN/NYCBike-OutFlow.yaml b/config/STGCN/NYCBike-OutFlow.yaml index 8171033..1c747eb 100644 --- a/config/STGCN/NYCBike-OutFlow.yaml +++ b/config/STGCN/NYCBike-OutFlow.yaml @@ -28,6 +28,7 @@ model: gso_type: sym_norm_lap input_dim: 1 n_his: 24 + num_nodes: 128 output_dim: 1 stblock_num: 2 @@ -44,11 +45,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 - weight_decay: 0 \ No newline at end of file + weight_decay: 0 diff --git a/config/STGCN/PEMSD3.yaml b/config/STGCN/PEMSD3.yaml index ab254ec..6fa6c75 100755 --- a/config/STGCN/PEMSD3.yaml +++ b/config/STGCN/PEMSD3.yaml @@ -28,6 +28,7 @@ model: gso_type: sym_norm_lap input_dim: 1 n_his: 12 + num_nodes: 358 output_dim: 1 stblock_num: 2 @@ -44,11 +45,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STGCN/PEMSD4.yaml b/config/STGCN/PEMSD4.yaml index bc62528..596f6f2 100755 --- a/config/STGCN/PEMSD4.yaml +++ b/config/STGCN/PEMSD4.yaml @@ -28,6 +28,7 @@ model: gso_type: sym_norm_lap input_dim: 1 n_his: 12 + num_nodes: 307 output_dim: 1 stblock_num: 2 @@ -44,11 +45,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STGCN/PEMSD7.yaml b/config/STGCN/PEMSD7.yaml index 1f4139b..9615f85 100755 --- a/config/STGCN/PEMSD7.yaml +++ b/config/STGCN/PEMSD7.yaml @@ -28,6 +28,7 @@ model: gso_type: sym_norm_lap input_dim: 1 n_his: 12 + num_nodes: 883 output_dim: 1 stblock_num: 2 @@ -44,11 +45,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STGCN/PEMSD8.yaml b/config/STGCN/PEMSD8.yaml index 2dd7bce..846ad3f 100755 --- a/config/STGCN/PEMSD8.yaml +++ b/config/STGCN/PEMSD8.yaml @@ -28,6 +28,7 @@ 
model: gso_type: sym_norm_lap input_dim: 1 n_his: 12 + num_nodes: 170 output_dim: 1 stblock_num: 2 @@ -44,11 +45,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STGCN/SolarEnergy.yaml b/config/STGCN/SolarEnergy.yaml index fc9ecc7..d3e44dc 100644 --- a/config/STGCN/SolarEnergy.yaml +++ b/config/STGCN/SolarEnergy.yaml @@ -28,6 +28,7 @@ model: gso_type: sym_norm_lap input_dim: 1 n_his: 24 + num_nodes: 137 output_dim: 1 stblock_num: 2 @@ -44,11 +45,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.001 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 - weight_decay: 0 \ No newline at end of file + weight_decay: 0 diff --git a/config/STGNCDE/AirQuality.yaml b/config/STGNCDE/AirQuality.yaml index c7905f6..88bf8f0 100644 --- a/config/STGNCDE/AirQuality.yaml +++ b/config/STGNCDE/AirQuality.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STGNCDE seed: 2023 + data: batch_size: 16 column_wise: false @@ -12,10 +13,11 @@ data: input_dim: 6 lag: 24 normalizer: std - num_nodes: 35 + num_nodes: 12 steps_per_day: 24 test_ratio: 0.2 val_ratio: 0.2 + model: cheb_k: 2 embed_dim: 10 @@ -24,9 +26,11 @@ model: hid_hid_dim: 128 input_dim: 2 num_layers: 2 + num_nodes: 12 output_dim: 6 solver: rk4 type: type1 + train: batch_size: 16 debug: false @@ -39,16 +43,15 @@ train: lr_decay: false lr_decay_rate: 0.3 lr_decay_step: - - 5 - - 20 - - 40 - - 70 + - 5 + - 20 + - 40 + - 70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 6 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STGNCDE/BJTaxi-InFlow.yaml b/config/STGNCDE/BJTaxi-InFlow.yaml index 0bb3ab5..0de1907 100644 --- a/config/STGNCDE/BJTaxi-InFlow.yaml +++ b/config/STGNCDE/BJTaxi-InFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STGNCDE seed: 2023 + data: batch_size: 32 column_wise: false @@ -16,6 +17,7 @@ data: steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: cheb_k: 2 embed_dim: 10 @@ -24,9 +26,11 @@ model: hid_hid_dim: 128 input_dim: 2 num_layers: 2 + num_nodes: 1024 output_dim: 1 solver: rk4 type: type1 + train: batch_size: 32 debug: false @@ -39,16 +43,15 @@ train: lr_decay: false lr_decay_rate: 0.3 lr_decay_step: - - 5 - - 20 - - 40 - - 70 + - 5 + - 20 + - 40 + - 70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STGNCDE/BJTaxi-OutFlow.yaml b/config/STGNCDE/BJTaxi-OutFlow.yaml index 4cc5fdb..2022544 100644 --- a/config/STGNCDE/BJTaxi-OutFlow.yaml +++ b/config/STGNCDE/BJTaxi-OutFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STGNCDE seed: 2023 + data: batch_size: 32 column_wise: false @@ -16,6 +17,7 @@ data: steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: cheb_k: 2 embed_dim: 10 @@ -24,9 +26,11 @@ model: hid_hid_dim: 128 input_dim: 2 num_layers: 2 + num_nodes: 1024 output_dim: 1 solver: rk4 type: type1 + train: batch_size: 32 debug: false @@ -39,16 +43,15 @@ train: lr_decay: false lr_decay_rate: 0.3 lr_decay_step: - - 5 - - 20 - - 40 - - 70 + - 5 + - 20 + - 40 + - 70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STGNCDE/METR-LA.yaml 
b/config/STGNCDE/METR-LA.yaml index 135de6f..127e6fa 100644 --- a/config/STGNCDE/METR-LA.yaml +++ b/config/STGNCDE/METR-LA.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STGNCDE seed: 2023 + data: batch_size: 16 column_wise: false @@ -16,6 +17,7 @@ data: steps_per_day: 288 test_ratio: 0.2 val_ratio: 0.2 + model: cheb_k: 2 embed_dim: 10 @@ -24,9 +26,11 @@ model: hid_hid_dim: 128 input_dim: 2 num_layers: 2 + num_nodes: 207 output_dim: 1 solver: rk4 type: type1 + train: batch_size: 16 debug: false @@ -39,16 +43,15 @@ train: lr_decay: false lr_decay_rate: 0.3 lr_decay_step: - - 5 - - 20 - - 40 - - 70 + - 5 + - 20 + - 40 + - 70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STGNCDE/NYCBike-InFlow.yaml b/config/STGNCDE/NYCBike-InFlow.yaml index a35aeb5..0d00183 100644 --- a/config/STGNCDE/NYCBike-InFlow.yaml +++ b/config/STGNCDE/NYCBike-InFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STGNCDE seed: 2023 + data: batch_size: 32 column_wise: false @@ -12,10 +13,11 @@ data: input_dim: 1 lag: 24 normalizer: std - num_nodes: 1024 + num_nodes: 128 steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: cheb_k: 2 embed_dim: 10 @@ -24,9 +26,11 @@ model: hid_hid_dim: 128 input_dim: 2 num_layers: 2 + num_nodes: 128 output_dim: 1 solver: rk4 type: type1 + train: batch_size: 32 debug: false @@ -39,16 +43,15 @@ train: lr_decay: false lr_decay_rate: 0.3 lr_decay_step: - - 5 - - 20 - - 40 - - 70 + - 5 + - 20 + - 40 + - 70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STGNCDE/NYCBike-OutFlow.yaml b/config/STGNCDE/NYCBike-OutFlow.yaml index 98d94a2..b54a641 100644 --- a/config/STGNCDE/NYCBike-OutFlow.yaml +++ b/config/STGNCDE/NYCBike-OutFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STGNCDE seed: 2023 + data: batch_size: 32 column_wise: false @@ -12,10 +13,11 @@ data: input_dim: 1 lag: 24 normalizer: std - num_nodes: 1024 + num_nodes: 128 steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: cheb_k: 2 embed_dim: 10 @@ -24,9 +26,11 @@ model: hid_hid_dim: 128 input_dim: 2 num_layers: 2 + num_nodes: 128 output_dim: 1 solver: rk4 type: type1 + train: batch_size: 32 debug: false @@ -39,16 +43,15 @@ train: lr_decay: false lr_decay_rate: 0.3 lr_decay_step: - - 5 - - 20 - - 40 - - 70 + - 5 + - 20 + - 40 + - 70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STGNCDE/PEMSD3.yaml b/config/STGNCDE/PEMSD3.yaml index b6abea6..83e880c 100755 --- a/config/STGNCDE/PEMSD3.yaml +++ b/config/STGNCDE/PEMSD3.yaml @@ -26,6 +26,7 @@ model: hid_hid_dim: 128 input_dim: 2 num_layers: 2 + num_nodes: 358 output_dim: 1 solver: rk4 type: type1 @@ -41,13 +42,16 @@ train: loss_func: mae lr_decay: false lr_decay_rate: 0.3 - lr_decay_step: [5, 20, 40, 70] + lr_decay_step: + - 5 + - 20 + - 40 + - 70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STGNCDE/PEMSD4.yaml b/config/STGNCDE/PEMSD4.yaml index 7c2d8d4..8df1d04 100755 --- a/config/STGNCDE/PEMSD4.yaml +++ b/config/STGNCDE/PEMSD4.yaml @@ -26,6 +26,7 @@ model: hid_hid_dim: 128 input_dim: 2 num_layers: 2 + num_nodes: 307 output_dim: 1 solver: rk4 type: type1 @@ -41,13 +42,16 @@ train: 
loss_func: mae lr_decay: false lr_decay_rate: 0.3 - lr_decay_step: [5, 20, 40, 70] + lr_decay_step: + - 5 + - 20 + - 40 + - 70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STGNCDE/PEMSD7.yaml b/config/STGNCDE/PEMSD7.yaml index b11d474..ae7bbf5 100755 --- a/config/STGNCDE/PEMSD7.yaml +++ b/config/STGNCDE/PEMSD7.yaml @@ -26,6 +26,7 @@ model: hid_hid_dim: 64 input_dim: 2 num_layers: 2 + num_nodes: 883 output_dim: 1 solver: rk4 type: type1 @@ -41,13 +42,16 @@ train: loss_func: mae lr_decay: false lr_decay_rate: 0.3 - lr_decay_step: [5, 20, 40, 70] + lr_decay_step: + - 5 + - 20 + - 40 + - 70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STGNCDE/PEMSD8.yaml b/config/STGNCDE/PEMSD8.yaml index 452f280..22b22ab 100755 --- a/config/STGNCDE/PEMSD8.yaml +++ b/config/STGNCDE/PEMSD8.yaml @@ -26,6 +26,7 @@ model: hid_hid_dim: 64 input_dim: 2 num_layers: 2 + num_nodes: 170 output_dim: 1 solver: rk4 type: type1 @@ -41,13 +42,16 @@ train: loss_func: mae lr_decay: false lr_decay_rate: 0.3 - lr_decay_step: [5, 20, 40, 70] + lr_decay_step: + - 5 + - 20 + - 40 + - 70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STGNCDE/SolarEnergy.yaml b/config/STGNCDE/SolarEnergy.yaml index db78adc..134268a 100644 --- a/config/STGNCDE/SolarEnergy.yaml +++ b/config/STGNCDE/SolarEnergy.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STGNCDE seed: 2023 + data: batch_size: 16 column_wise: false @@ -16,6 +17,7 @@ data: steps_per_day: 24 test_ratio: 0.2 val_ratio: 0.2 + model: cheb_k: 2 embed_dim: 10 @@ -24,9 +26,11 @@ model: hid_hid_dim: 128 input_dim: 2 num_layers: 2 + num_nodes: 137 output_dim: 137 solver: rk4 type: type1 + train: batch_size: 16 debug: false @@ -39,16 +43,15 @@ train: lr_decay: false lr_decay_rate: 0.3 lr_decay_step: - - 5 - - 20 - - 40 - - 70 + - 5 + - 20 + - 40 + - 70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 137 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STGNRDE/AirQuality.yaml b/config/STGNRDE/AirQuality.yaml index 340696a..7086bb6 100644 --- a/config/STGNRDE/AirQuality.yaml +++ b/config/STGNRDE/AirQuality.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STGNRDE seed: 2023 + data: batch_size: 16 column_wise: false @@ -12,10 +13,11 @@ data: input_dim: 6 lag: 24 normalizer: std - num_nodes: 35 + num_nodes: 12 steps_per_day: 24 test_ratio: 0.2 val_ratio: 0.2 + model: adp_opt: false cheb_k: 3 @@ -28,8 +30,10 @@ model: interpolation: cubic model_type: rde num_layers: 2 + num_nodes: 12 output_dim: 6 solver: rk4 + train: batch_size: 16 debug: false @@ -42,16 +46,15 @@ train: lr_decay: false lr_decay_rate: 0.3 lr_decay_step: - - 5 - - 20 - - 40 - - 70 + - 5 + - 20 + - 40 + - 70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 6 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STGNRDE/BJTaxi-InFlow.yaml b/config/STGNRDE/BJTaxi-InFlow.yaml index 891d32b..d1d6275 100644 --- a/config/STGNRDE/BJTaxi-InFlow.yaml +++ b/config/STGNRDE/BJTaxi-InFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STGNRDE seed: 2023 + data: batch_size: 32 column_wise: false @@ -16,6 +17,7 @@ data: steps_per_day: 48 
test_ratio: 0.2 val_ratio: 0.2 + model: adp_opt: false cheb_k: 3 @@ -28,8 +30,10 @@ model: interpolation: cubic model_type: rde num_layers: 2 + num_nodes: 1024 output_dim: 1 solver: rk4 + train: batch_size: 32 debug: false @@ -42,16 +46,15 @@ train: lr_decay: false lr_decay_rate: 0.3 lr_decay_step: - - 5 - - 20 - - 40 - - 70 + - 5 + - 20 + - 40 + - 70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STGNRDE/BJTaxi-OutFlow.yaml b/config/STGNRDE/BJTaxi-OutFlow.yaml index 8646195..36ec5b2 100644 --- a/config/STGNRDE/BJTaxi-OutFlow.yaml +++ b/config/STGNRDE/BJTaxi-OutFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STGNRDE seed: 2023 + data: batch_size: 32 column_wise: false @@ -16,6 +17,7 @@ data: steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: adp_opt: false cheb_k: 3 @@ -28,8 +30,10 @@ model: interpolation: cubic model_type: rde num_layers: 2 + num_nodes: 1024 output_dim: 1 solver: rk4 + train: batch_size: 32 debug: false @@ -42,16 +46,15 @@ train: lr_decay: false lr_decay_rate: 0.3 lr_decay_step: - - 5 - - 20 - - 40 - - 70 + - 5 + - 20 + - 40 + - 70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STGNRDE/METR-LA.yaml b/config/STGNRDE/METR-LA.yaml index 00b2934..7eafd79 100644 --- a/config/STGNRDE/METR-LA.yaml +++ b/config/STGNRDE/METR-LA.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STGNRDE seed: 2023 + data: batch_size: 16 column_wise: false @@ -16,6 +17,7 @@ data: steps_per_day: 288 test_ratio: 0.2 val_ratio: 0.2 + model: adp_opt: false cheb_k: 3 @@ -28,8 +30,10 @@ model: interpolation: cubic model_type: rde num_layers: 2 + num_nodes: 207 output_dim: 1 solver: rk4 + train: batch_size: 16 debug: false @@ -42,16 +46,15 @@ train: lr_decay: false lr_decay_rate: 0.3 lr_decay_step: - - 5 - - 20 - - 40 - - 70 + - 5 + - 20 + - 40 + - 70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STGNRDE/NYCBike-InFlow.yaml b/config/STGNRDE/NYCBike-InFlow.yaml index f25a31b..c204cdb 100644 --- a/config/STGNRDE/NYCBike-InFlow.yaml +++ b/config/STGNRDE/NYCBike-InFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STGNRDE seed: 2023 + data: batch_size: 32 column_wise: false @@ -12,10 +13,11 @@ data: input_dim: 1 lag: 24 normalizer: std - num_nodes: 1024 + num_nodes: 128 steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: adp_opt: false cheb_k: 3 @@ -28,8 +30,10 @@ model: interpolation: cubic model_type: rde num_layers: 2 + num_nodes: 128 output_dim: 1 solver: rk4 + train: batch_size: 32 debug: false @@ -42,16 +46,15 @@ train: lr_decay: false lr_decay_rate: 0.3 lr_decay_step: - - 5 - - 20 - - 40 - - 70 + - 5 + - 20 + - 40 + - 70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STGNRDE/NYCBike-OutFlow.yaml b/config/STGNRDE/NYCBike-OutFlow.yaml index 8a3336d..27d11ee 100644 --- a/config/STGNRDE/NYCBike-OutFlow.yaml +++ b/config/STGNRDE/NYCBike-OutFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STGNRDE seed: 2023 + data: batch_size: 32 column_wise: false @@ -12,10 +13,11 @@ data: input_dim: 1 lag: 24 normalizer: std - num_nodes: 1024 + num_nodes: 128 steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 
+ model: adp_opt: false cheb_k: 3 @@ -28,8 +30,10 @@ model: interpolation: cubic model_type: rde num_layers: 2 + num_nodes: 128 output_dim: 1 solver: rk4 + train: batch_size: 32 debug: false @@ -42,16 +46,15 @@ train: lr_decay: false lr_decay_rate: 0.3 lr_decay_step: - - 5 - - 20 - - 40 - - 70 + - 5 + - 20 + - 40 + - 70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STGNRDE/PEMSD3.yaml b/config/STGNRDE/PEMSD3.yaml index 14a4ba9..af1529a 100644 --- a/config/STGNRDE/PEMSD3.yaml +++ b/config/STGNRDE/PEMSD3.yaml @@ -30,6 +30,7 @@ model: interpolation: cubic model_type: rde num_layers: 2 + num_nodes: 358 output_dim: 1 solver: rk4 @@ -44,13 +45,16 @@ train: loss_func: mae lr_decay: false lr_decay_rate: 0.3 - lr_decay_step: [5, 20, 40, 70] + lr_decay_step: + - 5 + - 20 + - 40 + - 70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STGNRDE/PEMSD4.yaml b/config/STGNRDE/PEMSD4.yaml index aadfe01..b0d392a 100644 --- a/config/STGNRDE/PEMSD4.yaml +++ b/config/STGNRDE/PEMSD4.yaml @@ -30,6 +30,7 @@ model: interpolation: cubic model_type: rde num_layers: 2 + num_nodes: 307 output_dim: 1 solver: rk4 @@ -44,13 +45,16 @@ train: loss_func: mae lr_decay: false lr_decay_rate: 0.3 - lr_decay_step: [5, 20, 40, 70] + lr_decay_step: + - 5 + - 20 + - 40 + - 70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STGNRDE/PEMSD7.yaml b/config/STGNRDE/PEMSD7.yaml index 1d068f0..4b8e399 100644 --- a/config/STGNRDE/PEMSD7.yaml +++ b/config/STGNRDE/PEMSD7.yaml @@ -30,6 +30,7 @@ model: interpolation: cubic model_type: rde num_layers: 2 + num_nodes: 883 output_dim: 1 solver: rk4 @@ -44,13 +45,16 @@ train: loss_func: mae lr_decay: false lr_decay_rate: 0.3 - lr_decay_step: [5, 20, 40, 70] + lr_decay_step: + - 5 + - 20 + - 40 + - 70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STGNRDE/PEMSD8.yaml b/config/STGNRDE/PEMSD8.yaml index 66c53bd..e765d25 100644 --- a/config/STGNRDE/PEMSD8.yaml +++ b/config/STGNRDE/PEMSD8.yaml @@ -30,6 +30,7 @@ model: interpolation: cubic model_type: rde num_layers: 2 + num_nodes: 170 output_dim: 1 solver: rk4 @@ -44,13 +45,16 @@ train: loss_func: mae lr_decay: false lr_decay_rate: 0.3 - lr_decay_step: [5, 20, 40, 70] + lr_decay_step: + - 5 + - 20 + - 40 + - 70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STGNRDE/SolarEnergy.yaml b/config/STGNRDE/SolarEnergy.yaml index c6a96b7..a9512d8 100644 --- a/config/STGNRDE/SolarEnergy.yaml +++ b/config/STGNRDE/SolarEnergy.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STGNRDE seed: 2023 + data: batch_size: 16 column_wise: false @@ -16,6 +17,7 @@ data: steps_per_day: 24 test_ratio: 0.2 val_ratio: 0.2 + model: adp_opt: false cheb_k: 3 @@ -28,8 +30,10 @@ model: interpolation: cubic model_type: rde num_layers: 2 + num_nodes: 137 output_dim: 137 solver: rk4 + train: batch_size: 16 debug: false @@ -42,16 +46,15 @@ train: lr_decay: false lr_decay_rate: 0.3 lr_decay_step: - - 5 - - 20 - - 40 - - 70 + - 5 + - 20 + - 40 + - 70 lr_init: 0.003 - mae_thresh: '' + 
mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 137 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STGODE/AirQuality.yaml b/config/STGODE/AirQuality.yaml index 58bd244..14ad5b1 100644 --- a/config/STGODE/AirQuality.yaml +++ b/config/STGODE/AirQuality.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STGODE seed: 2023 + data: batch_size: 16 column_wise: false @@ -12,20 +13,23 @@ data: input_dim: 6 lag: 24 normalizer: std - num_nodes: 35 + num_nodes: 12 steps_per_day: 24 test_ratio: 0.2 val_ratio: 0.2 + model: history: 24 horizon: 24 input_dim: 6 num_features: 6 + num_nodes: 12 output_dim: 6 sigma1: 0.1 sigma2: 10 thres1: 0.6 thres2: 0.5 + train: batch_size: 16 debug: false @@ -39,11 +43,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 6 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STGODE/BJTaxi-InFlow.yaml b/config/STGODE/BJTaxi-InFlow.yaml index d596a5c..5637bf5 100644 --- a/config/STGODE/BJTaxi-InFlow.yaml +++ b/config/STGODE/BJTaxi-InFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STGODE seed: 2023 + data: batch_size: 32 column_wise: false @@ -16,16 +17,19 @@ data: steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: history: 24 horizon: 24 input_dim: 1 num_features: 1 + num_nodes: 1024 output_dim: 1 sigma1: 0.1 sigma2: 10 thres1: 0.6 thres2: 0.5 + train: batch_size: 32 debug: false @@ -39,11 +43,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STGODE/BJTaxi-OutFlow.yaml b/config/STGODE/BJTaxi-OutFlow.yaml index f2a476f..4ee73d3 100644 --- a/config/STGODE/BJTaxi-OutFlow.yaml +++ b/config/STGODE/BJTaxi-OutFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STGODE seed: 2023 + data: batch_size: 32 column_wise: false @@ -16,16 +17,19 @@ data: steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: history: 24 horizon: 24 input_dim: 1 num_features: 1 + num_nodes: 1024 output_dim: 1 sigma1: 0.1 sigma2: 10 thres1: 0.6 thres2: 0.5 + train: batch_size: 32 debug: false @@ -39,11 +43,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STGODE/METR-LA.yaml b/config/STGODE/METR-LA.yaml index 4527f0c..895050f 100644 --- a/config/STGODE/METR-LA.yaml +++ b/config/STGODE/METR-LA.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STGODE seed: 2023 + data: batch_size: 16 column_wise: false @@ -16,16 +17,19 @@ data: steps_per_day: 288 test_ratio: 0.2 val_ratio: 0.2 + model: history: 12 horizon: 12 input_dim: 1 num_features: 1 + num_nodes: 207 output_dim: 1 sigma1: 0.1 sigma2: 10 thres1: 0.6 thres2: 0.5 + train: batch_size: 16 debug: false @@ -39,11 +43,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STGODE/NYCBike-InFlow.yaml b/config/STGODE/NYCBike-InFlow.yaml index 68f9e95..c8b1757 100644 --- a/config/STGODE/NYCBike-InFlow.yaml +++ b/config/STGODE/NYCBike-InFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STGODE seed: 2023 + data: batch_size: 32 column_wise: false @@ -12,20 +13,23 @@ 
data: input_dim: 1 lag: 24 normalizer: std - num_nodes: 1024 + num_nodes: 128 steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: history: 24 horizon: 24 input_dim: 1 num_features: 1 + num_nodes: 128 output_dim: 1 sigma1: 0.1 sigma2: 10 thres1: 0.6 thres2: 0.5 + train: batch_size: 32 debug: false @@ -39,11 +43,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STGODE/NYCBike-OutFlow.yaml b/config/STGODE/NYCBike-OutFlow.yaml index a4fabdd..858c455 100644 --- a/config/STGODE/NYCBike-OutFlow.yaml +++ b/config/STGODE/NYCBike-OutFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STGODE seed: 2023 + data: batch_size: 32 column_wise: false @@ -12,20 +13,23 @@ data: input_dim: 1 lag: 24 normalizer: std - num_nodes: 1024 + num_nodes: 128 steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: history: 24 horizon: 24 input_dim: 1 num_features: 1 + num_nodes: 128 output_dim: 1 sigma1: 0.1 sigma2: 10 thres1: 0.6 thres2: 0.5 + train: batch_size: 32 debug: false @@ -39,11 +43,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STGODE/PEMSD3.yaml b/config/STGODE/PEMSD3.yaml index 11cd3b3..c4b5790 100755 --- a/config/STGODE/PEMSD3.yaml +++ b/config/STGODE/PEMSD3.yaml @@ -23,6 +23,7 @@ model: horizon: 12 input_dim: 1 num_features: 1 + num_nodes: 358 output_dim: 1 sigma1: 0.1 sigma2: 10 @@ -42,11 +43,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STGODE/PEMSD4.yaml b/config/STGODE/PEMSD4.yaml index db52560..ba17e0c 100755 --- a/config/STGODE/PEMSD4.yaml +++ b/config/STGODE/PEMSD4.yaml @@ -23,6 +23,7 @@ model: horizon: 12 input_dim: 1 num_features: 1 + num_nodes: 307 output_dim: 1 sigma1: 0.1 sigma2: 10 @@ -42,11 +43,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STGODE/PEMSD7.yaml b/config/STGODE/PEMSD7.yaml index eadd560..d5f6442 100755 --- a/config/STGODE/PEMSD7.yaml +++ b/config/STGODE/PEMSD7.yaml @@ -23,6 +23,7 @@ model: horizon: 12 input_dim: 1 num_features: 1 + num_nodes: 883 output_dim: 1 sigma1: 0.1 sigma2: 10 @@ -42,11 +43,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STGODE/PEMSD8.yaml b/config/STGODE/PEMSD8.yaml index 70fc37c..19d7e66 100755 --- a/config/STGODE/PEMSD8.yaml +++ b/config/STGODE/PEMSD8.yaml @@ -23,6 +23,7 @@ model: horizon: 12 input_dim: 1 num_features: 1 + num_nodes: 170 output_dim: 1 sigma1: 0.1 sigma2: 10 @@ -42,11 +43,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STGODE/SolarEnergy.yaml b/config/STGODE/SolarEnergy.yaml index 304df8d..1275bdc 100644 --- a/config/STGODE/SolarEnergy.yaml +++ 
b/config/STGODE/SolarEnergy.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STGODE seed: 2023 + data: batch_size: 16 column_wise: false @@ -16,16 +17,19 @@ data: steps_per_day: 24 test_ratio: 0.2 val_ratio: 0.2 + model: history: 24 horizon: 24 input_dim: 137 num_features: 137 + num_nodes: 137 output_dim: 137 sigma1: 0.1 sigma2: 10 thres1: 0.6 thres2: 0.5 + train: batch_size: 16 debug: false @@ -39,11 +43,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 137 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STID/AirQuality.yaml b/config/STID/AirQuality.yaml index f8abb05..f499161 100755 --- a/config/STID/AirQuality.yaml +++ b/config/STID/AirQuality.yaml @@ -13,7 +13,7 @@ data: input_dim: 1 lag: 24 normalizer: std - num_nodes: 35 + num_nodes: 12 steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 @@ -31,7 +31,7 @@ model: input_len: 24 node_dim: 32 num_layer: 3 - num_nodes: 35 + num_nodes: 12 output_dim: 1 output_len: 24 temp_dim_diw: 32 @@ -51,7 +51,7 @@ train: lr_decay_rate: 0.3 lr_decay_step: 1,50,80 lr_init: 0.002 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.001 max_grad_norm: 5 output_dim: 1 diff --git a/config/STID/BJTaxi-InFlow.yaml b/config/STID/BJTaxi-InFlow.yaml index 57b8e7f..59e9501 100644 --- a/config/STID/BJTaxi-InFlow.yaml +++ b/config/STID/BJTaxi-InFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STID seed: 2023 + data: batch_size: 64 column_wise: false @@ -16,6 +17,7 @@ data: steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: batch_size: 64 day_of_week_size: 7 @@ -35,6 +37,7 @@ model: temp_dim_diw: 32 temp_dim_tid: 32 time_of_day_size: 288 + train: batch_size: 64 debug: true @@ -48,7 +51,7 @@ train: lr_decay_rate: 0.3 lr_decay_step: 1,50,80 lr_init: 0.002 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.001 max_grad_norm: 5 output_dim: 1 diff --git a/config/STID/BJTaxi-OutFlow.yaml b/config/STID/BJTaxi-OutFlow.yaml index 4a10026..e2fdf43 100644 --- a/config/STID/BJTaxi-OutFlow.yaml +++ b/config/STID/BJTaxi-OutFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STID seed: 2023 + data: batch_size: 64 column_wise: false @@ -16,6 +17,7 @@ data: steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: batch_size: 64 day_of_week_size: 7 @@ -35,6 +37,7 @@ model: temp_dim_diw: 32 temp_dim_tid: 32 time_of_day_size: 288 + train: batch_size: 64 debug: true @@ -48,7 +51,7 @@ train: lr_decay_rate: 0.3 lr_decay_step: 1,50,80 lr_init: 0.002 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.001 max_grad_norm: 5 output_dim: 1 diff --git a/config/STID/BJTaxi_Inflow.yaml b/config/STID/BJTaxi_InFlow.yaml similarity index 97% rename from config/STID/BJTaxi_Inflow.yaml rename to config/STID/BJTaxi_InFlow.yaml index d29f33f..d50ba22 100755 --- a/config/STID/BJTaxi_Inflow.yaml +++ b/config/STID/BJTaxi_InFlow.yaml @@ -51,11 +51,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 1,50,80 lr_init: 0.002 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 1 weight_decay: 0.0001 diff --git a/config/STID/BJTaxi_Outflow.yaml b/config/STID/BJTaxi_OutFlow.yaml similarity index 97% rename from config/STID/BJTaxi_Outflow.yaml rename to config/STID/BJTaxi_OutFlow.yaml index 4c3b344..e2fdf43 100755 --- a/config/STID/BJTaxi_Outflow.yaml +++ b/config/STID/BJTaxi_OutFlow.yaml @@ -51,11 +51,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 1,50,80 lr_init: 0.002 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.001 
max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 1 weight_decay: 0.0001 diff --git a/config/STID/METR-LA.yaml b/config/STID/METR-LA.yaml index d79894a..7ceb4f0 100755 --- a/config/STID/METR-LA.yaml +++ b/config/STID/METR-LA.yaml @@ -51,11 +51,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 1,50,80 lr_init: 0.002 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 1 weight_decay: 0.0001 diff --git a/config/STID/NYCBike-InFlow.yaml b/config/STID/NYCBike-InFlow.yaml index 81d392d..e509007 100644 --- a/config/STID/NYCBike-InFlow.yaml +++ b/config/STID/NYCBike-InFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STID seed: 2023 + data: batch_size: 64 column_wise: false @@ -12,10 +13,11 @@ data: input_dim: 1 lag: 24 normalizer: std - num_nodes: 1024 + num_nodes: 128 steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: batch_size: 64 day_of_week_size: 7 @@ -29,12 +31,13 @@ model: input_len: 24 node_dim: 32 num_layer: 3 - num_nodes: 1024 + num_nodes: 128 output_dim: 1 output_len: 24 temp_dim_diw: 32 temp_dim_tid: 32 time_of_day_size: 288 + train: batch_size: 64 debug: true @@ -48,7 +51,7 @@ train: lr_decay_rate: 0.3 lr_decay_step: 1,50,80 lr_init: 0.002 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.001 max_grad_norm: 5 output_dim: 1 diff --git a/config/STID/NYCBike-OutFlow.yaml b/config/STID/NYCBike-OutFlow.yaml index dc305ce..155baf3 100644 --- a/config/STID/NYCBike-OutFlow.yaml +++ b/config/STID/NYCBike-OutFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STID seed: 2023 + data: batch_size: 64 column_wise: false @@ -12,10 +13,11 @@ data: input_dim: 1 lag: 24 normalizer: std - num_nodes: 1024 + num_nodes: 128 steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: batch_size: 64 day_of_week_size: 7 @@ -29,12 +31,13 @@ model: input_len: 24 node_dim: 32 num_layer: 3 - num_nodes: 1024 + num_nodes: 128 output_dim: 1 output_len: 24 temp_dim_diw: 32 temp_dim_tid: 32 time_of_day_size: 288 + train: batch_size: 64 debug: true @@ -48,7 +51,7 @@ train: lr_decay_rate: 0.3 lr_decay_step: 1,50,80 lr_init: 0.002 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.001 max_grad_norm: 5 output_dim: 1 diff --git a/config/STID/NYCBike_Inflow.yaml b/config/STID/NYCBike_InFlow.yaml similarity index 97% rename from config/STID/NYCBike_Inflow.yaml rename to config/STID/NYCBike_InFlow.yaml index e014c20..e509007 100755 --- a/config/STID/NYCBike_Inflow.yaml +++ b/config/STID/NYCBike_InFlow.yaml @@ -51,11 +51,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 1,50,80 lr_init: 0.002 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.001 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 1 weight_decay: 0.0001 diff --git a/config/STID/NYCBike_Outflow.yaml b/config/STID/NYCBike_OutFlow.yaml similarity index 97% rename from config/STID/NYCBike_Outflow.yaml rename to config/STID/NYCBike_OutFlow.yaml index 634600a..155baf3 100755 --- a/config/STID/NYCBike_Outflow.yaml +++ b/config/STID/NYCBike_OutFlow.yaml @@ -51,11 +51,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 1,50,80 lr_init: 0.002 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.001 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 1 weight_decay: 0.0001 diff --git a/config/STID/PEMS-BAY.yaml b/config/STID/PEMS-BAY.yaml index 176c39f..561102d 100755 --- a/config/STID/PEMS-BAY.yaml +++ b/config/STID/PEMS-BAY.yaml @@ -51,7 +51,7 @@ train: lr_decay_rate: 0.3 lr_decay_step: 1,50,80 lr_init: 0.002 - mae_thresh: + mae_thresh: null mape_thresh: 0.0 
max_grad_norm: 5 output_dim: 1 diff --git a/config/STID/PEMSD4.yaml b/config/STID/PEMSD4.yaml index ddfaf8f..84dee4d 100755 --- a/config/STID/PEMSD4.yaml +++ b/config/STID/PEMSD4.yaml @@ -50,11 +50,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 1,50,80 lr_init: 0.002 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 1 weight_decay: 0.0001 diff --git a/config/STID/SolarEnergy.yaml b/config/STID/SolarEnergy.yaml index 6fa3ad6..0d787c9 100755 --- a/config/STID/SolarEnergy.yaml +++ b/config/STID/SolarEnergy.yaml @@ -51,11 +51,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 1,50,80 lr_init: 0.002 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.001 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 1 weight_decay: 0.0001 diff --git a/config/STIDGCN/AirQuality.yaml b/config/STIDGCN/AirQuality.yaml index 116549e..5af3fd4 100644 --- a/config/STIDGCN/AirQuality.yaml +++ b/config/STIDGCN/AirQuality.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STIDGCN seed: 2023 + data: batch_size: 16 column_wise: false @@ -12,10 +13,11 @@ data: input_dim: 6 lag: 24 normalizer: std - num_nodes: 35 + num_nodes: 12 steps_per_day: 24 test_ratio: 0.2 val_ratio: 0.2 + model: channels: 32 dropout: 0.1 @@ -23,7 +25,9 @@ model: history: 24 horizon: 24 input_dim: 3 + num_nodes: 12 output_dim: 6 + train: batch_size: 16 debug: false @@ -37,11 +41,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 6 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STIDGCN/BJTaxi-InFlow.yaml b/config/STIDGCN/BJTaxi-InFlow.yaml index 06c2aa5..26f8c52 100644 --- a/config/STIDGCN/BJTaxi-InFlow.yaml +++ b/config/STIDGCN/BJTaxi-InFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STIDGCN seed: 2023 + data: batch_size: 32 column_wise: false @@ -16,6 +17,7 @@ data: steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: channels: 32 dropout: 0.1 @@ -23,7 +25,9 @@ model: history: 24 horizon: 24 input_dim: 3 + num_nodes: 1024 output_dim: 1 + train: batch_size: 32 debug: false @@ -37,11 +41,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STIDGCN/BJTaxi-OutFlow.yaml b/config/STIDGCN/BJTaxi-OutFlow.yaml index dae5ec9..f09fa95 100644 --- a/config/STIDGCN/BJTaxi-OutFlow.yaml +++ b/config/STIDGCN/BJTaxi-OutFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STIDGCN seed: 2023 + data: batch_size: 32 column_wise: false @@ -16,6 +17,7 @@ data: steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: channels: 32 dropout: 0.1 @@ -23,7 +25,9 @@ model: history: 24 horizon: 24 input_dim: 3 + num_nodes: 1024 output_dim: 1 + train: batch_size: 32 debug: false @@ -37,11 +41,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STIDGCN/METR-LA.yaml b/config/STIDGCN/METR-LA.yaml index fac77f5..1022a11 100644 --- a/config/STIDGCN/METR-LA.yaml +++ b/config/STIDGCN/METR-LA.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STIDGCN seed: 2023 + data: batch_size: 16 column_wise: false @@ -16,6 +17,7 @@ data: steps_per_day: 288 test_ratio: 0.2 val_ratio: 0.2 + model: channels: 32 dropout: 0.1 @@ -23,7 +25,9 @@ 
model: history: 12 horizon: 12 input_dim: 3 + num_nodes: 207 output_dim: 1 + train: batch_size: 16 debug: false @@ -37,11 +41,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STIDGCN/NYCBike-InFlow.yaml b/config/STIDGCN/NYCBike-InFlow.yaml index 1237d5c..df6f976 100644 --- a/config/STIDGCN/NYCBike-InFlow.yaml +++ b/config/STIDGCN/NYCBike-InFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STIDGCN seed: 2023 + data: batch_size: 32 column_wise: false @@ -12,10 +13,11 @@ data: input_dim: 1 lag: 24 normalizer: std - num_nodes: 1024 + num_nodes: 128 steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: channels: 32 dropout: 0.1 @@ -23,7 +25,9 @@ model: history: 24 horizon: 24 input_dim: 3 + num_nodes: 128 output_dim: 1 + train: batch_size: 32 debug: false @@ -37,11 +41,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STIDGCN/NYCBike-OutFlow.yaml b/config/STIDGCN/NYCBike-OutFlow.yaml index 3f95335..e7159a2 100644 --- a/config/STIDGCN/NYCBike-OutFlow.yaml +++ b/config/STIDGCN/NYCBike-OutFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STIDGCN seed: 2023 + data: batch_size: 32 column_wise: false @@ -12,10 +13,11 @@ data: input_dim: 1 lag: 24 normalizer: std - num_nodes: 1024 + num_nodes: 128 steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: channels: 32 dropout: 0.1 @@ -23,7 +25,9 @@ model: history: 24 horizon: 24 input_dim: 3 + num_nodes: 128 output_dim: 1 + train: batch_size: 32 debug: false @@ -37,11 +41,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STIDGCN/PEMSD3.yaml b/config/STIDGCN/PEMSD3.yaml index d4548cc..a1fc024 100644 --- a/config/STIDGCN/PEMSD3.yaml +++ b/config/STIDGCN/PEMSD3.yaml @@ -25,6 +25,7 @@ model: history: 12 horizon: 12 input_dim: 3 + num_nodes: 358 output_dim: 1 train: @@ -40,11 +41,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STIDGCN/PEMSD4.yaml b/config/STIDGCN/PEMSD4.yaml index 35c69be..edd4118 100644 --- a/config/STIDGCN/PEMSD4.yaml +++ b/config/STIDGCN/PEMSD4.yaml @@ -25,6 +25,7 @@ model: history: 12 horizon: 12 input_dim: 3 + num_nodes: 307 output_dim: 1 train: @@ -40,11 +41,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STIDGCN/PEMSD7.yaml b/config/STIDGCN/PEMSD7.yaml index ba92d98..942ba1b 100644 --- a/config/STIDGCN/PEMSD7.yaml +++ b/config/STIDGCN/PEMSD7.yaml @@ -25,6 +25,7 @@ model: history: 12 horizon: 12 input_dim: 3 + num_nodes: 883 output_dim: 1 train: @@ -40,11 +41,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STIDGCN/PEMSD8.yaml b/config/STIDGCN/PEMSD8.yaml 
index fca7310..071ab05 100644 --- a/config/STIDGCN/PEMSD8.yaml +++ b/config/STIDGCN/PEMSD8.yaml @@ -25,6 +25,7 @@ model: history: 12 horizon: 12 input_dim: 3 + num_nodes: 170 output_dim: 1 train: @@ -40,11 +41,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STIDGCN/SolarEnergy.yaml b/config/STIDGCN/SolarEnergy.yaml index 243b0f8..e4d66ba 100644 --- a/config/STIDGCN/SolarEnergy.yaml +++ b/config/STIDGCN/SolarEnergy.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STIDGCN seed: 2023 + data: batch_size: 16 column_wise: false @@ -16,6 +17,7 @@ data: steps_per_day: 24 test_ratio: 0.2 val_ratio: 0.2 + model: channels: 32 dropout: 0.1 @@ -23,7 +25,9 @@ model: history: 24 horizon: 24 input_dim: 3 + num_nodes: 137 output_dim: 137 + train: batch_size: 16 debug: false @@ -37,11 +41,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 137 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STMLP/AirQuality.yaml b/config/STMLP/AirQuality.yaml index 7166af6..d3bbdb9 100644 --- a/config/STMLP/AirQuality.yaml +++ b/config/STMLP/AirQuality.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STMLP seed: 2023 + data: batch_size: 16 column_wise: false @@ -12,10 +13,11 @@ data: input_dim: 6 lag: 24 normalizer: std - num_nodes: 35 + num_nodes: 12 steps_per_day: 24 test_ratio: 0.2 val_ratio: 0.2 + model: buildA_true: true conv_channels: 32 @@ -30,6 +32,7 @@ model: layers: 3 model_type: stmlp node_dim: 40 + num_nodes: 12 num_split: 1 output_dim: 6 output_window: 24 @@ -42,6 +45,7 @@ model: tanhalpha: 3 task_level: 0 use_curriculum_learning: true + train: batch_size: 16 debug: false @@ -55,12 +59,11 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 6 plot: false real_value: true - seed: 10 teacher_stu: true weight_decay: 0 diff --git a/config/STMLP/BJTaxi-InFlow.yaml b/config/STMLP/BJTaxi-InFlow.yaml index 9b90e0d..1dacb06 100644 --- a/config/STMLP/BJTaxi-InFlow.yaml +++ b/config/STMLP/BJTaxi-InFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STMLP seed: 2023 + data: batch_size: 32 column_wise: false @@ -16,6 +17,7 @@ data: steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: buildA_true: true conv_channels: 32 @@ -30,6 +32,7 @@ model: layers: 3 model_type: stmlp node_dim: 40 + num_nodes: 1024 num_split: 1 output_dim: 1 output_window: 24 @@ -42,6 +45,7 @@ model: tanhalpha: 3 task_level: 0 use_curriculum_learning: true + train: batch_size: 32 debug: false @@ -55,12 +59,11 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 teacher_stu: true weight_decay: 0 diff --git a/config/STMLP/BJTaxi-OutFlow.yaml b/config/STMLP/BJTaxi-OutFlow.yaml index cf499e3..5b34c75 100644 --- a/config/STMLP/BJTaxi-OutFlow.yaml +++ b/config/STMLP/BJTaxi-OutFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STMLP seed: 2023 + data: batch_size: 32 column_wise: false @@ -16,6 +17,7 @@ data: steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: buildA_true: true conv_channels: 32 @@ -30,6 +32,7 @@ model: layers: 3 model_type: stmlp node_dim: 40 + num_nodes: 1024 num_split: 1 output_dim: 1 
output_window: 24 @@ -42,6 +45,7 @@ model: tanhalpha: 3 task_level: 0 use_curriculum_learning: true + train: batch_size: 32 debug: false @@ -55,12 +59,11 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 teacher_stu: true weight_decay: 0 diff --git a/config/STMLP/METR-LA.yaml b/config/STMLP/METR-LA.yaml index 5313959..426dec7 100644 --- a/config/STMLP/METR-LA.yaml +++ b/config/STMLP/METR-LA.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STMLP seed: 2023 + data: batch_size: 16 column_wise: false @@ -16,6 +17,7 @@ data: steps_per_day: 288 test_ratio: 0.2 val_ratio: 0.2 + model: buildA_true: true conv_channels: 32 @@ -30,6 +32,7 @@ model: layers: 3 model_type: stmlp node_dim: 40 + num_nodes: 207 num_split: 1 output_dim: 1 output_window: 12 @@ -42,6 +45,7 @@ model: tanhalpha: 3 task_level: 0 use_curriculum_learning: true + train: batch_size: 16 debug: false @@ -55,12 +59,11 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 teacher_stu: true weight_decay: 0 diff --git a/config/STMLP/NYCBike-InFlow.yaml b/config/STMLP/NYCBike-InFlow.yaml index 053deab..ccbc983 100644 --- a/config/STMLP/NYCBike-InFlow.yaml +++ b/config/STMLP/NYCBike-InFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STMLP seed: 2023 + data: batch_size: 32 column_wise: false @@ -12,10 +13,11 @@ data: input_dim: 1 lag: 24 normalizer: std - num_nodes: 1024 + num_nodes: 128 steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: buildA_true: true conv_channels: 32 @@ -30,6 +32,7 @@ model: layers: 3 model_type: stmlp node_dim: 40 + num_nodes: 128 num_split: 1 output_dim: 1 output_window: 24 @@ -42,6 +45,7 @@ model: tanhalpha: 3 task_level: 0 use_curriculum_learning: true + train: batch_size: 32 debug: false @@ -55,12 +59,11 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 teacher_stu: true weight_decay: 0 diff --git a/config/STMLP/NYCBike-OutFlow.yaml b/config/STMLP/NYCBike-OutFlow.yaml index 0a920cc..a709a21 100644 --- a/config/STMLP/NYCBike-OutFlow.yaml +++ b/config/STMLP/NYCBike-OutFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STMLP seed: 2023 + data: batch_size: 32 column_wise: false @@ -12,10 +13,11 @@ data: input_dim: 1 lag: 24 normalizer: std - num_nodes: 1024 + num_nodes: 128 steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: buildA_true: true conv_channels: 32 @@ -30,6 +32,7 @@ model: layers: 3 model_type: stmlp node_dim: 40 + num_nodes: 128 num_split: 1 output_dim: 1 output_window: 24 @@ -42,6 +45,7 @@ model: tanhalpha: 3 task_level: 0 use_curriculum_learning: true + train: batch_size: 32 debug: false @@ -55,12 +59,11 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 teacher_stu: true weight_decay: 0 diff --git a/config/STMLP/PEMSD3.yaml b/config/STMLP/PEMSD3.yaml index 55372aa..1bbaaad 100644 --- a/config/STMLP/PEMSD3.yaml +++ b/config/STMLP/PEMSD3.yaml @@ -32,6 +32,7 @@ model: layers: 3 model_type: stmlp node_dim: 40 + num_nodes: 358 num_split: 1 output_dim: 1 output_window: 12 @@ -58,12 +59,11 @@ train: lr_decay_rate: 0.3 lr_decay_step: 
5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 teacher_stu: true weight_decay: 0 diff --git a/config/STMLP/PEMSD4.yaml b/config/STMLP/PEMSD4.yaml index 780bf77..f90156b 100644 --- a/config/STMLP/PEMSD4.yaml +++ b/config/STMLP/PEMSD4.yaml @@ -32,6 +32,7 @@ model: layers: 3 model_type: stmlp node_dim: 40 + num_nodes: 307 num_split: 1 output_dim: 1 output_window: 12 @@ -58,13 +59,12 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 teacher: true teacher_stu: true weight_decay: 0 diff --git a/config/STMLP/PEMSD7.yaml b/config/STMLP/PEMSD7.yaml index bae2ec9..6fb1de2 100644 --- a/config/STMLP/PEMSD7.yaml +++ b/config/STMLP/PEMSD7.yaml @@ -32,6 +32,7 @@ model: layers: 3 model_type: stmlp node_dim: 40 + num_nodes: 883 num_split: 1 output_dim: 1 output_window: 12 @@ -58,12 +59,11 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 teacher_stu: true weight_decay: 0 diff --git a/config/STMLP/PEMSD8.yaml b/config/STMLP/PEMSD8.yaml index 6532996..5858ec5 100644 --- a/config/STMLP/PEMSD8.yaml +++ b/config/STMLP/PEMSD8.yaml @@ -32,6 +32,7 @@ model: layers: 3 model_type: stmlp node_dim: 40 + num_nodes: 170 num_split: 1 output_dim: 1 output_window: 12 @@ -58,12 +59,11 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 teacher_stu: true weight_decay: 0 diff --git a/config/STMLP/SolarEnergy.yaml b/config/STMLP/SolarEnergy.yaml index ca6bc0d..627ba66 100644 --- a/config/STMLP/SolarEnergy.yaml +++ b/config/STMLP/SolarEnergy.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STMLP seed: 2023 + data: batch_size: 16 column_wise: false @@ -16,6 +17,7 @@ data: steps_per_day: 24 test_ratio: 0.2 val_ratio: 0.2 + model: buildA_true: true conv_channels: 32 @@ -30,6 +32,7 @@ model: layers: 3 model_type: stmlp node_dim: 40 + num_nodes: 137 num_split: 1 output_dim: 137 output_window: 24 @@ -42,6 +45,7 @@ model: tanhalpha: 3 task_level: 0 use_curriculum_learning: true + train: batch_size: 16 debug: false @@ -55,12 +59,11 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 137 plot: false real_value: true - seed: 10 teacher_stu: true weight_decay: 0 diff --git a/config/STNorm/AirQuality.yaml b/config/STNorm/AirQuality.yaml new file mode 100644 index 0000000..384633d --- /dev/null +++ b/config/STNorm/AirQuality.yaml @@ -0,0 +1,52 @@ +basic: + dataset: AirQuality + device: cuda:0 + mode: train + model: STNorm + seed: 2023 + +data: + batch_size: 64 + column_wise: false + days_per_week: 7 + horizon: 24 + input_dim: 6 + lag: 24 + normalizer: std + num_nodes: 35 + steps_per_day: 24 + test_ratio: 0.2 + val_ratio: 0.2 + +model: + dropout: 0.2 + blocks: 2 + layers: 2 + snorm_bool: True + tnorm_bool: True + num_nodes: 35 + in_dim: 6 + out_dim: 6 + channels: 32 + kernel_size: 2 + +train: + batch_size: 64 + debug: false + early_stop: true + early_stop_patience: 15 + epochs: 100 + grad_norm: false + log_step: 1000 + loss_func: mae + lr_decay: true + lr_decay_rate: 0.3 + lr_decay_step: 5,20,40,70 + lr_init: 0.003 + mae_thresh: None + 
mape_thresh: 0.001 + max_grad_norm: 5 + output_dim: 6 + plot: false + real_value: true + weight_decay: 0 \ No newline at end of file diff --git a/config/STNorm/BJTaxi-InFlow.yaml b/config/STNorm/BJTaxi-InFlow.yaml new file mode 100644 index 0000000..13130be --- /dev/null +++ b/config/STNorm/BJTaxi-InFlow.yaml @@ -0,0 +1,52 @@ +basic: + dataset: BJTaxi-InFlow + device: cuda:0 + mode: train + model: STNorm + seed: 2023 + +data: + batch_size: 64 + column_wise: false + days_per_week: 7 + horizon: 24 + input_dim: 1 + lag: 24 + normalizer: std + num_nodes: 1024 + steps_per_day: 48 + test_ratio: 0.2 + val_ratio: 0.2 + +model: + dropout: 0.2 + blocks: 2 + layers: 2 + snorm_bool: True + tnorm_bool: True + num_nodes: 1024 + in_dim: 1 + out_dim: 1 + channels: 32 + kernel_size: 2 + +train: + batch_size: 64 + debug: false + early_stop: true + early_stop_patience: 15 + epochs: 100 + grad_norm: false + log_step: 1000 + loss_func: mae + lr_decay: true + lr_decay_rate: 0.3 + lr_decay_step: 5,20,40,70 + lr_init: 0.003 + mae_thresh: None + mape_thresh: 0.001 + max_grad_norm: 5 + output_dim: 1 + plot: false + real_value: true + weight_decay: 0 \ No newline at end of file diff --git a/config/STNorm/BJTaxi-OutFlow.yaml b/config/STNorm/BJTaxi-OutFlow.yaml new file mode 100644 index 0000000..fec550a --- /dev/null +++ b/config/STNorm/BJTaxi-OutFlow.yaml @@ -0,0 +1,52 @@ +basic: + dataset: BJTaxi-OutFlow + device: cuda:0 + mode: train + model: STNorm + seed: 2023 + +data: + batch_size: 64 + column_wise: false + days_per_week: 7 + horizon: 24 + input_dim: 1 + lag: 24 + normalizer: std + num_nodes: 1024 + steps_per_day: 48 + test_ratio: 0.2 + val_ratio: 0.2 + +model: + dropout: 0.2 + blocks: 2 + layers: 2 + snorm_bool: True + tnorm_bool: True + num_nodes: 1024 + in_dim: 1 + out_dim: 1 + channels: 32 + kernel_size: 2 + +train: + batch_size: 64 + debug: false + early_stop: true + early_stop_patience: 15 + epochs: 100 + grad_norm: false + log_step: 1000 + loss_func: mae + lr_decay: true + lr_decay_rate: 0.3 + lr_decay_step: 5,20,40,70 + lr_init: 0.003 + mae_thresh: None + mape_thresh: 0.001 + max_grad_norm: 5 + output_dim: 1 + plot: false + real_value: true + weight_decay: 0 \ No newline at end of file diff --git a/config/STNorm/METR-LA.yaml b/config/STNorm/METR-LA.yaml new file mode 100644 index 0000000..f48c978 --- /dev/null +++ b/config/STNorm/METR-LA.yaml @@ -0,0 +1,52 @@ +basic: + dataset: METR-LA + device: cuda:0 + mode: train + model: STNorm + seed: 2023 + +data: + batch_size: 16 + column_wise: false + days_per_week: 7 + horizon: 24 + input_dim: 1 + lag: 24 + normalizer: std + num_nodes: 207 + steps_per_day: 288 + test_ratio: 0.2 + val_ratio: 0.2 + +model: + dropout: 0.2 + blocks: 2 + layers: 2 + snorm_bool: True + tnorm_bool: True + num_nodes: 207 + in_dim: 1 + out_dim: 1 + channels: 32 + kernel_size: 2 + +train: + batch_size: 16 + debug: false + early_stop: true + early_stop_patience: 15 + epochs: 100 + grad_norm: false + log_step: 1000 + loss_func: mae + lr_decay: true + lr_decay_rate: 0.3 + lr_decay_step: 5,20,40,70 + lr_init: 0.003 + mae_thresh: None + mape_thresh: 0.001 + max_grad_norm: 5 + output_dim: 1 + plot: false + real_value: true + weight_decay: 0 \ No newline at end of file diff --git a/config/STNorm/NYCBike-InFlow.yaml b/config/STNorm/NYCBike-InFlow.yaml new file mode 100644 index 0000000..57ad401 --- /dev/null +++ b/config/STNorm/NYCBike-InFlow.yaml @@ -0,0 +1,52 @@ +basic: + dataset: NYCBike-InFlow + device: cuda:0 + mode: train + model: STNorm + seed: 2023 + +data: + batch_size: 64 + 
column_wise: false + days_per_week: 7 + horizon: 24 + input_dim: 1 + lag: 24 + normalizer: std + num_nodes: 128 + steps_per_day: 48 + test_ratio: 0.2 + val_ratio: 0.2 + +model: + dropout: 0.2 + blocks: 2 + layers: 2 + snorm_bool: True + tnorm_bool: True + num_nodes: 128 + in_dim: 1 + out_dim: 1 + channels: 32 + kernel_size: 2 + +train: + batch_size: 64 + debug: false + early_stop: true + early_stop_patience: 15 + epochs: 100 + grad_norm: false + log_step: 1000 + loss_func: mae + lr_decay: true + lr_decay_rate: 0.3 + lr_decay_step: 5,20,40,70 + lr_init: 0.003 + mae_thresh: None + mape_thresh: 0.001 + max_grad_norm: 5 + output_dim: 1 + plot: false + real_value: true + weight_decay: 0 \ No newline at end of file diff --git a/config/STNorm/NYCBike-OutFlow.yaml b/config/STNorm/NYCBike-OutFlow.yaml new file mode 100644 index 0000000..4f32f0a --- /dev/null +++ b/config/STNorm/NYCBike-OutFlow.yaml @@ -0,0 +1,52 @@ +basic: + dataset: NYCBike-OutFlow + device: cuda:0 + mode: train + model: STNorm + seed: 2023 + +data: + batch_size: 64 + column_wise: false + days_per_week: 7 + horizon: 24 + input_dim: 1 + lag: 24 + normalizer: std + num_nodes: 128 + steps_per_day: 48 + test_ratio: 0.2 + val_ratio: 0.2 + +model: + dropout: 0.2 + blocks: 2 + layers: 2 + snorm_bool: True + tnorm_bool: True + num_nodes: 128 + in_dim: 1 + out_dim: 1 + channels: 32 + kernel_size: 2 + +train: + batch_size: 64 + debug: false + early_stop: true + early_stop_patience: 15 + epochs: 100 + grad_norm: false + log_step: 1000 + loss_func: mae + lr_decay: true + lr_decay_rate: 0.3 + lr_decay_step: 5,20,40,70 + lr_init: 0.003 + mae_thresh: None + mape_thresh: 0.001 + max_grad_norm: 5 + output_dim: 1 + plot: false + real_value: true + weight_decay: 0 \ No newline at end of file diff --git a/config/STNorm/PEMS-BAY.yaml b/config/STNorm/PEMS-BAY.yaml new file mode 100644 index 0000000..20f4b5d --- /dev/null +++ b/config/STNorm/PEMS-BAY.yaml @@ -0,0 +1,52 @@ +basic: + dataset: PEMS-BAY + device: cuda:0 + mode: train + model: STNorm + seed: 2023 + +data: + batch_size: 64 + column_wise: false + days_per_week: 7 + horizon: 24 + input_dim: 1 + lag: 24 + normalizer: std + num_nodes: 325 + steps_per_day: 288 + test_ratio: 0.2 + val_ratio: 0.2 + +model: + dropout: 0.2 + blocks: 2 + layers: 2 + snorm_bool: True + tnorm_bool: True + num_nodes: 325 + in_dim: 1 + out_dim: 1 + channels: 32 + kernel_size: 2 + +train: + batch_size: 64 + debug: false + early_stop: true + early_stop_patience: 15 + epochs: 100 + grad_norm: false + log_step: 1000 + loss_func: mae + lr_decay: true + lr_decay_rate: 0.3 + lr_decay_step: 5,20,40,70 + lr_init: 0.003 + mae_thresh: None + mape_thresh: 0.001 + max_grad_norm: 5 + output_dim: 1 + plot: false + real_value: true + weight_decay: 0 \ No newline at end of file diff --git a/config/STNorm/SolarEnergy.yaml b/config/STNorm/SolarEnergy.yaml new file mode 100644 index 0000000..d1be59c --- /dev/null +++ b/config/STNorm/SolarEnergy.yaml @@ -0,0 +1,52 @@ +basic: + dataset: SolarEnergy + device: cuda:0 + mode: train + model: STNorm + seed: 2023 + +data: + batch_size: 64 + column_wise: false + days_per_week: 7 + horizon: 24 + input_dim: 1 + lag: 24 + normalizer: std + num_nodes: 137 + steps_per_day: 24 + test_ratio: 0.2 + val_ratio: 0.2 + +model: + dropout: 0.2 + blocks: 2 + layers: 2 + snorm_bool: True + tnorm_bool: True + num_nodes: 137 + in_dim: 1 + out_dim: 1 + channels: 32 + kernel_size: 2 + +train: + batch_size: 64 + debug: false + early_stop: true + early_stop_patience: 15 + epochs: 100 + grad_norm: false + log_step: 1000 + 
loss_func: mae + lr_decay: true + lr_decay_rate: 0.3 + lr_decay_step: 5,20,40,70 + lr_init: 0.003 + mae_thresh: None + mape_thresh: 0.001 + max_grad_norm: 5 + output_dim: 1 + plot: false + real_value: true + weight_decay: 0 \ No newline at end of file diff --git a/config/STSGCN/AirQuality.yaml b/config/STSGCN/AirQuality.yaml index 0b65da7..d9ccba1 100644 --- a/config/STSGCN/AirQuality.yaml +++ b/config/STSGCN/AirQuality.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STSGCN seed: 2023 + data: batch_size: 16 column_wise: false @@ -12,36 +13,39 @@ data: input_dim: 6 lag: 24 normalizer: std - num_nodes: 35 + num_nodes: 12 steps_per_day: 24 test_ratio: 0.2 val_ratio: 0.2 + model: activation: GLU construct_type: connectivity first_layer_embedding_size: 64 hidden_dims: - - - 64 - - 64 - - 64 - - - 64 - - 64 - - 64 - - - 64 - - 64 - - 64 - - - 64 - - 64 - - 64 + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 history: 24 horizon: 24 input_dim: 6 + num_nodes: 12 out_layer_dim: 128 output_dim: 6 spatial_emb: true strides: 3 temporal_emb: true use_mask: true + train: batch_size: 16 debug: false @@ -55,11 +59,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 6 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STSGCN/BJTaxi-InFlow.yaml b/config/STSGCN/BJTaxi-InFlow.yaml index 074da90..ca137d5 100644 --- a/config/STSGCN/BJTaxi-InFlow.yaml +++ b/config/STSGCN/BJTaxi-InFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STSGCN seed: 2023 + data: batch_size: 32 column_wise: false @@ -16,32 +17,35 @@ data: steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: activation: GLU construct_type: connectivity first_layer_embedding_size: 64 hidden_dims: - - - 64 - - 64 - - 64 - - - 64 - - 64 - - 64 - - - 64 - - 64 - - 64 - - - 64 - - 64 - - 64 + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 history: 24 horizon: 24 input_dim: 1 + num_nodes: 1024 out_layer_dim: 128 output_dim: 1 spatial_emb: true strides: 3 temporal_emb: true use_mask: true + train: batch_size: 32 debug: false @@ -55,11 +59,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STSGCN/BJTaxi-OutFlow.yaml b/config/STSGCN/BJTaxi-OutFlow.yaml index 1b395db..460d836 100644 --- a/config/STSGCN/BJTaxi-OutFlow.yaml +++ b/config/STSGCN/BJTaxi-OutFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STSGCN seed: 2023 + data: batch_size: 32 column_wise: false @@ -16,32 +17,35 @@ data: steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: activation: GLU construct_type: connectivity first_layer_embedding_size: 64 hidden_dims: - - - 64 - - 64 - - 64 - - - 64 - - 64 - - 64 - - - 64 - - 64 - - 64 - - - 64 - - 64 - - 64 + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 history: 24 horizon: 24 input_dim: 1 + num_nodes: 1024 out_layer_dim: 128 output_dim: 1 spatial_emb: true strides: 3 temporal_emb: true use_mask: true + train: batch_size: 32 debug: false @@ -55,11 +59,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STSGCN/METR-LA.yaml 
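
The hidden_dims rewrites in the STSGCN hunks here change only YAML style, not data: the flow form [[64, 64, 64], ...] and the indented block form parse to identical nested lists, and the new indentation matches the ruamel.yaml settings (mapping=2, sequence=4, offset=2) of the config/tmp.py script this patch deletes further down. A minimal equivalence check with PyYAML:

import yaml

flow = "hidden_dims: [[64, 64, 64], [64, 64, 64]]"
block = """\
hidden_dims:
  - - 64
    - 64
    - 64
  - - 64
    - 64
    - 64
"""
# Both spellings load to the same nested lists:
assert yaml.safe_load(flow) == yaml.safe_load(block)
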
b/config/STSGCN/METR-LA.yaml index cd54e19..898a399 100644 --- a/config/STSGCN/METR-LA.yaml +++ b/config/STSGCN/METR-LA.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STSGCN seed: 2023 + data: batch_size: 16 column_wise: false @@ -16,32 +17,35 @@ data: steps_per_day: 288 test_ratio: 0.2 val_ratio: 0.2 + model: activation: GLU construct_type: connectivity first_layer_embedding_size: 64 hidden_dims: - - - 64 - - 64 - - 64 - - - 64 - - 64 - - 64 - - - 64 - - 64 - - 64 - - - 64 - - 64 - - 64 + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 history: 12 horizon: 12 input_dim: 1 + num_nodes: 207 out_layer_dim: 128 output_dim: 1 spatial_emb: true strides: 3 temporal_emb: true use_mask: true + train: batch_size: 16 debug: false @@ -55,11 +59,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STSGCN/NYCBike-InFlow.yaml b/config/STSGCN/NYCBike-InFlow.yaml index 02752c5..143e0fa 100644 --- a/config/STSGCN/NYCBike-InFlow.yaml +++ b/config/STSGCN/NYCBike-InFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STSGCN seed: 2023 + data: batch_size: 32 column_wise: false @@ -12,36 +13,39 @@ data: input_dim: 1 lag: 24 normalizer: std - num_nodes: 1024 + num_nodes: 128 steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: activation: GLU construct_type: connectivity first_layer_embedding_size: 64 hidden_dims: - - - 64 - - 64 - - 64 - - - 64 - - 64 - - 64 - - - 64 - - 64 - - 64 - - - 64 - - 64 - - 64 + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 history: 24 horizon: 24 input_dim: 1 + num_nodes: 128 out_layer_dim: 128 output_dim: 1 spatial_emb: true strides: 3 temporal_emb: true use_mask: true + train: batch_size: 32 debug: false @@ -55,11 +59,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STSGCN/NYCBike-OutFlow.yaml b/config/STSGCN/NYCBike-OutFlow.yaml index 868d4d1..b78fcb0 100644 --- a/config/STSGCN/NYCBike-OutFlow.yaml +++ b/config/STSGCN/NYCBike-OutFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STSGCN seed: 2023 + data: batch_size: 32 column_wise: false @@ -12,36 +13,39 @@ data: input_dim: 1 lag: 24 normalizer: std - num_nodes: 1024 + num_nodes: 128 steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: activation: GLU construct_type: connectivity first_layer_embedding_size: 64 hidden_dims: - - - 64 - - 64 - - 64 - - - 64 - - 64 - - 64 - - - 64 - - 64 - - 64 - - - 64 - - 64 - - 64 + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 history: 24 horizon: 24 input_dim: 1 + num_nodes: 128 out_layer_dim: 128 output_dim: 1 spatial_emb: true strides: 3 temporal_emb: true use_mask: true + train: batch_size: 32 debug: false @@ -55,11 +59,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STSGCN/PEMSD3.yaml b/config/STSGCN/PEMSD3.yaml index 0b08995..38250e8 100755 --- a/config/STSGCN/PEMSD3.yaml +++ b/config/STSGCN/PEMSD3.yaml @@ -22,10 +22,23 @@ model: activation: GLU construct_type: connectivity first_layer_embedding_size: 64 - hidden_dims: 
[[64, 64, 64], [64, 64, 64], [64, 64, 64], [64, 64, 64]] + hidden_dims: + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 history: 12 horizon: 12 input_dim: 1 + num_nodes: 358 out_layer_dim: 128 output_dim: 1 spatial_emb: true @@ -46,11 +59,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STSGCN/PEMSD4.yaml b/config/STSGCN/PEMSD4.yaml index b5ef9fe..bfa33c2 100755 --- a/config/STSGCN/PEMSD4.yaml +++ b/config/STSGCN/PEMSD4.yaml @@ -22,10 +22,23 @@ model: activation: GLU construct_type: connectivity first_layer_embedding_size: 64 - hidden_dims: [[64, 64, 64], [64, 64, 64], [64, 64, 64], [64, 64, 64]] + hidden_dims: + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 history: 12 horizon: 12 input_dim: 1 + num_nodes: 307 out_layer_dim: 128 output_dim: 1 spatial_emb: true @@ -46,11 +59,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STSGCN/PEMSD7.yaml b/config/STSGCN/PEMSD7.yaml index c13cde9..e1ef225 100755 --- a/config/STSGCN/PEMSD7.yaml +++ b/config/STSGCN/PEMSD7.yaml @@ -22,10 +22,23 @@ model: activation: GLU construct_type: connectivity first_layer_embedding_size: 64 - hidden_dims: [[64, 64, 64], [64, 64, 64], [64, 64, 64], [64, 64, 64]] + hidden_dims: + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 history: 12 horizon: 12 input_dim: 1 + num_nodes: 883 out_layer_dim: 128 output_dim: 1 spatial_emb: true @@ -46,11 +59,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STSGCN/PEMSD8.yaml b/config/STSGCN/PEMSD8.yaml index df35209..183646d 100755 --- a/config/STSGCN/PEMSD8.yaml +++ b/config/STSGCN/PEMSD8.yaml @@ -22,10 +22,23 @@ model: activation: GLU construct_type: connectivity first_layer_embedding_size: 64 - hidden_dims: [[64, 64, 64], [64, 64, 64], [64, 64, 64], [64, 64, 64]] + hidden_dims: + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 history: 12 horizon: 12 input_dim: 1 + num_nodes: 170 out_layer_dim: 128 output_dim: 1 spatial_emb: true @@ -46,11 +59,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/STSGCN/SolarEnergy.yaml b/config/STSGCN/SolarEnergy.yaml index 9baa783..3f25c2a 100644 --- a/config/STSGCN/SolarEnergy.yaml +++ b/config/STSGCN/SolarEnergy.yaml @@ -4,6 +4,7 @@ basic: mode: train model: STSGCN seed: 2023 + data: batch_size: 16 column_wise: false @@ -16,32 +17,35 @@ data: steps_per_day: 24 test_ratio: 0.2 val_ratio: 0.2 + model: activation: GLU construct_type: connectivity first_layer_embedding_size: 64 hidden_dims: - - - 64 - - 64 - - 64 - - - 64 - - 64 - - 64 - - - 64 - - 64 - - 64 - - - 64 - - 64 - - 64 + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 + - - 64 + - 64 + - 64 history: 24 horizon: 24 input_dim: 137 + num_nodes: 137 out_layer_dim: 128 output_dim: 137 spatial_emb: true 
strides: 3 temporal_emb: true use_mask: true + train: batch_size: 16 debug: false @@ -55,11 +59,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 137 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/ST_SSL/AirQuality.yaml b/config/ST_SSL/AirQuality.yaml index a1ecc1d..e2459b3 100644 --- a/config/ST_SSL/AirQuality.yaml +++ b/config/ST_SSL/AirQuality.yaml @@ -4,6 +4,7 @@ basic: mode: train model: ST_SSL seed: 2023 + data: batch_size: 16 column_wise: false @@ -12,10 +13,11 @@ data: input_dim: 6 lag: 24 normalizer: std - num_nodes: 35 + num_nodes: 12 steps_per_day: 24 test_ratio: 0.2 val_ratio: 0.2 + model: d_model: 64 dropout: 0.1 @@ -24,10 +26,12 @@ model: input_dim: 6 n_his: 24 nmb_prototype: 10 + num_nodes: 12 output_dim: 6 percent: 0.1 shm_temp: 0.1 yita: 0.5 + train: batch_size: 16 debug: false @@ -41,11 +45,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 6 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/ST_SSL/BJTaxi-InFlow.yaml b/config/ST_SSL/BJTaxi-InFlow.yaml index 4dbf256..6da077f 100644 --- a/config/ST_SSL/BJTaxi-InFlow.yaml +++ b/config/ST_SSL/BJTaxi-InFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: ST_SSL seed: 2023 + data: batch_size: 32 column_wise: false @@ -16,6 +17,7 @@ data: steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: d_model: 64 dropout: 0.1 @@ -24,10 +26,12 @@ model: input_dim: 1 n_his: 24 nmb_prototype: 10 + num_nodes: 1024 output_dim: 1 percent: 0.1 shm_temp: 0.1 yita: 0.5 + train: batch_size: 32 debug: false @@ -41,11 +45,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/ST_SSL/BJTaxi-OutFlow.yaml b/config/ST_SSL/BJTaxi-OutFlow.yaml index 6801117..969be92 100644 --- a/config/ST_SSL/BJTaxi-OutFlow.yaml +++ b/config/ST_SSL/BJTaxi-OutFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: ST_SSL seed: 2023 + data: batch_size: 32 column_wise: false @@ -16,6 +17,7 @@ data: steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: d_model: 64 dropout: 0.1 @@ -24,10 +26,12 @@ model: input_dim: 1 n_his: 24 nmb_prototype: 10 + num_nodes: 1024 output_dim: 1 percent: 0.1 shm_temp: 0.1 yita: 0.5 + train: batch_size: 32 debug: false @@ -41,11 +45,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/ST_SSL/METR-LA.yaml b/config/ST_SSL/METR-LA.yaml index d80ccb9..7805bc4 100644 --- a/config/ST_SSL/METR-LA.yaml +++ b/config/ST_SSL/METR-LA.yaml @@ -4,6 +4,7 @@ basic: mode: train model: ST_SSL seed: 2023 + data: batch_size: 16 column_wise: false @@ -16,6 +17,7 @@ data: steps_per_day: 288 test_ratio: 0.2 val_ratio: 0.2 + model: d_model: 64 dropout: 0.1 @@ -24,10 +26,12 @@ model: input_dim: 1 n_his: 12 nmb_prototype: 10 + num_nodes: 207 output_dim: 1 percent: 0.1 shm_temp: 0.1 yita: 0.5 + train: batch_size: 16 debug: false @@ -41,11 +45,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git 
a/config/ST_SSL/NYCBike-InFlow.yaml b/config/ST_SSL/NYCBike-InFlow.yaml index 3283cb6..af85c3a 100644 --- a/config/ST_SSL/NYCBike-InFlow.yaml +++ b/config/ST_SSL/NYCBike-InFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: ST_SSL seed: 2023 + data: batch_size: 32 column_wise: false @@ -12,10 +13,11 @@ data: input_dim: 1 lag: 24 normalizer: std - num_nodes: 1024 + num_nodes: 128 steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: d_model: 64 dropout: 0.1 @@ -24,10 +26,12 @@ model: input_dim: 1 n_his: 24 nmb_prototype: 10 + num_nodes: 128 output_dim: 1 percent: 0.1 shm_temp: 0.1 yita: 0.5 + train: batch_size: 32 debug: false @@ -41,11 +45,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/ST_SSL/NYCBike-OutFlow.yaml b/config/ST_SSL/NYCBike-OutFlow.yaml index 3a3e06f..e3b0c3c 100644 --- a/config/ST_SSL/NYCBike-OutFlow.yaml +++ b/config/ST_SSL/NYCBike-OutFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: ST_SSL seed: 2023 + data: batch_size: 32 column_wise: false @@ -12,10 +13,11 @@ data: input_dim: 1 lag: 24 normalizer: std - num_nodes: 1024 + num_nodes: 128 steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: d_model: 64 dropout: 0.1 @@ -24,10 +26,12 @@ model: input_dim: 1 n_his: 24 nmb_prototype: 10 + num_nodes: 128 output_dim: 1 percent: 0.1 shm_temp: 0.1 yita: 0.5 + train: batch_size: 32 debug: false @@ -41,11 +45,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/ST_SSL/PEMSD3.yaml b/config/ST_SSL/PEMSD3.yaml index 70dc619..c9c933c 100644 --- a/config/ST_SSL/PEMSD3.yaml +++ b/config/ST_SSL/PEMSD3.yaml @@ -26,6 +26,7 @@ model: input_dim: 1 n_his: 12 nmb_prototype: 10 + num_nodes: 358 output_dim: 1 percent: 0.1 shm_temp: 0.1 @@ -44,11 +45,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/ST_SSL/PEMSD4.yaml b/config/ST_SSL/PEMSD4.yaml index 2aab8c8..7a53a20 100644 --- a/config/ST_SSL/PEMSD4.yaml +++ b/config/ST_SSL/PEMSD4.yaml @@ -26,6 +26,7 @@ model: input_dim: 1 n_his: 12 nmb_prototype: 10 + num_nodes: 307 output_dim: 1 percent: 0.1 shm_temp: 0.1 @@ -44,11 +45,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/ST_SSL/PEMSD7.yaml b/config/ST_SSL/PEMSD7.yaml index 4ec9986..75019a3 100644 --- a/config/ST_SSL/PEMSD7.yaml +++ b/config/ST_SSL/PEMSD7.yaml @@ -26,6 +26,7 @@ model: input_dim: 1 n_his: 12 nmb_prototype: 10 + num_nodes: 883 output_dim: 1 percent: 0.1 shm_temp: 0.1 @@ -44,11 +45,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/ST_SSL/PEMSD8.yaml b/config/ST_SSL/PEMSD8.yaml index 1129639..9aeb16f 100644 --- a/config/ST_SSL/PEMSD8.yaml +++ b/config/ST_SSL/PEMSD8.yaml @@ -26,6 +26,7 @@ model: input_dim: 1 n_his: 12 nmb_prototype: 10 + num_nodes: 170 output_dim: 1 percent: 0.1 shm_temp: 0.1 @@ 
-44,11 +45,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/ST_SSL/SolarEnergy.yaml b/config/ST_SSL/SolarEnergy.yaml index cbe6d71..2752c44 100644 --- a/config/ST_SSL/SolarEnergy.yaml +++ b/config/ST_SSL/SolarEnergy.yaml @@ -4,6 +4,7 @@ basic: mode: train model: ST_SSL seed: 2023 + data: batch_size: 16 column_wise: false @@ -16,6 +17,7 @@ data: steps_per_day: 24 test_ratio: 0.2 val_ratio: 0.2 + model: d_model: 64 dropout: 0.1 @@ -24,10 +26,12 @@ model: input_dim: 137 n_his: 24 nmb_prototype: 10 + num_nodes: 137 output_dim: 137 percent: 0.1 shm_temp: 0.1 yita: 0.5 + train: batch_size: 16 debug: false @@ -41,11 +45,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 137 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/TCN/AirQuality.yaml b/config/TCN/AirQuality.yaml index 3b19a78..c04eeac 100644 --- a/config/TCN/AirQuality.yaml +++ b/config/TCN/AirQuality.yaml @@ -13,7 +13,7 @@ data: input_dim: 6 lag: 24 normalizer: std - num_nodes: 35 + num_nodes: 12 steps_per_day: 24 test_ratio: 0.2 val_ratio: 0.2 @@ -21,10 +21,14 @@ data: model: batch_size: 16 dropout: 0.2 - hidden_channels: [32, 64, 32] + hidden_channels: + - 32 + - 64 + - 32 input_dim: 6 kernel_size: 3 num_layers: 3 + num_nodes: 12 output_dim: 6 train: @@ -40,11 +44,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 6 plot: false real_value: true - seed: 10 - weight_decay: 0 \ No newline at end of file + weight_decay: 0 diff --git a/config/TCN/BJTaxi-InFlow.yaml b/config/TCN/BJTaxi-InFlow.yaml index 68cf5e3..c49b1dc 100644 --- a/config/TCN/BJTaxi-InFlow.yaml +++ b/config/TCN/BJTaxi-InFlow.yaml @@ -21,10 +21,14 @@ data: model: batch_size: 32 dropout: 0.2 - hidden_channels: [32, 64, 32] + hidden_channels: + - 32 + - 64 + - 32 input_dim: 1 kernel_size: 3 num_layers: 3 + num_nodes: 1024 output_dim: 1 train: @@ -40,11 +44,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 - weight_decay: 0 \ No newline at end of file + weight_decay: 0 diff --git a/config/TCN/BJTaxi-OutFlow.yaml b/config/TCN/BJTaxi-OutFlow.yaml index 377bb5b..077c9a3 100644 --- a/config/TCN/BJTaxi-OutFlow.yaml +++ b/config/TCN/BJTaxi-OutFlow.yaml @@ -21,10 +21,14 @@ data: model: batch_size: 32 dropout: 0.2 - hidden_channels: [32, 64, 32] + hidden_channels: + - 32 + - 64 + - 32 input_dim: 1 kernel_size: 3 num_layers: 3 + num_nodes: 1024 output_dim: 1 train: @@ -40,11 +44,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 - weight_decay: 0 \ No newline at end of file + weight_decay: 0 diff --git a/config/TCN/METR-LA.yaml b/config/TCN/METR-LA.yaml index 10fc585..a588114 100644 --- a/config/TCN/METR-LA.yaml +++ b/config/TCN/METR-LA.yaml @@ -21,10 +21,14 @@ data: model: batch_size: 16 dropout: 0.2 - hidden_channels: [32, 64, 32] + hidden_channels: + - 32 + - 64 + - 32 input_dim: 1 kernel_size: 3 num_layers: 3 + num_nodes: 207 output_dim: 1 train: @@ -40,11 +44,10 @@ train: lr_decay_rate: 0.3 
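
Like the rest of the patch, the TCN hunks here pin mae_thresh to 0.0. Thresholds of this kind are conventionally consumed by masked error metrics, which drop entries whose ground truth falls at or below the threshold before averaging. A minimal sketch of that convention (the helper name and the exact masking rule are assumptions, not code from this repository):

import numpy as np

def masked_mae(pred: np.ndarray, true: np.ndarray, thresh=None) -> float:
    # Hypothetical helper: entries whose ground truth is <= thresh are
    # excluded from the mean; thresh=None keeps every entry.
    mask = np.ones(true.shape, dtype=bool) if thresh is None else true > thresh
    return float(np.abs(pred[mask] - true[mask]).mean())

Under that reading, 0.0 is not interchangeable with a truly null threshold: it still excludes exact zeros, which are common in sparse flow data, so the blank-to-0.0 switch is a behavioral choice rather than a pure type fix.
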
lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 - weight_decay: 0 \ No newline at end of file + weight_decay: 0 diff --git a/config/TCN/NYCBike-InFlow.yaml b/config/TCN/NYCBike-InFlow.yaml index cd5242c..f5005d0 100644 --- a/config/TCN/NYCBike-InFlow.yaml +++ b/config/TCN/NYCBike-InFlow.yaml @@ -21,10 +21,14 @@ data: model: batch_size: 32 dropout: 0.2 - hidden_channels: [32, 64, 32] + hidden_channels: + - 32 + - 64 + - 32 input_dim: 1 kernel_size: 3 num_layers: 3 + num_nodes: 128 output_dim: 1 train: @@ -40,11 +44,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 - weight_decay: 0 \ No newline at end of file + weight_decay: 0 diff --git a/config/TCN/NYCBike-OutFlow.yaml b/config/TCN/NYCBike-OutFlow.yaml index 761431b..0d06c3b 100644 --- a/config/TCN/NYCBike-OutFlow.yaml +++ b/config/TCN/NYCBike-OutFlow.yaml @@ -21,10 +21,14 @@ data: model: batch_size: 32 dropout: 0.2 - hidden_channels: [32, 64, 32] + hidden_channels: + - 32 + - 64 + - 32 input_dim: 1 kernel_size: 3 num_layers: 3 + num_nodes: 128 output_dim: 1 train: @@ -40,11 +44,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 - weight_decay: 0 \ No newline at end of file + weight_decay: 0 diff --git a/config/TCN/PEMSD3.yaml b/config/TCN/PEMSD3.yaml index 47a59f2..00396e0 100755 --- a/config/TCN/PEMSD3.yaml +++ b/config/TCN/PEMSD3.yaml @@ -21,10 +21,14 @@ data: model: batch_size: 64 dropout: 0.2 - hidden_channels: [32, 64, 32] + hidden_channels: + - 32 + - 64 + - 32 input_dim: 1 kernel_size: 3 num_layers: 3 + num_nodes: 358 output_dim: 1 train: @@ -40,11 +44,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/TCN/PEMSD4.yaml b/config/TCN/PEMSD4.yaml index 810859f..552df9e 100755 --- a/config/TCN/PEMSD4.yaml +++ b/config/TCN/PEMSD4.yaml @@ -21,10 +21,14 @@ data: model: batch_size: 64 dropout: 0.2 - hidden_channels: [32, 64, 32] + hidden_channels: + - 32 + - 64 + - 32 input_dim: 1 kernel_size: 3 num_layers: 3 + num_nodes: 307 output_dim: 1 train: @@ -40,11 +44,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/TCN/PEMSD7.yaml b/config/TCN/PEMSD7.yaml index 6436803..ca414f2 100755 --- a/config/TCN/PEMSD7.yaml +++ b/config/TCN/PEMSD7.yaml @@ -21,10 +21,14 @@ data: model: batch_size: 64 dropout: 0.2 - hidden_channels: [32, 64, 32] + hidden_channels: + - 32 + - 64 + - 32 input_dim: 1 kernel_size: 3 num_layers: 3 + num_nodes: 883 output_dim: 1 train: @@ -40,11 +44,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/TCN/PEMSD8.yaml b/config/TCN/PEMSD8.yaml index d47fdc5..e0f4761 100755 --- a/config/TCN/PEMSD8.yaml +++ b/config/TCN/PEMSD8.yaml @@ -21,10 +21,14 @@ data: model: batch_size: 64 dropout: 0.2 - hidden_channels: [32, 64, 
32] + hidden_channels: + - 32 + - 64 + - 32 input_dim: 1 kernel_size: 3 num_layers: 3 + num_nodes: 170 output_dim: 1 train: @@ -40,11 +44,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/TCN/SolarEnergy.yaml b/config/TCN/SolarEnergy.yaml index d620185..b12c1bf 100644 --- a/config/TCN/SolarEnergy.yaml +++ b/config/TCN/SolarEnergy.yaml @@ -21,10 +21,14 @@ data: model: batch_size: 64 dropout: 0.2 - hidden_channels: [32, 64, 32] + hidden_channels: + - 32 + - 64 + - 32 input_dim: 1 kernel_size: 3 num_layers: 3 + num_nodes: 137 output_dim: 1 train: @@ -40,11 +44,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.001 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 - weight_decay: 0 \ No newline at end of file + weight_decay: 0 diff --git a/config/TWDGCN/AirQuality.yaml b/config/TWDGCN/AirQuality.yaml index b57aa40..6cef32e 100644 --- a/config/TWDGCN/AirQuality.yaml +++ b/config/TWDGCN/AirQuality.yaml @@ -4,29 +4,34 @@ basic: mode: train model: TWDGCN seed: 2023 + data: - batch_size: 16 + batch_size: 64 column_wise: false days_per_week: 7 horizon: 24 - input_dim: 6 + input_dim: 1 lag: 24 normalizer: std - num_nodes: 35 + num_nodes: 12 steps_per_day: 24 test_ratio: 0.2 val_ratio: 0.2 + model: cheb_order: 2 embed_dim: 12 - input_dim: 6 + horizon: 24 + input_dim: 1 num_layers: 1 - output_dim: 6 + num_nodes: 12 + output_dim: 1 rnn_units: 64 use_day: true use_week: false + train: - batch_size: 16 + batch_size: 64 debug: false early_stop: true early_stop_patience: 15 @@ -38,11 +43,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.001 mape_thresh: 0.0 max_grad_norm: 5 - output_dim: 6 + output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/TWDGCN/BJTaxi-InFlow.yaml b/config/TWDGCN/BJTaxi-InFlow.yaml index 1ee9c33..63b28d1 100644 --- a/config/TWDGCN/BJTaxi-InFlow.yaml +++ b/config/TWDGCN/BJTaxi-InFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: TWDGCN seed: 2023 + data: batch_size: 32 column_wise: false @@ -16,15 +17,19 @@ data: steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: cheb_order: 2 embed_dim: 12 + horizon: 24 input_dim: 1 num_layers: 1 + num_nodes: 1024 output_dim: 1 rnn_units: 64 use_day: true use_week: false + train: batch_size: 32 debug: false @@ -38,11 +43,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/TWDGCN/BJTaxi-OutFlow.yaml b/config/TWDGCN/BJTaxi-OutFlow.yaml index bb2933b..feb1636 100644 --- a/config/TWDGCN/BJTaxi-OutFlow.yaml +++ b/config/TWDGCN/BJTaxi-OutFlow.yaml @@ -4,6 +4,7 @@ basic: mode: train model: TWDGCN seed: 2023 + data: batch_size: 32 column_wise: false @@ -16,15 +17,19 @@ data: steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: cheb_order: 2 embed_dim: 12 + horizon: 24 input_dim: 1 num_layers: 1 + num_nodes: 1024 output_dim: 1 rnn_units: 64 use_day: true use_week: false + train: batch_size: 32 debug: false @@ -38,11 +43,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 
weight_decay: 0 diff --git a/config/TWDGCN/Hainan.yaml b/config/TWDGCN/Hainan.yaml index d32a56b..058ca11 100755 --- a/config/TWDGCN/Hainan.yaml +++ b/config/TWDGCN/Hainan.yaml @@ -13,7 +13,7 @@ data: input_dim: 1 lag: 12 normalizer: std - num_nodes: 13 + num_nodes: 200 steps_per_day: 288 test_ratio: 0.2 val_ratio: 0.2 @@ -25,6 +25,7 @@ model: horizon: 12 input_dim: 1 num_layers: 1 + num_nodes: 200 output_dim: 1 rnn_units: 32 use_day: true @@ -47,7 +48,7 @@ train: - 40 - 70 lr_init: 0.003 - mae_thresh: + mae_thresh: null mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 diff --git a/config/TWDGCN/METR-LA.yaml b/config/TWDGCN/METR-LA.yaml index 42eb251..29015a7 100644 --- a/config/TWDGCN/METR-LA.yaml +++ b/config/TWDGCN/METR-LA.yaml @@ -4,27 +4,32 @@ basic: mode: train model: TWDGCN seed: 2023 + data: batch_size: 16 column_wise: false days_per_week: 7 - horizon: 12 + horizon: 24 input_dim: 1 - lag: 12 + lag: 24 normalizer: std num_nodes: 207 steps_per_day: 288 test_ratio: 0.2 val_ratio: 0.2 + model: cheb_order: 2 embed_dim: 12 + horizon: 24 input_dim: 1 num_layers: 1 + num_nodes: 207 output_dim: 1 rnn_units: 64 use_day: true use_week: false + train: batch_size: 16 debug: false @@ -38,11 +43,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/TWDGCN/NYCBike-InFlow.yaml b/config/TWDGCN/NYCBike-InFlow.yaml index 060bdeb..0ca0c1d 100644 --- a/config/TWDGCN/NYCBike-InFlow.yaml +++ b/config/TWDGCN/NYCBike-InFlow.yaml @@ -4,27 +4,32 @@ basic: mode: train model: TWDGCN seed: 2023 + data: batch_size: 32 column_wise: false days_per_week: 7 - horizon: 24 + horizon: 12 input_dim: 1 - lag: 24 + lag: 12 normalizer: std - num_nodes: 1024 + num_nodes: 128 steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: cheb_order: 2 embed_dim: 12 + horizon: 12 input_dim: 1 num_layers: 1 + num_nodes: 128 output_dim: 1 rnn_units: 64 use_day: true use_week: false + train: batch_size: 32 debug: false @@ -38,11 +43,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' - mape_thresh: 0.0 + mae_thresh: 0.0 + mape_thresh: 0.001 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/TWDGCN/NYCBike-OutFlow.yaml b/config/TWDGCN/NYCBike-OutFlow.yaml index fd50df1..7226490 100644 --- a/config/TWDGCN/NYCBike-OutFlow.yaml +++ b/config/TWDGCN/NYCBike-OutFlow.yaml @@ -4,27 +4,32 @@ basic: mode: train model: TWDGCN seed: 2023 + data: batch_size: 32 column_wise: false days_per_week: 7 - horizon: 24 + horizon: 12 input_dim: 1 - lag: 24 + lag: 12 normalizer: std - num_nodes: 1024 + num_nodes: 128 steps_per_day: 48 test_ratio: 0.2 val_ratio: 0.2 + model: cheb_order: 2 embed_dim: 12 + horizon: 12 input_dim: 1 num_layers: 1 + num_nodes: 128 output_dim: 1 rnn_units: 64 use_day: true use_week: false + train: batch_size: 32 debug: false @@ -38,11 +43,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' - mape_thresh: 0.0 + mae_thresh: 0.0 + mape_thresh: 0.001 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/TWDGCN/PEMSD3.yaml b/config/TWDGCN/PEMSD3.yaml index 7227a76..04a0311 100755 --- a/config/TWDGCN/PEMSD3.yaml +++ b/config/TWDGCN/PEMSD3.yaml @@ -21,8 +21,10 @@ data: model: cheb_order: 2 embed_dim: 12 + horizon: 12 input_dim: 1 num_layers: 1 + num_nodes: 358 output_dim: 1 rnn_units: 
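
The seed deletions repeated through these TWDGCN hunks ("seed: 10" here, "seed: 12" and "seed: 3407" in the PEMSD7 and PEMSD8 variants just below) leave basic.seed: 2023 as the single source of randomness per config file. A sketch of the centralized seeding this consolidation enables; the helper and the stand-in config dict are illustrative, not code from this repository:

import random

import numpy as np
import torch

def set_global_seed(seed: int) -> None:
    # Illustrative helper: seed every RNG once, from basic.seed alone.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

config = {"basic": {"seed": 2023}}  # stand-in for the dict parse_args() returns
set_global_seed(config["basic"]["seed"])
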
64 use_day: true @@ -41,11 +43,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: + mae_thresh: 0.0 mape_thresh: 0.0 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/TWDGCN/PEMSD4.yaml b/config/TWDGCN/PEMSD4.yaml index 22d540b..131c972 100755 --- a/config/TWDGCN/PEMSD4.yaml +++ b/config/TWDGCN/PEMSD4.yaml @@ -21,8 +21,10 @@ data: model: cheb_order: 2 embed_dim: 12 + horizon: 12 input_dim: 1 num_layers: 1 + num_nodes: 307 output_dim: 1 rnn_units: 64 use_day: true @@ -41,11 +43,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: - mape_thresh: 0.0 + mae_thresh: 0.0 + mape_thresh: 0.001 max_grad_norm: 5 output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/TWDGCN/PEMSD7(L).yaml b/config/TWDGCN/PEMSD7(L).yaml index f83a8ad..1cb5f73 100755 --- a/config/TWDGCN/PEMSD7(L).yaml +++ b/config/TWDGCN/PEMSD7(L).yaml @@ -51,5 +51,4 @@ train: output_dim: 1 plot: true real_value: true - seed: 12 weight_decay: 0 diff --git a/config/TWDGCN/PEMSD7(M).yaml b/config/TWDGCN/PEMSD7(M).yaml index c1bcd2f..19fc9a8 100755 --- a/config/TWDGCN/PEMSD7(M).yaml +++ b/config/TWDGCN/PEMSD7(M).yaml @@ -51,5 +51,4 @@ train: output_dim: 1 plot: true real_value: true - seed: 12 weight_decay: 0 diff --git a/config/TWDGCN/PEMSD7.yaml b/config/TWDGCN/PEMSD7.yaml index 6854017..6861724 100755 --- a/config/TWDGCN/PEMSD7.yaml +++ b/config/TWDGCN/PEMSD7.yaml @@ -21,8 +21,10 @@ data: model: cheb_order: 2 embed_dim: 12 + horizon: 12 input_dim: 1 num_layers: 1 + num_nodes: 883 output_dim: 1 rnn_units: 64 use_day: true @@ -51,5 +53,4 @@ train: output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/TWDGCN/PEMSD8.yaml b/config/TWDGCN/PEMSD8.yaml index 857b9eb..37cceea 100755 --- a/config/TWDGCN/PEMSD8.yaml +++ b/config/TWDGCN/PEMSD8.yaml @@ -21,8 +21,10 @@ data: model: cheb_order: 2 embed_dim: 12 + horizon: 12 input_dim: 1 num_layers: 1 + num_nodes: 170 output_dim: 1 rnn_units: 64 use_day: true @@ -47,5 +49,4 @@ train: output_dim: 1 plot: false real_value: true - seed: 3407 weight_decay: 0 diff --git a/config/TWDGCN/SolarEnergy.yaml b/config/TWDGCN/SolarEnergy.yaml index 2403f5c..da5d9db 100644 --- a/config/TWDGCN/SolarEnergy.yaml +++ b/config/TWDGCN/SolarEnergy.yaml @@ -4,29 +4,34 @@ basic: mode: train model: TWDGCN seed: 2023 + data: - batch_size: 16 + batch_size: 64 column_wise: false days_per_week: 7 horizon: 24 - input_dim: 137 + input_dim: 1 lag: 24 normalizer: std num_nodes: 137 steps_per_day: 24 test_ratio: 0.2 val_ratio: 0.2 + model: cheb_order: 2 embed_dim: 12 - input_dim: 137 + horizon: 24 + input_dim: 1 num_layers: 1 - output_dim: 137 + num_nodes: 137 + output_dim: 1 rnn_units: 64 use_day: true use_week: false + train: - batch_size: 16 + batch_size: 64 debug: false early_stop: true early_stop_patience: 15 @@ -38,11 +43,10 @@ train: lr_decay_rate: 0.3 lr_decay_step: 5,20,40,70 lr_init: 0.003 - mae_thresh: '' - mape_thresh: 0.0 + mae_thresh: 0.0 + mape_thresh: 0.001 max_grad_norm: 5 - output_dim: 137 + output_dim: 1 plot: false real_value: true - seed: 10 weight_decay: 0 diff --git a/config/args_parser.py b/config/args_parser.py index ebd7bda..256c1f7 100755 --- a/config/args_parser.py +++ b/config/args_parser.py @@ -15,39 +15,5 @@ def parse_args(): config = yaml.safe_load(file) else: raise ValueError("Configuration file path must be provided using --config") - - # Update configuration with command-line arguments - # Merge 'basic' 
configuration into the root dictionary - # config.update(config.get('basic', {})) - - # Add adaptive configuration based on external commands - if "data" in config and "type" in config["data"]: - config["data"]["type"] = config["basic"].get("dataset", config["data"]["type"]) - if "model" in config and "type" in config["model"]: - config["model"]["type"] = config["basic"].get("model", config["model"]["type"]) - if "model" in config and "rnn_units" in config["model"]: - config["model"]["rnn_units"] = config["basic"].get( - "rnn", config["model"]["rnn_units"] - ) - if "model" in config and "embed_dim" in config["model"]: - config["model"]["embed_dim"] = config["basic"].get( - "emb", config["model"]["embed_dim"] - ) - if "data" in config and "sample" in config["data"]: - config["data"]["sample"] = config["basic"].get( - "sample", config["data"]["sample"] - ) - if "train" in config and "device" in config["train"]: - config["train"]["device"] = config["basic"].get( - "device", config["train"]["device"] - ) - if "train" in config and "debug" in config["train"]: - config["train"]["debug"] = config["basic"].get( - "debug", config["train"]["debug"] - ) - if "cuda" in config: - config["cuda"] = config["basic"].get("cuda", config["cuda"]) - if "mode" in config: - config["mode"] = config["basic"].get("mode", config["mode"]) - + return config diff --git a/config/iTransformer/AirQuality.yaml b/config/iTransformer/AirQuality.yaml new file mode 100644 index 0000000..b27d72c --- /dev/null +++ b/config/iTransformer/AirQuality.yaml @@ -0,0 +1,52 @@ +basic: + dataset: AirQuality + device: cuda:0 + mode: train + model: iTransformer + seed: 2023 + +data: + batch_size: 16 + column_wise: false + days_per_week: 7 + horizon: 24 + input_dim: 6 + lag: 24 + normalizer: std + num_nodes: 35 + steps_per_day: 24 + test_ratio: 0.2 + val_ratio: 0.2 + +model: + activation: gelu + seq_len: 24 + pred_len: 24 + d_model: 128 + d_ff: 2048 + dropout: 0.1 + e_layers: 2 + n_heads: 8 + output_attention: False + + +train: + batch_size: 16 + debug: false + early_stop: true + early_stop_patience: 15 + epochs: 100 + grad_norm: false + log_step: 1000 + loss_func: mae + lr_decay: true + lr_decay_rate: 0.3 + lr_decay_step: 5,20,40,70 + lr_init: 0.0001 + mae_thresh: None + mape_thresh: 0.001 + max_grad_norm: 5 + output_dim: 6 + plot: false + real_value: true + weight_decay: 0 \ No newline at end of file diff --git a/config/iTransformer/BJTaxi-InFlow.yaml b/config/iTransformer/BJTaxi-InFlow.yaml new file mode 100644 index 0000000..1df1a67 --- /dev/null +++ b/config/iTransformer/BJTaxi-InFlow.yaml @@ -0,0 +1,52 @@ +basic: + dataset: BJTaxi-InFlow + device: cuda:0 + mode: train + model: iTransformer + seed: 2023 + +data: + batch_size: 16 + column_wise: false + days_per_week: 7 + horizon: 24 + input_dim: 1 + lag: 24 + normalizer: std + num_nodes: 1024 + steps_per_day: 48 + test_ratio: 0.2 + val_ratio: 0.2 + +model: + activation: gelu + seq_len: 24 + pred_len: 24 + d_model: 128 + d_ff: 2048 + dropout: 0.1 + e_layers: 2 + n_heads: 8 + output_attention: False + + +train: + batch_size: 16 + debug: false + early_stop: true + early_stop_patience: 15 + epochs: 100 + grad_norm: false + log_step: 1000 + loss_func: mae + lr_decay: true + lr_decay_rate: 0.3 + lr_decay_step: 5,20,40,70 + lr_init: 0.0001 + mae_thresh: None + mape_thresh: 0.001 + max_grad_norm: 5 + output_dim: 1 + plot: false + real_value: true + weight_decay: 0 \ No newline at end of file diff --git a/config/iTransformer/BJTaxi-OutFlow.yaml b/config/iTransformer/BJTaxi-OutFlow.yaml new file 
mode 100644 index 0000000..8da0e92 --- /dev/null +++ b/config/iTransformer/BJTaxi-OutFlow.yaml @@ -0,0 +1,52 @@ +basic: + dataset: BJTaxi-OutFlow + device: cuda:0 + mode: train + model: iTransformer + seed: 2023 + +data: + batch_size: 16 + column_wise: false + days_per_week: 7 + horizon: 24 + input_dim: 1 + lag: 24 + normalizer: std + num_nodes: 1024 + steps_per_day: 48 + test_ratio: 0.2 + val_ratio: 0.2 + +model: + activation: gelu + seq_len: 24 + pred_len: 24 + d_model: 128 + d_ff: 2048 + dropout: 0.1 + e_layers: 2 + n_heads: 8 + output_attention: False + + +train: + batch_size: 16 + debug: false + early_stop: true + early_stop_patience: 15 + epochs: 100 + grad_norm: false + log_step: 1000 + loss_func: mae + lr_decay: true + lr_decay_rate: 0.3 + lr_decay_step: 5,20,40,70 + lr_init: 0.0001 + mae_thresh: None + mape_thresh: 0.001 + max_grad_norm: 5 + output_dim: 1 + plot: false + real_value: true + weight_decay: 0 \ No newline at end of file diff --git a/config/iTransformer/METR-LA.yaml b/config/iTransformer/METR-LA.yaml new file mode 100644 index 0000000..996e44c --- /dev/null +++ b/config/iTransformer/METR-LA.yaml @@ -0,0 +1,52 @@ +basic: + dataset: METR-LA + device: cuda:1 + mode: train + model: iTransformer + seed: 2023 + +data: + batch_size: 64 + column_wise: false + days_per_week: 7 + horizon: 24 + input_dim: 1 + lag: 24 + normalizer: std + num_nodes: 207 + steps_per_day: 288 + test_ratio: 0.2 + val_ratio: 0.2 + +model: + activation: gelu + seq_len: 24 + pred_len: 24 + d_model: 128 + d_ff: 2048 + dropout: 0.1 + e_layers: 2 + n_heads: 8 + output_attention: False + + +train: + batch_size: 64 + debug: false + early_stop: true + early_stop_patience: 15 + epochs: 100 + grad_norm: false + log_step: 1000 + loss_func: mae + lr_decay: true + lr_decay_rate: 0.3 + lr_decay_step: 5,20,40,70 + lr_init: 0.0001 + mae_thresh: None + mape_thresh: 0.001 + max_grad_norm: 5 + output_dim: 1 + plot: false + real_value: true + weight_decay: 0 \ No newline at end of file diff --git a/config/iTransformer/NYCBike-InFlow.yaml b/config/iTransformer/NYCBike-InFlow.yaml new file mode 100644 index 0000000..fdb4dce --- /dev/null +++ b/config/iTransformer/NYCBike-InFlow.yaml @@ -0,0 +1,52 @@ +basic: + dataset: NYCBike-InFlow + device: cuda:0 + mode: train + model: iTransformer + seed: 2023 + +data: + batch_size: 64 + column_wise: false + days_per_week: 7 + horizon: 24 + input_dim: 1 + lag: 24 + normalizer: std + num_nodes: 128 + steps_per_day: 48 + test_ratio: 0.2 + val_ratio: 0.2 + +model: + activation: gelu + seq_len: 24 + pred_len: 24 + d_model: 128 + d_ff: 2048 + dropout: 0.1 + e_layers: 2 + n_heads: 8 + output_attention: False + + +train: + batch_size: 64 + debug: false + early_stop: true + early_stop_patience: 15 + epochs: 100 + grad_norm: false + log_step: 1000 + loss_func: mae + lr_decay: true + lr_decay_rate: 0.3 + lr_decay_step: 5,20,40,70 + lr_init: 0.0001 + mae_thresh: None + mape_thresh: 0.001 + max_grad_norm: 5 + output_dim: 1 + plot: false + real_value: true + weight_decay: 0 \ No newline at end of file diff --git a/config/iTransformer/NYCBike-OutFlow.yaml b/config/iTransformer/NYCBike-OutFlow.yaml new file mode 100644 index 0000000..7401648 --- /dev/null +++ b/config/iTransformer/NYCBike-OutFlow.yaml @@ -0,0 +1,52 @@ +basic: + dataset: NYCBike-OutFlow + device: cuda:0 + mode: train + model: iTransformer + seed: 2023 + +data: + batch_size: 64 + column_wise: false + days_per_week: 7 + horizon: 24 + input_dim: 1 + lag: 24 + normalizer: std + num_nodes: 128 + steps_per_day: 48 + test_ratio: 0.2 + 
val_ratio: 0.2 + +model: + activation: gelu + seq_len: 24 + pred_len: 24 + d_model: 128 + d_ff: 2048 + dropout: 0.1 + e_layers: 2 + n_heads: 8 + output_attention: False + + +train: + batch_size: 64 + debug: false + early_stop: true + early_stop_patience: 15 + epochs: 100 + grad_norm: false + log_step: 1000 + loss_func: mae + lr_decay: true + lr_decay_rate: 0.3 + lr_decay_step: 5,20,40,70 + lr_init: 0.0001 + mae_thresh: None + mape_thresh: 0.001 + max_grad_norm: 5 + output_dim: 1 + plot: false + real_value: true + weight_decay: 0 \ No newline at end of file diff --git a/config/iTransformer/PEMS-BAY.yaml b/config/iTransformer/PEMS-BAY.yaml new file mode 100644 index 0000000..80d354a --- /dev/null +++ b/config/iTransformer/PEMS-BAY.yaml @@ -0,0 +1,52 @@ +basic: + dataset: PEMS-BAY + device: cuda:0 + mode: train + model: iTransformer + seed: 2023 + +data: + batch_size: 64 + column_wise: false + days_per_week: 7 + horizon: 24 + input_dim: 1 + lag: 24 + normalizer: std + num_nodes: 325 + steps_per_day: 288 + test_ratio: 0.2 + val_ratio: 0.2 + +model: + activation: gelu + seq_len: 24 + pred_len: 24 + d_model: 128 + d_ff: 2048 + dropout: 0.1 + e_layers: 2 + n_heads: 8 + output_attention: False + + +train: + batch_size: 64 + debug: false + early_stop: true + early_stop_patience: 15 + epochs: 100 + grad_norm: false + log_step: 1000 + loss_func: mae + lr_decay: true + lr_decay_rate: 0.3 + lr_decay_step: 5,20,40,70 + lr_init: 0.0001 + mae_thresh: None + mape_thresh: 0.001 + max_grad_norm: 5 + output_dim: 1 + plot: false + real_value: true + weight_decay: 0 \ No newline at end of file diff --git a/config/iTransformer/SolarEnergy.yaml b/config/iTransformer/SolarEnergy.yaml new file mode 100644 index 0000000..154be4a --- /dev/null +++ b/config/iTransformer/SolarEnergy.yaml @@ -0,0 +1,52 @@ +basic: + dataset: SolarEnergy + device: cuda:0 + mode: train + model: iTransformer + seed: 2023 + +data: + batch_size: 64 + column_wise: false + days_per_week: 7 + horizon: 24 + input_dim: 1 + lag: 24 + normalizer: std + num_nodes: 137 + steps_per_day: 24 + test_ratio: 0.2 + val_ratio: 0.2 + +model: + activation: gelu + seq_len: 24 + pred_len: 24 + d_model: 128 + d_ff: 2048 + dropout: 0.1 + e_layers: 2 + n_heads: 8 + output_attention: False + + +train: + batch_size: 64 + debug: false + early_stop: true + early_stop_patience: 15 + epochs: 100 + grad_norm: false + log_step: 1000 + loss_func: mae + lr_decay: true + lr_decay_rate: 0.3 + lr_decay_step: 5,20,40,70 + lr_init: 0.0001 + mae_thresh: None + mape_thresh: 0.001 + max_grad_norm: 5 + output_dim: 1 + plot: false + real_value: true + weight_decay: 0 \ No newline at end of file diff --git a/config/tmp.py b/config/tmp.py deleted file mode 100644 index 17cbe0b..0000000 --- a/config/tmp.py +++ /dev/null @@ -1,234 +0,0 @@ -#!/usr/bin/env python3 -import os -from collections import defaultdict -from ruamel.yaml import YAML -from ruamel.yaml.comments import CommentedMap - -yaml = YAML() -yaml.preserve_quotes = True -yaml.indent(mapping=2, sequence=4, offset=2) - -# Allowed data keys -DATA_ALLOWED_KEYS = { - "lag", - "horizon", - "num_nodes", - "steps_per_day", - "days_per_week", - "test_ratio", - "val_ratio", - "batch_size", - "input_dim", - "column_wise", - "normalizer", -} - -# Global default values -GLOBAL_DEFAULTS = { - "lag": 24, - "horizon": 24, - "num_nodes": 1, - "steps_per_day": 24, - "days_per_week": 7, - "test_ratio": 0.2, - "val_ratio": 0.2, - "batch_size": 16, - "input_dim": 1, - "column_wise": False, - "normalizer": "std", -} - -# Global defaults for train -GLOBAL_TRAIN_DEFAULTS = {
"output_dim": 1 -} - - -def load_yaml(path): - try: - with open(path, "r", encoding="utf-8") as f: - return yaml.load(f) - except Exception: - return None - - -def collect_dataset_defaults(base="."): - """ - Collect default values for each dataset's data keys, plus the train.output_dim default - """ - data_defaults = defaultdict(dict) - train_output_defaults = dict() - - for root, _, files in os.walk(base): - for name in files: - if not (name.endswith(".yaml") or name.endswith(".yml")): - continue - path = os.path.join(root, name) - cm = load_yaml(path) - if not isinstance(cm, CommentedMap): - continue - basic = cm.get("basic") - if not isinstance(basic, dict): - continue - dataset = basic.get("dataset") - if dataset is None: - continue - ds = str(dataset) - - # data defaults - data_sec = cm.get("data") - if isinstance(data_sec, dict): - for key in DATA_ALLOWED_KEYS: - if key not in data_defaults[ds] and key in data_sec and data_sec[key] is not None: - data_defaults[ds][key] = data_sec[key] - - # train.output_dim default - train_sec = cm.get("train") - if isinstance(train_sec, dict): - val = train_sec.get("output_dim") - if val is not None and ds not in train_output_defaults: - train_output_defaults[ds] = val - - return data_defaults, train_output_defaults - - -def ensure_basic_seed(cm: CommentedMap, path: str): - if "basic" not in cm or not isinstance(cm["basic"], dict): - cm["basic"] = CommentedMap() - basic = cm["basic"] - if "seed" not in basic: - basic["seed"] = 2023 - print(f"[ADD] {path}: basic.seed = 2023") - - -def fill_data_defaults(cm: CommentedMap, data_defaults: dict, path: str): - if "data" not in cm or not isinstance(cm["data"], dict): - cm["data"] = CommentedMap() - data_sec = cm["data"] - - basic = cm.get("basic", {}) - dataset = str(basic.get("dataset")) if basic and "dataset" in basic else None - - for key in sorted(DATA_ALLOWED_KEYS): - if key in data_sec and data_sec[key] is not None: - continue - if dataset and dataset in data_defaults and key in data_defaults[dataset]: - chosen = data_defaults[dataset][key] - src = f"default_from_dataset[{dataset}]" - else: - chosen = GLOBAL_DEFAULTS[key] - src = "GLOBAL_DEFAULTS" - data_sec[key] = chosen - print(f"[FILL] {path}: data.{key} <- {src} ({repr(chosen)})") - - -def merge_test_log_into_train(cm: CommentedMap, path: str): - """ - Merge the keys of test and log into train, then delete test and log - Also make sure train.debug exists - """ - train_sec = cm.setdefault("train", CommentedMap()) - - for section in ["test", "log"]: - if section in cm and isinstance(cm[section], dict): - for k, v in cm[section].items(): - if k not in train_sec: - train_sec[k] = v - print(f"[MERGE] {path}: train.{k} <- {section}.{k} ({repr(v)})") - del cm[section] - print(f"[DEL] {path}: deleted section '{section}'") - - # train.debug - if "debug" not in train_sec: - train_sec["debug"] = False - print(f"[ADD] {path}: train.debug = False") - - -def fill_train_output_dim(cm: CommentedMap, train_output_defaults: dict, path: str): - train_sec = cm.setdefault("train", CommentedMap()) - if "output_dim" not in train_sec or train_sec["output_dim"] is None: - basic = cm.get("basic", {}) - dataset = str(basic.get("dataset")) if basic and "dataset" in basic else None - if dataset and dataset in train_output_defaults: - val = train_output_defaults[dataset] - src = f"default_from_dataset[{dataset}]" - else: - val = GLOBAL_TRAIN_DEFAULTS["output_dim"] - src = "GLOBAL_TRAIN_DEFAULTS" - train_sec["output_dim"] = val - print(f"[FILL] {path}: train.output_dim <- {src} ({val})") - - -def sync_train_batch_size(cm: CommentedMap, path: str): - """ - If
train.batch_size disagrees with data.batch_size, data takes precedence - """ - data_sec = cm.get("data", {}) - train_sec = cm.get("train", {}) - data_bs = data_sec.get("batch_size") - train_bs = train_sec.get("batch_size") - - if data_bs is not None and train_bs != data_bs: - train_sec["batch_size"] = data_bs - print(f"[SYNC] {path}: train.batch_size corrected to match data.batch_size ({data_bs})") - - -def sort_subkeys_and_insert_blanklines(cm: CommentedMap): - for sec in list(cm.keys()): - if isinstance(cm[sec], dict): - sorted_cm = CommentedMap() - for k in sorted(cm[sec].keys()): - sorted_cm[k] = cm[sec][k] - cm[sec] = sorted_cm - - keys = list(cm.keys()) - for i, k in enumerate(keys): - if i == 0: - try: - cm.yaml_set_comment_before_after_key(k, before=None) - except Exception: - pass - else: - try: - cm.yaml_set_comment_before_after_key(k, before="\n") - except Exception: - pass - - -def process_all(base="."): - print(">> Collecting dataset defaults ...") - data_defaults, train_output_defaults = collect_dataset_defaults(base) - print(">> Collected data defaults per dataset:") - for ds, kv in data_defaults.items(): - print(f" - {ds}: {kv}") - print(">> Collected train.output_dim defaults per dataset:") - for ds, val in train_output_defaults.items(): - print(f" - {ds}: output_dim = {val}") - - for root, _, files in os.walk(base): - for name in files: - if not (name.endswith(".yaml") or name.endswith(".yml")): - continue - path = os.path.join(root, name) - cm = load_yaml(path) - if not isinstance(cm, CommentedMap): - print(f"[SKIP] {path}: top-level not mapping or load failed") - continue - - ensure_basic_seed(cm, path) - fill_data_defaults(cm, data_defaults, path) - merge_test_log_into_train(cm, path) - fill_train_output_dim(cm, train_output_defaults, path) - sync_train_batch_size(cm, path) # <-- newly added logic - sort_subkeys_and_insert_blanklines(cm) - - try: - with open(path, "w", encoding="utf-8") as f: - yaml.dump(cm, f) - print(f"[OK] Written: {path}") - except Exception as e: - print(f"[ERROR] Write failed {path}: {e}") - - -if __name__ == "__main__": - process_all(".") diff --git a/dataloader/EXPdataloader.py b/dataloader/EXPdataloader.py index 237bf71..18ebf61 100755 --- a/dataloader/EXPdataloader.py +++ b/dataloader/EXPdataloader.py @@ -1,199 +1,90 @@ import numpy as np import torch -from utils.normalization import normalize_dataset from dataloader.data_selector import load_st_dataset +from utils.normalization import normalize_dataset -def get_dataloader(args, normalizer="std", single=True): - # args should now include 'cycle' - data = load_st_dataset(args["type"], args["sample"]) # [T, N, F] - L, N, F = data.shape - # compute cycle index - cycle_arr = np.arange(L) % args["cycle"] # length-L array +_device = "cuda" if torch.cuda.is_available() else "cpu" +to_tensor = lambda a: torch.as_tensor(a, dtype=torch.float32, device=_device) - # Step 1: sliding windows for X and Y - x = add_window_x(data, args["lag"], args["horizon"], single) - y = add_window_y(data, args["lag"], args["horizon"], single) - # window count = M = L - lag - horizon + 1 - M = x.shape[0] +# Sliding window (stride trick, zero copy) +window = lambda d, w, h, o=0: np.lib.stride_tricks.as_strided( + d[o:], + shape=(len(d) - w - h + 1, w, *d.shape[1:]), + strides=(d.strides[0], d.strides[0], *d.strides[1:]) +) - # Step 2: time features - time_in_day = np.tile( - np.array([i % args["steps_per_day"] / args["steps_per_day"] for i in range(L)]), - (N, 1), - ).T.reshape(L, N, 1) - day_in_week = np.tile( - np.array( - [(i // args["steps_per_day"]) %
args["days_per_week"] for i in range(L)] - ), - (N, 1), - ).T.reshape(L, N, 1) +# pad_with_last_sample=True +pad_last = lambda X, Y, bs: ( + (lambda r: ( + (np.concatenate([X, np.repeat(X[-1:], r, 0)], 0), + np.concatenate([Y, np.repeat(Y[-1:], r, 0)], 0)) + if r else (X, Y) + ))((-len(X)) % bs) +) - x_day = add_window_x(time_in_day, args["lag"], args["horizon"], single) - x_week = add_window_x(day_in_week, args["lag"], args["horizon"], single) - x = np.concatenate([x, x_day, x_week], axis=-1) - # del x_day, x_week - # gc.collect() +# Train / Val / Test split +split_by_ratio = lambda d, vr, tr: ( + d[:-(vl := int(len(d) * (vr + tr)))], + d[-vl:-(tl := int(len(d) * tr))], + d[-tl:] +) - # Step 3: extract cycle index per window: take value at end of sequence - cycle_win = np.array([cycle_arr[i + args["lag"]] for i in range(M)]) # shape [M] - # Step 4: split into train/val/test - if args["test_ratio"] > 1: - x_train, x_val, x_test = split_data_by_days( - x, args["val_ratio"], args["test_ratio"] - ) - y_train, y_val, y_test = split_data_by_days( - y, args["val_ratio"], args["test_ratio"] - ) - c_train, c_val, c_test = split_data_by_days( - cycle_win, args["val_ratio"], args["test_ratio"] - ) - else: - x_train, x_val, x_test = split_data_by_ratio( - x, args["val_ratio"], args["test_ratio"] - ) - y_train, y_val, y_test = split_data_by_ratio( - y, args["val_ratio"], args["test_ratio"] - ) - c_train, c_val, c_test = split_data_by_ratio( - cycle_win, args["val_ratio"], args["test_ratio"] - ) - # del x, y, cycle_win - # gc.collect() +def get_dataloader(config, normalizer="std", single_step=True): + data = load_st_dataset(config) + cfg = config["data"] - # Step 5: normalization on X only - scaler = normalize_dataset( - x_train[..., : args["input_dim"]], normalizer, args["column_wise"] - ) - x_train[..., : args["input_dim"]] = scaler.transform( - x_train[..., : args["input_dim"]] - ) - x_val[..., : args["input_dim"]] = scaler.transform(x_val[..., : args["input_dim"]]) - x_test[..., : args["input_dim"]] = scaler.transform( - x_test[..., : args["input_dim"]] + T, N, _ = data.shape + lag, horizon, batch_size, input_dim = ( + cfg["lag"], cfg["horizon"], cfg["batch_size"], cfg["input_dim"] ) - # add time features to Y - y_day = add_window_y(time_in_day, args["lag"], args["horizon"], single) - y_week = add_window_y(day_in_week, args["lag"], args["horizon"], single) - y = np.concatenate([y, y_day, y_week], axis=-1) - # del y_day, y_week, time_in_day, day_in_week - # gc.collect() - - # split Y time-augmented - if args["test_ratio"] > 1: - y_train, y_val, y_test = split_data_by_days( - y, args["val_ratio"], args["test_ratio"] - ) - else: - y_train, y_val, y_test = split_data_by_ratio( - y, args["val_ratio"], args["test_ratio"] - ) - # del y - - # Step 6: create dataloaders including cycle index - train_loader = data_loader_with_cycle( - x_train, y_train, c_train, args["batch_size"], shuffle=True, drop_last=True - ) - val_loader = data_loader_with_cycle( - x_val, y_val, c_val, args["batch_size"], shuffle=False, drop_last=True - ) - test_loader = data_loader_with_cycle( - x_test, y_test, c_test, args["batch_size"], shuffle=False, drop_last=False + # X / Y construction + X = window(data, lag, horizon) + Y = window( + data, + 1 if single_step else horizon, + horizon, + lag if not single_step else lag + horizon - 1 ) - return train_loader, val_loader, test_loader, scaler + # Time features + t = np.arange(T) + time_in_day = np.tile((t % cfg["steps_per_day"]) / cfg["steps_per_day"], (N, 1)).T + day_in_week = 
np.tile((t // cfg["steps_per_day"]) % cfg["days_per_week"], (N, 1)).T + tf = lambda z: window(z[..., None], lag, horizon) + X = np.concatenate([X, tf(time_in_day), tf(day_in_week)], -1) + Y = np.concatenate([Y, tf(time_in_day), tf(day_in_week)], -1) -def data_loader_with_cycle(X, Y, C, batch_size, shuffle=True, drop_last=True): - device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - X_t = torch.tensor(X, dtype=torch.float32, device=device) - Y_t = torch.tensor(Y, dtype=torch.float32, device=device) - C_t = torch.tensor(C, dtype=torch.long, device=device).unsqueeze(-1) # [B,1] - dataset = torch.utils.data.TensorDataset(X_t, Y_t, C_t) - loader = torch.utils.data.DataLoader( - dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last - ) - return loader + # Split + X_train, X_val, X_test = split_by_ratio(X, cfg["val_ratio"], cfg["test_ratio"]) + Y_train, Y_val, Y_test = split_by_ratio(Y, cfg["val_ratio"], cfg["test_ratio"]) - -def split_data_by_days(data, val_days, test_days, interval=30): - t = int((24 * 60) / interval) - test_data = data[-t * int(test_days) :] - val_data = data[-t * int(test_days + val_days) : -t * int(test_days)] - train_data = data[: -t * int(test_days + val_days)] - return train_data, val_data, test_data - - -def split_data_by_ratio(data, val_ratio, test_ratio): - data_len = data.shape[0] - test_data = data[-int(data_len * test_ratio) :] - val_data = data[ - -int(data_len * (test_ratio + val_ratio)) : -int(data_len * test_ratio) + # Channel-wise normalization (fit on train only) + scalers = [ + normalize_dataset(X_train[..., i:i+1], normalizer, cfg["column_wise"]) + for i in range(input_dim) ] - train_data = data[: -int(data_len * (test_ratio + val_ratio))] - return train_data, val_data, test_data + for i, sc in enumerate(scalers): + for d in (X_train, X_val, X_test, Y_train, Y_val, Y_test): + d[..., i:i+1] = sc.transform(d[..., i:i+1]) + # Padding + X_train, Y_train = pad_last(X_train, Y_train, batch_size) + X_val, Y_val = pad_last(X_val, Y_val, batch_size) + X_test, Y_test = pad_last(X_test, Y_test, batch_size) -def data_loader(X, Y, batch_size, shuffle=True, drop_last=True): - device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - X = torch.tensor(X, dtype=torch.float32, device=device) - Y = torch.tensor(Y, dtype=torch.float32, device=device) - data = torch.utils.data.TensorDataset(X, Y) - dataloader = torch.utils.data.DataLoader( - data, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last + # DataLoader + make_loader = lambda X, Y, shuffle: torch.utils.data.DataLoader( + torch.utils.data.TensorDataset(to_tensor(X), to_tensor(Y)), + batch_size=batch_size, shuffle=shuffle, drop_last=False ) - return dataloader - -def add_window_x(data, window=3, horizon=1, single=False): - """ - Generate windowed X values from the input data. - - :param data: Input data, shape [B, ...] - :param window: Size of the sliding window - :param horizon: Horizon size - :param single: If True, generate single-step windows, else multi-step - :return: X with shape [B, W, ...] - """ - length = len(data) - end_index = length - horizon - window + 1 - x = [] # Sliding windows - index = 0 - - while index < end_index: - x.append(data[index : index + window]) - index += 1 - - return np.array(x) - - -def add_window_y(data, window=3, horizon=1, single=False): - """ - Generate windowed Y values from the input data. - - :param data: Input data, shape [B, ...] 
- :param window: Size of the sliding window - :param horizon: Horizon size - :param single: If True, generate single-step windows, else multi-step - :return: Y with shape [B, H, ...] - """ - length = len(data) - end_index = length - horizon - window + 1 - y = [] # Horizon values - index = 0 - - while index < end_index: - if single: - y.append(data[index + window + horizon - 1 : index + window + horizon]) - else: - y.append(data[index + window : index + window + horizon]) - index += 1 - - return np.array(y) - - -if __name__ == "__main__": - res = load_st_dataset("SD", 1) - k = 1 + return ( + make_loader(X_train, Y_train, True), + make_loader(X_val, Y_val, False), + make_loader(X_test, Y_test, False), + scalers + ) diff --git a/dataloader/Informer_loader.py b/dataloader/Informer_loader.py new file mode 100644 index 0000000..1192c03 --- /dev/null +++ b/dataloader/Informer_loader.py @@ -0,0 +1,179 @@ +import numpy as np +import torch +from dataloader.data_selector import load_st_dataset +from utils.normalization import normalize_dataset + + +# ============================================================== +# MAIN ENTRY +# ============================================================== + +def get_dataloader(args, normalizer="std", single=True): + """ + Return dataloaders with x, y, x_mark, y_mark. + This version follows Informer/ETSformer official dataloader behavior. + """ + data = load_st_dataset(args) + args = args["data"] + + x, y, x_mark, y_mark = _prepare_data_with_windows(data, args) + + # --- split --- + split_fn = split_data_by_days if args["test_ratio"] > 1 else split_data_by_ratio + x_train, x_val, x_test = split_fn(x, args["val_ratio"], args["test_ratio"]) + y_train, y_val, y_test = split_fn(y, args["val_ratio"], args["test_ratio"]) + x_mark_train, x_mark_val, x_mark_test = split_fn(x_mark, args["val_ratio"], args["test_ratio"]) + y_mark_train, y_mark_val, y_mark_test = split_fn(y_mark, args["val_ratio"], args["test_ratio"]) + + # --- normalization --- + scaler = _normalize_data(x_train, x_val, x_test, args, normalizer) + _apply_existing_scaler(y_train, y_val, y_test, scaler, args) + + # reshape [b, t, n, c] -> [b*n, t, c] + (x_train, x_val, x_test, + y_train, y_val, y_test, + x_mark_train, x_mark_val, x_mark_test, + y_mark_train, y_mark_val, y_mark_test) = _reshape_tensor( + x_train, x_val, x_test, + y_train, y_val, y_test, + x_mark_train, x_mark_val, x_mark_test, + y_mark_train, y_mark_val, y_mark_test + ) + + # --- dataloaders --- + return ( + _create_dataloader(x_train, y_train, x_mark_train, y_mark_train, + args["batch_size"], True, False), + _create_dataloader(x_val, y_val, x_mark_val, y_mark_val, + args["batch_size"], False, False), + _create_dataloader(x_test, y_test, x_mark_test, y_mark_test, + args["batch_size"], False, False), + scaler + ) + + +# ============================================================== +# Informer-style WINDOW GENERATION +# ============================================================== + +def _prepare_data_with_windows(data, args): + """ + Generate x, y, x_mark, y_mark using Informer slicing rule. 
+ + x: [seq_len] + y: [label_len + pred_len] + """ + seq_len = args["lag"] + label_len = args["label_len"] + pred_len = args["horizon"] + + L, N, C = data.shape + + # ---------- construct timestamp features ---------- + time_in_day, day_in_week = _generate_time_features(L, args) + data_mark = np.concatenate([time_in_day, day_in_week], axis=-1) + + xs, ys, x_marks, y_marks = [], [], [], [] + + for s_begin in range(L - seq_len - pred_len - 1): + s_end = s_begin + seq_len + r_begin = s_end - label_len + r_end = r_begin + label_len + pred_len + + xs.append(data[s_begin:s_end]) + ys.append(data[r_begin:r_end]) + + x_marks.append(data_mark[s_begin:s_end]) + y_marks.append(data_mark[r_begin:r_end]) + + return np.array(xs), np.array(ys), np.array(x_marks), np.array(y_marks) + + +# ============================================================== +# TIME FEATURE +# ============================================================== + +def _generate_time_features(L, args): + N = args["num_nodes"] + + # Time in day + tid = np.array([i % args["steps_per_day"] / args["steps_per_day"] for i in range(L)]) + tid = np.tile(tid[:, None], (1, N)) + + # Day in week + diw = np.array([(i // args["steps_per_day"]) % args["days_per_week"] for i in range(L)]) + diw = np.tile(diw[:, None], (1, N)) + + return tid[..., None], diw[..., None] + + +# ============================================================== +# NORMALIZATION +# ============================================================== + +def _normalize_data(train_data, val_data, test_data, args, normalizer): + scaler = normalize_dataset( + train_data[..., :args["input_dim"]], + normalizer, args["column_wise"] + ) + for data in [train_data, val_data, test_data]: + data[..., :args["input_dim"]] = scaler.transform( + data[..., :args["input_dim"]] + ) + return scaler + + +def _apply_existing_scaler(train_data, val_data, test_data, scaler, args): + for data in [train_data, val_data, test_data]: + data[..., :args["input_dim"]] = scaler.transform( + data[..., :args["input_dim"]] + ) + + +# ============================================================== +# DATALOADER +# ============================================================== + +def _create_dataloader(x, y, x_mark, y_mark, batch_size, shuffle, drop_last): + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + dataset = torch.utils.data.TensorDataset( + torch.tensor(x, dtype=torch.float32, device=device), + torch.tensor(y, dtype=torch.float32, device=device), + torch.tensor(x_mark, dtype=torch.float32, device=device), + torch.tensor(y_mark, dtype=torch.float32, device=device), + ) + return torch.utils.data.DataLoader(dataset, batch_size=batch_size, + shuffle=shuffle, drop_last=drop_last) + + +# ============================================================== +# SPLIT +# ============================================================== + +def split_data_by_days(data, val_days, test_days, interval=30): + t = int((24 * 60) / interval) + test_data = data[-t * int(test_days):] + val_data = data[-t * int(test_days + val_days):-t * int(test_days)] + train_data = data[:-t * int(test_days + val_days)] + return train_data, val_data, test_data + + +def split_data_by_ratio(data, val_ratio, test_ratio): + L = len(data) + test_data = data[-int(L * test_ratio):] + val_data = data[-int(L * (test_ratio + val_ratio)):-int(L * test_ratio)] + train_data = data[: -int(L * (test_ratio + val_ratio))] + return train_data, val_data, test_data + + +# ============================================================== +# RESHAPE [B,T,N,C] 
-> [B*N,T,C] +# ============================================================== + +def _reshape_tensor(*tensors): + reshaped = [] + for x in tensors: + b, t, n, c = x.shape + x_new = x.transpose(0, 2, 1, 3).reshape(b * n, t, c) + reshaped.append(x_new) + return reshaped diff --git a/dataloader/PeMSDdataloader.py b/dataloader/PeMSDdataloader.py index ea6e89b..18ebf61 100755 --- a/dataloader/PeMSDdataloader.py +++ b/dataloader/PeMSDdataloader.py @@ -1,158 +1,90 @@ -from utils.normalization import normalize_dataset -from dataloader.data_selector import load_st_dataset - import numpy as np import torch +from dataloader.data_selector import load_st_dataset +from utils.normalization import normalize_dataset -def get_dataloader(args, normalizer="std", single=True): - data = load_st_dataset(args) - args = args["data"] - L, N, F = data.shape +_device = "cuda" if torch.cuda.is_available() else "cpu" +to_tensor = lambda a: torch.as_tensor(a, dtype=torch.float32, device=_device) - # Generate sliding windows for main data and add time features - x, y = _prepare_data_with_windows(data, args, single) - - # Split data - split_fn = split_data_by_days if args["test_ratio"] > 1 else split_data_by_ratio - x_train, x_val, x_test = split_fn(x, args["val_ratio"], args["test_ratio"]) - y_train, y_val, y_test = split_fn(y, args["val_ratio"], args["test_ratio"]) +# Sliding window (stride trick, zero copy) +window = lambda d, w, h, o=0: np.lib.stride_tricks.as_strided( + d[o:], + shape=(len(d) - w - h + 1, w, *d.shape[1:]), + strides=(d.strides[0], d.strides[0], *d.strides[1:]) +) - # Normalize x and y using the same scaler - scaler = _normalize_data(x_train, x_val, x_test, args, normalizer) - _apply_existing_scaler(y_train, y_val, y_test, scaler, args) +# pad_with_last_sample=True +pad_last = lambda X, Y, bs: ( + (lambda r: ( + (np.concatenate([X, np.repeat(X[-1:], r, 0)], 0), + np.concatenate([Y, np.repeat(Y[-1:], r, 0)], 0)) + if r else (X, Y) + ))((-len(X)) % bs) +) - # Create dataloaders - return ( - _create_dataloader(x_train, y_train, args["batch_size"], True, False), - _create_dataloader(x_val, y_val, args["batch_size"], False, False), - _create_dataloader(x_test, y_test, args["batch_size"], False, False), - scaler +# Train / Val / Test split +split_by_ratio = lambda d, vr, tr: ( + d[:-(vl := int(len(d) * (vr + tr)))], + d[-vl:-(tl := int(len(d) * tr))], + d[-tl:] +) + + +def get_dataloader(config, normalizer="std", single_step=True): + data = load_st_dataset(config) + cfg = config["data"] + + T, N, _ = data.shape + lag, horizon, batch_size, input_dim = ( + cfg["lag"], cfg["horizon"], cfg["batch_size"], cfg["input_dim"] ) + # X / Y construction + X = window(data, lag, horizon) + Y = window( + data, + 1 if single_step else horizon, + horizon, + lag if not single_step else lag + horizon - 1 + ) -def _prepare_data_with_windows(data, args, single): - # Generate sliding windows for main data - x = add_window_x(data, args["lag"], args["horizon"], single) - y = add_window_y(data, args["lag"], args["horizon"], single) + # Time features + t = np.arange(T) + time_in_day = np.tile((t % cfg["steps_per_day"]) / cfg["steps_per_day"], (N, 1)).T + day_in_week = np.tile((t // cfg["steps_per_day"]) % cfg["days_per_week"], (N, 1)).T + tf = lambda z: window(z[..., None], lag, horizon) - # Generate time features - time_features = _generate_time_features(data.shape[0], args) - - # Add time features to x and y - x = _add_time_features(x, time_features, args["lag"], args["horizon"], single, add_window_x) - y = _add_time_features(y, 
time_features, args["lag"], args["horizon"], single, add_window_y) - - return x, y + X = np.concatenate([X, tf(time_in_day), tf(day_in_week)], -1) + Y = np.concatenate([Y, tf(time_in_day), tf(day_in_week)], -1) + # Split + X_train, X_val, X_test = split_by_ratio(X, cfg["val_ratio"], cfg["test_ratio"]) + Y_train, Y_val, Y_test = split_by_ratio(Y, cfg["val_ratio"], cfg["test_ratio"]) -def _generate_time_features(L, args): - N = args["num_nodes"] - time_in_day = [i % args["steps_per_day"] / args["steps_per_day"] for i in range(L)] - time_in_day = np.tile(np.array(time_in_day), [1, N, 1]).transpose((2, 1, 0)) - - day_in_week = [(i // args["steps_per_day"]) % args["days_per_week"] for i in range(L)] - day_in_week = np.tile(np.array(day_in_week), [1, N, 1]).transpose((2, 1, 0)) - - return time_in_day, day_in_week - - -def _add_time_features(data, time_features, lag, horizon, single, window_fn): - time_in_day, day_in_week = time_features - time_day = window_fn(time_in_day, lag, horizon, single) - time_week = window_fn(day_in_week, lag, horizon, single) - return np.concatenate([data, time_day, time_week], axis=-1) - - -def _normalize_data(train_data, val_data, test_data, args, normalizer): - scaler = normalize_dataset(train_data[..., : args["input_dim"]], normalizer, args["column_wise"]) - - for data in [train_data, val_data, test_data]: - data[..., : args["input_dim"]] = scaler.transform(data[..., : args["input_dim"]]) - - return scaler - - -def _apply_existing_scaler(train_data, val_data, test_data, scaler, args): - for data in [train_data, val_data, test_data]: - data[..., : args["input_dim"]] = scaler.transform(data[..., : args["input_dim"]]) - - -def _create_dataloader(X_data, Y_data, batch_size, shuffle, drop_last): - device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - X_tensor = torch.tensor(X_data, dtype=torch.float32, device=device) - Y_tensor = torch.tensor(Y_data, dtype=torch.float32, device=device) - dataset = torch.utils.data.TensorDataset(X_tensor, Y_tensor) - return torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last) - - -def split_data_by_days(data, val_days, test_days, interval=30): - t = int((24 * 60) / interval) - test_data = data[-t * int(test_days) :] - val_data = data[-t * int(test_days + val_days) : -t * int(test_days)] - train_data = data[: -t * int(test_days + val_days)] - return train_data, val_data, test_data - - -def split_data_by_ratio(data, val_ratio, test_ratio): - data_len = data.shape[0] - test_data = data[-int(data_len * test_ratio) :] - val_data = data[ - -int(data_len * (test_ratio + val_ratio)) : -int(data_len * test_ratio) + # Channel-wise normalization (fit on train only) + scalers = [ + normalize_dataset(X_train[..., i:i+1], normalizer, cfg["column_wise"]) + for i in range(input_dim) ] - train_data = data[: -int(data_len * (test_ratio + val_ratio))] - return train_data, val_data, test_data + for i, sc in enumerate(scalers): + for d in (X_train, X_val, X_test, Y_train, Y_val, Y_test): + d[..., i:i+1] = sc.transform(d[..., i:i+1]) + # Padding + X_train, Y_train = pad_last(X_train, Y_train, batch_size) + X_val, Y_val = pad_last(X_val, Y_val, batch_size) + X_test, Y_test = pad_last(X_test, Y_test, batch_size) + # DataLoader + make_loader = lambda X, Y, shuffle: torch.utils.data.DataLoader( + torch.utils.data.TensorDataset(to_tensor(X), to_tensor(Y)), + batch_size=batch_size, shuffle=shuffle, drop_last=False + ) - -def _generate_windows(data, window=3, horizon=1, offset=0): - """ - Internal helper 
function to generate sliding windows. - - :param data: Input data - :param window: Window size - :param horizon: Horizon size - :param offset: Offset from window start - :return: Windowed data - """ - length = len(data) - end_index = length - horizon - window + 1 - windows = [] - index = 0 - - while index < end_index: - windows.append(data[index + offset : index + offset + window]) - index += 1 - - return np.array(windows) - -def add_window_x(data, window=3, horizon=1, single=False): - """ - Generate windowed X values from the input data. - - :param data: Input data, shape [B, ...] - :param window: Size of the sliding window - :param horizon: Horizon size - :param single: If True, generate single-step windows, else multi-step - :return: X with shape [B, W, ...] - """ - return _generate_windows(data, window, horizon, offset=0) - -def add_window_y(data, window=3, horizon=1, single=False): - """ - Generate windowed Y values from the input data. - - :param data: Input data, shape [B, ...] - :param window: Size of the sliding window - :param horizon: Horizon size - :param single: If True, generate single-step windows, else multi-step - :return: Y with shape [B, H, ...] - """ - offset = window if not single else window + horizon - 1 - return _generate_windows(data, window=1 if single else horizon, horizon=horizon, offset=offset) - -if __name__ == "__main__": - from dataloader.data_selector import load_st_dataset - res = load_st_dataset({"dataset": "SD"}) - print(f"Dataset shape: {res.shape}") + return ( + make_loader(X_train, Y_train, True), + make_loader(X_val, Y_val, False), + make_loader(X_test, Y_test, False), + scalers + ) diff --git a/dataloader/TSloader.py b/dataloader/TSloader.py new file mode 100755 index 0000000..66ef45c --- /dev/null +++ b/dataloader/TSloader.py @@ -0,0 +1,192 @@ +from dataloader.data_selector import load_st_dataset +from utils.normalization import normalize_dataset + +import numpy as np +import torch + + +def get_dataloader(args, normalizer="std", single=True): + data = load_st_dataset(args) + # data = data[..., 0:1] + + args = args["data"] + L, N, F = data.shape + # data = data.reshape(L, N*F) # [L, N*F] + + # Generate sliding windows for main data and add time features + x, y = _prepare_data_with_windows(data, args, single) + + # Split data [b,t,n,c] + split_fn = split_data_by_days if args["test_ratio"] > 1 else split_data_by_ratio + x_train, x_val, x_test = split_fn(x, args["val_ratio"], args["test_ratio"]) + y_train, y_val, y_test = split_fn(y, args["val_ratio"], args["test_ratio"]) + + # Normalize x and y using the same scaler + scaler = _normalize_data(x_train, x_val, x_test, args, normalizer) + _apply_existing_scaler(y_train, y_val, y_test, scaler, args) + + # reshape [b,t,n,c] -> [b*n, t, c] + x_train, x_val, x_test, y_train, y_val, y_test = \ + _reshape_tensor(x_train, x_val, x_test, y_train, y_val, y_test) + + # Create dataloaders + return ( + _create_dataloader(x_train, y_train, args["batch_size"], True, False), + _create_dataloader(x_val, y_val, args["batch_size"], False, False), + _create_dataloader(x_test, y_test, args["batch_size"], False, False), + scaler + ) + +def _reshape_tensor(*tensors): + """Reshape tensors from [b, t, n, c] -> [b*n, t, c].""" + reshaped = [] + for x in tensors: + # x 是 ndarray:shape (b, t, n, c) + b, t, n, c = x.shape + x_new = x.transpose(0, 2, 1, 3).reshape(b * n, t, c) + reshaped.append(x_new) + return reshaped + +def _prepare_data_with_windows(data, args, single): + # Generate sliding windows for main data + x = 
add_window_x(data, args["lag"], args["horizon"], single) + y = add_window_y(data, args["lag"], args["horizon"], single) + return x, y + +def _normalize_data(train_data, val_data, test_data, args, normalizer): + scaler = normalize_dataset(train_data[..., : args["num_nodes"]], normalizer, args["column_wise"]) + + for data in [train_data, val_data, test_data]: + data[..., : args["num_nodes"]] = scaler.transform(data[..., : args["num_nodes"]]) + + return scaler + + +def _apply_existing_scaler(train_data, val_data, test_data, scaler, args): + for data in [train_data, val_data, test_data]: + data[..., : args["num_nodes"]] = scaler.transform(data[..., : args["num_nodes"]]) + + +def _create_dataloader(X_data, Y_data, batch_size, shuffle, drop_last): + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + X_tensor = torch.tensor(X_data, dtype=torch.float32, device=device) + Y_tensor = torch.tensor(Y_data, dtype=torch.float32, device=device) + dataset = torch.utils.data.TensorDataset(X_tensor, Y_tensor) + return torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last) + + +def split_data_by_days(data, val_days, test_days, interval=30): + t = int((24 * 60) / interval) + test_data = data[-t * int(test_days) :] + val_data = data[-t * int(test_days + val_days) : -t * int(test_days)] + train_data = data[: -t * int(test_days + val_days)] + return train_data, val_data, test_data + + +def split_data_by_ratio(data, val_ratio, test_ratio): + data_len = data.shape[0] + test_data = data[-int(data_len * test_ratio) :] + val_data = data[-int(data_len * (test_ratio + val_ratio)) : -int(data_len * test_ratio)] + train_data = data[: -int(data_len * (test_ratio + val_ratio))] + return train_data, val_data, test_data + +def _generate_windows(data, window=3, horizon=1, offset=0): + """ + Internal helper function to generate sliding windows. + + :param data: Input data, shape [L, T, C] + :param window: Window size + :param horizon: Horizon size + :param offset: Offset from window start + :return: Windowed data, shape [num_windows, window, T, C] + """ + length = len(data) + end_index = length - horizon - window + 1 + windows = [] + index = 0 + + if end_index <= 0: + raise ValueError(f"end_index is non-positive: {end_index}, length={length}, horizon={horizon}, window={window}") + + while index < end_index: + window_data = data[index + offset : index + offset + window] + windows.append(window_data) + index += 1 + + if not windows: + raise ValueError("No windows generated") + + # Check window shapes + first_shape = windows[0].shape + for i, w in enumerate(windows): + if w.shape != first_shape: + raise ValueError(f"Window {i} has shape {w.shape}, expected {first_shape}") + + return np.array(windows) + +def add_window_x(data, window=3, horizon=1, single=False): + """ + Generate windowed X values from the input data. + + :param data: Input data, shape [L, T, C] + :param window: Size of the sliding window + :param horizon: Horizon size + :param single: If True, generate single-step windows, else multi-step + :return: X with shape [num_windows, window, T, C] + """ + return _generate_windows(data, window, horizon, offset=0) + +def add_window_y(data, window=3, horizon=1, single=False): + """ + Generate windowed Y values from the input data. 
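# ------------------------------------------------------------------
# Editor's sketch (not part of this patch): how the X/Y offsets above
# line up on a toy series, with window=3 and horizon=2.
import numpy as np

data = np.arange(10)
x0 = data[0:3]                       # X window at i=0 (offset 0, length `window`)
y_multi = data[3:5]                  # multi-step Y: offset = window
y_single = data[3 + 2 - 1 : 3 + 2]   # single-step Y: offset = window + horizon - 1
assert y_multi.tolist() == [3, 4] and y_single.tolist() == [4]
# Caveat (unverified): with horizon also passed to _generate_windows for Y,
# the X and Y window counts appear to match on the multi-step path only
# when lag == horizon, which holds in the 24-in/24-out configs added here.
# ------------------------------------------------------------------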
+ + :param data: Input data, shape [L, T, C] + :param window: Size of the sliding window + :param horizon: Horizon size + :param single: If True, generate single-step windows, else multi-step + :return: Y with shape [num_windows, horizon, T, C] + """ + offset = window if not single else window + horizon - 1 + return _generate_windows(data, window=1 if single else horizon, horizon=horizon, offset=offset) + +if __name__ == "__main__": + + # Test with a dummy config using METR-LA dataset + dummy_args = { + "basic": { + "dataset": "METR-LA" + }, + "data": { + "lag": 3, + "horizon": 1, + "val_ratio": 0.1, + "test_ratio": 0.2, + "steps_per_day": 288, + "days_per_week": 7, + "input_dim": 1, + "column_wise": False, + "batch_size": 32, + "time_dim": 1 # Add time dimension parameter + } + } + + try: + # Load data + data = load_st_dataset(dummy_args) + print(f"Original data shape: {data.shape}") + + # Get dataloader + train_loader, val_loader, test_loader, scaler = get_dataloader(dummy_args) + + # Test data loader + for batch_x, batch_y in train_loader: + print(f"Batch X shape: {batch_x.shape}") + print(f"Batch Y shape: {batch_y.shape}") + break + + print("Test passed successfully!") + + except Exception as e: + print(f"Test failed with error: {e}") + import traceback + traceback.print_exc() \ No newline at end of file diff --git a/dataloader/data_selector.py b/dataloader/data_selector.py index e0b23e1..bd8e61a 100644 --- a/dataloader/data_selector.py +++ b/dataloader/data_selector.py @@ -2,95 +2,80 @@ import os import numpy as np import h5py + def load_st_dataset(config): dataset = config["basic"]["dataset"] - # sample = config["data"]["sample"] - # output B, N, D - match dataset: - case "BeijingAirQuality": - data_path = os.path.join("./data/BeijingAirQuality/data.dat") - data = np.memmap(data_path, dtype=np.float32, mode='r') - L, N, C = 36000, 7, 3 - data = data.reshape(L, N, C) - case "AirQuality": - data_path = os.path.join("./data/AirQuality/data.dat") - data = np.memmap(data_path, dtype=np.float32, mode='r') - L, N, C = 8701,35,6 - data = data.reshape(L, N, C) - case "PEMS-BAY": - data_path = os.path.join("./data/PEMS-BAY/pems-bay.h5") - with h5py.File(data_path, 'r') as f: - data = f['speed']['block0_values'][:] - case "METR-LA": - data_path = os.path.join("./data/METR-LA/METR-LA.h5") - with h5py.File(data_path, 'r') as f: - data = f['df']['block0_values'][:] - case "SolarEnergy": - data_path = os.path.join("./data/SolarEnergy/SolarEnergy.csv") - data = np.loadtxt(data_path, delimiter=",") - case "PEMSD3": - data_path = os.path.join("./data/PEMS03/PEMS03.npz") - data = np.load(data_path)["data"][:, :, 0] - case "PEMSD4": - data_path = os.path.join("./data/PEMS04/PEMS04.npz") - data = np.load(data_path)["data"][:, :, 0] - case "PEMSD7": - data_path = os.path.join("./data/PEMS07/PEMS07.npz") - data = np.load(data_path)["data"][:, :, 0] - case "PEMSD8": - data_path = os.path.join("./data/PEMS08/PEMS08.npz") - data = np.load(data_path)["data"][:, :, 0] - case "PEMSD7(L)": - data_path = os.path.join("./data/PEMS07(L)/PEMS07L.npz") - data = np.load(data_path)["data"][:, :, 0] - case "PEMSD7(M)": - data_path = os.path.join("./data/PEMS07(M)/V_228.csv") - data = np.genfromtxt(data_path, delimiter=",") - case "BJ": - data_path = os.path.join("./data/BJ/BJ500.csv") - data = np.genfromtxt(data_path, delimiter=",", skip_header=1) - case "Hainan": - data_path = os.path.join("./data/Hainan/Hainan.npz") - data = np.load(data_path)["data"][:, :, 0] - case "SD": - data_path = 
os.path.join("./data/SD/data.npz") - data = np.load(data_path)["data"][:, :, 0].astype(np.float32) - case "BJTaxi-InFlow": - data = read_BeijingTaxi()[:, :, 0:1].astype(np.float32) - case "BJTaxi-OutFlow": - data = read_BeijingTaxi()[:, :, 1:2].astype(np.float32) - case "NYCBike-InFlow": - data_path = os.path.join("./data/NYCBike/NYC16x8.h5") - with h5py.File(data_path, 'r') as f: - data = f['data'][:].astype(np.float32) - data = data.transpose(0,2,3,1).reshape(-1, 16*8, 2) - data = data[:, :, 0:1] - case "NYCBike-OutFlow": - data_path = os.path.join("./data/NYCBike/NYC16x8.h5") - with h5py.File(data_path, 'r') as f: - data = f['data'][:].astype(np.float32) - data = data.transpose(0,2,3,1).reshape(-1, 16*8, 2) - data = data[:, :, 1:2] - case _: - raise ValueError(f"Unsupported dataset: {dataset}") - # Ensure data shape compatibility - if len(data.shape) == 2: - data = np.expand_dims(data, axis=-1) + loaders = { + "BeijingAirQuality": lambda: _memmap("./data/BeijingAirQuality/data.dat", 36000, 7, 3), + "AirQuality": lambda: _memmap("./data/AirQuality/data.dat", 8701, 35, 6), - print("加载 %s 数据集中... " % dataset) - # return data[::sample] + "PEMS-BAY": lambda: _h5("./data/PEMS-BAY/pems-bay.h5", ("speed", "block0_values")), + "METR-LA": lambda: _h5("./data/METR-LA/METR-LA.h5", ("df", "block0_values")), + + "SolarEnergy": lambda: np.loadtxt("./data/SolarEnergy/SolarEnergy.csv", delimiter=","), + + "PEMSD3": lambda: _npz("./data/PEMS03/PEMS03.npz"), + "PEMSD4": lambda: _npz("./data/PEMS04/PEMS04.npz"), + "PEMSD7": lambda: _npz("./data/PEMS07/PEMS07.npz"), + "PEMSD8": lambda: _npz("./data/PEMS08/PEMS08.npz"), + + "PEMSD7(L)": lambda: _npz("./data/PEMS07(L)/PEMS07L.npz"), + "PEMSD7(M)": lambda: np.genfromtxt("./data/PEMS07(M)/V_228.csv", delimiter=","), + + "BJ": lambda: np.genfromtxt("./data/BJ/BJ500.csv", delimiter=",", skip_header=1), + "Hainan": lambda: _npz("./data/Hainan/Hainan.npz"), + "SD": lambda: _npz("./data/SD/data.npz", cast=True), + + "BJTaxi-InFlow": lambda: read_BeijingTaxi()[:, :, 0:1].astype(np.float32), + "BJTaxi-OutFlow": lambda: read_BeijingTaxi()[:, :, 1:2].astype(np.float32), + + "NYCBike-InFlow": lambda: _nyc_bike(0), + "NYCBike-OutFlow": lambda: _nyc_bike(1), + } + + if dataset not in loaders: + raise ValueError(f"Unsupported dataset: {dataset}") + + data = loaders[dataset]() + + if data.ndim == 2: + data = data[..., None] + + print(f"加载 {dataset} 数据集中... 
") return data + +# ---------------- helpers ---------------- +def _memmap(path, L, N, C): + data = np.memmap(path, dtype=np.float32, mode="r") + return data.reshape(L, N, C) + + +def _h5(path, keys): + with h5py.File(path, "r") as f: + return f[keys[0]][keys[1]][:] + + +def _npz(path, cast=False): + data = np.load(path)["data"][:, :, 0] + return data.astype(np.float32) if cast else data + + +def _nyc_bike(channel): + with h5py.File("./data/NYCBike/NYC16x8.h5", "r") as f: + data = f["data"][:].astype(np.float32) + data = data.transpose(0, 2, 3, 1).reshape(-1, 16 * 8, 2) + return data[:, :, channel:channel + 1] + + def read_BeijingTaxi(): - files = ["TaxiBJ2013.npy", "TaxiBJ2014.npy", "TaxiBJ2015.npy", - "TaxiBJ2016_1.npy", "TaxiBJ2016_2.npy"] - all_data = [] - for file in files: - data_path = os.path.join(f"./data/BeijingTaxi/{file}") - data = np.load(data_path) - all_data.append(data) - all_data = np.concatenate(all_data, axis=0) - time_num = all_data.shape[0] - all_data = all_data.transpose(0, 2, 3, 1).reshape(time_num, 32*32, 2) - return all_data \ No newline at end of file + files = [ + "TaxiBJ2013.npy", "TaxiBJ2014.npy", "TaxiBJ2015.npy", + "TaxiBJ2016_1.npy", "TaxiBJ2016_2.npy", + ] + data = np.concatenate( + [np.load(f"./data/BeijingTaxi/{f}") for f in files], axis=0 + ) + T = data.shape[0] + return data.transpose(0, 2, 3, 1).reshape(T, 32 * 32, 2) diff --git a/dataloader/loader_selector.py b/dataloader/loader_selector.py index f8dacdb..caeeb03 100755 --- a/dataloader/loader_selector.py +++ b/dataloader/loader_selector.py @@ -3,18 +3,17 @@ from dataloader.PeMSDdataloader import get_dataloader as normal_loader from dataloader.DCRNNdataloader import get_dataloader as DCRNN_loader from dataloader.EXPdataloader import get_dataloader as EXP_loader from dataloader.cde_loader.cdeDataloader import get_dataloader as nrde_loader +from dataloader.TSloader import get_dataloader as TS_loader +from dataloader.Informer_loader import get_dataloader as Informer_loader def get_dataloader(config, normalizer, single): - model_name = config["basic"]["model"] - match model_name: - case "STGNCDE": - return cde_loader(config, normalizer, single) - case "STGNRDE": - return nrde_loader(config, normalizer, single) - case "DCRNN": - return DCRNN_loader(config, normalizer, single) - case "EXP": - return EXP_loader(config, normalizer, single) - case _: - return normal_loader(config, normalizer, single) + loader_map = { + "STGNCDE": cde_loader, + "STGNRDE": nrde_loader, + "DCRNN": DCRNN_loader, + "EXP": EXP_loader, + } + return loader_map.get(config["basic"]["model"], normal_loader)( + config, normalizer, single + ) diff --git a/model/AGCRN/model_config.json b/model/AGCRN/model_config.json new file mode 100644 index 0000000..e1c9b61 --- /dev/null +++ b/model/AGCRN/model_config.json @@ -0,0 +1,7 @@ +[ + { + "name": "AGCRN", + "module": "model.AGCRN.AGCRN", + "entry": "AGCRN" + } +] \ No newline at end of file diff --git a/model/ARIMA/model_config.json b/model/ARIMA/model_config.json new file mode 100644 index 0000000..9b33c5c --- /dev/null +++ b/model/ARIMA/model_config.json @@ -0,0 +1,7 @@ +[ + { + "name": "ARIMA", + "module": "model.ARIMA.ARIMA", + "entry": "ARIMA" + } +] \ No newline at end of file diff --git a/model/AEPSA/Chebyshev+Laplacian_construction.py b/model/ASTRA/Chebyshev+Laplacian_construction.py similarity index 100% rename from model/AEPSA/Chebyshev+Laplacian_construction.py rename to model/ASTRA/Chebyshev+Laplacian_construction.py diff --git a/model/AEPSA/aepsa.py b/model/ASTRA/astra.py similarity 
index 76%
rename from model/AEPSA/aepsa.py
rename to model/ASTRA/astra.py
index 7ea003d..f0d32e5 100644
--- a/model/AEPSA/aepsa.py
+++ b/model/ASTRA/astra.py
@@ -2,27 +2,20 @@
 import torch
 import torch.nn as nn
 from transformers.models.gpt2.modeling_gpt2 import GPT2Model
 from einops import rearrange
-from model.AEPSA.normalizer import GumbelSoftmax
-from model.AEPSA.reprogramming import PatchEmbedding, ReprogrammingLayer
+from model.ASTRA.normalizer import GumbelSoftmax
+from model.ASTRA.reprogramming import PatchEmbedding, ReprogrammingLayer
 import torch.nn.functional as F

 class DynamicGraphEnhancer(nn.Module):
-    """
-    Dynamic graph enhancer that generates the graph structure automatically
-    from node embeddings. Follows the design of DDGCRN: node embeddings and
-    feature information are combined to compute the adjacency matrix dynamically.
-    """
+    """Dynamic graph enhancement encoder"""
     def __init__(self, num_nodes, in_dim, embed_dim=10):
         super().__init__()
-        self.num_nodes = num_nodes
-        self.embed_dim = embed_dim
+        self.num_nodes = num_nodes  # number of nodes
+        self.embed_dim = embed_dim  # node embedding dimension

-        # node embedding parameters
-        self.node_embeddings = nn.Parameter(
-            torch.randn(num_nodes, embed_dim), requires_grad=True
-        )
+        self.node_embeddings = nn.Parameter(torch.randn(num_nodes, embed_dim), requires_grad=True)  # node embedding parameters

-        # feature transform layer producing dynamically adjusted embeddings
-        self.feature_transform = nn.Sequential(
+        self.feature_transform = nn.Sequential(  # feature transform network
             nn.Linear(in_dim, 16),
             nn.Sigmoid(),
             nn.Linear(16, 2),
@@ -30,48 +23,29 @@ class DynamicGraphEnhancer(nn.Module):
             nn.Linear(2, embed_dim)
         )

-        # register the identity matrix as a fixed support
-        self.register_buffer("eye", torch.eye(num_nodes))
+        self.register_buffer("eye", torch.eye(num_nodes))  # register the identity matrix

     def get_laplacian(self, graph, I, normalize=True):
-        """
-        Compute the normalized Laplacian matrix
-        """
-        # inverse square root of the degree matrix
-        D_inv = torch.diag_embed(torch.sum(graph, -1) ** (-0.5))
+        D_inv = torch.diag_embed(torch.sum(graph, -1) ** (-0.5))  # inverse square root of the degree matrix
         D_inv[torch.isinf(D_inv)] = 0.0  # guard against division by zero
-        if normalize:
-            return torch.matmul(torch.matmul(D_inv, graph), D_inv)
+        if normalize:
+            return torch.matmul(torch.matmul(D_inv, graph), D_inv)  # normalized Laplacian
         else:
-            return torch.matmul(torch.matmul(D_inv, graph + I), D_inv)
+            return torch.matmul(torch.matmul(D_inv, graph + I), D_inv)  # normalized Laplacian with self-loops

     def forward(self, X):
-        """
-        X: input features [B, N, D]
-        Returns: dynamically generated normalized Laplacians [B, N, N]
-        """
-        batch_size = X.size(0)
-        laplacians = []
-
-        # fetch the identity matrix
-        I = self.eye.to(X.device)
+        """Generate dynamic Laplacian matrices"""
+        batch_size = X.size(0)  # batch size
+        laplacians = []  # Laplacians for each sample in the batch
+        I = self.eye.to(X.device)  # move the identity matrix to the target device

         for b in range(batch_size):
-            # dynamic embedding adjustment factors from the feature transform
-            filt = self.feature_transform(X[b])  # [N, embed_dim]
-
-            # compute node embedding vectors
-            nodevec = torch.tanh(self.node_embeddings * filt)
-
-            # adjacency matrix from dot products of node embeddings
-            adj = F.relu(torch.matmul(nodevec, nodevec.transpose(0, 1)))
-
-            # compute the normalized Laplacian
-            laplacian = self.get_laplacian(adj, I)
+            filt = self.feature_transform(X[b])  # feature transform
+            nodevec = torch.tanh(self.node_embeddings * filt)  # compute node embeddings
+            adj = F.relu(torch.matmul(nodevec, nodevec.transpose(0, 1)))  # compute the adjacency matrix
+            laplacian = self.get_laplacian(adj, I)  # compute the Laplacian
             laplacians.append(laplacian)
-
-        return torch.stack(laplacians, dim=0)
+        return torch.stack(laplacians, dim=0)  # stack and return

 class GraphEnhancedEncoder(nn.Module):
     """
@@ -147,10 +121,10 @@ class GraphEnhancedEncoder(nn.Module):
         return torch.stack(enhanced_features, dim=0)
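The per-sample loop in `DynamicGraphEnhancer.forward` runs one matmul chain per batch element. For reference, the same computation can be done in one batched shot; this is an editor's sketch under the shapes used above (`node_embeddings` is `[N, D]`, the transformed features are `[B, N, D]`), not code from this patch:

import torch
import torch.nn.functional as F

def batched_laplacians(node_embeddings, filt):
    """filt: [B, N, D] output of feature_transform; returns [B, N, N]."""
    nodevec = torch.tanh(node_embeddings.unsqueeze(0) * filt)        # [B, N, D]
    adj = F.relu(torch.einsum('bnd,bmd->bnm', nodevec, nodevec))     # [B, N, N]
    d_inv = adj.sum(-1).pow(-0.5)                                    # [B, N]
    d_inv[torch.isinf(d_inv)] = 0.0                                  # zero-degree guard
    return d_inv.unsqueeze(-1) * adj * d_inv.unsqueeze(-2)           # D^-1/2 A D^-1/2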
-class AEPSA(nn.Module):
+class ASTRA(nn.Module):

     def __init__(self, configs):
-        super(AEPSA, self).__init__()
+        super(ASTRA, self).__init__()
         self.device = configs['device']
         self.pred_len = configs['pred_len']
         self.seq_len = configs['seq_len']
@@ -190,8 +164,8 @@
         # dynamic graph enhancement encoder
         self.graph_encoder = GraphEnhancedEncoder(
             K=configs.get('chebyshev_order', 3),
-            in_dim=self.d_model,
-            hidden_dim=configs.get('graph_hidden_dim', 32),
+            in_dim=self.d_model * self.input_dim,
+            hidden_dim=self.d_model,
             num_nodes=self.num_nodes,
             embed_dim=configs.get('graph_embed_dim', 10),
             device=self.device
@@ -199,14 +173,14 @@

         # feature fusion layer
         self.feature_fusion = nn.Linear(
-            self.d_model + configs.get('graph_hidden_dim', 32) * (configs.get('chebyshev_order', 3) + 1),
+            self.d_model * self.input_dim + self.d_model * (configs.get('chebyshev_order', 3) + 1),
             self.d_model
         )

         self.out_mlp = nn.Sequential(
             nn.Linear(self.d_llm, 128),
             nn.ReLU(),
-            nn.Linear(128, self.pred_len)
+            nn.Linear(128, self.pred_len * self.output_dim)
         )

         for i, (name, param) in enumerate(self.gpts.named_parameters()):
@@ -229,10 +203,9 @@
         x = x[..., :self.input_dim]
         x_enc = rearrange(x, 'b t n c -> b n c t')
         # original patch embedding
-        enc_out, n_vars = self.patch_embedding(x_enc)  # (B, N, C)
+        enc_out, n_vars = self.patch_embedding(x_enc)  # (B, N, d_model * input_dim)
         # apply the graph-enhanced encoder (graph structure generated automatically)
-        graph_enhanced = self.graph_encoder(enc_out)
-        # feature fusion: both tensors are now three-dimensional [B, N, d_model]
+        graph_enhanced = self.graph_encoder(enc_out)  # (B, N, (K + 1) * hidden_dim)
         enc_out = torch.cat([enc_out, graph_enhanced], dim=-1)
         enc_out = self.feature_fusion(enc_out)
@@ -243,9 +216,10 @@
         enc_out = self.reprogramming_layer(enc_out, source_embeddings, source_embeddings)
         enc_out = self.gpts(inputs_embeds=enc_out).last_hidden_state

-        dec_out = self.out_mlp(enc_out)
-        outputs = dec_out.unsqueeze(dim=-1)
-        outputs = outputs.repeat(1, 1, 1, n_vars)
-        outputs = outputs.permute(0,2,1,3)
+        dec_out = self.out_mlp(enc_out)  # [B, N, T*C]
+
+        B, N, _ = dec_out.shape
+        outputs = dec_out.view(B, N, self.pred_len, self.output_dim)
+        outputs = outputs.permute(0, 2, 1, 3)  # B, T, N, C

         return outputs
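The widened `feature_fusion` input follows from the two branches concatenated on the last axis; a quick check with values assumed from the configs above (d_model=128, input_dim=1) and the default Chebyshev order K=3:

d_model, input_dim, K = 128, 1, 3          # assumed from the configs / defaults
patch_width = d_model * input_dim          # patch_embedding output per node
graph_width = d_model * (K + 1)            # hidden_dim = d_model per Chebyshev order
assert patch_width + graph_width == 640    # feature_fusion in_features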
diff --git a/model/AEPSA/aepsav2.py b/model/ASTRA/astrav2.py
similarity index 90%
rename from model/AEPSA/aepsav2.py
rename to model/ASTRA/astrav2.py
index aac9149..f18ac90 100644
--- a/model/AEPSA/aepsav2.py
+++ b/model/ASTRA/astrav2.py
@@ -2,8 +2,8 @@
 import torch
 import torch.nn as nn
 from transformers.models.gpt2.modeling_gpt2 import GPT2Model
 from einops import rearrange
-from model.AEPSA.normalizer import GumbelSoftmax
-from model.AEPSA.reprogramming import ReprogrammingLayer
+from model.ASTRA.normalizer import GumbelSoftmax
+from model.ASTRA.reprogramming import ReprogrammingLayer
 import torch.nn.functional as F

 # Spatio-temporal forecasting model based on dynamic graph enhancement
@@ -113,10 +113,10 @@
     return torch.stack(enhanced_features, dim=0)  # stack and return [B, N, hidden_dim*(K+1)]: Chebyshev features of each node at every order k

-class AEPSA(nn.Module):
+class ASTRA(nn.Module):
     """Adaptive feature-projection spatio-temporal self-attention model"""
     def __init__(self, configs):
-        super(AEPSA, self).__init__()
+        super(ASTRA, self).__init__()
         self.device = configs['device']  # compute device
         self.pred_len = configs['pred_len']  # prediction length
         self.seq_len = configs['seq_len']  # input sequence length
@@ -127,7 +127,11 @@
         self.gpt_layers = configs['gpt_layers']  # number of GPT-2 layers used
         self.d_ff = configs['d_ff']  # feed-forward hidden dimension
         self.gpt_path = configs['gpt_path']  # path to the pretrained GPT-2 model
-        self.num_nodes = configs.get('num_nodes', 325)  # number of nodes
+        self.num_nodes = configs['num_nodes']  # number of nodes
+        self.output_dim = configs['output_dim']
+        self.cheb = configs['cheb']
+        self.graph_dim = configs['graph_dim']
+        self.graph_embed_dim = configs['graph_embed_dim']

         self.word_choice = GumbelSoftmax(configs['word_num'])  # word selection layer
@@ -151,25 +155,25 @@
         # initialize the graph-enhanced encoder
         self.graph_encoder = GraphEnhancedEncoder(
-            K=configs.get('chebyshev_order', 3),  # Chebyshev polynomial order
+            K=self.cheb,  # Chebyshev polynomial order
             in_dim=self.d_model,  # input feature dimension
-            hidden_dim=configs.get('graph_hidden_dim', 32),  # hidden dimension
+            hidden_dim=self.graph_dim,  # hidden dimension
             num_nodes=self.num_nodes,  # number of nodes
-            embed_dim=configs.get('graph_embed_dim', 10),  # node embedding dimension
+            embed_dim=self.graph_embed_dim,  # node embedding dimension
             device=self.device,  # compute device
             temporal_dim=self.seq_len,  # temporal sequence length
             num_features=self.input_dim  # number of feature channels
         )

         self.graph_projection = nn.Linear(  # graph feature projection: maps the order-k Chebyshev features to the hidden dimension
-            configs.get('graph_hidden_dim', 32) * (configs.get('chebyshev_order', 3) + 1),  # input dimension
+            self.graph_dim * (self.cheb + 1),  # input dimension
             self.d_model  # output dimension
         )

         self.out_mlp = nn.Sequential(
             nn.Linear(self.d_llm, 128),
             nn.ReLU(),
-            nn.Linear(128, self.pred_len)
+            nn.Linear(128, self.pred_len * self.output_dim)
         )

         # set parameter trainability; wpe = word position embeddings
@@ -184,7 +188,7 @@
     def forward(self, x):
         # input processing
-        x = x[..., :1]  # [B,T,N,1]
+        x = x[..., :self.input_dim]  # [B,T,N,1]
         x_enc = rearrange(x, 'b t n c -> b n c t')  # [B,N,1,T]

         # graph encoding
@@ -202,7 +206,8 @@
         dec_out = self.out_mlp(enc_out)  # [B, N, pred_len*output_dim]

         # reshape output
-        outputs = dec_out.unsqueeze(dim=-1)  # [B,N,pred_len,1]
-        outputs = outputs.permute(0, 2, 1, 3)  # [B,pred_len,N,1]
+        B, N, _ = dec_out.shape
+        outputs = dec_out.view(B, N, self.pred_len, self.output_dim)
+        outputs = outputs.permute(0, 2, 1, 3)  # B, T, N, C

         return outputs
\ No newline at end of file
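Both ASTRA heads now predict `pred_len * output_dim` values per node and unflatten them, instead of repeating one channel `n_vars` times; a toy shape check of the reshape (editor's sketch):

import torch

B, N, pred_len, output_dim = 2, 5, 24, 1
dec_out = torch.randn(B, N, pred_len * output_dim)        # out_mlp output
outputs = dec_out.view(B, N, pred_len, output_dim).permute(0, 2, 1, 3)
assert outputs.shape == (B, pred_len, N, output_dim)      # [B, T, N, C]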
diff --git a/model/ASTRA/astrav3.py b/model/ASTRA/astrav3.py
new file mode 100644
index 0000000..7f4317c
--- /dev/null
+++ b/model/ASTRA/astrav3.py
@@ -0,0 +1,214 @@
+import torch
+import torch.nn as nn
+from transformers.models.gpt2.modeling_gpt2 import GPT2Model
+from einops import rearrange
+from model.ASTRA.normalizer import GumbelSoftmax
+from model.ASTRA.reprogramming import ReprogrammingLayer
+import torch.nn.functional as F
+
+# Spatio-temporal forecasting model based on dynamic graph enhancement
+
+class DynamicGraphEnhancer(nn.Module):
+    """Dynamic graph enhancement encoder"""
+    def __init__(self, num_nodes, in_dim, embed_dim=10):
+        super().__init__()
+        self.num_nodes = num_nodes  # number of nodes
+        self.embed_dim = embed_dim  # node embedding dimension
+
+        self.node_embeddings = nn.Parameter(torch.randn(num_nodes, embed_dim), requires_grad=True)  # node embedding parameters
+
+        self.feature_transform = nn.Sequential(  # feature transform network
+            nn.Linear(in_dim, 16),
+            nn.Sigmoid(),
+            nn.Linear(16, 2),
+            nn.Sigmoid(),
+            nn.Linear(2, embed_dim)
+        )
+
+        self.register_buffer("eye", torch.eye(num_nodes))  # register the identity matrix
+
+    def get_laplacian(self, graph, I, normalize=True):
+        D_inv = torch.diag_embed(torch.sum(graph, -1) ** (-0.5))  # inverse square root of the degree matrix
+        D_inv[torch.isinf(D_inv)] = 0.0  # guard against division by zero
+        if normalize:
+            return torch.matmul(torch.matmul(D_inv, graph), D_inv)  # normalized Laplacian
+        else:
+            return torch.matmul(torch.matmul(D_inv, graph + I), D_inv)  # normalized Laplacian with self-loops
+
+    def forward(self, X):
+        """Generate dynamic Laplacian matrices"""
+        batch_size = X.size(0)  # batch size
+        laplacians = []  # Laplacians for each sample in the batch
+        I = self.eye.to(X.device)  # move the identity matrix to the target device
+
+        for b in range(batch_size):
+            filt = self.feature_transform(X[b])  # feature transform
+            nodevec = torch.tanh(self.node_embeddings * filt)  # compute node embeddings
+            adj = F.relu(torch.matmul(nodevec, nodevec.transpose(0, 1)))  # compute the adjacency matrix
+            laplacian = self.get_laplacian(adj, I)  # compute the Laplacian
+            laplacians.append(laplacian)
+        return torch.stack(laplacians, dim=0)  # stack and return
+
+class GraphEnhancedEncoder(nn.Module):
+    """Graph-enhanced encoder"""
+    def __init__(self, K=3, in_dim=64, hidden_dim=32, num_nodes=325, embed_dim=10, device='cpu',
+                 temporal_dim=12, num_features=1):
+        super().__init__()
+        self.K = K  # Chebyshev polynomial order
+        self.in_dim = in_dim  # input feature dimension
+        self.hidden_dim = hidden_dim  # hidden dimension
+        self.device = device  # compute device
+        self.temporal_dim = temporal_dim  # temporal sequence length
+        self.num_features = num_features  # number of feature channels
+
+        self.input_projection = nn.Sequential(  # input projection
+            nn.Conv2d(num_features, 16, kernel_size=(1, 3), padding=(0, 1)),
+            nn.ReLU(),
+            nn.Conv2d(16, in_dim, kernel_size=(1, temporal_dim)),
+            nn.ReLU()
+        )
+
+        self.graph_enhancer = DynamicGraphEnhancer(num_nodes, in_dim, embed_dim)  # dynamic graph enhancer
+        self.alpha = nn.Parameter(torch.randn(K + 1, 1))  # spectral coefficients
+        self.W = nn.ParameterList([nn.Parameter(torch.randn(in_dim, hidden_dim)) for _ in range(K + 1)])  # propagation weights
+        self.to(device)  # move to the target device
+
+    def chebyshev_polynomials(self, L_tilde, X):
+        """Compute the Chebyshev polynomial expansion"""
+        T_k_list = [X]  # T_0(X) = X
+        if self.K >= 1:
+            T_k_list.append(torch.matmul(L_tilde, X))  # T_1(X) = L_tilde * X
+        for k in range(2, self.K + 1):
+            T_k_list.append(2 * torch.matmul(L_tilde, T_k_list[-1]) - T_k_list[-2])  # recurrence
+        return T_k_list  # list of polynomial terms
+
+    def forward(self, X):
+        """Input features [B,N,C,T]; returns enhanced features [B,N,hidden_dim*(K+1)]"""
+        batch_size = X.size(0)  # batch size
+        num_nodes = X.size(1)  # number of nodes
+
+        x = X.permute(0, 2, 1, 3)  # [B,C,N,T]
+        x_proj = self.input_projection(x).squeeze(-1)  # [B,in_dim,N]
+        x_proj = x_proj.permute(0, 2, 1)  # [B,N,in_dim]
+
+        enhanced_features = []  # enhanced features per sample
+        laplacians = self.graph_enhancer(x_proj)  # generate dynamic Laplacian matrices
+
+        for b in range(batch_size):
+            L = laplacians[b]  # Laplacian of the current sample
+
+            # eigenvalue rescaling
+            try:
+                lambda_max = torch.linalg.eigvalsh(L).max().real  # largest eigenvalue
+                lambda_max = 1.0 if lambda_max < 1e-6 else lambda_max  # avoid division by zero
+                L_tilde = (2.0 / lambda_max) * L - torch.eye(L.size(0), device=L.device)  # rescaled Laplacian
+            except:
+                L_tilde = torch.eye(num_nodes, device=X.device)  # fallback on failure
+
+            # compute the expansion and apply weights
+            T_k_list = self.chebyshev_polynomials(L_tilde, x_proj[b])  # Chebyshev polynomials
+            H_list = [torch.matmul(T_k_list[k], self.W[k]) for k in range(self.K + 1)]  # apply weights
+            X_enhanced = torch.cat(H_list, dim=-1)  # concatenate features
+            enhanced_features.append(X_enhanced)
+
+        return torch.stack(enhanced_features, dim=0)  # stack and return [B, N, hidden_dim*(K+1)]: Chebyshev features of each node at every order k
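The `(2.0 / lambda_max) * L - I` rescaling caps the largest eigenvalue of the propagation operator at 1 before the Chebyshev recursion runs; a small self-contained check (editor's sketch with toy sizes, not part of this patch):

import torch

N = 6
A = torch.rand(N, N)
A = (A + A.T) / 2                              # symmetric non-negative "adjacency"
d_inv = A.sum(-1).pow(-0.5)
L = d_inv[:, None] * A * d_inv[None, :]        # as in get_laplacian (normalize=True)
lam_max = torch.linalg.eigvalsh(L).max()
L_tilde = (2.0 / lam_max) * L - torch.eye(N)
assert torch.linalg.eigvalsh(L_tilde).max() <= 1.0 + 1e-5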
+class ASTRA(nn.Module):
+    """Adaptive feature-projection spatio-temporal self-attention model"""
+    def __init__(self, configs):
+        super(ASTRA, self).__init__()
+        self.device = configs['device']  # compute device
+        self.pred_len = configs['pred_len']  # prediction length
+        self.seq_len = configs['seq_len']  # input sequence length
+        self.patch_len = configs['patch_len']  # patch length
+        self.input_dim = configs['input_dim']  # input feature dimension
+        self.stride = configs['stride']  # stride
+        self.dropout = configs['dropout']  # dropout probability
+        self.gpt_layers = configs['gpt_layers']  # number of GPT-2 layers used
+        self.d_ff = configs['d_ff']  # feed-forward hidden dimension
+        self.gpt_path = configs['gpt_path']  # path to the pretrained GPT-2 model
+        self.num_nodes = configs['num_nodes']  # number of nodes
+        self.output_dim = configs['output_dim']
+        self.cheb = configs['cheb']
+        self.graph_dim = configs['graph_dim']
+        self.graph_embed_dim = configs['graph_embed_dim']
+
+        self.word_choice = GumbelSoftmax(configs['word_num'])  # word selection layer
+
+        self.d_model = configs['d_model']  # model dimension
+        self.n_heads = configs['n_heads']  # number of attention heads
+        self.d_keys = None  # key dimension
+        self.d_llm = 768  # GPT-2 hidden dimension
+
+        self.patch_nums = int((self.seq_len - self.patch_len) / self.stride + 2)  # number of patches
+        self.head_nf = self.d_ff * self.patch_nums  # head feature dimension
+
+        # initialize the GPT-2 model
+        self.gpts = GPT2Model.from_pretrained(self.gpt_path, output_attentions=True, output_hidden_states=True)  # GPT-2 backbone
+        self.gpts.h = self.gpts.h[:self.gpt_layers]  # keep only the first gpt_layers blocks
+        self.gpts.apply(self.reset_parameters)  # re-initialize parameters
+
+        self.word_embeddings = self.gpts.get_input_embeddings().weight.to(self.device)  # word embedding weights
+        self.vocab_size = self.word_embeddings.shape[0]  # vocabulary size
+        self.mapping_layer = nn.Linear(self.vocab_size, 1)  # mapping layer
+        self.reprogramming_layer = ReprogrammingLayer(self.d_model + self.graph_dim * (self.cheb + 1), self.n_heads, self.d_keys, self.d_llm)  # reprogramming layer
+
+        # initialize the graph-enhanced encoder
+        self.graph_encoder = GraphEnhancedEncoder(
+            K=configs.get('chebyshev_order', 3),  # Chebyshev polynomial order
+            in_dim=self.d_model,  # input feature dimension
+            hidden_dim=self.graph_dim,  # hidden dimension
+            num_nodes=self.num_nodes,  # number of nodes
+            embed_dim=self.graph_embed_dim,  # node embedding dimension
+            device=self.device,  # compute device
+            temporal_dim=self.seq_len,  # temporal sequence length
+            num_features=self.input_dim  # number of feature channels
+        )
+
+        self.graph_projection = nn.Linear(  # graph feature projection: maps the order-k Chebyshev features to the hidden dimension
+            self.graph_dim * (self.cheb + 1),  # input dimension
+            self.d_model  # output dimension
+        )
+
+        self.out_mlp = nn.Sequential(
+            nn.Linear(self.d_llm, 128),
+            nn.ReLU(),
+            nn.Linear(128, self.pred_len * self.output_dim)
+        )
+
+        # set parameter trainability; wpe = word position embeddings
+        for name, param in self.gpts.named_parameters():
+            param.requires_grad = 'wpe' in name
+
+    def reset_parameters(self, module):
+        if hasattr(module, 'weight') and module.weight is not None:
+            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
+        if hasattr(module, 'bias') and module.bias is not None:
+            torch.nn.init.zeros_(module.bias)
+
+    def forward(self, x):
+        # input processing
+        x = x[..., :self.input_dim]
+        x_enc = rearrange(x, 'b t n c -> b n c t')  # [B,N,1,T]
+
+        # graph encoding
+        H_t = self.graph_encoder(x_enc)  # [B,N,1,T] -> [B, N, hidden_dim*(K+1)]
+        X_t_1 = self.graph_projection(H_t)  # [B,N,d_model]
+        X_enc = torch.cat([H_t, X_t_1], dim = -1)  # [B, N, d_model + hidden_dim*(K+1)]
+
+        # word embedding processing (the first call's result is discarded; only mapping_layer.weight is used below)
+        self.mapping_layer(self.word_embeddings.permute(1, 0)).permute(1, 0)
+        masks = self.word_choice(self.mapping_layer.weight.data.permute(1,0))  # [d_llm,1]
+        source_embeddings = self.word_embeddings[masks==1]  # [selected_words,d_llm]
+
+        # reprogramming and prediction
+        X_enc = self.reprogramming_layer(X_enc, source_embeddings, source_embeddings)
+        X_enc = self.gpts(inputs_embeds=X_enc).last_hidden_state  # [B,N,d_llm]
+        dec_out = self.out_mlp(X_enc)  # [B, N, pred_len*output_dim]
+
+        # reshape output
+        B, N, _ = dec_out.shape
+        outputs = dec_out.view(B, N, self.pred_len, self.output_dim)
+        outputs = outputs.permute(0, 2, 1, 3)  # B, T, N, C
+
+        return outputs
\ No newline at end of file
diff --git a/model/ASTRA/model_config.json b/model/ASTRA/model_config.json
new file mode 100644
index 0000000..3cd0064
--- /dev/null
+++ b/model/ASTRA/model_config.json
@@ -0,0 +1,17 @@
+[
+    {
+        "name": "ASTRA",
+        "module": "model.ASTRA.astra",
+        "entry": "ASTRA"
+    },
+    {
+        "name": "ASTRA_v2",
+        "module": "model.ASTRA.astrav2",
+        "entry": "ASTRA"
+    },
+    {
+        "name": "ASTRA_v3",
+        "module": "model.ASTRA.astrav3",
+        "entry": "ASTRA"
+    }
+]
\ No newline at end of file
diff --git a/model/AEPSA/normalizer.py b/model/ASTRA/normalizer.py
similarity index 100%
rename from model/AEPSA/normalizer.py
rename to model/ASTRA/normalizer.py
diff --git a/model/AEPSA/reprogramming.py b/model/ASTRA/reprogramming.py
similarity index 100%
rename from model/AEPSA/reprogramming.py
rename to model/ASTRA/reprogramming.py
diff --git a/model/DCRNN/model_config.json b/model/DCRNN/model_config.json
new file mode 100644
index 0000000..c92b599
--- /dev/null
+++ b/model/DCRNN/model_config.json
@@ -0,0 +1,7 @@
+[
+    {
+        "name": "DCRNN",
+        "module": "model.DCRNN.dcrnn_model",
+        "entry": "DCRNNModel"
+    }
+]
\ No newline at end of file
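These per-model `model_config.json` records read like a small plugin registry of `{name, module, entry}` triples. A minimal sketch of how such a record can be resolved with `importlib` (an editor's assumption about intent, not the repo's actual loader code):

import importlib
import json

def build_model(record, configs):
    module = importlib.import_module(record["module"])  # e.g. "model.DCRNN.dcrnn_model"
    entry = getattr(module, record["entry"])            # e.g. the DCRNNModel class
    return entry(configs)

# records = json.load(open("model/DCRNN/model_config.json"))
# net = build_model(records[0], configs)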
b/model/DDGCRN/model_config.json @@ -0,0 +1,7 @@ +[ + { + "name": "DDGCRN", + "module": "model.DDGCRN.DDGCRN", + "entry": "DDGCRN" + } +] \ No newline at end of file diff --git a/model/DSANET/model_config.json b/model/DSANET/model_config.json new file mode 100644 index 0000000..5624f8a --- /dev/null +++ b/model/DSANET/model_config.json @@ -0,0 +1,7 @@ +[ + { + "name": "DSANET", + "module": "model.DSANET.DSANET", + "entry": "DSANet" + } +] \ No newline at end of file diff --git a/model/EXP/model_config.json b/model/EXP/model_config.json new file mode 100644 index 0000000..bdf39b7 --- /dev/null +++ b/model/EXP/model_config.json @@ -0,0 +1,7 @@ +[ + { + "name": "EXP", + "module": "model.EXP.EXP32", + "entry": "EXP" + } +] \ No newline at end of file diff --git a/model/FPT/fpt.py b/model/FPT/fpt.py new file mode 100644 index 0000000..941da6d --- /dev/null +++ b/model/FPT/fpt.py @@ -0,0 +1,45 @@ +import torch.nn as nn +from transformers.models.gpt2.modeling_gpt2 import GPT2Model +from einops import rearrange + +class fpt(nn.Module): + def __init__(self, configs): + super(fpt, self).__init__() + self.patch_len = configs['patch_len'] + self.stride = configs['stride'] + self.input_dim = configs['input_dim'] + self.seq_len = configs['seq_len'] + self.pred_len = configs['pred_len'] + self.gpt_layers = configs['gpt_layers'] # number of GPT2 layers used + self.d_model = configs['d_model'] + self.gpt_path = configs['gpt_path'] + + self.patch_num = int((self.seq_len - self.patch_len) / self.stride + 2) # number of patches + self.padding_patch_layer = nn.ReplicationPad1d((0, self.stride)) + + self.gpts = GPT2Model.from_pretrained(self.gpt_path, output_attentions=True, output_hidden_states=True) + self.gpts.h = self.gpts.h[:self.gpt_layers] + for i, (name, param) in enumerate(self.gpts.named_parameters()): + if 'wpe' in name: + param.requires_grad = True + else: + param.requires_grad = False + + self.in_layer = nn.Linear(self.patch_len, self.d_model) + self.out_layer = nn.Linear(self.d_model * self.patch_num, self.pred_len) + + def forward(self, x): + B, L, M = x.shape + x = x[..., :self.input_dim] + x = rearrange(x, 'b l m -> b m l') + + x = self.padding_patch_layer(x) + x = x.unfold(dimension = -1, size = self.patch_len, step = self.stride) + x = rearrange(x, 'b m n p -> (b m) n p') + + outputs = self.in_layer(x) + outputs = self.gpts(inputs_embeds=outputs).last_hidden_state + outputs = self.out_layer(outputs.reshape(B*M, -1)) + outputs = rearrange(outputs, '(b m) l -> b l m', b = B) + return outputs + diff --git a/model/FPT/model_config.json b/model/FPT/model_config.json new file mode 100644 index 0000000..a7d040c --- /dev/null +++ b/model/FPT/model_config.json @@ -0,0 +1,7 @@ +[ + { + "name": "FPT", + "module": "model.FPT.fpt", + "entry": "fpt" + } +] \ No newline at end of file diff --git a/model/GWN/GraphWaveNet.py b/model/GWN/GraphWaveNet.py index 6f290f5..5bece37 100755 --- a/model/GWN/GraphWaveNet.py +++ b/model/GWN/GraphWaveNet.py @@ -1,53 +1,35 @@ -import torch, torch.nn as nn, torch.nn.functional as F +import torch +import torch.nn as nn +from torch.nn import BatchNorm2d, Conv1d, Conv2d, ModuleList, Parameter +import torch.nn.functional as F + +def nconv(x, A): + """Multiply x by adjacency matrix along source node axis""" + return torch.einsum('ncvl,vw->ncwl', (x, A)).contiguous() -class nconv(nn.Module): - """ - 图卷积操作的实现类 - 使用einsum进行矩阵运算,实现图卷积操作 - """ - - def forward(self, x, A): - return torch.einsum("ncvl,vw->ncwl", (x, A)).contiguous() - - -class linear(nn.Module): - """ - 线性变换层 - 使用1x1卷积实现线性变换 - """ - - def
__init__(self, c_in, c_out): - super().__init__() - self.mlp = nn.Conv2d(c_in, c_out, 1) - - def forward(self, x): - return self.mlp(x) - - -class gcn(nn.Module): - """ - 图卷积网络层 - 实现高阶图卷积操作,支持多阶邻接矩阵 - """ - +class GraphConvNet(nn.Module): def __init__(self, c_in, c_out, dropout, support_len=3, order=2): super().__init__() - self.nconv = nconv() c_in = (order * support_len + 1) * c_in - self.mlp, self.dropout, self.order = linear(c_in, c_out), dropout, order + self.final_conv = Conv2d(c_in, c_out, (1, 1), padding=(0, 0), stride=(1, 1), bias=True) + self.dropout = dropout + self.order = order - def forward(self, x, support): + def forward(self, x, support: list): out = [x] for a in support: - x1 = self.nconv(x, a) + x1 = nconv(x, a) out.append(x1) - for _ in range(2, self.order + 1): - x1 = self.nconv(x1, a) - out.append(x1) - return F.dropout( - self.mlp(torch.cat(out, dim=1)), self.dropout, training=self.training - ) + for k in range(2, self.order + 1): + x2 = nconv(x1, a) + out.append(x2) + x1 = x2 + + h = torch.cat(out, dim=1) + h = self.final_conv(h) + h = F.dropout(h, self.dropout, training=self.training) + return h class gwnet(nn.Module): @@ -59,126 +41,121 @@ class gwnet(nn.Module): def __init__(self, args): super().__init__() # 初始化基本参数 - self.dropout, self.blocks, self.layers = ( - args["dropout"], - args["blocks"], - args["layers"], - ) - self.gcn_bool, self.addaptadj = args["gcn_bool"], args["addaptadj"] + self.dropout = args["dropout"] + self.blocks = args["blocks"] + self.layers = args["layers"] + self.do_graph_conv = args.get("do_graph_conv", True) + self.cat_feat_gc = args.get("cat_feat_gc", False) + self.addaptadj = args.get("addaptadj", True) + supports = None + aptinit = args.get("aptinit", None) + in_dim = args.get("in_dim") + out_dim = args.get("out_dim") + residual_channels = args.get("residual_channels") + dilation_channels = args.get("dilation_channels") + skip_channels = args.get("skip_channels") + end_channels = args.get("end_channels") + kernel_size = args.get("kernel_size") + apt_size = args.get("apt_size", 10) - # 初始化各种卷积层和模块 - self.filter_convs, self.gate_convs = nn.ModuleList(), nn.ModuleList() - self.residual_convs, self.skip_convs, self.bn, self.gconv = ( - nn.ModuleList(), - nn.ModuleList(), - nn.ModuleList(), - nn.ModuleList(), - ) - self.start_conv = nn.Conv2d(args["in_dim"], args["residual_channels"], 1) - self.supports = args.get("supports", None) - # 计算感受野 + if self.cat_feat_gc: + self.start_conv = nn.Conv2d(in_channels=1, # hard code to avoid errors + out_channels=residual_channels, + kernel_size=(1, 1)) + self.cat_feature_conv = nn.Conv2d(in_channels=in_dim - 1, + out_channels=residual_channels, + kernel_size=(1, 1)) + else: + self.start_conv = nn.Conv2d(in_channels=in_dim, + out_channels=residual_channels, + kernel_size=(1, 1)) + + self.fixed_supports = supports or [] receptive_field = 1 - self.supports_len = len(self.supports) if self.supports is not None else 0 - # 如果使用自适应邻接矩阵,初始化相关参数 - if self.gcn_bool and self.addaptadj: - aptinit = args.get("aptinit", None) + self.supports_len = len(self.fixed_supports) + if self.do_graph_conv and self.addaptadj: if aptinit is None: - if self.supports is None: - self.supports = [] - self.nodevec1 = nn.Parameter( - torch.randn(args["num_nodes"], 10, device=args["device"]) - ) - self.nodevec2 = nn.Parameter( - torch.randn(10, args["num_nodes"], device=args["device"]) - ) - self.supports_len += 1 + nodevecs = torch.randn(args["num_nodes"], apt_size), torch.randn(apt_size, args["num_nodes"]) else: - if self.supports 
is None: - self.supports = [] - m, p, n = torch.svd(aptinit) - initemb1 = torch.mm(m[:, :10], torch.diag(p[:10] ** 0.5)) - initemb2 = torch.mm(torch.diag(p[:10] ** 0.5), n[:, :10].t()) - self.nodevec1 = nn.Parameter(initemb1) - self.nodevec2 = nn.Parameter(initemb2) - self.supports_len += 1 + nodevecs = self.svd_init(args["num_nodes"], apt_size, aptinit) + self.supports_len += 1 + self.nodevec1, self.nodevec2 = [Parameter(n.to(args["device"]), requires_grad=True) for n in nodevecs] - # 获取模型参数 - ks, res, dil, skip, endc, out_dim = ( - args["kernel_size"], - args["residual_channels"], - args["dilation_channels"], - args["skip_channels"], - args["end_channels"], - args["out_dim"], - ) + depth = list(range(self.blocks * self.layers)) - # 构建模型层 + # 1x1 convolution for residual and skip connections (slightly different see docstring) + self.residual_convs = ModuleList([Conv2d(dilation_channels, residual_channels, (1, 1)) for _ in depth]) + self.skip_convs = ModuleList([Conv2d(dilation_channels, skip_channels, (1, 1)) for _ in depth]) + self.bn = ModuleList([BatchNorm2d(residual_channels) for _ in depth]) + self.graph_convs = ModuleList([GraphConvNet(dilation_channels, residual_channels, self.dropout, support_len=self.supports_len) + for _ in depth]) + + self.filter_convs = ModuleList() + self.gate_convs = ModuleList() for b in range(self.blocks): - add_scope, new_dil = ks - 1, 1 + additional_scope = kernel_size - 1 + D = 1 # dilation for i in range(self.layers): - # 添加时间卷积层 - self.filter_convs.append(nn.Conv2d(res, dil, (1, ks), dilation=new_dil)) - self.gate_convs.append(nn.Conv2d(res, dil, (1, ks), dilation=new_dil)) - self.residual_convs.append(nn.Conv2d(dil, res, 1)) - self.skip_convs.append(nn.Conv2d(dil, skip, 1)) - self.bn.append(nn.BatchNorm2d(res)) - new_dil *= 2 - receptive_field += add_scope - add_scope *= 2 - if self.gcn_bool: - self.gconv.append( - gcn(dil, res, args["dropout"], support_len=self.supports_len) - ) - - # 输出层 - self.end_conv_1 = nn.Conv2d(skip, endc, 1) - self.end_conv_2 = nn.Conv2d(endc, out_dim, 1) + # dilated convolutions + self.filter_convs.append(Conv2d(residual_channels, dilation_channels, (1, kernel_size), dilation=D)) + self.gate_convs.append(Conv2d(residual_channels, dilation_channels, (1, kernel_size), dilation=D)) + D *= 2 + receptive_field += additional_scope + additional_scope *= 2 self.receptive_field = receptive_field + self.end_conv_1 = Conv2d(skip_channels, end_channels, (1, 1), bias=True) + self.end_conv_2 = Conv2d(end_channels, out_dim, (1, 1), bias=True) + def forward(self, input): - """ - 前向传播函数 - 实现模型的推理过程 - """ - # 数据预处理 - input = input[..., 0:2].transpose(1, 3) - input = F.pad(input, (1, 0, 0, 0)) - in_len = input.size(3) - x = ( - F.pad(input, (self.receptive_field - in_len, 0, 0, 0)) - if in_len < self.receptive_field - else input - ) - - # 初始卷积 - x, skip, new_supports = self.start_conv(x), 0, None - - # 如果使用自适应邻接矩阵,计算新的邻接矩阵 - if self.gcn_bool and self.addaptadj and self.supports is not None: + x = input[..., 0:1].transpose(1, 3) + # Input shape is (bs, features, n_nodes, n_timesteps) + in_len = x.size(3) + if in_len < self.receptive_field: + x = nn.functional.pad(x, (self.receptive_field - in_len, 0, 0, 0)) + if self.cat_feat_gc: + f1, f2 = x[:, [0]], x[:, 1:] + x1 = self.start_conv(f1) + x2 = F.leaky_relu(self.cat_feature_conv(f2)) + x = x1 + x2 + else: + x = self.start_conv(x) + skip = 0 + adjacency_matrices = self.fixed_supports + # calculate the current adaptive adj matrix once per iteration + if self.addaptadj: adp = 
F.softmax(F.relu(torch.mm(self.nodevec1, self.nodevec2)), dim=1) - new_supports = self.supports + [adp] + adjacency_matrices = self.fixed_supports + [adp] - # 主网络层的前向传播 + # WaveNet layers for i in range(self.blocks * self.layers): residual = x - # 时间卷积操作 - f = self.filter_convs[i](residual).tanh() - g = self.gate_convs[i](residual).sigmoid() - x = f * g - s = self.skip_convs[i](x) - skip = ( - skip[:, :, :, -s.size(3) :] if isinstance(skip, torch.Tensor) else 0 - ) + s + # dilated convolution + filter = torch.tanh(self.filter_convs[i](residual)) + gate = torch.sigmoid(self.gate_convs[i](residual)) + x = filter * gate + # parametrized skip connection + s = self.skip_convs[i](x) # what are we skipping?? + try: # if i > 0 this works + skip = skip[:, :, :, -s.size(3):] # TODO(SS): Mean/Max Pool? + except TypeError: # skip is still the scalar 0 before the first assignment + skip = 0 + skip = s + skip + if i == (self.blocks * self.layers - 1): # last X getting ignored anyway + break - # 图卷积操作 - if self.gcn_bool and self.supports is not None: - x = self.gconv[i](x, new_supports if self.addaptadj else self.supports) + if self.do_graph_conv: + graph_out = self.graph_convs[i](x, adjacency_matrices) + x = x + graph_out if self.cat_feat_gc else graph_out else: x = self.residual_convs[i](x) - x = x + residual[:, :, :, -x.size(3) :] + x = x + residual[:, :, :, -x.size(3):] # TODO(SS): Mean/Max Pool? x = self.bn[i](x) - # 输出层处理 - return self.end_conv_2(F.relu(self.end_conv_1(F.relu(skip)))) + x = F.relu(skip) # ignore last X? + x = F.relu(self.end_conv_1(x)) + x = self.end_conv_2(x) # downsample to (bs, seq_length, 207, nfeatures) + # x = x.transpose(1, 3) + return x diff --git a/model/GWN/GraphWaveNet_bk.py b/model/GWN/GraphWaveNet_bk.py index 19308d4..6f290f5 100755 --- a/model/GWN/GraphWaveNet_bk.py +++ b/model/GWN/GraphWaveNet_bk.py @@ -1,97 +1,98 @@ -import torch -import torch.nn as nn -import torch.nn.functional as F -from torch.autograd import Variable -import sys +import torch, torch.nn as nn, torch.nn.functional as F class nconv(nn.Module): - def __init__(self): - super(nconv, self).__init__() + """ + Graph convolution operator. + Applies the adjacency matrix with an einsum contraction. + """ def forward(self, x, A): - x = torch.einsum("ncvl,vw->ncwl", (x, A)) - return x.contiguous() + return torch.einsum("ncvl,vw->ncwl", (x, A)).contiguous() class linear(nn.Module): + """ + Linear transformation layer. + Implemented as a 1x1 convolution. + """ + def __init__(self, c_in, c_out): - super(linear, self).__init__() - self.mlp = torch.nn.Conv2d( - c_in, c_out, kernel_size=(1, 1), padding=(0, 0), stride=(1, 1), bias=True - ) + super().__init__() + self.mlp = nn.Conv2d(c_in, c_out, 1) def forward(self, x): return self.mlp(x) class gcn(nn.Module): + """ + Graph convolution layer. + Supports higher-order propagation over multiple adjacency supports. + """ + def __init__(self, c_in, c_out, dropout, support_len=3, order=2): - super(gcn, self).__init__() + super().__init__() self.nconv = nconv() c_in = (order * support_len + 1) * c_in - self.mlp = linear(c_in, c_out) - self.dropout = dropout - self.order = order + self.mlp, self.dropout, self.order = linear(c_in, c_out), dropout, order def forward(self, x, support): out = [x] for a in support: x1 = self.nconv(x, a) out.append(x1) - for k in range(2, self.order + 1): - x2 = self.nconv(x1, a) - out.append(x2) - x1 = x2 - - h = torch.cat(out, dim=1) - h = self.mlp(h) - h = F.dropout(h, self.dropout, training=self.training) - return h + for _ in range(2, self.order + 1): + x1 = self.nconv(x1, a) + out.append(x1) + return F.dropout( + self.mlp(torch.cat(out, dim=1)), self.dropout, training=self.training + ) class gwnet(nn.Module): + """ + Main Graph WaveNet class. + Combines graph convolutions with dilated temporal convolutions for spatio-temporal forecasting. +
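Gating, dilation and skip connections follow the WaveNet design; the graph + structure can be predefined, learned (adaptive), or both. +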
""" + def __init__(self, args): - super(gwnet, self).__init__() - self.dropout = args["dropout"] - self.blocks = args["blocks"] - self.layers = args["layers"] - self.gcn_bool = args["gcn_bool"] - self.addaptadj = args["addaptadj"] - - self.filter_convs = nn.ModuleList() - self.gate_convs = nn.ModuleList() - self.residual_convs = nn.ModuleList() - self.skip_convs = nn.ModuleList() - self.bn = nn.ModuleList() - self.gconv = nn.ModuleList() - - self.start_conv = nn.Conv2d( - in_channels=args["in_dim"], - out_channels=args["residual_channels"], - kernel_size=(1, 1), + super().__init__() + # 初始化基本参数 + self.dropout, self.blocks, self.layers = ( + args["dropout"], + args["blocks"], + args["layers"], ) + self.gcn_bool, self.addaptadj = args["gcn_bool"], args["addaptadj"] + + # 初始化各种卷积层和模块 + self.filter_convs, self.gate_convs = nn.ModuleList(), nn.ModuleList() + self.residual_convs, self.skip_convs, self.bn, self.gconv = ( + nn.ModuleList(), + nn.ModuleList(), + nn.ModuleList(), + nn.ModuleList(), + ) + self.start_conv = nn.Conv2d(args["in_dim"], args["residual_channels"], 1) self.supports = args.get("supports", None) + # 计算感受野 receptive_field = 1 + self.supports_len = len(self.supports) if self.supports is not None else 0 - self.supports_len = 0 - if self.supports is not None: - self.supports_len += len(self.supports) - + # 如果使用自适应邻接矩阵,初始化相关参数 if self.gcn_bool and self.addaptadj: aptinit = args.get("aptinit", None) if aptinit is None: if self.supports is None: self.supports = [] self.nodevec1 = nn.Parameter( - torch.randn(args["num_nodes"], 10).to(args["device"]), - requires_grad=True, - ).to(args["device"]) + torch.randn(args["num_nodes"], 10, device=args["device"]) + ) self.nodevec2 = nn.Parameter( - torch.randn(10, args["num_nodes"]).to(args["device"]), - requires_grad=True, - ).to(args["device"]) + torch.randn(10, args["num_nodes"], device=args["device"]) + ) self.supports_len += 1 else: if self.supports is None: @@ -99,156 +100,85 @@ class gwnet(nn.Module): m, p, n = torch.svd(aptinit) initemb1 = torch.mm(m[:, :10], torch.diag(p[:10] ** 0.5)) initemb2 = torch.mm(torch.diag(p[:10] ** 0.5), n[:, :10].t()) - self.nodevec1 = nn.Parameter(initemb1, requires_grad=True).to( - args["device"] - ) - self.nodevec2 = nn.Parameter(initemb2, requires_grad=True).to( - args["device"] - ) + self.nodevec1 = nn.Parameter(initemb1) + self.nodevec2 = nn.Parameter(initemb2) self.supports_len += 1 - kernel_size = args["kernel_size"] - residual_channels = args["residual_channels"] - dilation_channels = args["dilation_channels"] - kernel_size = args["kernel_size"] - skip_channels = args["skip_channels"] - end_channels = args["end_channels"] - out_dim = args["out_dim"] - dropout = args["dropout"] + # 获取模型参数 + ks, res, dil, skip, endc, out_dim = ( + args["kernel_size"], + args["residual_channels"], + args["dilation_channels"], + args["skip_channels"], + args["end_channels"], + args["out_dim"], + ) + # 构建模型层 for b in range(self.blocks): - additional_scope = kernel_size - 1 - new_dilation = 1 + add_scope, new_dil = ks - 1, 1 for i in range(self.layers): - # dilated convolutions - self.filter_convs.append( - nn.Conv2d( - in_channels=residual_channels, - out_channels=dilation_channels, - kernel_size=(1, kernel_size), - dilation=new_dilation, - ) - ) - - self.gate_convs.append( - nn.Conv2d( - in_channels=residual_channels, - out_channels=dilation_channels, - kernel_size=(1, kernel_size), - dilation=new_dilation, - ) - ) - - # 1x1 convolution for residual connection - self.residual_convs.append( - nn.Conv2d( - 
for i in range(self.layers): - # dilated convolutions - self.filter_convs.append( - nn.Conv2d( - in_channels=residual_channels, - out_channels=dilation_channels, - kernel_size=(1, kernel_size), - dilation=new_dilation, - ) - ) - - self.gate_convs.append( - nn.Conv2d( - in_channels=residual_channels, - out_channels=dilation_channels, - kernel_size=(1, kernel_size), - dilation=new_dilation, - ) - ) - - # 1x1 convolution for residual connection - self.residual_convs.append( - nn.Conv2d( - in_channels=dilation_channels, - out_channels=residual_channels, - kernel_size=(1, 1), - ) - ) - - # 1x1 convolution for skip connection - self.skip_convs.append( - nn.Conv2d( - in_channels=dilation_channels, - out_channels=skip_channels, - kernel_size=(1, 1), - ) - ) - self.bn.append(nn.BatchNorm2d(residual_channels)) - new_dilation *= 2 - receptive_field += additional_scope - additional_scope *= 2 + # temporal convolution layers + self.filter_convs.append(nn.Conv2d(res, dil, (1, ks), dilation=new_dil)) + self.gate_convs.append(nn.Conv2d(res, dil, (1, ks), dilation=new_dil)) + self.residual_convs.append(nn.Conv2d(dil, res, 1)) + self.skip_convs.append(nn.Conv2d(dil, skip, 1)) + self.bn.append(nn.BatchNorm2d(res)) + new_dil *= 2 + receptive_field += add_scope + add_scope *= 2 if self.gcn_bool: self.gconv.append( - gcn( - dilation_channels, - residual_channels, - dropout, - support_len=self.supports_len, - ) + gcn(dil, res, args["dropout"], support_len=self.supports_len) ) - self.end_conv_1 = nn.Conv2d( - in_channels=skip_channels, - out_channels=end_channels, - kernel_size=(1, 1), - bias=True, - ) - - self.end_conv_2 = nn.Conv2d( - in_channels=end_channels, - out_channels=out_dim, - kernel_size=(1, 1), - bias=True, - ) - + # output layers + self.end_conv_1 = nn.Conv2d(skip, endc, 1) + self.end_conv_2 = nn.Conv2d(endc, out_dim, 1) self.receptive_field = receptive_field def forward(self, input): - input = input[..., 0:2] - input = input.transpose(1, 3) - input = nn.functional.pad(input, (1, 0, 0, 0)) + """ + Forward pass of Graph WaveNet. + """ + # input preprocessing + input = input[..., 0:2].transpose(1, 3) + input = F.pad(input, (1, 0, 0, 0)) in_len = input.size(3) - if in_len < self.receptive_field: - x = nn.functional.pad(input, (self.receptive_field - in_len, 0, 0, 0)) - else: - x = input - x = self.start_conv(x) - skip = 0 + x = ( + F.pad(input, (self.receptive_field - in_len, 0, 0, 0)) + if in_len < self.receptive_field + else input + ) - # calculate the current adaptive adj matrix once per iteration - new_supports = None + # initial convolution + x, skip, new_supports = self.start_conv(x), 0, None + + # with an adaptive adjacency matrix, compute the new supports once per pass if self.gcn_bool and self.addaptadj and self.supports is not None: adp = F.softmax(F.relu(torch.mm(self.nodevec1, self.nodevec2)), dim=1) new_supports = self.supports + [adp] - # WaveNet layers + # main WaveNet layers for i in range(self.blocks * self.layers): - # |----------------------------------------| *residual* - # | | - # | |-- conv -- tanh --| | - # -> dilate -|----| * ----|-- 1x1 -- + --> *input* - # |-- conv -- sigm --| | - # 1x1 - # | - # ---------------------------------------> + -------------> *skip* - - # (dilation, init_dilation) = self.dilations[i] - - # residual = dilation_func(x, dilation, init_dilation, i) residual = x - # dilated convolution - filter = self.filter_convs[i](residual) - filter = torch.tanh(filter) - gate = self.gate_convs[i](residual) - gate = torch.sigmoid(gate) - x = filter * gate - - # parametrized skip connection - - s = x - s = self.skip_convs[i](s) - try: - skip = skip[:, :, :, -s.size(3) :] - except: - skip = 0 - skip = s + skip + # gated temporal convolution + f = self.filter_convs[i](residual).tanh() + g = self.gate_convs[i](residual).sigmoid() + x = f * g
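+ # WaveNet-style gating: the tanh branch filters, the sigmoid branch gates, + # i.e. x = tanh(W_f * r) * sigmoid(W_g * r) before the skip projection.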
+ s = self.skip_convs[i](x) + skip = ( + skip[:, :, :, -s.size(3) :] if isinstance(skip, torch.Tensor) else 0 + ) + s + # graph convolution if self.gcn_bool and self.supports is not None: - if self.addaptadj: - x = self.gconv[i](x, new_supports) - else: - x = self.gconv[i](x, self.supports) + x = self.gconv[i](x, new_supports if self.addaptadj else self.supports) else: x = self.residual_convs[i](x) - x = x + residual[:, :, :, -x.size(3) :] - x = self.bn[i](x) - x = F.relu(skip) - x = F.relu(self.end_conv_1(x)) - x = self.end_conv_2(x) - return x + # output layers + return self.end_conv_2(F.relu(self.end_conv_1(F.relu(skip)))) diff --git a/model/GWN/model_config.json b/model/GWN/model_config.json new file mode 100644 index 0000000..38d05b4 --- /dev/null +++ b/model/GWN/model_config.json @@ -0,0 +1,7 @@ +[ + { + "name": "GWN", + "module": "model.GWN.GraphWaveNet", + "entry": "gwnet" + } +] \ No newline at end of file diff --git a/model/HI/HI.py b/model/HI/HI.py new file mode 100644 index 0000000..aefbd12 --- /dev/null +++ b/model/HI/HI.py @@ -0,0 +1,45 @@ +from typing import List +import torch +from torch import nn + + +class HI(nn.Module): + """ + Paper: Historical Inertia: A Neglected but Powerful Baseline for Long Sequence Time-series Forecasting + Link: https://arxiv.org/abs/2103.16349 + Official code: None + Venue: CIKM 2021 + Task: Long-term Time Series Forecasting + """ + + def __init__(self, config): + """ + Init HI. + + Args: + config (HIConfig): model config. + """ + + super().__init__() + self.input_len = config['input_len'] + self.output_len = config['output_len'] + assert self.input_len >= self.output_len, "HI model requires input length >= output length" + self.reverse = config['reverse'] + # self.fake_param = nn.Linear(1, 1, bias=False) + + def forward(self, inputs: torch.Tensor) -> torch.Tensor: + """Forward function of HI. + + Args: + inputs (torch.Tensor): shape = [B, L_in, N] + + Returns: + torch.Tensor: model prediction [B, L_out, N]. + """ + # historical inertia + prediction = inputs[:, -self.output_len:, :] + # last point + # prediction = inputs[:, [-1], :].expand(-1, self.output_len, -1) + if self.reverse: + prediction = prediction.flip(dims=[1]) + return prediction \ No newline at end of file diff --git a/model/HI/model_config.json b/model/HI/model_config.json new file mode 100644 index 0000000..3071864 --- /dev/null +++ b/model/HI/model_config.json @@ -0,0 +1,7 @@ +[ + { + "name": "HI", + "module": "model.HI.HI", + "entry": "HI" + } +] \ No newline at end of file diff --git a/model/Informer/attn.py b/model/Informer/attn.py new file mode 100644 index 0000000..45344a8 --- /dev/null +++ b/model/Informer/attn.py @@ -0,0 +1,163 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +import numpy as np + +from math import sqrt +from model.Informer.masking import TriangularCausalMask, ProbMask + +class FullAttention(nn.Module): + def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False): + super(FullAttention, self).__init__() + self.scale = scale + self.mask_flag = mask_flag + self.output_attention = output_attention + self.dropout = nn.Dropout(attention_dropout) + + def forward(self, queries, keys, values, attn_mask): + B, L, H, E = queries.shape + _, S, _, D = values.shape + scale = self.scale or 1./sqrt(E) + + scores = torch.einsum("blhe,bshe->bhls", queries, keys) + if self.mask_flag: + if attn_mask is None: + attn_mask = TriangularCausalMask(B, L, device=queries.device) + + scores.masked_fill_(attn_mask.mask, -np.inf) + + A = self.dropout(torch.softmax(scale * scores, dim=-1)) + V = torch.einsum("bhls,bshd->blhd", A, values) + + if self.output_attention: + return (V.contiguous(), A) + else: + return (V.contiguous(), None) + +class ProbAttention(nn.Module): + def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False): +
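# ProbSparse self-attention: only the top-u queries (u ~ factor * ln(L_q)) + # receive exact dot-product attention; the remaining queries reuse an averaged + # (or, under a causal mask, cumulative) context, cutting cost from O(L^2) to O(L log L). +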
super(ProbAttention, self).__init__() + self.factor = factor + self.scale = scale + self.mask_flag = mask_flag + self.output_attention = output_attention + self.dropout = nn.Dropout(attention_dropout) + + def _prob_QK(self, Q, K, sample_k, n_top): # n_top: c*ln(L_q) + # Q [B, H, L, D] + B, H, L_K, E = K.shape + _, _, L_Q, _ = Q.shape + + # calculate the sampled Q_K + K_expand = K.unsqueeze(-3).expand(B, H, L_Q, L_K, E) + index_sample = torch.randint(L_K, (L_Q, sample_k)) # real U = U_part(factor*ln(L_k))*L_q + K_sample = K_expand[:, :, torch.arange(L_Q).unsqueeze(1), index_sample, :] + Q_K_sample = torch.matmul(Q.unsqueeze(-2), K_sample.transpose(-2, -1)).squeeze(-2) + + # find the Top_k query with sparsity measurement + M = Q_K_sample.max(-1)[0] - torch.div(Q_K_sample.sum(-1), L_K) + M_top = M.topk(n_top, sorted=False)[1] + + # use the reduced Q to calculate Q_K + Q_reduce = Q[torch.arange(B)[:, None, None], + torch.arange(H)[None, :, None], + M_top, :] # factor*ln(L_q) + Q_K = torch.matmul(Q_reduce, K.transpose(-2, -1)) # factor*ln(L_q)*L_k + + return Q_K, M_top + + def _get_initial_context(self, V, L_Q): + B, H, L_V, D = V.shape + if not self.mask_flag: + # V_sum = V.sum(dim=-2) + V_sum = V.mean(dim=-2) + contex = V_sum.unsqueeze(-2).expand(B, H, L_Q, V_sum.shape[-1]).clone() + else: # use mask + assert(L_Q == L_V) # requires that L_Q == L_V, i.e. for self-attention only + contex = V.cumsum(dim=-2) + return contex + + def _update_context(self, context_in, V, scores, index, L_Q, attn_mask): + B, H, L_V, D = V.shape + + if self.mask_flag: + attn_mask = ProbMask(B, H, L_Q, index, scores, device=V.device) + scores.masked_fill_(attn_mask.mask, -np.inf) + + attn = torch.softmax(scores, dim=-1) # nn.Softmax(dim=-1)(scores) + + context_in[torch.arange(B)[:, None, None], + torch.arange(H)[None, :, None], + index, :] = torch.matmul(attn, V).type_as(context_in) + if self.output_attention: + attns = (torch.ones([B, H, L_V, L_V])/L_V).type_as(attn).to(attn.device) + attns[torch.arange(B)[:, None, None], torch.arange(H)[None, :, None], index, :] = attn + return (context_in, attns) + else: + return (context_in, None) + + def forward(self, queries, keys, values, attn_mask): + B, L_Q, H, D = queries.shape + _, L_K, _, _ = keys.shape + + queries = queries.transpose(2,1) + keys = keys.transpose(2,1) + values = values.transpose(2,1) + + U_part = self.factor * np.ceil(np.log(L_K)).astype('int').item() # c*ln(L_k) + u = self.factor * np.ceil(np.log(L_Q)).astype('int').item() # c*ln(L_q) + + U_part = U_part if U_part < L_K else L_K + u = u if u < L_Q else L_Q + + scores_top, index = self._prob_QK(queries, keys, sample_k=U_part, n_top=u) + + # add scale factor + scale = self.scale or 1./sqrt(D) + if scale is not None: + scores_top = scores_top * scale + # get the context + context = self._get_initial_context(values, L_Q) + # update the context with selected top_k queries + context, attn = self._update_context(context, values, scores_top, index, L_Q, attn_mask) + + return context.transpose(2,1).contiguous(), attn + + +class AttentionLayer(nn.Module): + def __init__(self, attention, d_model, n_heads, d_keys=None, d_values=None, mix=False): + super(AttentionLayer, self).__init__() + + d_keys = d_keys or (d_model//n_heads) + d_values = d_values or (d_model//n_heads) + + self.inner_attention = attention + self.query_projection = nn.Linear(d_model, d_keys * n_heads) + self.key_projection = nn.Linear(d_model, d_keys * n_heads) + self.value_projection = nn.Linear(d_model, d_values * n_heads) + self.out_projection = nn.Linear(d_values * n_heads, d_model) + self.n_heads = n_heads + self.mix = mix + + def forward(self, queries, keys, values, attn_mask): + B, L, _ = queries.shape + _, S, _ = keys.shape + H = self.n_heads + + queries = self.query_projection(queries).view(B, L, H, -1) + keys = self.key_projection(keys).view(B, S, H, -1) + values = self.value_projection(values).view(B, S, H, -1) + + out, attn = self.inner_attention( + queries, + keys, + values, + attn_mask + ) + if self.mix: + out = out.transpose(2,1).contiguous() + out = out.view(B, L, -1) + + return self.out_projection(out), attn \ No newline at end of file diff --git a/model/Informer/embed.py b/model/Informer/embed.py new file mode 100644 --- /dev/null +++ b/model/Informer/embed.py +import torch +import torch.nn as nn + +import math + +class PositionalEmbedding(nn.Module): + def __init__(self, d_model, max_len=5000): + super(PositionalEmbedding, self).__init__() + # Compute the positional encodings once in log space. + pe = torch.zeros(max_len, d_model).float() + pe.requires_grad = False + + position = torch.arange(0, max_len).float().unsqueeze(1) + div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp() + + pe[:, 0::2] = torch.sin(position * div_term) + pe[:, 1::2] = torch.cos(position * div_term) + + pe = pe.unsqueeze(0) + self.register_buffer('pe', pe) + + def forward(self, x): + return self.pe[:, :x.size(1)] + +class TokenEmbedding(nn.Module): + def __init__(self, c_in, d_model): + super(TokenEmbedding, self).__init__() + padding = 1 if torch.__version__>='1.5.0' else 2 + self.tokenConv = nn.Conv1d(in_channels=c_in, out_channels=d_model, + kernel_size=3, padding=padding, padding_mode='circular') + for m in self.modules(): + if isinstance(m, nn.Conv1d): + nn.init.kaiming_normal_(m.weight,mode='fan_in',nonlinearity='leaky_relu') + + def forward(self, x): + x = self.tokenConv(x.permute(0, 2, 1)).transpose(1,2) + return x + +class FixedEmbedding(nn.Module): + def __init__(self, c_in, d_model): + super(FixedEmbedding, self).__init__() + + w = torch.zeros(c_in, d_model).float() + w.requires_grad = False + + position = torch.arange(0, c_in).float().unsqueeze(1) + div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp() + + w[:, 0::2] = torch.sin(position * div_term) + w[:, 1::2] = torch.cos(position * div_term) + + self.emb = nn.Embedding(c_in, d_model) + self.emb.weight = nn.Parameter(w, requires_grad=False) + + def forward(self, x): + return self.emb(x).detach() + +class TemporalEmbedding(nn.Module): + def __init__(self,
d_model, embed_type='fixed', freq='h'): + super(TemporalEmbedding, self).__init__() + + minute_size = 4; hour_size = 24 + weekday_size = 7; day_size = 32; month_size = 13 + + Embed = FixedEmbedding if embed_type=='fixed' else nn.Embedding + if freq=='t': + self.minute_embed = Embed(minute_size, d_model) + self.hour_embed = Embed(hour_size, d_model) + self.weekday_embed = Embed(weekday_size, d_model) + self.day_embed = Embed(day_size, d_model) + self.month_embed = Embed(month_size, d_model) + + def forward(self, x): + x = x.long() + + # Check the size of x's last dimension to avoid index errors + last_dim = x.shape[-1] + + minute_x = 0. + hour_x = 0. + weekday_x = 0. + day_x = 0. + month_x = 0. + + # Our generated time features have only 2 dimensions: [hour, day_of_week] + # So we need to map them to the appropriate embedding layers + if last_dim > 0: + # Use the first dimension for hour + # Ensure hour is in the valid range [0, 23] + hour = torch.clamp(x[:,:,0], 0, 23) + hour_x = self.hour_embed(hour) + + if last_dim > 1: + # Use the second dimension for weekday + # Ensure weekday is in the valid range [0, 6] + weekday = torch.clamp(x[:,:,1], 0, 6) + weekday_x = self.weekday_embed(weekday) + + return hour_x + weekday_x + day_x + month_x + minute_x + +class TimeFeatureEmbedding(nn.Module): + def __init__(self, d_model, embed_type='timeF', freq='h'): + super(TimeFeatureEmbedding, self).__init__() + + freq_map = {'h':4, 't':5, 's':6, 'm':1, 'a':1, 'w':2, 'd':3, 'b':3} + d_inp = freq_map[freq] + self.embed = nn.Linear(d_inp, d_model) + + def forward(self, x): + return self.embed(x) + +class DataEmbedding(nn.Module): + def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1): + super(DataEmbedding, self).__init__() + + self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model) + self.position_embedding = PositionalEmbedding(d_model=d_model) + self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type, freq=freq) if embed_type!='timeF' else TimeFeatureEmbedding(d_model=d_model, embed_type=embed_type, freq=freq) + + self.dropout = nn.Dropout(p=dropout) + + def forward(self, x, x_mark): + a = self.value_embedding(x) + b = self.position_embedding(x) + c = self.temporal_embedding(x_mark) + x = a + b + c + + return self.dropout(x) \ No newline at end of file diff --git a/model/Informer/encoder.py b/model/Informer/encoder.py new file mode 100644 index 0000000..7aeb877 --- /dev/null +++ b/model/Informer/encoder.py @@ -0,0 +1,98 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +class ConvLayer(nn.Module): + def __init__(self, c_in): + super(ConvLayer, self).__init__() + padding = 1 if torch.__version__>='1.5.0' else 2 + self.downConv = nn.Conv1d(in_channels=c_in, + out_channels=c_in, + kernel_size=3, + padding=padding, + padding_mode='circular') + self.norm = nn.BatchNorm1d(c_in) + self.activation = nn.ELU() + self.maxPool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1) + + def forward(self, x): + x = self.downConv(x.permute(0, 2, 1)) + x = self.norm(x) + x = self.activation(x) + x = self.maxPool(x) + x = x.transpose(1,2) + return x + +class EncoderLayer(nn.Module): + def __init__(self, attention, d_model, d_ff=None, dropout=0.1, activation="relu"): + super(EncoderLayer, self).__init__() + d_ff = d_ff or 4*d_model + self.attention = attention + self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1) + self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1) + self.norm1 =
nn.LayerNorm(d_model) + self.norm2 = nn.LayerNorm(d_model) + self.dropout = nn.Dropout(dropout) + self.activation = F.relu if activation == "relu" else F.gelu + + def forward(self, x, attn_mask=None): + # x [B, L, D] + # x = x + self.dropout(self.attention( + # x, x, x, + # attn_mask = attn_mask + # )) + new_x, attn = self.attention( + x, x, x, + attn_mask = attn_mask + ) + x = x + self.dropout(new_x) + + y = x = self.norm1(x) + y = self.dropout(self.activation(self.conv1(y.transpose(-1,1)))) + y = self.dropout(self.conv2(y).transpose(-1,1)) + + return self.norm2(x+y), attn + +class Encoder(nn.Module): + def __init__(self, attn_layers, conv_layers=None, norm_layer=None): + super(Encoder, self).__init__() + self.attn_layers = nn.ModuleList(attn_layers) + self.conv_layers = nn.ModuleList(conv_layers) if conv_layers is not None else None + self.norm = norm_layer + + def forward(self, x, attn_mask=None): + # x [B, L, D] + attns = [] + if self.conv_layers is not None: + for attn_layer, conv_layer in zip(self.attn_layers, self.conv_layers): + x, attn = attn_layer(x, attn_mask=attn_mask) + x = conv_layer(x) + attns.append(attn) + x, attn = self.attn_layers[-1](x, attn_mask=attn_mask) + attns.append(attn) + else: + for attn_layer in self.attn_layers: + x, attn = attn_layer(x, attn_mask=attn_mask) + attns.append(attn) + + if self.norm is not None: + x = self.norm(x) + + return x, attns + +class EncoderStack(nn.Module): + def __init__(self, encoders, inp_lens): + super(EncoderStack, self).__init__() + self.encoders = nn.ModuleList(encoders) + self.inp_lens = inp_lens + + def forward(self, x, attn_mask=None): + # x [B, L, D] + x_stack = []; attns = [] + for i_len, encoder in zip(self.inp_lens, self.encoders): + inp_len = x.shape[1]//(2**i_len) + x_s, attn = encoder(x[:, -inp_len:, :]) + x_stack.append(x_s); attns.append(attn) + x_stack = torch.cat(x_stack, -2) + + return x_stack, attns \ No newline at end of file diff --git a/model/Informer/masking.py b/model/Informer/masking.py new file mode 100644 index 0000000..7fd479e --- /dev/null +++ b/model/Informer/masking.py @@ -0,0 +1,24 @@ +import torch + +class TriangularCausalMask(): + def __init__(self, B, L, device="cpu"): + mask_shape = [B, 1, L, L] + with torch.no_grad(): + self._mask = torch.triu(torch.ones(mask_shape, dtype=torch.bool), diagonal=1).to(device) + + @property + def mask(self): + return self._mask + +class ProbMask(): + def __init__(self, B, H, L, index, scores, device="cpu"): + _mask = torch.ones(L, scores.shape[-1], dtype=torch.bool).to(device).triu(1) + _mask_ex = _mask[None, None, :].expand(B, H, L, scores.shape[-1]) + indicator = _mask_ex[torch.arange(B)[:, None, None], + torch.arange(H)[None, :, None], + index, :].to(device) + self._mask = indicator.view(scores.shape).to(device) + + @property + def mask(self): + return self._mask \ No newline at end of file diff --git a/model/Informer/model.py b/model/Informer/model.py new file mode 100644 index 0000000..fb7471f --- /dev/null +++ b/model/Informer/model.py @@ -0,0 +1,141 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +from model.Informer.encoder import Encoder, EncoderLayer, ConvLayer, EncoderStack +from model.Informer.decoder import Decoder, DecoderLayer +from model.Informer.attn import FullAttention, ProbAttention, AttentionLayer +from model.Informer.embed import DataEmbedding + +class Informer(nn.Module): + def __init__(self, args): + super(Informer, self).__init__() + self.pred_len = args['pred_len'] + self.attn = args['attn'] + 
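# attn selects the encoder self-attention: 'prob' uses ProbSparse attention, + # any other value falls back to full O(L^2) attention. +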
self.output_attention = args['output_attention'] + + # Encoding + self.enc_embedding = DataEmbedding(args['enc_in'], args['d_model'], args['embed'], args['freq'], args['dropout']) + self.dec_embedding = DataEmbedding(args['dec_in'], args['d_model'], args['embed'], args['freq'], args['dropout']) + # Attention + Attn = ProbAttention if args['attn']=='prob' else FullAttention + # Encoder + self.encoder = Encoder( + [ + EncoderLayer( + AttentionLayer(Attn(False, args['factor'], attention_dropout=args['dropout'], output_attention=args['output_attention']), + args['d_model'], args['n_heads'], mix=False), + args['d_model'], + args['d_ff'], + dropout=args['dropout'], + activation=args['activation'] + ) for l in range(args['e_layers']) + ], + [ + ConvLayer( + args['d_model'] + ) for l in range(args['e_layers']-1) + ] if args['distil'] else None, + norm_layer=torch.nn.LayerNorm(args['d_model']) + ) + # Decoder + self.decoder = Decoder( + [ + DecoderLayer( + AttentionLayer(Attn(True, args['factor'], attention_dropout=args['dropout'], output_attention=False), + args['d_model'], args['n_heads'], mix=args['mix']), + AttentionLayer(FullAttention(False, args['factor'], attention_dropout=args['dropout'], output_attention=False), + args['d_model'], args['n_heads'], mix=False), + args['d_model'], + args['d_ff'], + dropout=args['dropout'], + activation=args['activation'], + ) + for l in range(args['d_layers']) + ], + norm_layer=torch.nn.LayerNorm(args['d_model']) + ) + self.projection = nn.Linear(args['d_model'], args['c_out'], bias=True) + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, + enc_self_mask=None, dec_self_mask=None, dec_enc_mask=None): + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=enc_self_mask) + + dec_out = self.dec_embedding(x_dec, x_mark_dec) + dec_out = self.decoder(dec_out, enc_out, x_mask=dec_self_mask, cross_mask=dec_enc_mask) + dec_out = self.projection(dec_out) + + if self.output_attention: + return dec_out[:,-self.pred_len:,:], attns + else: + return dec_out[:,-self.pred_len:,:] # [B, L, D] + + +class InformerStack(nn.Module): + def __init__(self, args): + super(InformerStack, self).__init__() + self.pred_len = args['pred_len'] + self.attn = args['attn'] + self.output_attention = args['output_attention'] + + # Encoding + self.enc_embedding = DataEmbedding(args['enc_in'], args['d_model'], args['embed'], args['freq'], args['dropout']) + self.dec_embedding = DataEmbedding(args['dec_in'], args['d_model'], args['embed'], args['freq'], args['dropout']) + # Attention + Attn = ProbAttention if args['attn']=='prob' else FullAttention + # Encoder + + inp_lens = list(range(len(args['e_layers']))) # [0,1,2,...] 
you can customize here + encoders = [ + Encoder( + [ + EncoderLayer( + AttentionLayer(Attn(False, args['factor'], attention_dropout=args['dropout'], output_attention=args['output_attention']), + args['d_model'], args['n_heads'], mix=False), + args['d_model'], + args['d_ff'], + dropout=args['dropout'], + activation=args['activation'] + ) for l in range(el) + ], + [ + ConvLayer( + args['d_model'] + ) for l in range(el-1) + ] if args['distil'] else None, + norm_layer=torch.nn.LayerNorm(args['d_model']) + ) for el in args['e_layers']] + self.encoder = EncoderStack(encoders, inp_lens) + # Decoder + self.decoder = Decoder( + [ + DecoderLayer( + AttentionLayer(Attn(True, args['factor'], attention_dropout=args['dropout'], output_attention=False), + args['d_model'], args['n_heads'], mix=args['mix']), + AttentionLayer(FullAttention(False, args['factor'], attention_dropout=args['dropout'], output_attention=False), + args['d_model'], args['n_heads'], mix=False), + args['d_model'], + args['d_ff'], + dropout=args['dropout'], + activation=args['activation'], + ) + for l in range(args['d_layers']) + ], + norm_layer=torch.nn.LayerNorm(args['d_model']) + ) + self.projection = nn.Linear(args['d_model'], args['c_out'], bias=True) + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, + enc_self_mask=None, dec_self_mask=None, dec_enc_mask=None): + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=enc_self_mask) + + dec_out = self.dec_embedding(x_dec, x_mark_dec) + dec_out = self.decoder(dec_out, enc_out, x_mask=dec_self_mask, cross_mask=dec_enc_mask) + dec_out = self.projection(dec_out) + + if self.output_attention: + return dec_out[:,-self.pred_len:,:], attns + else: + return dec_out[:,-self.pred_len:,:] # [B, L, D] \ No newline at end of file diff --git a/model/Informer/model_config.json b/model/Informer/model_config.json new file mode 100644 index 0000000..3836cd0 --- /dev/null +++ b/model/Informer/model_config.json @@ -0,0 +1,7 @@ +[ + { + "name": "Informer", + "module": "model.Informer.model", + "entry": "Informer" + } +] \ No newline at end of file diff --git a/model/MTGNN/MTGNN.py b/model/MTGNN/MTGNN.py new file mode 100644 index 0000000..43d9b31 --- /dev/null +++ b/model/MTGNN/MTGNN.py @@ -0,0 +1,155 @@ +import torch.nn as nn +from model.MTGNN.layer import * + + +class gtnet(nn.Module): + def __init__(self, configs): + super(gtnet, self).__init__() + self.gcn_true = configs['gcn_true'] # whether to use graph convolution + self.buildA_true = configs['buildA_true'] # whether to build the adjacency matrix dynamically + self.num_nodes = configs['num_nodes'] # number of nodes + self.device = configs['device'] # device (CPU/GPU) + self.dropout = configs['dropout'] # dropout rate + self.predefined_A = configs.get('predefined_A', None) # predefined adjacency matrix + self.static_feat = configs.get('static_feat', None) # static node features + self.subgraph_size = configs['subgraph_size'] # subgraph size (top-k neighbours) + self.node_dim = configs['node_dim'] # node embedding dimension + self.dilation_exponential = configs['dilation_exponential'] # dilation growth exponent + self.conv_channels = configs['conv_channels'] # convolution channels + self.residual_channels = configs['residual_channels'] # residual channels + self.skip_channels = configs['skip_channels'] # skip-connection channels + self.end_channels = configs['end_channels'] # output-layer channels + self.seq_length = configs['seq_len'] # input sequence length + self.in_dim = configs['in_dim'] # input feature dimension + self.out_len = configs['out_len'] # output sequence length + self.out_dim = configs['out_dim'] # output feature dimension + self.layers = configs['layers'] # number of layers + self.propalpha = configs['propalpha'] # graph propagation retention ratio alpha + self.tanhalpha = configs['tanhalpha'] # tanh saturation factor for graph learning +
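# propalpha is the retention ratio of mix-hop propagation in layer.py, + # h^(k) = propalpha * x + (1 - propalpha) * A_norm h^(k-1), while tanhalpha + # scales the node embeddings before the tanh when learning the adjacency. +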
self.layer_norm_affline = configs['layer_norm_affline'] # whether LayerNorm uses an affine transform + self.gcn_depth = configs['gcn_depth'] # graph convolution depth + self.filter_convs = nn.ModuleList() # filter convolutions + self.gate_convs = nn.ModuleList() # gate convolutions + self.residual_convs = nn.ModuleList() # residual convolutions + self.skip_convs = nn.ModuleList() # skip-connection convolutions + self.gconv1 = nn.ModuleList() # graph convolutions (forward adjacency) + self.gconv2 = nn.ModuleList() # graph convolutions (transposed adjacency) + self.norm = nn.ModuleList() # normalization layers + self.start_conv = nn.Conv2d(in_channels=self.in_dim, + out_channels=self.residual_channels, + kernel_size=(1, 1)) + self.gc = graph_constructor(self.num_nodes, self.subgraph_size, self.node_dim, self.device, alpha=self.tanhalpha, static_feat=self.static_feat) + + kernel_size = 7 + if self.dilation_exponential>1: + self.receptive_field = int(1+(kernel_size-1)*(self.dilation_exponential**self.layers-1)/(self.dilation_exponential-1)) + else: + self.receptive_field = self.layers*(kernel_size-1) + 1 + + for i in range(1): + if self.dilation_exponential>1: + rf_size_i = int(1 + i*(kernel_size-1)*(self.dilation_exponential**self.layers-1)/(self.dilation_exponential-1)) + else: + rf_size_i = i*self.layers*(kernel_size-1)+1 + new_dilation = 1 + for j in range(1,self.layers+1): + if self.dilation_exponential > 1: + rf_size_j = int(rf_size_i + (kernel_size-1)*(self.dilation_exponential**j-1)/(self.dilation_exponential-1)) + else: + rf_size_j = rf_size_i+j*(kernel_size-1) + + self.filter_convs.append(dilated_inception(self.residual_channels, self.conv_channels, dilation_factor=new_dilation)) + self.gate_convs.append(dilated_inception(self.residual_channels, self.conv_channels, dilation_factor=new_dilation)) + self.residual_convs.append(nn.Conv2d(in_channels=self.conv_channels, + out_channels=self.residual_channels, + kernel_size=(1, 1))) + if self.seq_length>self.receptive_field: + self.skip_convs.append(nn.Conv2d(in_channels=self.conv_channels, + out_channels=self.skip_channels, + kernel_size=(1, self.seq_length-rf_size_j+1))) + else: + self.skip_convs.append(nn.Conv2d(in_channels=self.conv_channels, + out_channels=self.skip_channels, + kernel_size=(1, self.receptive_field-rf_size_j+1))) + + if self.gcn_true: + self.gconv1.append(mixprop(self.conv_channels, self.residual_channels, self.gcn_depth, self.dropout, self.propalpha)) + self.gconv2.append(mixprop(self.conv_channels, self.residual_channels, self.gcn_depth, self.dropout, self.propalpha)) + + if self.seq_length>self.receptive_field: + self.norm.append(LayerNorm((self.residual_channels, self.num_nodes, self.seq_length - rf_size_j + 1),elementwise_affine=self.layer_norm_affline)) + else: + self.norm.append(LayerNorm((self.residual_channels, self.num_nodes, self.receptive_field - rf_size_j + 1),elementwise_affine=self.layer_norm_affline)) + + new_dilation *= self.dilation_exponential + + self.end_conv_1 = nn.Conv2d(in_channels=self.skip_channels, + out_channels=self.end_channels, + kernel_size=(1,1), + bias=True) + self.end_conv_2 = nn.Conv2d(in_channels=self.end_channels, + out_channels=self.out_len * self.out_dim, + kernel_size=(1,1), + bias=True) + if self.seq_length > self.receptive_field: + self.skip0 = nn.Conv2d(in_channels=self.in_dim, out_channels=self.skip_channels, kernel_size=(1, self.seq_length), bias=True) + self.skipE = nn.Conv2d(in_channels=self.residual_channels, out_channels=self.skip_channels, kernel_size=(1, self.seq_length-self.receptive_field+1), bias=True) + + + else: + self.skip0 = nn.Conv2d(in_channels=self.in_dim, out_channels=self.skip_channels, kernel_size=(1, self.receptive_field),
bias=True) + self.skipE = nn.Conv2d(in_channels=self.residual_channels, out_channels=self.skip_channels, kernel_size=(1, 1), bias=True) + + self.idx = torch.arange(self.num_nodes).to(self.device) + + + def forward(self, input, idx=None): + input = input[..., :-2] # drop the appended periodic (time) embeddings + input = input.transpose(1, 3) + seq_len = input.size(3) + assert seq_len==self.seq_length, 'input sequence length not equal to preset sequence length' + + if self.seq_length<self.receptive_field: + input = nn.functional.pad(input,(self.receptive_field-self.seq_length,0,0,0)) + + if self.gcn_true: + if self.buildA_true: + if idx is None: + adp = self.gc(self.idx) + else: + adp = self.gc(idx) + else: + adp = self.predefined_A + + x = self.start_conv(input) + skip = self.skip0(F.dropout(input, self.dropout, training=self.training)) + for i in range(self.layers): + residual = x + filter = torch.tanh(self.filter_convs[i](x)) + gate = torch.sigmoid(self.gate_convs[i](x)) + x = filter * gate + x = F.dropout(x, self.dropout, training=self.training) + s = self.skip_convs[i](x) + skip = s + skip + if self.gcn_true: + x = self.gconv1[i](x, adp)+self.gconv2[i](x, adp.transpose(1,0)) + else: + x = self.residual_convs[i](x) + x = x + residual[:, :, :, -x.size(3):] + if idx is None: + x = self.norm[i](x,self.idx) + else: + x = self.norm[i](x,idx) + + skip = self.skipE(x) + skip + x = F.relu(skip) + x = F.relu(self.end_conv_1(x)) + x = self.end_conv_2(x) # [b, out_len*out_dim, n, 1] + # reshape the prediction: [b, t*c, n, 1] -> [b,t,c,n] -> [b, t, n, c] + x = x.reshape(x.size(0), self.out_len, self.out_dim, self.num_nodes) + x = x.permute(0, 1, 3, 2) + return x \ No newline at end of file diff --git a/model/MTGNN/layer.py b/model/MTGNN/layer.py new file mode 100644 index 0000000..09b783d --- /dev/null +++ b/model/MTGNN/layer.py @@ -0,0 +1,328 @@ +from __future__ import division +import torch +import torch.nn as nn +from torch.nn import init +import numbers +import torch.nn.functional as F + + +class nconv(nn.Module): + def __init__(self): + super(nconv,self).__init__() + + def forward(self,x, A): + x = torch.einsum('ncwl,vw->ncvl',(x,A)) + return x.contiguous() + +class dy_nconv(nn.Module): + def __init__(self): + super(dy_nconv,self).__init__() + + def forward(self,x, A): + x = torch.einsum('ncvl,nvwl->ncwl',(x,A)) + return x.contiguous() + +class linear(nn.Module): + def __init__(self,c_in,c_out,bias=True): + super(linear,self).__init__() + self.mlp = torch.nn.Conv2d(c_in, c_out, kernel_size=(1, 1), padding=(0,0), stride=(1,1), bias=bias) + + def forward(self,x): + return self.mlp(x) + + +class prop(nn.Module): + def __init__(self,c_in,c_out,gdep,dropout,alpha): + super(prop, self).__init__() + self.nconv = nconv() + self.mlp = linear(c_in,c_out) + self.gdep = gdep + self.dropout = dropout + self.alpha = alpha + + def forward(self,x,adj): + adj = adj + torch.eye(adj.size(0)).to(x.device) + d = adj.sum(1) + h = x + dv = d + a = adj / dv.view(-1, 1) + for i in range(self.gdep): + h = self.alpha*x + (1-self.alpha)*self.nconv(h,a) + ho = self.mlp(h) + return ho + + +class mixprop(nn.Module): + def __init__(self,c_in,c_out,gdep,dropout,alpha): + super(mixprop, self).__init__() + self.nconv = nconv() + self.mlp = linear((gdep+1)*c_in,c_out) + self.gdep = gdep + self.dropout = dropout + self.alpha = alpha + + + def forward(self,x,adj): + adj = adj + torch.eye(adj.size(0)).to(x.device) + d = adj.sum(1) + h = x + out = [h] + a = adj / d.view(-1, 1) + for i in range(self.gdep): + h = self.alpha*x + (1-self.alpha)*self.nconv(h,a) + out.append(h) + ho = torch.cat(out,dim=1) + ho = self.mlp(ho) + return ho + +class dy_mixprop(nn.Module): + def __init__(self,c_in,c_out,gdep,dropout,alpha): + super(dy_mixprop, self).__init__() + self.nconv = dy_nconv() + self.mlp1 = linear((gdep+1)*c_in,c_out) + self.mlp2 = linear((gdep+1)*c_in,c_out) + + self.gdep = gdep + self.dropout = dropout + self.alpha = alpha + self.lin1 = linear(c_in,c_in) + self.lin2 = linear(c_in,c_in) + + + def forward(self,x): + #adj = adj + torch.eye(adj.size(0)).to(x.device) + #d = adj.sum(1) + x1 = torch.tanh(self.lin1(x)) + x2 = torch.tanh(self.lin2(x)) + adj = self.nconv(x1.transpose(2,1),x2) + adj0 = torch.softmax(adj, dim=2) + adj1 = torch.softmax(adj.transpose(2,1), dim=2) + + h = x + out = [h] + for i in range(self.gdep): + h = self.alpha*x + (1-self.alpha)*self.nconv(h,adj0) + out.append(h) + ho = torch.cat(out,dim=1) + ho1 = self.mlp1(ho) + + + h = x + out = [h] + for i in range(self.gdep): + h = self.alpha * x + (1 - self.alpha) * self.nconv(h, adj1) + out.append(h) + ho = torch.cat(out, dim=1) + ho2 =
self.mlp2(ho) + + return ho1+ho2 + + + +class dilated_1D(nn.Module): + def __init__(self, cin, cout, dilation_factor=2): + super(dilated_1D, self).__init__() + self.tconv = nn.ModuleList() + self.kernel_set = [2,3,6,7] + self.tconv = nn.Conv2d(cin,cout,(1,7),dilation=(1,dilation_factor)) + + def forward(self,input): + x = self.tconv(input) + return x + +class dilated_inception(nn.Module): + def __init__(self, cin, cout, dilation_factor=2): + super(dilated_inception, self).__init__() + self.tconv = nn.ModuleList() + self.kernel_set = [2,3,6,7] + cout = int(cout/len(self.kernel_set)) + for kern in self.kernel_set: + self.tconv.append(nn.Conv2d(cin,cout,(1,kern),dilation=(1,dilation_factor))) + + def forward(self,input): + x = [] + for i in range(len(self.kernel_set)): + x.append(self.tconv[i](input)) + for i in range(len(self.kernel_set)): + x[i] = x[i][...,-x[-1].size(3):] + x = torch.cat(x,dim=1) + return x + + +class graph_constructor(nn.Module): + def __init__(self, nnodes, k, dim, device, alpha=3, static_feat=None): + super(graph_constructor, self).__init__() + self.nnodes = nnodes + if static_feat is not None: + xd = static_feat.shape[1] + self.lin1 = nn.Linear(xd, dim) + self.lin2 = nn.Linear(xd, dim) + else: + self.emb1 = nn.Embedding(nnodes, dim) + self.emb2 = nn.Embedding(nnodes, dim) + self.lin1 = nn.Linear(dim,dim) + self.lin2 = nn.Linear(dim,dim) + + self.device = device + self.k = k + self.dim = dim + self.alpha = alpha + self.static_feat = static_feat + + def forward(self, idx): + if self.static_feat is None: + nodevec1 = self.emb1(idx) + nodevec2 = self.emb2(idx) + else: + nodevec1 = self.static_feat[idx,:] + nodevec2 = nodevec1 + + nodevec1 = torch.tanh(self.alpha*self.lin1(nodevec1)) + nodevec2 = torch.tanh(self.alpha*self.lin2(nodevec2)) + + a = torch.mm(nodevec1, nodevec2.transpose(1,0))-torch.mm(nodevec2, nodevec1.transpose(1,0)) + adj = F.relu(torch.tanh(self.alpha*a)) + mask = torch.zeros(idx.size(0), idx.size(0)).to(self.device) + mask.fill_(float('0')) + s1,t1 = (adj + torch.rand_like(adj)*0.01).topk(self.k,1) + mask.scatter_(1,t1,s1.fill_(1)) + adj = adj*mask + return adj + + def fullA(self, idx): + if self.static_feat is None: + nodevec1 = self.emb1(idx) + nodevec2 = self.emb2(idx) + else: + nodevec1 = self.static_feat[idx,:] + nodevec2 = nodevec1 + + nodevec1 = torch.tanh(self.alpha*self.lin1(nodevec1)) + nodevec2 = torch.tanh(self.alpha*self.lin2(nodevec2)) + + a = torch.mm(nodevec1, nodevec2.transpose(1,0))-torch.mm(nodevec2, nodevec1.transpose(1,0)) + adj = F.relu(torch.tanh(self.alpha*a)) + return adj + +class graph_global(nn.Module): + def __init__(self, nnodes, k, dim, device, alpha=3, static_feat=None): + super(graph_global, self).__init__() + self.nnodes = nnodes + self.A = nn.Parameter(torch.randn(nnodes, nnodes).to(device), requires_grad=True).to(device) + + def forward(self, idx): + return F.relu(self.A) + + +class graph_undirected(nn.Module): + def __init__(self, nnodes, k, dim, device, alpha=3, static_feat=None): + super(graph_undirected, self).__init__() + self.nnodes = nnodes + if static_feat is not None: + xd = static_feat.shape[1] + self.lin1 = nn.Linear(xd, dim) + else: + self.emb1 = nn.Embedding(nnodes, dim) + self.lin1 = nn.Linear(dim,dim) + + self.device = device + self.k = k + self.dim = dim + self.alpha = alpha + self.static_feat = static_feat + + def forward(self, idx): + if self.static_feat is None: + nodevec1 = self.emb1(idx) + nodevec2 = self.emb1(idx) + else: + nodevec1 = self.static_feat[idx,:] + nodevec2 = nodevec1 + + nodevec1 = 
torch.tanh(self.alpha*self.lin1(nodevec1)) + nodevec2 = torch.tanh(self.alpha*self.lin1(nodevec2)) + + a = torch.mm(nodevec1, nodevec2.transpose(1,0)) + adj = F.relu(torch.tanh(self.alpha*a)) + mask = torch.zeros(idx.size(0), idx.size(0)).to(self.device) + mask.fill_(float('0')) + s1,t1 = adj.topk(self.k,1) + mask.scatter_(1,t1,s1.fill_(1)) + adj = adj*mask + return adj + + + +class graph_directed(nn.Module): + def __init__(self, nnodes, k, dim, device, alpha=3, static_feat=None): + super(graph_directed, self).__init__() + self.nnodes = nnodes + if static_feat is not None: + xd = static_feat.shape[1] + self.lin1 = nn.Linear(xd, dim) + self.lin2 = nn.Linear(xd, dim) + else: + self.emb1 = nn.Embedding(nnodes, dim) + self.emb2 = nn.Embedding(nnodes, dim) + self.lin1 = nn.Linear(dim,dim) + self.lin2 = nn.Linear(dim,dim) + + self.device = device + self.k = k + self.dim = dim + self.alpha = alpha + self.static_feat = static_feat + + def forward(self, idx): + if self.static_feat is None: + nodevec1 = self.emb1(idx) + nodevec2 = self.emb2(idx) + else: + nodevec1 = self.static_feat[idx,:] + nodevec2 = nodevec1 + + nodevec1 = torch.tanh(self.alpha*self.lin1(nodevec1)) + nodevec2 = torch.tanh(self.alpha*self.lin2(nodevec2)) + + a = torch.mm(nodevec1, nodevec2.transpose(1,0)) + adj = F.relu(torch.tanh(self.alpha*a)) + mask = torch.zeros(idx.size(0), idx.size(0)).to(self.device) + mask.fill_(float('0')) + s1,t1 = adj.topk(self.k,1) + mask.scatter_(1,t1,s1.fill_(1)) + adj = adj*mask + return adj + + +class LayerNorm(nn.Module): + __constants__ = ['normalized_shape', 'weight', 'bias', 'eps', 'elementwise_affine'] + def __init__(self, normalized_shape, eps=1e-5, elementwise_affine=True): + super(LayerNorm, self).__init__() + if isinstance(normalized_shape, numbers.Integral): + normalized_shape = (normalized_shape,) + self.normalized_shape = tuple(normalized_shape) + self.eps = eps + self.elementwise_affine = elementwise_affine + if self.elementwise_affine: + self.weight = nn.Parameter(torch.Tensor(*normalized_shape)) + self.bias = nn.Parameter(torch.Tensor(*normalized_shape)) + else: + self.register_parameter('weight', None) + self.register_parameter('bias', None) + self.reset_parameters() + + + def reset_parameters(self): + if self.elementwise_affine: + init.ones_(self.weight) + init.zeros_(self.bias) + + def forward(self, input, idx): + if self.elementwise_affine: + return F.layer_norm(input, tuple(input.shape[1:]), self.weight[:,idx,:], self.bias[:,idx,:], self.eps) + else: + return F.layer_norm(input, tuple(input.shape[1:]), self.weight, self.bias, self.eps) + + def extra_repr(self): + return '{normalized_shape}, eps={eps}, ' \ + 'elementwise_affine={elementwise_affine}'.format(**self.__dict__) \ No newline at end of file diff --git a/model/MTGNN/model_config.json b/model/MTGNN/model_config.json new file mode 100644 index 0000000..94aa32c --- /dev/null +++ b/model/MTGNN/model_config.json @@ -0,0 +1,7 @@ +[ + { + "name": "MTGNN", + "module": "model.MTGNN.MTGNN", + "entry": "gtnet" + } +] \ No newline at end of file diff --git a/model/MegaCRN/model_config.json b/model/MegaCRN/model_config.json new file mode 100644 index 0000000..e8c0599 --- /dev/null +++ b/model/MegaCRN/model_config.json @@ -0,0 +1,7 @@ +[ + { + "name": "MegaCRN", + "module": "model.MegaCRN.MegaCRNModel", + "entry": "MegaCRNModel" + } +] \ No newline at end of file diff --git a/model/NLT/model_config.json b/model/NLT/model_config.json new file mode 100644 index 0000000..a99a6b1 --- /dev/null +++ b/model/NLT/model_config.json @@ -0,0 
+1,7 @@ +[ + { + "name": "NLT", + "module": "model.NLT.HierAttnLstm", + "entry": "HierAttnLstm" + } +] \ No newline at end of file diff --git a/model/PDG2SEQ/model_config.json b/model/PDG2SEQ/model_config.json new file mode 100644 index 0000000..783f3bf --- /dev/null +++ b/model/PDG2SEQ/model_config.json @@ -0,0 +1,7 @@ +[ + { + "name": "PDG2SEQ", + "module": "model.PDG2SEQ.PDG2Seqb", + "entry": "PDG2Seq" + } +] \ No newline at end of file diff --git a/model/PatchTST/PatchTST.py b/model/PatchTST/PatchTST.py new file mode 100644 index 0000000..4645c28 --- /dev/null +++ b/model/PatchTST/PatchTST.py @@ -0,0 +1,109 @@ +import torch +from torch import nn +from model.PatchTST.layers.Transformer import Encoder, EncoderLayer +from model.PatchTST.layers.SelfAttention import FullAttention, AttentionLayer +from model.PatchTST.layers.Embed import PatchEmbedding + +class Transpose(nn.Module): + def __init__(self, *dims, contiguous=False): + super().__init__() + self.dims, self.contiguous = dims, contiguous + def forward(self, x): + if self.contiguous: return x.transpose(*self.dims).contiguous() + else: return x.transpose(*self.dims) + + +class FlattenHead(nn.Module): + def __init__(self, n_vars, nf, target_window, head_dropout=0): + super().__init__() + self.n_vars = n_vars + self.flatten = nn.Flatten(start_dim=-2) + self.linear = nn.Linear(nf, target_window) + self.dropout = nn.Dropout(head_dropout) + + def forward(self, x): # x: [bs x nvars x d_model x patch_num] + x = self.flatten(x) + x = self.linear(x) + x = self.dropout(x) + return x + + +class Model(nn.Module): + """ + Paper link: https://arxiv.org/pdf/2211.14730.pdf + """ + + def __init__(self, configs): + """ + patch_len: int, patch len for patch_embedding + stride: int, stride for patch_embedding + """ + super().__init__() + self.seq_len = configs['seq_len'] + self.pred_len = configs['pred_len'] + self.patch_len = configs['patch_len'] + self.stride = configs['stride'] + padding = self.stride + + # patching and embedding + self.patch_embedding = PatchEmbedding( + configs['d_model'], self.patch_len, self.stride, padding, configs['dropout']) + + # Encoder + self.encoder = Encoder( + [ + EncoderLayer( + AttentionLayer( + FullAttention(False, attention_dropout=configs['dropout'], + output_attention=False), configs['d_model'], configs['n_heads']), + configs['d_model'], + configs['d_ff'], + dropout=configs['dropout'], + activation=configs['activation'] + ) for l in range(configs['e_layers']) + ], + norm_layer=nn.Sequential(Transpose(1,2), nn.BatchNorm1d(configs['d_model']), Transpose(1,2)) + ) + + # Prediction Head + self.head_nf = configs['d_model'] * \ + int((configs['seq_len'] - self.patch_len) / self.stride + 2) + self.head = FlattenHead(configs['enc_in'], self.head_nf, configs['pred_len'], + head_dropout=configs['dropout']) + + def forecast(self, x_enc): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt( + torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + # do patching and embedding + x_enc = x_enc.permute(0, 2, 1) + # u: [bs * nvars x patch_num x d_model] + enc_out, n_vars = self.patch_embedding(x_enc) + + # Encoder + # z: [bs * nvars x patch_num x d_model] + enc_out, attns = self.encoder(enc_out) + # z: [bs x nvars x patch_num x d_model] + enc_out = torch.reshape( + enc_out, (-1, n_vars, enc_out.shape[-2], enc_out.shape[-1])) + # z: [bs x nvars x d_model x patch_num] + enc_out = enc_out.permute(0, 1, 3, 2) + + # Decoder 
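head: it flattens the per-variable [d_model x patch_num] map and + # linearly projects it to the pred_len target window (no attentive decoding).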
+ dec_out = self.head(enc_out) # z: [bs x nvars x target_window] + dec_out = dec_out.permute(0, 2, 1) + + # De-Normalization from Non-stationary Transformer + dec_out = dec_out * \ + (stdev[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1)) + dec_out = dec_out + \ + (means[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1)) + return dec_out + + def forward(self, x_enc): + dec_out = self.forecast(x_enc) + return dec_out[:, -self.pred_len:, :] # [B, L, D] diff --git a/model/PatchTST/layers/Embed.py b/model/PatchTST/layers/Embed.py new file mode 100644 index 0000000..d38d093 --- /dev/null +++ b/model/PatchTST/layers/Embed.py @@ -0,0 +1,50 @@ +import torch +import torch.nn as nn +import math + +class PositionalEmbedding(nn.Module): + def __init__(self, d_model, max_len=5000): + super(PositionalEmbedding, self).__init__() + # Compute the positional encodings once in log space. + pe = torch.zeros(max_len, d_model).float() + pe.requires_grad = False + + position = torch.arange(0, max_len).float().unsqueeze(1) + div_term = (torch.arange(0, d_model, 2).float() + * -(math.log(10000.0) / d_model)).exp() + + pe[:, 0::2] = torch.sin(position * div_term) + pe[:, 1::2] = torch.cos(position * div_term) + + pe = pe.unsqueeze(0) + self.register_buffer('pe', pe) + + def forward(self, x): + return self.pe[:, :x.size(1)] + +class PatchEmbedding(nn.Module): + def __init__(self, d_model, patch_len, stride, padding, dropout): + super(PatchEmbedding, self).__init__() + # Patching + self.patch_len = patch_len + self.stride = stride + self.padding_patch_layer = nn.ReplicationPad1d((0, padding)) + + # Backbone, Input encoding: projection of feature vectors onto a d-dim vector space + self.value_embedding = nn.Linear(patch_len, d_model, bias=False) + + # Positional embedding + self.position_embedding = PositionalEmbedding(d_model) + + # Residual dropout + self.dropout = nn.Dropout(dropout) + + def forward(self, x): + # do patching + n_vars = x.shape[1] + x = self.padding_patch_layer(x) + x = x.unfold(dimension=-1, size=self.patch_len, step=self.stride) + x = torch.reshape(x, (x.shape[0] * x.shape[1], x.shape[2], x.shape[3])) + # Input encoding + x = self.value_embedding(x) + self.position_embedding(x) + return self.dropout(x), n_vars \ No newline at end of file
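For orientation, the `head_nf` computation in PatchTST.py above agrees with the unfold in `PatchEmbedding.forward`: `ReplicationPad1d((0, stride))` appends `stride` steps, so unfolding yields `(seq_len + stride - patch_len) // stride + 1` patches, which matches the `int((seq_len - patch_len) / stride + 2)` used to size the flatten head whenever `stride` divides `seq_len - patch_len`. A minimal sanity check (the hyperparameter values below are illustrative, not taken from the repo's configs):

```python
import torch
import torch.nn as nn

seq_len, patch_len, stride = 96, 16, 8   # illustrative values
x = torch.randn(32, 7, seq_len)          # [bs, nvars, seq_len]
x = nn.ReplicationPad1d((0, stride))(x)  # pad `stride` steps on the right
patches = x.unfold(dimension=-1, size=patch_len, step=stride)
# 12 patches: (96 + 8 - 16) // 8 + 1 == (96 - 16) // 8 + 2
assert patches.shape == (32, 7, 12, patch_len)
```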
diff --git a/model/PatchTST/layers/SelfAttention.py b/model/PatchTST/layers/SelfAttention.py new file mode 100644 index 0000000..55b2493 --- /dev/null +++ b/model/PatchTST/layers/SelfAttention.py @@ -0,0 +1,80 @@ +import torch +import torch.nn as nn +import numpy as np +from math import sqrt + +class FullAttention(nn.Module): + def __init__(self, mask_flag=True, scale=None, attention_dropout=0.1, output_attention=False): + super(FullAttention, self).__init__() + self.scale = scale + self.mask_flag = mask_flag + self.output_attention = output_attention + self.dropout = nn.Dropout(attention_dropout) + + def forward(self, queries, keys, values, attn_mask, tau=None, delta=None): + B, L, H, E = queries.shape + _, S, _, D = values.shape + scale = self.scale or 1. / sqrt(E) + + scores = torch.einsum("blhe,bshe->bhls", queries, keys) + + if self.mask_flag: + if attn_mask is None: + attn_mask = TriangularCausalMask(B, L, device=queries.device) + + scores.masked_fill_(attn_mask.mask, -np.inf) + + A = self.dropout(torch.softmax(scale * scores, dim=-1)) + V = torch.einsum("bhls,bshd->blhd", A, values) + + if self.output_attention: + return V.contiguous(), A + else: + return V.contiguous(), None + +class AttentionLayer(nn.Module): + def __init__(self, attention, d_model, n_heads, d_keys=None, + d_values=None): + super(AttentionLayer, self).__init__() + + d_keys = d_keys or (d_model // n_heads) + d_values = d_values or (d_model // n_heads) + + self.inner_attention = attention + self.query_projection = nn.Linear(d_model, d_keys * n_heads) + self.key_projection = nn.Linear(d_model, d_keys * n_heads) + self.value_projection = nn.Linear(d_model, d_values * n_heads) + self.out_projection = nn.Linear(d_values * n_heads, d_model) + self.n_heads = n_heads + + def forward(self, queries, keys, values, attn_mask, tau=None, delta=None): + B, L, _ = queries.shape + _, S, _ = keys.shape + H = self.n_heads + + queries = self.query_projection(queries).view(B, L, H, -1) + keys = self.key_projection(keys).view(B, S, H, -1) + values = self.value_projection(values).view(B, S, H, -1) + + out, attn = self.inner_attention( + queries, + keys, + values, + attn_mask, + tau=tau, + delta=delta + ) + out = out.view(B, L, -1) + + return self.out_projection(out), attn + + +class TriangularCausalMask: + def __init__(self, B, L, device="cpu"): + mask_shape = [B, 1, L, L] + with torch.no_grad(): + self._mask = torch.triu(torch.ones(mask_shape, dtype=torch.bool), diagonal=1).to(device) + + @property + def mask(self): + return self._mask \ No newline at end of file diff --git a/model/PatchTST/layers/Transformer.py b/model/PatchTST/layers/Transformer.py new file mode 100644 index 0000000..6116325 --- /dev/null +++ b/model/PatchTST/layers/Transformer.py @@ -0,0 +1,57 @@ +import torch.nn as nn +import torch.nn.functional as F + +class EncoderLayer(nn.Module): + def __init__(self, attention, d_model, d_ff=None, dropout=0.1, activation="relu"): + super(EncoderLayer, self).__init__() + d_ff = d_ff or 4 * d_model + self.attention = attention + self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1) + self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1) + self.norm1 = nn.LayerNorm(d_model) + self.norm2 = nn.LayerNorm(d_model) + self.dropout = nn.Dropout(dropout) + self.activation = F.relu if activation == "relu" else F.gelu + + def forward(self, x, attn_mask=None, tau=None, delta=None): + new_x, attn = self.attention( + x, x, x, + attn_mask=attn_mask, + tau=tau, delta=delta + ) + x = x + self.dropout(new_x) + + y = x = self.norm1(x) + y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1)))) + y = self.dropout(self.conv2(y).transpose(-1, 1)) + + return self.norm2(x + y), attn + + +class Encoder(nn.Module): + def __init__(self, attn_layers, conv_layers=None, norm_layer=None): + super(Encoder, self).__init__() + self.attn_layers = nn.ModuleList(attn_layers) + self.conv_layers = nn.ModuleList(conv_layers) if conv_layers is not None else None + self.norm = norm_layer + + def forward(self, x, attn_mask=None, tau=None, delta=None): + # x [B, L, D] + attns = [] + if self.conv_layers is not None: + for i, (attn_layer, conv_layer) in enumerate(zip(self.attn_layers, self.conv_layers)): + delta = delta if i == 0 else None + x, attn = attn_layer(x,
attn_mask=attn_mask, tau=tau, delta=delta) + x = conv_layer(x) + attns.append(attn) + x, attn = self.attn_layers[-1](x, tau=tau, delta=None) + attns.append(attn) + else: + for attn_layer in self.attn_layers: + x, attn = attn_layer(x, attn_mask=attn_mask, tau=tau, delta=delta) + attns.append(attn) + + if self.norm is not None: + x = self.norm(x) + + return x, attns \ No newline at end of file diff --git a/model/PatchTST/model_config.json b/model/PatchTST/model_config.json new file mode 100644 index 0000000..d613fbb --- /dev/null +++ b/model/PatchTST/model_config.json @@ -0,0 +1,7 @@ +[ + { + "name": "PatchTST", + "module": "model.PatchTST.PatchTST", + "entry": "Model" + } +] \ No newline at end of file diff --git a/model/README.md b/model/README.md new file mode 100644 index 0000000..24dd3ca --- /dev/null +++ b/model/README.md @@ -0,0 +1,109 @@ +# Model Registration Guide + +## Overview + +This project uses a config-file-based model registration mechanism: the `model_config.json` file in each model directory registers the models that live in that directory. + +## model_config.json Format + +### Basic Format + +Each `model_config.json` file is a JSON array containing one or more model configuration objects: + +```json +[ + { + "name": "model name", + "module": "model module path", + "entry": "model entry point" + } +] +``` + +### Fields + +- **name**: the model's unique identifier, used to select the model in experiment configs +- **module**: the module path where the model lives, in Python import format +- **entry**: the model's entry point, either a class name or a function name + +### Examples + +#### 1. A single model + +```json +[ + { + "name": "DDGCRN", + "module": "model.DDGCRN.DDGCRN", + "entry": "DDGCRN" + } +] +``` + +#### 2. Multiple models (different versions in the same directory) + +```json +[ + { + "name": "ASTRA", + "module": "model.ASTRA.astra", + "entry": "ASTRA" + }, + { + "name": "ASTRA_v2", + "module": "model.ASTRA.astrav2", + "entry": "ASTRA" + }, + { + "name": "ASTRA_v3", + "module": "model.ASTRA.astrav3", + "entry": "ASTRA" + } +] +``` + +#### 3. A function-style model + +```json +[ + { + "name": "STGNCDE", + "module": "model.STGNCDE.Make_model", + "entry": "make_model" + } +] +``` + +## Adding a New Model + +1. Create a model directory under `model` +2. Implement the model code in that directory +3. Create a `model_config.json` file with the model's registration info +4. Select the model by name in your experiment config + +## Notes + +1. Model names must be unique; duplicates are not allowed +2. The module path must be a valid Python import path +3. The entry point must be a class or function that exists in the module +4. The config file must be valid JSON +5.
Each model directory may contain only one `model_config.json` file + +## Model Selection + +In the experiment config, specify the model to use via the `basic.model` field: + +```json +{ + "basic": { + "model": "ASTRA" + }, + "model": { + // model-specific configuration + } +} +``` + +## Conflict Detection + +The system automatically checks for model-name conflicts; on a conflict it raises an `AssertionError` with the conflict details. diff --git a/model/REPST/model_config.json b/model/REPST/model_config.json new file mode 100644 index 0000000..5bdfce6 --- /dev/null +++ b/model/REPST/model_config.json @@ -0,0 +1,7 @@ +[ + { + "name": "REPST", + "module": "model.REPST.repst", + "entry": "repst" + } +] \ No newline at end of file diff --git a/model/REPST/repst.py b/model/REPST/repst.py index 5b709a4..9afbda1 100644 --- a/model/REPST/repst.py +++ b/model/REPST/repst.py @@ -19,7 +19,7 @@ class repst(nn.Module): self.gpt_layers = configs['gpt_layers'] self.d_ff = configs['d_ff'] self.gpt_path = configs['gpt_path'] - self.output_dim = configs.get('output_dim', 1) + self.output_dim = configs['output_dim'] self.word_choice = GumbelSoftmax(configs['word_num']) diff --git a/model/STAEFormer/STAEFormer.py b/model/STAEFormer/STAEFormer.py index 63fdb01..91b8188 100755 --- a/model/STAEFormer/STAEFormer.py +++ b/model/STAEFormer/STAEFormer.py @@ -187,17 +187,19 @@ class STAEformer(nn.Module): batch_size = x.shape[0] if self.tod_embedding_dim > 0: - tod = x[..., 1] + tod = x[..., -2] if self.dow_embedding_dim > 0: - dow = x[..., 2] - x = x[..., 0:1] + dow = x[..., -1] + x = x[..., 0:self.input_dim] x = self.input_proj(x) # (batch_size, in_steps, num_nodes, input_embedding_dim) features = [x] if self.tod_embedding_dim > 0: - tod_emb = self.tod_embedding( - (tod * self.steps_per_day).long() - ) # (batch_size, in_steps, num_nodes, tod_embedding_dim) + # Make sure the index stays in the valid range + tod_index = (tod * self.steps_per_day).long() + # Guard against out-of-range embedding lookups + tod_index = torch.clamp(tod_index, 0, self.steps_per_day - 1) + tod_emb = self.tod_embedding(tod_index) # (batch_size, in_steps, num_nodes, tod_embedding_dim) features.append(tod_emb) if self.dow_embedding_dim > 0: dow_emb = self.dow_embedding( diff --git a/model/STAEFormer/model_config.json b/model/STAEFormer/model_config.json new file mode 100644 index 0000000..8823a88 --- /dev/null +++ b/model/STAEFormer/model_config.json @@ -0,0 +1,7 @@ +[ + { + "name": "STAEFormer", + "module": "model.STAEFormer.STAEFormer", + "entry": "STAEformer" + } +] \ No newline at end of file diff --git a/model/STAWnet/model_config.json b/model/STAWnet/model_config.json new file mode 100644 index 0000000..0e83de9 --- /dev/null +++ b/model/STAWnet/model_config.json @@ -0,0 +1,7 @@ +[ + { + "name": "STAWnet", + "module": "model.STAWnet.STAWnet", + "entry": "STAWnet" + } +] \ No newline at end of file diff --git a/model/STFGNN/model_config.json b/model/STFGNN/model_config.json new file mode 100644 index 0000000..ef5bd7e --- /dev/null +++ b/model/STFGNN/model_config.json @@ -0,0 +1,7 @@ +[ + { + "name": "STFGNN", + "module": "model.STFGNN.STFGNN", + "entry": "STFGNN" + } +] \ No newline at end of file diff --git a/model/STGCN/model_config.json b/model/STGCN/model_config.json new file mode 100644 index 0000000..af5885a --- /dev/null +++ b/model/STGCN/model_config.json @@ -0,0 +1,7 @@ +[ + { + "name": "STGCN", + "module": "model.STGCN.models", + "entry": "STGCNChebGraphConv" + } +] \ No newline at end of file diff --git a/model/STGNCDE/model_config.json b/model/STGNCDE/model_config.json new file mode 100644 index 0000000..3ec8745 --- /dev/null +++ b/model/STGNCDE/model_config.json @@ -0,0 +1,7 @@ +[ + { + "name": "STGNCDE", + "module": "model.STGNCDE.Make_model", + "entry": "make_model" + } +] \ No newline at end
of file diff --git a/model/STGNRDE/model_config.json b/model/STGNRDE/model_config.json new file mode 100644 index 0000000..ec655a8 --- /dev/null +++ b/model/STGNRDE/model_config.json @@ -0,0 +1,7 @@ +[ + { + "name": "STGNRDE", + "module": "model.STGNRDE.Make_model", + "entry": "make_model" + } +] \ No newline at end of file diff --git a/model/STGODE/model_config.json b/model/STGODE/model_config.json new file mode 100644 index 0000000..d6a03e2 --- /dev/null +++ b/model/STGODE/model_config.json @@ -0,0 +1,7 @@ +[ + { + "name": "STGODE", + "module": "model.STGODE.STGODE", + "entry": "ODEGCN" + } +] \ No newline at end of file diff --git a/model/STID/model_config.json b/model/STID/model_config.json new file mode 100644 index 0000000..1a39d87 --- /dev/null +++ b/model/STID/model_config.json @@ -0,0 +1,7 @@ +[ + { + "name": "STID", + "module": "model.STID.STID", + "entry": "STID" + } +] \ No newline at end of file diff --git a/model/STIDGCN/model_config.json b/model/STIDGCN/model_config.json new file mode 100644 index 0000000..a986383 --- /dev/null +++ b/model/STIDGCN/model_config.json @@ -0,0 +1,7 @@ +[ + { + "name": "STIDGCN", + "module": "model.STIDGCN.STIDGCN", + "entry": "STIDGCN" + } +] \ No newline at end of file diff --git a/model/STMLP/model_config.json b/model/STMLP/model_config.json new file mode 100644 index 0000000..e7cfb08 --- /dev/null +++ b/model/STMLP/model_config.json @@ -0,0 +1,7 @@ +[ + { + "name": "STMLP", + "module": "model.STMLP.STMLP", + "entry": "STMLP" + } +] \ No newline at end of file diff --git a/model/STNorm/STNorm.py b/model/STNorm/STNorm.py new file mode 100644 index 0000000..11a72e4 --- /dev/null +++ b/model/STNorm/STNorm.py @@ -0,0 +1,148 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + + +# ========================= +# Spatial Normalization +# ========================= +class SNorm(nn.Module): + def __init__(self, channels, eps=1e-5): + super().__init__() + self.gamma = nn.Parameter(torch.ones(1, channels, 1, 1)) + self.beta = nn.Parameter(torch.zeros(1, channels, 1, 1)) + self.eps = eps + + def forward(self, x): + # normalize over node dimension + mean = x.mean(dim=2, keepdim=True) + var = x.var(dim=2, keepdim=True, unbiased=False) + x = (x - mean) / torch.sqrt(var + self.eps) + return x * self.gamma + self.beta + + +# ========================= +# Temporal Normalization +# ========================= +class TNorm(nn.Module): + def __init__(self, num_nodes, channels, momentum=0.1, eps=1e-5): + super().__init__() + self.gamma = nn.Parameter(torch.ones(1, channels, num_nodes, 1)) + self.beta = nn.Parameter(torch.zeros(1, channels, num_nodes, 1)) + self.register_buffer("running_mean", torch.zeros(1, channels, num_nodes, 1)) + self.register_buffer("running_var", torch.ones(1, channels, num_nodes, 1)) + self.momentum = momentum + self.eps = eps + + def forward(self, x): + if self.training: + mean = x.mean(dim=(0, 3), keepdim=True) + var = x.var(dim=(0, 3), keepdim=True, unbiased=False) + # in-place update (VERY IMPORTANT) + self.running_mean.mul_(1 - self.momentum).add_(self.momentum * mean) + self.running_var.mul_(1 - self.momentum).add_(self.momentum * var) + else: + mean = self.running_mean + var = self.running_var + + x = (x - mean) / torch.sqrt(var + self.eps) + return x * self.gamma + self.beta + + +# ========================= +# STNorm WaveNet +# ========================= +class STNormNet(nn.Module): + def __init__(self, args): + super().__init__() + self.blocks = args["blocks"] + self.layers = args["layers"] + self.dropout = 
args["dropout"] + self.num_nodes = args["num_nodes"] + + self.in_dim = args["in_dim"] + self.out_dim = args["out_dim"] + self.channels = args["channels"] + self.kernel_size = args["kernel_size"] + + self.use_snorm = args["snorm_bool"] + self.use_tnorm = args["tnorm_bool"] + + self.start_conv = nn.Conv2d(self.in_dim, self.channels, kernel_size=(1, 1)) + + self.filter_convs = nn.ModuleList() + self.gate_convs = nn.ModuleList() + self.residual_convs = nn.ModuleList() + self.skip_convs = nn.ModuleList() + + self.snorms = nn.ModuleList() + self.tnorms = nn.ModuleList() + + self.receptive_field = 1 + + for b in range(self.blocks): + dilation = 1 + rf_add = self.kernel_size - 1 + for _ in range(self.layers): + if self.use_snorm: + self.snorms.append(SNorm(self.channels)) + if self.use_tnorm: + self.tnorms.append(TNorm(self.num_nodes, self.channels)) + + self.filter_convs.append(nn.Conv2d(self.channels, self.channels, (1, self.kernel_size), dilation=dilation)) + self.gate_convs.append(nn.Conv2d(self.channels, self.channels, (1, self.kernel_size), dilation=dilation)) + self.residual_convs.append(nn.Conv2d(self.channels, self.channels, (1, 1))) + self.skip_convs.append(nn.Conv2d(self.channels, self.channels, (1, 1))) + + self.receptive_field += rf_add + rf_add *= 2 + dilation *= 2 + + self.end_conv_1 = nn.Conv2d(self.channels, self.channels, (1, 1)) + self.end_conv_2 = nn.Conv2d(self.channels, self.out_dim, (1, 1)) + + def forward(self, input): + # (B, T, N, F) -> (B, F, N, T) + x = input[..., :self.in_dim].transpose(1, 3) + + # pad to receptive field + if x.size(3) < self.receptive_field: + x = F.pad(x, (self.receptive_field - x.size(3), 0, 0, 0)) + + x = self.start_conv(x) + skip = None + + norm_idx = 0 + for i in range(self.blocks * self.layers): + residual = x + # ---------- STNorm (safe fusion) ---------- + if self.use_tnorm: + x = x + 0.5 * self.tnorms[norm_idx](x) + if self.use_snorm: + x = x + 0.5 * self.snorms[norm_idx](x) + norm_idx += 1 + # ---------- Dilated Conv ---------- + filter_out = torch.tanh(self.filter_convs[i](x)) + gate_out = torch.sigmoid(self.gate_convs[i](x)) + x = filter_out * gate_out + # ---------- Skip (TIME SAFE) ---------- + s = self.skip_convs[i](x) + if skip is None: + skip = s + else: + skip = skip[..., -s.size(3) :] + s + # ---------- Residual (TIME SAFE) ---------- + x = self.residual_convs[i](x) + x = x + residual[..., -x.size(3) :] + + x = F.relu(skip) + x = F.relu(self.end_conv_1(x)) + x = self.end_conv_2(x) # [B, 1, N, T] + T_out = x.size(3) + T_target = input.size(1) + + if T_out < T_target: + x = F.pad(x, (T_target - T_out, 0, 0, 0)) # left pad + + x = x.transpose(1, 3) + return x diff --git a/model/STNorm/model_config.json b/model/STNorm/model_config.json new file mode 100644 index 0000000..f860d07 --- /dev/null +++ b/model/STNorm/model_config.json @@ -0,0 +1,7 @@ +[ + { + "name": "STNorm", + "module": "model.STNorm.STNorm", + "entry": "STNormNet" + } +] \ No newline at end of file diff --git a/model/STSGCN/model_config.json b/model/STSGCN/model_config.json new file mode 100644 index 0000000..a5e2b4d --- /dev/null +++ b/model/STSGCN/model_config.json @@ -0,0 +1,7 @@ +[ + { + "name": "STSGCN", + "module": "model.STSGCN.STSGCN", + "entry": "STSGCN" + } +] \ No newline at end of file diff --git a/model/ST_SSL/model_config.json b/model/ST_SSL/model_config.json new file mode 100644 index 0000000..8bbfb74 --- /dev/null +++ b/model/ST_SSL/model_config.json @@ -0,0 +1,7 @@ +[ + { + "name": "ST_SSL", + "module": "model.ST_SSL.ST_SSL", + "entry": "STSSLModel" + } +] 
\ No newline at end of file diff --git a/model/TCN/model_config.json b/model/TCN/model_config.json new file mode 100644 index 0000000..d083150 --- /dev/null +++ b/model/TCN/model_config.json @@ -0,0 +1,7 @@ +[ + { + "name": "TCN", + "module": "model.TCN.TCN", + "entry": "TemporalConvNet" + } +] \ No newline at end of file diff --git a/model/TWDGCN/TWDGCN.py b/model/TWDGCN/TWDGCN.py index b360b57..bdf1186 100755 --- a/model/TWDGCN/TWDGCN.py +++ b/model/TWDGCN/TWDGCN.py @@ -89,7 +89,6 @@ class TWDGCN(nn.Module): self.num_layers = args["num_layers"] self.use_day = args["use_day"] self.use_week = args["use_week"] - self.default_graph = args["default_graph"] self.node_embeddings1 = nn.Parameter( torch.randn(self.num_node, args["embed_dim"]), requires_grad=True @@ -154,17 +153,17 @@ class TWDGCN(nn.Module): node_embedding1 = self.node_embeddings1 if self.use_day: - t_i_d_data = source[..., 1] + t_i_d_data = source[..., -2] T_i_D_emb = self.T_i_D_emb[(t_i_d_data * 288).long()] node_embedding1 = node_embedding1 * T_i_D_emb if self.use_week: - d_i_w_data = source[..., 2] + d_i_w_data = source[..., -1] D_i_W_emb = self.D_i_W_emb[d_i_w_data.long()] node_embedding1 = node_embedding1 * D_i_W_emb node_embeddings = [node_embedding1, self.node_embeddings1] - source = source[..., 0].unsqueeze(-1) + source = source[..., 0:self.input_dim] init_state1 = self.encoder1.init_hidden(source.shape[0]) output, _ = self.encoder1(source, init_state1, node_embeddings) diff --git a/model/TWDGCN/model_config.json b/model/TWDGCN/model_config.json new file mode 100644 index 0000000..92f3167 --- /dev/null +++ b/model/TWDGCN/model_config.json @@ -0,0 +1,7 @@ +[ + { + "name": "TWDGCN", + "module": "model.TWDGCN.TWDGCN", + "entry": "TWDGCN" + } +] \ No newline at end of file diff --git a/model/iTransformer/iTransformer.py b/model/iTransformer/iTransformer.py new file mode 100644 index 0000000..3cc0818 --- /dev/null +++ b/model/iTransformer/iTransformer.py @@ -0,0 +1,43 @@ +import torch +import torch.nn as nn +from model.iTransformer.layers.Transformer_EncDec import Encoder, EncoderLayer +from model.iTransformer.layers.SelfAttn import FullAttention, AttentionLayer +from model.iTransformer.layers.Embed import DataEmbedding_inverted + +class iTransformer(nn.Module): + """ + Paper link: https://arxiv.org/abs/2310.06625 + """ + + def __init__(self, args): + super(iTransformer, self).__init__() + self.pred_len = args['pred_len'] + # Embedding + self.enc_embedding = DataEmbedding_inverted(args['seq_len'], args['d_model'], args['dropout']) + # Encoder-only architecture + self.encoder = Encoder( + [ + EncoderLayer( + AttentionLayer( + FullAttention(False, attention_dropout=args['dropout'], + output_attention=args['output_attention']), args['d_model'], args['n_heads']), + args['d_model'], + args['d_ff'], + dropout=args['dropout'], + activation=args['activation'] + ) for l in range(args['e_layers']) + ], + norm_layer=torch.nn.LayerNorm(args['d_model']) + ) + self.projector = nn.Linear(args['d_model'], args['pred_len'], bias=True) + + def forecast(self, x_enc, x_mark_enc): + _, _, N = x_enc.shape # B, T, C + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + dec_out = self.projector(enc_out).permute(0, 2, 1)[:, :, :N] # filter the covariates + return dec_out, attns + + def forward(self, x_enc, x_mark_enc=None): + dec_out, attns = self.forecast(x_enc, x_mark_enc) + return dec_out[:, -self.pred_len:, :] # [B, T, C] \ No newline at end of file diff --git 
a/model/iTransformer/layers/Embed.py b/model/iTransformer/layers/Embed.py new file mode 100644 index 0000000..8e7209b --- /dev/null +++ b/model/iTransformer/layers/Embed.py @@ -0,0 +1,19 @@ +import torch +import torch.nn as nn + +class DataEmbedding_inverted(nn.Module): + def __init__(self, c_in, d_model, dropout=0.1): + super(DataEmbedding_inverted, self).__init__() + self.value_embedding = nn.Linear(c_in, d_model) + self.dropout = nn.Dropout(p=dropout) + + def forward(self, x, x_mark): + x = x.permute(0, 2, 1) + # x: [Batch Variate Time] + if x_mark is None: + x = self.value_embedding(x) + else: + # the potential to take covariates (e.g. timestamps) as tokens + x = self.value_embedding(torch.cat([x, x_mark.permute(0, 2, 1)], 1)) + # x: [Batch Variate d_model] + return self.dropout(x) \ No newline at end of file diff --git a/model/iTransformer/layers/SelfAttn.py b/model/iTransformer/layers/SelfAttn.py new file mode 100644 index 0000000..e5670e1 --- /dev/null +++ b/model/iTransformer/layers/SelfAttn.py @@ -0,0 +1,82 @@ +import torch +import torch.nn as nn +import numpy as np +from math import sqrt + + +class FullAttention(nn.Module): + def __init__(self, mask_flag=True, scale=None, attention_dropout=0.1, output_attention=False): + super(FullAttention, self).__init__() + self.scale = scale + self.mask_flag = mask_flag + self.output_attention = output_attention + self.dropout = nn.Dropout(attention_dropout) + + def forward(self, queries, keys, values, attn_mask, tau=None, delta=None): + B, L, H, E = queries.shape + _, S, _, D = values.shape + scale = self.scale or 1. / sqrt(E) + + scores = torch.einsum("blhe,bshe->bhls", queries, keys) + + if self.mask_flag: + if attn_mask is None: + attn_mask = TriangularCausalMask(B, L, device=queries.device) + + scores.masked_fill_(attn_mask.mask, -np.inf) + + A = self.dropout(torch.softmax(scale * scores, dim=-1)) + V = torch.einsum("bhls,bshd->blhd", A, values) + + if self.output_attention: + return V.contiguous(), A + else: + return V.contiguous(), None + +class AttentionLayer(nn.Module): + def __init__(self, attention, d_model, n_heads, d_keys=None, + d_values=None): + super(AttentionLayer, self).__init__() + + d_keys = d_keys or (d_model // n_heads) + d_values = d_values or (d_model // n_heads) + + self.inner_attention = attention + self.query_projection = nn.Linear(d_model, d_keys * n_heads) + self.key_projection = nn.Linear(d_model, d_keys * n_heads) + self.value_projection = nn.Linear(d_model, d_values * n_heads) + self.out_projection = nn.Linear(d_values * n_heads, d_model) + self.n_heads = n_heads + + def forward(self, queries, keys, values, attn_mask, tau=None, delta=None): + B, L, _ = queries.shape + _, S, _ = keys.shape + H = self.n_heads + + queries = self.query_projection(queries).view(B, L, H, -1) + keys = self.key_projection(keys).view(B, S, H, -1) + values = self.value_projection(values).view(B, S, H, -1) + + out, attn = self.inner_attention( + queries, + keys, + values, + attn_mask, + tau=tau, + delta=delta + ) + out = out.view(B, L, -1) + + return self.out_projection(out), attn + + +class TriangularCausalMask: + def __init__(self, B, L, device="cpu"): + mask_shape = [B, 1, L, L] + with torch.no_grad(): + self._mask = torch.triu(torch.ones(mask_shape, dtype=torch.bool), diagonal=1).to(device) + + @property + def mask(self): + return self._mask + diff --git a/model/iTransformer/layers/Transformer_EncDec.py b/model/iTransformer/layers/Transformer_EncDec.py new file mode 100644 index 0000000..6116325 --- /dev/null +++ 
b/model/iTransformer/layers/Transformer_EncDec.py @@ -0,0 +1,57 @@ +import torch.nn as nn +import torch.nn.functional as F + +class EncoderLayer(nn.Module): + def __init__(self, attention, d_model, d_ff=None, dropout=0.1, activation="relu"): + super(EncoderLayer, self).__init__() + d_ff = d_ff or 4 * d_model + self.attention = attention + self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1) + self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1) + self.norm1 = nn.LayerNorm(d_model) + self.norm2 = nn.LayerNorm(d_model) + self.dropout = nn.Dropout(dropout) + self.activation = F.relu if activation == "relu" else F.gelu + + def forward(self, x, attn_mask=None, tau=None, delta=None): + new_x, attn = self.attention( + x, x, x, + attn_mask=attn_mask, + tau=tau, delta=delta + ) + x = x + self.dropout(new_x) + + y = x = self.norm1(x) + y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1)))) + y = self.dropout(self.conv2(y).transpose(-1, 1)) + + return self.norm2(x + y), attn + + +class Encoder(nn.Module): + def __init__(self, attn_layers, conv_layers=None, norm_layer=None): + super(Encoder, self).__init__() + self.attn_layers = nn.ModuleList(attn_layers) + self.conv_layers = nn.ModuleList(conv_layers) if conv_layers is not None else None + self.norm = norm_layer + + def forward(self, x, attn_mask=None, tau=None, delta=None): + # x [B, L, D] + attns = [] + if self.conv_layers is not None: + for i, (attn_layer, conv_layer) in enumerate(zip(self.attn_layers, self.conv_layers)): + delta = delta if i == 0 else None + x, attn = attn_layer(x, attn_mask=attn_mask, tau=tau, delta=delta) + x = conv_layer(x) + attns.append(attn) + x, attn = self.attn_layers[-1](x, tau=tau, delta=None) + attns.append(attn) + else: + for attn_layer in self.attn_layers: + x, attn = attn_layer(x, attn_mask=attn_mask, tau=tau, delta=delta) + attns.append(attn) + + if self.norm is not None: + x = self.norm(x) + + return x, attns \ No newline at end of file diff --git a/model/iTransformer/model_config.json b/model/iTransformer/model_config.json new file mode 100644 index 0000000..79c8db5 --- /dev/null +++ b/model/iTransformer/model_config.json @@ -0,0 +1,7 @@ +[ + { + "name": "iTransformer", + "module": "model.iTransformer.iTransformer", + "entry": "iTransformer" + } +] \ No newline at end of file diff --git a/model/model_selector.py b/model/model_selector.py index c669d82..9afd0ff 100755 --- a/model/model_selector.py +++ b/model/model_selector.py @@ -1,88 +1,57 @@ -from model.DDGCRN.DDGCRN import DDGCRN -from model.TWDGCN.TWDGCN import TWDGCN -from model.AGCRN.AGCRN import AGCRN -from model.NLT.HierAttnLstm import HierAttnLstm -from model.STGNCDE.Make_model import make_model -from model.DSANET.DSANET import DSANet -from model.STGCN.models import STGCNChebGraphConv -from model.DCRNN.dcrnn_model import DCRNNModel -from model.ARIMA.ARIMA import ARIMA -from model.TCN.TCN import TemporalConvNet -from model.GWN.GraphWaveNet import gwnet -from model.STFGNN.STFGNN import STFGNN -from model.STSGCN.STSGCN import STSGCN -from model.STGODE.STGODE import ODEGCN -from model.PDG2SEQ.PDG2Seqb import PDG2Seq -from model.STMLP.STMLP import STMLP -from model.STIDGCN.STIDGCN import STIDGCN -from model.STID.STID import STID -from model.STAEFormer.STAEFormer import STAEformer -from model.EXP.EXP32 import EXP as EXP -from model.MegaCRN.MegaCRNModel import MegaCRNModel -from model.ST_SSL.ST_SSL import STSSLModel -from model.STGNRDE.Make_model import make_model as make_nrde_model -from 
model.STAWnet.STAWnet import STAWnet -from model.REPST.repst import repst as REPST -from model.AEPSA.aepsa import AEPSA as AEPSA -from model.AEPSA.aepsav2 import AEPSA as AEPSAv2 +import os +import json +import importlib +from pathlib import Path +class ModelRegistry: + def __init__(self): + self.models = {} + self.model_configs = {} + self.model_dir = Path(__file__).parent + self._load_model_configs() + + def _load_model_configs(self): + """Load every model_config.json file.""" + # Walk the model directory for all model_config.json files + for config_path in self.model_dir.rglob("model_config.json"): + # Read the config file + with open(config_path, 'r') as f: + configs = json.load(f) + + # Process each model config entry + for config in configs: + model_name = config["name"] + # Check for model-name conflicts + assert model_name not in self.model_configs, f"Model name conflict: {model_name} already exists, conflicting file: {config_path}" + self.model_configs[model_name] = config + + def _load_model(self, model_name): + """Dynamically import the model.""" + if model_name not in self.model_configs: + raise ValueError(f"Model {model_name} is not registered") + + config = self.model_configs[model_name] + module = importlib.import_module(config["module"]) + model_cls = getattr(module, config["entry"]) + self.models[model_name] = model_cls + + def get_model(self, model_name): + """Return the model class or factory function.""" + if model_name not in self.models: + self._load_model(model_name) + return self.models[model_name] +# Initialize the global model registry +model_registry = ModelRegistry() def model_selector(config): model_name = config["basic"]["model"] model_config = config["model"] - match model_name: - case "DDGCRN": - return DDGCRN(model_config) - case "TWDGCN": - return TWDGCN(model_config) - case "AGCRN": - return AGCRN(model_config) - case "NLT": - return HierAttnLstm(model_config) - case "STGNCDE": - return make_model(model_config) - case "DSANET": - return DSANet(model_config) - case "STGCN": - return STGCNChebGraphConv(model_config) - case "DCRNN": - return DCRNNModel(model_config) - case "ARIMA": - return ARIMA(model_config) - case "TCN": - return TemporalConvNet(model_config) - case "GWN": - return gwnet(model_config) - case "STFGNN": - return STFGNN(model_config) - case "STSGCN": - return STSGCN(model_config) - case "STGODE": - return ODEGCN(model_config) - case "PDG2SEQ": - return PDG2Seq(model_config) - case "STMLP": - return STMLP(model_config) - case "STIDGCN": - return STIDGCN(model_config) - case "STID": - return STID(model_config) - case "STAEFormer": - return STAEformer(model_config) - case "EXP": - return EXP(model_config) - case "MegaCRN": - return MegaCRNModel(model_config) - case "ST_SSL": - return STSSLModel(model_config) - case "STGNRDE": - return make_nrde_model(model_config) - case "STAWnet": - return STAWnet(model_config) - case "REPST": - return REPST(model_config) - case "AEPSA": - return AEPSA(model_config) - case "AEPSA_v2": - return AEPSAv2(model_config) + + model_cls = model_registry.get_model(model_name) + model = model_cls(model_config) + print(f"Selected model: {model_name}") + print(f"Model entry: {model_registry.model_configs[model_name]['module']}:{model_registry.model_configs[model_name]['entry']}") + return model
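A minimal usage sketch for the registry above, run from the repo root (the loop over `model_configs` is illustrative, but `model_registry`, `get_model`, and the `module`/`entry` fields all appear in this diff):

```python
from model.model_selector import model_registry

# Lazy dynamic import: equivalent to `from model.STID.STID import STID`
STID = model_registry.get_model("STID")

# Names and entry points discovered from the model_config.json files
for name, cfg in model_registry.model_configs.items():
    print(f"{name}: {cfg['module']}:{cfg['entry']}")
```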
diff --git a/requirements.txt b/requirements.txt index c964fff..a3568f0 100755 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +numpy pyyaml tqdm statsmodels diff --git a/run.py b/run.py index 95867f4..e62bfd6 100755 --- a/run.py +++ b/run.py @@ -11,36 +11,28 @@ from trainer.trainer_selector import select_trainer def main(): + # Read the config args = parse_args() + + # Initialize device, seed, model, data, trainer args = init.init_device(args) init.init_seed(args["basic"]["seed"]) - - # Load model model = init.init_model(args) - - # Load dataset train_loader, val_loader, test_loader, scaler, *extra_data = get_dataloader( args, normalizer=args["data"]["normalizer"], single=False ) - loss = init.init_loss(args, scaler) optimizer, lr_scheduler = init.init_optimizer(model, args["train"]) init.create_logs(args) - - # Start training or testing trainer = select_trainer( model, - loss, - optimizer, - train_loader, - val_loader, - test_loader, - scaler, + loss, optimizer, + train_loader, val_loader, test_loader, scaler, args, - lr_scheduler, - extra_data, + lr_scheduler, extra_data, ) + # Start training or testing match args["basic"]["mode"]: case "train": trainer.train() @@ -54,9 +46,7 @@ def main(): ) trainer.test( model.to(args["basic"]["device"]), - trainer.args, - test_loader, - scaler, + trainer.args, test_loader, scaler, trainer.logger, ) case _: diff --git a/train.py b/train.py new file mode 100644 index 0000000..db9d8dd --- /dev/null +++ b/train.py @@ -0,0 +1,98 @@ +import yaml +import torch +import os + +import utils.initializer as init +from dataloader.loader_selector import get_dataloader +from trainer.trainer_selector import select_trainer + +def read_config(config_path): + with open(config_path, "r") as file: + config = yaml.safe_load(file) + + # Global overrides + device = "cpu" # target device, e.g. "cpu" or "cuda:0" + seed = 2023 # random seed + epochs = 1 # number of training epochs + + # Propagate the overrides into the config + config["basic"]["device"] = device + config["model"]["device"] = device + config["train"]["device"] = device + config["basic"]["seed"] = seed + config["train"]["epochs"] = epochs + return config + +def run(config): + init.init_seed(config["basic"]["seed"]) + model = init.init_model(config) + train_loader, val_loader, test_loader, scaler, *extra_data = get_dataloader( + config, normalizer=config["data"]["normalizer"], single=False + ) + loss = init.init_loss(config, scaler) + optimizer, lr_scheduler = init.init_optimizer(model, config["train"]) + init.create_logs(config) + trainer = select_trainer( + model, + loss, optimizer, + train_loader, val_loader, test_loader, scaler, + config, + lr_scheduler, extra_data, + ) + + # Start training or testing + match config["basic"]["mode"]: + case "train": + trainer.train() + case "test": + model.load_state_dict( + torch.load( + f"./pre-trained/{config['basic']['model']}/{config['basic']['dataset']}.pth", + map_location=config["basic"]["device"], + weights_only=True, + ) + ) + trainer.test( + model.to(config["basic"]["device"]), + trainer.args, test_loader, scaler, + trainer.logger, + ) + case _: + raise ValueError(f"Unsupported mode: {config['basic']['mode']}") + +def main(model_list, data, debug=False): + # Debug switch: pass debug=False to fail fast instead of catching errors + # os.environ["TRY"] = str(False) + os.environ["TRY"] = str(debug) + + for model in model_list: + for dataset in data: + config_path = f"./config/{model}/{dataset}.yaml" + # Shared config entries can be adjusted in read_config; mind the device and epochs overrides + config = read_config(config_path) + print(f"\nRunning {model} on {dataset}") + if os.environ.get("TRY") == "True": + try: + run(config) + except Exception as e: + import traceback + tb_lines = traceback.format_exc().splitlines() + # Print the full traceback only when the failure is not an AssertionError + if not tb_lines[-1].startswith("AssertionError"): + traceback.print_exc() + print(f"\n===== {model} on {dataset} failed with error: {e} =====\n") + else: + run(config) + + + +if __name__ == "__main__": + # For debugging + # model_list = ["iTransformer", "PatchTST", "HI"] + model_list = ["STNorm"] + # model_list = ["PatchTST"] + # dataset_list = ["AirQuality"] + dataset_list = ["BJTaxi-InFlow", "BJTaxi-OutFlow"] + # dataset_list = ["AirQuality", "PEMS-BAY", "SolarEnergy", "NYCBike-InFlow", "NYCBike-OutFlow", "METR-LA"] + main(model_list, dataset_list, debug=True) \ No newline at end of file
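A sketch of how train.py's pieces compose. The YAML path is one of the model/dataset pairs iterated in the `__main__` block above, and the asserts reflect the overrides `read_config` forces regardless of what the YAML says; running it requires the corresponding config and dataset files to be present:

```python
from train import read_config, run

# One model/dataset pair from the lists in train.py's __main__ block
config = read_config("./config/STNorm/BJTaxi-InFlow.yaml")
assert config["basic"]["device"] == "cpu"  # forced by read_config
assert config["train"]["epochs"] == 1      # smoke-test override
run(config)
```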
["BJTaxi-InFlow", "BJTaxi-OutFlow"] + # dataset_list = ["AirQuality", "PEMS-BAY", "SolarEnergy", "NYCBike-InFlow", "NYCBike-OutFlow", "METR-LA"] + main(model_list, dataset_list, debug = True) \ No newline at end of file diff --git a/trainer/DCRNN_Trainer.py b/trainer/DCRNN_Trainer.py index 417d078..1911248 100755 --- a/trainer/DCRNN_Trainer.py +++ b/trainer/DCRNN_Trainer.py @@ -2,6 +2,7 @@ import math import os import time import copy +import psutil from tqdm import tqdm import torch @@ -23,34 +24,56 @@ class Trainer: args, lr_scheduler=None, ): + # 设备和基本参数 + self.device = args["basic"]["device"] + train_args = args["train"] + + # 模型和训练相关组件 self.model = model self.loss = loss self.optimizer = optimizer + self.lr_scheduler = lr_scheduler + + # 数据加载器 self.train_loader = train_loader self.val_loader = val_loader self.test_loader = test_loader + + # 数据处理工具 self.scaler = scaler - self.args = args - self.lr_scheduler = lr_scheduler + self.args = train_args + + # 统计信息 self.train_per_epoch = len(train_loader) self.val_per_epoch = len(val_loader) if val_loader else 0 - # Paths for saving models and logs + # 初始化路径、日志和统计 + self._initialize_paths(train_args) + self._initialize_logger(train_args) + self._initialize_stats() + + def _initialize_paths(self, args): + """初始化模型保存路径""" self.best_path = os.path.join(args["log_dir"], "best_model.pth") self.best_test_path = os.path.join(args["log_dir"], "best_test_model.pth") self.loss_figure_path = os.path.join(args["log_dir"], "loss.png") - - # Initialize logger + + def _initialize_logger(self, args): + """初始化日志记录器""" if not os.path.isdir(args["log_dir"]) and not args["debug"]: os.makedirs(args["log_dir"], exist_ok=True) self.logger = get_logger( args["log_dir"], name=self.model.__class__.__name__, debug=args["debug"] ) self.logger.info(f"Experiment log path in: {args['log_dir']}") - # Stats tracker - self.stats = TrainingStats(device=args["device"]) + + def _initialize_stats(self): + """初始化统计信息记录器""" + self.stats = TrainingStats(device=self.device) def _run_epoch(self, epoch, dataloader, mode): + """运行一个训练/验证/测试epoch""" + # 设置模型模式和是否进行优化 if mode == "train": self.model.train() optimizer_step = True @@ -58,54 +81,87 @@ class Trainer: self.model.eval() optimizer_step = False + # 初始化变量 total_loss = 0 epoch_time = time.time() + y_pred, y_true = [], [] + # 训练/验证循环 with torch.set_grad_enabled(optimizer_step): - with tqdm( - total=len(dataloader), desc=f"{mode.capitalize()} Epoch {epoch}" - ) as pbar: - for batch_idx, (data, target) in enumerate(dataloader): - start_time = time.time() - label = target[..., : self.args["output_dim"]] - output = self.model(data, labels=label.clone()).to( - self.args["device"] - ) + progress_bar = tqdm( + enumerate(dataloader), + total=len(dataloader), + desc=f"{mode.capitalize()} Epoch {epoch}" + ) + + for _, (data, target) in progress_bar: + # 记录步骤开始时间 + start_time = time.time() - if self.args["real_value"]: - output = self.scaler.inverse_transform(output) - label = self.scaler.inverse_transform(label) + # 前向传播 + label = target[..., : self.args["output_dim"]] + output = self.model(data, labels=label.clone()).to(self.device) + loss = self.loss(output, label) - loss = self.loss(output, label) - if optimizer_step and self.optimizer is not None: - self.optimizer.zero_grad() - loss.backward() + # 检查output和label的shape是否一致 + # if output.shape == label.shape: + # print(f"✓ Test passed: output shape {output.shape} matches label shape {label.shape}") + # import sys + # sys.exit(0) + # else: + # print(f"✗ Test failed: output shape {output.shape} 
- if self.args["grad_norm"]: - torch.nn.utils.clip_grad_norm_( - self.model.parameters(), self.args["max_grad_norm"] - ) - self.optimizer.step() + # Denormalize + d_output = self.scaler.inverse_transform(output) + d_label = self.scaler.inverse_transform(label) - step_time = time.time() - start_time - self.stats.record_step_time(step_time, mode) - total_loss += loss.item() + # Backward pass and optimization (training mode only) + if optimizer_step and self.optimizer is not None: + self.optimizer.zero_grad() + loss.backward() - if mode == "train" and (batch_idx + 1) % self.args["log_step"] == 0: - self.logger.info( - f"Train Epoch {epoch}: {batch_idx + 1}/{len(dataloader)} Loss: {loss.item():.6f}" + # Gradient clipping (if enabled) + if self.args["grad_norm"]: + torch.nn.utils.clip_grad_norm_( + self.model.parameters(), self.args["max_grad_norm"] ) + self.optimizer.step() + + # Loss on denormalized values + d_loss = self.loss(d_output, d_label) - # Update tqdm progress - pbar.update(1) - pbar.set_postfix(loss=loss.item()) + # Record step time and memory usage + step_time = time.time() - start_time + self.stats.record_step_time(step_time, mode) + # Accumulate loss and predictions + total_loss += d_loss.item() + y_pred.append(d_output.detach().cpu()) + y_true.append(d_label.detach().cpu()) + + # Update the progress bar + progress_bar.set_postfix(loss=d_loss.item()) + + # Concatenate predictions from all batches + y_pred = torch.cat(y_pred, dim=0) + y_true = torch.cat(y_true, dim=0) + + # Compute the average loss avg_loss = total_loss / len(dataloader) - self.logger.info( - f"{mode.capitalize()} Epoch {epoch}: average Loss: {avg_loss:.6f}, time: {time.time() - epoch_time:.2f} s" + + # Compute and log metrics + mae, rmse, mape = all_metrics( + y_pred, y_true, self.args["mae_thresh"], self.args["mape_thresh"] ) - # Record memory + self.logger.info( + f"Epoch #{epoch:02d}: {mode.capitalize():<5} MAE:{mae:5.2f} | RMSE:{rmse:5.2f} | MAPE:{mape:7.4f} | Time: {time.time() - epoch_time:.2f} s" + ) + + # Record memory usage + self.stats.record_memory_usage() + return avg_loss def train_epoch(self, epoch): @@ -118,21 +174,29 @@ class Trainer: return self._run_epoch(epoch, self.test_loader, "test") def train(self): + """Run the full training loop.""" + # Initialize best-model and loss tracking best_model, best_test_model = None, None best_loss, best_test_loss = float("inf"), float("inf") not_improved_count = 0 + # Start training self.stats.start_training() self.logger.info("Training process started") + + # Training loop for epoch in range(1, self.args["epochs"] + 1): + # Train, validate, and test for one epoch train_epoch_loss = self.train_epoch(epoch) val_epoch_loss = self.val_epoch(epoch) test_epoch_loss = self.test_epoch(epoch) + # Check for gradient explosion if train_epoch_loss > 1e6: self.logger.warning("Gradient explosion detected. Ending...") break + # Update the best validation model if val_epoch_loss < best_loss: best_loss = val_epoch_loss not_improved_count = 0 @@ -141,38 +205,55 @@ class Trainer: else: not_improved_count += 1 - if ( - self.args["early_stop"] - and not_improved_count == self.args["early_stop_patience"] - ): - self.logger.info( - f"Validation performance didn't improve for {self.args['early_stop_patience']} epochs. Training stops."
- ) + # Check the early-stopping condition + if self._should_early_stop(not_improved_count): break + # Update the best test model if test_epoch_loss < best_test_loss: best_test_loss = test_epoch_loss best_test_model = copy.deepcopy(self.model.state_dict()) + # Save the best models if not self.args["debug"]: - torch.save(best_model, self.best_path) - torch.save(best_test_model, self.best_test_path) - self.logger.info( - f"Best models saved at {self.best_path} and {self.best_test_path}" - ) + self._save_best_models(best_model, best_test_model) - # Log stats and parameters + # Finish training and report stats self.stats.end_training() self.stats.report(self.logger) - try: - total_params = sum( - p.numel() for p in self.model.parameters() if p.requires_grad - ) - self.logger.info(f"Trainable params: {total_params}") - except Exception: - pass + + # Final evaluation self._finalize_training(best_model, best_test_model) + # Log the model parameter count + self._log_model_params() + + def _should_early_stop(self, not_improved_count): + """Check whether early stopping should trigger.""" + if ( + self.args["early_stop"] + and not_improved_count == self.args["early_stop_patience"] + ): + self.logger.info( + f"Validation performance didn't improve for {self.args['early_stop_patience']} epochs. Training stops." + ) + return True + return False + + def _save_best_models(self, best_model, best_test_model): + """Save the best models to disk.""" + torch.save(best_model, self.best_path) + torch.save(best_test_model, self.best_test_path) + self.logger.info( + f"Best models saved at {self.best_path} and {self.best_test_path}" + ) + + def _log_model_params(self): + """Log the number of trainable parameters.""" + total_params = sum(p.numel() for p in self.model.parameters() if p.requires_grad) + self.logger.info(f"Trainable params: {total_params}") + + def _finalize_training(self, best_model, best_test_model): self.model.load_state_dict(best_model) self.logger.info("Testing on best validation model") @@ -184,44 +265,44 @@ class Trainer: @staticmethod def test(model, args, data_loader, scaler, logger, path=None): + """Evaluate the model and log performance metrics.""" + # Load a checkpoint if a path is given if path: checkpoint = torch.load(path) model.load_state_dict(checkpoint["state_dict"]) - model.to(args["device"]) + model.to(args["basic"]["device"]) + # Switch to eval mode model.eval() + + # Collect predictions and ground truth y_pred, y_true = [], [] + # Predict without gradient tracking with torch.no_grad(): for data, target in data_loader: label = target[..., : args["output_dim"]] - output = model(data, labels=label.clone()).to(args["device"]) - y_pred.append(output) - y_true.append(label) + output = model(data, labels=label.clone()) + y_pred.append(output.detach().cpu()) + y_true.append(label.detach().cpu()) - y_pred = torch.cat(y_pred, dim=0) - y_true = torch.cat(y_true, dim=0) - if args["real_value"]: - y_pred = scaler.inverse_transform(y_pred) - y_true = scaler.inverse_transform(y_true) + # Denormalize + d_y_pred = scaler.inverse_transform(torch.cat(y_pred, dim=0)) + d_y_true = scaler.inverse_transform(torch.cat(y_true, dim=0)) - for t in range(y_true.shape[1]): + # Compute and log per-horizon metrics + for t in range(d_y_true.shape[1]): mae, rmse, mape = all_metrics( - y_pred[:, t, ...], - y_true[:, t, ...], + d_y_pred[:, t, ...], + d_y_true[:, t, ...], args["mae_thresh"], args["mape_thresh"], ) - logger.info( - f"Horizon {t + 1:02d}, MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.4f}" - ) + logger.info(f"Horizon {t + 1:02d}, MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.4f}") - mae, rmse, mape = all_metrics( - y_pred, y_true, args["mae_thresh"], args["mape_thresh"] - ) - logger.info( - f"Average Horizon, MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.4f}" - ) + # Compute and log averaged metrics + mae, rmse, mape = all_metrics(d_y_pred, d_y_true, args["mae_thresh"], args["mape_thresh"]) + logger.info(f"Average Horizon, MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.4f}") @staticmethod def _compute_sampling_threshold(global_step, k):
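The refactored trainers compute the logged loss and metrics on denormalized tensors (the `d_*` variables), so MAE and RMSE are reported in the data's original units. A self-contained sketch of that pattern; the `StandardScaler` stand-in below is an assumption for illustration, not the project's scaler class:

```python
import torch

class StandardScaler:
    """Illustrative stand-in for the project's scaler."""
    def __init__(self, mean, std):
        self.mean, self.std = mean, std

    def inverse_transform(self, x):
        return x * self.std + self.mean

scaler = StandardScaler(mean=50.0, std=10.0)
output = torch.tensor([0.20, -0.10])         # normalized predictions
label = torch.tensor([0.25, 0.00])           # normalized targets

d_output = scaler.inverse_transform(output)  # tensor([52., 49.])
d_label = scaler.inverse_transform(label)    # tensor([52.5, 50.0])
mae = (d_output - d_label).abs().mean()      # 0.75, in the original units
```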
args["mape_thresh"]) + logger.info( f"Average Horizon, MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.4f}") @staticmethod def _compute_sampling_threshold(global_step, k): diff --git a/trainer/E32Trainer.py b/trainer/E32Trainer.py index 07ff01c..4bad8dd 100644 --- a/trainer/E32Trainer.py +++ b/trainer/E32Trainer.py @@ -23,44 +23,65 @@ class Trainer: global_config, lr_scheduler=None, ): + # 设备和基本参数 self.device = global_config["basic"]["device"] train_config = global_config["train"] + + # 模型和训练相关组件 self.model = model self.loss = loss self.optimizer = optimizer + self.lr_scheduler = lr_scheduler + + # 数据加载器 self.train_loader = train_loader self.val_loader = val_loader self.test_loader = test_loader + + # 数据处理工具 self.scaler = scaler self.args = train_config - self.lr_scheduler = lr_scheduler + + # 统计信息 self.train_per_epoch = len(train_loader) self.val_per_epoch = len(val_loader) if val_loader else 0 - # Paths for saving models and logs - self.best_path = os.path.join(train_config["log_dir"], "best_model.pth") - self.best_test_path = os.path.join( - train_config["log_dir"], "best_test_model.pth" - ) - self.loss_figure_path = os.path.join(train_config["log_dir"], "loss.png") - - # Initialize logger - if not os.path.isdir(train_config["log_dir"]) and not train_config["debug"]: - os.makedirs(train_config["log_dir"], exist_ok=True) + # 初始化路径、日志和统计 + self._initialize_paths(train_config) + self._initialize_logger(train_config) + self._initialize_stats() + + def _initialize_paths(self, args): + """初始化模型保存路径""" + self.best_path = os.path.join(args["log_dir"], "best_model.pth") + self.best_test_path = os.path.join(args["log_dir"], "best_test_model.pth") + self.loss_figure_path = os.path.join(args["log_dir"], "loss.png") + + def _initialize_logger(self, args): + """初始化日志记录器""" + if not os.path.isdir(args["log_dir"]) and not args["debug"]: + os.makedirs(args["log_dir"], exist_ok=True) self.logger = get_logger( - train_config["log_dir"], + args["log_dir"], name=self.model.__class__.__name__, - debug=train_config["debug"], + debug=args["debug"], ) - self.logger.info(f"Experiment log path in: {train_config['log_dir']}") - # Stats tracker + self.logger.info(f"Experiment log path in: {args['log_dir']}") + + def _initialize_stats(self): + """初始化统计信息记录器""" self.stats = TrainingStats(device=self.device) def _run_epoch(self, epoch, dataloader, mode): + """运行一个训练/验证/测试epoch""" + # 设置模型模式和是否进行优化 is_train = mode == "train" self.model.train() if is_train else self.model.eval() + + # 初始化变量 total_loss = 0.0 epoch_time = time.time() + y_pred, y_true = [], [] with ( torch.set_grad_enabled(is_train), @@ -85,10 +106,22 @@ class Trainer: # compute loss label = target[..., : self.args["output_dim"]] - if self.args["real_value"]: - output = self.scaler.inverse_transform(output) loss = self.loss(output, label) + # 检查output和label的shape是否一致 + if output.shape == label.shape: + print(f"✓ Test passed: output shape {output.shape} matches label shape {label.shape}") + import sys + sys.exit(0) + else: + print(f"✗ Test failed: output shape {output.shape} does not match label shape {label.shape}") + import sys + sys.exit(1) + + # 反归一化 + d_output = self.scaler.inverse_transform(output) + d_label = self.scaler.inverse_transform(label) + # backward / step if is_train: loss.backward() @@ -98,22 +131,39 @@ class Trainer: ) self.optimizer.step() + # 反归一化的loss + d_loss = self.loss(d_output, d_label) + step_time = time.time() - start_time self.stats.record_step_time(step_time, mode) - total_loss += loss.item() + total_loss += d_loss.item() + + # 
+ y_pred.append(d_output.detach().cpu()) + y_true.append(d_label.detach().cpu()) # logging if is_train and (batch_idx + 1) % self.args["log_step"] == 0: self.logger.info( - f"Train Epoch {epoch}: {batch_idx + 1}/{len(dataloader)} Loss: {loss.item():.6f}" + f"Train Epoch {epoch}: {batch_idx + 1}/{len(dataloader)} Loss: {d_loss.item():.6f}" ) pbar.update(1) - pbar.set_postfix(loss=loss.item()) + pbar.set_postfix(loss=d_loss.item()) + # Concatenate predictions from all batches + y_pred = torch.cat(y_pred, dim=0) + y_true = torch.cat(y_true, dim=0) + + # Compute the average loss avg_loss = total_loss / len(dataloader) + + # Compute and log metrics + mae, rmse, mape = all_metrics( + y_pred, y_true, self.args["mae_thresh"], self.args["mape_thresh"] ) self.logger.info( - f"{mode.capitalize()} Epoch {epoch}: average Loss: {avg_loss:.6f}, time: {time.time() - epoch_time:.2f} s" + f"Epoch #{epoch:02d}: {mode.capitalize():<5} MAE:{mae:5.2f} | RMSE:{rmse:5.2f} | MAPE:{mape:7.4f} | Time: {time.time() - epoch_time:.2f} s" ) # Record memory self.stats.record_memory_usage() @@ -129,21 +179,29 @@ class Trainer: return self._run_epoch(epoch, self.test_loader, "test") def train(self): + """Run the full training loop.""" + # Initialize best-model and loss tracking best_model, best_test_model = None, None best_loss, best_test_loss = float("inf"), float("inf") not_improved_count = 0 + # Start training self.stats.start_training() self.logger.info("Training process started") + + # Training loop for epoch in range(1, self.args["epochs"] + 1): + # Train, validate, and test for one epoch train_epoch_loss = self.train_epoch(epoch) val_epoch_loss = self.val_epoch(epoch) test_epoch_loss = self.test_epoch(epoch) + # Check for gradient explosion if train_epoch_loss > 1e6: self.logger.warning("Gradient explosion detected. Ending...") break + # Update the best validation model if val_epoch_loss < best_loss: best_loss = val_epoch_loss not_improved_count = 0 @@ -152,38 +210,55 @@ class Trainer: else: not_improved_count += 1 - if ( - self.args["early_stop"] - and not_improved_count == self.args["early_stop_patience"] - ): - self.logger.info( - f"Validation performance didn't improve for {self.args['early_stop_patience']} epochs. Training stops." - ) + # Check the early-stopping condition + if self._should_early_stop(not_improved_count): break + # Update the best test model if test_epoch_loss < best_test_loss: best_test_loss = test_epoch_loss best_test_model = copy.deepcopy(self.model.state_dict()) + # Save the best models if not self.args["debug"]: - torch.save(best_model, self.best_path) - torch.save(best_test_model, self.best_test_path) - self.logger.info( - f"Best models saved at {self.best_path} and {self.best_test_path}" - ) + self._save_best_models(best_model, best_test_model) - # Log stats and parameters + # Finish training and report stats self.stats.end_training() self.stats.report(self.logger) - try: - total_params = sum( - p.numel() for p in self.model.parameters() if p.requires_grad - ) - self.logger.info(f"Trainable params: {total_params}") - except Exception: - pass + + # Final evaluation self._finalize_training(best_model, best_test_model) + # Log the model parameter count + self._log_model_params() + + def _should_early_stop(self, not_improved_count): + """Check whether early stopping should trigger.""" + if ( + self.args["early_stop"] + and not_improved_count == self.args["early_stop_patience"] + ): + self.logger.info( + f"Validation performance didn't improve for {self.args['early_stop_patience']} epochs. Training stops."
+ ) + return True + return False + + def _save_best_models(self, best_model, best_test_model): + """Save the best models to disk.""" + torch.save(best_model, self.best_path) + torch.save(best_test_model, self.best_test_path) + self.logger.info( + f"Best models saved at {self.best_path} and {self.best_test_path}" + ) + + def _log_model_params(self): + """Log the number of trainable parameters.""" + total_params = sum(p.numel() for p in self.model.parameters() if p.requires_grad) + self.logger.info(f"Trainable params: {total_params}") + + def _finalize_training(self, best_model, best_test_model): self.model.load_state_dict(best_model) self.logger.info("Testing on best validation model") @@ -195,51 +270,44 @@ class Trainer: @staticmethod def test(model, args, data_loader, scaler, logger, path=None): - global_config = args - device = global_config["basic"]["device"] - args = global_config["train"] + """Evaluate the model and log performance metrics.""" + # Load a checkpoint if a path is given if path: checkpoint = torch.load(path) model.load_state_dict(checkpoint["state_dict"]) - model.to(device) + model.to(args["basic"]["device"]) + # Switch to eval mode model.eval() + + # Collect predictions and ground truth y_pred, y_true = [], [] + # Predict without gradient tracking with torch.no_grad(): for data, target, cycle_index in data_loader: label = target[..., : args["output_dim"]] output = model(data, cycle_index) - y_pred.append(output) - y_true.append(label) + y_pred.append(output.detach().cpu()) + y_true.append(label.detach().cpu()) - if args["real_value"]: - y_pred = scaler.inverse_transform(torch.cat(y_pred, dim=0)) - else: - y_pred = torch.cat(y_pred, dim=0) - y_true = torch.cat(y_true, dim=0) + # Denormalize + d_y_pred = scaler.inverse_transform(torch.cat(y_pred, dim=0)) + d_y_true = scaler.inverse_transform(torch.cat(y_true, dim=0)) - # Save y_pred and y_true here if needed - # torch.save(y_pred, "./test/PEMS07/y_pred_D.pt") # [3566,12,170,1] - # torch.save(y_true, "./test/PEMS08/y_true.pt") # [3566,12,170,1] - - for t in range(y_true.shape[1]): + # Compute and log per-horizon metrics + for t in range(d_y_true.shape[1]): mae, rmse, mape = all_metrics( - y_pred[:, t, ...], - y_true[:, t, ...], + d_y_pred[:, t, ...], + d_y_true[:, t, ...], args["mae_thresh"], args["mape_thresh"], ) - logger.info( - f"Horizon {t + 1:02d}, MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.4f}" - ) + logger.info(f"Horizon {t + 1:02d}, MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.4f}") - mae, rmse, mape = all_metrics( - y_pred, y_true, args["mae_thresh"], args["mape_thresh"] - ) - logger.info( - f"Average Horizon, MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.4f}" - ) + # Compute and log averaged metrics + mae, rmse, mape = all_metrics(d_y_pred, d_y_true, args["mae_thresh"], args["mape_thresh"]) + logger.info(f"Average Horizon, MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.4f}") @staticmethod def _compute_sampling_threshold(global_step, k): diff --git a/trainer/EXP_trainer.py b/trainer/EXP_trainer.py index 0416cc3..b5a48a5 100755 --- a/trainer/EXP_trainer.py +++ b/trainer/EXP_trainer.py @@ -2,6 +2,7 @@ import math import os import time import copy +import psutil from tqdm import tqdm import torch @@ -23,34 +24,56 @@ class Trainer: args, lr_scheduler=None, ): + # Device and basic parameters + self.device = args["basic"]["device"] + train_args = args["train"] + + # Model and training components self.model = model self.loss = loss self.optimizer = optimizer + self.lr_scheduler = lr_scheduler + + # Data loaders self.train_loader = train_loader self.val_loader = val_loader self.test_loader = test_loader + + # Data processing utilities self.scaler = scaler - self.args = args - self.lr_scheduler = lr_scheduler + self.args = train_args + + # Statistics self.train_per_epoch = len(train_loader)
self.val_per_epoch = len(val_loader) if val_loader else 0 - # Paths for saving models and logs + # 初始化路径、日志和统计 + self._initialize_paths(train_args) + self._initialize_logger(train_args) + self._initialize_stats() + + def _initialize_paths(self, args): + """初始化模型保存路径""" self.best_path = os.path.join(args["log_dir"], "best_model.pth") self.best_test_path = os.path.join(args["log_dir"], "best_test_model.pth") self.loss_figure_path = os.path.join(args["log_dir"], "loss.png") - - # Initialize logger + + def _initialize_logger(self, args): + """初始化日志记录器""" if not os.path.isdir(args["log_dir"]) and not args["debug"]: os.makedirs(args["log_dir"], exist_ok=True) self.logger = get_logger( args["log_dir"], name=self.model.__class__.__name__, debug=args["debug"] ) self.logger.info(f"Experiment log path in: {args['log_dir']}") - # Stats tracker - self.stats = TrainingStats(device=args["device"]) + + def _initialize_stats(self): + """初始化统计信息记录器""" + self.stats = TrainingStats(device=self.device) def _run_epoch(self, epoch, dataloader, mode): + """运行一个训练/验证/测试epoch""" + # 设置模型模式和是否进行优化 if mode == "train": self.model.train() optimizer_step = True @@ -58,52 +81,77 @@ class Trainer: self.model.eval() optimizer_step = False + # 初始化变量 total_loss = 0 epoch_time = time.time() + y_pred, y_true = [], [] + # 训练/验证循环 with torch.set_grad_enabled(optimizer_step): - with tqdm( - total=len(dataloader), desc=f"{mode.capitalize()} Epoch {epoch}" - ) as pbar: - for batch_idx, (data, target) in enumerate(dataloader): - start_time = time.time() - label = target[..., : self.args["output_dim"]] - output = self.model(data).to(self.args["device"]) + progress_bar = tqdm( + enumerate(dataloader), + total=len(dataloader), + desc=f"{mode.capitalize()} Epoch {epoch}" + ) + + for _, (data, target) in progress_bar: + # 记录步骤开始时间 + start_time = time.time() - if self.args["real_value"]: - output = self.scaler.inverse_transform(output) + # 前向传播 + label = target[..., : self.args["output_dim"]] + output = self.model(data).to(self.device) + loss = self.loss(output, label) - loss = self.loss(output, label) - if optimizer_step and self.optimizer is not None: - self.optimizer.zero_grad() - loss.backward() + # 反归一化 + d_output = self.scaler.inverse_transform(output) + d_label = self.scaler.inverse_transform(label) - if self.args["grad_norm"]: - torch.nn.utils.clip_grad_norm_( - self.model.parameters(), self.args["max_grad_norm"] - ) - self.optimizer.step() + # 反向传播和优化(仅在训练模式) + if optimizer_step and self.optimizer is not None: + self.optimizer.zero_grad() + loss.backward() - step_time = time.time() - start_time - self.stats.record_step_time(step_time, mode) - - total_loss += loss.item() - - if mode == "train" and (batch_idx + 1) % self.args["log_step"] == 0: - self.logger.info( - f"Train Epoch {epoch}: {batch_idx + 1}/{len(dataloader)} Loss: {loss.item():.6f}" + # 梯度裁剪(如果需要) + if self.args["grad_norm"]: + torch.nn.utils.clip_grad_norm_( + self.model.parameters(), self.args["max_grad_norm"] ) + self.optimizer.step() + + # 反归一化的loss + d_loss = self.loss(d_output, d_label) - # 更新 tqdm 的进度 - pbar.update(1) - pbar.set_postfix(loss=loss.item()) + # 记录步骤时间和内存使用 + step_time = time.time() - start_time + self.stats.record_step_time(step_time, mode) + # 累积损失和预测结果 + total_loss += d_loss.item() + y_pred.append(d_output.detach().cpu()) + y_true.append(d_label.detach().cpu()) + + # 更新进度条 + progress_bar.set_postfix(loss=d_loss.item()) + + # 合并所有批次的预测结果 + y_pred = torch.cat(y_pred, dim=0) + y_true = torch.cat(y_true, dim=0) + + # 计算平均损失 avg_loss = total_loss / 
len(dataloader) - self.logger.info( - f"{mode.capitalize()} Epoch {epoch}: average Loss: {avg_loss:.6f}, time: {time.time() - epoch_time:.2f} s" + + # 计算并记录指标 + mae, rmse, mape = all_metrics( + y_pred, y_true, self.args["mae_thresh"], self.args["mape_thresh"] ) - # 记录内存 + self.logger.info( + f"Epoch #{epoch:02d}: {mode.capitalize():<5} MAE:{mae:5.2f} | RMSE:{rmse:5.2f} | MAPE:{mape:7.4f} | Time: {time.time() - epoch_time:.2f} s" + ) + + # 记录内存使用情况 self.stats.record_memory_usage() + return avg_loss def train_epoch(self, epoch): @@ -116,21 +164,29 @@ class Trainer: return self._run_epoch(epoch, self.test_loader, "test") def train(self): + """执行完整的训练流程""" + # 初始化最佳模型和损失记录 best_model, best_test_model = None, None best_loss, best_test_loss = float("inf"), float("inf") not_improved_count = 0 + # 开始训练 self.stats.start_training() self.logger.info("Training process started") + + # 训练循环 for epoch in range(1, self.args["epochs"] + 1): + # 训练、验证和测试一个epoch train_epoch_loss = self.train_epoch(epoch) val_epoch_loss = self.val_epoch(epoch) test_epoch_loss = self.test_epoch(epoch) + # 检查梯度爆炸 if train_epoch_loss > 1e6: self.logger.warning("Gradient explosion detected. Ending...") break + # 更新最佳验证模型 if val_epoch_loss < best_loss: best_loss = val_epoch_loss not_improved_count = 0 @@ -139,37 +195,55 @@ class Trainer: else: not_improved_count += 1 - if ( - self.args["early_stop"] - and not_improved_count == self.args["early_stop_patience"] - ): - self.logger.info( - f"Validation performance didn't improve for {self.args['early_stop_patience']} epochs. Training stops." - ) + # 检查早停条件 + if self._should_early_stop(not_improved_count): break + # 更新最佳测试模型 if test_epoch_loss < best_test_loss: best_test_loss = test_epoch_loss best_test_model = copy.deepcopy(self.model.state_dict()) + # 保存最佳模型 if not self.args["debug"]: - torch.save(best_model, self.best_path) - torch.save(best_test_model, self.best_test_path) - self.logger.info( - f"Best models saved at {self.best_path} and {self.best_test_path}" - ) - # 输出统计与参数 + self._save_best_models(best_model, best_test_model) + + # 结束训练并输出统计信息 self.stats.end_training() self.stats.report(self.logger) - try: - total_params = sum( - p.numel() for p in self.model.parameters() if p.requires_grad - ) - self.logger.info(f"Trainable params: {total_params}") - except Exception: - pass + + # 最终评估 self._finalize_training(best_model, best_test_model) + # 输出模型参数量 + self._log_model_params() + + def _should_early_stop(self, not_improved_count): + """检查是否满足早停条件""" + if ( + self.args["early_stop"] + and not_improved_count == self.args["early_stop_patience"] + ): + self.logger.info( + f"Validation performance didn't improve for {self.args['early_stop_patience']} epochs. Training stops." 
+ ) + return True + return False + + def _save_best_models(self, best_model, best_test_model): + """保存最佳模型到文件""" + torch.save(best_model, self.best_path) + torch.save(best_test_model, self.best_test_path) + self.logger.info( + f"Best models saved at {self.best_path} and {self.best_test_path}" + ) + + def _log_model_params(self): + """输出模型可训练参数数量""" + total_params = sum( p.numel() for p in self.model.parameters() if p.requires_grad) + self.logger.info(f"Trainable params: {total_params}") + + def _finalize_training(self, best_model, best_test_model): self.model.load_state_dict(best_model) self.logger.info("Testing on best validation model") @@ -181,48 +255,44 @@ class Trainer: @staticmethod def test(model, args, data_loader, scaler, logger, path=None): + """对模型进行评估并输出性能指标""" + # 加载模型检查点(如果提供了路径) if path: checkpoint = torch.load(path) model.load_state_dict(checkpoint["state_dict"]) - model.to(args["device"]) + model.to(args["basic"]["device"]) + # 设置为评估模式 model.eval() + + # 收集预测和真实标签 y_pred, y_true = [], [] + # 不计算梯度的情况下进行预测 with torch.no_grad(): for data, target in data_loader: label = target[..., : args["output_dim"]] output = model(data) - y_pred.append(output) - y_true.append(label) + y_pred.append(output.detach().cpu()) + y_true.append(label.detach().cpu()) - if args["real_value"]: - y_pred = scaler.inverse_transform(torch.cat(y_pred, dim=0)) - else: - y_pred = torch.cat(y_pred, dim=0) - y_true = torch.cat(y_true, dim=0) + # 反归一化 + d_y_pred = scaler.inverse_transform(torch.cat(y_pred, dim=0)) + d_y_true = scaler.inverse_transform(torch.cat(y_true, dim=0)) - # 你在这里需要把y_pred和y_true保存下来 - # torch.save(y_pred, "./test/PEMS07/y_pred_D.pt") # [3566,12,170,1] - # torch.save(y_true, "./test/PEMS08/y_true.pt") # [3566,12,170,1] - - for t in range(y_true.shape[1]): + # 计算并记录每个时间步的指标 + for t in range(d_y_true.shape[1]): mae, rmse, mape = all_metrics( - y_pred[:, t, ...], - y_true[:, t, ...], + d_y_pred[:, t, ...], + d_y_true[:, t, ...], args["mae_thresh"], args["mape_thresh"], ) - logger.info( - f"Horizon {t + 1:02d}, MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.4f}" - ) + logger.info(f"Horizon {t + 1:02d}, MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.4f}") - mae, rmse, mape = all_metrics( - y_pred, y_true, args["mae_thresh"], args["mape_thresh"] - ) - logger.info( - f"Average Horizon, MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.4f}" - ) + # 计算并记录平均指标 + mae, rmse, mape = all_metrics(d_y_pred, d_y_true, args["mae_thresh"], args["mape_thresh"]) + logger.info( f"Average Horizon, MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.4f}") @staticmethod def _compute_sampling_threshold(global_step, k): diff --git a/trainer/InformerTrainer.py b/trainer/InformerTrainer.py new file mode 100644 index 0000000..7b7ed27 --- /dev/null +++ b/trainer/InformerTrainer.py @@ -0,0 +1,250 @@ +import math +import os +import time +import copy +import torch +from utils.logger import get_logger +from utils.loss_function import all_metrics +from tqdm import tqdm + +class InformerTrainer: + """Informer模型训练器,负责整个训练流程的管理,支持多输入模型""" + + def __init__(self, model, loss, optimizer, + train_loader, val_loader, test_loader, scaler, + args, lr_scheduler=None,): + # 设备和基本参数 + self.config = args + self.device = args["basic"]["device"] + train_args = args["train"] + # 模型和训练相关组件 + self.model = model + self.loss = loss + self.optimizer = optimizer + self.lr_scheduler = lr_scheduler + # 数据加载器 + self.train_loader = train_loader + self.val_loader = val_loader + self.test_loader = test_loader + # 数据处理工具 + self.scaler = scaler + self.args = 
train_args + # 初始化路径、日志和统计 + self._initialize_paths(train_args) + self._initialize_logger(train_args) + + def _initialize_paths(self, args): + """初始化模型保存路径""" + self.best_path = os.path.join(args["log_dir"], "best_model.pth") + self.best_test_path = os.path.join(args["log_dir"], "best_test_model.pth") + self.loss_figure_path = os.path.join(args["log_dir"], "loss.png") + + def _initialize_logger(self, args): + """初始化日志记录器""" + if not os.path.isdir(args["log_dir"]) and not args["debug"]: + os.makedirs(args["log_dir"], exist_ok=True) + self.logger = get_logger(args["log_dir"], name=self.model.__class__.__name__, debug=args["debug"]) + self.logger.info(f"Experiment log path in: {args['log_dir']}") + + def _run_epoch(self, epoch, dataloader, mode): + """运行一个训练/验证/测试epoch,支持多输入模型""" + # 设置模型模式和是否进行优化 + if mode == "train": self.model.train(); optimizer_step = True + else: self.model.eval(); optimizer_step = False + + # 初始化变量 + total_loss = 0 + epoch_time = time.time() + y_pred, y_true = [], [] + + # 训练/验证循环 + with torch.set_grad_enabled(optimizer_step): + progress_bar = tqdm( + enumerate(dataloader), + total=len(dataloader), + desc=f"{mode.capitalize()} Epoch {epoch}" + ) + for _, (x, y, x_mark, y_mark) in progress_bar: + # 转移数据 + x = x.to(self.device) + y = y[:, -self.args['pred_len']:, :self.args["output_dim"]].to(self.device) + x_mark = x_mark.to(self.device) + y_mark = y_mark.to(self.device) + # [256, 24, 6] + dec_inp = torch.zeros_like(y[:, -self.args['pred_len']:, :]).float() + # [256, 48(pred+label), 6] + dec_inp = torch.cat([y[:, :self.args['label_len'], :], dec_inp], dim=1).float().to(self.device) + + # 计算loss和反归一化loss + output = self.model(x, x_mark, dec_inp, y_mark) + if os.environ.get("TRY") == "True": + print(f"[{'✅' if output.shape == y.shape else '❌'}]: output: {output.shape}, label: {y.shape}") + assert False + loss = self.loss(output, y) + d_output = self.scaler.inverse_transform(output) + d_label = self.scaler.inverse_transform(y) + d_loss = self.loss(d_output, d_label) + # 累积损失和预测结果 + total_loss += d_loss.item() + y_pred.append(d_output.detach().cpu()) + y_true.append(d_label.detach().cpu()) + # 反向传播和优化(仅在训练模式) + if optimizer_step and self.optimizer is not None: + self.optimizer.zero_grad() + loss.backward() + # 梯度裁剪(如果需要) + if self.args["grad_norm"]: + torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.args["max_grad_norm"]) + self.optimizer.step() + # 更新进度条 + progress_bar.set_postfix(loss=d_loss.item()) + + # 合并所有批次的预测结果 + y_pred = torch.cat(y_pred, dim=0) + y_true = torch.cat(y_true, dim=0) + # 计算损失并记录指标 + avg_loss = total_loss / len(dataloader) + mae, rmse, mape = all_metrics(y_pred, y_true, self.args["mae_thresh"], self.args["mape_thresh"]) + self.logger.info( + f"Epoch #{epoch:02d}: {mode.capitalize():<5} " + f"MAE:{mae:5.2f} | RMSE:{rmse:5.2f} | MAPE:{mape:7.4f} | Time: {time.time() - epoch_time:.2f} s" + ) + return avg_loss + + def train_epoch(self, epoch): + return self._run_epoch(epoch, self.train_loader, "train") + + def val_epoch(self, epoch): + return self._run_epoch(epoch, self.val_loader or self.test_loader, "val") + + def test_epoch(self, epoch): + return self._run_epoch(epoch, self.test_loader, "test") + + def train(self): + # 初始化记录 + best_model, best_test_model = None, None + best_loss, best_test_loss = float("inf"), float("inf") + not_improved_count = 0 + # 开始训练 + self.logger.info("Training process started") + # 训练循环 + for epoch in range(1, self.args["epochs"] + 1): + # 训练、验证和测试一个epoch + train_epoch_loss = self.train_epoch(epoch) + val_epoch_loss = 
self.val_epoch(epoch) + test_epoch_loss = self.test_epoch(epoch) + # 检查梯度爆炸 + if train_epoch_loss > 1e6: + self.logger.warning("Gradient explosion detected. Ending...") + break + # 更新最佳验证模型 + if val_epoch_loss < best_loss: + best_loss = val_epoch_loss + not_improved_count = 0 + best_model = copy.deepcopy(self.model.state_dict()) + self.logger.info("Best validation model saved!") + else: + not_improved_count += 1 + # 早停 + if self._should_early_stop(not_improved_count): + break + # 更新最佳测试模型 + if test_epoch_loss < best_test_loss: + best_test_loss = test_epoch_loss + best_test_model = copy.deepcopy(self.model.state_dict()) + # 保存最佳模型 + if not self.args["debug"]: + self._save_best_models(best_model, best_test_model) + # 最终评估 + self._finalize_training(best_model, best_test_model) + + def _should_early_stop(self, not_improved_count): + """检查是否满足早停条件""" + if ( + self.args["early_stop"] + and not_improved_count == self.args["early_stop_patience"] + ): + self.logger.info( + f"Validation performance didn't improve for {self.args['early_stop_patience']} epochs. Training stops." + ) + return True + return False + + def _save_best_models(self, best_model, best_test_model): + """保存最佳模型到文件""" + torch.save(best_model, self.best_path) + torch.save(best_test_model, self.best_test_path) + self.logger.info( + f"Best models saved at {self.best_path} and {self.best_test_path}" + ) + + def _log_model_params(self): + """输出模型可训练参数数量""" + total_params = sum( p.numel() for p in self.model.parameters() if p.requires_grad) + self.logger.info(f"Trainable params: {total_params}") + + + def _finalize_training(self, best_model, best_test_model): + self.model.load_state_dict(best_model) + self.logger.info("Testing on best validation model") + self.test(self.model, self.args, self.test_loader, self.scaler, self.logger) + self.model.load_state_dict(best_test_model) + self.logger.info("Testing on best test model") + self.test(self.model, self.args, self.test_loader, self.scaler, self.logger) + + @staticmethod + def test(model, args, data_loader, scaler, logger, path=None): + """对模型进行评估并输出性能指标,支持多输入模型""" + device = args["device"] + + if path: + checkpoint = torch.load(path) + model.load_state_dict(checkpoint["state_dict"]) + model.to(device) + + # 设置为评估模式 + model.eval() + + # 收集预测和真实标签 + y_pred, y_true = [], [] + pred_len = args['pred_len'] + label_len = args['label_len'] + output_dim = args['output_dim'] + + # 不计算梯度的情况下进行预测 + with torch.no_grad(): + for _, (x, y, x_mark, y_mark) in enumerate(data_loader): + # 转移数据 + x = x.to(device) + y = y[:, -pred_len:, :output_dim].to(device) + x_mark = x_mark.to(device) + y_mark = y_mark.to(device) + # 生成dec_inp + dec_inp = torch.zeros_like(y[:, -pred_len:, :]).float() + dec_inp = torch.cat([y[:, :label_len, :], dec_inp], dim=1).float().to(device) + output = model(x, x_mark, dec_inp, y_mark) + y_pred.append(output.detach().cpu()) + y_true.append(y.detach().cpu()) + + d_y_pred = scaler.inverse_transform(torch.cat(y_pred, dim=0)) + d_y_true = scaler.inverse_transform(torch.cat(y_true, dim=0)) + mae_thresh = args["mae_thresh"] + mape_thresh = args["mape_thresh"] + + # 计算并记录每个时间步的指标 + for t in range(d_y_true.shape[1]): + mae, rmse, mape = all_metrics( + d_y_pred[:, t, ...], + d_y_true[:, t, ...], + mae_thresh, + mape_thresh, + ) + logger.info(f"Horizon {t + 1:02d}, MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.4f}") + + # 计算并记录平均指标 + mae, rmse, mape = all_metrics(d_y_pred, d_y_true, mae_thresh, mape_thresh) + logger.info( f"Average Horizon, MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.4f}") 
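The zero-padded decoder input built in `_run_epoch` and `test` above is the piece of the Informer recipe that is easiest to get wrong: the `label_len` context steps should come from a target window spanning `label_len + pred_len` steps, so the context precedes the forecast horizon rather than overlapping it. Below is a minimal, self-contained sketch of that construction on dummy tensors; the shapes and the values `label_len=24`, `pred_len=24` are illustrative, not the project's configuration:

    import torch

    batch, label_len, pred_len, dim = 8, 24, 24, 6
    # y holds label_len known steps followed by pred_len steps to forecast
    y = torch.randn(batch, label_len + pred_len, dim)

    # zeros as placeholders for the unknown prediction window ...
    dec_inp = torch.zeros(batch, pred_len, dim)
    # ... prefixed with the known label_len steps as decoder context
    dec_inp = torch.cat([y[:, :label_len, :], dec_inp], dim=1)

    assert dec_inp.shape == (batch, label_len + pred_len, dim)

Note that in the trainer above, `y` is sliced to its last `pred_len` steps before the prefix `y[:, :label_len, :]` is taken, so for `label_len > 0` the prefix overlaps the forecast window. That may be intended, but it is worth verifying against the data pipeline.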
+
+    @staticmethod
+    def _compute_sampling_threshold(global_step, k):
+        return k / (k + math.exp(global_step / k))
diff --git a/trainer/PDG2SEQ_Trainer.py b/trainer/PDG2SEQ_Trainer.py
index 72d155d..e42e841 100755
--- a/trainer/PDG2SEQ_Trainer.py
+++ b/trainer/PDG2SEQ_Trainer.py
@@ -23,35 +23,57 @@ class Trainer:
        args,
        lr_scheduler=None,
    ):
+        # device and basic parameters
+        self.device = args["basic"]["device"]
+        train_args = args["train"]
+
+        # model and training components
        self.model = model
        self.loss = loss
        self.optimizer = optimizer
+        self.lr_scheduler = lr_scheduler
+
+        # data loaders
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.test_loader = test_loader
+
+        # data-processing utilities
        self.scaler = scaler
-        self.args = args
-        self.lr_scheduler = lr_scheduler
+        self.args = train_args
+        self.batches_seen = 0
+
+        # bookkeeping
        self.train_per_epoch = len(train_loader)
        self.val_per_epoch = len(val_loader) if val_loader else 0
-        self.batches_seen = 0

-        # Paths for saving models and logs
+        # initialize paths, logger, and stats
+        self._initialize_paths(train_args)
+        self._initialize_logger(train_args)
+        self._initialize_stats()
+
+    def _initialize_paths(self, args):
+        """Initialize model checkpoint paths."""
        self.best_path = os.path.join(args["log_dir"], "best_model.pth")
        self.best_test_path = os.path.join(args["log_dir"], "best_test_model.pth")
        self.loss_figure_path = os.path.join(args["log_dir"], "loss.png")
-
-        # Initialize logger
+
+    def _initialize_logger(self, args):
+        """Initialize the logger."""
        if not os.path.isdir(args["log_dir"]) and not args["debug"]:
            os.makedirs(args["log_dir"], exist_ok=True)
        self.logger = get_logger(
            args["log_dir"], name=self.model.__class__.__name__, debug=args["debug"]
        )
        self.logger.info(f"Experiment log path in: {args['log_dir']}")
-        # Stats tracker
-        self.stats = TrainingStats(device=args["device"])
+
+    def _initialize_stats(self):
+        """Initialize the training-statistics tracker."""
+        self.stats = TrainingStats(device=self.device)

    def _run_epoch(self, epoch, dataloader, mode):
+        """Run one training/validation/test epoch."""
+        # set the model mode and whether to take optimizer steps
        if mode == "train":
            self.model.train()
            optimizer_step = True
@@ -59,55 +81,96 @@ class Trainer:
            self.model.eval()
            optimizer_step = False

+        # initialize accumulators
        total_loss = 0
        epoch_time = time.time()
+        y_pred, y_true = [], []

        with torch.set_grad_enabled(optimizer_step):
-            with tqdm(
-                total=len(dataloader), desc=f"{mode.capitalize()} Epoch {epoch}"
-            ) as pbar:
-                for batch_idx, (data, target) in enumerate(dataloader):
-                    start_time = time.time()
-                    self.batches_seen += 1
-                    label = target[..., : self.args["output_dim"]].clone()
-                    output = self.model(data, target, self.batches_seen).to(
-                        self.args["device"]
+            progress_bar = tqdm(
+                enumerate(dataloader),
+                total=len(dataloader),
+                desc=f"{mode.capitalize()} Epoch {epoch}"
+            )
+
+            for batch_idx, (data, target) in progress_bar:
+                start_time = time.time()
+                self.batches_seen += 1
+                label = target[..., : self.args["output_dim"]].clone()
+
+                # forward pass (scheduled sampling needs batches_seen during training)
+                if mode == "train":
+                    output = self.model(data, target, self.batches_seen).to(self.device)
+                else:
+                    output = self.model(data, target).to(self.device)
+
+                # loss in normalized space
+                loss = self.loss(output, label)
+
+                # sanity check: output and label shapes must agree
+                assert output.shape == label.shape, (
+                    f"output shape {output.shape} does not match label shape {label.shape}"
+                )
+
+                # de-normalize
+                d_output = self.scaler.inverse_transform(output)
+                d_label = self.scaler.inverse_transform(label)
+
+                # loss in the original scale
+                d_loss = self.loss(d_output, d_label)
+
+                # backward pass and optimization (training mode only)
+                if 
optimizer_step and self.optimizer is not None: + self.optimizer.zero_grad() + loss.backward() + + if self.args["grad_norm"]: + torch.nn.utils.clip_grad_norm_( + self.model.parameters(), self.args["max_grad_norm"] + ) + self.optimizer.step() + + # 记录步骤时间 + step_time = time.time() - start_time + self.stats.record_step_time(step_time, mode) + total_loss += d_loss.item() + + # 累积预测结果 + y_pred.append(d_output.detach().cpu()) + y_true.append(d_label.detach().cpu()) + + if mode == "train" and (batch_idx + 1) % self.args["log_step"] == 0: + self.logger.info( + f"Train Epoch {epoch}: {batch_idx + 1}/{len(dataloader)} Loss: {d_loss.item():.6f}" ) - if self.args["real_value"]: - output = self.scaler.inverse_transform(output) + # 更新 tqdm 的进度 + progress_bar.update(1) + progress_bar.set_postfix(loss=d_loss.item()) - loss = self.loss(output, label) - if optimizer_step and self.optimizer is not None: - self.optimizer.zero_grad() - loss.backward() - - if self.args["grad_norm"]: - torch.nn.utils.clip_grad_norm_( - self.model.parameters(), self.args["max_grad_norm"] - ) - self.optimizer.step() - - # record step time - step_time = time.time() - start_time - self.stats.record_step_time(step_time, mode) - total_loss += loss.item() - - if mode == "train" and (batch_idx + 1) % self.args["log_step"] == 0: - self.logger.info( - f"Train Epoch {epoch}: {batch_idx + 1}/{len(dataloader)} Loss: {loss.item():.6f}" - ) - - # 更新 tqdm 的进度 - pbar.update(1) - pbar.set_postfix(loss=loss.item()) + # 合并所有批次的预测结果 + y_pred = torch.cat(y_pred, dim=0) + y_true = torch.cat(y_true, dim=0) + # 计算平均损失 avg_loss = total_loss / len(dataloader) - self.logger.info( - f"{mode.capitalize()} Epoch {epoch}: average Loss: {avg_loss:.6f}, time: {time.time() - epoch_time:.2f} s" + + # 计算并记录指标 + mae, rmse, mape = all_metrics( + y_pred, y_true, self.args["mae_thresh"], self.args["mape_thresh"] ) - # 记录内存 + self.logger.info( + f"Epoch #{epoch:02d}: {mode.capitalize():<5} MAE:{mae:5.2f} | RMSE:{rmse:5.2f} | MAPE:{mape:7.4f} | Time: {time.time() - epoch_time:.2f} s" + ) + + # 记录内存使用情况 self.stats.record_memory_usage() + return avg_loss def train_epoch(self, epoch): @@ -120,21 +183,29 @@ class Trainer: return self._run_epoch(epoch, self.test_loader, "test") def train(self): + """执行完整的训练流程""" + # 初始化最佳模型和损失记录 best_model, best_test_model = None, None best_loss, best_test_loss = float("inf"), float("inf") not_improved_count = 0 + # 开始训练 self.stats.start_training() self.logger.info("Training process started") + + # 训练循环 for epoch in range(1, self.args["epochs"] + 1): + # 训练、验证和测试一个epoch train_epoch_loss = self.train_epoch(epoch) val_epoch_loss = self.val_epoch(epoch) test_epoch_loss = self.test_epoch(epoch) + # 检查梯度爆炸 if train_epoch_loss > 1e6: self.logger.warning("Gradient explosion detected. Ending...") break + # 更新最佳验证模型 if val_epoch_loss < best_loss: best_loss = val_epoch_loss not_improved_count = 0 @@ -143,37 +214,54 @@ class Trainer: else: not_improved_count += 1 - if ( - self.args["early_stop"] - and not_improved_count == self.args["early_stop_patience"] - ): - self.logger.info( - f"Validation performance didn't improve for {self.args['early_stop_patience']} epochs. Training stops." 
- ) + # 检查早停条件 + if self._should_early_stop(not_improved_count): break + # 更新最佳测试模型 if test_epoch_loss < best_test_loss: best_test_loss = test_epoch_loss best_test_model = copy.deepcopy(self.model.state_dict()) + # 保存最佳模型 if not self.args["debug"]: - torch.save(best_model, self.best_path) - torch.save(best_test_model, self.best_test_path) - self.logger.info( - f"Best models saved at {self.best_path} and {self.best_test_path}" - ) + self._save_best_models(best_model, best_test_model) - # 输出统计与参数 + # 结束训练并输出统计信息 self.stats.end_training() self.stats.report(self.logger) - try: - total_params = sum( - p.numel() for p in self.model.parameters() if p.requires_grad - ) - self.logger.info(f"Trainable params: {total_params}") - except Exception: - pass + + # 输出模型参数量 + self._log_model_params() + + # 最终评估 self._finalize_training(best_model, best_test_model) + + def _should_early_stop(self, not_improved_count): + """检查是否满足早停条件""" + if ( + self.args["early_stop"] + and not_improved_count == self.args["early_stop_patience"] + ): + self.logger.info( + f"Validation performance didn't improve for {self.args['early_stop_patience']} epochs. Training stops." + ) + return True + return False + + def _save_best_models(self, best_model, best_test_model): + """保存最佳模型到文件""" + torch.save(best_model, self.best_path) + torch.save(best_test_model, self.best_test_path) + self.logger.info( + f"Best models saved at {self.best_path} and {self.best_test_path}" + ) + + def _log_model_params(self): + """输出模型可训练参数数量""" + total_params = sum( p.numel() for p in self.model.parameters() if p.requires_grad) + self.logger.info(f"Trainable params: {total_params}") + def _finalize_training(self, best_model, best_test_model): self.model.load_state_dict(best_model) @@ -186,44 +274,44 @@ class Trainer: @staticmethod def test(model, args, data_loader, scaler, logger, path=None): + """对模型进行评估并输出性能指标""" + # 加载模型检查点(如果提供了路径) if path: checkpoint = torch.load(path) model.load_state_dict(checkpoint["state_dict"]) - model.to(args["device"]) + model.to(args["basic"]["device"]) + # 设置为评估模式 model.eval() + + # 收集预测和真实标签 y_pred, y_true = [], [] + # 不计算梯度的情况下进行预测 with torch.no_grad(): for data, target in data_loader: label = target[..., : args["output_dim"]].clone() output = model(data, target) - y_pred.append(output) - y_true.append(label) + y_pred.append(output.detach().cpu()) + y_true.append(label.detach().cpu()) - if args["real_value"]: - y_pred = scaler.inverse_transform(torch.cat(y_pred, dim=0)) - else: - y_pred = torch.cat(y_pred, dim=0) - y_true = torch.cat(y_true, dim=0) + # 反归一化 + d_y_pred = scaler.inverse_transform(torch.cat(y_pred, dim=0)) + d_y_true = scaler.inverse_transform(torch.cat(y_true, dim=0)) - for t in range(y_true.shape[1]): + # 计算并记录每个时间步的指标 + for t in range(d_y_true.shape[1]): mae, rmse, mape = all_metrics( - y_pred[:, t, ...], - y_true[:, t, ...], + d_y_pred[:, t, ...], + d_y_true[:, t, ...], args["mae_thresh"], args["mape_thresh"], ) - logger.info( - f"Horizon {t + 1:02d}, MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.4f}" - ) + logger.info(f"Horizon {t + 1:02d}, MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.4f}") - mae, rmse, mape = all_metrics( - y_pred, y_true, args["mae_thresh"], args["mape_thresh"] - ) - logger.info( - f"Average Horizon, MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.4f}" - ) + # 计算并记录平均指标 + mae, rmse, mape = all_metrics(d_y_pred, d_y_true, args["mae_thresh"], args["mape_thresh"]) + logger.info( f"Average Horizon, MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.4f}") @staticmethod def 
_compute_sampling_threshold(global_step, k): diff --git a/trainer/STMLP_Trainer.py b/trainer/STMLP_Trainer.py index 6b2217a..4f3d576 100644 --- a/trainer/STMLP_Trainer.py +++ b/trainer/STMLP_Trainer.py @@ -26,42 +26,35 @@ class Trainer: args, lr_scheduler=None, ): + # 设备和基本参数 + self.device = args["basic"]["device"] + train_args = args["train"] + + # 模型和训练相关组件 self.model = model self.loss = loss self.optimizer = optimizer + self.lr_scheduler = lr_scheduler + + # 数据加载器 self.train_loader = train_loader self.val_loader = val_loader self.test_loader = test_loader + + # 数据处理工具 self.scaler = scaler - self.args = args["train"] - self.lr_scheduler = lr_scheduler + self.args = train_args + + # 统计信息 self.train_per_epoch = len(train_loader) self.val_per_epoch = len(val_loader) if val_loader else 0 - # Paths for saving models and logs - self.best_path = os.path.join(self.args["log_dir"], "best_model.pth") - self.best_test_path = os.path.join(self.args["log_dir"], "best_test_model.pth") - self.loss_figure_path = os.path.join(self.args["log_dir"], "loss.png") - self.pretrain_dir = ( - f"./pre-train/{args['model']['type']}/{args['data']['type']}" - ) - self.pretrain_path = os.path.join(self.pretrain_dir, "best_model.pth") - self.pretrain_best_path = os.path.join(self.pretrain_dir, "best_test_model.pth") - - # Initialize logger - if not os.path.isdir(self.args["log_dir"]) and not self.args["debug"]: - os.makedirs(self.args["log_dir"], exist_ok=True) - if not os.path.isdir(self.pretrain_dir) and not self.args["debug"]: - os.makedirs(self.pretrain_dir, exist_ok=True) - self.logger = get_logger( - self.args["log_dir"], - name=self.model.__class__.__name__, - debug=self.args["debug"], - ) - self.logger.info(f"Experiment log path in: {self.args['log_dir']}") - # Stats tracker - self.stats = TrainingStats(device=args["device"]) - + # 初始化路径、日志和统计 + self._initialize_paths(args, train_args) + self._initialize_logger(train_args) + self._initialize_stats() + + # 教师-学生蒸馏相关 if self.args["teacher_stu"]: self.tmodel = self.loadTeacher(args) else: @@ -70,9 +63,41 @@ class Trainer: f"./pre-train/{args['model']['type']}/{args['data']['type']}/best_model.pth" f"然后在config中配置train.teacher_stu模式为True开启蒸馏模式" ) + + def _initialize_paths(self, args, train_args): + """初始化模型保存路径""" + self.best_path = os.path.join(train_args["log_dir"], "best_model.pth") + self.best_test_path = os.path.join(train_args["log_dir"], "best_test_model.pth") + self.loss_figure_path = os.path.join(train_args["log_dir"], "loss.png") + self.pretrain_dir = ( + f"./pre-train/{args['model']['type']}/{args['data']['type']}" + ) + self.pretrain_path = os.path.join(self.pretrain_dir, "best_model.pth") + self.pretrain_best_path = os.path.join(self.pretrain_dir, "best_test_model.pth") + + # 创建预训练目录 + if not os.path.isdir(self.pretrain_dir) and not train_args["debug"]: + os.makedirs(self.pretrain_dir, exist_ok=True) + + def _initialize_logger(self, args): + """初始化日志记录器""" + if not os.path.isdir(args["log_dir"]) and not args["debug"]: + os.makedirs(args["log_dir"], exist_ok=True) + self.logger = get_logger( + args["log_dir"], + name=self.model.__class__.__name__, + debug=args["debug"], + ) + self.logger.info(f"Experiment log path in: {args['log_dir']}") + + def _initialize_stats(self): + """初始化统计信息记录器""" + self.stats = TrainingStats(device=self.device) def _run_epoch(self, epoch, dataloader, mode): + """运行一个训练/验证/测试epoch""" # self.tmodel.eval() + # 设置模型模式和是否进行优化 if mode == "train": self.model.train() optimizer_step = True @@ -80,8 +105,10 @@ class Trainer: 
            self.model.eval()
            optimizer_step = False

+        # initialize accumulators
        total_loss = 0
        epoch_time = time.time()
+        y_pred, y_true = [], []

        with torch.set_grad_enabled(optimizer_step):
            with tqdm(
@@ -89,15 +116,17 @@
            ) as pbar:
                for batch_idx, (data, target) in enumerate(dataloader):
                    start_time = time.time()
+                    label = target[..., : self.args["output_dim"]]
+
                    if self.args["teacher_stu"]:
-                        label = target[..., : self.args["output_dim"]]
+                        # teacher-student distillation mode
                        output, out_, _ = self.model(data)
                        gout, tout, sout = self.tmodel(data)
-
-                        if self.args["real_value"]:
-                            output = self.scaler.inverse_transform(output)
-
+
+                        # task loss in normalized space
                        loss1 = self.loss(output, label)
+
+                        # distillation losses
                        scl = self.loss_cls(out_, sout)
                        kl_loss = nn.KLDivLoss(
                            reduction="batchmean", log_target=True
@@ -105,17 +134,42 @@
                        gout = F.log_softmax(gout, dim=-1).cuda()
                        mlp_emb_ = F.log_softmax(output, dim=-1).cuda()
                        tkloss = kl_loss(mlp_emb_.cuda().float(), gout.cuda().float())
+
+                        # total loss: task loss plus weighted distillation terms
                        loss = loss1 + 10 * tkloss + 1 * scl
                    else:
-                        label = target[..., : self.args["output_dim"]]
+                        # plain training mode
                        output, out_, _ = self.model(data)
-
-                        if self.args["real_value"]:
-                            output = self.scaler.inverse_transform(output)
-
                        loss = self.loss(output, label)

+                    # sanity check: output and label shapes must agree
+                    assert output.shape == label.shape, (
+                        f"output shape {output.shape} does not match label shape {label.shape}"
+                    )
+
+                    # de-normalize
+                    d_output = self.scaler.inverse_transform(output)
+                    d_label = self.scaler.inverse_transform(label)
+
+                    # loss in the original scale
+                    d_loss = self.loss(d_output, d_label)
+
+                    # backward pass and optimization (training mode only)
                    if optimizer_step and self.optimizer is not None:
                        self.optimizer.zero_grad()
                        loss.backward()
@@ -128,20 +182,34 @@
                    step_time = time.time() - start_time
                    self.stats.record_step_time(step_time, mode)
-                    total_loss += loss.item()
+                    total_loss += d_loss.item()
+
+                    # accumulate predictions
+                    y_pred.append(d_output.detach().cpu())
+                    y_true.append(d_label.detach().cpu())

                    if mode == "train" and (batch_idx + 1) % self.args["log_step"] == 0:
                        self.logger.info(
-                            f"Train Epoch {epoch}: {batch_idx + 1}/{len(dataloader)} Loss: {loss.item():.6f}"
+                            f"Train Epoch {epoch}: {batch_idx + 1}/{len(dataloader)} Loss: {d_loss.item():.6f}"
                        )

                    # update the tqdm progress bar
                    pbar.update(1)
-                    pbar.set_postfix(loss=loss.item())
+                    pbar.set_postfix(loss=d_loss.item())

+        # concatenate the predictions from all batches
+        y_pred = torch.cat(y_pred, dim=0)
+        y_true = torch.cat(y_true, dim=0)
+
+        # average loss over the epoch
        avg_loss = total_loss / len(dataloader)
+
+        # compute and log metrics
+        mae, rmse, mape = all_metrics(
+            y_pred, y_true, self.args["mae_thresh"], self.args["mape_thresh"]
+        )
        self.logger.info(
-            f"{mode.capitalize()} Epoch {epoch}: average Loss: {avg_loss:.6f}, time: {time.time() - epoch_time:.2f} s"
+            f"Epoch #{epoch:02d}: {mode.capitalize():<5} MAE:{mae:5.2f} | RMSE:{rmse:5.2f} | MAPE:{mape:7.4f} | Time: {time.time() - epoch_time:.2f} s"
        )
        # record memory usage
        self.stats.record_memory_usage()
@@ -157,6 +225,7 @@
        return self._run_epoch(epoch, self.test_loader, "test")

    def train(self):
+        """Run the full training procedure."""
        best_model, best_test_model = None, None
        best_loss, best_test_loss = float("inf"), float("inf")
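+        # NOTE (editorial): two checkpoints are tracked below. `best_model` is
+        # selected on the validation loss, which is the standard protocol;
+        # `best_test_model` is selected on the test loss, so metrics reported
+        # from it are optimistically biased and should be read as an upper
+        # bound rather than an unbiased estimate of generalization.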
not_improved_count = 0 @@ -182,13 +251,7 @@ class Trainer: else: not_improved_count += 1 - if ( - self.args["early_stop"] - and not_improved_count == self.args["early_stop_patience"] - ): - self.logger.info( - f"Validation performance didn't improve for {self.args['early_stop_patience']} epochs. Training stops." - ) + if self._should_early_stop(not_improved_count): break if test_epoch_loss < best_test_loss: @@ -207,14 +270,25 @@ class Trainer: # 输出统计与参数 self.stats.end_training() self.stats.report(self.logger) - try: - total_params = sum( - p.numel() for p in self.model.parameters() if p.requires_grad - ) - self.logger.info(f"Trainable params: {total_params}") - except Exception: - pass + self._log_model_params() self._finalize_training(best_model, best_test_model) + + def _should_early_stop(self, not_improved_count): + """检查是否满足早停条件""" + if ( + self.args["early_stop"] + and not_improved_count == self.args["early_stop_patience"] + ): + self.logger.info( + f"Validation performance didn't improve for {self.args['early_stop_patience']} epochs. Training stops." + ) + return True + return False + + def _log_model_params(self): + """输出模型可训练参数数量""" + total_params = sum( p.numel() for p in self.model.parameters() if p.requires_grad) + self.logger.info(f"Trainable params: {total_params}") def _finalize_training(self, best_model, best_test_model): self.model.load_state_dict(best_model) @@ -274,48 +348,44 @@ class Trainer: @staticmethod def test(model, args, data_loader, scaler, logger, path=None): + """对模型进行评估并输出性能指标""" + # 加载模型检查点(如果提供了路径) if path: checkpoint = torch.load(path) model.load_state_dict(checkpoint["state_dict"]) - model.to(args["device"]) + model.to(args["basic"]["device"]) + # 设置为评估模式 model.eval() + + # 收集预测和真实标签 y_pred, y_true = [], [] + # 不计算梯度的情况下进行预测 with torch.no_grad(): for data, target in data_loader: label = target[..., : args["output_dim"]] output, _, _ = model(data) - y_pred.append(output) - y_true.append(label) + y_pred.append(output.detach().cpu()) + y_true.append(label.detach().cpu()) - if args["real_value"]: - y_pred = scaler.inverse_transform(torch.cat(y_pred, dim=0)) - else: - y_pred = torch.cat(y_pred, dim=0) - y_true = torch.cat(y_true, dim=0) + # 反归一化 + d_y_pred = scaler.inverse_transform(torch.cat(y_pred, dim=0)) + d_y_true = scaler.inverse_transform(torch.cat(y_true, dim=0)) - # 你在这里需要把y_pred和y_true保存下来 - # torch.save(y_pred, "./test/PEMS07/y_pred_D.pt") # [3566,12,170,1] - # torch.save(y_true, "./test/PEMSD8/y_true.pt") # [3566,12,170,1] - - for t in range(y_true.shape[1]): + # 计算并记录每个时间步的指标 + for t in range(d_y_true.shape[1]): mae, rmse, mape = all_metrics( - y_pred[:, t, ...], - y_true[:, t, ...], + d_y_pred[:, t, ...], + d_y_true[:, t, ...], args["mae_thresh"], args["mape_thresh"], ) - logger.info( - f"Horizon {t + 1:02d}, MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.4f}" - ) + logger.info(f"Horizon {t + 1:02d}, MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.4f}") - mae, rmse, mape = all_metrics( - y_pred, y_true, args["mae_thresh"], args["mape_thresh"] - ) - logger.info( - f"Average Horizon, MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.4f}" - ) + # 计算并记录平均指标 + mae, rmse, mape = all_metrics(d_y_pred, d_y_true, args["mae_thresh"], args["mape_thresh"]) + logger.info( f"Average Horizon, MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.4f}") @staticmethod def _compute_sampling_threshold(global_step, k): diff --git a/trainer/TSTrainer.py b/trainer/TSTrainer.py new file mode 100755 index 0000000..11cd431 --- /dev/null +++ b/trainer/TSTrainer.py @@ -0,0 +1,116 @@ 
+import os, time, copy, torch
+from tqdm import tqdm
+from utils.logger import get_logger
+from utils.loss_function import all_metrics
+
+
+class Trainer:
+    def __init__(self, model, loss, optimizer, train_loader, val_loader, test_loader, scaler, args, lr_scheduler=None):
+        self.device, self.args = args["basic"]["device"], args["train"]
+        self.model, self.loss, self.optimizer, self.lr_scheduler = model.to(self.device), loss, optimizer, lr_scheduler
+        self.train_loader, self.val_loader, self.test_loader = train_loader, val_loader or test_loader, test_loader
+        self.scaler = scaler
+        # apply each per-channel scaler's inverse_transform, then concatenate along the last axis
+        self.inv = lambda x: torch.cat([s.inverse_transform(x[..., i:i+1]) for i, s in enumerate(self.scaler)], dim=-1)
+        self._init_paths()
+        self._init_logger()
+        # ---------- shape magic (replaces TSWrapper) ----------
+        # pack:   (B, T, N, C) -> (B*N, T, C-2); drops the last two feature
+        #         channels and keeps the original shape for unpack
+        self.pack = lambda x: (x[..., :-2].permute(0, 2, 1, 3).reshape(-1, x.size(1), x.size(3) - 2), x.shape)
+        # unpack: (B*N, T, C') -> (B, T, N, C'); restores the node dimension
+        self.unpack = lambda y, s: y.reshape(s[0], s[2], s[1], -1).permute(0, 2, 1, 3)
+
+    # ---------------- init ----------------
+    def _init_paths(self):
+        d = self.args["log_dir"]
+        self.best_path, self.best_test_path = os.path.join(d, "best_model.pth"), os.path.join(d, "best_test_model.pth")
+
+    def _init_logger(self):
+        if not self.args["debug"]:
+            os.makedirs(self.args["log_dir"], exist_ok=True)
+        self.logger = get_logger(self.args["log_dir"], name=self.model.__class__.__name__, debug=self.args["debug"])
+
+    # ---------------- epoch ----------------
+    def _run_epoch(self, epoch, loader, mode):
+        is_train = mode == "train"
+        self.model.train() if is_train else self.model.eval()
+        total_loss, start = 0.0, time.time()
+        y_pred, y_true = [], []
+
+        with torch.set_grad_enabled(is_train):
+            bar = tqdm(loader, desc=f"{mode} {epoch}", total=len(loader))
+            for data, target in bar:
+                data, target = data.to(self.device), target.to(self.device)
+                label = target[..., :self.args["output_dim"]]
+                x, shp = self.pack(data)
+                out = self.unpack(self.model(x), shp)
+                # dry-run mode: print the shape check, then abort immediately
+                if os.environ.get("TRY") == "True":
+                    print(f"{'[✅]' if out.shape == label.shape else '❌'} out: {out.shape}, label: {label.shape} \n")
+                    assert False
+                loss = self.loss(out, label)
+                d_out, d_lbl = self.inv(out), self.inv(label)  # de-normalize
+                d_loss = self.loss(d_out, d_lbl)
+                total_loss += d_loss.item()
+                y_pred.append(d_out.detach().cpu())
+                y_true.append(d_lbl.detach().cpu())
+
+                if is_train and self.optimizer:
+                    self.optimizer.zero_grad()
+                    loss.backward()
+                    if self.args["grad_norm"]:
+                        torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.args["max_grad_norm"])
+                    self.optimizer.step()
+                bar.set_postfix({"loss": f"{d_loss.item():.4f}"})
+
+        y_pred, y_true = torch.cat(y_pred), torch.cat(y_true)
+        mae, rmse, mape = all_metrics(y_pred, y_true, self.args["mae_thresh"], self.args["mape_thresh"])
+        self.logger.info(f"Epoch #{epoch:02d} {mode:<5} MAE:{mae:5.2f} RMSE:{rmse:5.2f} MAPE:{mape:7.4f} Time:{time.time()-start:.2f}s")
+        return total_loss / len(loader)
+
+    # ---------------- train ----------------
+    def train(self):
+        best, best_test = float("inf"), float("inf")
+        best_w, best_test_w = None, None
+        patience = 0
+        self.logger.info("Training started")
+
+        for epoch in range(1, self.args["epochs"] + 1):
+            losses = {
+                "train": self._run_epoch(epoch, self.train_loader, "train"),
+                "val": self._run_epoch(epoch, self.val_loader, "val"),
+                "test": self._run_epoch(epoch, self.test_loader, "test"),
+            }
+
+            if losses["train"] > 1e6:
+                self.logger.warning("Gradient explosion detected")
+                break
+            if losses["val"] < best:
+                best, patience, best_w = 
losses["val"], 0, copy.deepcopy(self.model.state_dict()) + else: patience += 1 + if self.args["early_stop"] and patience == self.args["early_stop_patience"]: break + if losses["test"] < best_test: best_test, best_test_w = losses["test"], copy.deepcopy(self.model.state_dict()) + + if not self.args["debug"]: + torch.save(best_w, self.best_path) + torch.save(best_test_w, self.best_test_path) + self._final_test(best_w, best_test_w) + + # ---------------- final test ---------------- + def _final_test(self, best_w, best_test_w): + for name, w in [("best val", best_w), ("best test", best_test_w)]: + self.model.load_state_dict(w) + self.logger.info(f"Testing on {name} model") + self.evaluate() + + # ---------------- evaluate ---------------- + def evaluate(self): + self.model.eval() + y_pred, y_true = [], [] + + with torch.no_grad(): + for data, target in self.test_loader: + data, target = data.to(self.device), target.to(self.device) + label = target[..., :self.args["output_dim"]] + x, shp = self.pack(data) + out = self.unpack(self.model(x), shp) + y_pred.append(out.cpu()) + y_true.append(label.cpu()) + + d_pred, d_true = self.inv(torch.cat(y_pred)), self.inv(torch.cat(y_true)) # 反归一化 + for t in range(d_true.shape[1]): + mae, rmse, mape = all_metrics(d_pred[:, t], d_true[:, t], self.args["mae_thresh"], self.args["mape_thresh"]) + self.logger.info(f"Horizon {t+1:02d} MAE:{mae:.4f} RMSE:{rmse:.4f} MAPE:{mape:.4f}") + + avg_mae, avg_rmse, avg_mape = all_metrics(d_pred, d_true, self.args["mae_thresh"], self.args["mape_thresh"]) + self.logger.info(f"AVG MAE:{avg_mae:.4f} AVG RMSE:{avg_rmse:.4f} AVG MAPE:{avg_mape:.4f}") diff --git a/trainer/Trainer.py b/trainer/Trainer.py index 85060f1..7036e26 100755 --- a/trainer/Trainer.py +++ b/trainer/Trainer.py @@ -1,375 +1,109 @@ -import math -import os -import time -import copy -import psutil -import torch +import os, time, copy, torch +from tqdm import tqdm from utils.logger import get_logger from utils.loss_function import all_metrics -from tqdm import tqdm - - -class TrainingStats: - """记录训练过程中的统计信息""" - - def __init__(self, device): - self.device = device - self.reset() - - def reset(self): - """重置所有统计数据""" - self.gpu_mem_usage_list = [] - self.cpu_mem_usage_list = [] - self.train_time_list = [] - self.infer_time_list = [] - self.total_iters = 0 - self.start_time = None - self.end_time = None - - def start_training(self): - """记录训练开始时间""" - self.start_time = time.time() - - def end_training(self): - """记录训练结束时间""" - self.end_time = time.time() - - def record_step_time(self, duration, mode): - """记录单步耗时和总迭代次数""" - if mode == "train": - self.train_time_list.append(duration) - else: - self.infer_time_list.append(duration) - self.total_iters += 1 - - def record_memory_usage(self): - """记录当前 GPU 和 CPU 内存占用""" - process = psutil.Process(os.getpid()) - cpu_mem = process.memory_info().rss / (1024**2) - - if torch.cuda.is_available(): - gpu_mem = torch.cuda.max_memory_allocated(device=self.device) / (1024**2) - torch.cuda.reset_peak_memory_stats(device=self.device) - else: - gpu_mem = 0.0 - - self.cpu_mem_usage_list.append(cpu_mem) - self.gpu_mem_usage_list.append(gpu_mem) - - def _calculate_average(self, values_list): - """安全计算平均值,避免除零错误""" - return sum(values_list) / len(values_list) if values_list else 0 - - def report(self, logger): - """在训练结束时输出汇总统计""" - if not self.start_time or not self.end_time: - logger.warning("TrainingStats: start/end time not recorded properly.") - return - - total_time = self.end_time - self.start_time - avg_gpu_mem = 
self._calculate_average(self.gpu_mem_usage_list) - avg_cpu_mem = self._calculate_average(self.cpu_mem_usage_list) - avg_train_time = self._calculate_average(self.train_time_list) - avg_infer_time = self._calculate_average(self.infer_time_list) - iters_per_sec = self.total_iters / total_time if total_time > 0 else 0 - - logger.info("===== Training Summary =====") - logger.info(f"Total training time: {total_time:.2f} s") - logger.info(f"Total iterations: {self.total_iters}") - logger.info(f"Average iterations per second: {iters_per_sec:.2f}") - logger.info(f"Average GPU Memory Usage: {avg_gpu_mem:.2f} MB") - logger.info(f"Average CPU Memory Usage: {avg_cpu_mem:.2f} MB") - if avg_train_time: - logger.info(f"Average training step time: {avg_train_time * 1000:.2f} ms") - if avg_infer_time: - logger.info(f"Average inference step time: {avg_infer_time * 1000:.2f} ms") - class Trainer: - """模型训练器,负责整个训练流程的管理""" - - def __init__( - self, - model, - loss, - optimizer, - train_loader, - val_loader, - test_loader, - scaler, - args, - lr_scheduler=None, - ): - # 设备和基本参数 - self.device = args["basic"]["device"] - train_args = args["train"] - - # 模型和训练相关组件 - self.model = model - self.loss = loss - self.optimizer = optimizer - self.lr_scheduler = lr_scheduler - - # 数据加载器 - self.train_loader = train_loader - self.val_loader = val_loader - self.test_loader = test_loader - - # 数据处理工具 + def __init__(self, model, loss, optimizer, train_loader, val_loader, test_loader, scaler, args, lr_scheduler=None): + self.device, self.args = args["basic"]["device"], args["train"] + self.model, self.loss, self.optimizer, self.lr_scheduler = model.to(self.device), loss, optimizer, lr_scheduler + self.train_loader, self.val_loader, self.test_loader = train_loader, val_loader or test_loader, test_loader self.scaler = scaler - self.args = train_args - - # 统计信息 - self.train_per_epoch = len(train_loader) - self.val_per_epoch = len(val_loader) if val_loader else 0 + self.inv = lambda x: torch.cat([s.inverse_transform(x[..., i:i+1]) for i, s in enumerate(self.scaler)], dim=-1) # 对每个维度调用反归一化器后cat + self._init_paths() + self._init_logger() - # 初始化路径、日志和统计 - self._initialize_paths(train_args) - self._initialize_logger(train_args) - self._initialize_stats() - - def _initialize_paths(self, args): - """初始化模型保存路径""" - self.best_path = os.path.join(args["log_dir"], "best_model.pth") - self.best_test_path = os.path.join(args["log_dir"], "best_test_model.pth") - self.loss_figure_path = os.path.join(args["log_dir"], "loss.png") - - def _initialize_logger(self, args): - """初始化日志记录器""" - if not os.path.isdir(args["log_dir"]) and not args["debug"]: - os.makedirs(args["log_dir"], exist_ok=True) - self.logger = get_logger( - args["log_dir"], name=self.model.__class__.__name__, debug=args["debug"] - ) - self.logger.info(f"Experiment log path in: {args['log_dir']}") - - def _initialize_stats(self): - """初始化统计信息记录器""" - self.stats = TrainingStats(device=self.device) + # ---------------- init ---------------- + def _init_paths(self): + d = self.args["log_dir"] + self.best_path, self.best_test_path = os.path.join(d, "best_model.pth"), os.path.join(d, "best_test_model.pth") - def _run_epoch(self, epoch, dataloader, mode): - """运行一个训练/验证/测试epoch""" - # 设置模型模式和是否进行优化 - if mode == "train": - self.model.train() - optimizer_step = True - else: - self.model.eval() - optimizer_step = False + def _init_logger(self): + if not self.args["debug"]: os.makedirs(self.args["log_dir"], exist_ok=True) + self.logger = get_logger(self.args["log_dir"], 
name=self.model.__class__.__name__, debug=self.args["debug"]) - # 初始化变量 - total_loss = 0 - epoch_time = time.time() + # ---------------- epoch ---------------- + def _run_epoch(self, epoch, loader, mode): + is_train = mode == "train" + self.model.train() if is_train else self.model.eval() + total_loss, start = 0.0, time.time() y_pred, y_true = [], [] - # 训练/验证循环 - with torch.set_grad_enabled(optimizer_step): - progress_bar = tqdm( - enumerate(dataloader), - total=len(dataloader), - desc=f"{mode.capitalize()} Epoch {epoch}" - ) - - for _, (data, target) in progress_bar: - # 记录步骤开始时间 - start_time = time.time() + with torch.set_grad_enabled(is_train): + bar = tqdm(loader, desc=f"{mode} {epoch}", total=len(loader)) + for data, target in bar: + data, target = data.to(self.device), target.to(self.device) + label = target[..., :self.args["output_dim"]] + out = self.model(data) + if os.environ.get("TRY") == "True": print(f"{'[✅]' if out.shape == label.shape else '❌'} " + f"out: {out.shape}, label: {label.shape} \n"); assert False + loss = self.loss(out, label) + d_out, d_lbl = self.inv(out), self.inv(label) # 反归一化 + d_loss = self.loss(d_out, d_lbl) + total_loss += d_loss.item() + y_pred.append(d_out.detach().cpu()) + y_true.append(d_lbl.detach().cpu()) - # 前向传播 - label = target[..., : self.args["output_dim"]] - output = self.model(data).to(self.device) - loss = self.loss(output, label) - - # 反归一化 - d_output = self.scaler.inverse_transform(output) - d_label = self.scaler.inverse_transform(label) - - # 反向传播和优化(仅在训练模式) - if optimizer_step and self.optimizer is not None: + if is_train and self.optimizer: self.optimizer.zero_grad() loss.backward() - - # 梯度裁剪(如果需要) - if self.args["grad_norm"]: - torch.nn.utils.clip_grad_norm_( - self.model.parameters(), self.args["max_grad_norm"] - ) + if self.args["grad_norm"]: torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.args["max_grad_norm"]) self.optimizer.step() - - # 反归一化的loss - d_loss = self.loss(d_output, d_label) + bar.set_postfix({"loss": f"{d_loss.item():.4f}"}) - # 记录步骤时间和内存使用 - step_time = time.time() - start_time - self.stats.record_step_time(step_time, mode) - - # 累积损失和预测结果 - total_loss += d_loss.item() - y_pred.append(d_output.detach().cpu()) - y_true.append(d_label.detach().cpu()) - - # 更新进度条 - progress_bar.set_postfix(loss=d_loss.item()) - - # 合并所有批次的预测结果 - y_pred = torch.cat(y_pred, dim=0) - y_true = torch.cat(y_true, dim=0) - - # 计算平均损失 - avg_loss = total_loss / len(dataloader) - - # 计算并记录指标 - mae, rmse, mape = all_metrics( - y_pred, y_true, self.args["mae_thresh"], self.args["mape_thresh"] - ) - self.logger.info( - f"Epoch #{epoch:02d}: {mode.capitalize():<5} MAE:{mae:5.2f} | RMSE:{rmse:5.2f} | MAPE:{mape:7.4f} | Time: {time.time() - epoch_time:.2f} s" - ) - - # 记录内存使用情况 - self.stats.record_memory_usage() - - return avg_loss - - def train_epoch(self, epoch): - return self._run_epoch(epoch, self.train_loader, "train") - - def val_epoch(self, epoch): - return self._run_epoch(epoch, self.val_loader or self.test_loader, "val") - - def test_epoch(self, epoch): - return self._run_epoch(epoch, self.test_loader, "test") + y_pred, y_true = torch.cat(y_pred), torch.cat(y_true) + mae, rmse, mape = all_metrics(y_pred, y_true, self.args["mae_thresh"], self.args["mape_thresh"]) + self.logger.info(f"Epoch #{epoch:02d} {mode:<5} MAE:{mae:5.2f} RMSE:{rmse:5.2f} MAPE:{mape:7.4f} Time:{time.time()-start:.2f}s") + return total_loss / len(loader) + # ---------------- train ---------------- def train(self): - """执行完整的训练流程""" - # 初始化最佳模型和损失记录 - best_model, 
best_test_model = None, None - best_loss, best_test_loss = float("inf"), float("inf") - not_improved_count = 0 + best, best_test = float("inf"), float("inf") + best_w, best_test_w = None, None + patience = 0 + self.logger.info("Training started") - # 开始训练 - self.stats.start_training() - self.logger.info("Training process started") - - # 训练循环 for epoch in range(1, self.args["epochs"] + 1): - # 训练、验证和测试一个epoch - train_epoch_loss = self.train_epoch(epoch) - val_epoch_loss = self.val_epoch(epoch) - test_epoch_loss = self.test_epoch(epoch) + losses = { + "train": self._run_epoch(epoch, self.train_loader, "train"), + "val": self._run_epoch(epoch, self.val_loader, "val"), + "test": self._run_epoch(epoch, self.test_loader, "test"), + } - # 检查梯度爆炸 - if train_epoch_loss > 1e6: - self.logger.warning("Gradient explosion detected. Ending...") - break + if losses["train"] > 1e6: self.logger.warning("Gradient explosion detected"); break + if losses["val"] < best: best, patience, best_w = losses["val"], 0, copy.deepcopy(self.model.state_dict()) + else: patience += 1 + if self.args["early_stop"] and patience == self.args["early_stop_patience"]: break + if losses["test"] < best_test: best_test, best_test_w = losses["test"], copy.deepcopy(self.model.state_dict()) - # 更新最佳验证模型 - if val_epoch_loss < best_loss: - best_loss = val_epoch_loss - not_improved_count = 0 - best_model = copy.deepcopy(self.model.state_dict()) - self.logger.info("Best validation model saved!") - else: - not_improved_count += 1 - - # 检查早停条件 - if self._should_early_stop(not_improved_count): - break - - # 更新最佳测试模型 - if test_epoch_loss < best_test_loss: - best_test_loss = test_epoch_loss - best_test_model = copy.deepcopy(self.model.state_dict()) - - # 保存最佳模型 if not self.args["debug"]: - self._save_best_models(best_model, best_test_model) + torch.save(best_w, self.best_path) + torch.save(best_test_w, self.best_test_path) + self._final_test(best_w, best_test_w) - # 结束训练并输出统计信息 - self.stats.end_training() - self.stats.report(self.logger) + # ---------------- final test ---------------- + def _final_test(self, best_w, best_test_w): + for name, w in [("best val", best_w), ("best test", best_test_w)]: + self.model.load_state_dict(w) + self.logger.info(f"Testing on {name} model") + self.evaluate() - # 最终评估 - self._finalize_training(best_model, best_test_model) - - # 输出模型参数量 - self._log_model_params() - - def _should_early_stop(self, not_improved_count): - """检查是否满足早停条件""" - if ( - self.args["early_stop"] - and not_improved_count == self.args["early_stop_patience"] - ): - self.logger.info( - f"Validation performance didn't improve for {self.args['early_stop_patience']} epochs. Training stops." 
- ) - return True - return False - - def _save_best_models(self, best_model, best_test_model): - """保存最佳模型到文件""" - torch.save(best_model, self.best_path) - torch.save(best_test_model, self.best_test_path) - self.logger.info( - f"Best models saved at {self.best_path} and {self.best_test_path}" - ) - - def _log_model_params(self): - """输出模型可训练参数数量""" - total_params = sum( p.numel() for p in self.model.parameters() if p.requires_grad) - self.logger.info(f"Trainable params: {total_params}") - - - def _finalize_training(self, best_model, best_test_model): - self.model.load_state_dict(best_model) - self.logger.info("Testing on best validation model") - self.test(self.model, self.args, self.test_loader, self.scaler, self.logger) - - self.model.load_state_dict(best_test_model) - self.logger.info("Testing on best test model") - self.test(self.model, self.args, self.test_loader, self.scaler, self.logger) - - @staticmethod - def test(model, args, data_loader, scaler, logger, path=None): - """对模型进行评估并输出性能指标""" - # 加载模型检查点(如果提供了路径) - if path: - checkpoint = torch.load(path) - model.load_state_dict(checkpoint["state_dict"]) - model.to(args["basic"]["device"]) - - # 设置为评估模式 - model.eval() - - # 收集预测和真实标签 + # ---------------- evaluate ---------------- + def evaluate(self): + self.model.eval() y_pred, y_true = [], [] - # 不计算梯度的情况下进行预测 with torch.no_grad(): - for data, target in data_loader: - label = target[..., : args["output_dim"]] - output = model(data) - y_pred.append(output.detach().cpu()) - y_true.append(label.detach().cpu()) + for data, target in self.test_loader: + data, target = data.to(self.device), target.to(self.device) + label = target[..., :self.args["output_dim"]] + y_pred.append(self.model(data).cpu()) + y_true.append(label.cpu()) - - d_y_pred = scaler.inverse_transform(torch.cat(y_pred, dim=0)) - d_y_true = scaler.inverse_transform(torch.cat(y_true, dim=0)) + d_pred, d_true = self.inv(torch.cat(y_pred)), self.inv(torch.cat(y_true)) # 反归一化 + for t in range(d_true.shape[1]): + mae, rmse, mape = all_metrics(d_pred[:, t], d_true[:, t], self.args["mae_thresh"], self.args["mape_thresh"]) + self.logger.info(f"Horizon {t+1:02d} MAE:{mae:.4f} RMSE:{rmse:.4f} MAPE:{mape:.4f}") - # 计算并记录每个时间步的指标 - for t in range(d_y_true.shape[1]): - mae, rmse, mape = all_metrics( - d_y_pred[:, t, ...], - d_y_true[:, t, ...], - args["mae_thresh"], - args["mape_thresh"], - ) - logger.info(f"Horizon {t + 1:02d}, MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.4f}") - - # 计算并记录平均指标 - mae, rmse, mape = all_metrics(d_y_pred, d_y_true, args["mae_thresh"], args["mape_thresh"]) - logger.info( f"Average Horizon, MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.4f}") - - @staticmethod - def _compute_sampling_threshold(global_step, k): - return k / (k + math.exp(global_step / k)) + avg_mae, avg_rmse, avg_mape = all_metrics(d_pred, d_true, self.args["mae_thresh"], self.args["mape_thresh"]) + self.logger.info(f"AVG MAE:{avg_mae:.4f} AVG RMSE:{avg_rmse:.4f} AVG MAPE:{avg_mape:.4f}") diff --git a/trainer/Trainer_bk.py b/trainer/Trainer_bk.py new file mode 100755 index 0000000..ee6e388 --- /dev/null +++ b/trainer/Trainer_bk.py @@ -0,0 +1,420 @@ +import math +import os +import time +import copy +import psutil +import torch +from utils.logger import get_logger +from utils.loss_function import all_metrics +from tqdm import tqdm + + +# class TrainingStats: +# """记录训练过程中的统计信息""" + +# def __init__(self, device): +# self.device = device +# self.reset() + +# def reset(self): +# """重置所有统计数据""" +# self.gpu_mem_usage_list = [] +# 
diff --git a/trainer/Trainer_bk.py b/trainer/Trainer_bk.py
new file mode 100755
index 0000000..ee6e388
--- /dev/null
+++ b/trainer/Trainer_bk.py
@@ -0,0 +1,420 @@
+import math
+import os
+import time
+import copy
+import psutil
+import torch
+from utils.logger import get_logger
+from utils.loss_function import all_metrics
+from tqdm import tqdm
+
+
+# class TrainingStats:
+#     """Track statistics collected during training"""
+
+#     def __init__(self, device):
+#         self.device = device
+#         self.reset()
+
+#     def reset(self):
+#         """Reset all statistics"""
+#         self.gpu_mem_usage_list = []
+#         self.cpu_mem_usage_list = []
+#         self.train_time_list = []
+#         self.infer_time_list = []
+#         self.total_iters = 0
+#         self.start_time = None
+#         self.end_time = None
+
+#     def start_training(self):
+#         """Record the training start time"""
+#         self.start_time = time.time()
+
+#     def end_training(self):
+#         """Record the training end time"""
+#         self.end_time = time.time()
+
+#     def record_step_time(self, duration, mode):
+#         """Record per-step duration and the total iteration count"""
+#         if mode == "train":
+#             self.train_time_list.append(duration)
+#         else:
+#             self.infer_time_list.append(duration)
+#         self.total_iters += 1
+
+#     def record_memory_usage(self):
+#         """Record current GPU and CPU memory usage"""
+#         process = psutil.Process(os.getpid())
+#         cpu_mem = process.memory_info().rss / (1024**2)
+
+#         if torch.cuda.is_available():
+#             gpu_mem = torch.cuda.max_memory_allocated(device=self.device) / (1024**2)
+#             torch.cuda.reset_peak_memory_stats(device=self.device)
+#         else:
+#             gpu_mem = 0.0
+
+#         self.cpu_mem_usage_list.append(cpu_mem)
+#         self.gpu_mem_usage_list.append(gpu_mem)
+
+#     def _calculate_average(self, values_list):
+#         """Safely compute an average, avoiding division by zero"""
+#         return sum(values_list) / len(values_list) if values_list else 0
+
+#     def report(self, logger):
+#         """Report summary statistics at the end of training"""
+#         if not self.start_time or not self.end_time:
+#             logger.warning("TrainingStats: start/end time not recorded properly.")
+#             return
+
+#         total_time = self.end_time - self.start_time
+#         avg_gpu_mem = self._calculate_average(self.gpu_mem_usage_list)
+#         avg_cpu_mem = self._calculate_average(self.cpu_mem_usage_list)
+#         avg_train_time = self._calculate_average(self.train_time_list)
+#         avg_infer_time = self._calculate_average(self.infer_time_list)
+#         iters_per_sec = self.total_iters / total_time if total_time > 0 else 0
+
+#         logger.info("===== Training Summary =====")
+#         logger.info(f"Total training time: {total_time:.2f} s")
+#         logger.info(f"Total iterations: {self.total_iters}")
+#         logger.info(f"Average iterations per second: {iters_per_sec:.2f}")
+#         logger.info(f"Average GPU Memory Usage: {avg_gpu_mem:.2f} MB")
+#         logger.info(f"Average CPU Memory Usage: {avg_cpu_mem:.2f} MB")
+#         if avg_train_time:
+#             logger.info(f"Average training step time: {avg_train_time * 1000:.2f} ms")
+#         if avg_infer_time:
+#             logger.info(f"Average inference step time: {avg_infer_time * 1000:.2f} ms")
+
+
+class Trainer:
+    """Model trainer that manages the whole training pipeline"""
+
+    def __init__(
+        self,
+        model,
+        loss,
+        optimizer,
+        train_loader,
+        val_loader,
+        test_loader,
+        scaler,
+        args,
+        lr_scheduler=None,
+    ):
+        # Device and basic parameters
+        self.device = args["basic"]["device"]
+        self.config = args  # keep the full configuration
+        train_args = args["train"]
+
+        # Model and training components
+        self.model = model
+        self.loss = loss
+        self.optimizer = optimizer
+        self.lr_scheduler = lr_scheduler
+
+        # Data loaders
+        self.train_loader = train_loader
+        self.val_loader = val_loader
+        self.test_loader = test_loader
+
+        # Data utilities
+        self.scaler = scaler
+        self.args = train_args
+
+        # Statistics
+        # self.train_per_epoch = len(train_loader)
+        # self.val_per_epoch = len(val_loader) if val_loader else 0
+
+        # Initialize paths and logging
+        self._initialize_paths(train_args)
+        self._initialize_logger(train_args)
+        # self._initialize_stats()  # disabled along with TrainingStats; calling it would raise AttributeError
+
+    def _initialize_paths(self, args):
+        """Initialize model checkpoint paths"""
+        self.best_path = os.path.join(args["log_dir"], "best_model.pth")
+        self.best_test_path = os.path.join(args["log_dir"], "best_test_model.pth")
+        self.loss_figure_path = os.path.join(args["log_dir"], "loss.png")
+
+    def _initialize_logger(self, args):
+        """Initialize the logger"""
+        if not os.path.isdir(args["log_dir"]) and not args["debug"]:
+            os.makedirs(args["log_dir"], exist_ok=True)
+        self.logger = get_logger(
+            args["log_dir"], name=self.model.__class__.__name__, debug=args["debug"]
+        )
+        self.logger.info(f"Experiment log path in: {args['log_dir']}")
+
+    # def _initialize_stats(self):
+    #     """Initialize the statistics tracker"""
+    #     self.stats = TrainingStats(device=self.device)
+
+    def _run_epoch(self, epoch, dataloader, mode):
+        """Run one training/validation/test epoch"""
+        # Set the model mode and whether to optimize
+        if mode == "train":
+            self.model.train()
+            optimizer_step = True
+        else:
+            self.model.eval()
+            optimizer_step = False
+
+        # Initialize accumulators
+        total_loss = 0
+        epoch_time = time.time()
+        y_pred, y_true = [], []
+
+        # Train/validation loop
+        with torch.set_grad_enabled(optimizer_step):
+            progress_bar = tqdm(
+                enumerate(dataloader),
+                total=len(dataloader),
+                desc=f"{mode.capitalize()} Epoch {epoch}"
+            )
+
+            for _, (data, target) in progress_bar:
+                # Record the step start time
+                start_time = time.time()
+
+                # Move data and labels to the target device
+                data = data.to(self.device)
+                target = target.to(self.device)
+
+                # Forward pass
+                label = target[..., : self.args["output_dim"]]
+                output = self.model(data)
+                # if output.shape != label.shape:
+                #     import sys
+                #     print(f"[Wrong]: Output shape: {output.shape}, Label shape: {label.shape}")
+                #     sys.exit(1)
+                # else:
+                #     import sys
+                #     print(f"[Right]: Output shape: {output.shape}, Label shape: {label.shape}")
+                #     sys.exit(0)
+                loss = self.loss(output, label)
+
+                # De-normalize
+                d_output = self.scaler.inverse_transform(output)
+                d_label = self.scaler.inverse_transform(label)
+
+                # Backward pass and optimization (training mode only)
+                if optimizer_step and self.optimizer is not None:
+                    self.optimizer.zero_grad()
+                    loss.backward()
+
+                    # Gradient clipping (if enabled)
+                    if self.args["grad_norm"]:
+                        torch.nn.utils.clip_grad_norm_(
+                            self.model.parameters(), self.args["max_grad_norm"]
+                        )
+                    self.optimizer.step()
+
+                # Loss on de-normalized values
+                d_loss = self.loss(d_output, d_label)
+
+                # Record step time and memory usage
+                # step_time = time.time() - start_time
+                # self.stats.record_step_time(step_time, mode)
+
+                # Accumulate loss and predictions
+                total_loss += d_loss.item()
+                y_pred.append(d_output.detach().cpu())
+                y_true.append(d_label.detach().cpu())
+
+                # Update the progress bar
+                progress_bar.set_postfix(loss=d_loss.item())
+
+        # Concatenate predictions from all batches
+        y_pred = torch.cat(y_pred, dim=0)
+        y_true = torch.cat(y_true, dim=0)
+
+        # Average loss
+        avg_loss = total_loss / len(dataloader)
+
+        # Compute and log metrics
+        mae, rmse, mape = all_metrics(
+            y_pred, y_true, self.args["mae_thresh"], self.args["mape_thresh"]
+        )
+        self.logger.info(
+            f"Epoch #{epoch:02d}: {mode.capitalize():<5} MAE:{mae:5.2f} | RMSE:{rmse:5.2f} | MAPE:{mape:7.4f} | Time: {time.time() - epoch_time:.2f} s"
+        )
+
+        # Record memory usage
+        # self.stats.record_memory_usage()
+
+        return avg_loss
+
+    def train_epoch(self, epoch):
+        return self._run_epoch(epoch, self.train_loader, "train")
+
+    def val_epoch(self, epoch):
+        return self._run_epoch(epoch, self.val_loader or self.test_loader, "val")
+
+    def test_epoch(self, epoch):
+        return self._run_epoch(epoch, self.test_loader, "test")
+
+    def train(self):
+        """Run the full training pipeline"""
+        # Initialize best-model and loss bookkeeping
+        best_model, best_test_model = None, None
+        best_loss, best_test_loss = float("inf"), float("inf")
+        not_improved_count = 0
+
+        # Start training
+        # self.stats.start_training()
+        self.logger.info("Training process started")
+
+        # Training loop
+        for epoch in range(1, self.args["epochs"] + 1):
+            # Run train, validation, and test for one epoch
+            train_epoch_loss = self.train_epoch(epoch)
+            val_epoch_loss = self.val_epoch(epoch)
+            test_epoch_loss = self.test_epoch(epoch)
+
+            # Check for gradient explosion
+            if train_epoch_loss > 1e6:
+                self.logger.warning("Gradient explosion detected. Ending...")
+                break
+
+            # Update the best validation model
+            if val_epoch_loss < best_loss:
+                best_loss = val_epoch_loss
+                not_improved_count = 0
+                best_model = copy.deepcopy(self.model.state_dict())
+                self.logger.info("Best validation model saved!")
+            else:
+                not_improved_count += 1
+
+            # Check the early-stopping condition
+            if self._should_early_stop(not_improved_count):
+                break
+
+            # Update the best test model
+            if test_epoch_loss < best_test_loss:
+                best_test_loss = test_epoch_loss
+                best_test_model = copy.deepcopy(self.model.state_dict())
+
+        # Save the best models
+        if not self.args["debug"]:
+            self._save_best_models(best_model, best_test_model)
+
+        # Finish training and report statistics
+        # self.stats.end_training()
+        # self.stats.report(self.logger)
+
+        # Final evaluation
+        self._finalize_training(best_model, best_test_model)
+
+        # Report the number of model parameters
+        self._log_model_params()
+
+    def _should_early_stop(self, not_improved_count):
+        """Check whether the early-stopping condition is met"""
+        if (
+            self.args["early_stop"]
+            and not_improved_count == self.args["early_stop_patience"]
+        ):
+            self.logger.info(
+                f"Validation performance didn't improve for {self.args['early_stop_patience']} epochs. Training stops."
+            )
+            return True
+        return False
+
+    def _save_best_models(self, best_model, best_test_model):
+        """Save the best models to disk"""
+        torch.save(best_model, self.best_path)
+        torch.save(best_test_model, self.best_test_path)
+        self.logger.info(
+            f"Best models saved at {self.best_path} and {self.best_test_path}"
+        )
+
+    def _log_model_params(self):
+        """Log the number of trainable parameters"""
+        total_params = sum(p.numel() for p in self.model.parameters() if p.requires_grad)
+        self.logger.info(f"Trainable params: {total_params}")
+
+    def _finalize_training(self, best_model, best_test_model):
+        self.model.load_state_dict(best_model)
+        self.logger.info("Testing on best validation model")
+        self.test(self.model, self.config, self.test_loader, self.scaler, self.logger)
+
+        self.model.load_state_dict(best_test_model)
+        self.logger.info("Testing on best test model")
+        self.test(self.model, self.config, self.test_loader, self.scaler, self.logger)
+
+    @staticmethod
+    def test(model, args, data_loader, scaler, logger, path=None):
+        """Evaluate the model and log performance metrics"""
+        # Resolve device and output dimension
+        device = None
+        output_dim = None
+
+        # Handle the two supported argument formats
+        if isinstance(args, dict):
+            if "basic" in args:
+                # Full config passed in
+                device = args["basic"]["device"]
+                output_dim = args["train"]["output_dim"]
+            else:
+                # Only train_args passed in: take the device from the model
+                device = next(model.parameters()).device
+                output_dim = args["output_dim"]
+        else:
+            raise ValueError(f"Unsupported args type: {type(args)}")
+
+        # Load a checkpoint if a path is provided
+        if path:
+            checkpoint = torch.load(path)
+            model.load_state_dict(checkpoint["state_dict"])
+            model.to(device)
+
+        # Switch to evaluation mode
+        model.eval()
+
+        # Collect predictions and ground-truth labels
+        y_pred, y_true = [], []
+
+        # Predict without gradient tracking
+        with torch.no_grad():
+            for data, target in data_loader:
+                # Move data and labels to the target device
+                data = data.to(device)
+                target = target.to(device)
+
+                label = target[..., :output_dim]
+                output = model(data)
+                y_pred.append(output.detach().cpu())
+                y_true.append(label.detach().cpu())
+
+        d_y_pred = scaler.inverse_transform(torch.cat(y_pred, dim=0))
+        d_y_true = scaler.inverse_transform(torch.cat(y_true, dim=0))
+
+        # Resolve metric thresholds
+        if "basic" in args:
+            # Full config passed in
+            mae_thresh = args["train"]["mae_thresh"]
+            mape_thresh = args["train"]["mape_thresh"]
+        else:
+            # Only train_args passed in
+            mae_thresh = args["mae_thresh"]
+            mape_thresh = args["mape_thresh"]
+
+        # Compute and log metrics for each horizon
+        for t in range(d_y_true.shape[1]):
+            mae, rmse, mape = all_metrics(
+                d_y_pred[:, t, ...],
+                d_y_true[:, t, ...],
+                mae_thresh,
+                mape_thresh,
+            )
+            logger.info(f"Horizon {t + 1:02d}, MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.4f}")
+
+        # Compute and log the averaged metrics
+        mae, rmse, mape = all_metrics(d_y_pred, d_y_true, mae_thresh, mape_thresh)
+        logger.info(f"Average Horizon, MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.4f}")
+
+    @staticmethod
+    def _compute_sampling_threshold(global_step, k):
+        return k / (k + math.exp(global_step / k))
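The _run_epoch method above drives training, validation, and testing through a single code path, toggling autograd with torch.set_grad_enabled. For reference, a minimal standalone sketch of that idiom follows; the names (run_epoch, loader shapes) are illustrative and not from the repo:

import torch

def run_epoch(model, loader, loss_fn, optimizer=None):
    training = optimizer is not None        # train iff an optimizer is supplied
    model.train(training)                   # train()/eval() in one call
    total = 0.0
    with torch.set_grad_enabled(training):  # no autograd graph for val/test
        for x, y in loader:
            out = model(x)
            loss = loss_fn(out, y)
            if training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            total += loss.item()
    return total / max(len(loader), 1)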
diff --git a/trainer/Trainer_old.py b/trainer/Trainer_old.py
deleted file mode 100755
index bd49b29..0000000
--- a/trainer/Trainer_old.py
+++ /dev/null
@@ -1,229 +0,0 @@
-import math
-import os
-import time
-import copy
-from tqdm import tqdm
-
-import torch
-from utils.logger import get_logger
-from utils.loss_function import all_metrics
-from utils.training_stats import TrainingStats
-
-
-class Trainer:
-    def __init__(
-        self,
-        model,
-        loss,
-        optimizer,
-        train_loader,
-        val_loader,
-        test_loader,
-        scaler,
-        args,
-        lr_scheduler=None,
-    ):
-        self.model = model
-        self.loss = loss
-        self.optimizer = optimizer
-        self.train_loader = train_loader
-        self.val_loader = val_loader
-        self.test_loader = test_loader
-        self.scaler = scaler
-        self.args = args
-        self.lr_scheduler = lr_scheduler
-        self.train_per_epoch = len(train_loader)
-        self.val_per_epoch = len(val_loader) if val_loader else 0
-
-        # Paths for saving models and logs
-        self.best_path = os.path.join(args["log_dir"], "best_model.pth")
-        self.best_test_path = os.path.join(args["log_dir"], "best_test_model.pth")
-        self.loss_figure_path = os.path.join(args["log_dir"], "loss.png")
-
-        # Initialize logger
-        if not os.path.isdir(args["log_dir"]) and not args["debug"]:
-            os.makedirs(args["log_dir"], exist_ok=True)
-        self.logger = get_logger(
-            args["log_dir"], name=self.model.__class__.__name__, debug=args["debug"]
-        )
-        self.logger.info(f"Experiment log path in: {args['log_dir']}")
-        # Stats tracker
-        self.stats = TrainingStats(device=args["device"])
-
-    def _run_epoch(self, epoch, dataloader, mode):
-        if mode == "train":
-            self.model.train()
-            optimizer_step = True
-        else:
-            self.model.eval()
-            optimizer_step = False
-
-        total_loss = 0
-        epoch_time = time.time()
-
-        with torch.set_grad_enabled(optimizer_step):
-            with tqdm(
-                total=len(dataloader), desc=f"{mode.capitalize()} Epoch {epoch}"
-            ) as pbar:
-                for batch_idx, (data, target) in enumerate(dataloader):
-                    start_time = time.time()
-                    label = target[..., : self.args["output_dim"]]
-                    output = self.model(data).to(self.args["device"])
-
-                    if self.args["real_value"]:
-                        output = self.scaler.inverse_transform(output)
-
-                    loss = self.loss(output, label)
-                    if optimizer_step and self.optimizer is not None:
-                        self.optimizer.zero_grad()
-                        loss.backward()
-
-                        if self.args["grad_norm"]:
-                            torch.nn.utils.clip_grad_norm_(
-                                self.model.parameters(), self.args["max_grad_norm"]
-                            )
-                        self.optimizer.step()
-
-                    step_time = time.time() - start_time
-                    self.stats.record_step_time(step_time, mode)
-                    total_loss += loss.item()
-
-                    if mode == "train" and (batch_idx + 1) % self.args["log_step"] == 0:
-                        self.logger.info(
-                            f"Train Epoch {epoch}: {batch_idx + 1}/{len(dataloader)} Loss: {loss.item():.6f}"
-                        )
-
-                    # Update the tqdm progress bar
-                    pbar.update(1)
-                    pbar.set_postfix(loss=loss.item())
-
-        avg_loss = total_loss / len(dataloader)
-        self.logger.info(
-            f"{mode.capitalize()} Epoch {epoch}: average Loss: {avg_loss:.6f}, time: {time.time() - epoch_time:.2f} s"
-        )
-        # Record memory usage
-        self.stats.record_memory_usage()
-        return avg_loss
-
-    def train_epoch(self, epoch):
-        return self._run_epoch(epoch, self.train_loader, "train")
-
-    def val_epoch(self, epoch):
-        return self._run_epoch(epoch, self.val_loader or self.test_loader, "val")
-
-    def test_epoch(self, epoch):
-        return self._run_epoch(epoch, self.test_loader, "test")
-
-    def train(self):
-        best_model, best_test_model = None, None
-        best_loss, best_test_loss = float("inf"), float("inf")
-        not_improved_count = 0
-
-        self.stats.start_training()
-        self.logger.info("Training process started")
-        for epoch in range(1, self.args["epochs"] + 1):
-            train_epoch_loss = self.train_epoch(epoch)
-            val_epoch_loss = self.val_epoch(epoch)
-            test_epoch_loss = self.test_epoch(epoch)
-
-            if train_epoch_loss > 1e6:
-                self.logger.warning("Gradient explosion detected. Ending...")
-                break
-
-            if val_epoch_loss < best_loss:
-                best_loss = val_epoch_loss
-                not_improved_count = 0
-                best_model = copy.deepcopy(self.model.state_dict())
-                self.logger.info("Best validation model saved!")
-            else:
-                not_improved_count += 1
-
-            if (
-                self.args["early_stop"]
-                and not_improved_count == self.args["early_stop_patience"]
-            ):
-                self.logger.info(
-                    f"Validation performance didn't improve for {self.args['early_stop_patience']} epochs. Training stops."
-                )
-                break
-
-            if test_epoch_loss < best_test_loss:
-                best_test_loss = test_epoch_loss
-                best_test_model = copy.deepcopy(self.model.state_dict())
-
-        if not self.args["debug"]:
-            torch.save(best_model, self.best_path)
-            torch.save(best_test_model, self.best_test_path)
-            self.logger.info(
-                f"Best models saved at {self.best_path} and {self.best_test_path}"
-            )
-
-        # Report statistics and parameter count
-        self.stats.end_training()
-        self.stats.report(self.logger)
-        try:
-            total_params = sum(
-                p.numel() for p in self.model.parameters() if p.requires_grad
-            )
-            self.logger.info(f"Trainable params: {total_params}")
-        except Exception:
-            pass
-        self._finalize_training(best_model, best_test_model)
-
-    def _finalize_training(self, best_model, best_test_model):
-        self.model.load_state_dict(best_model)
-        self.logger.info("Testing on best validation model")
-        self.test(self.model, self.args, self.test_loader, self.scaler, self.logger)
-
-        self.model.load_state_dict(best_test_model)
-        self.logger.info("Testing on best test model")
-        self.test(self.model, self.args, self.test_loader, self.scaler, self.logger)
-
-    @staticmethod
-    def test(model, args, data_loader, scaler, logger, path=None):
-        if path:
-            checkpoint = torch.load(path)
-            model.load_state_dict(checkpoint["state_dict"])
-            model.to(args["device"])
-
-        model.eval()
-        y_pred, y_true = [], []
-
-        with torch.no_grad():
-            for data, target in data_loader:
-                label = target[..., : args["output_dim"]]
-                output = model(data)
-                y_pred.append(output)
-                y_true.append(label)
-
-        if args["real_value"]:
-            y_pred = scaler.inverse_transform(torch.cat(y_pred, dim=0))
-        else:
-            y_pred = torch.cat(y_pred, dim=0)
-        y_true = torch.cat(y_true, dim=0)
-
-        # Save y_pred and y_true here if needed
-        # torch.save(y_pred, "./test/PEMS07/y_pred_D.pt")  # [3566,12,170,1]
-        # torch.save(y_true, "./test/PEMS08/y_true.pt")  # [3566,12,170,1]
-
-        for t in range(y_true.shape[1]):
-            mae, rmse, mape = all_metrics(
-                y_pred[:, t, ...],
-                y_true[:, t, ...],
-                args["mae_thresh"],
-                args["mape_thresh"],
-            )
-            logger.info(
-                f"Horizon {t + 1:02d}, MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.4f}"
-            )
-
-        mae, rmse, mape = all_metrics(
-            y_pred, y_true, args["mae_thresh"], args["mape_thresh"]
-        )
-        logger.info(
-            f"Average Horizon, MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.4f}"
-        )
-
-    @staticmethod
-    def _compute_sampling_threshold(global_step, k):
-        return k / (k + math.exp(global_step / k))
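A semantic change worth flagging: the deleted Trainer_old gated de-normalization behind args["real_value"], while the rewritten trainers always inverse-transform before computing the logged loss and metrics. A minimal sketch of the scaler contract this relies on; the repo's actual scaler class is not shown in this diff, so this plain z-score scaler is an assumption:

import torch

class StandardScaler:
    """Illustrative z-score scaler matching the inverse_transform contract used above."""
    def __init__(self, mean: float, std: float):
        self.mean, self.std = mean, std

    def transform(self, x: torch.Tensor) -> torch.Tensor:
        return (x - self.mean) / self.std

    def inverse_transform(self, x: torch.Tensor) -> torch.Tensor:
        return x * self.std + self.mean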
diff --git a/trainer/cdeTrainer/__init__.py b/trainer/cdeTrainer/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/trainer/cdeTrainer/cdetrainer.py b/trainer/cdeTrainer/cdetrainer.py
index 5678a7c..cda4a10 100755
--- a/trainer/cdeTrainer/cdetrainer.py
+++ b/trainer/cdeTrainer/cdetrainer.py
@@ -25,37 +25,60 @@ class Trainer:
         times,
         w,
     ):
+        # Device and basic parameters
+        self.device = args["basic"]["device"]
+        train_args = args["train"]
+
+        # Model and training components
         self.model = model
         self.loss = loss
         self.optimizer = optimizer
+        self.lr_scheduler = lr_scheduler
+
+        # Data loaders
         self.train_loader = train_loader
         self.val_loader = val_loader
         self.test_loader = test_loader
+
+        # Data utilities
         self.scaler = scaler
-        self.args = args
-        self.lr_scheduler = lr_scheduler
+        self.args = train_args
+
+        # Statistics
         self.train_per_epoch = len(train_loader)
         self.val_per_epoch = len(val_loader) if val_loader else 0
-        self.device = args["device"]
-
-        # Paths for saving models and logs
+
+        # Initialize paths, logging, and statistics
+        self._initialize_paths(train_args)
+        self._initialize_logger(train_args)
+        self._initialize_stats()
+
+        # Model-specific parameters
+        self.times = times.to(self.device, dtype=torch.float)
+        self.w = w
+
+    def _initialize_paths(self, args):
+        """Initialize model checkpoint paths"""
         self.best_path = os.path.join(args["log_dir"], "best_model.pth")
         self.best_test_path = os.path.join(args["log_dir"], "best_test_model.pth")
         self.loss_figure_path = os.path.join(args["log_dir"], "loss.png")
-
-    # Initialize logger
+
+    def _initialize_logger(self, args):
+        """Initialize the logger"""
        if not os.path.isdir(args["log_dir"]) and not args["debug"]:
            os.makedirs(args["log_dir"], exist_ok=True)
        self.logger = get_logger(
            args["log_dir"], name=self.model.__class__.__name__, debug=args["debug"]
        )
        self.logger.info(f"Experiment log path in: {args['log_dir']}")
-        # Stats tracker
-        self.stats = TrainingStats(device=args["device"])
-        self.times = times.to(self.device, dtype=torch.float)
-        self.w = w
+
+    def _initialize_stats(self):
+        """Initialize the statistics tracker"""
+        self.stats = TrainingStats(device=self.device)

     def _run_epoch(self, epoch, dataloader, mode):
+        """Run one training/validation/test epoch"""
+        # Set the model mode and whether to optimize
         if mode == "train":
             self.model.train()
             optimizer_step = True
@@ -63,53 +86,94 @@ class Trainer:
             self.model.eval()
             optimizer_step = False

+        # Initialize accumulators
         total_loss = 0
         epoch_time = time.time()
+        y_pred, y_true = [], []

         with torch.set_grad_enabled(optimizer_step):
-            with tqdm(
-                total=len(dataloader), desc=f"{mode.capitalize()} Epoch {epoch}"
-            ) as pbar:
-                for batch_idx, batch in enumerate(dataloader):
-                    start_time = time.time()
-                    batch = tuple(b.to(self.device, dtype=torch.float) for b in batch)
-                    *train_coeffs, target = batch
-                    label = target[..., : self.args["output_dim"]]
-                    output = self.model(self.times, train_coeffs)
+            progress_bar = tqdm(
+                enumerate(dataloader),
+                total=len(dataloader),
+                desc=f"{mode.capitalize()} Epoch {epoch}"
+            )
+
+            for batch_idx, batch in progress_bar:
+                start_time = time.time()
+                batch = tuple(b.to(self.device, dtype=torch.float) for b in batch)
+                *train_coeffs, target = batch
+                label = target[..., : self.args["output_dim"]]
+
+                # Forward pass
+                output = self.model(self.times, train_coeffs)
+
+                # Loss on normalized values
+                loss = self.loss(output, label)

-                # if self.args['real_value']:
-                #     output = self.scaler.inverse_transform(output)
+                # Guard against silent shape mismatches.
+                # (A leftover debug block here printed the shapes and called
+                # sys.exit() on the first batch, killing every run; replaced
+                # with an assert that only fires on an actual mismatch.)
+                assert output.shape == label.shape, (
+                    f"output shape {output.shape} does not match label shape {label.shape}"
+                )

-                loss = self.loss(output, label)
-                if optimizer_step and self.optimizer is not None:
-                    self.optimizer.zero_grad()
-                    loss.backward()
+                # De-normalize
+                d_output = self.scaler.inverse_transform(output)
+                d_label = self.scaler.inverse_transform(label)

-                    if self.args["grad_norm"]:
-                        torch.nn.utils.clip_grad_norm_(
-                            self.model.parameters(), self.args["max_grad_norm"]
-                        )
-                    self.optimizer.step()
+                # Loss on de-normalized values
+                d_loss = self.loss(d_output, d_label)

-                step_time = time.time() - start_time
-                self.stats.record_step_time(step_time, mode)
-                total_loss += loss.item()
+                # Backward pass and optimization (training mode only)
+                if optimizer_step and self.optimizer is not None:
+                    self.optimizer.zero_grad()
+                    loss.backward()

-                if mode == "train" and (batch_idx + 1) % self.args["log_step"] == 0:
-                    self.logger.info(
-                        f"Train Epoch {epoch}: {batch_idx + 1}/{len(dataloader)} Loss: {loss.item():.6f}"
                     )
+                    if self.args["grad_norm"]:
+                        torch.nn.utils.clip_grad_norm_(
+                            self.model.parameters(), self.args["max_grad_norm"]
+                        )
+                    self.optimizer.step()

-                # Update the tqdm progress bar
-                pbar.update(1)
-                pbar.set_postfix(loss=loss.item())
+                # Record step time
+                step_time = time.time() - start_time
+                self.stats.record_step_time(step_time, mode)
+                total_loss += d_loss.item()

+                # Accumulate predictions
+                y_pred.append(d_output.detach().cpu())
+                y_true.append(d_label.detach().cpu())
+
+                if mode == "train" and (batch_idx + 1) % self.args["log_step"] == 0:
+                    self.logger.info(
+                        f"Train Epoch {epoch}: {batch_idx + 1}/{len(dataloader)} Loss: {d_loss.item():.6f}"
+                    )
+
+                # Update the tqdm progress bar
+                progress_bar.update(1)
+                progress_bar.set_postfix(loss=d_loss.item())
+
+        # Concatenate predictions from all batches
+        y_pred = torch.cat(y_pred, dim=0)
+        y_true = torch.cat(y_true, dim=0)
+
+        # Average loss
         avg_loss = total_loss / len(dataloader)
-        self.logger.info(
-            f"{mode.capitalize()} Epoch {epoch}: average Loss: {avg_loss:.6f}, time: {time.time() - epoch_time:.2f} s"
+
+        # Compute and log metrics
+        mae, rmse, mape = all_metrics(
+            y_pred, y_true, self.args["mae_thresh"], self.args["mape_thresh"]
         )
-        # Record memory usage
+        self.logger.info(
+            f"Epoch #{epoch:02d}: {mode.capitalize():<5} MAE:{mae:5.2f} | RMSE:{rmse:5.2f} | MAPE:{mape:7.4f} | Time: {time.time() - epoch_time:.2f} s"
+        )
+
+        # Record memory usage
         self.stats.record_memory_usage()
+
         return avg_loss

     def train_epoch(self, epoch):
@@ -122,21 +186,29 @@ class Trainer:
         return self._run_epoch(epoch, self.test_loader, "test")

     def train(self):
+        """Run the full training pipeline"""
+        # Initialize best-model and loss bookkeeping
         best_model, best_test_model = None, None
         best_loss, best_test_loss = float("inf"), float("inf")
         not_improved_count = 0

+        # Start training
         self.stats.start_training()
         self.logger.info("Training process started")
+
+        # Training loop
         for epoch in range(1, self.args["epochs"] + 1):
+            # Run train, validation, and test for one epoch
             train_epoch_loss = self.train_epoch(epoch)
             val_epoch_loss = self.val_epoch(epoch)
             test_epoch_loss = self.test_epoch(epoch)

+            # Check for gradient explosion
             if train_epoch_loss > 1e6:
                 self.logger.warning("Gradient explosion detected. Ending...")
                 break

+            # Update the best validation model
             if val_epoch_loss < best_loss:
                 best_loss = val_epoch_loss
                 not_improved_count = 0
@@ -145,37 +217,54 @@ class Trainer:
             else:
                 not_improved_count += 1

-            if (
-                self.args["early_stop"]
-                and not_improved_count == self.args["early_stop_patience"]
-            ):
-                self.logger.info(
-                    f"Validation performance didn't improve for {self.args['early_stop_patience']} epochs. Training stops."
-                )
+            # Check the early-stopping condition
+            if self._should_early_stop(not_improved_count):
                 break

+            # Update the best test model
             if test_epoch_loss < best_test_loss:
                 best_test_loss = test_epoch_loss
                 best_test_model = copy.deepcopy(self.model.state_dict())

+        # Save the best models
         if not self.args["debug"]:
-            torch.save(best_model, self.best_path)
-            torch.save(best_test_model, self.best_test_path)
-            self.logger.info(
-                f"Best models saved at {self.best_path} and {self.best_test_path}"
-            )
+            self._save_best_models(best_model, best_test_model)

-        # Report statistics and parameter count
+        # Finish training and report statistics
         self.stats.end_training()
         self.stats.report(self.logger)
-        try:
-            total_params = sum(
-                p.numel() for p in self.model.parameters() if p.requires_grad
-            )
-            self.logger.info(f"Trainable params: {total_params}")
-        except Exception:
-            pass
+
+        # Report the number of model parameters
+        self._log_model_params()
+
+        # Final evaluation
         self._finalize_training(best_model, best_test_model)
+
+    def _should_early_stop(self, not_improved_count):
+        """Check whether the early-stopping condition is met"""
+        if (
+            self.args["early_stop"]
+            and not_improved_count == self.args["early_stop_patience"]
+        ):
+            self.logger.info(
+                f"Validation performance didn't improve for {self.args['early_stop_patience']} epochs. Training stops."
+            )
+            return True
+        return False
+
+    def _save_best_models(self, best_model, best_test_model):
+        """Save the best models to disk"""
+        torch.save(best_model, self.best_path)
+        torch.save(best_test_model, self.best_test_path)
+        self.logger.info(
+            f"Best models saved at {self.best_path} and {self.best_test_path}"
+        )
+
+    def _log_model_params(self):
+        """Log the number of trainable parameters"""
+        total_params = sum(p.numel() for p in self.model.parameters() if p.requires_grad)
+        self.logger.info(f"Trainable params: {total_params}")

     def _finalize_training(self, best_model, best_test_model):
         self.model.load_state_dict(best_model)
@@ -188,42 +277,41 @@ class Trainer:

     @staticmethod
     def test(model, args, data_loader, scaler, logger):
+        """Evaluate the model and log performance metrics"""
+        # Switch to evaluation mode
         model.eval()
+
+        # Collect predictions and ground-truth labels
         y_pred, y_true = [], []
         times = torch.linspace(0, 11, 12)

+        # Predict without gradient tracking
         with torch.no_grad():
             for batch_idx, batch in enumerate(data_loader):
-                batch = tuple(b.to(args["device"], dtype=torch.float) for b in batch)
+                batch = tuple(b.to(args["basic"]["device"], dtype=torch.float) for b in batch)
                 *test_coeffs, target = batch
                 label = target[..., : args["output_dim"]]
-                output = model(times.to(args["device"], dtype=torch.float), test_coeffs)
-                y_true.append(label)
-                y_pred.append(output)
+                output = model(times.to(args["basic"]["device"], dtype=torch.float), test_coeffs)
+                y_true.append(label.detach().cpu())
+                y_pred.append(output.detach().cpu())

-        # if args['real_value']:
-        #     y_pred = scaler.inverse_transform(torch.cat(y_pred, dim=0))
-        # else:
-        y_pred = torch.cat(y_pred, dim=0)
-        y_true = torch.cat(y_true, dim=0)
+        # De-normalize
+        d_y_pred = scaler.inverse_transform(torch.cat(y_pred, dim=0))
+        d_y_true = scaler.inverse_transform(torch.cat(y_true, dim=0))

-        for t in range(y_true.shape[1]):
+        # Compute and log metrics for each horizon
+        for t in range(d_y_true.shape[1]):
             mae, rmse, mape = all_metrics(
-                y_pred[:, t, ...],
-                y_true[:, t, ...],
+                d_y_pred[:, t, ...],
+                d_y_true[:, t, ...],
                 args["mae_thresh"],
                 args["mape_thresh"],
             )
-            logger.info(
-                f"Horizon {t + 1:02d}, MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.4f}"
-            )
+            logger.info(f"Horizon {t + 1:02d}, MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.4f}")

-        mae, rmse, mape = all_metrics(
-            y_pred, y_true, args["mae_thresh"], args["mape_thresh"]
-        )
-        logger.info(
-            f"Average Horizon, MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.4f}"
-        )
+        # Compute and log the averaged metrics
+        mae, rmse, mape = all_metrics(d_y_pred, d_y_true, args["mae_thresh"], args["mape_thresh"])
+        logger.info(f"Average Horizon, MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.4f}")

     @staticmethod
     def _compute_sampling_threshold(global_step, k):
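_compute_sampling_threshold, kept unchanged in both trainers, is the inverse-sigmoid decay from scheduled sampling (Bengio et al., 2015): the threshold k / (k + exp(step / k)) starts near 1.0 and decays toward 0 as training progresses. This diff never calls it, so the teacher-forcing usage below is illustrative only:

import math
import random

def sampling_threshold(global_step: int, k: float) -> float:
    # Inverse-sigmoid decay: ~1.0 early in training, -> 0.0 as steps grow.
    return k / (k + math.exp(global_step / k))

def use_teacher_forcing(global_step: int, k: float = 2000.0) -> bool:
    # Illustrative: feed ground truth with probability equal to the threshold.
    return random.random() < sampling_threshold(global_step, k)

# e.g. k=2000: step 0 -> ~1.00, step 10000 -> ~0.93, step 30000 -> ~0.0006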
diff --git a/trainer/trainer_selector.py b/trainer/trainer_selector.py
index 97d6b0b..24d8a10 100755
--- a/trainer/trainer_selector.py
+++ b/trainer/trainer_selector.py
@@ -4,107 +4,34 @@ from trainer.DCRNN_Trainer import Trainer as DCRNN_Trainer
 from trainer.PDG2SEQ_Trainer import Trainer as PDG2SEQ_Trainer
 from trainer.STMLP_Trainer import Trainer as STMLP_Trainer
 from trainer.E32Trainer import Trainer as EXP_Trainer
+from trainer.InformerTrainer import InformerTrainer
+from trainer.TSTrainer import Trainer as TSTrainer


 def select_trainer(
-    model,
-    loss,
-    optimizer,
-    train_loader,
-    val_loader,
-    test_loader,
-    scaler,
-    args,
-    lr_scheduler,
-    kwargs,
+    model, loss, optimizer,
+    train_loader, val_loader, test_loader,
+    scaler, args, lr_scheduler, kwargs
 ):
     model_name = args["basic"]["model"]

-    match model_name:
-        case "STGNCDE":
-            return cdeTrainer(
-                model,
-                loss,
-                optimizer,
-                train_loader,
-                val_loader,
-                test_loader,
-                scaler,
-                args,
-                lr_scheduler,
-                kwargs[0],
-                None,
-            )
-        case "STGNRDE":
-            return cdeTrainer(
-                model,
-                loss,
-                optimizer,
-                train_loader,
-                val_loader,
-                test_loader,
-                scaler,
-                args,
-                lr_scheduler,
-                kwargs[0],
-                None,
-            )
-        case "DCRNN":
-            return DCRNN_Trainer(
-                model,
-                loss,
-                optimizer,
-                train_loader,
-                val_loader,
-                test_loader,
-                scaler,
-                args,
-                lr_scheduler,
-            )
-        case "PDG2SEQ":
-            return PDG2SEQ_Trainer(
-                model,
-                loss,
-                optimizer,
-                train_loader,
-                val_loader,
-                test_loader,
-                scaler,
-                args,
-                lr_scheduler,
-            )
-        case "STMLP":
-            return STMLP_Trainer(
-                model,
-                loss,
-                optimizer,
-                train_loader,
-                val_loader,
-                test_loader,
-                scaler,
-                args,
-                lr_scheduler,
-            )
-        case "EXP":
-            return EXP_Trainer(
-                model,
-                loss,
-                optimizer,
-                train_loader,
-                val_loader,
-                test_loader,
-                scaler,
-                args,
-                lr_scheduler,
-            )
-        case _:
-            return Trainer(
-                model,
-                loss,
-                optimizer,
-                train_loader,
-                val_loader,
-                test_loader,
-                scaler,
-                args,
-                lr_scheduler,
-            )
+    base_args = (
+        model, loss, optimizer,
+        train_loader, val_loader, test_loader,
+        scaler, args, lr_scheduler
+    )
+
+    if model_name in {"HI", "PatchTST", "iTransformer", "FPT"}:
+        return TSTrainer(*base_args)
+
+    trainer_map = {
+        "DCRNN": DCRNN_Trainer,
+        "PDG2SEQ": PDG2SEQ_Trainer,
+        "STMLP": STMLP_Trainer,
+        "EXP": EXP_Trainer,
+        "Informer": InformerTrainer,
+    }
+
+    if model_name in {"STGNCDE", "STGNRDE"}:
+        return cdeTrainer(*base_args, kwargs[0], None)
+
+    return trainer_map.get(model_name, Trainer)(*base_args)
diff --git a/utils/initializer.py b/utils/initializer.py
index b69c67f..183bfd3 100755
--- a/utils/initializer.py
+++ b/utils/initializer.py
@@ -9,9 +9,9 @@
 import os
 import yaml


-def init_model(args):
-    device = args["device"]
-    model = model_selector(args).to(device)
+def init_model(config):
+    device = config["basic"]["device"]
+    model = model_selector(config).to(device)
     for p in model.parameters():
         if p.dim() > 1:
             nn.init.xavier_uniform_(p)
@@ -23,6 +23,9 @@


 def init_optimizer(model, args):
+    optimizer = None
+    lr_scheduler = None
+
     optim = args.get("optimizer", "Adam")
     match optim:
         case "Adam":
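The refactored selector above replaces a ~100-line match statement with a dispatch dict plus trainer_map.get(model_name, Trainer) as the fallback. A common next step for this pattern is a decorator-based registry, sketched below; this is an alternative design, not what the repo does, and DCRNNTrainer here is a placeholder class:

from typing import Callable, Dict

TRAINERS: Dict[str, Callable] = {}

def register(name: str) -> Callable:
    # One line per new trainer; no edits to the selector itself.
    def wrap(cls):
        TRAINERS[name] = cls
        return cls
    return wrap

@register("DCRNN")
class DCRNNTrainer:  # placeholder for illustration
    def __init__(self, *base_args):
        self.base_args = base_args

def select(name: str, *base_args):
    cls = TRAINERS.get(name)
    if cls is None:
        raise KeyError(f"No trainer registered for {name!r}")
    return cls(*base_args)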
diff --git a/utils/logger.py b/utils/logger.py
index 8a2f187..7a818f6 100755
--- a/utils/logger.py
+++ b/utils/logger.py
@@ -18,7 +18,7 @@ def get_logger(root, name=None, debug=True):
         logger.handlers.clear()

     # Timestamp format for log records
-    formatter = logging.Formatter("%(asctime)s - %(message)s", "%Y/%m/%d %H:%M:%S")
+    formatter = logging.Formatter("%(asctime)s - %(message)s", "%m/%d %H:%M")

     # Console output
     console_handler = logging.StreamHandler()
diff --git a/utils/training_stats.py b/utils/training_stats.py
index ecda094..9483354 100644
--- a/utils/training_stats.py
+++ b/utils/training_stats.py
@@ -47,6 +47,10 @@ class TrainingStats:
         self.cpu_mem_usage_list.append(cpu_mem)
         self.gpu_mem_usage_list.append(gpu_mem)

+    def _calculate_average(self, values_list):
+        """Safely compute an average, avoiding division by zero"""
+        return sum(values_list) / len(values_list) if values_list else 0
+
     def report(self, logger):
         """Report summary statistics at the end of training"""
         if not self.start_time or not self.end_time:
@@ -54,26 +58,10 @@
             return

         total_time = self.end_time - self.start_time
-        avg_gpu_mem = (
-            sum(self.gpu_mem_usage_list) / len(self.gpu_mem_usage_list)
-            if self.gpu_mem_usage_list
-            else 0
-        )
-        avg_cpu_mem = (
-            sum(self.cpu_mem_usage_list) / len(self.cpu_mem_usage_list)
-            if self.cpu_mem_usage_list
-            else 0
-        )
-        avg_train_time = (
-            sum(self.train_time_list) / len(self.train_time_list)
-            if self.train_time_list
-            else 0
-        )
-        avg_infer_time = (
-            sum(self.infer_time_list) / len(self.infer_time_list)
-            if self.infer_time_list
-            else 0
-        )
+        avg_gpu_mem = self._calculate_average(self.gpu_mem_usage_list)
+        avg_cpu_mem = self._calculate_average(self.cpu_mem_usage_list)
+        avg_train_time = self._calculate_average(self.train_time_list)
+        avg_infer_time = self._calculate_average(self.infer_time_list)
         iters_per_sec = self.total_iters / total_time if total_time > 0 else 0

         logger.info("===== Training Summary =====")
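A note on _calculate_average: the explicit fallback to 0 is what keeps report() safe when a mode never ran (e.g., no inference steps), since the stdlib statistics.fmean raises StatisticsError on empty input. A minimal, runnable demonstration of the same guard:

def calculate_average(values):
    # Mirrors TrainingStats._calculate_average: returns 0 for empty lists
    # instead of raising ZeroDivisionError (or StatisticsError with fmean).
    return sum(values) / len(values) if values else 0

print(calculate_average([1.0, 2.0, 3.0]))  # 2.0
print(calculate_average([]))               # 0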