diff --git a/baseline.ipynb b/baseline.ipynb index e033395..f5975f7 100644 --- a/baseline.ipynb +++ b/baseline.ipynb @@ -2,12 +2,12 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "id": "73ce6820-8bcf-48bc-852d-41c5a6b7a440", "metadata": { "collapsed": false, - "is_executing": true, "ExecuteTime": { + "end_time": "2025-04-18T07:11:48.999436Z", "start_time": "2025-04-18T06:58:57.940747Z" } }, @@ -27,7 +27,562 @@ "Test Epoch 1: 100%|█████████████████| 82/82 [00:00<00:00, 157.36it/s, loss=20.9]\r\n", "04/18 14:59: Test Epoch 1: average Loss: 19.833939, time: 0.52 s\r\n", "04/18 14:59: Best validation model saved!\r\n", - "Train Epoch 2: 18%|██▊ | 44/245 [00:01<00:06, 30.37it/s, loss=21.2]" + "Train Epoch 2: 100%|███████████████| 245/245 [00:07<00:00, 30.66it/s, loss=16.5]\r\n", + "04/18 14:59: Train Epoch 2: average Loss: 18.955400, time: 7.99 s\r\n", + "Val Epoch 2: 100%|██████████████████| 81/81 [00:00<00:00, 152.93it/s, loss=16.8]\r\n", + "04/18 14:59: Val Epoch 2: average Loss: 17.667724, time: 0.53 s\r\n", + "Test Epoch 2: 100%|█████████████████| 82/82 [00:00<00:00, 157.44it/s, loss=19.1]\r\n", + "04/18 14:59: Test Epoch 2: average Loss: 17.492063, time: 0.52 s\r\n", + "04/18 14:59: Best validation model saved!\r\n", + "Train Epoch 3: 100%|███████████████| 245/245 [00:08<00:00, 30.39it/s, loss=18.5]\r\n", + "04/18 14:59: Train Epoch 3: average Loss: 17.757062, time: 8.06 s\r\n", + "Val Epoch 3: 100%|██████████████████| 81/81 [00:00<00:00, 153.40it/s, loss=17.3]\r\n", + "04/18 14:59: Val Epoch 3: average Loss: 17.849575, time: 0.53 s\r\n", + "Test Epoch 3: 100%|█████████████████| 82/82 [00:00<00:00, 157.48it/s, loss=19.4]\r\n", + "04/18 14:59: Test Epoch 3: average Loss: 17.989512, time: 0.52 s\r\n", + "Train Epoch 4: 100%|███████████████| 245/245 [00:07<00:00, 31.12it/s, loss=16.1]\r\n", + "04/18 14:59: Train Epoch 4: average Loss: 17.367368, time: 7.87 s\r\n", + "Val Epoch 4: 100%|██████████████████| 81/81 [00:00<00:00, 153.58it/s, loss=16.6]\r\n", + "04/18 14:59: Val Epoch 4: average Loss: 17.069438, time: 0.53 s\r\n", + "Test Epoch 4: 100%|█████████████████| 82/82 [00:00<00:00, 157.25it/s, loss=19.3]\r\n", + "04/18 14:59: Test Epoch 4: average Loss: 17.217621, time: 0.52 s\r\n", + "04/18 14:59: Best validation model saved!\r\n", + "Train Epoch 5: 100%|███████████████| 245/245 [00:07<00:00, 31.54it/s, loss=17.9]\r\n", + "04/18 15:00: Train Epoch 5: average Loss: 16.739775, time: 7.77 s\r\n", + "Val Epoch 5: 100%|██████████████████| 81/81 [00:00<00:00, 153.79it/s, loss=17.5]\r\n", + "04/18 15:00: Val Epoch 5: average Loss: 17.040500, time: 0.53 s\r\n", + "Test Epoch 5: 100%|█████████████████| 82/82 [00:00<00:00, 157.95it/s, loss=19.4]\r\n", + "04/18 15:00: Test Epoch 5: average Loss: 17.067980, time: 0.52 s\r\n", + "04/18 15:00: Best validation model saved!\r\n", + "Train Epoch 6: 100%|███████████████| 245/245 [00:07<00:00, 30.79it/s, loss=15.6]\r\n", + "04/18 15:00: Train Epoch 6: average Loss: 16.363637, time: 7.96 s\r\n", + "Val Epoch 6: 100%|██████████████████| 81/81 [00:00<00:00, 153.59it/s, loss=16.4]\r\n", + "04/18 15:00: Val Epoch 6: average Loss: 15.945692, time: 0.53 s\r\n", + "Test Epoch 6: 100%|█████████████████| 82/82 [00:00<00:00, 156.96it/s, loss=17.9]\r\n", + "04/18 15:00: Test Epoch 6: average Loss: 16.459437, time: 0.52 s\r\n", + "04/18 15:00: Best validation model saved!\r\n", + "Train Epoch 7: 100%|███████████████| 245/245 [00:07<00:00, 30.99it/s, loss=15.4]\r\n", + "04/18 15:00: Train Epoch 7: average Loss: 16.168401, time: 
7.91 s\r\n", + "Val Epoch 7: 100%|██████████████████| 81/81 [00:00<00:00, 153.68it/s, loss=16.8]\r\n", + "04/18 15:00: Val Epoch 7: average Loss: 16.000951, time: 0.53 s\r\n", + "Test Epoch 7: 100%|█████████████████| 82/82 [00:00<00:00, 157.79it/s, loss=18.2]\r\n", + "04/18 15:00: Test Epoch 7: average Loss: 16.602080, time: 0.52 s\r\n", + "Train Epoch 8: 100%|███████████████| 245/245 [00:07<00:00, 30.95it/s, loss=14.9]\r\n", + "04/18 15:00: Train Epoch 8: average Loss: 15.795147, time: 7.92 s\r\n", + "Val Epoch 8: 100%|██████████████████| 81/81 [00:00<00:00, 153.46it/s, loss=16.7]\r\n", + "04/18 15:00: Val Epoch 8: average Loss: 15.875261, time: 0.53 s\r\n", + "Test Epoch 8: 100%|█████████████████| 82/82 [00:00<00:00, 157.43it/s, loss=18.2]\r\n", + "04/18 15:00: Test Epoch 8: average Loss: 16.407824, time: 0.52 s\r\n", + "04/18 15:00: Best validation model saved!\r\n", + "Train Epoch 9: 100%|███████████████| 245/245 [00:08<00:00, 29.40it/s, loss=15.9]\r\n", + "04/18 15:00: Train Epoch 9: average Loss: 15.555818, time: 8.33 s\r\n", + "Val Epoch 9: 100%|████████████████████| 81/81 [00:00<00:00, 153.59it/s, loss=16]\r\n", + "04/18 15:00: Val Epoch 9: average Loss: 15.353782, time: 0.53 s\r\n", + "Test Epoch 9: 100%|█████████████████| 82/82 [00:00<00:00, 157.77it/s, loss=17.3]\r\n", + "04/18 15:00: Test Epoch 9: average Loss: 16.096373, time: 0.52 s\r\n", + "04/18 15:00: Best validation model saved!\r\n", + "Train Epoch 10: 100%|██████████████| 245/245 [00:07<00:00, 31.05it/s, loss=14.9]\r\n", + "04/18 15:00: Train Epoch 10: average Loss: 15.516910, time: 7.89 s\r\n", + "Val Epoch 10: 100%|███████████████████| 81/81 [00:00<00:00, 153.25it/s, loss=16]\r\n", + "04/18 15:00: Val Epoch 10: average Loss: 15.266865, time: 0.53 s\r\n", + "Test Epoch 10: 100%|████████████████| 82/82 [00:00<00:00, 156.89it/s, loss=17.2]\r\n", + "04/18 15:00: Test Epoch 10: average Loss: 15.990968, time: 0.52 s\r\n", + "04/18 15:00: Best validation model saved!\r\n", + "Train Epoch 11: 100%|██████████████| 245/245 [00:07<00:00, 31.28it/s, loss=14.4]\r\n", + "04/18 15:00: Train Epoch 11: average Loss: 15.228227, time: 7.83 s\r\n", + "Val Epoch 11: 100%|█████████████████| 81/81 [00:00<00:00, 153.25it/s, loss=16.2]\r\n", + "04/18 15:00: Val Epoch 11: average Loss: 15.461508, time: 0.53 s\r\n", + "Test Epoch 11: 100%|████████████████| 82/82 [00:00<00:00, 157.35it/s, loss=17.6]\r\n", + "04/18 15:00: Test Epoch 11: average Loss: 16.216885, time: 0.52 s\r\n", + "Train Epoch 12: 100%|██████████████| 245/245 [00:07<00:00, 31.28it/s, loss=14.7]\r\n", + "04/18 15:01: Train Epoch 12: average Loss: 15.153359, time: 7.83 s\r\n", + "Val Epoch 12: 100%|███████████████████| 81/81 [00:00<00:00, 153.59it/s, loss=16]\r\n", + "04/18 15:01: Val Epoch 12: average Loss: 15.187264, time: 0.53 s\r\n", + "Test Epoch 12: 100%|████████████████| 82/82 [00:00<00:00, 157.68it/s, loss=17.6]\r\n", + "04/18 15:01: Test Epoch 12: average Loss: 15.924902, time: 0.52 s\r\n", + "04/18 15:01: Best validation model saved!\r\n", + "Train Epoch 13: 100%|██████████████| 245/245 [00:07<00:00, 31.22it/s, loss=16.4]\r\n", + "04/18 15:01: Train Epoch 13: average Loss: 15.097116, time: 7.85 s\r\n", + "Val Epoch 13: 100%|███████████████████| 81/81 [00:00<00:00, 153.27it/s, loss=16]\r\n", + "04/18 15:01: Val Epoch 13: average Loss: 15.215066, time: 0.53 s\r\n", + "Test Epoch 13: 100%|████████████████| 82/82 [00:00<00:00, 157.54it/s, loss=17.2]\r\n", + "04/18 15:01: Test Epoch 13: average Loss: 16.029764, time: 0.52 s\r\n", + "Train Epoch 14: 100%|██████████████| 
245/245 [00:07<00:00, 31.24it/s, loss=14.9]\r\n", + "04/18 15:01: Train Epoch 14: average Loss: 15.088215, time: 7.84 s\r\n", + "Val Epoch 14: 100%|█████████████████| 81/81 [00:00<00:00, 152.35it/s, loss=15.9]\r\n", + "04/18 15:01: Val Epoch 14: average Loss: 15.005296, time: 0.53 s\r\n", + "Test Epoch 14: 100%|████████████████| 82/82 [00:00<00:00, 156.52it/s, loss=17.5]\r\n", + "04/18 15:01: Test Epoch 14: average Loss: 15.850943, time: 0.52 s\r\n", + "04/18 15:01: Best validation model saved!\r\n", + "Train Epoch 15: 100%|██████████████| 245/245 [00:07<00:00, 31.81it/s, loss=15.6]\r\n", + "04/18 15:01: Train Epoch 15: average Loss: 14.816309, time: 7.70 s\r\n", + "Val Epoch 15: 100%|█████████████████| 81/81 [00:00<00:00, 153.22it/s, loss=15.8]\r\n", + "04/18 15:01: Val Epoch 15: average Loss: 14.949909, time: 0.53 s\r\n", + "Test Epoch 15: 100%|████████████████| 82/82 [00:00<00:00, 159.32it/s, loss=16.9]\r\n", + "04/18 15:01: Test Epoch 15: average Loss: 15.961560, time: 0.52 s\r\n", + "04/18 15:01: Best validation model saved!\r\n", + "Train Epoch 16: 100%|██████████████| 245/245 [00:07<00:00, 31.45it/s, loss=14.3]\r\n", + "04/18 15:01: Train Epoch 16: average Loss: 14.762852, time: 7.79 s\r\n", + "Val Epoch 16: 100%|█████████████████| 81/81 [00:00<00:00, 153.04it/s, loss=16.3]\r\n", + "04/18 15:01: Val Epoch 16: average Loss: 14.845161, time: 0.53 s\r\n", + "Test Epoch 16: 100%|████████████████| 82/82 [00:00<00:00, 157.20it/s, loss=17.1]\r\n", + "04/18 15:01: Test Epoch 16: average Loss: 15.910873, time: 0.52 s\r\n", + "04/18 15:01: Best validation model saved!\r\n", + "Train Epoch 17: 100%|██████████████| 245/245 [00:07<00:00, 31.85it/s, loss=14.9]\r\n", + "04/18 15:01: Train Epoch 17: average Loss: 14.555154, time: 7.69 s\r\n", + "Val Epoch 17: 100%|█████████████████| 81/81 [00:00<00:00, 153.11it/s, loss=15.6]\r\n", + "04/18 15:01: Val Epoch 17: average Loss: 14.700343, time: 0.53 s\r\n", + "Test Epoch 17: 100%|██████████████████| 82/82 [00:00<00:00, 157.00it/s, loss=17]\r\n", + "04/18 15:01: Test Epoch 17: average Loss: 15.714252, time: 0.52 s\r\n", + "04/18 15:01: Best validation model saved!\r\n", + "Train Epoch 18: 100%|██████████████| 245/245 [00:08<00:00, 30.51it/s, loss=13.4]\r\n", + "04/18 15:01: Train Epoch 18: average Loss: 14.663447, time: 8.03 s\r\n", + "Val Epoch 18: 100%|█████████████████| 81/81 [00:00<00:00, 153.33it/s, loss=15.6]\r\n", + "04/18 15:01: Val Epoch 18: average Loss: 14.697129, time: 0.53 s\r\n", + "Test Epoch 18: 100%|████████████████| 82/82 [00:00<00:00, 157.20it/s, loss=16.9]\r\n", + "04/18 15:01: Test Epoch 18: average Loss: 15.599584, time: 0.52 s\r\n", + "04/18 15:01: Best validation model saved!\r\n", + "Train Epoch 19: 100%|██████████████| 245/245 [00:07<00:00, 31.01it/s, loss=13.6]\r\n", + "04/18 15:02: Train Epoch 19: average Loss: 14.458139, time: 7.90 s\r\n", + "Val Epoch 19: 100%|███████████████████| 81/81 [00:00<00:00, 153.39it/s, loss=16]\r\n", + "04/18 15:02: Val Epoch 19: average Loss: 14.989674, time: 0.53 s\r\n", + "Test Epoch 19: 100%|████████████████| 82/82 [00:00<00:00, 157.76it/s, loss=16.9]\r\n", + "04/18 15:02: Test Epoch 19: average Loss: 15.709684, time: 0.52 s\r\n", + "Train Epoch 20: 100%|██████████████| 245/245 [00:07<00:00, 31.13it/s, loss=14.2]\r\n", + "04/18 15:02: Train Epoch 20: average Loss: 14.453694, time: 7.87 s\r\n", + "Val Epoch 20: 100%|█████████████████| 81/81 [00:00<00:00, 153.34it/s, loss=15.8]\r\n", + "04/18 15:02: Val Epoch 20: average Loss: 15.205679, time: 0.53 s\r\n", + "Test Epoch 20: 
100%|████████████████| 82/82 [00:00<00:00, 160.28it/s, loss=17.1]\r\n", + "04/18 15:02: Test Epoch 20: average Loss: 16.253983, time: 0.51 s\r\n", + "Train Epoch 21: 100%|██████████████| 245/245 [00:07<00:00, 31.04it/s, loss=14.2]\r\n", + "04/18 15:02: Train Epoch 21: average Loss: 14.348933, time: 7.89 s\r\n", + "Val Epoch 21: 100%|█████████████████| 81/81 [00:00<00:00, 152.81it/s, loss=15.5]\r\n", + "04/18 15:02: Val Epoch 21: average Loss: 14.825251, time: 0.53 s\r\n", + "Test Epoch 21: 100%|████████████████| 82/82 [00:00<00:00, 157.18it/s, loss=16.7]\r\n", + "04/18 15:02: Test Epoch 21: average Loss: 15.773787, time: 0.52 s\r\n", + "Train Epoch 22: 100%|██████████████| 245/245 [00:07<00:00, 31.17it/s, loss=13.8]\r\n", + "04/18 15:02: Train Epoch 22: average Loss: 14.388334, time: 7.86 s\r\n", + "Val Epoch 22: 100%|█████████████████| 81/81 [00:00<00:00, 153.28it/s, loss=15.9]\r\n", + "04/18 15:02: Val Epoch 22: average Loss: 15.048875, time: 0.53 s\r\n", + "Test Epoch 22: 100%|████████████████| 82/82 [00:00<00:00, 157.58it/s, loss=17.2]\r\n", + "04/18 15:02: Test Epoch 22: average Loss: 15.926125, time: 0.52 s\r\n", + "Train Epoch 23: 100%|██████████████| 245/245 [00:07<00:00, 31.04it/s, loss=13.9]\r\n", + "04/18 15:02: Train Epoch 23: average Loss: 14.409936, time: 7.89 s\r\n", + "Val Epoch 23: 100%|█████████████████| 81/81 [00:00<00:00, 153.62it/s, loss=15.7]\r\n", + "04/18 15:02: Val Epoch 23: average Loss: 14.645483, time: 0.53 s\r\n", + "Test Epoch 23: 100%|████████████████| 82/82 [00:00<00:00, 157.54it/s, loss=16.6]\r\n", + "04/18 15:02: Test Epoch 23: average Loss: 15.826965, time: 0.52 s\r\n", + "04/18 15:02: Best validation model saved!\r\n", + "Train Epoch 24: 100%|██████████████| 245/245 [00:07<00:00, 31.15it/s, loss=13.4]\r\n", + "04/18 15:02: Train Epoch 24: average Loss: 14.214980, time: 7.87 s\r\n", + "Val Epoch 24: 100%|█████████████████| 81/81 [00:00<00:00, 153.05it/s, loss=15.8]\r\n", + "04/18 15:02: Val Epoch 24: average Loss: 14.862851, time: 0.53 s\r\n", + "Test Epoch 24: 100%|████████████████| 82/82 [00:00<00:00, 157.33it/s, loss=17.1]\r\n", + "04/18 15:02: Test Epoch 24: average Loss: 16.018495, time: 0.52 s\r\n", + "Train Epoch 25: 100%|██████████████| 245/245 [00:07<00:00, 31.15it/s, loss=14.3]\r\n", + "04/18 15:02: Train Epoch 25: average Loss: 14.185303, time: 7.87 s\r\n", + "Val Epoch 25: 100%|█████████████████| 81/81 [00:00<00:00, 153.06it/s, loss=15.2]\r\n", + "04/18 15:02: Val Epoch 25: average Loss: 14.467945, time: 0.53 s\r\n", + "Test Epoch 25: 100%|████████████████| 82/82 [00:00<00:00, 157.54it/s, loss=16.3]\r\n", + "04/18 15:03: Test Epoch 25: average Loss: 15.556583, time: 0.52 s\r\n", + "04/18 15:03: Best validation model saved!\r\n", + "Train Epoch 26: 100%|██████████████| 245/245 [00:07<00:00, 31.23it/s, loss=13.6]\r\n", + "04/18 15:03: Train Epoch 26: average Loss: 14.176444, time: 7.85 s\r\n", + "Val Epoch 26: 100%|█████████████████| 81/81 [00:00<00:00, 153.18it/s, loss=15.5]\r\n", + "04/18 15:03: Val Epoch 26: average Loss: 14.485669, time: 0.53 s\r\n", + "Test Epoch 26: 100%|████████████████| 82/82 [00:00<00:00, 157.35it/s, loss=16.5]\r\n", + "04/18 15:03: Test Epoch 26: average Loss: 15.561294, time: 0.52 s\r\n", + "Train Epoch 27: 100%|████████████████| 245/245 [00:07<00:00, 31.21it/s, loss=14]\r\n", + "04/18 15:03: Train Epoch 27: average Loss: 14.104966, time: 7.85 s\r\n", + "Val Epoch 27: 100%|█████████████████| 81/81 [00:00<00:00, 153.14it/s, loss=15.5]\r\n", + "04/18 15:03: Val Epoch 27: average Loss: 14.471194, time: 0.53 s\r\n", + 
"Test Epoch 27: 100%|████████████████| 82/82 [00:00<00:00, 157.28it/s, loss=16.7]\r\n", + "04/18 15:03: Test Epoch 27: average Loss: 15.621981, time: 0.52 s\r\n", + "Train Epoch 28: 100%|██████████████| 245/245 [00:07<00:00, 31.15it/s, loss=13.4]\r\n", + "04/18 15:03: Train Epoch 28: average Loss: 14.070449, time: 7.86 s\r\n", + "Val Epoch 28: 100%|█████████████████| 81/81 [00:00<00:00, 152.62it/s, loss=15.2]\r\n", + "04/18 15:03: Val Epoch 28: average Loss: 14.635830, time: 0.53 s\r\n", + "Test Epoch 28: 100%|████████████████| 82/82 [00:00<00:00, 157.01it/s, loss=16.2]\r\n", + "04/18 15:03: Test Epoch 28: average Loss: 15.697868, time: 0.52 s\r\n", + "Train Epoch 29: 100%|██████████████| 245/245 [00:07<00:00, 31.06it/s, loss=14.3]\r\n", + "04/18 15:03: Train Epoch 29: average Loss: 14.068645, time: 7.89 s\r\n", + "Val Epoch 29: 100%|█████████████████| 81/81 [00:00<00:00, 152.93it/s, loss=15.4]\r\n", + "04/18 15:03: Val Epoch 29: average Loss: 14.442385, time: 0.53 s\r\n", + "Test Epoch 29: 100%|████████████████| 82/82 [00:00<00:00, 157.23it/s, loss=16.8]\r\n", + "04/18 15:03: Test Epoch 29: average Loss: 15.796008, time: 0.52 s\r\n", + "04/18 15:03: Best validation model saved!\r\n", + "Train Epoch 30: 100%|██████████████| 245/245 [00:07<00:00, 31.01it/s, loss=12.8]\r\n", + "04/18 15:03: Train Epoch 30: average Loss: 14.054648, time: 7.90 s\r\n", + "Val Epoch 30: 100%|█████████████████| 81/81 [00:00<00:00, 152.92it/s, loss=15.4]\r\n", + "04/18 15:03: Val Epoch 30: average Loss: 14.441602, time: 0.53 s\r\n", + "Test Epoch 30: 100%|████████████████| 82/82 [00:00<00:00, 157.03it/s, loss=16.4]\r\n", + "04/18 15:03: Test Epoch 30: average Loss: 15.612465, time: 0.52 s\r\n", + "04/18 15:03: Best validation model saved!\r\n", + "Train Epoch 31: 100%|██████████████| 245/245 [00:07<00:00, 31.11it/s, loss=14.5]\r\n", + "04/18 15:03: Train Epoch 31: average Loss: 13.999030, time: 7.88 s\r\n", + "Val Epoch 31: 100%|█████████████████| 81/81 [00:00<00:00, 152.71it/s, loss=15.6]\r\n", + "04/18 15:03: Val Epoch 31: average Loss: 14.416110, time: 0.53 s\r\n", + "Test Epoch 31: 100%|████████████████| 82/82 [00:00<00:00, 157.18it/s, loss=16.5]\r\n", + "04/18 15:03: Test Epoch 31: average Loss: 15.538215, time: 0.52 s\r\n", + "04/18 15:03: Best validation model saved!\r\n", + "Train Epoch 32: 100%|██████████████| 245/245 [00:07<00:00, 31.12it/s, loss=14.4]\r\n", + "04/18 15:04: Train Epoch 32: average Loss: 13.913729, time: 7.87 s\r\n", + "Val Epoch 32: 100%|█████████████████| 81/81 [00:00<00:00, 152.94it/s, loss=15.6]\r\n", + "04/18 15:04: Val Epoch 32: average Loss: 14.648036, time: 0.53 s\r\n", + "Test Epoch 32: 100%|████████████████| 82/82 [00:00<00:00, 156.98it/s, loss=16.7]\r\n", + "04/18 15:04: Test Epoch 32: average Loss: 15.823536, time: 0.52 s\r\n", + "Train Epoch 33: 100%|████████████████| 245/245 [00:07<00:00, 31.05it/s, loss=15]\r\n", + "04/18 15:04: Train Epoch 33: average Loss: 13.916763, time: 7.89 s\r\n", + "Val Epoch 33: 100%|█████████████████| 81/81 [00:00<00:00, 152.84it/s, loss=15.4]\r\n", + "04/18 15:04: Val Epoch 33: average Loss: 14.532857, time: 0.53 s\r\n", + "Test Epoch 33: 100%|████████████████| 82/82 [00:00<00:00, 157.02it/s, loss=16.4]\r\n", + "04/18 15:04: Test Epoch 33: average Loss: 15.775041, time: 0.52 s\r\n", + "Train Epoch 34: 100%|██████████████| 245/245 [00:07<00:00, 31.02it/s, loss=14.6]\r\n", + "04/18 15:04: Train Epoch 34: average Loss: 13.952852, time: 7.90 s\r\n", + "Val Epoch 34: 100%|█████████████████| 81/81 [00:00<00:00, 153.00it/s, loss=15.2]\r\n", + "04/18 
15:04: Val Epoch 34: average Loss: 14.575427, time: 0.53 s\r\n", + "Test Epoch 34: 100%|████████████████| 82/82 [00:00<00:00, 157.22it/s, loss=16.1]\r\n", + "04/18 15:04: Test Epoch 34: average Loss: 15.818352, time: 0.52 s\r\n", + "Train Epoch 35: 100%|██████████████| 245/245 [00:07<00:00, 30.97it/s, loss=14.5]\r\n", + "04/18 15:04: Train Epoch 35: average Loss: 13.854347, time: 7.91 s\r\n", + "Val Epoch 35: 100%|█████████████████| 81/81 [00:00<00:00, 153.01it/s, loss=15.7]\r\n", + "04/18 15:04: Val Epoch 35: average Loss: 14.336807, time: 0.53 s\r\n", + "Test Epoch 35: 100%|████████████████| 82/82 [00:00<00:00, 157.16it/s, loss=16.9]\r\n", + "04/18 15:04: Test Epoch 35: average Loss: 15.649391, time: 0.52 s\r\n", + "04/18 15:04: Best validation model saved!\r\n", + "Train Epoch 36: 100%|██████████████| 245/245 [00:07<00:00, 31.04it/s, loss=13.7]\r\n", + "04/18 15:04: Train Epoch 36: average Loss: 13.829107, time: 7.89 s\r\n", + "Val Epoch 36: 100%|█████████████████| 81/81 [00:00<00:00, 153.21it/s, loss=15.8]\r\n", + "04/18 15:04: Val Epoch 36: average Loss: 14.235835, time: 0.53 s\r\n", + "Test Epoch 36: 100%|████████████████| 82/82 [00:00<00:00, 157.37it/s, loss=16.3]\r\n", + "04/18 15:04: Test Epoch 36: average Loss: 15.598080, time: 0.52 s\r\n", + "04/18 15:04: Best validation model saved!\r\n", + "Train Epoch 37: 100%|██████████████| 245/245 [00:07<00:00, 31.12it/s, loss=13.9]\r\n", + "04/18 15:04: Train Epoch 37: average Loss: 13.766311, time: 7.87 s\r\n", + "Val Epoch 37: 100%|█████████████████| 81/81 [00:00<00:00, 152.44it/s, loss=15.5]\r\n", + "04/18 15:04: Val Epoch 37: average Loss: 14.947377, time: 0.53 s\r\n", + "Test Epoch 37: 100%|████████████████| 82/82 [00:00<00:00, 157.06it/s, loss=16.6]\r\n", + "04/18 15:04: Test Epoch 37: average Loss: 16.292278, time: 0.52 s\r\n", + "Train Epoch 38: 100%|██████████████| 245/245 [00:07<00:00, 31.12it/s, loss=14.1]\r\n", + "04/18 15:04: Train Epoch 38: average Loss: 13.763178, time: 7.87 s\r\n", + "Val Epoch 38: 100%|█████████████████| 81/81 [00:00<00:00, 152.90it/s, loss=16.4]\r\n", + "04/18 15:04: Val Epoch 38: average Loss: 14.672801, time: 0.53 s\r\n", + "Test Epoch 38: 100%|████████████████| 82/82 [00:00<00:00, 157.15it/s, loss=17.2]\r\n", + "04/18 15:04: Test Epoch 38: average Loss: 15.774813, time: 0.52 s\r\n", + "Train Epoch 39: 100%|██████████████| 245/245 [00:07<00:00, 31.04it/s, loss=13.6]\r\n", + "04/18 15:05: Train Epoch 39: average Loss: 13.722722, time: 7.89 s\r\n", + "Val Epoch 39: 100%|█████████████████| 81/81 [00:00<00:00, 151.54it/s, loss=15.6]\r\n", + "04/18 15:05: Val Epoch 39: average Loss: 14.353744, time: 0.54 s\r\n", + "Test Epoch 39: 100%|████████████████| 82/82 [00:00<00:00, 157.21it/s, loss=16.2]\r\n", + "04/18 15:05: Test Epoch 39: average Loss: 15.589559, time: 0.52 s\r\n", + "Train Epoch 40: 100%|██████████████| 245/245 [00:07<00:00, 31.10it/s, loss=13.8]\r\n", + "04/18 15:05: Train Epoch 40: average Loss: 13.735809, time: 7.88 s\r\n", + "Val Epoch 40: 100%|█████████████████| 81/81 [00:00<00:00, 152.95it/s, loss=15.8]\r\n", + "04/18 15:05: Val Epoch 40: average Loss: 14.423929, time: 0.53 s\r\n", + "Test Epoch 40: 100%|████████████████| 82/82 [00:00<00:00, 157.22it/s, loss=16.1]\r\n", + "04/18 15:05: Test Epoch 40: average Loss: 15.780755, time: 0.52 s\r\n", + "Train Epoch 41: 100%|██████████████| 245/245 [00:07<00:00, 31.26it/s, loss=13.7]\r\n", + "04/18 15:05: Train Epoch 41: average Loss: 13.819473, time: 7.84 s\r\n", + "Val Epoch 41: 100%|█████████████████| 81/81 [00:00<00:00, 153.02it/s, 
loss=15.6]\r\n", + "04/18 15:05: Val Epoch 41: average Loss: 14.577788, time: 0.53 s\r\n", + "Test Epoch 41: 100%|████████████████| 82/82 [00:00<00:00, 157.23it/s, loss=16.3]\r\n", + "04/18 15:05: Test Epoch 41: average Loss: 15.794340, time: 0.52 s\r\n", + "Train Epoch 42: 100%|██████████████| 245/245 [00:07<00:00, 31.39it/s, loss=13.2]\r\n", + "04/18 15:05: Train Epoch 42: average Loss: 13.732110, time: 7.81 s\r\n", + "Val Epoch 42: 100%|█████████████████| 81/81 [00:00<00:00, 153.06it/s, loss=15.7]\r\n", + "04/18 15:05: Val Epoch 42: average Loss: 14.400696, time: 0.53 s\r\n", + "Test Epoch 42: 100%|████████████████| 82/82 [00:00<00:00, 157.32it/s, loss=16.2]\r\n", + "04/18 15:05: Test Epoch 42: average Loss: 15.716415, time: 0.52 s\r\n", + "Train Epoch 43: 100%|██████████████| 245/245 [00:07<00:00, 31.48it/s, loss=14.2]\r\n", + "04/18 15:05: Train Epoch 43: average Loss: 13.584692, time: 7.78 s\r\n", + "Val Epoch 43: 100%|█████████████████| 81/81 [00:00<00:00, 152.92it/s, loss=15.8]\r\n", + "04/18 15:05: Val Epoch 43: average Loss: 14.242085, time: 0.53 s\r\n", + "Test Epoch 43: 100%|████████████████| 82/82 [00:00<00:00, 157.20it/s, loss=16.4]\r\n", + "04/18 15:05: Test Epoch 43: average Loss: 15.598483, time: 0.52 s\r\n", + "Train Epoch 44: 100%|██████████████| 245/245 [00:07<00:00, 31.25it/s, loss=14.2]\r\n", + "04/18 15:05: Train Epoch 44: average Loss: 13.627383, time: 7.84 s\r\n", + "Val Epoch 44: 100%|█████████████████| 81/81 [00:00<00:00, 152.80it/s, loss=15.9]\r\n", + "04/18 15:05: Val Epoch 44: average Loss: 14.186897, time: 0.53 s\r\n", + "Test Epoch 44: 100%|████████████████| 82/82 [00:00<00:00, 157.22it/s, loss=16.2]\r\n", + "04/18 15:05: Test Epoch 44: average Loss: 15.619757, time: 0.52 s\r\n", + "04/18 15:05: Best validation model saved!\r\n", + "Train Epoch 45: 100%|██████████████| 245/245 [00:07<00:00, 31.28it/s, loss=12.8]\r\n", + "04/18 15:05: Train Epoch 45: average Loss: 13.606323, time: 7.83 s\r\n", + "Val Epoch 45: 100%|███████████████████| 81/81 [00:00<00:00, 152.43it/s, loss=16]\r\n", + "04/18 15:05: Val Epoch 45: average Loss: 14.176591, time: 0.53 s\r\n", + "Test Epoch 45: 100%|████████████████| 82/82 [00:00<00:00, 157.27it/s, loss=16.3]\r\n", + "04/18 15:05: Test Epoch 45: average Loss: 15.529232, time: 0.52 s\r\n", + "04/18 15:05: Best validation model saved!\r\n", + "Train Epoch 46: 100%|██████████████| 245/245 [00:07<00:00, 30.90it/s, loss=13.1]\r\n", + "04/18 15:06: Train Epoch 46: average Loss: 13.611591, time: 7.93 s\r\n", + "Val Epoch 46: 100%|█████████████████| 81/81 [00:00<00:00, 152.90it/s, loss=15.9]\r\n", + "04/18 15:06: Val Epoch 46: average Loss: 14.213882, time: 0.53 s\r\n", + "Test Epoch 46: 100%|████████████████| 82/82 [00:00<00:00, 157.03it/s, loss=16.1]\r\n", + "04/18 15:06: Test Epoch 46: average Loss: 15.506738, time: 0.52 s\r\n", + "Train Epoch 47: 100%|██████████████| 245/245 [00:07<00:00, 31.26it/s, loss=13.6]\r\n", + "04/18 15:06: Train Epoch 47: average Loss: 13.533474, time: 7.84 s\r\n", + "Val Epoch 47: 100%|█████████████████| 81/81 [00:00<00:00, 152.98it/s, loss=15.8]\r\n", + "04/18 15:06: Val Epoch 47: average Loss: 14.363502, time: 0.53 s\r\n", + "Test Epoch 47: 100%|████████████████| 82/82 [00:00<00:00, 157.12it/s, loss=16.3]\r\n", + "04/18 15:06: Test Epoch 47: average Loss: 15.754511, time: 0.52 s\r\n", + "Train Epoch 48: 100%|██████████████| 245/245 [00:07<00:00, 31.20it/s, loss=13.1]\r\n", + "04/18 15:06: Train Epoch 48: average Loss: 13.487003, time: 7.85 s\r\n", + "Val Epoch 48: 100%|█████████████████| 81/81 [00:00<00:00, 
152.72it/s, loss=16.1]\r\n", + "04/18 15:06: Val Epoch 48: average Loss: 14.277329, time: 0.53 s\r\n", + "Test Epoch 48: 100%|████████████████| 82/82 [00:00<00:00, 157.03it/s, loss=16.4]\r\n", + "04/18 15:06: Test Epoch 48: average Loss: 15.584286, time: 0.52 s\r\n", + "Train Epoch 49: 100%|██████████████| 245/245 [00:07<00:00, 31.00it/s, loss=14.3]\r\n", + "04/18 15:06: Train Epoch 49: average Loss: 13.549889, time: 7.90 s\r\n", + "Val Epoch 49: 100%|█████████████████| 81/81 [00:00<00:00, 152.96it/s, loss=15.7]\r\n", + "04/18 15:06: Val Epoch 49: average Loss: 14.198317, time: 0.53 s\r\n", + "Test Epoch 49: 100%|████████████████| 82/82 [00:00<00:00, 157.05it/s, loss=16.3]\r\n", + "04/18 15:06: Test Epoch 49: average Loss: 15.605781, time: 0.52 s\r\n", + "Train Epoch 50: 100%|██████████████| 245/245 [00:07<00:00, 31.24it/s, loss=12.9]\r\n", + "04/18 15:06: Train Epoch 50: average Loss: 13.432133, time: 7.84 s\r\n", + "Val Epoch 50: 100%|█████████████████| 81/81 [00:00<00:00, 152.95it/s, loss=16.3]\r\n", + "04/18 15:06: Val Epoch 50: average Loss: 14.375532, time: 0.53 s\r\n", + "Test Epoch 50: 100%|████████████████| 82/82 [00:00<00:00, 157.06it/s, loss=16.1]\r\n", + "04/18 15:06: Test Epoch 50: average Loss: 15.527477, time: 0.52 s\r\n", + "Train Epoch 51: 100%|██████████████| 245/245 [00:07<00:00, 31.23it/s, loss=13.7]\r\n", + "04/18 15:06: Train Epoch 51: average Loss: 13.517806, time: 7.85 s\r\n", + "Val Epoch 51: 100%|█████████████████| 81/81 [00:00<00:00, 152.90it/s, loss=15.8]\r\n", + "04/18 15:06: Val Epoch 51: average Loss: 14.160355, time: 0.53 s\r\n", + "Test Epoch 51: 100%|████████████████| 82/82 [00:00<00:00, 157.01it/s, loss=16.1]\r\n", + "04/18 15:06: Test Epoch 51: average Loss: 15.480644, time: 0.52 s\r\n", + "04/18 15:06: Best validation model saved!\r\n", + "Train Epoch 52: 100%|██████████████| 245/245 [00:07<00:00, 31.17it/s, loss=14.4]\r\n", + "04/18 15:07: Train Epoch 52: average Loss: 13.456323, time: 7.86 s\r\n", + "Val Epoch 52: 100%|█████████████████| 81/81 [00:00<00:00, 152.91it/s, loss=15.7]\r\n", + "04/18 15:07: Val Epoch 52: average Loss: 14.203743, time: 0.53 s\r\n", + "Test Epoch 52: 100%|████████████████| 82/82 [00:00<00:00, 156.96it/s, loss=15.9]\r\n", + "04/18 15:07: Test Epoch 52: average Loss: 15.433528, time: 0.52 s\r\n", + "Train Epoch 53: 100%|██████████████| 245/245 [00:07<00:00, 31.18it/s, loss=12.9]\r\n", + "04/18 15:07: Train Epoch 53: average Loss: 13.456199, time: 7.86 s\r\n", + "Val Epoch 53: 100%|█████████████████| 81/81 [00:00<00:00, 152.75it/s, loss=15.7]\r\n", + "04/18 15:07: Val Epoch 53: average Loss: 14.709743, time: 0.53 s\r\n", + "Test Epoch 53: 100%|████████████████| 82/82 [00:00<00:00, 157.14it/s, loss=16.5]\r\n", + "04/18 15:07: Test Epoch 53: average Loss: 16.283345, time: 0.52 s\r\n", + "Train Epoch 54: 100%|██████████████| 245/245 [00:07<00:00, 31.28it/s, loss=13.5]\r\n", + "04/18 15:07: Train Epoch 54: average Loss: 13.406949, time: 7.83 s\r\n", + "Val Epoch 54: 100%|█████████████████| 81/81 [00:00<00:00, 152.88it/s, loss=15.5]\r\n", + "04/18 15:07: Val Epoch 54: average Loss: 14.085478, time: 0.53 s\r\n", + "Test Epoch 54: 100%|██████████████████| 82/82 [00:00<00:00, 157.12it/s, loss=16]\r\n", + "04/18 15:07: Test Epoch 54: average Loss: 15.492622, time: 0.52 s\r\n", + "04/18 15:07: Best validation model saved!\r\n", + "Train Epoch 55: 100%|██████████████| 245/245 [00:07<00:00, 31.32it/s, loss=14.1]\r\n", + "04/18 15:07: Train Epoch 55: average Loss: 13.409812, time: 7.82 s\r\n", + "Val Epoch 55: 100%|█████████████████| 81/81 
[00:00<00:00, 152.89it/s, loss=16.3]\r\n", + "04/18 15:07: Val Epoch 55: average Loss: 14.175583, time: 0.53 s\r\n", + "Test Epoch 55: 100%|████████████████| 82/82 [00:00<00:00, 156.92it/s, loss=16.4]\r\n", + "04/18 15:07: Test Epoch 55: average Loss: 15.451688, time: 0.52 s\r\n", + "Train Epoch 56: 100%|██████████████| 245/245 [00:07<00:00, 31.38it/s, loss=13.2]\r\n", + "04/18 15:07: Train Epoch 56: average Loss: 13.343708, time: 7.81 s\r\n", + "Val Epoch 56: 100%|█████████████████| 81/81 [00:00<00:00, 153.01it/s, loss=15.7]\r\n", + "04/18 15:07: Val Epoch 56: average Loss: 14.285364, time: 0.53 s\r\n", + "Test Epoch 56: 100%|████████████████| 82/82 [00:00<00:00, 156.46it/s, loss=16.1]\r\n", + "04/18 15:07: Test Epoch 56: average Loss: 15.705351, time: 0.52 s\r\n", + "Train Epoch 57: 100%|██████████████| 245/245 [00:07<00:00, 31.22it/s, loss=13.6]\r\n", + "04/18 15:07: Train Epoch 57: average Loss: 13.409340, time: 7.85 s\r\n", + "Val Epoch 57: 100%|█████████████████| 81/81 [00:00<00:00, 152.17it/s, loss=15.7]\r\n", + "04/18 15:07: Val Epoch 57: average Loss: 14.305532, time: 0.53 s\r\n", + "Test Epoch 57: 100%|██████████████████| 82/82 [00:00<00:00, 156.68it/s, loss=16]\r\n", + "04/18 15:07: Test Epoch 57: average Loss: 15.948924, time: 0.52 s\r\n", + "Train Epoch 58: 100%|██████████████| 245/245 [00:07<00:00, 31.31it/s, loss=13.8]\r\n", + "04/18 15:07: Train Epoch 58: average Loss: 13.377977, time: 7.82 s\r\n", + "Val Epoch 58: 100%|█████████████████| 81/81 [00:00<00:00, 153.05it/s, loss=15.7]\r\n", + "04/18 15:07: Val Epoch 58: average Loss: 14.306823, time: 0.53 s\r\n", + "Test Epoch 58: 100%|████████████████| 82/82 [00:00<00:00, 157.18it/s, loss=16.4]\r\n", + "04/18 15:07: Test Epoch 58: average Loss: 15.842773, time: 0.52 s\r\n", + "Train Epoch 59: 100%|██████████████| 245/245 [00:07<00:00, 31.18it/s, loss=13.2]\r\n", + "04/18 15:08: Train Epoch 59: average Loss: 13.301966, time: 7.86 s\r\n", + "Val Epoch 59: 100%|█████████████████| 81/81 [00:00<00:00, 153.09it/s, loss=15.7]\r\n", + "04/18 15:08: Val Epoch 59: average Loss: 14.109362, time: 0.53 s\r\n", + "Test Epoch 59: 100%|████████████████| 82/82 [00:00<00:00, 157.24it/s, loss=16.1]\r\n", + "04/18 15:08: Test Epoch 59: average Loss: 15.568573, time: 0.52 s\r\n", + "Train Epoch 60: 100%|██████████████| 245/245 [00:07<00:00, 31.19it/s, loss=12.7]\r\n", + "04/18 15:08: Train Epoch 60: average Loss: 13.385729, time: 7.86 s\r\n", + "Val Epoch 60: 100%|█████████████████| 81/81 [00:00<00:00, 152.87it/s, loss=16.2]\r\n", + "04/18 15:08: Val Epoch 60: average Loss: 14.126553, time: 0.53 s\r\n", + "Test Epoch 60: 100%|████████████████| 82/82 [00:00<00:00, 157.39it/s, loss=16.1]\r\n", + "04/18 15:08: Test Epoch 60: average Loss: 15.491593, time: 0.52 s\r\n", + "Train Epoch 61: 100%|██████████████| 245/245 [00:07<00:00, 31.09it/s, loss=13.6]\r\n", + "04/18 15:08: Train Epoch 61: average Loss: 13.286482, time: 7.88 s\r\n", + "Val Epoch 61: 100%|█████████████████| 81/81 [00:00<00:00, 153.04it/s, loss=16.3]\r\n", + "04/18 15:08: Val Epoch 61: average Loss: 14.084741, time: 0.53 s\r\n", + "Test Epoch 61: 100%|██████████████████| 82/82 [00:00<00:00, 156.91it/s, loss=16]\r\n", + "04/18 15:08: Test Epoch 61: average Loss: 15.658612, time: 0.52 s\r\n", + "04/18 15:08: Best validation model saved!\r\n", + "Train Epoch 62: 100%|██████████████| 245/245 [00:07<00:00, 31.19it/s, loss=13.3]\r\n", + "04/18 15:08: Train Epoch 62: average Loss: 13.347921, time: 7.86 s\r\n", + "Val Epoch 62: 100%|█████████████████| 81/81 [00:00<00:00, 153.04it/s, 
loss=15.6]\r\n", + "04/18 15:08: Val Epoch 62: average Loss: 14.040155, time: 0.53 s\r\n", + "Test Epoch 62: 100%|████████████████| 82/82 [00:00<00:00, 156.77it/s, loss=16.1]\r\n", + "04/18 15:08: Test Epoch 62: average Loss: 15.638042, time: 0.52 s\r\n", + "04/18 15:08: Best validation model saved!\r\n", + "Train Epoch 63: 100%|██████████████| 245/245 [00:07<00:00, 31.80it/s, loss=13.3]\r\n", + "04/18 15:08: Train Epoch 63: average Loss: 13.222051, time: 7.70 s\r\n", + "Val Epoch 63: 100%|█████████████████| 81/81 [00:00<00:00, 153.42it/s, loss=16.2]\r\n", + "04/18 15:08: Val Epoch 63: average Loss: 14.072032, time: 0.53 s\r\n", + "Test Epoch 63: 100%|████████████████| 82/82 [00:00<00:00, 157.30it/s, loss=16.3]\r\n", + "04/18 15:08: Test Epoch 63: average Loss: 15.368541, time: 0.52 s\r\n", + "Train Epoch 64: 100%|████████████████| 245/245 [00:07<00:00, 31.31it/s, loss=14]\r\n", + "04/18 15:08: Train Epoch 64: average Loss: 13.217104, time: 7.83 s\r\n", + "Val Epoch 64: 100%|█████████████████| 81/81 [00:00<00:00, 153.10it/s, loss=15.8]\r\n", + "04/18 15:08: Val Epoch 64: average Loss: 14.066114, time: 0.53 s\r\n", + "Test Epoch 64: 100%|████████████████| 82/82 [00:00<00:00, 157.19it/s, loss=16.4]\r\n", + "04/18 15:08: Test Epoch 64: average Loss: 15.548564, time: 0.52 s\r\n", + "Train Epoch 65: 100%|████████████████| 245/245 [00:07<00:00, 31.01it/s, loss=13]\r\n", + "04/18 15:08: Train Epoch 65: average Loss: 13.257094, time: 7.90 s\r\n", + "Val Epoch 65: 100%|█████████████████| 81/81 [00:00<00:00, 153.00it/s, loss=15.6]\r\n", + "04/18 15:08: Val Epoch 65: average Loss: 14.020536, time: 0.53 s\r\n", + "Test Epoch 65: 100%|████████████████| 82/82 [00:00<00:00, 157.31it/s, loss=16.1]\r\n", + "04/18 15:08: Test Epoch 65: average Loss: 15.546160, time: 0.52 s\r\n", + "04/18 15:08: Best validation model saved!\r\n", + "Train Epoch 66: 100%|████████████████| 245/245 [00:07<00:00, 31.12it/s, loss=13]\r\n", + "04/18 15:09: Train Epoch 66: average Loss: 13.212056, time: 7.87 s\r\n", + "Val Epoch 66: 100%|█████████████████| 81/81 [00:00<00:00, 153.09it/s, loss=15.6]\r\n", + "04/18 15:09: Val Epoch 66: average Loss: 14.046396, time: 0.53 s\r\n", + "Test Epoch 66: 100%|████████████████| 82/82 [00:00<00:00, 157.34it/s, loss=15.9]\r\n", + "04/18 15:09: Test Epoch 66: average Loss: 15.670768, time: 0.52 s\r\n", + "Train Epoch 67: 100%|██████████████| 245/245 [00:07<00:00, 31.14it/s, loss=12.7]\r\n", + "04/18 15:09: Train Epoch 67: average Loss: 13.186405, time: 7.87 s\r\n", + "Val Epoch 67: 100%|█████████████████| 81/81 [00:00<00:00, 153.15it/s, loss=15.7]\r\n", + "04/18 15:09: Val Epoch 67: average Loss: 14.119034, time: 0.53 s\r\n", + "Test Epoch 67: 100%|████████████████| 82/82 [00:00<00:00, 157.05it/s, loss=16.2]\r\n", + "04/18 15:09: Test Epoch 67: average Loss: 15.613195, time: 0.52 s\r\n", + "Train Epoch 68: 100%|██████████████| 245/245 [00:07<00:00, 31.01it/s, loss=13.1]\r\n", + "04/18 15:09: Train Epoch 68: average Loss: 13.211606, time: 7.90 s\r\n", + "Val Epoch 68: 100%|█████████████████| 81/81 [00:00<00:00, 152.86it/s, loss=15.4]\r\n", + "04/18 15:09: Val Epoch 68: average Loss: 14.185503, time: 0.53 s\r\n", + "Test Epoch 68: 100%|████████████████| 82/82 [00:00<00:00, 157.27it/s, loss=15.9]\r\n", + "04/18 15:09: Test Epoch 68: average Loss: 15.765433, time: 0.52 s\r\n", + "Train Epoch 69: 100%|██████████████| 245/245 [00:07<00:00, 31.00it/s, loss=13.7]\r\n", + "04/18 15:09: Train Epoch 69: average Loss: 13.258614, time: 7.90 s\r\n", + "Val Epoch 69: 100%|█████████████████| 81/81 [00:00<00:00, 
153.13it/s, loss=15.7]\r\n", + "04/18 15:09: Val Epoch 69: average Loss: 13.953542, time: 0.53 s\r\n", + "Test Epoch 69: 100%|████████████████| 82/82 [00:00<00:00, 157.23it/s, loss=16.1]\r\n", + "04/18 15:09: Test Epoch 69: average Loss: 15.602754, time: 0.52 s\r\n", + "04/18 15:09: Best validation model saved!\r\n", + "Train Epoch 70: 100%|████████████████| 245/245 [00:07<00:00, 30.92it/s, loss=13]\r\n", + "04/18 15:09: Train Epoch 70: average Loss: 13.152433, time: 7.92 s\r\n", + "Val Epoch 70: 100%|█████████████████| 81/81 [00:00<00:00, 152.32it/s, loss=15.8]\r\n", + "04/18 15:09: Val Epoch 70: average Loss: 14.054483, time: 0.53 s\r\n", + "Test Epoch 70: 100%|████████████████| 82/82 [00:00<00:00, 157.32it/s, loss=15.7]\r\n", + "04/18 15:09: Test Epoch 70: average Loss: 15.475987, time: 0.52 s\r\n", + "Train Epoch 71: 100%|██████████████| 245/245 [00:07<00:00, 30.81it/s, loss=13.2]\r\n", + "04/18 15:09: Train Epoch 71: average Loss: 13.175334, time: 7.95 s\r\n", + "Val Epoch 71: 100%|█████████████████| 81/81 [00:00<00:00, 152.19it/s, loss=15.8]\r\n", + "04/18 15:09: Val Epoch 71: average Loss: 13.995604, time: 0.53 s\r\n", + "Test Epoch 71: 100%|████████████████| 82/82 [00:00<00:00, 157.27it/s, loss=16.1]\r\n", + "04/18 15:09: Test Epoch 71: average Loss: 15.546816, time: 0.52 s\r\n", + "Train Epoch 72: 100%|██████████████| 245/245 [00:07<00:00, 31.02it/s, loss=13.1]\r\n", + "04/18 15:09: Train Epoch 72: average Loss: 13.150165, time: 7.90 s\r\n", + "Val Epoch 72: 100%|█████████████████| 81/81 [00:00<00:00, 153.29it/s, loss=16.3]\r\n", + "04/18 15:09: Val Epoch 72: average Loss: 14.204324, time: 0.53 s\r\n", + "Test Epoch 72: 100%|████████████████| 82/82 [00:00<00:00, 157.32it/s, loss=16.7]\r\n", + "04/18 15:09: Test Epoch 72: average Loss: 15.644656, time: 0.52 s\r\n", + "Train Epoch 73: 100%|██████████████| 245/245 [00:07<00:00, 31.00it/s, loss=12.3]\r\n", + "04/18 15:10: Train Epoch 73: average Loss: 13.143224, time: 7.90 s\r\n", + "Val Epoch 73: 100%|█████████████████| 81/81 [00:00<00:00, 152.83it/s, loss=15.7]\r\n", + "04/18 15:10: Val Epoch 73: average Loss: 13.991921, time: 0.53 s\r\n", + "Test Epoch 73: 100%|████████████████| 82/82 [00:00<00:00, 157.04it/s, loss=16.1]\r\n", + "04/18 15:10: Test Epoch 73: average Loss: 15.508103, time: 0.52 s\r\n", + "Train Epoch 74: 100%|██████████████| 245/245 [00:07<00:00, 31.11it/s, loss=12.5]\r\n", + "04/18 15:10: Train Epoch 74: average Loss: 13.137577, time: 7.88 s\r\n", + "Val Epoch 74: 100%|█████████████████| 81/81 [00:00<00:00, 153.04it/s, loss=15.6]\r\n", + "04/18 15:10: Val Epoch 74: average Loss: 14.068306, time: 0.53 s\r\n", + "Test Epoch 74: 100%|████████████████| 82/82 [00:00<00:00, 157.28it/s, loss=15.8]\r\n", + "04/18 15:10: Test Epoch 74: average Loss: 15.737338, time: 0.52 s\r\n", + "Train Epoch 75: 100%|██████████████| 245/245 [00:07<00:00, 31.07it/s, loss=13.4]\r\n", + "04/18 15:10: Train Epoch 75: average Loss: 13.116231, time: 7.89 s\r\n", + "Val Epoch 75: 100%|█████████████████| 81/81 [00:00<00:00, 153.01it/s, loss=15.5]\r\n", + "04/18 15:10: Val Epoch 75: average Loss: 14.047802, time: 0.53 s\r\n", + "Test Epoch 75: 100%|████████████████| 82/82 [00:00<00:00, 156.12it/s, loss=16.1]\r\n", + "04/18 15:10: Test Epoch 75: average Loss: 15.551578, time: 0.53 s\r\n", + "Train Epoch 76: 100%|████████████████| 245/245 [00:07<00:00, 30.92it/s, loss=13]\r\n", + "04/18 15:10: Train Epoch 76: average Loss: 13.116917, time: 7.92 s\r\n", + "Val Epoch 76: 100%|█████████████████| 81/81 [00:00<00:00, 153.09it/s, loss=15.9]\r\n", + "04/18 
15:10: Val Epoch 76: average Loss: 14.126304, time: 0.53 s\r\n", + "Test Epoch 76: 100%|████████████████| 82/82 [00:00<00:00, 156.69it/s, loss=16.1]\r\n", + "04/18 15:10: Test Epoch 76: average Loss: 15.759728, time: 0.52 s\r\n", + "Train Epoch 77: 100%|██████████████| 245/245 [00:07<00:00, 30.99it/s, loss=11.9]\r\n", + "04/18 15:10: Train Epoch 77: average Loss: 13.050524, time: 7.91 s\r\n", + "Val Epoch 77: 100%|█████████████████| 81/81 [00:00<00:00, 153.01it/s, loss=15.9]\r\n", + "04/18 15:10: Val Epoch 77: average Loss: 14.211436, time: 0.53 s\r\n", + "Test Epoch 77: 100%|████████████████| 82/82 [00:00<00:00, 156.57it/s, loss=16.4]\r\n", + "04/18 15:10: Test Epoch 77: average Loss: 15.901283, time: 0.52 s\r\n", + "Train Epoch 78: 100%|██████████████| 245/245 [00:07<00:00, 31.01it/s, loss=13.2]\r\n", + "04/18 15:10: Train Epoch 78: average Loss: 13.085345, time: 7.90 s\r\n", + "Val Epoch 78: 100%|█████████████████| 81/81 [00:00<00:00, 153.01it/s, loss=15.8]\r\n", + "04/18 15:10: Val Epoch 78: average Loss: 13.987891, time: 0.53 s\r\n", + "Test Epoch 78: 100%|████████████████| 82/82 [00:00<00:00, 157.29it/s, loss=16.3]\r\n", + "04/18 15:10: Test Epoch 78: average Loss: 15.596493, time: 0.52 s\r\n", + "Train Epoch 79: 100%|██████████████| 245/245 [00:07<00:00, 31.06it/s, loss=12.4]\r\n", + "04/18 15:11: Train Epoch 79: average Loss: 13.060934, time: 7.89 s\r\n", + "Val Epoch 79: 100%|█████████████████| 81/81 [00:00<00:00, 152.95it/s, loss=15.8]\r\n", + "04/18 15:11: Val Epoch 79: average Loss: 14.009014, time: 0.53 s\r\n", + "Test Epoch 79: 100%|████████████████| 82/82 [00:00<00:00, 157.14it/s, loss=16.3]\r\n", + "04/18 15:11: Test Epoch 79: average Loss: 15.575473, time: 0.52 s\r\n", + "Train Epoch 80: 100%|██████████████| 245/245 [00:07<00:00, 31.20it/s, loss=13.1]\r\n", + "04/18 15:11: Train Epoch 80: average Loss: 13.105933, time: 7.85 s\r\n", + "Val Epoch 80: 100%|█████████████████| 81/81 [00:00<00:00, 153.15it/s, loss=15.7]\r\n", + "04/18 15:11: Val Epoch 80: average Loss: 14.038473, time: 0.53 s\r\n", + "Test Epoch 80: 100%|████████████████| 82/82 [00:00<00:00, 157.28it/s, loss=16.2]\r\n", + "04/18 15:11: Test Epoch 80: average Loss: 15.604638, time: 0.52 s\r\n", + "Train Epoch 81: 100%|████████████████| 245/245 [00:07<00:00, 31.13it/s, loss=13]\r\n", + "04/18 15:11: Train Epoch 81: average Loss: 13.001884, time: 7.87 s\r\n", + "Val Epoch 81: 100%|█████████████████| 81/81 [00:00<00:00, 152.97it/s, loss=15.8]\r\n", + "04/18 15:11: Val Epoch 81: average Loss: 14.330843, time: 0.53 s\r\n", + "Test Epoch 81: 100%|████████████████| 82/82 [00:00<00:00, 157.11it/s, loss=15.9]\r\n", + "04/18 15:11: Test Epoch 81: average Loss: 15.987774, time: 0.52 s\r\n", + "Train Epoch 82: 100%|██████████████| 245/245 [00:07<00:00, 31.13it/s, loss=13.7]\r\n", + "04/18 15:11: Train Epoch 82: average Loss: 13.064929, time: 7.87 s\r\n", + "Val Epoch 82: 100%|█████████████████| 81/81 [00:00<00:00, 153.01it/s, loss=15.5]\r\n", + "04/18 15:11: Val Epoch 82: average Loss: 14.630300, time: 0.53 s\r\n", + "Test Epoch 82: 100%|██████████████████| 82/82 [00:00<00:00, 157.25it/s, loss=16]\r\n", + "04/18 15:11: Test Epoch 82: average Loss: 16.239021, time: 0.52 s\r\n", + "Train Epoch 83: 100%|██████████████| 245/245 [00:07<00:00, 31.06it/s, loss=13.1]\r\n", + "04/18 15:11: Train Epoch 83: average Loss: 13.021222, time: 7.89 s\r\n", + "Val Epoch 83: 100%|█████████████████| 81/81 [00:00<00:00, 153.00it/s, loss=15.3]\r\n", + "04/18 15:11: Val Epoch 83: average Loss: 14.105357, time: 0.53 s\r\n", + "Test Epoch 83: 
100%|██████████████████| 82/82 [00:00<00:00, 157.27it/s, loss=16]\r\n", + "04/18 15:11: Test Epoch 83: average Loss: 15.693887, time: 0.52 s\r\n", + "Train Epoch 84: 100%|██████████████| 245/245 [00:07<00:00, 31.01it/s, loss=12.6]\r\n", + "04/18 15:11: Train Epoch 84: average Loss: 13.084181, time: 7.90 s\r\n", + "Val Epoch 84: 100%|█████████████████| 81/81 [00:00<00:00, 153.05it/s, loss=15.9]\r\n", + "04/18 15:11: Val Epoch 84: average Loss: 14.044975, time: 0.53 s\r\n", + "Test Epoch 84: 100%|████████████████| 82/82 [00:00<00:00, 157.30it/s, loss=16.1]\r\n", + "04/18 15:11: Test Epoch 84: average Loss: 15.575870, time: 0.52 s\r\n", + "04/18 15:11: Validation performance didn't improve for 15 epochs. Training stops.\r\n", + "04/18 15:11: Best models saved at /home/czzhangheng/code/TrafficWheel/experiments/PEMSD3/2025-04-18_14-59-15/best_model.pth and /home/czzhangheng/code/TrafficWheel/experiments/PEMSD3/2025-04-18_14-59-15/best_test_model.pth\r\n", + "04/18 15:11: Testing on best validation model\r\n", + "04/18 15:11: Horizon 01, MAE: 12.9172, RMSE: 22.0949, MAPE: 0.1290\r\n", + "04/18 15:11: Horizon 02, MAE: 13.4756, RMSE: 23.2657, MAPE: 0.1394\r\n", + "04/18 15:11: Horizon 03, MAE: 14.1431, RMSE: 24.5150, MAPE: 0.1469\r\n", + "04/18 15:11: Horizon 04, MAE: 14.6724, RMSE: 25.3758, MAPE: 0.1475\r\n", + "04/18 15:11: Horizon 05, MAE: 15.1623, RMSE: 26.1197, MAPE: 0.1514\r\n", + "04/18 15:11: Horizon 06, MAE: 15.5208, RMSE: 26.7770, MAPE: 0.1540\r\n", + "04/18 15:11: Horizon 07, MAE: 15.9325, RMSE: 27.5243, MAPE: 0.1570\r\n", + "04/18 15:11: Horizon 08, MAE: 16.3481, RMSE: 28.1662, MAPE: 0.1606\r\n", + "04/18 15:11: Horizon 09, MAE: 16.7424, RMSE: 28.6960, MAPE: 0.1632\r\n", + "04/18 15:11: Horizon 10, MAE: 17.2141, RMSE: 29.3761, MAPE: 0.1681\r\n", + "04/18 15:11: Horizon 11, MAE: 17.4599, RMSE: 29.8254, MAPE: 0.1665\r\n", + "04/18 15:11: Horizon 12, MAE: 17.6331, RMSE: 30.0955, MAPE: 0.1741\r\n", + "04/18 15:11: Average Horizon, MAE: 15.6018, RMSE: 26.9355, MAPE: 0.1548\r\n", + "04/18 15:11: Testing on best test model\r\n", + "04/18 15:11: Horizon 01, MAE: 12.7532, RMSE: 21.7661, MAPE: 0.1364\r\n", + "04/18 15:11: Horizon 02, MAE: 13.3888, RMSE: 23.1458, MAPE: 0.1369\r\n", + "04/18 15:11: Horizon 03, MAE: 14.0166, RMSE: 24.4320, MAPE: 0.1428\r\n", + "04/18 15:11: Horizon 04, MAE: 14.4526, RMSE: 25.2964, MAPE: 0.1442\r\n", + "04/18 15:11: Horizon 05, MAE: 14.9031, RMSE: 26.0261, MAPE: 0.1453\r\n", + "04/18 15:11: Horizon 06, MAE: 15.3006, RMSE: 26.7311, MAPE: 0.1504\r\n", + "04/18 15:11: Horizon 07, MAE: 15.7486, RMSE: 27.4594, MAPE: 0.1553\r\n", + "04/18 15:11: Horizon 08, MAE: 16.1170, RMSE: 28.0083, MAPE: 0.1569\r\n", + "04/18 15:11: Horizon 09, MAE: 16.4533, RMSE: 28.4856, MAPE: 0.1577\r\n", + "04/18 15:11: Horizon 10, MAE: 16.7552, RMSE: 28.9605, MAPE: 0.1573\r\n", + "04/18 15:11: Horizon 11, MAE: 17.0502, RMSE: 29.5060, MAPE: 0.1600\r\n", + "04/18 15:11: Horizon 12, MAE: 17.4587, RMSE: 30.1155, MAPE: 0.1722\r\n", + "04/18 15:11: Average Horizon, MAE: 15.3665, RMSE: 26.7775, MAPE: 0.1513\r\n" ] } ], @@ -37,39 +592,1143 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "id": "413ac989e85adf6a", "metadata": { "collapsed": false, - "is_executing": true + "ExecuteTime": { + "end_time": "2025-04-18T07:11:49.008980Z", + "start_time": "2025-04-18T07:11:49.004237Z" + } }, "outputs": [], "source": [] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "id": "ba12f992-f8a5-418d-a136-a4e1bdf0a3f3", "metadata": { "collapsed": false, 
"scrolled": true, - "is_executing": true + "ExecuteTime": { + "end_time": "2025-04-18T07:20:02.794680Z", + "start_time": "2025-04-18T07:11:49.011317Z" + } }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "加载 PEMSD8 数据集中... \r\n", + "Creat Log File in: /home/czzhangheng/code/TrafficWheel/experiments/PEMSD8/2025-04-18_15-11-57/run.log\r\n", + "04/18 15:11: Experiment log path in: /home/czzhangheng/code/TrafficWheel/experiments/PEMSD8/2025-04-18_15-11-57\r\n", + "04/18 15:11: Training process started\r\n", + "Train Epoch 1: 100%|███████████████| 167/167 [00:05<00:00, 30.48it/s, loss=20.9]\r\n", + "04/18 15:12: Train Epoch 1: average Loss: 50.214634, time: 5.48 s\r\n", + "Val Epoch 1: 100%|██████████████████| 55/55 [00:00<00:00, 227.24it/s, loss=30.4]\r\n", + "04/18 15:12: Val Epoch 1: average Loss: 22.711295, time: 0.24 s\r\n", + "Test Epoch 1: 100%|███████████████████| 56/56 [00:00<00:00, 254.66it/s, loss=19]\r\n", + "04/18 15:12: Test Epoch 1: average Loss: 21.988754, time: 0.22 s\r\n", + "04/18 15:12: Best validation model saved!\r\n", + "Train Epoch 2: 100%|███████████████| 167/167 [00:05<00:00, 31.99it/s, loss=21.2]\r\n", + "04/18 15:12: Train Epoch 2: average Loss: 21.884882, time: 5.22 s\r\n", + "Val Epoch 2: 100%|██████████████████| 55/55 [00:00<00:00, 228.38it/s, loss=27.9]\r\n", + "04/18 15:12: Val Epoch 2: average Loss: 20.506583, time: 0.24 s\r\n", + "Test Epoch 2: 100%|█████████████████| 56/56 [00:00<00:00, 255.79it/s, loss=16.5]\r\n", + "04/18 15:12: Test Epoch 2: average Loss: 19.861309, time: 0.22 s\r\n", + "04/18 15:12: Best validation model saved!\r\n", + "Train Epoch 3: 100%|███████████████| 167/167 [00:05<00:00, 32.04it/s, loss=20.4]\r\n", + "04/18 15:12: Train Epoch 3: average Loss: 20.049448, time: 5.21 s\r\n", + "Val Epoch 3: 100%|████████████████████| 55/55 [00:00<00:00, 228.73it/s, loss=25]\r\n", + "04/18 15:12: Val Epoch 3: average Loss: 19.088876, time: 0.24 s\r\n", + "Test Epoch 3: 100%|█████████████████| 56/56 [00:00<00:00, 256.21it/s, loss=14.9]\r\n", + "04/18 15:12: Test Epoch 3: average Loss: 18.462040, time: 0.22 s\r\n", + "04/18 15:12: Best validation model saved!\r\n", + "Train Epoch 4: 100%|███████████████| 167/167 [00:05<00:00, 32.10it/s, loss=19.9]\r\n", + "04/18 15:12: Train Epoch 4: average Loss: 19.192811, time: 5.20 s\r\n", + "Val Epoch 4: 100%|██████████████████| 55/55 [00:00<00:00, 229.46it/s, loss=24.3]\r\n", + "04/18 15:12: Val Epoch 4: average Loss: 18.616157, time: 0.24 s\r\n", + "Test Epoch 4: 100%|█████████████████| 56/56 [00:00<00:00, 257.08it/s, loss=14.3]\r\n", + "04/18 15:12: Test Epoch 4: average Loss: 18.032373, time: 0.22 s\r\n", + "04/18 15:12: Best validation model saved!\r\n", + "Train Epoch 5: 100%|███████████████| 167/167 [00:05<00:00, 32.11it/s, loss=18.4]\r\n", + "04/18 15:12: Train Epoch 5: average Loss: 18.843339, time: 5.20 s\r\n", + "Val Epoch 5: 100%|████████████████████| 55/55 [00:00<00:00, 229.47it/s, loss=24]\r\n", + "04/18 15:12: Val Epoch 5: average Loss: 18.529398, time: 0.24 s\r\n", + "Test Epoch 5: 100%|█████████████████| 56/56 [00:00<00:00, 257.13it/s, loss=14.6]\r\n", + "04/18 15:12: Test Epoch 5: average Loss: 17.933063, time: 0.22 s\r\n", + "04/18 15:12: Best validation model saved!\r\n", + "Train Epoch 6: 100%|███████████████| 167/167 [00:04<00:00, 33.78it/s, loss=17.3]\r\n", + "04/18 15:12: Train Epoch 6: average Loss: 18.314409, time: 4.94 s\r\n", + "Val Epoch 6: 100%|████████████████████| 55/55 [00:00<00:00, 247.29it/s, loss=23]\r\n", + "04/18 15:12: Val 
Epoch 6: average Loss: 17.916487, time: 0.22 s\r\n", + "Test Epoch 6: 100%|█████████████████| 56/56 [00:00<00:00, 283.64it/s, loss=14.1]\r\n", + "04/18 15:12: Test Epoch 6: average Loss: 17.403896, time: 0.20 s\r\n", + "04/18 15:12: Best validation model saved!\r\n", + "Train Epoch 7: 100%|███████████████| 167/167 [00:05<00:00, 31.48it/s, loss=17.9]\r\n", + "04/18 15:12: Train Epoch 7: average Loss: 17.944801, time: 5.31 s\r\n", + "Val Epoch 7: 100%|██████████████████| 55/55 [00:00<00:00, 228.92it/s, loss=22.6]\r\n", + "04/18 15:12: Val Epoch 7: average Loss: 17.618471, time: 0.24 s\r\n", + "Test Epoch 7: 100%|█████████████████| 56/56 [00:00<00:00, 257.22it/s, loss=13.8]\r\n", + "04/18 15:12: Test Epoch 7: average Loss: 17.098750, time: 0.22 s\r\n", + "04/18 15:12: Best validation model saved!\r\n", + "Train Epoch 8: 100%|███████████████| 167/167 [00:05<00:00, 30.73it/s, loss=17.2]\r\n", + "04/18 15:12: Train Epoch 8: average Loss: 17.525567, time: 5.44 s\r\n", + "Val Epoch 8: 100%|██████████████████| 55/55 [00:00<00:00, 228.09it/s, loss=21.9]\r\n", + "04/18 15:12: Val Epoch 8: average Loss: 17.163922, time: 0.24 s\r\n", + "Test Epoch 8: 100%|█████████████████| 56/56 [00:00<00:00, 255.28it/s, loss=13.6]\r\n", + "04/18 15:12: Test Epoch 8: average Loss: 16.654691, time: 0.22 s\r\n", + "04/18 15:12: Best validation model saved!\r\n", + "Train Epoch 9: 100%|███████████████| 167/167 [00:05<00:00, 30.47it/s, loss=16.9]\r\n", + "04/18 15:12: Train Epoch 9: average Loss: 17.373678, time: 5.48 s\r\n", + "Val Epoch 9: 100%|██████████████████| 55/55 [00:00<00:00, 226.65it/s, loss=22.2]\r\n", + "04/18 15:12: Val Epoch 9: average Loss: 17.393715, time: 0.24 s\r\n", + "Test Epoch 9: 100%|███████████████████| 56/56 [00:00<00:00, 250.56it/s, loss=14]\r\n", + "04/18 15:12: Test Epoch 9: average Loss: 16.874950, time: 0.22 s\r\n", + "Train Epoch 10: 100%|██████████████| 167/167 [00:05<00:00, 30.50it/s, loss=16.5]\r\n", + "04/18 15:12: Train Epoch 10: average Loss: 17.045280, time: 5.48 s\r\n", + "Val Epoch 10: 100%|█████████████████| 55/55 [00:00<00:00, 227.19it/s, loss=21.3]\r\n", + "04/18 15:12: Val Epoch 10: average Loss: 16.825079, time: 0.24 s\r\n", + "Test Epoch 10: 100%|████████████████| 56/56 [00:00<00:00, 253.63it/s, loss=13.7]\r\n", + "04/18 15:12: Test Epoch 10: average Loss: 16.373530, time: 0.22 s\r\n", + "04/18 15:12: Best validation model saved!\r\n", + "Train Epoch 11: 100%|████████████████| 167/167 [00:05<00:00, 30.57it/s, loss=16]\r\n", + "04/18 15:13: Train Epoch 11: average Loss: 16.787027, time: 5.46 s\r\n", + "Val Epoch 11: 100%|█████████████████| 55/55 [00:00<00:00, 228.03it/s, loss=20.9]\r\n", + "04/18 15:13: Val Epoch 11: average Loss: 16.489394, time: 0.24 s\r\n", + "Test Epoch 11: 100%|████████████████| 56/56 [00:00<00:00, 255.15it/s, loss=13.5]\r\n", + "04/18 15:13: Test Epoch 11: average Loss: 16.024435, time: 0.22 s\r\n", + "04/18 15:13: Best validation model saved!\r\n", + "Train Epoch 12: 100%|██████████████| 167/167 [00:05<00:00, 30.67it/s, loss=16.3]\r\n", + "04/18 15:13: Train Epoch 12: average Loss: 16.567867, time: 5.45 s\r\n", + "Val Epoch 12: 100%|█████████████████| 55/55 [00:00<00:00, 227.47it/s, loss=20.8]\r\n", + "04/18 15:13: Val Epoch 12: average Loss: 16.698400, time: 0.24 s\r\n", + "Test Epoch 12: 100%|████████████████| 56/56 [00:00<00:00, 254.71it/s, loss=13.8]\r\n", + "04/18 15:13: Test Epoch 12: average Loss: 16.300587, time: 0.22 s\r\n", + "Train Epoch 13: 100%|██████████████| 167/167 [00:05<00:00, 30.61it/s, loss=17.8]\r\n", + "04/18 15:13: Train Epoch 13: 
average Loss: 16.642440, time: 5.46 s\r\n", + "Val Epoch 13: 100%|█████████████████| 55/55 [00:00<00:00, 226.84it/s, loss=20.8]\r\n", + "04/18 15:13: Val Epoch 13: average Loss: 17.077953, time: 0.24 s\r\n", + "Test Epoch 13: 100%|████████████████| 56/56 [00:00<00:00, 254.79it/s, loss=13.7]\r\n", + "04/18 15:13: Test Epoch 13: average Loss: 16.662134, time: 0.22 s\r\n", + "Train Epoch 14: 100%|██████████████| 167/167 [00:05<00:00, 30.59it/s, loss=17.6]\r\n", + "04/18 15:13: Train Epoch 14: average Loss: 16.466119, time: 5.46 s\r\n", + "Val Epoch 14: 100%|█████████████████| 55/55 [00:00<00:00, 225.91it/s, loss=20.3]\r\n", + "04/18 15:13: Val Epoch 14: average Loss: 16.320250, time: 0.24 s\r\n", + "Test Epoch 14: 100%|████████████████| 56/56 [00:00<00:00, 255.12it/s, loss=13.6]\r\n", + "04/18 15:13: Test Epoch 14: average Loss: 15.925672, time: 0.22 s\r\n", + "04/18 15:13: Best validation model saved!\r\n", + "Train Epoch 15: 100%|██████████████| 167/167 [00:05<00:00, 30.56it/s, loss=16.4]\r\n", + "04/18 15:13: Train Epoch 15: average Loss: 16.323008, time: 5.47 s\r\n", + "Val Epoch 15: 100%|█████████████████| 55/55 [00:00<00:00, 227.01it/s, loss=20.1]\r\n", + "04/18 15:13: Val Epoch 15: average Loss: 16.468653, time: 0.24 s\r\n", + "Test Epoch 15: 100%|████████████████| 56/56 [00:00<00:00, 254.09it/s, loss=13.7]\r\n", + "04/18 15:13: Test Epoch 15: average Loss: 16.159190, time: 0.22 s\r\n", + "Train Epoch 16: 100%|██████████████| 167/167 [00:05<00:00, 30.61it/s, loss=16.2]\r\n", + "04/18 15:13: Train Epoch 16: average Loss: 16.103049, time: 5.46 s\r\n", + "Val Epoch 16: 100%|█████████████████| 55/55 [00:00<00:00, 227.69it/s, loss=20.2]\r\n", + "04/18 15:13: Val Epoch 16: average Loss: 16.179243, time: 0.24 s\r\n", + "Test Epoch 16: 100%|████████████████| 56/56 [00:00<00:00, 255.87it/s, loss=13.7]\r\n", + "04/18 15:13: Test Epoch 16: average Loss: 15.922677, time: 0.22 s\r\n", + "04/18 15:13: Best validation model saved!\r\n", + "Train Epoch 17: 100%|██████████████| 167/167 [00:05<00:00, 30.62it/s, loss=16.5]\r\n", + "04/18 15:13: Train Epoch 17: average Loss: 15.973722, time: 5.45 s\r\n", + "Val Epoch 17: 100%|█████████████████| 55/55 [00:00<00:00, 226.74it/s, loss=19.7]\r\n", + "04/18 15:13: Val Epoch 17: average Loss: 15.876451, time: 0.24 s\r\n", + "Test Epoch 17: 100%|████████████████| 56/56 [00:00<00:00, 254.97it/s, loss=13.3]\r\n", + "04/18 15:13: Test Epoch 17: average Loss: 15.578746, time: 0.22 s\r\n", + "04/18 15:13: Best validation model saved!\r\n", + "Train Epoch 18: 100%|██████████████| 167/167 [00:05<00:00, 30.71it/s, loss=15.7]\r\n", + "04/18 15:13: Train Epoch 18: average Loss: 16.256262, time: 5.44 s\r\n", + "Val Epoch 18: 100%|█████████████████| 55/55 [00:00<00:00, 227.40it/s, loss=19.8]\r\n", + "04/18 15:13: Val Epoch 18: average Loss: 15.894624, time: 0.24 s\r\n", + "Test Epoch 18: 100%|████████████████| 56/56 [00:00<00:00, 254.56it/s, loss=13.3]\r\n", + "04/18 15:13: Test Epoch 18: average Loss: 15.551777, time: 0.22 s\r\n", + "Train Epoch 19: 100%|██████████████| 167/167 [00:05<00:00, 30.63it/s, loss=14.6]\r\n", + "04/18 15:13: Train Epoch 19: average Loss: 15.812353, time: 5.45 s\r\n", + "Val Epoch 19: 100%|█████████████████| 55/55 [00:00<00:00, 226.20it/s, loss=20.1]\r\n", + "04/18 15:13: Val Epoch 19: average Loss: 16.090156, time: 0.24 s\r\n", + "Test Epoch 19: 100%|████████████████| 56/56 [00:00<00:00, 255.98it/s, loss=13.4]\r\n", + "04/18 15:13: Test Epoch 19: average Loss: 15.726525, time: 0.22 s\r\n", + "Train Epoch 20: 100%|██████████████| 167/167 
[00:05<00:00, 30.67it/s, loss=15.9]\r\n", + "04/18 15:13: Train Epoch 20: average Loss: 15.780698, time: 5.45 s\r\n", + "Val Epoch 20: 100%|███████████████████| 55/55 [00:00<00:00, 226.55it/s, loss=20]\r\n", + "04/18 15:13: Val Epoch 20: average Loss: 16.308764, time: 0.24 s\r\n", + "Test Epoch 20: 100%|████████████████| 56/56 [00:00<00:00, 254.69it/s, loss=14.5]\r\n", + "04/18 15:13: Test Epoch 20: average Loss: 15.972266, time: 0.22 s\r\n", + "Train Epoch 21: 100%|██████████████| 167/167 [00:05<00:00, 30.68it/s, loss=16.2]\r\n", + "04/18 15:13: Train Epoch 21: average Loss: 15.723604, time: 5.44 s\r\n", + "Val Epoch 21: 100%|█████████████████| 55/55 [00:00<00:00, 227.52it/s, loss=19.9]\r\n", + "04/18 15:14: Val Epoch 21: average Loss: 16.222427, time: 0.24 s\r\n", + "Test Epoch 21: 100%|████████████████| 56/56 [00:00<00:00, 255.39it/s, loss=13.6]\r\n", + "04/18 15:14: Test Epoch 21: average Loss: 15.828279, time: 0.22 s\r\n", + "Train Epoch 22: 100%|██████████████| 167/167 [00:05<00:00, 30.72it/s, loss=15.4]\r\n", + "04/18 15:14: Train Epoch 22: average Loss: 15.801917, time: 5.44 s\r\n", + "Val Epoch 22: 100%|███████████████████| 55/55 [00:00<00:00, 227.56it/s, loss=20]\r\n", + "04/18 15:14: Val Epoch 22: average Loss: 16.132965, time: 0.24 s\r\n", + "Test Epoch 22: 100%|████████████████| 56/56 [00:00<00:00, 255.58it/s, loss=13.3]\r\n", + "04/18 15:14: Test Epoch 22: average Loss: 15.737014, time: 0.22 s\r\n", + "Train Epoch 23: 100%|██████████████| 167/167 [00:05<00:00, 30.70it/s, loss=15.2]\r\n", + "04/18 15:14: Train Epoch 23: average Loss: 15.618692, time: 5.44 s\r\n", + "Val Epoch 23: 100%|█████████████████| 55/55 [00:00<00:00, 227.64it/s, loss=19.6]\r\n", + "04/18 15:14: Val Epoch 23: average Loss: 15.933474, time: 0.24 s\r\n", + "Test Epoch 23: 100%|████████████████| 56/56 [00:00<00:00, 254.98it/s, loss=13.6]\r\n", + "04/18 15:14: Test Epoch 23: average Loss: 15.648318, time: 0.22 s\r\n", + "Train Epoch 24: 100%|████████████████| 167/167 [00:05<00:00, 30.74it/s, loss=15]\r\n", + "04/18 15:14: Train Epoch 24: average Loss: 15.618875, time: 5.43 s\r\n", + "Val Epoch 24: 100%|█████████████████| 55/55 [00:00<00:00, 227.72it/s, loss=19.1]\r\n", + "04/18 15:14: Val Epoch 24: average Loss: 15.686576, time: 0.24 s\r\n", + "Test Epoch 24: 100%|████████████████| 56/56 [00:00<00:00, 252.64it/s, loss=13.5]\r\n", + "04/18 15:14: Test Epoch 24: average Loss: 15.438568, time: 0.22 s\r\n", + "04/18 15:14: Best validation model saved!\r\n", + "Train Epoch 25: 100%|██████████████| 167/167 [00:05<00:00, 30.68it/s, loss=16.5]\r\n", + "04/18 15:14: Train Epoch 25: average Loss: 15.409614, time: 5.44 s\r\n", + "Val Epoch 25: 100%|█████████████████| 55/55 [00:00<00:00, 226.42it/s, loss=20.2]\r\n", + "04/18 15:14: Val Epoch 25: average Loss: 16.311260, time: 0.24 s\r\n", + "Test Epoch 25: 100%|████████████████| 56/56 [00:00<00:00, 253.40it/s, loss=14.9]\r\n", + "04/18 15:14: Test Epoch 25: average Loss: 16.133646, time: 0.22 s\r\n", + "Train Epoch 26: 100%|██████████████| 167/167 [00:05<00:00, 30.62it/s, loss=15.9]\r\n", + "04/18 15:14: Train Epoch 26: average Loss: 15.628087, time: 5.45 s\r\n", + "Val Epoch 26: 100%|█████████████████| 55/55 [00:00<00:00, 227.50it/s, loss=19.3]\r\n", + "04/18 15:14: Val Epoch 26: average Loss: 16.098852, time: 0.24 s\r\n", + "Test Epoch 26: 100%|████████████████| 56/56 [00:00<00:00, 255.32it/s, loss=13.9]\r\n", + "04/18 15:14: Test Epoch 26: average Loss: 15.840997, time: 0.22 s\r\n", + "Train Epoch 27: 100%|████████████████| 167/167 [00:05<00:00, 30.72it/s, 
loss=15]\r\n", + "04/18 15:14: Train Epoch 27: average Loss: 15.370727, time: 5.44 s\r\n", + "Val Epoch 27: 100%|█████████████████| 55/55 [00:00<00:00, 227.35it/s, loss=19.2]\r\n", + "04/18 15:14: Val Epoch 27: average Loss: 15.657997, time: 0.24 s\r\n", + "Test Epoch 27: 100%|████████████████| 56/56 [00:00<00:00, 254.99it/s, loss=13.4]\r\n", + "04/18 15:14: Test Epoch 27: average Loss: 15.382153, time: 0.22 s\r\n", + "04/18 15:14: Best validation model saved!\r\n", + "Train Epoch 28: 100%|██████████████| 167/167 [00:05<00:00, 30.63it/s, loss=16.4]\r\n", + "04/18 15:14: Train Epoch 28: average Loss: 15.346337, time: 5.45 s\r\n", + "Val Epoch 28: 100%|█████████████████| 55/55 [00:00<00:00, 226.62it/s, loss=19.2]\r\n", + "04/18 15:14: Val Epoch 28: average Loss: 15.701665, time: 0.24 s\r\n", + "Test Epoch 28: 100%|████████████████| 56/56 [00:00<00:00, 253.56it/s, loss=13.3]\r\n", + "04/18 15:14: Test Epoch 28: average Loss: 15.401968, time: 0.22 s\r\n", + "Train Epoch 29: 100%|██████████████| 167/167 [00:05<00:00, 30.71it/s, loss=14.6]\r\n", + "04/18 15:14: Train Epoch 29: average Loss: 15.311210, time: 5.44 s\r\n", + "Val Epoch 29: 100%|█████████████████| 55/55 [00:00<00:00, 227.97it/s, loss=19.3]\r\n", + "04/18 15:14: Val Epoch 29: average Loss: 15.763280, time: 0.24 s\r\n", + "Test Epoch 29: 100%|████████████████| 56/56 [00:00<00:00, 255.88it/s, loss=13.4]\r\n", + "04/18 15:14: Test Epoch 29: average Loss: 15.405051, time: 0.22 s\r\n", + "Train Epoch 30: 100%|██████████████| 167/167 [00:05<00:00, 30.68it/s, loss=16.9]\r\n", + "04/18 15:14: Train Epoch 30: average Loss: 15.432870, time: 5.44 s\r\n", + "Val Epoch 30: 100%|█████████████████| 55/55 [00:00<00:00, 227.98it/s, loss=19.9]\r\n", + "04/18 15:14: Val Epoch 30: average Loss: 15.968669, time: 0.24 s\r\n", + "Test Epoch 30: 100%|████████████████| 56/56 [00:00<00:00, 255.84it/s, loss=13.5]\r\n", + "04/18 15:14: Test Epoch 30: average Loss: 15.700474, time: 0.22 s\r\n", + "Train Epoch 31: 100%|██████████████| 167/167 [00:05<00:00, 30.71it/s, loss=14.7]\r\n", + "04/18 15:14: Train Epoch 31: average Loss: 15.230327, time: 5.44 s\r\n", + "Val Epoch 31: 100%|█████████████████| 55/55 [00:00<00:00, 228.57it/s, loss=18.9]\r\n", + "04/18 15:14: Val Epoch 31: average Loss: 15.366567, time: 0.24 s\r\n", + "Test Epoch 31: 100%|████████████████| 56/56 [00:00<00:00, 255.81it/s, loss=13.1]\r\n", + "04/18 15:14: Test Epoch 31: average Loss: 15.089513, time: 0.22 s\r\n", + "04/18 15:14: Best validation model saved!\r\n", + "Train Epoch 32: 100%|████████████████| 167/167 [00:05<00:00, 30.62it/s, loss=15]\r\n", + "04/18 15:15: Train Epoch 32: average Loss: 15.136701, time: 5.45 s\r\n", + "Val Epoch 32: 100%|█████████████████| 55/55 [00:00<00:00, 228.03it/s, loss=19.2]\r\n", + "04/18 15:15: Val Epoch 32: average Loss: 16.012017, time: 0.24 s\r\n", + "Test Epoch 32: 100%|████████████████| 56/56 [00:00<00:00, 255.41it/s, loss=13.5]\r\n", + "04/18 15:15: Test Epoch 32: average Loss: 15.683193, time: 0.22 s\r\n", + "Train Epoch 33: 100%|██████████████| 167/167 [00:05<00:00, 30.41it/s, loss=15.2]\r\n", + "04/18 15:15: Train Epoch 33: average Loss: 15.123099, time: 5.49 s\r\n", + "Val Epoch 33: 100%|█████████████████| 55/55 [00:00<00:00, 224.77it/s, loss=19.1]\r\n", + "04/18 15:15: Val Epoch 33: average Loss: 15.646507, time: 0.25 s\r\n", + "Test Epoch 33: 100%|████████████████| 56/56 [00:00<00:00, 254.99it/s, loss=13.4]\r\n", + "04/18 15:15: Test Epoch 33: average Loss: 15.384800, time: 0.22 s\r\n", + "Train Epoch 34: 100%|██████████████| 167/167 [00:05<00:00, 
30.41it/s, loss=14.9]\r\n", + "04/18 15:15: Train Epoch 34: average Loss: 15.259948, time: 5.49 s\r\n", + "Val Epoch 34: 100%|█████████████████| 55/55 [00:00<00:00, 227.38it/s, loss=18.8]\r\n", + "04/18 15:15: Val Epoch 34: average Loss: 15.506219, time: 0.24 s\r\n", + "Test Epoch 34: 100%|████████████████| 56/56 [00:00<00:00, 255.30it/s, loss=13.5]\r\n", + "04/18 15:15: Test Epoch 34: average Loss: 15.216599, time: 0.22 s\r\n", + "Train Epoch 35: 100%|██████████████| 167/167 [00:05<00:00, 30.62it/s, loss=15.5]\r\n", + "04/18 15:15: Train Epoch 35: average Loss: 15.167943, time: 5.45 s\r\n", + "Val Epoch 35: 100%|█████████████████| 55/55 [00:00<00:00, 227.05it/s, loss=19.1]\r\n", + "04/18 15:15: Val Epoch 35: average Loss: 15.655087, time: 0.24 s\r\n", + "Test Epoch 35: 100%|████████████████| 56/56 [00:00<00:00, 255.20it/s, loss=13.5]\r\n", + "04/18 15:15: Test Epoch 35: average Loss: 15.513917, time: 0.22 s\r\n", + "Train Epoch 36: 100%|██████████████| 167/167 [00:05<00:00, 30.67it/s, loss=14.8]\r\n", + "04/18 15:15: Train Epoch 36: average Loss: 15.040895, time: 5.45 s\r\n", + "Val Epoch 36: 100%|█████████████████| 55/55 [00:00<00:00, 226.91it/s, loss=18.8]\r\n", + "04/18 15:15: Val Epoch 36: average Loss: 15.493915, time: 0.24 s\r\n", + "Test Epoch 36: 100%|████████████████| 56/56 [00:00<00:00, 254.43it/s, loss=13.3]\r\n", + "04/18 15:15: Test Epoch 36: average Loss: 15.369653, time: 0.22 s\r\n", + "Train Epoch 37: 100%|██████████████| 167/167 [00:05<00:00, 30.67it/s, loss=14.7]\r\n", + "04/18 15:15: Train Epoch 37: average Loss: 15.022617, time: 5.45 s\r\n", + "Val Epoch 37: 100%|█████████████████| 55/55 [00:00<00:00, 227.16it/s, loss=18.9]\r\n", + "04/18 15:15: Val Epoch 37: average Loss: 15.415350, time: 0.24 s\r\n", + "Test Epoch 37: 100%|████████████████| 56/56 [00:00<00:00, 252.26it/s, loss=13.2]\r\n", + "04/18 15:15: Test Epoch 37: average Loss: 15.182666, time: 0.22 s\r\n", + "Train Epoch 38: 100%|██████████████| 167/167 [00:05<00:00, 30.61it/s, loss=14.9]\r\n", + "04/18 15:15: Train Epoch 38: average Loss: 15.008983, time: 5.46 s\r\n", + "Val Epoch 38: 100%|███████████████████| 55/55 [00:00<00:00, 227.71it/s, loss=19]\r\n", + "04/18 15:15: Val Epoch 38: average Loss: 15.681897, time: 0.24 s\r\n", + "Test Epoch 38: 100%|████████████████| 56/56 [00:00<00:00, 255.22it/s, loss=14.1]\r\n", + "04/18 15:15: Test Epoch 38: average Loss: 15.569352, time: 0.22 s\r\n", + "Train Epoch 39: 100%|██████████████| 167/167 [00:05<00:00, 30.72it/s, loss=15.1]\r\n", + "04/18 15:15: Train Epoch 39: average Loss: 15.144960, time: 5.44 s\r\n", + "Val Epoch 39: 100%|█████████████████| 55/55 [00:00<00:00, 227.54it/s, loss=18.7]\r\n", + "04/18 15:15: Val Epoch 39: average Loss: 15.411151, time: 0.24 s\r\n", + "Test Epoch 39: 100%|████████████████| 56/56 [00:00<00:00, 255.27it/s, loss=13.3]\r\n", + "04/18 15:15: Test Epoch 39: average Loss: 15.168814, time: 0.22 s\r\n", + "Train Epoch 40: 100%|██████████████| 167/167 [00:05<00:00, 30.71it/s, loss=15.6]\r\n", + "04/18 15:15: Train Epoch 40: average Loss: 14.895088, time: 5.44 s\r\n", + "Val Epoch 40: 100%|█████████████████| 55/55 [00:00<00:00, 227.42it/s, loss=18.6]\r\n", + "04/18 15:15: Val Epoch 40: average Loss: 15.399760, time: 0.24 s\r\n", + "Test Epoch 40: 100%|████████████████| 56/56 [00:00<00:00, 255.39it/s, loss=13.3]\r\n", + "04/18 15:15: Test Epoch 40: average Loss: 15.174346, time: 0.22 s\r\n", + "Train Epoch 41: 100%|██████████████| 167/167 [00:05<00:00, 30.72it/s, loss=14.8]\r\n", + "04/18 15:15: Train Epoch 41: average Loss: 15.024992, 
time: 5.44 s\r\n", + "Val Epoch 41: 100%|█████████████████| 55/55 [00:00<00:00, 227.62it/s, loss=18.8]\r\n", + "04/18 15:15: Val Epoch 41: average Loss: 15.372228, time: 0.24 s\r\n", + "Test Epoch 41: 100%|████████████████| 56/56 [00:00<00:00, 255.28it/s, loss=13.2]\r\n", + "04/18 15:15: Test Epoch 41: average Loss: 15.167654, time: 0.22 s\r\n", + "Train Epoch 42: 100%|██████████████| 167/167 [00:05<00:00, 30.72it/s, loss=14.2]\r\n", + "04/18 15:16: Train Epoch 42: average Loss: 14.909395, time: 5.44 s\r\n", + "Val Epoch 42: 100%|███████████████████| 55/55 [00:00<00:00, 227.81it/s, loss=19]\r\n", + "04/18 15:16: Val Epoch 42: average Loss: 15.543973, time: 0.24 s\r\n", + "Test Epoch 42: 100%|████████████████| 56/56 [00:00<00:00, 255.56it/s, loss=13.6]\r\n", + "04/18 15:16: Test Epoch 42: average Loss: 15.435683, time: 0.22 s\r\n", + "Train Epoch 43: 100%|██████████████| 167/167 [00:05<00:00, 30.71it/s, loss=15.1]\r\n", + "04/18 15:16: Train Epoch 43: average Loss: 14.799964, time: 5.44 s\r\n", + "Val Epoch 43: 100%|█████████████████| 55/55 [00:00<00:00, 227.83it/s, loss=18.5]\r\n", + "04/18 15:16: Val Epoch 43: average Loss: 15.236528, time: 0.24 s\r\n", + "Test Epoch 43: 100%|████████████████| 56/56 [00:00<00:00, 254.73it/s, loss=13.2]\r\n", + "04/18 15:16: Test Epoch 43: average Loss: 15.039848, time: 0.22 s\r\n", + "04/18 15:16: Best validation model saved!\r\n", + "Train Epoch 44: 100%|██████████████| 167/167 [00:05<00:00, 30.60it/s, loss=14.6]\r\n", + "04/18 15:16: Train Epoch 44: average Loss: 14.949184, time: 5.46 s\r\n", + "Val Epoch 44: 100%|█████████████████| 55/55 [00:00<00:00, 222.13it/s, loss=18.8]\r\n", + "04/18 15:16: Val Epoch 44: average Loss: 15.568451, time: 0.25 s\r\n", + "Test Epoch 44: 100%|████████████████| 56/56 [00:00<00:00, 254.95it/s, loss=13.3]\r\n", + "04/18 15:16: Test Epoch 44: average Loss: 15.293935, time: 0.22 s\r\n", + "Train Epoch 45: 100%|██████████████| 167/167 [00:05<00:00, 30.65it/s, loss=14.7]\r\n", + "04/18 15:16: Train Epoch 45: average Loss: 14.754190, time: 5.45 s\r\n", + "Val Epoch 45: 100%|█████████████████| 55/55 [00:00<00:00, 226.89it/s, loss=18.9]\r\n", + "04/18 15:16: Val Epoch 45: average Loss: 15.483996, time: 0.24 s\r\n", + "Test Epoch 45: 100%|████████████████| 56/56 [00:00<00:00, 254.99it/s, loss=13.7]\r\n", + "04/18 15:16: Test Epoch 45: average Loss: 15.339272, time: 0.22 s\r\n", + "Train Epoch 46: 100%|██████████████| 167/167 [00:05<00:00, 30.58it/s, loss=15.1]\r\n", + "04/18 15:16: Train Epoch 46: average Loss: 14.810086, time: 5.46 s\r\n", + "Val Epoch 46: 100%|█████████████████| 55/55 [00:00<00:00, 227.30it/s, loss=18.5]\r\n", + "04/18 15:16: Val Epoch 46: average Loss: 15.207288, time: 0.24 s\r\n", + "Test Epoch 46: 100%|████████████████| 56/56 [00:00<00:00, 254.92it/s, loss=13.2]\r\n", + "04/18 15:16: Test Epoch 46: average Loss: 15.064002, time: 0.22 s\r\n", + "04/18 15:16: Best validation model saved!\r\n", + "Train Epoch 47: 100%|██████████████| 167/167 [00:05<00:00, 30.74it/s, loss=14.7]\r\n", + "04/18 15:16: Train Epoch 47: average Loss: 14.732276, time: 5.43 s\r\n", + "Val Epoch 47: 100%|█████████████████| 55/55 [00:00<00:00, 227.92it/s, loss=18.9]\r\n", + "04/18 15:16: Val Epoch 47: average Loss: 15.511761, time: 0.24 s\r\n", + "Test Epoch 47: 100%|████████████████| 56/56 [00:00<00:00, 255.70it/s, loss=13.4]\r\n", + "04/18 15:16: Test Epoch 47: average Loss: 15.404380, time: 0.22 s\r\n", + "Train Epoch 48: 100%|██████████████| 167/167 [00:05<00:00, 30.72it/s, loss=14.7]\r\n", + "04/18 15:16: Train Epoch 48: average 
Loss: 14.677143, time: 5.44 s\r\n", + "Val Epoch 48: 100%|█████████████████| 55/55 [00:00<00:00, 227.75it/s, loss=18.3]\r\n", + "04/18 15:16: Val Epoch 48: average Loss: 15.126700, time: 0.24 s\r\n", + "Test Epoch 48: 100%|████████████████| 56/56 [00:00<00:00, 254.56it/s, loss=13.1]\r\n", + "04/18 15:16: Test Epoch 48: average Loss: 14.937668, time: 0.22 s\r\n", + "04/18 15:16: Best validation model saved!\r\n", + "Train Epoch 49: 100%|██████████████| 167/167 [00:05<00:00, 30.63it/s, loss=15.9]\r\n", + "04/18 15:16: Train Epoch 49: average Loss: 14.767888, time: 5.45 s\r\n", + "Val Epoch 49: 100%|█████████████████| 55/55 [00:00<00:00, 227.76it/s, loss=18.6]\r\n", + "04/18 15:16: Val Epoch 49: average Loss: 15.370006, time: 0.24 s\r\n", + "Test Epoch 49: 100%|████████████████| 56/56 [00:00<00:00, 255.45it/s, loss=13.2]\r\n", + "04/18 15:16: Test Epoch 49: average Loss: 15.228811, time: 0.22 s\r\n", + "Train Epoch 50: 100%|████████████████| 167/167 [00:05<00:00, 30.68it/s, loss=15]\r\n", + "04/18 15:16: Train Epoch 50: average Loss: 14.711411, time: 5.44 s\r\n", + "Val Epoch 50: 100%|█████████████████| 55/55 [00:00<00:00, 227.43it/s, loss=18.6]\r\n", + "04/18 15:16: Val Epoch 50: average Loss: 15.492524, time: 0.24 s\r\n", + "Test Epoch 50: 100%|██████████████████| 56/56 [00:00<00:00, 255.25it/s, loss=14]\r\n", + "04/18 15:16: Test Epoch 50: average Loss: 15.426147, time: 0.22 s\r\n", + "Train Epoch 51: 100%|██████████████| 167/167 [00:05<00:00, 30.67it/s, loss=13.9]\r\n", + "04/18 15:16: Train Epoch 51: average Loss: 14.807569, time: 5.44 s\r\n", + "Val Epoch 51: 100%|█████████████████| 55/55 [00:00<00:00, 227.29it/s, loss=18.4]\r\n", + "04/18 15:16: Val Epoch 51: average Loss: 15.143139, time: 0.24 s\r\n", + "Test Epoch 51: 100%|████████████████| 56/56 [00:00<00:00, 254.55it/s, loss=13.1]\r\n", + "04/18 15:16: Test Epoch 51: average Loss: 15.028396, time: 0.22 s\r\n", + "Train Epoch 52: 100%|██████████████| 167/167 [00:05<00:00, 30.71it/s, loss=15.7]\r\n", + "04/18 15:17: Train Epoch 52: average Loss: 14.580482, time: 5.44 s\r\n", + "Val Epoch 52: 100%|█████████████████| 55/55 [00:00<00:00, 227.08it/s, loss=18.6]\r\n", + "04/18 15:17: Val Epoch 52: average Loss: 15.190062, time: 0.24 s\r\n", + "Test Epoch 52: 100%|████████████████| 56/56 [00:00<00:00, 253.24it/s, loss=13.3]\r\n", + "04/18 15:17: Test Epoch 52: average Loss: 15.015502, time: 0.22 s\r\n", + "Train Epoch 53: 100%|██████████████| 167/167 [00:05<00:00, 30.61it/s, loss=15.4]\r\n", + "04/18 15:17: Train Epoch 53: average Loss: 14.593077, time: 5.46 s\r\n", + "Val Epoch 53: 100%|█████████████████| 55/55 [00:00<00:00, 227.19it/s, loss=19.4]\r\n", + "04/18 15:17: Val Epoch 53: average Loss: 15.607855, time: 0.24 s\r\n", + "Test Epoch 53: 100%|████████████████| 56/56 [00:00<00:00, 254.96it/s, loss=13.4]\r\n", + "04/18 15:17: Test Epoch 53: average Loss: 15.462371, time: 0.22 s\r\n", + "Train Epoch 54: 100%|██████████████| 167/167 [00:05<00:00, 30.65it/s, loss=15.6]\r\n", + "04/18 15:17: Train Epoch 54: average Loss: 14.614372, time: 5.45 s\r\n", + "Val Epoch 54: 100%|█████████████████| 55/55 [00:00<00:00, 225.96it/s, loss=18.5]\r\n", + "04/18 15:17: Val Epoch 54: average Loss: 15.248776, time: 0.24 s\r\n", + "Test Epoch 54: 100%|████████████████| 56/56 [00:00<00:00, 255.16it/s, loss=13.5]\r\n", + "04/18 15:17: Test Epoch 54: average Loss: 15.140892, time: 0.22 s\r\n", + "Train Epoch 55: 100%|██████████████| 167/167 [00:05<00:00, 30.56it/s, loss=14.2]\r\n", + "04/18 15:17: Train Epoch 55: average Loss: 14.706540, time: 5.47 s\r\n", + 
"Val Epoch 55: 100%|█████████████████| 55/55 [00:00<00:00, 228.76it/s, loss=18.9]\r\n", + "04/18 15:17: Val Epoch 55: average Loss: 15.293262, time: 0.24 s\r\n", + "Test Epoch 55: 100%|████████████████| 56/56 [00:00<00:00, 255.36it/s, loss=13.1]\r\n", + "04/18 15:17: Test Epoch 55: average Loss: 15.157270, time: 0.22 s\r\n", + "Train Epoch 56: 100%|██████████████| 167/167 [00:05<00:00, 30.67it/s, loss=14.1]\r\n", + "04/18 15:17: Train Epoch 56: average Loss: 14.476066, time: 5.45 s\r\n", + "Val Epoch 56: 100%|█████████████████| 55/55 [00:00<00:00, 227.94it/s, loss=18.7]\r\n", + "04/18 15:17: Val Epoch 56: average Loss: 15.297700, time: 0.24 s\r\n", + "Test Epoch 56: 100%|████████████████| 56/56 [00:00<00:00, 255.15it/s, loss=13.2]\r\n", + "04/18 15:17: Test Epoch 56: average Loss: 15.113804, time: 0.22 s\r\n", + "Train Epoch 57: 100%|██████████████| 167/167 [00:05<00:00, 30.72it/s, loss=15.1]\r\n", + "04/18 15:17: Train Epoch 57: average Loss: 14.567475, time: 5.44 s\r\n", + "Val Epoch 57: 100%|█████████████████| 55/55 [00:00<00:00, 227.51it/s, loss=18.6]\r\n", + "04/18 15:17: Val Epoch 57: average Loss: 15.379013, time: 0.24 s\r\n", + "Test Epoch 57: 100%|████████████████| 56/56 [00:00<00:00, 255.51it/s, loss=13.5]\r\n", + "04/18 15:17: Test Epoch 57: average Loss: 15.246799, time: 0.22 s\r\n", + "Train Epoch 58: 100%|██████████████| 167/167 [00:05<00:00, 30.67it/s, loss=14.6]\r\n", + "04/18 15:17: Train Epoch 58: average Loss: 14.466954, time: 5.45 s\r\n", + "Val Epoch 58: 100%|█████████████████| 55/55 [00:00<00:00, 225.14it/s, loss=18.1]\r\n", + "04/18 15:17: Val Epoch 58: average Loss: 14.996333, time: 0.25 s\r\n", + "Test Epoch 58: 100%|████████████████| 56/56 [00:00<00:00, 255.33it/s, loss=13.2]\r\n", + "04/18 15:17: Test Epoch 58: average Loss: 14.877491, time: 0.22 s\r\n", + "04/18 15:17: Best validation model saved!\r\n", + "Train Epoch 59: 100%|██████████████| 167/167 [00:05<00:00, 30.68it/s, loss=14.2]\r\n", + "04/18 15:17: Train Epoch 59: average Loss: 14.467017, time: 5.44 s\r\n", + "Val Epoch 59: 100%|█████████████████| 55/55 [00:00<00:00, 227.68it/s, loss=18.6]\r\n", + "04/18 15:17: Val Epoch 59: average Loss: 15.122900, time: 0.24 s\r\n", + "Test Epoch 59: 100%|████████████████| 56/56 [00:00<00:00, 255.17it/s, loss=13.2]\r\n", + "04/18 15:17: Test Epoch 59: average Loss: 14.972972, time: 0.22 s\r\n", + "Train Epoch 60: 100%|██████████████| 167/167 [00:05<00:00, 30.67it/s, loss=15.2]\r\n", + "04/18 15:17: Train Epoch 60: average Loss: 14.518308, time: 5.45 s\r\n", + "Val Epoch 60: 100%|█████████████████| 55/55 [00:00<00:00, 227.73it/s, loss=18.9]\r\n", + "04/18 15:17: Val Epoch 60: average Loss: 15.760702, time: 0.24 s\r\n", + "Test Epoch 60: 100%|████████████████| 56/56 [00:00<00:00, 250.15it/s, loss=13.4]\r\n", + "04/18 15:17: Test Epoch 60: average Loss: 15.483933, time: 0.22 s\r\n", + "Train Epoch 61: 100%|██████████████| 167/167 [00:05<00:00, 30.60it/s, loss=14.6]\r\n", + "04/18 15:17: Train Epoch 61: average Loss: 14.729934, time: 5.46 s\r\n", + "Val Epoch 61: 100%|█████████████████| 55/55 [00:00<00:00, 227.33it/s, loss=18.9]\r\n", + "04/18 15:17: Val Epoch 61: average Loss: 15.405914, time: 0.24 s\r\n", + "Test Epoch 61: 100%|████████████████| 56/56 [00:00<00:00, 255.21it/s, loss=13.2]\r\n", + "04/18 15:17: Test Epoch 61: average Loss: 15.243720, time: 0.22 s\r\n", + "Train Epoch 62: 100%|██████████████| 167/167 [00:05<00:00, 30.80it/s, loss=13.7]\r\n", + "04/18 15:18: Train Epoch 62: average Loss: 14.359229, time: 5.42 s\r\n", + "Val Epoch 62: 100%|█████████████████| 
55/55 [00:00<00:00, 223.39it/s, loss=18.4]\r\n", + "04/18 15:18: Val Epoch 62: average Loss: 15.075394, time: 0.25 s\r\n", + "Test Epoch 62: 100%|████████████████| 56/56 [00:00<00:00, 255.09it/s, loss=13.2]\r\n", + "04/18 15:18: Test Epoch 62: average Loss: 14.946538, time: 0.22 s\r\n", + "Train Epoch 63: 100%|██████████████| 167/167 [00:05<00:00, 30.58it/s, loss=14.2]\r\n", + "04/18 15:18: Train Epoch 63: average Loss: 14.419174, time: 5.46 s\r\n", + "Val Epoch 63: 100%|█████████████████| 55/55 [00:00<00:00, 226.69it/s, loss=18.3]\r\n", + "04/18 15:18: Val Epoch 63: average Loss: 15.016393, time: 0.24 s\r\n", + "Test Epoch 63: 100%|████████████████| 56/56 [00:00<00:00, 255.41it/s, loss=13.3]\r\n", + "04/18 15:18: Test Epoch 63: average Loss: 14.882133, time: 0.22 s\r\n", + "Train Epoch 64: 100%|██████████████| 167/167 [00:05<00:00, 30.58it/s, loss=14.4]\r\n", + "04/18 15:18: Train Epoch 64: average Loss: 14.373534, time: 5.46 s\r\n", + "Val Epoch 64: 100%|█████████████████| 55/55 [00:00<00:00, 226.68it/s, loss=18.3]\r\n", + "04/18 15:18: Val Epoch 64: average Loss: 15.077637, time: 0.24 s\r\n", + "Test Epoch 64: 100%|████████████████| 56/56 [00:00<00:00, 252.47it/s, loss=13.3]\r\n", + "04/18 15:18: Test Epoch 64: average Loss: 14.904741, time: 0.22 s\r\n", + "Train Epoch 65: 100%|██████████████| 167/167 [00:05<00:00, 30.64it/s, loss=14.1]\r\n", + "04/18 15:18: Train Epoch 65: average Loss: 14.329006, time: 5.45 s\r\n", + "Val Epoch 65: 100%|█████████████████| 55/55 [00:00<00:00, 227.32it/s, loss=18.2]\r\n", + "04/18 15:18: Val Epoch 65: average Loss: 14.992236, time: 0.24 s\r\n", + "Test Epoch 65: 100%|████████████████| 56/56 [00:00<00:00, 253.52it/s, loss=13.4]\r\n", + "04/18 15:18: Test Epoch 65: average Loss: 14.864876, time: 0.22 s\r\n", + "04/18 15:18: Best validation model saved!\r\n", + "Train Epoch 66: 100%|██████████████| 167/167 [00:05<00:00, 30.52it/s, loss=14.6]\r\n", + "04/18 15:18: Train Epoch 66: average Loss: 14.345801, time: 5.47 s\r\n", + "Val Epoch 66: 100%|█████████████████| 55/55 [00:00<00:00, 227.20it/s, loss=18.6]\r\n", + "04/18 15:18: Val Epoch 66: average Loss: 15.232930, time: 0.24 s\r\n", + "Test Epoch 66: 100%|████████████████| 56/56 [00:00<00:00, 254.95it/s, loss=13.5]\r\n", + "04/18 15:18: Test Epoch 66: average Loss: 15.089302, time: 0.22 s\r\n", + "Train Epoch 67: 100%|████████████████| 167/167 [00:05<00:00, 30.63it/s, loss=15]\r\n", + "04/18 15:18: Train Epoch 67: average Loss: 14.420319, time: 5.45 s\r\n", + "Val Epoch 67: 100%|█████████████████| 55/55 [00:00<00:00, 227.24it/s, loss=18.2]\r\n", + "04/18 15:18: Val Epoch 67: average Loss: 14.962841, time: 0.24 s\r\n", + "Test Epoch 67: 100%|████████████████| 56/56 [00:00<00:00, 255.31it/s, loss=13.1]\r\n", + "04/18 15:18: Test Epoch 67: average Loss: 14.824832, time: 0.22 s\r\n", + "04/18 15:18: Best validation model saved!\r\n", + "Train Epoch 68: 100%|██████████████| 167/167 [00:05<00:00, 30.68it/s, loss=13.4]\r\n", + "04/18 15:18: Train Epoch 68: average Loss: 14.399577, time: 5.44 s\r\n", + "Val Epoch 68: 100%|███████████████████| 55/55 [00:00<00:00, 226.65it/s, loss=19]\r\n", + "04/18 15:18: Val Epoch 68: average Loss: 15.582130, time: 0.24 s\r\n", + "Test Epoch 68: 100%|████████████████| 56/56 [00:00<00:00, 252.16it/s, loss=13.3]\r\n", + "04/18 15:18: Test Epoch 68: average Loss: 15.458371, time: 0.22 s\r\n", + "Train Epoch 69: 100%|██████████████| 167/167 [00:05<00:00, 30.64it/s, loss=13.9]\r\n", + "04/18 15:18: Train Epoch 69: average Loss: 14.388384, time: 5.45 s\r\n", + "Val Epoch 69: 
100%|█████████████████| 55/55 [00:00<00:00, 227.69it/s, loss=18.4]\r\n", + "04/18 15:18: Val Epoch 69: average Loss: 15.103856, time: 0.24 s\r\n", + "Test Epoch 69: 100%|████████████████| 56/56 [00:00<00:00, 254.42it/s, loss=13.2]\r\n", + "04/18 15:18: Test Epoch 69: average Loss: 14.889678, time: 0.22 s\r\n", + "Train Epoch 70: 100%|██████████████| 167/167 [00:05<00:00, 30.57it/s, loss=14.5]\r\n", + "04/18 15:18: Train Epoch 70: average Loss: 14.279781, time: 5.46 s\r\n", + "Val Epoch 70: 100%|█████████████████| 55/55 [00:00<00:00, 228.29it/s, loss=18.3]\r\n", + "04/18 15:18: Val Epoch 70: average Loss: 15.176563, time: 0.24 s\r\n", + "Test Epoch 70: 100%|████████████████| 56/56 [00:00<00:00, 254.87it/s, loss=13.6]\r\n", + "04/18 15:18: Test Epoch 70: average Loss: 15.078206, time: 0.22 s\r\n", + "Train Epoch 71: 100%|████████████████| 167/167 [00:05<00:00, 30.73it/s, loss=15]\r\n", + "04/18 15:18: Train Epoch 71: average Loss: 14.267186, time: 5.43 s\r\n", + "Val Epoch 71: 100%|█████████████████| 55/55 [00:00<00:00, 229.83it/s, loss=18.7]\r\n", + "04/18 15:18: Val Epoch 71: average Loss: 15.177884, time: 0.24 s\r\n", + "Test Epoch 71: 100%|████████████████| 56/56 [00:00<00:00, 258.68it/s, loss=13.5]\r\n", + "04/18 15:18: Test Epoch 71: average Loss: 15.057321, time: 0.22 s\r\n", + "Train Epoch 72: 100%|████████████████| 167/167 [00:05<00:00, 30.77it/s, loss=15]\r\n", + "04/18 15:19: Train Epoch 72: average Loss: 14.278268, time: 5.43 s\r\n", + "Val Epoch 72: 100%|█████████████████| 55/55 [00:00<00:00, 230.04it/s, loss=18.2]\r\n", + "04/18 15:19: Val Epoch 72: average Loss: 15.002028, time: 0.24 s\r\n", + "Test Epoch 72: 100%|████████████████| 56/56 [00:00<00:00, 258.48it/s, loss=13.2]\r\n", + "04/18 15:19: Test Epoch 72: average Loss: 14.864541, time: 0.22 s\r\n", + "Train Epoch 73: 100%|██████████████| 167/167 [00:05<00:00, 30.77it/s, loss=14.5]\r\n", + "04/18 15:19: Train Epoch 73: average Loss: 14.188956, time: 5.43 s\r\n", + "Val Epoch 73: 100%|█████████████████| 55/55 [00:00<00:00, 230.30it/s, loss=18.3]\r\n", + "04/18 15:19: Val Epoch 73: average Loss: 15.056545, time: 0.24 s\r\n", + "Test Epoch 73: 100%|████████████████| 56/56 [00:00<00:00, 258.66it/s, loss=13.2]\r\n", + "04/18 15:19: Test Epoch 73: average Loss: 14.922194, time: 0.22 s\r\n", + "Train Epoch 74: 100%|██████████████| 167/167 [00:05<00:00, 30.62it/s, loss=13.6]\r\n", + "04/18 15:19: Train Epoch 74: average Loss: 14.239767, time: 5.45 s\r\n", + "Val Epoch 74: 100%|█████████████████| 55/55 [00:00<00:00, 229.87it/s, loss=18.5]\r\n", + "04/18 15:19: Val Epoch 74: average Loss: 15.175199, time: 0.24 s\r\n", + "Test Epoch 74: 100%|████████████████| 56/56 [00:00<00:00, 258.52it/s, loss=13.8]\r\n", + "04/18 15:19: Test Epoch 74: average Loss: 15.061075, time: 0.22 s\r\n", + "Train Epoch 75: 100%|██████████████| 167/167 [00:05<00:00, 30.70it/s, loss=13.7]\r\n", + "04/18 15:19: Train Epoch 75: average Loss: 14.148535, time: 5.44 s\r\n", + "Val Epoch 75: 100%|█████████████████| 55/55 [00:00<00:00, 230.15it/s, loss=18.3]\r\n", + "04/18 15:19: Val Epoch 75: average Loss: 15.078827, time: 0.24 s\r\n", + "Test Epoch 75: 100%|████████████████| 56/56 [00:00<00:00, 258.70it/s, loss=13.3]\r\n", + "04/18 15:19: Test Epoch 75: average Loss: 14.914262, time: 0.22 s\r\n", + "Train Epoch 76: 100%|██████████████| 167/167 [00:05<00:00, 30.64it/s, loss=13.6]\r\n", + "04/18 15:19: Train Epoch 76: average Loss: 14.306622, time: 5.45 s\r\n", + "Val Epoch 76: 100%|█████████████████| 55/55 [00:00<00:00, 228.94it/s, loss=18.4]\r\n", + "04/18 15:19: 
Val Epoch 76: average Loss: 15.181923, time: 0.24 s\r\n", + "Test Epoch 76: 100%|████████████████| 56/56 [00:00<00:00, 258.35it/s, loss=13.3]\r\n", + "04/18 15:19: Test Epoch 76: average Loss: 15.005987, time: 0.22 s\r\n", + "Train Epoch 77: 100%|██████████████| 167/167 [00:05<00:00, 30.66it/s, loss=13.3]\r\n", + "04/18 15:19: Train Epoch 77: average Loss: 14.229269, time: 5.45 s\r\n", + "Val Epoch 77: 100%|█████████████████| 55/55 [00:00<00:00, 229.86it/s, loss=18.3]\r\n", + "04/18 15:19: Val Epoch 77: average Loss: 15.076397, time: 0.24 s\r\n", + "Test Epoch 77: 100%|████████████████| 56/56 [00:00<00:00, 258.54it/s, loss=13.2]\r\n", + "04/18 15:19: Test Epoch 77: average Loss: 14.925647, time: 0.22 s\r\n", + "Train Epoch 78: 100%|██████████████| 167/167 [00:05<00:00, 30.77it/s, loss=14.9]\r\n", + "04/18 15:19: Train Epoch 78: average Loss: 14.279387, time: 5.43 s\r\n", + "Val Epoch 78: 100%|█████████████████| 55/55 [00:00<00:00, 228.85it/s, loss=18.3]\r\n", + "04/18 15:19: Val Epoch 78: average Loss: 14.999521, time: 0.24 s\r\n", + "Test Epoch 78: 100%|████████████████| 56/56 [00:00<00:00, 257.46it/s, loss=13.3]\r\n", + "04/18 15:19: Test Epoch 78: average Loss: 14.837998, time: 0.22 s\r\n", + "Train Epoch 79: 100%|██████████████| 167/167 [00:05<00:00, 30.63it/s, loss=13.5]\r\n", + "04/18 15:19: Train Epoch 79: average Loss: 14.094612, time: 5.45 s\r\n", + "Val Epoch 79: 100%|█████████████████| 55/55 [00:00<00:00, 229.10it/s, loss=18.7]\r\n", + "04/18 15:19: Val Epoch 79: average Loss: 15.371862, time: 0.24 s\r\n", + "Test Epoch 79: 100%|████████████████| 56/56 [00:00<00:00, 257.58it/s, loss=13.5]\r\n", + "04/18 15:19: Test Epoch 79: average Loss: 15.275037, time: 0.22 s\r\n", + "Train Epoch 80: 100%|██████████████| 167/167 [00:05<00:00, 30.69it/s, loss=13.8]\r\n", + "04/18 15:19: Train Epoch 80: average Loss: 14.158118, time: 5.44 s\r\n", + "Val Epoch 80: 100%|█████████████████| 55/55 [00:00<00:00, 229.52it/s, loss=18.6]\r\n", + "04/18 15:19: Val Epoch 80: average Loss: 15.123351, time: 0.24 s\r\n", + "Test Epoch 80: 100%|████████████████| 56/56 [00:00<00:00, 258.03it/s, loss=13.2]\r\n", + "04/18 15:19: Test Epoch 80: average Loss: 14.934606, time: 0.22 s\r\n", + "Train Epoch 81: 100%|██████████████| 167/167 [00:05<00:00, 30.59it/s, loss=13.9]\r\n", + "04/18 15:19: Train Epoch 81: average Loss: 14.101591, time: 5.46 s\r\n", + "Val Epoch 81: 100%|█████████████████| 55/55 [00:00<00:00, 229.73it/s, loss=18.3]\r\n", + "04/18 15:19: Val Epoch 81: average Loss: 15.066786, time: 0.24 s\r\n", + "Test Epoch 81: 100%|████████████████| 56/56 [00:00<00:00, 258.09it/s, loss=13.2]\r\n", + "04/18 15:19: Test Epoch 81: average Loss: 14.945487, time: 0.22 s\r\n", + "Train Epoch 82: 100%|██████████████| 167/167 [00:05<00:00, 30.54it/s, loss=14.4]\r\n", + "04/18 15:20: Train Epoch 82: average Loss: 14.142215, time: 5.47 s\r\n", + "Val Epoch 82: 100%|█████████████████| 55/55 [00:00<00:00, 229.43it/s, loss=18.6]\r\n", + "04/18 15:20: Val Epoch 82: average Loss: 15.158585, time: 0.24 s\r\n", + "Test Epoch 82: 100%|████████████████| 56/56 [00:00<00:00, 257.62it/s, loss=13.2]\r\n", + "04/18 15:20: Test Epoch 82: average Loss: 15.070672, time: 0.22 s\r\n", + "04/18 15:20: Validation performance didn't improve for 15 epochs. 
Training stops.\r\n", + "04/18 15:20: Best models saved at /home/czzhangheng/code/TrafficWheel/experiments/PEMSD8/2025-04-18_15-11-57/best_model.pth and /home/czzhangheng/code/TrafficWheel/experiments/PEMSD8/2025-04-18_15-11-57/best_test_model.pth\r\n", + "04/18 15:20: Testing on best validation model\r\n", + "04/18 15:20: Horizon 01, MAE: 12.9823, RMSE: 20.3477, MAPE: 0.0872\r\n", + "04/18 15:20: Horizon 02, MAE: 13.4034, RMSE: 21.3976, MAPE: 0.0871\r\n", + "04/18 15:20: Horizon 03, MAE: 13.8474, RMSE: 22.2569, MAPE: 0.0893\r\n", + "04/18 15:20: Horizon 04, MAE: 14.2901, RMSE: 23.0797, MAPE: 0.0929\r\n", + "04/18 15:20: Horizon 05, MAE: 14.5957, RMSE: 23.6330, MAPE: 0.1000\r\n", + "04/18 15:20: Horizon 06, MAE: 14.8787, RMSE: 24.1078, MAPE: 0.1017\r\n", + "04/18 15:20: Horizon 07, MAE: 15.1351, RMSE: 24.5556, MAPE: 0.1049\r\n", + "04/18 15:20: Horizon 08, MAE: 15.3589, RMSE: 24.9306, MAPE: 0.1010\r\n", + "04/18 15:20: Horizon 09, MAE: 15.5202, RMSE: 25.2831, MAPE: 0.1010\r\n", + "04/18 15:20: Horizon 10, MAE: 15.7203, RMSE: 25.6139, MAPE: 0.1026\r\n", + "04/18 15:20: Horizon 11, MAE: 15.9970, RMSE: 25.9403, MAPE: 0.1130\r\n", + "04/18 15:20: Horizon 12, MAE: 16.2731, RMSE: 26.3822, MAPE: 0.1061\r\n", + "04/18 15:20: Average Horizon, MAE: 14.8335, RMSE: 24.0284, MAPE: 0.0989\r\n", + "04/18 15:20: Testing on best test model\r\n", + "04/18 15:20: Horizon 01, MAE: 12.9823, RMSE: 20.3477, MAPE: 0.0872\r\n", + "04/18 15:20: Horizon 02, MAE: 13.4034, RMSE: 21.3976, MAPE: 0.0871\r\n", + "04/18 15:20: Horizon 03, MAE: 13.8474, RMSE: 22.2569, MAPE: 0.0893\r\n", + "04/18 15:20: Horizon 04, MAE: 14.2901, RMSE: 23.0797, MAPE: 0.0929\r\n", + "04/18 15:20: Horizon 05, MAE: 14.5957, RMSE: 23.6330, MAPE: 0.1000\r\n", + "04/18 15:20: Horizon 06, MAE: 14.8787, RMSE: 24.1078, MAPE: 0.1017\r\n", + "04/18 15:20: Horizon 07, MAE: 15.1351, RMSE: 24.5556, MAPE: 0.1049\r\n", + "04/18 15:20: Horizon 08, MAE: 15.3589, RMSE: 24.9306, MAPE: 0.1010\r\n", + "04/18 15:20: Horizon 09, MAE: 15.5202, RMSE: 25.2831, MAPE: 0.1010\r\n", + "04/18 15:20: Horizon 10, MAE: 15.7203, RMSE: 25.6139, MAPE: 0.1026\r\n", + "04/18 15:20: Horizon 11, MAE: 15.9970, RMSE: 25.9403, MAPE: 0.1130\r\n", + "04/18 15:20: Horizon 12, MAE: 16.2731, RMSE: 26.3822, MAPE: 0.1061\r\n", + "04/18 15:20: Average Horizon, MAE: 14.8335, RMSE: 24.0284, MAPE: 0.0989\r\n" + ] + } + ], "source": [ "!python run.py --model EXP --dataset PEMSD8 --mode train --device cuda:1" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "id": "68bfc782-42a1-4d3e-aea6-8ca6d24e15a1", "metadata": { "collapsed": false, "scrolled": true, - "is_executing": true + "ExecuteTime": { + "end_time": "2025-04-18T07:47:34.843486Z", + "start_time": "2025-04-18T07:20:02.801453Z" + } }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loading PEMSD7 dataset... 
\r\n", + "Creat Log File in: /home/czzhangheng/code/TrafficWheel/experiments/PEMSD7/2025-04-18_15-20-48/run.log\r\n", + "04/18 15:20: Experiment log path in: /home/czzhangheng/code/TrafficWheel/experiments/PEMSD7/2025-04-18_15-20-48\r\n", + "04/18 15:20: Training process started\r\n", + "Train Epoch 1: 75%|███████████▎ | 199/264 [00:14<00:04, 14.12it/s, loss=33.4]04/18 15:21: Train Epoch 1: 200/264 Loss: 31.583134\r\n", + "Train Epoch 1: 100%|███████████████| 264/264 [00:19<00:00, 13.67it/s, loss=27.8]\r\n", + "04/18 15:21: Train Epoch 1: average Loss: 54.878681, time: 19.32 s\r\n", + "Val Epoch 1: 100%|███████████████████| 88/88 [00:02<00:00, 36.85it/s, loss=40.9]\r\n", + "04/18 15:21: Val Epoch 1: average Loss: 30.896043, time: 2.39 s\r\n", + "Test Epoch 1: 100%|██████████████████| 89/89 [00:02<00:00, 37.21it/s, loss=31.7]\r\n", + "04/18 15:21: Test Epoch 1: average Loss: 30.542740, time: 2.39 s\r\n", + "04/18 15:21: Best validation model saved!\r\n", + "Train Epoch 2: 75%|███████████▎ | 199/264 [00:14<00:04, 14.09it/s, loss=24.9]04/18 15:21: Train Epoch 2: 200/264 Loss: 27.297743\r\n", + "Train Epoch 2: 100%|███████████████| 264/264 [00:18<00:00, 14.11it/s, loss=28.3]\r\n", + "04/18 15:21: Train Epoch 2: average Loss: 28.020708, time: 18.71 s\r\n", + "Val Epoch 2: 100%|███████████████████| 88/88 [00:02<00:00, 37.06it/s, loss=35.7]\r\n", + "04/18 15:21: Val Epoch 2: average Loss: 25.851820, time: 2.37 s\r\n", + "Test Epoch 2: 100%|████████████████████| 89/89 [00:02<00:00, 37.39it/s, loss=25]\r\n", + "04/18 15:21: Test Epoch 2: average Loss: 26.210778, time: 2.38 s\r\n", + "04/18 15:21: Best validation model saved!\r\n", + "Train Epoch 3: 75%|███████████▎ | 199/264 [00:14<00:04, 14.13it/s, loss=25.5]04/18 15:21: Train Epoch 3: 200/264 Loss: 21.962288\r\n", + "Train Epoch 3: 100%|███████████████| 264/264 [00:18<00:00, 14.14it/s, loss=25.4]\r\n", + "04/18 15:21: Train Epoch 3: average Loss: 25.913550, time: 18.67 s\r\n", + "Val Epoch 3: 100%|███████████████████| 88/88 [00:02<00:00, 37.06it/s, loss=33.8]\r\n", + "04/18 15:21: Val Epoch 3: average Loss: 24.926414, time: 2.37 s\r\n", + "Test Epoch 3: 100%|██████████████████| 89/89 [00:02<00:00, 37.41it/s, loss=23.5]\r\n", + "04/18 15:21: Test Epoch 3: average Loss: 25.216497, time: 2.38 s\r\n", + "04/18 15:21: Best validation model saved!\r\n", + "Train Epoch 4: 75%|███████████▎ | 199/264 [00:14<00:04, 14.15it/s, loss=25.2]04/18 15:22: Train Epoch 4: 200/264 Loss: 24.388538\r\n", + "Train Epoch 4: 100%|█████████████████| 264/264 [00:18<00:00, 14.13it/s, loss=23]\r\n", + "04/18 15:22: Train Epoch 4: average Loss: 25.086952, time: 18.68 s\r\n", + "Val Epoch 4: 100%|█████████████████████| 88/88 [00:02<00:00, 37.03it/s, loss=32]\r\n", + "04/18 15:22: Val Epoch 4: average Loss: 24.118992, time: 2.38 s\r\n", + "Test Epoch 4: 100%|██████████████████| 89/89 [00:02<00:00, 37.41it/s, loss=23.8]\r\n", + "04/18 15:22: Test Epoch 4: average Loss: 24.358542, time: 2.38 s\r\n", + "04/18 15:22: Best validation model saved!\r\n", + "Train Epoch 5: 75%|████████████▊ | 199/264 [00:14<00:04, 14.13it/s, loss=22]04/18 15:22: Train Epoch 5: 200/264 Loss: 25.202778\r\n", + "Train Epoch 5: 100%|███████████████| 264/264 [00:18<00:00, 14.12it/s, loss=24.4]\r\n", + "04/18 15:22: Train Epoch 5: average Loss: 24.512054, time: 18.69 s\r\n", + "Val Epoch 5: 100%|███████████████████| 88/88 [00:02<00:00, 37.01it/s, loss=30.3]\r\n", + "04/18 15:22: Val Epoch 5: average Loss: 23.536590, time: 2.38 s\r\n", + "Test Epoch 5: 100%|██████████████████| 89/89 [00:02<00:00, 37.35it/s, 
loss=23.8]\r\n", + "04/18 15:22: Test Epoch 5: average Loss: 23.812165, time: 2.38 s\r\n", + "04/18 15:22: Best validation model saved!\r\n", + "Train Epoch 6: 75%|███████████▎ | 199/264 [00:14<00:04, 14.09it/s, loss=25.6]04/18 15:23: Train Epoch 6: 200/264 Loss: 23.087574\r\n", + "Train Epoch 6: 100%|███████████████| 264/264 [00:18<00:00, 14.12it/s, loss=25.4]\r\n", + "04/18 15:23: Train Epoch 6: average Loss: 23.749055, time: 18.70 s\r\n", + "Val Epoch 6: 100%|███████████████████| 88/88 [00:02<00:00, 37.00it/s, loss=29.3]\r\n", + "04/18 15:23: Val Epoch 6: average Loss: 23.338495, time: 2.38 s\r\n", + "Test Epoch 6: 100%|██████████████████| 89/89 [00:02<00:00, 37.35it/s, loss=24.4]\r\n", + "04/18 15:23: Test Epoch 6: average Loss: 23.585890, time: 2.38 s\r\n", + "04/18 15:23: Best validation model saved!\r\n", + "Train Epoch 7: 75%|███████████▎ | 199/264 [00:14<00:04, 14.08it/s, loss=23.5]04/18 15:23: Train Epoch 7: 200/264 Loss: 23.531998\r\n", + "Train Epoch 7: 100%|███████████████| 264/264 [00:18<00:00, 14.12it/s, loss=22.9]\r\n", + "04/18 15:23: Train Epoch 7: average Loss: 23.446277, time: 18.70 s\r\n", + "Val Epoch 7: 100%|███████████████████| 88/88 [00:02<00:00, 37.01it/s, loss=28.4]\r\n", + "04/18 15:23: Val Epoch 7: average Loss: 22.875079, time: 2.38 s\r\n", + "Test Epoch 7: 100%|██████████████████| 89/89 [00:02<00:00, 37.37it/s, loss=23.4]\r\n", + "04/18 15:23: Test Epoch 7: average Loss: 23.169232, time: 2.38 s\r\n", + "04/18 15:23: Best validation model saved!\r\n", + "Train Epoch 8: 75%|███████████▎ | 199/264 [00:14<00:04, 14.11it/s, loss=23.8]04/18 15:23: Train Epoch 8: 200/264 Loss: 24.358780\r\n", + "Train Epoch 8: 100%|███████████████| 264/264 [00:18<00:00, 14.10it/s, loss=22.6]\r\n", + "04/18 15:23: Train Epoch 8: average Loss: 23.044217, time: 18.72 s\r\n", + "Val Epoch 8: 100%|███████████████████| 88/88 [00:02<00:00, 36.99it/s, loss=27.6]\r\n", + "04/18 15:23: Val Epoch 8: average Loss: 22.935038, time: 2.38 s\r\n", + "Test Epoch 8: 100%|████████████████████| 89/89 [00:02<00:00, 37.36it/s, loss=24]\r\n", + "04/18 15:23: Test Epoch 8: average Loss: 23.122449, time: 2.38 s\r\n", + "Train Epoch 9: 75%|███████████▎ | 199/264 [00:14<00:04, 14.07it/s, loss=23.5]04/18 15:24: Train Epoch 9: 200/264 Loss: 21.342821\r\n", + "Train Epoch 9: 100%|███████████████| 264/264 [00:18<00:00, 14.10it/s, loss=24.1]\r\n", + "04/18 15:24: Train Epoch 9: average Loss: 22.452033, time: 18.72 s\r\n", + "Val Epoch 9: 100%|███████████████████| 88/88 [00:02<00:00, 37.00it/s, loss=27.2]\r\n", + "04/18 15:24: Val Epoch 9: average Loss: 22.687668, time: 2.38 s\r\n", + "Test Epoch 9: 100%|██████████████████| 89/89 [00:02<00:00, 37.35it/s, loss=23.6]\r\n", + "04/18 15:24: Test Epoch 9: average Loss: 22.862084, time: 2.38 s\r\n", + "04/18 15:24: Best validation model saved!\r\n", + "Train Epoch 10: 75%|██████████▌ | 199/264 [00:14<00:04, 14.10it/s, loss=23.4]04/18 15:24: Train Epoch 10: 200/264 Loss: 21.934868\r\n", + "Train Epoch 10: 100%|██████████████| 264/264 [00:18<00:00, 14.09it/s, loss=25.9]\r\n", + "04/18 15:24: Train Epoch 10: average Loss: 22.344034, time: 18.73 s\r\n", + "Val Epoch 10: 100%|██████████████████| 88/88 [00:02<00:00, 36.98it/s, loss=27.4]\r\n", + "04/18 15:24: Val Epoch 10: average Loss: 24.120734, time: 2.38 s\r\n", + "Test Epoch 10: 100%|█████████████████| 89/89 [00:02<00:00, 37.34it/s, loss=24.6]\r\n", + "04/18 15:24: Test Epoch 10: average Loss: 24.407873, time: 2.38 s\r\n", + "Train Epoch 11: 75%|████████████ | 199/264 [00:14<00:04, 14.11it/s, loss=22]04/18 15:24: Train 
Epoch 11: 200/264 Loss: 21.193386\r\n", + "Train Epoch 11: 100%|████████████████| 264/264 [00:18<00:00, 14.09it/s, loss=22]\r\n", + "04/18 15:25: Train Epoch 11: average Loss: 22.173430, time: 18.73 s\r\n", + "Val Epoch 11: 100%|██████████████████| 88/88 [00:02<00:00, 36.98it/s, loss=26.2]\r\n", + "04/18 15:25: Val Epoch 11: average Loss: 22.175462, time: 2.38 s\r\n", + "Test Epoch 11: 100%|█████████████████| 89/89 [00:02<00:00, 37.34it/s, loss=23.5]\r\n", + "04/18 15:25: Test Epoch 11: average Loss: 22.340100, time: 2.38 s\r\n", + "04/18 15:25: Best validation model saved!\r\n", + "Train Epoch 12: 75%|██████████▌ | 199/264 [00:14<00:04, 14.11it/s, loss=22.4]04/18 15:25: Train Epoch 12: 200/264 Loss: 21.252958\r\n", + "Train Epoch 12: 100%|██████████████| 264/264 [00:18<00:00, 14.09it/s, loss=22.6]\r\n", + "04/18 15:25: Train Epoch 12: average Loss: 21.898088, time: 18.73 s\r\n", + "Val Epoch 12: 100%|██████████████████| 88/88 [00:02<00:00, 37.00it/s, loss=26.6]\r\n", + "04/18 15:25: Val Epoch 12: average Loss: 22.329030, time: 2.38 s\r\n", + "Test Epoch 12: 100%|█████████████████| 89/89 [00:02<00:00, 37.34it/s, loss=23.6]\r\n", + "04/18 15:25: Test Epoch 12: average Loss: 22.496267, time: 2.38 s\r\n", + "Train Epoch 13: 75%|██████████▌ | 199/264 [00:14<00:04, 14.11it/s, loss=22.2]04/18 15:25: Train Epoch 13: 200/264 Loss: 22.514868\r\n", + "Train Epoch 13: 100%|████████████████| 264/264 [00:18<00:00, 14.09it/s, loss=21]\r\n", + "04/18 15:25: Train Epoch 13: average Loss: 21.681367, time: 18.74 s\r\n", + "Val Epoch 13: 100%|██████████████████| 88/88 [00:02<00:00, 36.98it/s, loss=25.4]\r\n", + "04/18 15:25: Val Epoch 13: average Loss: 21.712773, time: 2.38 s\r\n", + "Test Epoch 13: 100%|█████████████████| 89/89 [00:02<00:00, 37.32it/s, loss=23.4]\r\n", + "04/18 15:25: Test Epoch 13: average Loss: 21.897593, time: 2.39 s\r\n", + "04/18 15:25: Best validation model saved!\r\n", + "Train Epoch 14: 75%|██████████▌ | 199/264 [00:14<00:04, 14.08it/s, loss=21.8]04/18 15:26: Train Epoch 14: 200/264 Loss: 20.948587\r\n", + "Train Epoch 14: 100%|██████████████| 264/264 [00:18<00:00, 14.09it/s, loss=21.1]\r\n", + "04/18 15:26: Train Epoch 14: average Loss: 21.509418, time: 18.74 s\r\n", + "Val Epoch 14: 100%|██████████████████| 88/88 [00:02<00:00, 36.98it/s, loss=26.3]\r\n", + "04/18 15:26: Val Epoch 14: average Loss: 22.542560, time: 2.38 s\r\n", + "Test Epoch 14: 100%|█████████████████| 89/89 [00:02<00:00, 37.34it/s, loss=23.5]\r\n", + "04/18 15:26: Test Epoch 14: average Loss: 22.793961, time: 2.38 s\r\n", + "Train Epoch 15: 75%|██████████▌ | 199/264 [00:14<00:04, 14.09it/s, loss=22.7]04/18 15:26: Train Epoch 15: 200/264 Loss: 21.470264\r\n", + "Train Epoch 15: 100%|██████████████| 264/264 [00:18<00:00, 14.09it/s, loss=22.7]\r\n", + "04/18 15:26: Train Epoch 15: average Loss: 21.493210, time: 18.74 s\r\n", + "Val Epoch 15: 100%|██████████████████| 88/88 [00:02<00:00, 36.97it/s, loss=24.8]\r\n", + "04/18 15:26: Val Epoch 15: average Loss: 21.590262, time: 2.38 s\r\n", + "Test Epoch 15: 100%|█████████████████| 89/89 [00:02<00:00, 37.32it/s, loss=22.9]\r\n", + "04/18 15:26: Test Epoch 15: average Loss: 21.742914, time: 2.39 s\r\n", + "04/18 15:26: Best validation model saved!\r\n", + "Train Epoch 16: 75%|██████████▌ | 199/264 [00:14<00:04, 14.06it/s, loss=22.2]04/18 15:26: Train Epoch 16: 200/264 Loss: 21.528200\r\n", + "Train Epoch 16: 100%|██████████████| 264/264 [00:18<00:00, 14.08it/s, loss=21.5]\r\n", + "04/18 15:26: Train Epoch 16: average Loss: 21.184166, time: 18.75 s\r\n", + "Val Epoch 16: 
100%|██████████████████| 88/88 [00:02<00:00, 36.98it/s, loss=25.2]\r\n", + "04/18 15:27: Val Epoch 16: average Loss: 21.552790, time: 2.38 s\r\n", + "Test Epoch 16: 100%|█████████████████| 89/89 [00:02<00:00, 37.32it/s, loss=23.1]\r\n", + "04/18 15:27: Test Epoch 16: average Loss: 21.805906, time: 2.39 s\r\n", + "04/18 15:27: Best validation model saved!\r\n", + "Train Epoch 17: 75%|██████████▌ | 199/264 [00:14<00:04, 14.07it/s, loss=20.9]04/18 15:27: Train Epoch 17: 200/264 Loss: 22.207558\r\n", + "Train Epoch 17: 100%|██████████████| 264/264 [00:18<00:00, 14.07it/s, loss=21.6]\r\n", + "04/18 15:27: Train Epoch 17: average Loss: 21.150025, time: 18.77 s\r\n", + "Val Epoch 17: 100%|██████████████████| 88/88 [00:02<00:00, 36.99it/s, loss=25.2]\r\n", + "04/18 15:27: Val Epoch 17: average Loss: 21.656280, time: 2.38 s\r\n", + "Test Epoch 17: 100%|█████████████████| 89/89 [00:02<00:00, 37.31it/s, loss=23.7]\r\n", + "04/18 15:27: Test Epoch 17: average Loss: 21.844660, time: 2.39 s\r\n", + "Train Epoch 18: 75%|██████████▌ | 199/264 [00:14<00:04, 14.09it/s, loss=20.4]04/18 15:27: Train Epoch 18: 200/264 Loss: 21.349459\r\n", + "Train Epoch 18: 100%|██████████████| 264/264 [00:18<00:00, 14.08it/s, loss=21.4]\r\n", + "04/18 15:27: Train Epoch 18: average Loss: 21.032826, time: 18.75 s\r\n", + "Val Epoch 18: 100%|██████████████████| 88/88 [00:02<00:00, 37.00it/s, loss=24.3]\r\n", + "04/18 15:27: Val Epoch 18: average Loss: 21.340334, time: 2.38 s\r\n", + "Test Epoch 18: 100%|█████████████████| 89/89 [00:02<00:00, 37.34it/s, loss=23.8]\r\n", + "04/18 15:27: Test Epoch 18: average Loss: 21.582921, time: 2.38 s\r\n", + "04/18 15:27: Best validation model saved!\r\n", + "Train Epoch 19: 75%|██████████▌ | 199/264 [00:14<00:04, 14.10it/s, loss=20.9]04/18 15:28: Train Epoch 19: 200/264 Loss: 21.687870\r\n", + "Train Epoch 19: 100%|██████████████| 264/264 [00:18<00:00, 14.08it/s, loss=20.4]\r\n", + "04/18 15:28: Train Epoch 19: average Loss: 20.757627, time: 18.75 s\r\n", + "Val Epoch 19: 100%|██████████████████| 88/88 [00:02<00:00, 36.99it/s, loss=24.9]\r\n", + "04/18 15:28: Val Epoch 19: average Loss: 21.408308, time: 2.38 s\r\n", + "Test Epoch 19: 100%|█████████████████| 89/89 [00:02<00:00, 37.37it/s, loss=22.5]\r\n", + "04/18 15:28: Test Epoch 19: average Loss: 21.619702, time: 2.38 s\r\n", + "Train Epoch 20: 75%|██████████▌ | 199/264 [00:14<00:04, 14.07it/s, loss=21.1]04/18 15:28: Train Epoch 20: 200/264 Loss: 20.328411\r\n", + "Train Epoch 20: 100%|██████████████| 264/264 [00:18<00:00, 14.08it/s, loss=20.4]\r\n", + "04/18 15:28: Train Epoch 20: average Loss: 20.753064, time: 18.76 s\r\n", + "Val Epoch 20: 100%|██████████████████| 88/88 [00:02<00:00, 37.00it/s, loss=24.1]\r\n", + "04/18 15:28: Val Epoch 20: average Loss: 21.360411, time: 2.38 s\r\n", + "Test Epoch 20: 100%|█████████████████| 89/89 [00:02<00:00, 37.36it/s, loss=22.3]\r\n", + "04/18 15:28: Test Epoch 20: average Loss: 21.592710, time: 2.38 s\r\n", + "Train Epoch 21: 75%|██████████▌ | 199/264 [00:14<00:04, 14.08it/s, loss=20.8]04/18 15:28: Train Epoch 21: 200/264 Loss: 20.252331\r\n", + "Train Epoch 21: 100%|██████████████| 264/264 [00:18<00:00, 14.08it/s, loss=19.1]\r\n", + "04/18 15:28: Train Epoch 21: average Loss: 20.646361, time: 18.75 s\r\n", + "Val Epoch 21: 100%|██████████████████| 88/88 [00:02<00:00, 36.99it/s, loss=23.9]\r\n", + "04/18 15:28: Val Epoch 21: average Loss: 21.112341, time: 2.38 s\r\n", + "Test Epoch 21: 100%|█████████████████| 89/89 [00:02<00:00, 37.35it/s, loss=22.8]\r\n", + "04/18 15:29: Test Epoch 21: average 
Loss: 21.340167, time: 2.38 s\r\n", + "04/18 15:29: Best validation model saved!\r\n", + "Train Epoch 22: 75%|██████████▌ | 199/264 [00:14<00:04, 14.08it/s, loss=21.4]04/18 15:29: Train Epoch 22: 200/264 Loss: 20.252222\r\n", + "Train Epoch 22: 100%|████████████████| 264/264 [00:18<00:00, 14.08it/s, loss=20]\r\n", + "04/18 15:29: Train Epoch 22: average Loss: 20.617401, time: 18.75 s\r\n", + "Val Epoch 22: 100%|██████████████████| 88/88 [00:02<00:00, 36.94it/s, loss=24.7]\r\n", + "04/18 15:29: Val Epoch 22: average Loss: 21.458822, time: 2.38 s\r\n", + "Test Epoch 22: 100%|█████████████████| 89/89 [00:02<00:00, 37.36it/s, loss=23.3]\r\n", + "04/18 15:29: Test Epoch 22: average Loss: 21.701950, time: 2.38 s\r\n", + "Train Epoch 23: 75%|██████████▌ | 199/264 [00:14<00:04, 14.04it/s, loss=20.7]04/18 15:29: Train Epoch 23: 200/264 Loss: 20.546272\r\n", + "Train Epoch 23: 100%|██████████████| 264/264 [00:18<00:00, 14.08it/s, loss=20.4]\r\n", + "04/18 15:29: Train Epoch 23: average Loss: 20.782312, time: 18.75 s\r\n", + "Val Epoch 23: 100%|██████████████████| 88/88 [00:02<00:00, 37.00it/s, loss=24.6]\r\n", + "04/18 15:29: Val Epoch 23: average Loss: 21.358193, time: 2.38 s\r\n", + "Test Epoch 23: 100%|█████████████████| 89/89 [00:02<00:00, 37.37it/s, loss=23.1]\r\n", + "04/18 15:29: Test Epoch 23: average Loss: 21.589926, time: 2.38 s\r\n", + "Train Epoch 24: 75%|██████████▌ | 199/264 [00:14<00:04, 14.07it/s, loss=20.8]04/18 15:30: Train Epoch 24: 200/264 Loss: 20.071638\r\n", + "Train Epoch 24: 100%|██████████████| 264/264 [00:18<00:00, 14.07it/s, loss=20.1]\r\n", + "04/18 15:30: Train Epoch 24: average Loss: 20.558243, time: 18.76 s\r\n", + "Val Epoch 24: 100%|██████████████████| 88/88 [00:02<00:00, 36.99it/s, loss=23.7]\r\n", + "04/18 15:30: Val Epoch 24: average Loss: 21.181309, time: 2.38 s\r\n", + "Test Epoch 24: 100%|█████████████████| 89/89 [00:02<00:00, 37.34it/s, loss=23.6]\r\n", + "04/18 15:30: Test Epoch 24: average Loss: 21.407773, time: 2.38 s\r\n", + "Train Epoch 25: 75%|██████████▌ | 199/264 [00:14<00:04, 14.06it/s, loss=19.3]04/18 15:30: Train Epoch 25: 200/264 Loss: 19.421518\r\n", + "Train Epoch 25: 100%|██████████████| 264/264 [00:18<00:00, 14.08it/s, loss=19.6]\r\n", + "04/18 15:30: Train Epoch 25: average Loss: 20.398695, time: 18.75 s\r\n", + "Val Epoch 25: 100%|██████████████████| 88/88 [00:02<00:00, 36.99it/s, loss=23.6]\r\n", + "04/18 15:30: Val Epoch 25: average Loss: 20.936435, time: 2.38 s\r\n", + "Test Epoch 25: 100%|███████████████████| 89/89 [00:02<00:00, 37.34it/s, loss=23]\r\n", + "04/18 15:30: Test Epoch 25: average Loss: 21.161080, time: 2.38 s\r\n", + "04/18 15:30: Best validation model saved!\r\n", + "Train Epoch 26: 75%|██████████▌ | 199/264 [00:14<00:04, 14.10it/s, loss=18.4]04/18 15:30: Train Epoch 26: 200/264 Loss: 20.369778\r\n", + "Train Epoch 26: 100%|██████████████| 264/264 [00:18<00:00, 14.07it/s, loss=18.9]\r\n", + "04/18 15:30: Train Epoch 26: average Loss: 20.239677, time: 18.76 s\r\n", + "Val Epoch 26: 100%|██████████████████| 88/88 [00:02<00:00, 37.00it/s, loss=24.1]\r\n", + "04/18 15:30: Val Epoch 26: average Loss: 21.698836, time: 2.38 s\r\n", + "Test Epoch 26: 100%|█████████████████| 89/89 [00:02<00:00, 37.35it/s, loss=22.2]\r\n", + "04/18 15:30: Test Epoch 26: average Loss: 21.860732, time: 2.38 s\r\n", + "Train Epoch 27: 75%|██████████▌ | 199/264 [00:14<00:04, 14.08it/s, loss=20.7]04/18 15:31: Train Epoch 27: 200/264 Loss: 20.980124\r\n", + "Train Epoch 27: 100%|██████████████| 264/264 [00:18<00:00, 14.07it/s, loss=20.4]\r\n", + "04/18 
15:31: Train Epoch 27: average Loss: 20.319519, time: 18.77 s\r\n", + "Val Epoch 27: 100%|██████████████████| 88/88 [00:02<00:00, 37.00it/s, loss=23.5]\r\n", + "04/18 15:31: Val Epoch 27: average Loss: 20.933960, time: 2.38 s\r\n", + "Test Epoch 27: 100%|█████████████████| 89/89 [00:02<00:00, 37.32it/s, loss=22.4]\r\n", + "04/18 15:31: Test Epoch 27: average Loss: 21.216676, time: 2.39 s\r\n", + "04/18 15:31: Best validation model saved!\r\n", + "Train Epoch 28: 75%|██████████▌ | 199/264 [00:14<00:04, 14.08it/s, loss=19.8]04/18 15:31: Train Epoch 28: 200/264 Loss: 20.241156\r\n", + "Train Epoch 28: 100%|██████████████| 264/264 [00:18<00:00, 14.08it/s, loss=19.5]\r\n", + "04/18 15:31: Train Epoch 28: average Loss: 20.203729, time: 18.75 s\r\n", + "Val Epoch 28: 100%|██████████████████| 88/88 [00:02<00:00, 36.99it/s, loss=23.7]\r\n", + "04/18 15:31: Val Epoch 28: average Loss: 21.091199, time: 2.38 s\r\n", + "Test Epoch 28: 100%|█████████████████| 89/89 [00:02<00:00, 37.35it/s, loss=22.3]\r\n", + "04/18 15:31: Test Epoch 28: average Loss: 21.326995, time: 2.38 s\r\n", + "Train Epoch 29: 75%|██████████▌ | 199/264 [00:14<00:04, 14.07it/s, loss=21.2]04/18 15:32: Train Epoch 29: 200/264 Loss: 20.161242\r\n", + "Train Epoch 29: 100%|██████████████| 264/264 [00:18<00:00, 14.08it/s, loss=19.9]\r\n", + "04/18 15:32: Train Epoch 29: average Loss: 20.256450, time: 18.76 s\r\n", + "Val Epoch 29: 100%|██████████████████| 88/88 [00:02<00:00, 36.99it/s, loss=23.3]\r\n", + "04/18 15:32: Val Epoch 29: average Loss: 20.683587, time: 2.38 s\r\n", + "Test Epoch 29: 100%|█████████████████| 89/89 [00:02<00:00, 37.35it/s, loss=22.7]\r\n", + "04/18 15:32: Test Epoch 29: average Loss: 20.950286, time: 2.38 s\r\n", + "04/18 15:32: Best validation model saved!\r\n", + "Train Epoch 30: 75%|██████████▌ | 199/264 [00:14<00:04, 14.06it/s, loss=19.4]04/18 15:32: Train Epoch 30: 200/264 Loss: 19.059256\r\n", + "Train Epoch 30: 100%|████████████████| 264/264 [00:18<00:00, 14.08it/s, loss=20]\r\n", + "04/18 15:32: Train Epoch 30: average Loss: 20.190815, time: 18.76 s\r\n", + "Val Epoch 30: 100%|██████████████████| 88/88 [00:02<00:00, 36.98it/s, loss=23.7]\r\n", + "04/18 15:32: Val Epoch 30: average Loss: 21.270211, time: 2.38 s\r\n", + "Test Epoch 30: 100%|█████████████████| 89/89 [00:02<00:00, 37.35it/s, loss=22.8]\r\n", + "04/18 15:32: Test Epoch 30: average Loss: 21.465990, time: 2.38 s\r\n", + "Train Epoch 31: 75%|██████████▌ | 199/264 [00:14<00:04, 14.09it/s, loss=21.1]04/18 15:32: Train Epoch 31: 200/264 Loss: 21.166910\r\n", + "Train Epoch 31: 100%|██████████████| 264/264 [00:18<00:00, 14.09it/s, loss=20.1]\r\n", + "04/18 15:32: Train Epoch 31: average Loss: 20.160952, time: 18.74 s\r\n", + "Val Epoch 31: 100%|██████████████████| 88/88 [00:02<00:00, 36.99it/s, loss=23.2]\r\n", + "04/18 15:32: Val Epoch 31: average Loss: 20.817308, time: 2.38 s\r\n", + "Test Epoch 31: 100%|█████████████████| 89/89 [00:02<00:00, 37.36it/s, loss=22.1]\r\n", + "04/18 15:32: Test Epoch 31: average Loss: 21.041389, time: 2.38 s\r\n", + "Train Epoch 32: 75%|██████████▌ | 199/264 [00:14<00:04, 14.10it/s, loss=20.3]04/18 15:33: Train Epoch 32: 200/264 Loss: 20.239948\r\n", + "Train Epoch 32: 100%|██████████████| 264/264 [00:18<00:00, 14.07it/s, loss=18.7]\r\n", + "04/18 15:33: Train Epoch 32: average Loss: 19.972721, time: 18.76 s\r\n", + "Val Epoch 32: 100%|██████████████████| 88/88 [00:02<00:00, 36.98it/s, loss=24.2]\r\n", + "04/18 15:33: Val Epoch 32: average Loss: 21.453561, time: 2.38 s\r\n", + "Test Epoch 32: 100%|█████████████████| 
89/89 [00:02<00:00, 37.36it/s, loss=22.2]\r\n", + "04/18 15:33: Test Epoch 32: average Loss: 21.662818, time: 2.38 s\r\n", + "Train Epoch 33: 75%|██████████▌ | 199/264 [00:14<00:04, 14.08it/s, loss=19.7]04/18 15:33: Train Epoch 33: 200/264 Loss: 21.384359\r\n", + "Train Epoch 33: 100%|██████████████| 264/264 [00:18<00:00, 14.08it/s, loss=20.4]\r\n", + "04/18 15:33: Train Epoch 33: average Loss: 20.011278, time: 18.75 s\r\n", + "Val Epoch 33: 100%|██████████████████| 88/88 [00:02<00:00, 37.00it/s, loss=24.5]\r\n", + "04/18 15:33: Val Epoch 33: average Loss: 21.382805, time: 2.38 s\r\n", + "Test Epoch 33: 100%|█████████████████| 89/89 [00:02<00:00, 37.35it/s, loss=23.2]\r\n", + "04/18 15:33: Test Epoch 33: average Loss: 21.578498, time: 2.38 s\r\n", + "Train Epoch 34: 75%|████████████ | 199/264 [00:14<00:04, 14.11it/s, loss=20]04/18 15:33: Train Epoch 34: 200/264 Loss: 20.696024\r\n", + "Train Epoch 34: 100%|██████████████| 264/264 [00:18<00:00, 14.09it/s, loss=19.5]\r\n", + "04/18 15:34: Train Epoch 34: average Loss: 19.989019, time: 18.74 s\r\n", + "Val Epoch 34: 100%|██████████████████| 88/88 [00:02<00:00, 37.00it/s, loss=23.8]\r\n", + "04/18 15:34: Val Epoch 34: average Loss: 21.284217, time: 2.38 s\r\n", + "Test Epoch 34: 100%|█████████████████| 89/89 [00:02<00:00, 37.34it/s, loss=21.9]\r\n", + "04/18 15:34: Test Epoch 34: average Loss: 21.322555, time: 2.38 s\r\n", + "Train Epoch 35: 75%|██████████▌ | 199/264 [00:14<00:04, 14.09it/s, loss=19.8]04/18 15:34: Train Epoch 35: 200/264 Loss: 19.667231\r\n", + "Train Epoch 35: 100%|██████████████| 264/264 [00:18<00:00, 14.08it/s, loss=20.6]\r\n", + "04/18 15:34: Train Epoch 35: average Loss: 19.916933, time: 18.75 s\r\n", + "Val Epoch 35: 100%|██████████████████| 88/88 [00:02<00:00, 37.00it/s, loss=23.2]\r\n", + "04/18 15:34: Val Epoch 35: average Loss: 20.768519, time: 2.38 s\r\n", + "Test Epoch 35: 100%|█████████████████| 89/89 [00:02<00:00, 37.34it/s, loss=22.1]\r\n", + "04/18 15:34: Test Epoch 35: average Loss: 20.955493, time: 2.38 s\r\n", + "Train Epoch 36: 75%|██████████▌ | 199/264 [00:14<00:04, 14.08it/s, loss=20.1]04/18 15:34: Train Epoch 36: 200/264 Loss: 19.548941\r\n", + "Train Epoch 36: 100%|██████████████| 264/264 [00:18<00:00, 14.07it/s, loss=20.3]\r\n", + "04/18 15:34: Train Epoch 36: average Loss: 19.855145, time: 18.76 s\r\n", + "Val Epoch 36: 100%|████████████████████| 88/88 [00:02<00:00, 36.95it/s, loss=23]\r\n", + "04/18 15:34: Val Epoch 36: average Loss: 20.758952, time: 2.38 s\r\n", + "Test Epoch 36: 100%|█████████████████| 89/89 [00:02<00:00, 37.35it/s, loss=21.8]\r\n", + "04/18 15:34: Test Epoch 36: average Loss: 20.953051, time: 2.38 s\r\n", + "Train Epoch 37: 75%|██████████▌ | 199/264 [00:14<00:04, 14.09it/s, loss=20.5]04/18 15:35: Train Epoch 37: 200/264 Loss: 20.110346\r\n", + "Train Epoch 37: 100%|████████████████| 264/264 [00:18<00:00, 14.08it/s, loss=19]\r\n", + "04/18 15:35: Train Epoch 37: average Loss: 19.784550, time: 18.75 s\r\n", + "Val Epoch 37: 100%|████████████████████| 88/88 [00:02<00:00, 36.98it/s, loss=23]\r\n", + "04/18 15:35: Val Epoch 37: average Loss: 20.586615, time: 2.38 s\r\n", + "Test Epoch 37: 100%|█████████████████| 89/89 [00:02<00:00, 37.34it/s, loss=22.5]\r\n", + "04/18 15:35: Test Epoch 37: average Loss: 20.779311, time: 2.38 s\r\n", + "04/18 15:35: Best validation model saved!\r\n", + "Train Epoch 38: 75%|██████████▌ | 199/264 [00:14<00:04, 14.11it/s, loss=19.7]04/18 15:35: Train Epoch 38: 200/264 Loss: 18.363724\r\n", + "Train Epoch 38: 100%|██████████████| 264/264 [00:18<00:00, 
14.08it/s, loss=20.3]\r\n", + "04/18 15:35: Train Epoch 38: average Loss: 19.730839, time: 18.76 s\r\n", + "Val Epoch 38: 100%|██████████████████| 88/88 [00:02<00:00, 36.99it/s, loss=22.9]\r\n", + "04/18 15:35: Val Epoch 38: average Loss: 20.638085, time: 2.38 s\r\n", + "Test Epoch 38: 100%|█████████████████| 89/89 [00:02<00:00, 37.35it/s, loss=22.2]\r\n", + "04/18 15:35: Test Epoch 38: average Loss: 20.847774, time: 2.38 s\r\n", + "Train Epoch 39: 75%|████████████ | 199/264 [00:14<00:04, 14.05it/s, loss=20]04/18 15:35: Train Epoch 39: 200/264 Loss: 20.497795\r\n", + "Train Epoch 39: 100%|██████████████| 264/264 [00:18<00:00, 14.07it/s, loss=21.7]\r\n", + "04/18 15:36: Train Epoch 39: average Loss: 19.813993, time: 18.76 s\r\n", + "Val Epoch 39: 100%|██████████████████| 88/88 [00:02<00:00, 37.00it/s, loss=23.3]\r\n", + "04/18 15:36: Val Epoch 39: average Loss: 21.008566, time: 2.38 s\r\n", + "Test Epoch 39: 100%|█████████████████| 89/89 [00:02<00:00, 37.35it/s, loss=22.8]\r\n", + "04/18 15:36: Test Epoch 39: average Loss: 21.195567, time: 2.38 s\r\n", + "Train Epoch 40: 75%|██████████▌ | 199/264 [00:14<00:04, 14.12it/s, loss=19.9]04/18 15:36: Train Epoch 40: 200/264 Loss: 19.352921\r\n", + "Train Epoch 40: 100%|██████████████| 264/264 [00:18<00:00, 14.07it/s, loss=19.6]\r\n", + "04/18 15:36: Train Epoch 40: average Loss: 19.770867, time: 18.76 s\r\n", + "Val Epoch 40: 100%|██████████████████| 88/88 [00:02<00:00, 36.99it/s, loss=23.8]\r\n", + "04/18 15:36: Val Epoch 40: average Loss: 20.920919, time: 2.38 s\r\n", + "Test Epoch 40: 100%|█████████████████| 89/89 [00:02<00:00, 37.34it/s, loss=21.8]\r\n", + "04/18 15:36: Test Epoch 40: average Loss: 21.133729, time: 2.38 s\r\n", + "Train Epoch 41: 75%|██████████▌ | 199/264 [00:14<00:04, 14.09it/s, loss=19.4]04/18 15:36: Train Epoch 41: 200/264 Loss: 21.116802\r\n", + "Train Epoch 41: 100%|██████████████| 264/264 [00:18<00:00, 14.07it/s, loss=20.3]\r\n", + "04/18 15:36: Train Epoch 41: average Loss: 19.662341, time: 18.77 s\r\n", + "Val Epoch 41: 100%|██████████████████| 88/88 [00:02<00:00, 36.99it/s, loss=22.9]\r\n", + "04/18 15:36: Val Epoch 41: average Loss: 20.642668, time: 2.38 s\r\n", + "Test Epoch 41: 100%|█████████████████| 89/89 [00:02<00:00, 37.36it/s, loss=21.5]\r\n", + "04/18 15:36: Test Epoch 41: average Loss: 20.808197, time: 2.38 s\r\n", + "Train Epoch 42: 75%|██████████▌ | 199/264 [00:14<00:04, 14.06it/s, loss=20.3]04/18 15:37: Train Epoch 42: 200/264 Loss: 18.446213\r\n", + "Train Epoch 42: 100%|██████████████| 264/264 [00:18<00:00, 14.08it/s, loss=19.7]\r\n", + "04/18 15:37: Train Epoch 42: average Loss: 19.652844, time: 18.75 s\r\n", + "Val Epoch 42: 100%|██████████████████| 88/88 [00:02<00:00, 37.00it/s, loss=22.6]\r\n", + "04/18 15:37: Val Epoch 42: average Loss: 20.480098, time: 2.38 s\r\n", + "Test Epoch 42: 100%|█████████████████| 89/89 [00:02<00:00, 37.33it/s, loss=21.7]\r\n", + "04/18 15:37: Test Epoch 42: average Loss: 20.626187, time: 2.38 s\r\n", + "04/18 15:37: Best validation model saved!\r\n", + "Train Epoch 43: 75%|████████████ | 199/264 [00:14<00:04, 14.12it/s, loss=19]04/18 15:37: Train Epoch 43: 200/264 Loss: 18.960085\r\n", + "Train Epoch 43: 100%|██████████████| 264/264 [00:18<00:00, 14.08it/s, loss=18.7]\r\n", + "04/18 15:37: Train Epoch 43: average Loss: 19.738113, time: 18.75 s\r\n", + "Val Epoch 43: 100%|██████████████████| 88/88 [00:02<00:00, 36.99it/s, loss=22.7]\r\n", + "04/18 15:37: Val Epoch 43: average Loss: 20.592857, time: 2.38 s\r\n", + "Test Epoch 43: 100%|█████████████████| 89/89 [00:02<00:00, 
37.36it/s, loss=21.8]\r\n", + "04/18 15:37: Test Epoch 43: average Loss: 20.787053, time: 2.38 s\r\n", + "Train Epoch 44: 75%|██████████▌ | 199/264 [00:14<00:04, 14.11it/s, loss=20.2]04/18 15:37: Train Epoch 44: 200/264 Loss: 19.335417\r\n", + "Train Epoch 44: 100%|██████████████| 264/264 [00:18<00:00, 14.08it/s, loss=19.3]\r\n", + "04/18 15:37: Train Epoch 44: average Loss: 19.761036, time: 18.75 s\r\n", + "Val Epoch 44: 100%|██████████████████| 88/88 [00:02<00:00, 37.00it/s, loss=23.5]\r\n", + "04/18 15:38: Val Epoch 44: average Loss: 21.283964, time: 2.38 s\r\n", + "Test Epoch 44: 100%|█████████████████| 89/89 [00:02<00:00, 37.33it/s, loss=23.1]\r\n", + "04/18 15:38: Test Epoch 44: average Loss: 21.469832, time: 2.38 s\r\n", + "Train Epoch 45: 75%|██████████▌ | 199/264 [00:14<00:04, 14.10it/s, loss=18.9]04/18 15:38: Train Epoch 45: 200/264 Loss: 20.202532\r\n", + "Train Epoch 45: 100%|██████████████| 264/264 [00:18<00:00, 14.08it/s, loss=19.2]\r\n", + "04/18 15:38: Train Epoch 45: average Loss: 19.473784, time: 18.75 s\r\n", + "Val Epoch 45: 100%|██████████████████| 88/88 [00:02<00:00, 36.99it/s, loss=22.6]\r\n", + "04/18 15:38: Val Epoch 45: average Loss: 20.453671, time: 2.38 s\r\n", + "Test Epoch 45: 100%|█████████████████| 89/89 [00:02<00:00, 37.35it/s, loss=21.6]\r\n", + "04/18 15:38: Test Epoch 45: average Loss: 20.634249, time: 2.38 s\r\n", + "04/18 15:38: Best validation model saved!\r\n", + "Train Epoch 46: 75%|██████████▌ | 199/264 [00:14<00:04, 14.09it/s, loss=18.9]04/18 15:38: Train Epoch 46: 200/264 Loss: 19.467962\r\n", + "Train Epoch 46: 100%|██████████████| 264/264 [00:18<00:00, 14.07it/s, loss=18.6]\r\n", + "04/18 15:38: Train Epoch 46: average Loss: 19.430203, time: 18.76 s\r\n", + "Val Epoch 46: 100%|██████████████████| 88/88 [00:02<00:00, 36.96it/s, loss=22.4]\r\n", + "04/18 15:38: Val Epoch 46: average Loss: 20.445109, time: 2.38 s\r\n", + "Test Epoch 46: 100%|█████████████████| 89/89 [00:02<00:00, 37.36it/s, loss=22.1]\r\n", + "04/18 15:38: Test Epoch 46: average Loss: 20.585934, time: 2.38 s\r\n", + "04/18 15:38: Best validation model saved!\r\n", + "Train Epoch 47: 75%|██████████▌ | 199/264 [00:14<00:04, 14.10it/s, loss=21.4]04/18 15:39: Train Epoch 47: 200/264 Loss: 19.603148\r\n", + "Train Epoch 47: 100%|██████████████| 264/264 [00:18<00:00, 14.07it/s, loss=19.4]\r\n", + "04/18 15:39: Train Epoch 47: average Loss: 19.497315, time: 18.77 s\r\n", + "Val Epoch 47: 100%|██████████████████| 88/88 [00:02<00:00, 36.98it/s, loss=22.5]\r\n", + "04/18 15:39: Val Epoch 47: average Loss: 20.632452, time: 2.38 s\r\n", + "Test Epoch 47: 100%|█████████████████| 89/89 [00:02<00:00, 37.35it/s, loss=21.6]\r\n", + "04/18 15:39: Test Epoch 47: average Loss: 20.758448, time: 2.38 s\r\n", + "Train Epoch 48: 75%|██████████▌ | 199/264 [00:14<00:04, 14.11it/s, loss=19.9]04/18 15:39: Train Epoch 48: 200/264 Loss: 20.592278\r\n", + "Train Epoch 48: 100%|██████████████| 264/264 [00:18<00:00, 14.07it/s, loss=19.3]\r\n", + "04/18 15:39: Train Epoch 48: average Loss: 19.412447, time: 18.76 s\r\n", + "Val Epoch 48: 100%|████████████████████| 88/88 [00:02<00:00, 37.01it/s, loss=23]\r\n", + "04/18 15:39: Val Epoch 48: average Loss: 20.979722, time: 2.38 s\r\n", + "Test Epoch 48: 100%|█████████████████| 89/89 [00:02<00:00, 37.34it/s, loss=22.1]\r\n", + "04/18 15:39: Test Epoch 48: average Loss: 21.058189, time: 2.38 s\r\n", + "Train Epoch 49: 75%|██████████▌ | 199/264 [00:14<00:04, 14.07it/s, loss=17.9]04/18 15:39: Train Epoch 49: 200/264 Loss: 18.558853\r\n", + "Train Epoch 49: 
100%|████████████████| 264/264 [00:18<00:00, 14.08it/s, loss=18]\r\n", + "04/18 15:39: Train Epoch 49: average Loss: 19.603099, time: 18.76 s\r\n", + "Val Epoch 49: 100%|██████████████████| 88/88 [00:02<00:00, 37.00it/s, loss=22.6]\r\n", + "04/18 15:39: Val Epoch 49: average Loss: 20.621084, time: 2.38 s\r\n", + "Test Epoch 49: 100%|█████████████████| 89/89 [00:02<00:00, 37.34it/s, loss=21.9]\r\n", + "04/18 15:40: Test Epoch 49: average Loss: 20.776100, time: 2.38 s\r\n", + "Train Epoch 50: 75%|██████████▌ | 199/264 [00:14<00:04, 14.11it/s, loss=19.7]04/18 15:40: Train Epoch 50: 200/264 Loss: 19.841299\r\n", + "Train Epoch 50: 100%|██████████████| 264/264 [00:18<00:00, 14.08it/s, loss=18.9]\r\n", + "04/18 15:40: Train Epoch 50: average Loss: 19.432674, time: 18.75 s\r\n", + "Val Epoch 50: 100%|██████████████████| 88/88 [00:02<00:00, 36.99it/s, loss=22.5]\r\n", + "04/18 15:40: Val Epoch 50: average Loss: 20.330265, time: 2.38 s\r\n", + "Test Epoch 50: 100%|█████████████████| 89/89 [00:02<00:00, 37.35it/s, loss=21.6]\r\n", + "04/18 15:40: Test Epoch 50: average Loss: 20.527227, time: 2.38 s\r\n", + "04/18 15:40: Best validation model saved!\r\n", + "Train Epoch 51: 75%|██████████▌ | 199/264 [00:14<00:04, 14.05it/s, loss=21.3]04/18 15:40: Train Epoch 51: 200/264 Loss: 18.177088\r\n", + "Train Epoch 51: 100%|████████████████| 264/264 [00:18<00:00, 14.07it/s, loss=20]\r\n", + "04/18 15:40: Train Epoch 51: average Loss: 19.492620, time: 18.76 s\r\n", + "Val Epoch 51: 100%|██████████████████| 88/88 [00:02<00:00, 37.01it/s, loss=23.4]\r\n", + "04/18 15:40: Val Epoch 51: average Loss: 21.465949, time: 2.38 s\r\n", + "Test Epoch 51: 100%|█████████████████| 89/89 [00:02<00:00, 37.35it/s, loss=23.8]\r\n", + "04/18 15:40: Test Epoch 51: average Loss: 21.671903, time: 2.38 s\r\n", + "Train Epoch 52: 75%|██████████▌ | 199/264 [00:14<00:04, 14.11it/s, loss=19.7]04/18 15:41: Train Epoch 52: 200/264 Loss: 20.761515\r\n", + "Train Epoch 52: 100%|██████████████| 264/264 [00:18<00:00, 14.07it/s, loss=18.7]\r\n", + "04/18 15:41: Train Epoch 52: average Loss: 19.423770, time: 18.76 s\r\n", + "Val Epoch 52: 100%|██████████████████| 88/88 [00:02<00:00, 36.98it/s, loss=22.8]\r\n", + "04/18 15:41: Val Epoch 52: average Loss: 20.755795, time: 2.38 s\r\n", + "Test Epoch 52: 100%|█████████████████| 89/89 [00:02<00:00, 37.35it/s, loss=21.8]\r\n", + "04/18 15:41: Test Epoch 52: average Loss: 20.912931, time: 2.38 s\r\n", + "Train Epoch 53: 75%|██████████▌ | 199/264 [00:14<00:04, 14.03it/s, loss=19.1]04/18 15:41: Train Epoch 53: 200/264 Loss: 20.528215\r\n", + "Train Epoch 53: 100%|██████████████| 264/264 [00:18<00:00, 14.07it/s, loss=18.4]\r\n", + "04/18 15:41: Train Epoch 53: average Loss: 19.396375, time: 18.76 s\r\n", + "Val Epoch 53: 100%|██████████████████| 88/88 [00:02<00:00, 36.99it/s, loss=22.2]\r\n", + "04/18 15:41: Val Epoch 53: average Loss: 20.210960, time: 2.38 s\r\n", + "Test Epoch 53: 100%|█████████████████| 89/89 [00:02<00:00, 37.34it/s, loss=22.2]\r\n", + "04/18 15:41: Test Epoch 53: average Loss: 20.388522, time: 2.38 s\r\n", + "04/18 15:41: Best validation model saved!\r\n", + "Train Epoch 54: 75%|██████████▌ | 199/264 [00:14<00:04, 14.08it/s, loss=18.3]04/18 15:41: Train Epoch 54: 200/264 Loss: 18.878670\r\n", + "Train Epoch 54: 100%|████████████████| 264/264 [00:18<00:00, 14.07it/s, loss=19]\r\n", + "04/18 15:41: Train Epoch 54: average Loss: 19.248442, time: 18.77 s\r\n", + "Val Epoch 54: 100%|██████████████████| 88/88 [00:02<00:00, 36.97it/s, loss=23.1]\r\n", + "04/18 15:41: Val Epoch 54: average 
Loss: 21.073780, time: 2.38 s\r\n", + "Test Epoch 54: 100%|█████████████████| 89/89 [00:02<00:00, 37.32it/s, loss=22.2]\r\n", + "04/18 15:41: Test Epoch 54: average Loss: 21.315897, time: 2.38 s\r\n", + "Train Epoch 55: 75%|██████████▌ | 199/264 [00:14<00:04, 14.10it/s, loss=20.1]04/18 15:42: Train Epoch 55: 200/264 Loss: 20.081511\r\n", + "Train Epoch 55: 100%|██████████████| 264/264 [00:18<00:00, 14.07it/s, loss=20.3]\r\n", + "04/18 15:42: Train Epoch 55: average Loss: 19.331000, time: 18.76 s\r\n", + "Val Epoch 55: 100%|██████████████████| 88/88 [00:02<00:00, 36.96it/s, loss=22.6]\r\n", + "04/18 15:42: Val Epoch 55: average Loss: 20.506599, time: 2.38 s\r\n", + "Test Epoch 55: 100%|█████████████████| 89/89 [00:02<00:00, 37.33it/s, loss=21.6]\r\n", + "04/18 15:42: Test Epoch 55: average Loss: 20.697025, time: 2.38 s\r\n", + "Train Epoch 56: 75%|██████████▌ | 199/264 [00:14<00:04, 14.09it/s, loss=19.5]04/18 15:42: Train Epoch 56: 200/264 Loss: 19.281361\r\n", + "Train Epoch 56: 100%|██████████████| 264/264 [00:18<00:00, 14.07it/s, loss=18.3]\r\n", + "04/18 15:42: Train Epoch 56: average Loss: 19.236541, time: 18.76 s\r\n", + "Val Epoch 56: 100%|██████████████████| 88/88 [00:02<00:00, 37.01it/s, loss=22.8]\r\n", + "04/18 15:42: Val Epoch 56: average Loss: 20.614472, time: 2.38 s\r\n", + "Test Epoch 56: 100%|█████████████████| 89/89 [00:02<00:00, 37.35it/s, loss=21.6]\r\n", + "04/18 15:42: Test Epoch 56: average Loss: 20.802479, time: 2.38 s\r\n", + "Train Epoch 57: 75%|████████████ | 199/264 [00:14<00:04, 14.06it/s, loss=20]04/18 15:42: Train Epoch 57: 200/264 Loss: 19.697676\r\n", + "Train Epoch 57: 100%|██████████████| 264/264 [00:18<00:00, 14.07it/s, loss=18.7]\r\n", + "04/18 15:43: Train Epoch 57: average Loss: 19.181848, time: 18.76 s\r\n", + "Val Epoch 57: 100%|██████████████████| 88/88 [00:02<00:00, 36.98it/s, loss=22.9]\r\n", + "04/18 15:43: Val Epoch 57: average Loss: 20.830666, time: 2.38 s\r\n", + "Test Epoch 57: 100%|█████████████████| 89/89 [00:02<00:00, 37.34it/s, loss=21.3]\r\n", + "04/18 15:43: Test Epoch 57: average Loss: 21.008264, time: 2.38 s\r\n", + "Train Epoch 58: 75%|██████████▌ | 199/264 [00:14<00:04, 14.05it/s, loss=18.8]04/18 15:43: Train Epoch 58: 200/264 Loss: 20.451786\r\n", + "Train Epoch 58: 100%|██████████████| 264/264 [00:18<00:00, 14.07it/s, loss=19.4]\r\n", + "04/18 15:43: Train Epoch 58: average Loss: 19.188656, time: 18.76 s\r\n", + "Val Epoch 58: 100%|████████████████████| 88/88 [00:02<00:00, 36.99it/s, loss=23]\r\n", + "04/18 15:43: Val Epoch 58: average Loss: 20.665284, time: 2.38 s\r\n", + "Test Epoch 58: 100%|█████████████████| 89/89 [00:02<00:00, 37.36it/s, loss=21.5]\r\n", + "04/18 15:43: Test Epoch 58: average Loss: 20.800964, time: 2.38 s\r\n", + "Train Epoch 59: 75%|██████████▌ | 199/264 [00:14<00:04, 14.12it/s, loss=18.3]04/18 15:43: Train Epoch 59: 200/264 Loss: 19.634094\r\n", + "Train Epoch 59: 100%|██████████████| 264/264 [00:18<00:00, 14.07it/s, loss=18.3]\r\n", + "04/18 15:43: Train Epoch 59: average Loss: 19.246439, time: 18.76 s\r\n", + "Val Epoch 59: 100%|██████████████████| 88/88 [00:02<00:00, 37.01it/s, loss=22.6]\r\n", + "04/18 15:43: Val Epoch 59: average Loss: 20.543856, time: 2.38 s\r\n", + "Test Epoch 59: 100%|█████████████████| 89/89 [00:02<00:00, 37.35it/s, loss=21.8]\r\n", + "04/18 15:43: Test Epoch 59: average Loss: 20.700365, time: 2.38 s\r\n", + "Train Epoch 60: 75%|██████████▌ | 199/264 [00:14<00:04, 14.07it/s, loss=18.9]04/18 15:44: Train Epoch 60: 200/264 Loss: 18.916515\r\n", + "Train Epoch 60: 100%|██████████████| 
264/264 [00:18<00:00, 14.08it/s, loss=19.4]\r\n", + "04/18 15:44: Train Epoch 60: average Loss: 19.185081, time: 18.75 s\r\n", + "Val Epoch 60: 100%|██████████████████| 88/88 [00:02<00:00, 37.00it/s, loss=22.5]\r\n", + "04/18 15:44: Val Epoch 60: average Loss: 20.436561, time: 2.38 s\r\n", + "Test Epoch 60: 100%|█████████████████| 89/89 [00:02<00:00, 37.34it/s, loss=22.5]\r\n", + "04/18 15:44: Test Epoch 60: average Loss: 20.603210, time: 2.38 s\r\n", + "Train Epoch 61: 75%|██████████▌ | 199/264 [00:14<00:04, 14.12it/s, loss=18.7]04/18 15:44: Train Epoch 61: 200/264 Loss: 18.924559\r\n", + "Train Epoch 61: 100%|██████████████| 264/264 [00:18<00:00, 14.08it/s, loss=18.1]\r\n", + "04/18 15:44: Train Epoch 61: average Loss: 19.187679, time: 18.76 s\r\n", + "Val Epoch 61: 100%|██████████████████| 88/88 [00:02<00:00, 37.00it/s, loss=22.4]\r\n", + "04/18 15:44: Val Epoch 61: average Loss: 20.413942, time: 2.38 s\r\n", + "Test Epoch 61: 100%|█████████████████| 89/89 [00:02<00:00, 37.35it/s, loss=21.9]\r\n", + "04/18 15:44: Test Epoch 61: average Loss: 20.622479, time: 2.38 s\r\n", + "Train Epoch 62: 75%|██████████▌ | 199/264 [00:14<00:04, 14.11it/s, loss=19.6]04/18 15:44: Train Epoch 62: 200/264 Loss: 19.292921\r\n", + "Train Epoch 62: 100%|██████████████| 264/264 [00:18<00:00, 14.08it/s, loss=19.7]\r\n", + "04/18 15:45: Train Epoch 62: average Loss: 19.329297, time: 18.75 s\r\n", + "Val Epoch 62: 100%|██████████████████| 88/88 [00:02<00:00, 37.01it/s, loss=22.3]\r\n", + "04/18 15:45: Val Epoch 62: average Loss: 20.597072, time: 2.38 s\r\n", + "Test Epoch 62: 100%|███████████████████| 89/89 [00:02<00:00, 37.36it/s, loss=22]\r\n", + "04/18 15:45: Test Epoch 62: average Loss: 20.715091, time: 2.38 s\r\n", + "Train Epoch 63: 75%|██████████▌ | 199/264 [00:14<00:04, 13.95it/s, loss=18.3]04/18 15:45: Train Epoch 63: 200/264 Loss: 17.980318\r\n", + "Train Epoch 63: 100%|██████████████| 264/264 [00:18<00:00, 14.03it/s, loss=20.1]\r\n", + "04/18 15:45: Train Epoch 63: average Loss: 19.025569, time: 18.81 s\r\n", + "Val Epoch 63: 100%|██████████████████| 88/88 [00:02<00:00, 37.00it/s, loss=22.5]\r\n", + "04/18 15:45: Val Epoch 63: average Loss: 20.588684, time: 2.38 s\r\n", + "Test Epoch 63: 100%|█████████████████| 89/89 [00:02<00:00, 37.25it/s, loss=21.4]\r\n", + "04/18 15:45: Test Epoch 63: average Loss: 20.754457, time: 2.39 s\r\n", + "Train Epoch 64: 75%|██████████▌ | 199/264 [00:14<00:04, 13.93it/s, loss=18.6]04/18 15:45: Train Epoch 64: 200/264 Loss: 19.478308\r\n", + "Train Epoch 64: 100%|████████████████| 264/264 [00:18<00:00, 14.03it/s, loss=19]\r\n", + "04/18 15:45: Train Epoch 64: average Loss: 19.071902, time: 18.82 s\r\n", + "Val Epoch 64: 100%|██████████████████| 88/88 [00:02<00:00, 36.40it/s, loss=22.3]\r\n", + "04/18 15:45: Val Epoch 64: average Loss: 20.410618, time: 2.42 s\r\n", + "Test Epoch 64: 100%|█████████████████| 89/89 [00:02<00:00, 36.73it/s, loss=21.4]\r\n", + "04/18 15:45: Test Epoch 64: average Loss: 20.548186, time: 2.42 s\r\n", + "Train Epoch 65: 75%|██████████▌ | 199/264 [00:14<00:04, 13.94it/s, loss=18.8]04/18 15:46: Train Epoch 65: 200/264 Loss: 20.106136\r\n", + "Train Epoch 65: 100%|██████████████| 264/264 [00:18<00:00, 13.95it/s, loss=19.1]\r\n", + "04/18 15:46: Train Epoch 65: average Loss: 19.067270, time: 18.93 s\r\n", + "Val Epoch 65: 100%|██████████████████| 88/88 [00:02<00:00, 36.40it/s, loss=22.4]\r\n", + "04/18 15:46: Val Epoch 65: average Loss: 20.407996, time: 2.42 s\r\n", + "Test Epoch 65: 100%|█████████████████| 89/89 [00:02<00:00, 36.75it/s, 
loss=22.1]\r\n", + "04/18 15:46: Test Epoch 65: average Loss: 20.686973, time: 2.42 s\r\n", + "Train Epoch 66: 75%|██████████▌ | 199/264 [00:14<00:04, 13.98it/s, loss=19.1]04/18 15:46: Train Epoch 66: 200/264 Loss: 20.473032\r\n", + "Train Epoch 66: 100%|████████████████| 264/264 [00:18<00:00, 13.94it/s, loss=18]\r\n", + "04/18 15:46: Train Epoch 66: average Loss: 19.128083, time: 18.93 s\r\n", + "Val Epoch 66: 100%|██████████████████| 88/88 [00:02<00:00, 36.39it/s, loss=22.1]\r\n", + "04/18 15:46: Val Epoch 66: average Loss: 20.386831, time: 2.42 s\r\n", + "Test Epoch 66: 100%|█████████████████| 89/89 [00:02<00:00, 36.70it/s, loss=21.3]\r\n", + "04/18 15:46: Test Epoch 66: average Loss: 20.535779, time: 2.43 s\r\n", + "Train Epoch 67: 75%|██████████▌ | 199/264 [00:14<00:04, 13.95it/s, loss=19.5]04/18 15:46: Train Epoch 67: 200/264 Loss: 18.592131\r\n", + "Train Epoch 67: 100%|██████████████| 264/264 [00:18<00:00, 13.94it/s, loss=20.6]\r\n", + "04/18 15:47: Train Epoch 67: average Loss: 19.053517, time: 18.94 s\r\n", + "Val Epoch 67: 100%|██████████████████| 88/88 [00:02<00:00, 36.36it/s, loss=22.1]\r\n", + "04/18 15:47: Val Epoch 67: average Loss: 20.290403, time: 2.42 s\r\n", + "Test Epoch 67: 100%|█████████████████| 89/89 [00:02<00:00, 36.73it/s, loss=21.6]\r\n", + "04/18 15:47: Test Epoch 67: average Loss: 20.480237, time: 2.42 s\r\n", + "Train Epoch 68: 75%|████████████ | 199/264 [00:14<00:04, 13.89it/s, loss=19]04/18 15:47: Train Epoch 68: 200/264 Loss: 20.195356\r\n", + "Train Epoch 68: 100%|██████████████| 264/264 [00:18<00:00, 13.94it/s, loss=19.4]\r\n", + "04/18 15:47: Train Epoch 68: average Loss: 19.065702, time: 18.94 s\r\n", + "Val Epoch 68: 100%|██████████████████| 88/88 [00:02<00:00, 36.35it/s, loss=22.2]\r\n", + "04/18 15:47: Val Epoch 68: average Loss: 20.350747, time: 2.42 s\r\n", + "Test Epoch 68: 100%|█████████████████| 89/89 [00:02<00:00, 36.71it/s, loss=21.5]\r\n", + "04/18 15:47: Test Epoch 68: average Loss: 20.576846, time: 2.42 s\r\n", + "04/18 15:47: Validation performance didn't improve for 15 epochs. 
Training stops.\r\n", + "04/18 15:47: Best models saved at /home/czzhangheng/code/TrafficWheel/experiments/PEMSD7/2025-04-18_15-20-48/best_model.pth and /home/czzhangheng/code/TrafficWheel/experiments/PEMSD7/2025-04-18_15-20-48/best_test_model.pth\r\n", + "04/18 15:47: Testing on best validation model\r\n", + "04/18 15:47: Horizon 01, MAE: 17.3233, RMSE: 27.7127, MAPE: 0.0803\r\n", + "04/18 15:47: Horizon 02, MAE: 18.2212, RMSE: 29.7846, MAPE: 0.0792\r\n", + "04/18 15:47: Horizon 03, MAE: 18.9630, RMSE: 31.1453, MAPE: 0.0835\r\n", + "04/18 15:47: Horizon 04, MAE: 19.4835, RMSE: 32.1422, MAPE: 0.0837\r\n", + "04/18 15:47: Horizon 05, MAE: 19.9294, RMSE: 32.9599, MAPE: 0.0844\r\n", + "04/18 15:47: Horizon 06, MAE: 20.3815, RMSE: 33.7357, MAPE: 0.0874\r\n", + "04/18 15:47: Horizon 07, MAE: 20.7470, RMSE: 34.3549, MAPE: 0.0878\r\n", + "04/18 15:47: Horizon 08, MAE: 21.1239, RMSE: 34.9675, MAPE: 0.0908\r\n", + "04/18 15:47: Horizon 09, MAE: 21.4703, RMSE: 35.5311, MAPE: 0.0919\r\n", + "04/18 15:47: Horizon 10, MAE: 21.8317, RMSE: 36.1064, MAPE: 0.0924\r\n", + "04/18 15:47: Horizon 11, MAE: 22.2766, RMSE: 36.7033, MAPE: 0.0946\r\n", + "04/18 15:47: Horizon 12, MAE: 22.6966, RMSE: 37.2852, MAPE: 0.0959\r\n", + "04/18 15:47: Average Horizon, MAE: 20.3707, RMSE: 33.6516, MAPE: 0.0876\r\n", + "04/18 15:47: Testing on best test model\r\n", + "04/18 15:47: Horizon 01, MAE: 17.3233, RMSE: 27.7127, MAPE: 0.0803\r\n", + "04/18 15:47: Horizon 02, MAE: 18.2212, RMSE: 29.7846, MAPE: 0.0792\r\n", + "04/18 15:47: Horizon 03, MAE: 18.9630, RMSE: 31.1453, MAPE: 0.0835\r\n", + "04/18 15:47: Horizon 04, MAE: 19.4835, RMSE: 32.1422, MAPE: 0.0837\r\n", + "04/18 15:47: Horizon 05, MAE: 19.9294, RMSE: 32.9599, MAPE: 0.0844\r\n", + "04/18 15:47: Horizon 06, MAE: 20.3815, RMSE: 33.7357, MAPE: 0.0874\r\n", + "04/18 15:47: Horizon 07, MAE: 20.7470, RMSE: 34.3549, MAPE: 0.0878\r\n", + "04/18 15:47: Horizon 08, MAE: 21.1239, RMSE: 34.9675, MAPE: 0.0908\r\n", + "04/18 15:47: Horizon 09, MAE: 21.4703, RMSE: 35.5311, MAPE: 0.0919\r\n", + "04/18 15:47: Horizon 10, MAE: 21.8317, RMSE: 36.1064, MAPE: 0.0924\r\n", + "04/18 15:47: Horizon 11, MAE: 22.2766, RMSE: 36.7033, MAPE: 0.0946\r\n", + "04/18 15:47: Horizon 12, MAE: 22.6966, RMSE: 37.2852, MAPE: 0.0959\r\n", + "04/18 15:47: Average Horizon, MAE: 20.3707, RMSE: 33.6516, MAPE: 0.0876\r\n" + ] + } + ], "source": [ "!python run.py --model EXP --dataset PEMSD7 --mode train --device cuda:1" ] diff --git a/config/EXP/PEMSD4.yaml b/config/EXP/PEMSD4.yaml index adeeebc..163d2ca 100644 --- a/config/EXP/PEMSD4.yaml +++ b/config/EXP/PEMSD4.yaml @@ -27,7 +27,7 @@ train: epochs: 300 lr_init: 0.003 weight_decay: 0 - lr_decay: True + lr_decay: False lr_decay_rate: 0.5 lr_decay_step: "5,20,40,65" early_stop: True diff --git a/config/EXP/SD.yaml b/config/EXP/SD.yaml index 7334242..977b2e8 100644 --- a/config/EXP/SD.yaml +++ b/config/EXP/SD.yaml @@ -14,18 +14,10 @@ data: days_per_week: 7 model: + batch_size: 64 input_dim: 1 output_dim: 1 - embed_dim: 12 - rnn_units: 64 - num_layers: 1 - cheb_order: 2 - use_day: True - use_week: True - graph_size: 30 - expert_nums: 8 - top_k: 2 - hidden_dim: 64 + in_len: 12 train: loss_func: mae diff --git a/config/STID/PEMSD4.yaml b/config/STID/PEMSD4.yaml new file mode 100644 index 0000000..8cf6ba5 --- /dev/null +++ b/config/STID/PEMSD4.yaml @@ -0,0 +1,58 @@ +data: + num_nodes: 307 + lag: 12 + horizon: 12 + val_ratio: 0.2 + test_ratio: 0.2 + tod: False + normalizer: std + column_wise: False + default_graph: True + add_time_in_day: True + add_day_in_week: True 
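+  # 288 = one day of 5-minute steps (PEMS-style traffic data is sampled every 5 minutes)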
+  steps_per_day: 288
+  days_per_week: 7
+
+model:
+  input_dim: 3
+  output_dim: 1
+  history: 12
+  horizon: 12
+  num_nodes: 307
+  input_len: 12
+  embed_dim: 32
+  output_len: 12
+  num_layer: 3
+  if_node: True
+  node_dim: 32
+  if_T_i_D: True
+  if_D_i_W: True
+  temp_dim_tid: 32
+  temp_dim_diw: 32
+  time_of_day_size: 288
+  day_of_week_size: 7
+
+
+train:
+  loss_func: mae
+  seed: 1
+  batch_size: 64
+  epochs: 300
+  lr_init: 0.002
+  weight_decay: 0.0001
+  lr_decay: False
+  lr_decay_rate: 0.3
+  lr_decay_step: "1,50,80"
+  early_stop: True
+  early_stop_patience: 15
+  grad_norm: False
+  max_grad_norm: 5
+  real_value: True
+
+test:
+  mae_thresh: null
+  mape_thresh: 0.0
+
+log:
+  log_step: 200
+  plot: False
diff --git a/lib/initializer.py b/lib/initializer.py
index c08bacc..2e3d321 100644
--- a/lib/initializer.py
+++ b/lib/initializer.py
@@ -12,6 +12,8 @@ def init_model(args, device):
             nn.init.xavier_uniform_(p)
         else:
             nn.init.uniform_(p)
+    total_params = sum(p.numel() for p in model.parameters())
+    print(f"Model has {total_params} parameters")
     return model
 
 def init_optimizer(model, args):
diff --git a/model/EXP/EXP2.py b/model/EXP/EXP2.py
index 494c0c6..85517a9 100644
--- a/model/EXP/EXP2.py
+++ b/model/EXP/EXP2.py
@@ -21,7 +21,7 @@ class PositionalEncoding(nn.Module):
         return x + self.pe[:T].unsqueeze(1)  # (T,1,d_model) broadcast to (T,B,d_model)
 
 
-class TemporalTransformerForecast(nn.Module):
+class EXP(nn.Module):
     """
     Transformer-based multi-step forecasting:
      - uses only x[...,0] as the input channel
diff --git a/model/EXP/EXP21.py b/model/EXP/EXP21.py
index 7081e83..6c0bd78 100644
--- a/model/EXP/EXP21.py
+++ b/model/EXP/EXP21.py
@@ -4,7 +4,7 @@ import torch.nn.functional as F
 
 
 """
-Replace the input/output proj layers with multi-layer perceptrons
+Add temporal embeddings
 """
 
 class DynamicGraphConstructor(nn.Module):
@@ -104,6 +104,7 @@ class EXP(nn.Module):
         self.time_embedding = nn.Embedding(self.time_slots, self.hidden_dim)
         self.day_embedding = nn.Embedding(7, self.hidden_dim)
 
+        # input projection now still only takes the flow history
         self.input_proj = MLP(
             in_dim      = self.seq_len,
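The temporal-embedding recipe that EXP21 introduces (and that the variants below reuse) quantizes the last-step time features into lookup indices. A minimal sketch of that derivation, assuming the default 5-minute slot (time_slots = 288) and illustrative shapes:

import torch
import torch.nn as nn

time_slots, hidden_dim = 24 * 60 // 5, 64              # 288 five-minute slots
x_time = torch.rand(8, 12, 307)                        # time_in_day, scaled to [0, 1]
x_day = torch.randint(0, 7, (8, 12, 307)).float()      # day_in_week in {0..6}
t_idx = (x_time[:, -1, :] * (time_slots - 1)).long()   # (B, N), values in {0..287}
d_idx = x_day[:, -1, :].long()                         # (B, N)
emb = nn.Embedding(time_slots, hidden_dim)(t_idx) + nn.Embedding(7, hidden_dim)(d_idx)
print(emb.shape)                                       # torch.Size([8, 307, 64])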
diff --git a/model/EXP/EXP22.py b/model/EXP/EXP22.py
index aff4072..cae6577 100644
--- a/model/EXP/EXP22.py
+++ b/model/EXP/EXP22.py
@@ -2,11 +2,10 @@ import torch
 import torch.nn as nn
 import torch.nn.functional as F
 
+"""
+Add spatial embeddings
+"""
 
-"""
-Replace the input/output proj layers with multi-layer perceptrons,
-and add an explicit spatial embedding to the EXP model.
-"""
 
 class DynamicGraphConstructor(nn.Module):
     def __init__(self, node_num, embed_dim):
@@ -35,8 +34,8 @@ class GraphConvBlock(nn.Module):
     def forward(self, x, adj):
         # x: (B, N, F_in), adj: (N, N)
         res = x
-        x = torch.matmul(adj, x) # multiply features by the adjacency
-        x = self.theta(x) # linear transform
+        x = torch.matmul(adj, x)  # multiply features by the adjacency
+        x = self.theta(x)         # linear transform
         # residual connection
         x = x + (res if self.residual else self.res_proj(res))
         return F.relu(x)
@@ -90,7 +89,7 @@ class MLP(nn.Module):
         dims = [in_dim] + hidden_dims + [out_dim]
         layers = []
         for i in range(len(dims) - 2):
-            layers += [nn.Linear(dims[i], dims[i+1]), activation()]
+            layers += [nn.Linear(dims[i], dims[i + 1]), activation()]
         layers += [nn.Linear(dims[-2], dims[-1])]
         self.net = nn.Sequential(*layers)
 
@@ -103,17 +102,18 @@ class EXP(nn.Module):
     def __init__(self, args):
         super().__init__()
         # training & output parameters
-        self.horizon = args['horizon']
+        self.horizon    = args['horizon']
         self.output_dim = args['output_dim']
-        self.seq_len = args.get('in_len', 12)
+        self.seq_len    = args.get('in_len', 12)
         self.hidden_dim = args.get('hidden_dim', 64)
-        self.num_nodes = args['num_nodes']
-        self.embed_dim = args.get('embed_dim', 16)
+        self.num_nodes  = args['num_nodes']
+        self.embed_dim  = args.get('embed_dim', 16)
 
         # ==== temporal embeddings ====
-        self.time_slots = args.get('time_slots', 24 * 60 // args.get('time_slot', 5))
+        self.time_slots     = args.get('time_slots', 24 * 60 // args.get('time_slot', 5))
         self.time_embedding = nn.Embedding(self.time_slots, self.hidden_dim)
-        self.day_embedding = nn.Embedding(7, self.hidden_dim)
+        self.day_embedding  = nn.Embedding(7, self.hidden_dim)
 
         # ==== spatial embedding ====
         # one learnable vector per node
@@ -124,9 +124,9 @@ class EXP(nn.Module):
 
         # input projection: MLP over the flow sequence only
         self.input_proj = MLP(
-            in_dim      = self.seq_len,
-            hidden_dims = [self.hidden_dim],
-            out_dim     = self.hidden_dim
+            in_dim=self.seq_len,
+            hidden_dims=[self.hidden_dim],
+            out_dim=self.hidden_dim
         )
 
         # two SandwichBlocks
@@ -135,9 +135,9 @@ class EXP(nn.Module):
 
         # output projection
         self.out_proj = MLP(
-            in_dim      = self.hidden_dim,
-            hidden_dims = [2 * self.hidden_dim],
-            out_dim     = self.horizon * self.output_dim
+            in_dim=self.hidden_dim,
+            hidden_dims=[2 * self.hidden_dim],
+            out_dim=self.horizon * self.output_dim
         )
 
     def forward(self, x):
@@ -151,7 +151,7 @@ class EXP(nn.Module):
         # split the three sequences
         x_flow = x[..., 0]  # (B, T, N)
         x_time = x[..., 1]  # (B, T, N)
-        x_day = x[..., 2] # (B, T, N)
+        x_day  = x[..., 2]  # (B, T, N)
 
         B, T, N = x_flow.shape
         assert T == self.seq_len, f"expected sequence length {self.seq_len}, got {T}"
@@ -162,14 +162,16 @@ class EXP(nn.Module):
 
         # 2) compute the discrete temporal embeddings
         t_idx = (x_time[:, -1, :] * (self.time_slots - 1)).long()  # (B, N)
-        d_idx = x_day[:, -1, :].long() # (B, N)
-        time_emb = self.time_embedding(t_idx) # (B, N, hidden_dim)
-        day_emb = self.day_embedding(d_idx) # (B, N, hidden_dim)
+        d_idx = x_day[:, -1, :].long()         # (B, N)
+        time_emb = self.time_embedding(t_idx)  # (B, N, hidden_dim)
+        day_emb  = self.day_embedding(d_idx)   # (B, N, hidden_dim)
 
         # 3) compute the spatial embedding and expand it to the batch size
-        node_idx = torch.arange(N, device=x.device)               # (N,)
-        spatial_emb = self.spatial_embedding[node_idx]            # (N, hidden_dim)
-        spatial_emb = spatial_emb.unsqueeze(0).expand(B, -1, -1)  # (B, N, hidden_dim)
+        spatial_emb = self.spatial_embedding.unsqueeze(0).expand(B, N, self.hidden_dim)  # -> (B, N, hidden_dim)
 
         # 4) add all three embeddings onto h0
         h0 = h0 + time_emb + day_emb + spatial_emb
@@ -180,7 +182,7 @@ class EXP(nn.Module):
         h2 = self.sandwich2(h1)
 
         # 6) output projection -> (B, horizon, N, output_dim)
-        out = self.out_proj(h2) # (B, N, horizon*out_dim)
+        out = self.out_proj(h2)  # (B, N, horizon*out_dim)
         out = out.view(B, N, self.horizon, self.output_dim)
-        out = out.permute(0, 2, 1, 3) # (B, horizon, N, output_dim)
+        out = out.permute(0, 2, 1, 3)  # (B, horizon, N, output_dim)
         return out
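A quick smoke test for the spatial-embedding variant above (a sketch only: the args dict is illustrative and relies on the defaults visible in this diff, not on a shipped config):

import torch
from model.EXP.EXP22 import EXP

args = {'horizon': 12, 'output_dim': 1, 'in_len': 12, 'hidden_dim': 64,
        'num_nodes': 307, 'embed_dim': 16, 'time_slot': 5}
model = EXP(args)
x = torch.randn(8, 12, 307, 3)                          # (B, T, N, D_total)
x[..., 1] = torch.rand(8, 12, 307)                      # time_in_day in [0, 1]
x[..., 2] = torch.randint(0, 7, (8, 12, 307)).float()   # day_in_week in {0..6}
print(model(x).shape)                                   # expected: torch.Size([8, 12, 307, 1])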
diff --git a/model/EXP/EXP23.py b/model/EXP/EXP23.py
new file mode 100644
index 0000000..5133a86
--- /dev/null
+++ b/model/EXP/EXP23.py
@@ -0,0 +1,159 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+"""
+Add temporal embeddings + graph construction from a learnable adjacency matrix
+"""
+
+class DynamicGraphConstructor(nn.Module):
+    def __init__(self, node_num):
+        super().__init__()
+        # represent the adjacency directly as an N x N learnable parameter matrix
+        self.adj_param = nn.Parameter(torch.randn(node_num, node_num), requires_grad=True)
+
+    def forward(self):
+        # nonlinear truncation removes negative edges
+        adj = F.relu(self.adj_param)
+        # row-wise normalization
+        adj = F.softmax(adj, dim=-1)
+        return adj
+
+class GraphConvBlock(nn.Module):
+    def __init__(self, input_dim, output_dim):
+        super().__init__()
+        self.theta = nn.Linear(input_dim, output_dim)
+        self.residual = (input_dim == output_dim)
+        if not self.residual:
+            self.res_proj = nn.Linear(input_dim, output_dim)
+
+    def forward(self, x, adj):
+        # x: (B, N, C)
+        res = x
+        # multiply features by the adjacency
+        x = torch.matmul(adj, x)
+        x = self.theta(x)
+        x = x + (res if self.residual else self.res_proj(res))
+        return F.relu(x)
+
+class MANBA_Block(nn.Module):
+    def __init__(self, input_dim, hidden_dim):
+        super().__init__()
+        self.attn = nn.MultiheadAttention(embed_dim=input_dim, num_heads=4, batch_first=True)
+        self.ffn = nn.Sequential(
+            nn.Linear(input_dim, hidden_dim),
+            nn.ReLU(),
+            nn.Linear(hidden_dim, input_dim)
+        )
+        self.norm1 = nn.LayerNorm(input_dim)
+        self.norm2 = nn.LayerNorm(input_dim)
+
+    def forward(self, x):
+        # x: (B, N, C)
+        res = x
+        x_attn, _ = self.attn(x, x, x)
+        x = self.norm1(res + x_attn)
+        res2 = x
+        x_ffn = self.ffn(x)
+        x = self.norm2(res2 + x_ffn)
+        return x
+
+class SandwichBlock(nn.Module):
+    def __init__(self, num_nodes, hidden_dim):
+        super().__init__()
+        self.manba1 = MANBA_Block(hidden_dim, hidden_dim * 2)
+        self.graph_constructor = DynamicGraphConstructor(num_nodes)
+        self.gc = GraphConvBlock(hidden_dim, hidden_dim)
+        self.manba2 = MANBA_Block(hidden_dim, hidden_dim * 2)
+
+    def forward(self, h):
+        # h: (B, N, C)
+        h1 = self.manba1(h)
+        adj = self.graph_constructor()  # (N, N)
+        h2 = self.gc(h1, adj)
+        h3 = self.manba2(h2)
+        return h3
+
+class MLP(nn.Module):
+    def __init__(self, in_dim, hidden_dims, out_dim, activation=nn.ReLU):
+        super().__init__()
+        dims = [in_dim] + hidden_dims + [out_dim]
+        layers = []
+        for i in range(len(dims) - 2):
+            layers += [nn.Linear(dims[i], dims[i + 1]), activation()]
+        layers += [nn.Linear(dims[-2], dims[-1])]
+        self.net = nn.Sequential(*layers)
+
+    def forward(self, x):
+        return self.net(x)
+
+class EXP(nn.Module):
+    def __init__(self, args):
+        super().__init__()
+        self.horizon    = args['horizon']
+        self.output_dim = args['output_dim']
+        self.seq_len    = args.get('in_len', 12)
+        self.hidden_dim = args.get('hidden_dim', 64)
+        self.num_nodes  = args['num_nodes']
+
+        # ==== discrete temporal embeddings ====
+        self.time_slots = args.get('time_slots', 24 * 60 // args.get('time_slot', 5))
+        self.time_embedding = nn.Embedding(self.time_slots, self.hidden_dim)
+        self.day_embedding = nn.Embedding(7, self.hidden_dim)
+
+        # flow-history projection
+        self.input_proj = MLP(
+            in_dim      = self.seq_len,
+            hidden_dims = [self.hidden_dim],
+            out_dim     = self.hidden_dim
+        )
+
+        # two SandwichBlocks
+        self.sandwich1 = SandwichBlock(self.num_nodes, self.hidden_dim)
+        self.sandwich2 = SandwichBlock(self.num_nodes, self.hidden_dim)
+
+        # output projection
+        self.out_proj = MLP(
+            in_dim      = self.hidden_dim,
+            hidden_dims = [2 * self.hidden_dim],
+            out_dim     = self.horizon * self.output_dim
+        )
+
+    def forward(self, x):
+        """
+        x: (B, T, N, D_total)
+        D_total >= 3:
+          x[...,0] = flow,
+          x[...,1] = time_in_day (0...1),
+          x[...,2] = day_in_week (0...6)
+        """
+        x_flow = x[..., 0]  # (B, T, N)
+        x_time = x[..., 1]  # (B, T, N)
+        x_day  = x[..., 2]  # (B, T, N)
+
+        B, T, N = x_flow.shape
+        assert T == self.seq_len
+
+        # 1) project the flow history
+        x_flat = x_flow.permute(0, 2, 1).reshape(B * N, T)
+        h0 = self.input_proj(x_flat).view(B, N, self.hidden_dim)
+
+        # 2) discrete time indices
+        t_idx = (x_time[:, -1, :] * (self.time_slots - 1)).long()  # (B, N)
+        d_idx = x_day[:, -1, :].long()                             # (B, N)
+        time_emb = self.time_embedding(t_idx)
+        day_emb = self.day_embedding(d_idx)
+
+        # 3) inject the temporal embeddings
+        h0 = h0 + time_emb + day_emb
+
+        # 4) Sandwich + residual
+        h1 = self.sandwich1(h0)
+        h1 = h1 + h0
+        h2 = self.sandwich2(h1)
+
+        # 5) output projection
+        out = self.out_proj(h2)  # (B, N, horizon*output_dim)
+        out = out.view(B, N, self.horizon, self.output_dim)
+        out = out.permute(0, 2, 1, 3)  # (B, horizon, N, output_dim)
+        return out
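EXP23's constructor learns the adjacency as a full N x N matrix, where the other variants factor it through two node-embedding matrices. A side-by-side sketch of the two parameterizations (N and d are illustrative):

import torch
import torch.nn.functional as F

N, d = 307, 16
dense = torch.randn(N, N)                          # EXP23: O(N^2) free parameters
low_rank = torch.randn(N, d) @ torch.randn(d, N)   # EXP22/EXP24/...: rank-d, O(N*d) parameters
adj = F.softmax(F.relu(dense), dim=-1)             # same ReLU + row softmax in both variants
print(adj.sum(dim=-1)[:3])                         # each row sums to 1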
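Unrolling EXP24's three skips: h1 = S1(h0) + h0, h2 = S2(h1) + h1 = S2(h1) + S1(h0) + h0, and h3 = h2 + h0 = S2(h1) + S1(h0) + 2*h0, so the time-embedded input reaches the output head along three paths, with the global skip counting h0 twice.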
diff --git a/model/EXP/EXP24.py b/model/EXP/EXP24.py
new file mode 100644
index 0000000..8dc4b2d
--- /dev/null
+++ b/model/EXP/EXP24.py
@@ -0,0 +1,168 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+"""
+Add temporal embeddings + triple residual
+"""
+
+class DynamicGraphConstructor(nn.Module):
+    def __init__(self, node_num, embed_dim):
+        super().__init__()
+        self.nodevec1 = nn.Parameter(torch.randn(node_num, embed_dim), requires_grad=True)
+        self.nodevec2 = nn.Parameter(torch.randn(node_num, embed_dim), requires_grad=True)
+
+    def forward(self):
+        adj = torch.matmul(self.nodevec1, self.nodevec2.T)
+        adj = F.relu(adj)
+        adj = F.softmax(adj, dim=-1)
+        return adj
+
+
+class GraphConvBlock(nn.Module):
+    def __init__(self, input_dim, output_dim):
+        super().__init__()
+        self.theta = nn.Linear(input_dim, output_dim)
+        self.residual = (input_dim == output_dim)
+        if not self.residual:
+            self.res_proj = nn.Linear(input_dim, output_dim)
+
+    def forward(self, x, adj):
+        res = x
+        x = torch.matmul(adj, x)
+        x = self.theta(x)
+        x = x + (res if self.residual else self.res_proj(res))
+        return F.relu(x)
+
+
+class MANBA_Block(nn.Module):
+    def __init__(self, input_dim, hidden_dim):
+        super().__init__()
+        self.attn = nn.MultiheadAttention(embed_dim=input_dim, num_heads=4, batch_first=True)
+        self.ffn = nn.Sequential(
+            nn.Linear(input_dim, hidden_dim),
+            nn.ReLU(),
+            nn.Linear(hidden_dim, input_dim)
+        )
+        self.norm1 = nn.LayerNorm(input_dim)
+        self.norm2 = nn.LayerNorm(input_dim)
+
+    def forward(self, x):
+        res = x
+        x_attn, _ = self.attn(x, x, x)
+        x = self.norm1(res + x_attn)
+        res2 = x
+        x_ffn = self.ffn(x)
+        x = self.norm2(res2 + x_ffn)
+        return x
+
+
+class SandwichBlock(nn.Module):
+    def __init__(self, num_nodes, embed_dim, hidden_dim):
+        super().__init__()
+        self.manba1 = MANBA_Block(hidden_dim, hidden_dim * 2)
+        self.graph_constructor = DynamicGraphConstructor(num_nodes, embed_dim)
+        self.gc = GraphConvBlock(hidden_dim, hidden_dim)
+        self.manba2 = MANBA_Block(hidden_dim, hidden_dim * 2)
+
+    def forward(self, h):
+        h1 = self.manba1(h)
+        adj = self.graph_constructor()
+        h2 = self.gc(h1, adj)
+        h3 = self.manba2(h2)
+        return h3  # no residual here; the parent EXP adds them uniformly
+
+
+class MLP(nn.Module):
+    def __init__(self, in_dim, hidden_dims, out_dim, activation=nn.ReLU):
+        super().__init__()
+        dims = [in_dim] + hidden_dims + [out_dim]
+        layers = []
+        for i in range(len(dims) - 2):
+            layers += [nn.Linear(dims[i], dims[i + 1]), activation()]
+        layers += [nn.Linear(dims[-2], dims[-1])]
+        self.net = nn.Sequential(*layers)
+
+    def forward(self, x):
+        return self.net(x)
+
+
+class EXP(nn.Module):
+    def __init__(self, args):
+        super().__init__()
+        self.horizon    = args['horizon']
+        self.output_dim = args['output_dim']
+        self.seq_len    = args.get('in_len', 12)
+        self.hidden_dim = args.get('hidden_dim', 64)
+        self.num_nodes  = args['num_nodes']
+        self.embed_dim  = args.get('embed_dim', 16)
+
+        # ==== discrete temporal embeddings ====
+        self.time_slots = args.get('time_slots', 24 * 60 // args.get('time_slot', 5))
+        self.time_embedding = nn.Embedding(self.time_slots, self.hidden_dim)
+        self.day_embedding = nn.Embedding(7, self.hidden_dim)
+
+        # flow-history projection
+        self.input_proj = MLP(
+            in_dim      = self.seq_len,
+            hidden_dims = [self.hidden_dim],
+            out_dim     = self.hidden_dim
+        )
+
+        # two SandwichBlocks
+        self.sandwich1 = SandwichBlock(self.num_nodes, self.embed_dim, self.hidden_dim)
+        self.sandwich2 = SandwichBlock(self.num_nodes, self.embed_dim, self.hidden_dim)
+
+        # output projection
+        self.out_proj = MLP(
+            in_dim      = self.hidden_dim,
+            hidden_dims = [2 * self.hidden_dim],
+            out_dim     = self.horizon * self.output_dim
+        )
+
+    def forward(self, x):
+        """
+        x: (B, T, N, D_total)
+        D_total >= 3:
+          x[...,0] = flow,
+          x[...,1] = time_in_day (0...1),
+          x[...,2] = day_in_week (0...6)
+        """
+        x_flow = x[..., 0]  # (B, T, N)
+        x_time = x[..., 1]  # (B, T, N)
+        x_day  = x[..., 2]  # (B, T, N)
+
+        B, T, N = x_flow.shape
+        assert T == self.seq_len
+
+        # 1) project the flow history
+        x_flat = x_flow.permute(0, 2, 1).reshape(B * N, T)
+        h0 = self.input_proj(x_flat).view(B, N, self.hidden_dim)
+
+        # 2) discrete time indices
+        t_idx = (x_time[:, -1, :] * (self.time_slots - 1)).long()  # (B, N)
+        d_idx = x_day[:, -1, :].long()                             # (B, N)
+        time_emb = self.time_embedding(t_idx)
+        day_emb = self.day_embedding(d_idx)
+
+        # 3) inject the temporal embeddings
+        h0 = h0 + time_emb + day_emb
+
+        # ==== triple residual ====
+        # first: Sandwich1 + residual
+        h1 = self.sandwich1(h0)
+        h1 = h1 + h0
+
+        # second: Sandwich2 + residual
+        h2 = self.sandwich2(h1)
+        h2 = h2 + h1
+
+        # third: global residual (skip straight from the initial h0)
+        h3 = h2 + h0
+
+        # 5) output projection
+        out = self.out_proj(h3)  # (B, N, horizon*output_dim)
+        out = out.view(B, N, self.horizon, self.output_dim)
+        out = out.permute(0, 2, 1, 3)  # (B, horizon, N, output_dim)
+        return out
diff --git a/model/EXP/EXP25.py b/model/EXP/EXP25.py
new file mode 100644
index 0000000..88ef7cb
--- /dev/null
+++ b/model/EXP/EXP25.py
@@ -0,0 +1,196 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class DynamicTanh(nn.Module):
+    """
+    Dynamic tanh activation with learnable scaling (alpha) and affine transformation (weight, bias).
+    """
+    def __init__(self, normalized_shape, channels_last=True, alpha_init_value=0.5):
+        super().__init__()
+        self.normalized_shape = normalized_shape
+        self.alpha_init_value = alpha_init_value
+        self.channels_last = channels_last
+
+        # learnable scale for tanh
+        self.alpha = nn.Parameter(torch.full((1,), alpha_init_value))
+        # affine parameters
+        self.weight = nn.Parameter(torch.ones(normalized_shape))
+        self.bias = nn.Parameter(torch.zeros(normalized_shape))
+
+    def forward(self, x):
+        # scaled tanh
+        x = torch.tanh(self.alpha * x)
+        # affine transform
+        if self.channels_last:
+            x = x * self.weight + self.bias
+        else:
+            # channels_first: assume shape (B, C, H, W)
+            x = x * self.weight[:, None, None] + self.bias[:, None, None]
+        return x
+
+    def extra_repr(self):
+        return f"normalized_shape={self.normalized_shape}, alpha_init_value={self.alpha_init_value}, channels_last={self.channels_last}"
+
+
+class DynamicGraphConstructor(nn.Module):
+    def __init__(self, node_num, embed_dim):
+        super().__init__()
+        self.nodevec1 = nn.Parameter(torch.randn(node_num, embed_dim), requires_grad=True)
+        self.nodevec2 = nn.Parameter(torch.randn(node_num, embed_dim), requires_grad=True)
+
+    def forward(self):
+        adj = torch.matmul(self.nodevec1, self.nodevec2.T)
+        adj = F.relu(adj)
+        adj = F.softmax(adj, dim=-1)
+        return adj
+
+
+class GraphConvBlock(nn.Module):
+    def __init__(self, input_dim, output_dim):
+        super().__init__()
+        self.theta = nn.Linear(input_dim, output_dim)
+        self.residual = (input_dim == output_dim)
+        if not self.residual:
+            self.res_proj = nn.Linear(input_dim, output_dim)
+
+    def forward(self, x, adj):
+        res = x
+        x = torch.matmul(adj, x)
+        x = self.theta(x)
+        x = x + (res if self.residual else self.res_proj(res))
+        return F.relu(x)
+
+
+class MANBA_Block(nn.Module):
+    """
+    Multi-head attention + feed-forward network with DynamicTanh replacing LayerNorm.
+ """ + def __init__(self, input_dim, hidden_dim): + super().__init__() + self.attn = nn.MultiheadAttention(embed_dim=input_dim, num_heads=4, batch_first=True) + self.ffn = nn.Sequential( + nn.Linear(input_dim, hidden_dim), + nn.ReLU(), + nn.Linear(hidden_dim, input_dim) + ) + # replace LayerNorm with DynamicTanh + self.norm1 = DynamicTanh(normalized_shape=input_dim, channels_last=True) + self.norm2 = DynamicTanh(normalized_shape=input_dim, channels_last=True) + + def forward(self, x): + # self-attention + res = x + x_attn, _ = self.attn(x, x, x) + x = self.norm1(res + x_attn) + # feed-forward + res2 = x + x_ffn = self.ffn(x) + x = self.norm2(res2 + x_ffn) + return x + + +class SandwichBlock(nn.Module): + def __init__(self, num_nodes, embed_dim, hidden_dim): + super().__init__() + self.manba1 = MANBA_Block(hidden_dim, hidden_dim * 2) + self.graph_constructor = DynamicGraphConstructor(num_nodes, embed_dim) + self.gc = GraphConvBlock(hidden_dim, hidden_dim) + self.manba2 = MANBA_Block(hidden_dim, hidden_dim * 2) + + def forward(self, h): + h1 = self.manba1(h) + adj = self.graph_constructor() + h2 = self.gc(h1, adj) + h3 = self.manba2(h2) + return h3 + + +class MLP(nn.Module): + def __init__(self, in_dim, hidden_dims, out_dim, activation=nn.ReLU): + super().__init__() + dims = [in_dim] + hidden_dims + [out_dim] + layers = [] + for i in range(len(dims) - 2): + layers.append(nn.Linear(dims[i], dims[i+1])) + layers.append(activation()) + layers.append(nn.Linear(dims[-2], dims[-1])) + self.net = nn.Sequential(*layers) + + def forward(self, x): + return self.net(x) + + +class EXP(nn.Module): + def __init__(self, args): + super().__init__() + self.horizon = args['horizon'] + self.output_dim = args['output_dim'] + self.seq_len = args.get('in_len', 12) + self.hidden_dim = args.get('hidden_dim', 64) + self.num_nodes = args['num_nodes'] + self.embed_dim = args.get('embed_dim', 16) + + # discrete time embeddings + self.time_slots = args.get('time_slots', 24 * 60 // args.get('time_slot', 5)) + self.time_embedding = nn.Embedding(self.time_slots, self.hidden_dim) + self.day_embedding = nn.Embedding(7, self.hidden_dim) + + # input projection for flow history + self.input_proj = MLP( + in_dim = self.seq_len, + hidden_dims = [self.hidden_dim], + out_dim = self.hidden_dim + ) + + # two Sandwich blocks + self.sandwich1 = SandwichBlock(self.num_nodes, self.embed_dim, self.hidden_dim) + self.sandwich2 = SandwichBlock(self.num_nodes, self.embed_dim, self.hidden_dim) + + # output projection + self.out_proj = MLP( + in_dim = self.hidden_dim, + hidden_dims = [2 * self.hidden_dim], + out_dim = self.horizon * self.output_dim + ) + + def forward(self, x): + """ + x: (B, T, N, D_total) where + x[...,0]=flow, x[...,1]=time_in_day (scaled), x[...,2]=day_in_week + """ + x_flow = x[..., 0] # (B, T, N) + x_time = x[..., 1] # (B, T, N) + x_day = x[..., 2] # (B, T, N) + + B, T, N = x_flow.shape + assert T == self.seq_len, "Input sequence length mismatch" + + # project flow history + x_flat = x_flow.permute(0, 2, 1).reshape(B * N, T) + h0 = self.input_proj(x_flat).view(B, N, self.hidden_dim) + + # time embeddings at last step + t_idx = (x_time[:, -1, :] * (self.time_slots - 1)).long() + d_idx = x_day[:, -1, :].long() + time_emb = self.time_embedding(t_idx) + day_emb = self.day_embedding(d_idx) + + # inject time features + h0 = h0 + time_emb + day_emb + + # Sandwich + residuals + h1 = self.sandwich1(h0) + h0 + h2 = self.sandwich2(h1) + + # output + out = self.out_proj(h2) + out = out.view(B, N, self.horizon, self.output_dim) + 
+        out = out.permute(0, 2, 1, 3)
+        return out
+
+# Example usage:
+# args = {'horizon':12, 'output_dim':1, 'num_nodes':170}
+# model = EXP(args)
+# print(model)
diff --git a/model/EXP/EXP26.py b/model/EXP/EXP26.py
new file mode 100644
index 0000000..d55d9d3
--- /dev/null
+++ b/model/EXP/EXP26.py
@@ -0,0 +1,195 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+"""
+Add temporal embeddings + introduce a graph attention network (GAT)
+"""
+
+class DynamicGraphConstructor(nn.Module):
+    def __init__(self, node_num, embed_dim):
+        super().__init__()
+        self.nodevec1 = nn.Parameter(torch.randn(node_num, embed_dim), requires_grad=True)
+        self.nodevec2 = nn.Parameter(torch.randn(node_num, embed_dim), requires_grad=True)
+
+    def forward(self):
+        adj = torch.matmul(self.nodevec1, self.nodevec2.T)
+        adj = F.relu(adj)
+        adj = F.softmax(adj, dim=-1)
+        return adj
+
+
+# the original GCN block is kept as a fallback
+class GraphConvBlock(nn.Module):
+    def __init__(self, input_dim, output_dim):
+        super().__init__()
+        self.theta = nn.Linear(input_dim, output_dim)
+        self.residual = (input_dim == output_dim)
+        if not self.residual:
+            self.res_proj = nn.Linear(input_dim, output_dim)
+
+    def forward(self, x, adj):
+        res = x
+        x = torch.matmul(adj, x)
+        x = self.theta(x)
+        x = x + (res if self.residual else self.res_proj(res))
+        return F.relu(x)
+
+
+# ★★ GAT part: adapted from LeronQ/GCN_predict-Pytorch ★★
+class GraphAttentionLayer(nn.Module):
+    def __init__(self, in_c, out_c):
+        super().__init__()
+        self.W = nn.Linear(in_c, out_c, bias=False)
+        self.b = nn.Parameter(torch.Tensor(out_c))
+        nn.init.xavier_uniform_(self.W.weight)
+        nn.init.zeros_(self.b)
+
+    def forward(self, h, adj):
+        # h: [B, N, C_in], adj: [N, N]
+        Wh = self.W(h)  # [B, N, C_out]
+        # compute attention scores
+        score = torch.bmm(Wh, Wh.transpose(1, 2)) * adj.unsqueeze(0)  # [B, N, N]
+        score = score.masked_fill(score == 0, -1e16)
+        alpha = F.softmax(score, dim=-1)  # [B, N, N]
+        # weighted sum plus bias
+        out = torch.bmm(alpha, Wh) + self.b  # [B, N, C_out]
+        return F.relu(out)
+
+class GraphAttentionBlock(nn.Module):
+    def __init__(self, input_dim, output_dim, n_heads=4):
+        super().__init__()
+        # multi-head attention
+        self.heads = nn.ModuleList([GraphAttentionLayer(input_dim, output_dim) for _ in range(n_heads)])
+        # one more attention layer maps the concatenated heads back down
+        self.out_att = GraphAttentionLayer(output_dim * n_heads, output_dim)
+        self.act = nn.ReLU()
+
+    def forward(self, x, adj):
+        # x: [B, N, C], adj: [N, N]
+        # run the heads in parallel, then concatenate
+        h_cat = torch.cat([head(x, adj) for head in self.heads], dim=-1)  # [B, N, output_dim * n_heads]
+        h_out = self.out_att(h_cat, adj)  # [B, N, output_dim]
+        return self.act(h_out)
+
+
+class MANBA_Block(nn.Module):
+    def __init__(self, input_dim, hidden_dim):
+        super().__init__()
+        self.attn = nn.MultiheadAttention(embed_dim=input_dim, num_heads=4, batch_first=True)
+        self.ffn = nn.Sequential(
+            nn.Linear(input_dim, hidden_dim),
+            nn.ReLU(),
+            nn.Linear(hidden_dim, input_dim)
+        )
+        self.norm1 = nn.LayerNorm(input_dim)
+        self.norm2 = nn.LayerNorm(input_dim)
+
+    def forward(self, x):
+        res = x
+        x_attn, _ = self.attn(x, x, x)
+        x = self.norm1(res + x_attn)
+        res2 = x
+        x_ffn = self.ffn(x)
+        x = self.norm2(res2 + x_ffn)
+        return x
+
+
+class SandwichBlock(nn.Module):
+    def __init__(self, num_nodes, embed_dim, hidden_dim):
+        super().__init__()
+        self.manba1 = MANBA_Block(hidden_dim, hidden_dim * 2)
+        self.graph_constructor = DynamicGraphConstructor(num_nodes, embed_dim)
+        # ★★ replaced with the GAT block ★★
+        self.gc = GraphAttentionBlock(hidden_dim, hidden_dim, n_heads=4)
+        self.manba2 = MANBA_Block(hidden_dim, hidden_dim * 2)
+
+    def forward(self, h):
+        h1 = self.manba1(h)
+        adj = self.graph_constructor()
+        h2 = self.gc(h1, adj)
+        h3 = self.manba2(h2)
+        return h3
+
+
+class MLP(nn.Module):
+    def __init__(self, in_dim, hidden_dims, out_dim, activation=nn.ReLU):
+        super().__init__()
+        dims = [in_dim] + hidden_dims + [out_dim]
+        layers = []
+        for i in range(len(dims) - 2):
+            layers += [nn.Linear(dims[i], dims[i + 1]), activation()]
+        layers += [nn.Linear(dims[-2], dims[-1])]
+        self.net = nn.Sequential(*layers)
+
+    def forward(self, x):
+        return self.net(x)
+
+
+class EXP(nn.Module):
+    def __init__(self, args):
+        super().__init__()
+        self.horizon    = args['horizon']
+        self.output_dim = args['output_dim']
+        self.seq_len    = args.get('in_len', 12)
+        self.hidden_dim = args.get('hidden_dim', 64)
+        self.num_nodes  = args['num_nodes']
+        self.embed_dim  = args.get('embed_dim', 16)
+
+        # ==== new: discrete temporal embeddings ====
+        self.time_slots = args.get('time_slots', 24 * 60 // args.get('time_slot', 5))
+        self.time_embedding = nn.Embedding(self.time_slots, self.hidden_dim)
+        self.day_embedding = nn.Embedding(7, self.hidden_dim)
+
+        # input projection (flow only)
+        self.input_proj = MLP(
+            in_dim      = self.seq_len,
+            hidden_dims = [self.hidden_dim],
+            out_dim     = self.hidden_dim
+        )
+
+        # two SandwichBlocks (now using GAT)
+        self.sandwich1 = SandwichBlock(self.num_nodes, self.embed_dim, self.hidden_dim)
+        self.sandwich2 = SandwichBlock(self.num_nodes, self.embed_dim, self.hidden_dim)
+
+        # output projection
+        self.out_proj = MLP(
+            in_dim      = self.hidden_dim,
+            hidden_dims = [2 * self.hidden_dim],
+            out_dim     = self.horizon * self.output_dim
+        )
+
+    def forward(self, x):
+        """
+        x: (B, T, N, D_total)
+        D_total >= 3, x[...,0]=flow, x[...,1]=time_in_day, x[...,2]=day_in_week
+        """
+        x_flow = x[..., 0]  # (B, T, N)
+        x_time = x[..., 1]  # (B, T, N)
+        x_day = x[..., 2]   # (B, T, N)
+
+        B, T, N = x_flow.shape
+        assert T == self.seq_len
+
+        # 1) project the flow history
+        x_flat = x_flow.permute(0, 2, 1).reshape(B * N, T)
+        h0 = self.input_proj(x_flat).view(B, N, self.hidden_dim)
+
+        # 2) take the last-step time indices and embed them
+        t_idx = (x_time[:, -1, :] * (self.time_slots - 1)).long()
+        d_idx = x_day[:, -1, :].long()
+        time_emb = self.time_embedding(t_idx)
+        day_emb = self.day_embedding(d_idx)
+
+        # 3) inject the temporal information
+        h0 = h0 + time_emb + day_emb
+
+        # 4) Sandwich + residual
+        h1 = self.sandwich1(h0)
+        h1 = h1 + h0
+        h2 = self.sandwich2(h1)
+
+        # 5) output
+        out = self.out_proj(h2)
+        out = out.view(B, N, self.horizon, self.output_dim).permute(0, 2, 1, 3)
+        return out
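One detail of the GAT layer above worth noting: DynamicGraphConstructor ends in a row softmax, so every entry of adj is strictly positive, and score == 0 (the mask condition) can only occur where the Wh inner product itself is exactly zero. In effect the -1e16 mask almost never fires and attention stays dense over all node pairs. A quick check of the adjacency positivity (sizes illustrative):

import torch
import torch.nn.functional as F

adj = F.softmax(F.relu(torch.randn(5, 5)), dim=-1)
print((adj > 0).all())   # tensor(True): the softmax output has no exact zeros here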
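diff --git a/model/EXP/EXP27.py b/model/EXP/EXP27.py
new file mode 100644
index 0000000..075a222
--- /dev/null
+++ b/model/EXP/EXP27.py
@@ -0,0 +1,170 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class MANBA_Block(nn.Module):
+    def __init__(self, input_dim, hidden_dim):
+        super().__init__()
+        self.attn = nn.MultiheadAttention(embed_dim=input_dim, num_heads=4, batch_first=True)
+        self.ffn = nn.Sequential(
+            nn.Linear(input_dim, hidden_dim),
+            nn.ReLU(),
+            nn.Linear(hidden_dim, input_dim)
+        )
+        self.norm1 = nn.LayerNorm(input_dim)
+        self.norm2 = nn.LayerNorm(input_dim)
+
+    def forward(self, x):
+        # x: (B, N, input_dim)
+        res = x
+        x_attn, _ = self.attn(x, x, x)
+        x = self.norm1(res + x_attn)
+        res2 = x
+        x_ffn = self.ffn(x)
+        x = self.norm2(res2 + x_ffn)
+        return x
+
+
+class ExpertBlock(nn.Module):
+    """
+    Mixture-of-Experts block: routes each node's representation to a selected expert or a shared expert.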
+ """ + def __init__(self, hidden_dim, num_experts): + super().__init__() + self.num_experts = num_experts + # gating network projects to num_experts + 1 (extra shared expert) + self.gate = nn.Linear(hidden_dim, num_experts + 1) + # per-expert FFNs + self.experts = nn.ModuleList([ + nn.Sequential( + nn.Linear(hidden_dim, hidden_dim * 2), + nn.ReLU(), + nn.Linear(hidden_dim * 2, hidden_dim) + ) for _ in range(num_experts) + ]) + # shared expert + self.shared_expert = nn.Sequential( + nn.Linear(hidden_dim, hidden_dim * 2), + nn.ReLU(), + nn.Linear(hidden_dim * 2, hidden_dim) + ) + + def forward(self, x): + # x: (B, N, hidden_dim) + B, N, D = x.shape + # flatten to (B*N, D) + flat = x.view(B * N, D) + # compute gating scores and select expert per node + scores = F.softmax(self.gate(flat), dim=-1) # (B*N, num_experts+1) + idx = scores.argmax(dim=-1) # (B*N,) + + out_flat = torch.zeros_like(flat) + # apply each expert + for e in range(self.num_experts): + mask = (idx == e) + if mask.any(): + out_flat[mask] = self.experts[e](flat[mask]) + # apply shared expert for last index + shared_mask = (idx == self.num_experts) + if shared_mask.any(): + out_flat[shared_mask] = self.shared_expert(flat[shared_mask]) + + # reshape back to (B, N, D) + return out_flat.view(B, N, D) + + +class MLP(nn.Module): + def __init__(self, in_dim, hidden_dims, out_dim, activation=nn.ReLU): + super().__init__() + dims = [in_dim] + hidden_dims + [out_dim] + layers = [] + for i in range(len(dims) - 2): + layers += [nn.Linear(dims[i], dims[i+1]), activation()] + layers += [nn.Linear(dims[-2], dims[-1])] + self.net = nn.Sequential(*layers) + + def forward(self, x): + return self.net(x) + + +class SandwichBlock(nn.Module): + def __init__(self, num_nodes, embed_dim, hidden_dim, num_experts): + super().__init__() + self.manba1 = MANBA_Block(hidden_dim, hidden_dim * 2) + self.expert_block = ExpertBlock(hidden_dim, num_experts) + self.manba2 = MANBA_Block(hidden_dim, hidden_dim * 2) + + def forward(self, h): + h1 = self.manba1(h) + h2 = self.expert_block(h1) + h3 = self.manba2(h2) + return h3 + + +class EXP(nn.Module): + def __init__(self, args): + super().__init__() + self.horizon = args['horizon'] + self.output_dim = args['output_dim'] + self.seq_len = args.get('in_len', 12) + self.hidden_dim = args.get('hidden_dim', 64) + self.num_nodes = args['num_nodes'] + self.embed_dim = args.get('embed_dim', 16) + self.num_experts = args.get('num_experts', 8) # number of private experts + + # discrete time embeddings + self.time_slots = args.get('time_slots', 24 * 60 // args.get('time_slot', 5)) + self.time_embedding = nn.Embedding(self.time_slots, self.hidden_dim) + self.day_embedding = nn.Embedding(7, self.hidden_dim) + + # input projection + self.input_proj = MLP( + in_dim = self.seq_len, + hidden_dims = [self.hidden_dim], + out_dim = self.hidden_dim + ) + + # two Sandwich blocks with MoE + self.sandwich1 = SandwichBlock(self.num_nodes, self.embed_dim, self.hidden_dim, self.num_experts) + self.sandwich2 = SandwichBlock(self.num_nodes, self.embed_dim, self.hidden_dim, self.num_experts) + + # output projection + self.out_proj = MLP( + in_dim = self.hidden_dim, + hidden_dims = [2 * self.hidden_dim], + out_dim = self.horizon * self.output_dim + ) + + def forward(self, x): + """ + x: (B, T, N, D_total) + x[...,0]= flow, x[...,1]=time_in_day, x[...,2]=day_in_week + """ + x_flow = x[..., 0] + x_time = x[..., 1] + x_day = x[..., 2] + + B, T, N = x_flow.shape + assert T == self.seq_len + + # project flow history + x_flat = x_flow.permute(0, 2, 
+        x_flat = x_flow.permute(0, 2, 1).reshape(B * N, T)
+        h0 = self.input_proj(x_flat).view(B, N, self.hidden_dim)
+
+        # time & day embeddings at last step
+        t_idx = (x_time[:, -1, :] * (self.time_slots - 1)).long()
+        d_idx = x_day[:, -1, :].long()
+        time_emb = self.time_embedding(t_idx)
+        day_emb = self.day_embedding(d_idx)
+        h0 = h0 + time_emb + day_emb
+
+        # two MoE Sandwich blocks + residuals
+        h1 = self.sandwich1(h0) + h0
+        h2 = self.sandwich2(h1) + h1
+
+        # output
+        out = self.out_proj(h2)
+        out = out.view(B, N, self.horizon, self.output_dim)
+        out = out.permute(0, 2, 1, 3)
+        return out
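EXP27's ExpertBlock routes with a hard top-1 gate; a sketch of the routing in isolation (dims illustrative). Because only the argmax of the gate output is used, the gate parameters receive no gradient through this forward pass, which is worth keeping in mind when training:

import torch
import torch.nn.functional as F

gate = torch.nn.Linear(64, 8 + 1)            # 8 private experts + 1 shared expert
flat = torch.randn(5, 64)                    # 5 node representations
scores = F.softmax(gate(flat), dim=-1)       # (5, 9)
idx = scores.argmax(dim=-1)                  # hard routing, e.g. tensor([3, 8, 0, 8, 5])
print((idx == 8).sum().item(), "of 5 nodes routed to the shared expert")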
diff --git a/model/EXP/EXP8b.py b/model/EXP/EXP8b.py
new file mode 100644
index 0000000..49f28e7
--- /dev/null
+++ b/model/EXP/EXP8b.py
@@ -0,0 +1,133 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+"""
+Variant with residual connections
+"""
+
+class DynamicGraphConstructor(nn.Module):
+    def __init__(self, node_num, embed_dim):
+        super().__init__()
+        self.nodevec1 = nn.Parameter(torch.randn(node_num, embed_dim), requires_grad=True)
+        self.nodevec2 = nn.Parameter(torch.randn(node_num, embed_dim), requires_grad=True)
+
+    def forward(self):
+        # (N, D) @ (D, N) -> (N, N)
+        adj = torch.matmul(self.nodevec1, self.nodevec2.T)
+        adj = F.relu(adj)
+        adj = F.softmax(adj, dim=-1)
+        return adj
+
+
+class GraphConvBlock(nn.Module):
+    def __init__(self, input_dim, output_dim):
+        super().__init__()
+        self.theta = nn.Linear(input_dim, output_dim)
+        self.residual = input_dim == output_dim
+        if not self.residual:
+            self.res_proj = nn.Linear(input_dim, output_dim)
+
+    def forward(self, x, adj):
+        # x: (B, N, C) / adj: (N, N)
+        res = x
+        x = torch.matmul(adj, x)  # (B, N, C)
+        x = self.theta(x)
+
+        # residual connection
+        if self.residual:
+            x = x + res
+        else:
+            x = x + self.res_proj(res)
+
+        return F.relu(x)
+
+
+class MANBA_Block(nn.Module):
+    def __init__(self, input_dim, hidden_dim):
+        super().__init__()
+        self.attn = nn.MultiheadAttention(embed_dim=input_dim, num_heads=4, batch_first=True)
+        self.ffn = nn.Sequential(
+            nn.Linear(input_dim, hidden_dim),
+            nn.ReLU(),
+            nn.Linear(hidden_dim, input_dim)
+        )
+        self.norm1 = nn.LayerNorm(input_dim)
+        self.norm2 = nn.LayerNorm(input_dim)
+
+    def forward(self, x):
+        # x: (B, T, C)
+        res = x
+        x_attn, _ = self.attn(x, x, x)
+        x = self.norm1(res + x_attn)
+
+        res2 = x
+        x_ffn = self.ffn(x)
+        x = self.norm2(res2 + x_ffn)
+
+        return x
+
+
+class EXP(nn.Module):
+    def __init__(self, args):
+        super().__init__()
+        self.horizon = args['horizon']
+        self.output_dim = args['output_dim']
+        self.seq_len = args.get('in_len', 12)
+        self.hidden_dim = args.get('hidden_dim', 64)
+        self.num_nodes = args['num_nodes']
+
+        self.time_slots = args.get('time_slots', 24 * 60 // args.get('time_slot', 5))
+        self.time_embedding = nn.Embedding(self.time_slots, self.hidden_dim)
+        self.day_embedding = nn.Embedding(7, self.hidden_dim)
+
+        # dynamic graph construction
+        self.graph = DynamicGraphConstructor(self.num_nodes, embed_dim=16)
+
+        # input projection layer
+        self.input_proj = nn.Linear(self.seq_len, self.hidden_dim)
+
+        # graph convolution
+        self.gc = GraphConvBlock(self.hidden_dim, self.hidden_dim)
+
+        # MANBA block
+        self.manba = MANBA_Block(self.hidden_dim, self.hidden_dim * 2)
+
+        # output projection
+        self.out_proj = nn.Linear(self.hidden_dim, self.horizon * self.output_dim)
+
+    def forward(self, x):
+        # x: (B, T, N, D_total)
+        x_time = x[..., 1]  # (B, T, N)
+        x_day = x[..., 2]   # (B, T, N)
+        x = x[..., 0]       # use only the main channel (B, T, N)
+        B, T, N = x.shape
+        assert T == self.seq_len
+
+        # input projection (B, T, N) -> (B, N, T) -> (B*N, T) -> (B*N, H)
+        x = x.permute(0, 2, 1).reshape(B * N, T)
+        h = self.input_proj(x)  # (B*N, hidden_dim)
+        h = h.view(B, N, self.hidden_dim)
+
+        t_idx = (x_time[:, -1, :] * (self.time_slots - 1)).long()  # (B, N)
+        d_idx = x_day[:, -1, :].long()                             # (B, N)
+
+        time_emb = self.time_embedding(t_idx)  # (B, N, hidden_dim)
+        day_emb = self.day_embedding(d_idx)    # (B, N, hidden_dim)
+
+        # 3) inject them into the initial hidden state
+        h = h + time_emb + day_emb
+
+        # dynamic graph construction
+        adj = self.graph()  # (N, N)
+
+        # spatial modeling: graph convolution
+        h = self.gc(h, adj)  # (B, N, hidden_dim)
+
+        # MANBA block (self-attention across the node dimension)
+        h = self.manba(h)  # (B, N, hidden_dim)
+
+        # output projection
+        out = self.out_proj(h)  # (B, N, horizon * output_dim)
+        out = out.view(B, N, self.horizon, self.output_dim).permute(0, 2, 1, 3)
+        return out  # (B, horizon, N, output_dim)
diff --git a/model/STID/MLP.py b/model/STID/MLP.py
new file mode 100644
index 0000000..17fccbc
--- /dev/null
+++ b/model/STID/MLP.py
@@ -0,0 +1,29 @@
+import torch
+from torch import nn
+
+
+class MultiLayerPerceptron(nn.Module):
+    """Multi-Layer Perceptron with residual links."""
+
+    def __init__(self, input_dim, hidden_dim) -> None:
+        super().__init__()
+        self.fc1 = nn.Conv2d(
+            in_channels=input_dim, out_channels=hidden_dim, kernel_size=(1, 1), bias=True)
+        self.fc2 = nn.Conv2d(
+            in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=(1, 1), bias=True)
+        self.act = nn.ReLU()
+        self.drop = nn.Dropout(p=0.15)
+
+    def forward(self, input_data: torch.Tensor) -> torch.Tensor:
+        """Feed forward of MLP.
+
+        Args:
+            input_data (torch.Tensor): input data with shape [B, D, N, 1]
+
+        Returns:
+            torch.Tensor: latent repr
+        """
+
+        hidden = self.fc2(self.drop(self.act(self.fc1(input_data))))  # MLP
+        hidden = hidden + input_data  # residual
+        return hidden
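The 1x1 Conv2d used by MultiLayerPerceptron (and by STID below) is just a per-node linear layer over the channel dimension; a quick equivalence check (shapes illustrative, matching STID's (B, C, N, 1) layout):

import torch
import torch.nn as nn

x = torch.randn(4, 96, 307, 1)                   # (B, C, N, 1)
conv = nn.Conv2d(96, 64, kernel_size=(1, 1))
lin = nn.Linear(96, 64)
with torch.no_grad():
    lin.weight.copy_(conv.weight.view(64, 96))
    lin.bias.copy_(conv.bias)
y_conv = conv(x)
y_lin = lin(x.squeeze(-1).transpose(1, 2)).transpose(1, 2).unsqueeze(-1)
print(torch.allclose(y_conv, y_lin, atol=1e-6))  # True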
diff --git a/model/STID/STID.py b/model/STID/STID.py
new file mode 100644
index 0000000..2bbe8a7
--- /dev/null
+++ b/model/STID/STID.py
@@ -0,0 +1,117 @@
+import torch
+from torch import nn
+
+from model.STID.MLP import MultiLayerPerceptron
+
+
+class STID(nn.Module):
+    """
+    Paper: Spatial-Temporal Identity: A Simple yet Effective Baseline for Multivariate Time Series Forecasting
+    Link: https://arxiv.org/abs/2208.05233
+    Official Code: https://github.com/zezhishao/STID
+    """
+
+    def __init__(self, model_args):
+        super().__init__()
+        # attributes
+        self.num_nodes = model_args["num_nodes"]
+        self.node_dim = model_args["node_dim"]
+        self.input_len = model_args["input_len"]
+        self.input_dim = model_args["input_dim"]
+        self.embed_dim = model_args["embed_dim"]
+        self.output_len = model_args["output_len"]
+        self.num_layer = model_args["num_layer"]
+        self.temp_dim_tid = model_args["temp_dim_tid"]
+        self.temp_dim_diw = model_args["temp_dim_diw"]
+        self.time_of_day_size = model_args["time_of_day_size"]
+        self.day_of_week_size = model_args["day_of_week_size"]
+
+        self.if_time_in_day = model_args["if_T_i_D"]
+        self.if_day_in_week = model_args["if_D_i_W"]
+        self.if_spatial = model_args["if_node"]
+
+        # spatial embeddings
+        if self.if_spatial:
+            self.node_emb = nn.Parameter(torch.empty(self.num_nodes, self.node_dim))
+            nn.init.xavier_uniform_(self.node_emb)
+        # temporal embeddings
+        if self.if_time_in_day:
+            self.time_in_day_emb = nn.Parameter(
+                torch.empty(self.time_of_day_size, self.temp_dim_tid))
+            nn.init.xavier_uniform_(self.time_in_day_emb)
+        if self.if_day_in_week:
+            self.day_in_week_emb = nn.Parameter(
+                torch.empty(self.day_of_week_size, self.temp_dim_diw))
+            nn.init.xavier_uniform_(self.day_in_week_emb)
+
+        # embedding layer
+        self.time_series_emb_layer = nn.Conv2d(
+            in_channels=self.input_dim * self.input_len, out_channels=self.embed_dim,
+            kernel_size=(1, 1), bias=True)
+
+        # encoding: each enabled embedding adds its width to the hidden dim
+        self.hidden_dim = self.embed_dim + self.node_dim * int(self.if_spatial) \
+            + self.temp_dim_tid * int(self.if_time_in_day) \
+            + self.temp_dim_diw * int(self.if_day_in_week)
+        self.encoder = nn.Sequential(
+            *[MultiLayerPerceptron(self.hidden_dim, self.hidden_dim) for _ in range(self.num_layer)])
+
+        # regression
+        self.regression_layer = nn.Conv2d(
+            in_channels=self.hidden_dim, out_channels=self.output_len, kernel_size=(1, 1), bias=True)
+
+    def forward(self, history_data: torch.Tensor) -> torch.Tensor:
+        """Feed forward of STID.
+
+        Args:
+            history_data (torch.Tensor): history data with shape [B, L, N, C]
+
+        Returns:
+            torch.Tensor: prediction with shape [B, L, N, C]
+        """
+
+        # prepare data
+        input_data = history_data[..., range(self.input_dim)]
+
+        if self.if_time_in_day:
+            t_i_d_data = history_data[..., 1]
+            # In the datasets used in STID, the time_of_day feature is normalized to [0, 1].
+            # We multiply it by 288 to get the index.
+            # If you use other datasets, you may need to change this line.
+            time_in_day_emb = self.time_in_day_emb[(t_i_d_data[:, -1, :] * self.time_of_day_size).long()]
+        else:
+            time_in_day_emb = None
+        if self.if_day_in_week:
+            d_i_w_data = history_data[..., 2]
+            day_in_week_emb = self.day_in_week_emb[(d_i_w_data[:, -1, :] * self.day_of_week_size).long()]
+        else:
+            day_in_week_emb = None
+
+        # time series embedding
+        batch_size, _, num_nodes, _ = input_data.shape
+        input_data = input_data.transpose(1, 2).contiguous()
+        input_data = input_data.view(
+            batch_size, num_nodes, -1).transpose(1, 2).unsqueeze(-1)
+        time_series_emb = self.time_series_emb_layer(input_data)
+
+        node_emb = []
+        if self.if_spatial:
+            # expand node embeddings
+            node_emb.append(self.node_emb.unsqueeze(0).expand(
+                batch_size, -1, -1).transpose(1, 2).unsqueeze(-1))
+        # temporal embeddings
+        tem_emb = []
+        if time_in_day_emb is not None:
+            tem_emb.append(time_in_day_emb.transpose(1, 2).unsqueeze(-1))
+        if day_in_week_emb is not None:
+            tem_emb.append(day_in_week_emb.transpose(1, 2).unsqueeze(-1))
+
+        # concatenate all embeddings
+        hidden = torch.cat([time_series_emb] + node_emb + tem_emb, dim=1)
+
+        # encoding
+        hidden = self.encoder(hidden)
+
+        # regression
+        prediction = self.regression_layer(hidden)
+
+        return prediction
diff --git a/model/model_selector.py b/model/model_selector.py
index 2c7992b..7df6e32 100644
--- a/model/model_selector.py
+++ b/model/model_selector.py
@@ -13,7 +13,8 @@ from model.STFGNN.STFGNN import STFGNN
 from model.STSGCN.STSGCN import STSGCN
 from model.STGODE.STGODE import ODEGCN
 from model.PDG2SEQ.PDG2Seq import PDG2Seq
-from model.EXP.EXP21 import EXP as EXP
+from model.STID.STID import STID
+from model.EXP.EXP26 import EXP as EXP
 
 def model_selector(model):
     match model['type']:
@@ -32,5 +33,6 @@ def model_selector(model):
         case 'STSGCN': return STSGCN(model)
         case 'STGODE': return ODEGCN(model)
         case 'PDG2SEQ': return PDG2Seq(model)
+        case 'STID': return STID(model)
         case 'EXP': return EXP(model)
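With the new case registered, STID is selected like the other baselines; a sketch, assuming the runner injects 'type' into the model dict as it does for the existing cases (the remaining keys mirror the model section of config/STID/PEMSD4.yaml):

from model.model_selector import model_selector

cfg = {'type': 'STID', 'num_nodes': 307, 'input_dim': 3, 'output_dim': 1,
       'history': 12, 'horizon': 12, 'input_len': 12, 'embed_dim': 32,
       'output_len': 12, 'num_layer': 3, 'if_node': True, 'node_dim': 32,
       'if_T_i_D': True, 'if_D_i_W': True, 'temp_dim_tid': 32,
       'temp_dim_diw': 32, 'time_of_day_size': 288, 'day_of_week_size': 7}
model = model_selector(cfg)   # returns STID(cfg)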