Compare commits
No commits in common. "2800f66dfe62775b710266e7e7aa4f98f3fc5d2b" and "2685d049d79f733ebe83c6449333217a189b302f" have entirely different histories.
2800f66dfe
...
2685d049d7
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -1,4 +1,4 @@
-from utils.normalization import normalize_dataset
+from lib.normalization import normalize_dataset
 
 import numpy as np
 import gc
@@ -1,7 +1,10 @@
 import numpy as np
 import gc
 import os
 import torch
-from utils.normalization import normalize_dataset
-from dataloader.data_selector import load_st_dataset
+import h5py
+from lib.normalization import normalize_dataset
 
 
 def get_dataloader(args, normalizer="std", single=True):
     # args should now include 'cycle'
@@ -118,6 +121,70 @@ def data_loader_with_cycle(X, Y, C, batch_size, shuffle=True, drop_last=True):
|
|||
return loader
|
||||
|
||||
|
||||
# Rest of the helper functions (load_st_dataset, split_data..., add_window_x/y) unchanged
|
||||
|
||||
|
||||
def load_st_dataset(dataset, sample):
|
||||
# output B, N, D
|
||||
match dataset:
|
||||
case "PEMSD3":
|
||||
data_path = os.path.join("./data/PEMS03/PEMS03.npz")
|
||||
data = np.load(data_path)["data"][
|
||||
:, :, 0
|
||||
] # only the first dimension, traffic flow data
|
||||
case "PEMSD4":
|
||||
data_path = os.path.join("./data/PEMS04/PEMS04.npz")
|
||||
data = np.load(data_path)["data"][
|
||||
:, :, 0
|
||||
] # only the first dimension, traffic flow data
|
||||
case "PEMSD7":
|
||||
data_path = os.path.join("./data/PEMS07/PEMS07.npz")
|
||||
data = np.load(data_path)["data"][
|
||||
:, :, 0
|
||||
] # only the first dimension, traffic flow data
|
||||
case "PEMSD8":
|
||||
data_path = os.path.join("./data/PEMS08/PEMS08.npz")
|
||||
data = np.load(data_path)["data"][
|
||||
:, :, 0
|
||||
] # only the first dimension, traffic flow data
|
||||
case "PEMSD7(L)":
|
||||
data_path = os.path.join("./data/PEMS07(L)/PEMS07L.npz")
|
||||
data = np.load(data_path)["data"][
|
||||
:, :, 0
|
||||
] # only the first dimension, traffic flow data
|
||||
case "PEMSD7(M)":
|
||||
data_path = os.path.join("./data/PEMS07(M)/V_228.csv")
|
||||
data = np.genfromtxt(
|
||||
data_path, delimiter=","
|
||||
) # Read CSV directly with numpy
|
||||
case "METR-LA":
|
||||
data_path = os.path.join("./data/METR-LA/METR.h5")
|
||||
with h5py.File(
|
||||
data_path, "r"
|
||||
) as f: # Use h5py to handle HDF5 files without pandas
|
||||
data = np.array(f["data"])
|
||||
case "BJ":
|
||||
data_path = os.path.join("./data/BJ/BJ500.csv")
|
||||
data = np.genfromtxt(
|
||||
data_path, delimiter=",", skip_header=1
|
||||
) # Skip header if present
|
||||
case "Hainan":
|
||||
data_path = os.path.join("./data/Hainan/Hainan.npz")
|
||||
data = np.load(data_path)["data"][:, :, 0]
|
||||
case "SD":
|
||||
data_path = os.path.join("./data/SD/data.npz")
|
||||
data = np.load(data_path)["data"][:, :, 0].astype(np.float32)
|
||||
case _:
|
||||
raise ValueError(f"Unsupported dataset: {dataset}")
|
||||
|
||||
# Ensure data shape compatibility
|
||||
if len(data.shape) == 2:
|
||||
data = np.expand_dims(data, axis=-1)
|
||||
|
||||
print("加载 %s 数据集中... " % dataset)
|
||||
return data[::sample]
|
||||
|
||||
|
||||
def split_data_by_days(data, val_days, test_days, interval=30):
|
||||
t = int((24 * 60) / interval)
|
||||
test_data = data[-t * int(test_days) :]
|
||||
|
|
|
|||
|
|
@@ -1,10 +1,10 @@
-from utils.normalization import normalize_dataset
-from dataloader.data_selector import load_st_dataset
+from lib.normalization import normalize_dataset
 
 import numpy as np
 import gc
 import os
 import torch
 
+import h5py
 
 
 def get_dataloader(args, normalizer="std", single=True):
@@ -113,6 +113,73 @@ def get_dataloader(args, normalizer="std", single=True):
|
|||
return train_dataloader, val_dataloader, test_dataloader, scaler
|
||||
|
||||
|
||||
def load_st_dataset(config):
|
||||
dataset = config["basic"]["dataset"]
|
||||
sample = config["data"]["sample"]
|
||||
# output B, N, D
|
||||
match dataset:
|
||||
case "PEMS-BAY":
|
||||
data_path = os.path.join("./data/PEMS-BAY/pems-bay.h5")
|
||||
with h5py.File(data_path, 'r') as f:
|
||||
data = f['speed']['block0_values'][:]
|
||||
case "PEMSD3":
|
||||
data_path = os.path.join("./data/PEMS03/PEMS03.npz")
|
||||
data = np.load(data_path)["data"][
|
||||
:, :, 0
|
||||
] # only the first dimension, traffic flow data
|
||||
case "PEMSD4":
|
||||
data_path = os.path.join("./data/PEMS04/PEMS04.npz")
|
||||
data = np.load(data_path)["data"][
|
||||
:, :, 0
|
||||
] # only the first dimension, traffic flow data
|
||||
case "PEMSD7":
|
||||
data_path = os.path.join("./data/PEMS07/PEMS07.npz")
|
||||
data = np.load(data_path)["data"][
|
||||
:, :, 0
|
||||
] # only the first dimension, traffic flow data
|
||||
case "PEMSD8":
|
||||
data_path = os.path.join("./data/PEMS08/PEMS08.npz")
|
||||
data = np.load(data_path)["data"][
|
||||
:, :, 0
|
||||
] # only the first dimension, traffic flow data
|
||||
case "PEMSD7(L)":
|
||||
data_path = os.path.join("./data/PEMS07(L)/PEMS07L.npz")
|
||||
data = np.load(data_path)["data"][
|
||||
:, :, 0
|
||||
] # only the first dimension, traffic flow data
|
||||
case "PEMSD7(M)":
|
||||
data_path = os.path.join("./data/PEMS07(M)/V_228.csv")
|
||||
data = np.genfromtxt(
|
||||
data_path, delimiter=","
|
||||
) # Read CSV directly with numpy
|
||||
case "METR-LA":
|
||||
data_path = os.path.join("./data/METR-LA/METR.h5")
|
||||
with h5py.File(
|
||||
data_path, "r"
|
||||
) as f: # Use h5py to handle HDF5 files without pandas
|
||||
data = np.array(f["data"])
|
||||
case "BJ":
|
||||
data_path = os.path.join("./data/BJ/BJ500.csv")
|
||||
data = np.genfromtxt(
|
||||
data_path, delimiter=",", skip_header=1
|
||||
) # Skip header if present
|
||||
case "Hainan":
|
||||
data_path = os.path.join("./data/Hainan/Hainan.npz")
|
||||
data = np.load(data_path)["data"][:, :, 0]
|
||||
case "SD":
|
||||
data_path = os.path.join("./data/SD/data.npz")
|
||||
data = np.load(data_path)["data"][:, :, 0].astype(np.float32)
|
||||
case _:
|
||||
raise ValueError(f"Unsupported dataset: {dataset}")
|
||||
|
||||
# Ensure data shape compatibility
|
||||
if len(data.shape) == 2:
|
||||
data = np.expand_dims(data, axis=-1)
|
||||
|
||||
print("加载 %s 数据集中... " % dataset)
|
||||
return data[::sample]
|
||||
|
||||
|
||||
def split_data_by_days(data, val_days, test_days, interval=30):
|
||||
t = int((24 * 60) / interval)
|
||||
test_data = data[-t * int(test_days) :]
|
||||
|
|
|
|||
|
|
@@ -0,0 +1,253 @@
|
|||
from lib.normalization import normalize_dataset
|
||||
|
||||
import numpy as np
|
||||
import gc
|
||||
import os
|
||||
import torch
|
||||
import h5py
|
||||
|
||||
|
||||
def get_dataloader(args, normalizer="std", single=True):
|
||||
data = load_st_dataset(args["type"])  # load the raw dataset
|
||||
L, N, F = data.shape  # data shape
|
||||
|
||||
# Step 1: data -> x,y
|
||||
x = add_window_x(data, args["lag"], args["horizon"], single)
|
||||
y = add_window_y(data, args["lag"], args["horizon"], single)
|
||||
|
||||
del data
|
||||
gc.collect()
|
||||
|
||||
# Step 2: time_in_day, day_in_week -> day, week
|
||||
time_in_day = [i % args["steps_per_day"] / args["steps_per_day"] for i in range(L)]
|
||||
time_in_day = np.tile(np.array(time_in_day), [1, N, 1]).transpose((2, 1, 0))
|
||||
day_in_week = [
|
||||
(i // args["steps_per_day"]) % args["days_per_week"] for i in range(L)
|
||||
]
|
||||
day_in_week = np.tile(np.array(day_in_week), [1, N, 1]).transpose((2, 1, 0))
|
||||
|
||||
x_day = add_window_x(time_in_day, args["lag"], args["horizon"], single)
|
||||
x_week = add_window_x(day_in_week, args["lag"], args["horizon"], single)
|
||||
|
||||
# Step 3 day, week, x, y --> x, y
|
||||
x = np.concatenate([x, x_day, x_week], axis=-1)
|
||||
|
||||
del x_day, x_week
|
||||
gc.collect()
|
||||
|
||||
# Step 4 x,y --> x_train, x_val, x_test, y_train, y_val, y_test
|
||||
if args["test_ratio"] > 1:
|
||||
x_train, x_val, x_test = split_data_by_days(
|
||||
x, args["val_ratio"], args["test_ratio"]
|
||||
)
|
||||
else:
|
||||
x_train, x_val, x_test = split_data_by_ratio(
|
||||
x, args["val_ratio"], args["test_ratio"]
|
||||
)
|
||||
|
||||
del x
|
||||
gc.collect()
|
||||
|
||||
# Normalization
|
||||
scaler = normalize_dataset(
|
||||
x_train[..., : args["input_dim"]], normalizer, args["column_wise"]
|
||||
)
|
||||
x_train[..., : args["input_dim"]] = scaler.transform(
|
||||
x_train[..., : args["input_dim"]]
|
||||
)
|
||||
x_val[..., : args["input_dim"]] = scaler.transform(x_val[..., : args["input_dim"]])
|
||||
x_test[..., : args["input_dim"]] = scaler.transform(
|
||||
x_test[..., : args["input_dim"]]
|
||||
)
|
||||
|
||||
y_day = add_window_y(time_in_day, args["lag"], args["horizon"], single)
|
||||
y_week = add_window_y(day_in_week, args["lag"], args["horizon"], single)
|
||||
|
||||
del time_in_day, day_in_week
|
||||
gc.collect()
|
||||
|
||||
y = np.concatenate([y, y_day, y_week], axis=-1)
|
||||
|
||||
del y_day, y_week
|
||||
gc.collect()
|
||||
|
||||
# Split Y
|
||||
if args["test_ratio"] > 1:
|
||||
y_train, y_val, y_test = split_data_by_days(
|
||||
y, args["val_ratio"], args["test_ratio"]
|
||||
)
|
||||
else:
|
||||
y_train, y_val, y_test = split_data_by_ratio(
|
||||
y, args["val_ratio"], args["test_ratio"]
|
||||
)
|
||||
|
||||
del y
|
||||
gc.collect()
|
||||
|
||||
# Step 5: x_train y_train x_val y_val x_test y_test --> train val test
|
||||
# train_dataloader = data_loader(x_train[..., :args['input_dim']], y_train[..., :args['input_dim']], args['batch_size'], shuffle=True, drop_last=True)
|
||||
train_dataloader = data_loader(
|
||||
x_train, y_train, args["batch_size"], shuffle=True, drop_last=True
|
||||
)
|
||||
|
||||
del x_train, y_train
|
||||
gc.collect()
|
||||
|
||||
# val_dataloader = data_loader(x_val[..., :args['input_dim']], y_val[..., :args['input_dim']], args['batch_size'], shuffle=False, drop_last=True)
|
||||
val_dataloader = data_loader(
|
||||
x_val, y_val, args["batch_size"], shuffle=False, drop_last=True
|
||||
)
|
||||
|
||||
del x_val, y_val
|
||||
gc.collect()
|
||||
|
||||
# test_dataloader = data_loader(x_test[..., :args['input_dim']], y_test[..., :args['input_dim']], args['batch_size'], shuffle=False, drop_last=False)
|
||||
test_dataloader = data_loader(
|
||||
x_test, y_test, args["batch_size"], shuffle=False, drop_last=False
|
||||
)
|
||||
|
||||
del x_test, y_test
|
||||
gc.collect()
|
||||
|
||||
return train_dataloader, val_dataloader, test_dataloader, scaler
|
||||
|
||||
|
||||
def load_st_dataset(dataset):
|
||||
# output B, N, D
|
||||
match dataset:
|
||||
case "PEMSD3":
|
||||
data_path = os.path.join("./data/PEMS03/PEMS03.npz")
|
||||
data = np.load(data_path)["data"][
|
||||
:, :, 0
|
||||
] # only the first dimension, traffic flow data
|
||||
case "PEMSD4":
|
||||
data_path = os.path.join("./data/PEMS04/PEMS04.npz")
|
||||
data = np.load(data_path)["data"][
|
||||
:, :, 0
|
||||
] # only the first dimension, traffic flow data
|
||||
case "PEMSD7":
|
||||
data_path = os.path.join("./data/PEMS07/PEMS07.npz")
|
||||
data = np.load(data_path)["data"][
|
||||
:, :, 0
|
||||
] # only the first dimension, traffic flow data
|
||||
case "PEMSD8":
|
||||
data_path = os.path.join("./data/PEMS08/PEMS08.npz")
|
||||
data = np.load(data_path)["data"][
|
||||
:, :, 0
|
||||
] # only the first dimension, traffic flow data
|
||||
case "PEMSD7(L)":
|
||||
data_path = os.path.join("./data/PEMS07(L)/PEMS07L.npz")
|
||||
data = np.load(data_path)["data"][
|
||||
:, :, 0
|
||||
] # only the first dimension, traffic flow data
|
||||
case "PEMSD7(M)":
|
||||
data_path = os.path.join("./data/PEMS07(M)/V_228.csv")
|
||||
data = np.genfromtxt(
|
||||
data_path, delimiter=","
|
||||
) # Read CSV directly with numpy
|
||||
case "METR-LA":
|
||||
data_path = os.path.join("./data/METR-LA/METR.h5")
|
||||
with h5py.File(
|
||||
data_path, "r"
|
||||
) as f: # Use h5py to handle HDF5 files without pandas
|
||||
data = np.array(f["data"])
|
||||
case "BJ":
|
||||
data_path = os.path.join("./data/BJ/BJ500.csv")
|
||||
data = np.genfromtxt(
|
||||
data_path, delimiter=",", skip_header=1
|
||||
) # Skip header if present
|
||||
case "Hainan":
|
||||
data_path = os.path.join("./data/Hainan/Hainan.npz")
|
||||
data = np.load(data_path)["data"][:, :, 0]
|
||||
case _:
|
||||
raise ValueError(f"Unsupported dataset: {dataset}")
|
||||
|
||||
# Ensure data shape compatibility
|
||||
if len(data.shape) == 2:
|
||||
data = np.expand_dims(data, axis=-1)
|
||||
|
||||
print(
|
||||
"Load %s Dataset shaped: " % dataset,
|
||||
data.shape,
|
||||
data.max(),
|
||||
data.min(),
|
||||
data.mean(),
|
||||
np.median(data),
|
||||
)
|
||||
return data
|
||||
|
||||
|
||||
def split_data_by_days(data, val_days, test_days, interval=30):
|
||||
t = int((24 * 60) / interval)
|
||||
test_data = data[-t * int(test_days) :]
|
||||
val_data = data[-t * int(test_days + val_days) : -t * int(test_days)]
|
||||
train_data = data[: -t * int(test_days + val_days)]
|
||||
return train_data, val_data, test_data
|
||||
|
||||
|
||||
def split_data_by_ratio(data, val_ratio, test_ratio):
|
||||
data_len = data.shape[0]
|
||||
test_data = data[-int(data_len * test_ratio) :]
|
||||
val_data = data[
|
||||
-int(data_len * (test_ratio + val_ratio)) : -int(data_len * test_ratio)
|
||||
]
|
||||
train_data = data[: -int(data_len * (test_ratio + val_ratio))]
|
||||
return train_data, val_data, test_data
|
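As a quick sanity check of the ratio-based split above, a minimal sketch (the array contents and ratios here are illustrative only, not taken from this diff):

import numpy as np

# 100 windows with val_ratio=0.2 and test_ratio=0.2 -> 60 / 20 / 20 split
data = np.arange(100)
val_ratio, test_ratio = 0.2, 0.2
test = data[-int(100 * test_ratio):]                                      # last 20 windows
val = data[-int(100 * (test_ratio + val_ratio)):-int(100 * test_ratio)]   # middle 20 windows
train = data[:-int(100 * (test_ratio + val_ratio))]                       # first 60 windows
assert len(train) == 60 and len(val) == 20 and len(test) == 20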
||||
|
||||
|
||||
def data_loader(X, Y, batch_size, shuffle=True, drop_last=True):
|
||||
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
||||
X = torch.tensor(X, dtype=torch.float32, device=device)
|
||||
Y = torch.tensor(Y, dtype=torch.float32, device=device)
|
||||
data = torch.utils.data.TensorDataset(X, Y)
|
||||
dataloader = torch.utils.data.DataLoader(
|
||||
data, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last
|
||||
)
|
||||
return dataloader
|
||||
|
||||
|
||||
def add_window_x(data, window=3, horizon=1, single=False):
|
||||
"""
|
||||
Generate windowed X values from the input data.
|
||||
|
||||
:param data: Input data, shape [B, ...]
|
||||
:param window: Size of the sliding window
|
||||
:param horizon: Horizon size
|
||||
:param single: If True, generate single-step windows, else multi-step
|
||||
:return: X with shape [B, W, ...]
|
||||
"""
|
||||
length = len(data)
|
||||
end_index = length - horizon - window + 1
|
||||
x = [] # Sliding windows
|
||||
index = 0
|
||||
|
||||
while index < end_index:
|
||||
x.append(data[index : index + window])
|
||||
index += 1
|
||||
|
||||
return np.array(x)
|
||||
|
||||
|
||||
def add_window_y(data, window=3, horizon=1, single=False):
|
||||
"""
|
||||
Generate windowed Y values from the input data.
|
||||
|
||||
:param data: Input data, shape [B, ...]
|
||||
:param window: Size of the sliding window
|
||||
:param horizon: Horizon size
|
||||
:param single: If True, generate single-step windows, else multi-step
|
||||
:return: Y with shape [B, H, ...]
|
||||
"""
|
||||
length = len(data)
|
||||
end_index = length - horizon - window + 1
|
||||
y = [] # Horizon values
|
||||
index = 0
|
||||
|
||||
while index < end_index:
|
||||
if single:
|
||||
y.append(data[index + window + horizon - 1 : index + window + horizon])
|
||||
else:
|
||||
y.append(data[index + window : index + window + horizon])
|
||||
index += 1
|
||||
|
||||
return np.array(y)
|
||||
|
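For reference, how the add_window_x / add_window_y helpers above pair up inputs and targets, as a minimal sketch (the toy shapes below are illustrative only):

import numpy as np

series = np.arange(20).reshape(20, 1, 1)                        # toy (T, N, D) series
x = add_window_x(series, window=12, horizon=3)                  # (6, 12, 1, 1) sliding inputs
y = add_window_y(series, window=12, horizon=3)                  # (6, 3, 1, 1) multi-step targets
y1 = add_window_y(series, window=12, horizon=3, single=True)    # (6, 1, 1, 1) last step only
assert x.shape[0] == y.shape[0] == 20 - 12 - 3 + 1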
|
@@ -1,4 +1,4 @@
-from utils.normalization import normalize_dataset
+from lib.normalization import normalize_dataset
 import numpy as np
 import gc
 import os
@@ -1,69 +0,0 @@
import os
import numpy as np
import h5py


def load_st_dataset(config):
    dataset = config["basic"]["dataset"]
    sample = config["data"]["sample"]
    # output B, N, D
    match dataset:
        case "PEMS-BAY":
            data_path = os.path.join("./data/PEMS-BAY/pems-bay.h5")
            with h5py.File(data_path, 'r') as f:
                data = f['speed']['block0_values'][:]
        case "PEMSD3":
            data_path = os.path.join("./data/PEMS03/PEMS03.npz")
            data = np.load(data_path)["data"][:, :, 0]
        case "PEMSD4":
            data_path = os.path.join("./data/PEMS04/PEMS04.npz")
            data = np.load(data_path)["data"][:, :, 0]
        case "PEMSD7":
            data_path = os.path.join("./data/PEMS07/PEMS07.npz")
            data = np.load(data_path)["data"][:, :, 0]
        case "PEMSD8":
            data_path = os.path.join("./data/PEMS08/PEMS08.npz")
            data = np.load(data_path)["data"][:, :, 0]
        case "PEMSD7(L)":
            data_path = os.path.join("./data/PEMS07(L)/PEMS07L.npz")
            data = np.load(data_path)["data"][:, :, 0]
        case "PEMSD7(M)":
            data_path = os.path.join("./data/PEMS07(M)/V_228.csv")
            data = np.genfromtxt(data_path, delimiter=",")
        case "METR-LA":
            data_path = os.path.join("./data/METR-LA/METR.h5")
            with h5py.File(data_path, "r") as f:
                data = np.array(f["data"])
        case "BJ":
            data_path = os.path.join("./data/BJ/BJ500.csv")
            data = np.genfromtxt(data_path, delimiter=",", skip_header=1)
        case "Hainan":
            data_path = os.path.join("./data/Hainan/Hainan.npz")
            data = np.load(data_path)["data"][:, :, 0]
        case "SD":
            data_path = os.path.join("./data/SD/data.npz")
            data = np.load(data_path)["data"][:, :, 0].astype(np.float32)
        case _:
            raise ValueError(f"Unsupported dataset: {dataset}")

    # Ensure data shape compatibility
    if len(data.shape) == 2:
        data = np.expand_dims(data, axis=-1)

    print("Loading %s dataset..." % dataset)
    return data[::sample]
@@ -0,0 +1,271 @@
|
|||
import pickle
|
||||
import torch
|
||||
import numpy as np
|
||||
import os
|
||||
import gc
|
||||
# ! X shape: (B, T, N, C)
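# NOTE (assumption): the functions below also call vrange, StandardScaler and print_log,
# which are not imported in this file; they are presumably provided by a project utility
# module that is outside this diff.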
|
||||
|
||||
|
||||
def load_pkl(pickle_file: str) -> object:
|
||||
"""
|
||||
Load data from a pickle file.
|
||||
|
||||
Args:
|
||||
pickle_file (str): Path to the pickle file.
|
||||
|
||||
Returns:
|
||||
object: Loaded object from the pickle file.
|
||||
"""
|
||||
|
||||
try:
|
||||
with open(pickle_file, "rb") as f:
|
||||
pickle_data = pickle.load(f)
|
||||
except UnicodeDecodeError:
|
||||
with open(pickle_file, "rb") as f:
|
||||
pickle_data = pickle.load(f, encoding="latin1")
|
||||
except Exception as e:
|
||||
print(f"Unable to load data from {pickle_file}: {e}")
|
||||
raise
|
||||
return pickle_data
|
||||
|
||||
|
||||
def get_dataloaders_from_index_data(
|
||||
data_dir, tod=False, dow=False, batch_size=64, log=None, train_size=0.6
|
||||
):
|
||||
data = np.load(os.path.join(data_dir, "data.npz"))["data"].astype(np.float32)
|
||||
|
||||
features = [0]
|
||||
if tod:
|
||||
features.append(1)
|
||||
if dow:
|
||||
features.append(2)
|
||||
# if dom:
|
||||
# features.append(3)
|
||||
data = data[..., features]
|
||||
|
||||
index = np.load(os.path.join(data_dir, "index.npz"))
|
||||
|
||||
train_index = index["train"] # (num_samples, 3)
|
||||
val_index = index["val"]
|
||||
test_index = index["test"]
|
||||
|
||||
x_train_index = vrange(train_index[:, 0], train_index[:, 1])
|
||||
y_train_index = vrange(train_index[:, 1], train_index[:, 2])
|
||||
x_val_index = vrange(val_index[:, 0], val_index[:, 1])
|
||||
y_val_index = vrange(val_index[:, 1], val_index[:, 2])
|
||||
x_test_index = vrange(test_index[:, 0], test_index[:, 1])
|
||||
y_test_index = vrange(test_index[:, 1], test_index[:, 2])
|
||||
|
||||
x_train = data[x_train_index]
|
||||
y_train = data[y_train_index][..., :1]
|
||||
x_val = data[x_val_index]
|
||||
y_val = data[y_val_index][..., :1]
|
||||
x_test = data[x_test_index]
|
||||
y_test = data[y_test_index][..., :1]
|
||||
|
||||
scaler = StandardScaler(mean=x_train[..., 0].mean(), std=x_train[..., 0].std())
|
||||
|
||||
x_train[..., 0] = scaler.transform(x_train[..., 0])
|
||||
x_val[..., 0] = scaler.transform(x_val[..., 0])
|
||||
x_test[..., 0] = scaler.transform(x_test[..., 0])
|
||||
|
||||
print_log(f"Trainset:\tx-{x_train.shape}\ty-{y_train.shape}", log=log)
|
||||
print_log(f"Valset: \tx-{x_val.shape} \ty-{y_val.shape}", log=log)
|
||||
print_log(f"Testset:\tx-{x_test.shape}\ty-{y_test.shape}", log=log)
|
||||
|
||||
trainset = torch.utils.data.TensorDataset(
|
||||
torch.FloatTensor(x_train), torch.FloatTensor(y_train)
|
||||
)
|
||||
valset = torch.utils.data.TensorDataset(
|
||||
torch.FloatTensor(x_val), torch.FloatTensor(y_val)
|
||||
)
|
||||
testset = torch.utils.data.TensorDataset(
|
||||
torch.FloatTensor(x_test), torch.FloatTensor(y_test)
|
||||
)
|
||||
if train_size != 0.6:
|
||||
drop_last = True
|
||||
else:
|
||||
drop_last = False
|
||||
trainset_loader = torch.utils.data.DataLoader(
|
||||
trainset, batch_size=batch_size, shuffle=True, drop_last=drop_last
|
||||
)
|
||||
valset_loader = torch.utils.data.DataLoader(
|
||||
valset, batch_size=batch_size, shuffle=False, drop_last=drop_last
|
||||
)
|
||||
testset_loader = torch.utils.data.DataLoader(
|
||||
testset, batch_size=batch_size, shuffle=False, drop_last=drop_last
|
||||
)
|
||||
|
||||
return trainset_loader, valset_loader, testset_loader, scaler
|
||||
|
||||
|
||||
def get_dataloaders_from_index_data_MTS(
|
||||
data_dir,
|
||||
in_steps=12,
|
||||
out_steps=12,
|
||||
tod=False,
|
||||
dow=False,
|
||||
y_tod=False,
|
||||
y_dow=False,
|
||||
batch_size=64,
|
||||
log=None,
|
||||
):
|
||||
data = np.load(os.path.join(data_dir, f"data.npz"))["data"].astype(np.float32)
|
||||
index = np.load(os.path.join(data_dir, f"index_{in_steps}_{out_steps}.npz"))
|
||||
|
||||
x_features = [0]
|
||||
if tod:
|
||||
x_features.append(1)
|
||||
if dow:
|
||||
x_features.append(2)
|
||||
|
||||
y_features = [0]
|
||||
if y_tod:
|
||||
y_features.append(1)
|
||||
if y_dow:
|
||||
y_features.append(2)
|
||||
|
||||
train_index = index["train"] # (num_samples, 3)
|
||||
val_index = index["val"]
|
||||
test_index = index["test"]
|
||||
|
||||
# Parallel
|
||||
# x_train_index = vrange(train_index[:, 0], train_index[:, 1])
|
||||
# y_train_index = vrange(train_index[:, 1], train_index[:, 2])
|
||||
# x_val_index = vrange(val_index[:, 0], val_index[:, 1])
|
||||
# y_val_index = vrange(val_index[:, 1], val_index[:, 2])
|
||||
# x_test_index = vrange(test_index[:, 0], test_index[:, 1])
|
||||
# y_test_index = vrange(test_index[:, 1], test_index[:, 2])
|
||||
|
||||
# x_train = data[x_train_index][..., x_features]
|
||||
# y_train = data[y_train_index][..., y_features]
|
||||
# x_val = data[x_val_index][..., x_features]
|
||||
# y_val = data[y_val_index][..., y_features]
|
||||
# x_test = data[x_test_index][..., x_features]
|
||||
# y_test = data[y_test_index][..., y_features]
|
||||
|
||||
# Iterative
|
||||
x_train = np.stack([data[idx[0] : idx[1]] for idx in train_index])[..., x_features]
|
||||
y_train = np.stack([data[idx[1] : idx[2]] for idx in train_index])[..., y_features]
|
||||
x_val = np.stack([data[idx[0] : idx[1]] for idx in val_index])[..., x_features]
|
||||
y_val = np.stack([data[idx[1] : idx[2]] for idx in val_index])[..., y_features]
|
||||
x_test = np.stack([data[idx[0] : idx[1]] for idx in test_index])[..., x_features]
|
||||
y_test = np.stack([data[idx[1] : idx[2]] for idx in test_index])[..., y_features]
|
||||
|
||||
scaler = StandardScaler(mean=x_train[..., 0].mean(), std=x_train[..., 0].std())
|
||||
|
||||
x_train[..., 0] = scaler.transform(x_train[..., 0])
|
||||
x_val[..., 0] = scaler.transform(x_val[..., 0])
|
||||
x_test[..., 0] = scaler.transform(x_test[..., 0])
|
||||
|
||||
print_log(f"Trainset:\tx-{x_train.shape}\ty-{y_train.shape}", log=log)
|
||||
print_log(f"Valset: \tx-{x_val.shape} \ty-{y_val.shape}", log=log)
|
||||
print_log(f"Testset:\tx-{x_test.shape}\ty-{y_test.shape}", log=log)
|
||||
|
||||
trainset = torch.utils.data.TensorDataset(
|
||||
torch.FloatTensor(x_train), torch.FloatTensor(y_train)
|
||||
)
|
||||
valset = torch.utils.data.TensorDataset(
|
||||
torch.FloatTensor(x_val), torch.FloatTensor(y_val)
|
||||
)
|
||||
testset = torch.utils.data.TensorDataset(
|
||||
torch.FloatTensor(x_test), torch.FloatTensor(y_test)
|
||||
)
|
||||
|
||||
trainset_loader = torch.utils.data.DataLoader(
|
||||
trainset, batch_size=batch_size, shuffle=True
|
||||
)
|
||||
valset_loader = torch.utils.data.DataLoader(
|
||||
valset, batch_size=batch_size, shuffle=False
|
||||
)
|
||||
testset_loader = torch.utils.data.DataLoader(
|
||||
testset, batch_size=batch_size, shuffle=False
|
||||
)
|
||||
|
||||
return trainset_loader, valset_loader, testset_loader, scaler
|
||||
|
||||
|
||||
def get_dataloaders_from_index_data_Test(
|
||||
data_dir,
|
||||
in_steps=12,
|
||||
out_steps=12,
|
||||
tod=False,
|
||||
dow=False,
|
||||
y_tod=False,
|
||||
y_dow=False,
|
||||
batch_size=64,
|
||||
log=None,
|
||||
):
|
||||
data = np.load(os.path.join(data_dir, f"data.npz"))["data"].astype(np.float32)
|
||||
index = np.load(os.path.join(data_dir, f"index_{in_steps}_{out_steps}.npz"))
|
||||
|
||||
x_features = [0]
|
||||
if tod:
|
||||
x_features.append(1)
|
||||
if dow:
|
||||
x_features.append(2)
|
||||
|
||||
y_features = [0]
|
||||
if y_tod:
|
||||
y_features.append(1)
|
||||
if y_dow:
|
||||
y_features.append(2)
|
||||
|
||||
train_index = index["train"] # (num_samples, 3)
|
||||
# val_index = index["val"]
|
||||
test_index = index["test"]
|
||||
|
||||
# Parallel
|
||||
# x_train_index = vrange(train_index[:, 0], train_index[:, 1])
|
||||
# y_train_index = vrange(train_index[:, 1], train_index[:, 2])
|
||||
# x_val_index = vrange(val_index[:, 0], val_index[:, 1])
|
||||
# y_val_index = vrange(val_index[:, 1], val_index[:, 2])
|
||||
# x_test_index = vrange(test_index[:, 0], test_index[:, 1])
|
||||
# y_test_index = vrange(test_index[:, 1], test_index[:, 2])
|
||||
|
||||
# x_train = data[x_train_index][..., x_features]
|
||||
# y_train = data[y_train_index][..., y_features]
|
||||
# x_val = data[x_val_index][..., x_features]
|
||||
# y_val = data[y_val_index][..., y_features]
|
||||
# x_test = data[x_test_index][..., x_features]
|
||||
# y_test = data[y_test_index][..., y_features]
|
||||
|
||||
# Iterative
|
||||
x_train = np.stack([data[idx[0] : idx[1]] for idx in train_index])[..., x_features]
|
||||
# y_train = np.stack([data[idx[1] : idx[2]] for idx in train_index])[..., y_features]
|
||||
# x_val = np.stack([data[idx[0] : idx[1]] for idx in val_index])[..., x_features]
|
||||
# y_val = np.stack([data[idx[1] : idx[2]] for idx in val_index])[..., y_features]
|
||||
x_test = np.stack([data[idx[0] : idx[1]] for idx in test_index])[..., x_features]
|
||||
y_test = np.stack([data[idx[1] : idx[2]] for idx in test_index])[..., y_features]
|
||||
|
||||
scaler = StandardScaler(mean=x_train[..., 0].mean(), std=x_train[..., 0].std())
|
||||
|
||||
# x_train[..., 0] = scaler.transform(x_train[..., 0])
|
||||
# x_val[..., 0] = scaler.transform(x_val[..., 0])
|
||||
x_test[..., 0] = scaler.transform(x_test[..., 0])
|
||||
|
||||
# print_log(f"Trainset:\tx-{x_train.shape}\ty-{y_train.shape}", log=log)
|
||||
# print_log(f"Valset: \tx-{x_val.shape} \ty-{y_val.shape}", log=log)
|
||||
print_log(f"Testset:\tx-{x_test.shape}\ty-{y_test.shape}", log=log)
|
||||
|
||||
# trainset = torch.utils.data.TensorDataset(
|
||||
# torch.FloatTensor(x_train), torch.FloatTensor(y_train)
|
||||
# )
|
||||
# valset = torch.utils.data.TensorDataset(
|
||||
# torch.FloatTensor(x_val), torch.FloatTensor(y_val)
|
||||
# )
|
||||
testset = torch.utils.data.TensorDataset(
|
||||
torch.FloatTensor(x_test), torch.FloatTensor(y_test)
|
||||
)
|
||||
|
||||
# trainset_loader = torch.utils.data.DataLoader(
|
||||
# trainset, batch_size=batch_size, shuffle=True
|
||||
# )
|
||||
# valset_loader = torch.utils.data.DataLoader(
|
||||
# valset, batch_size=batch_size, shuffle=False
|
||||
# )
|
||||
testset_loader = torch.utils.data.DataLoader(
|
||||
testset, batch_size=batch_size, shuffle=False
|
||||
)
|
||||
|
||||
return testset_loader, scaler
|
||||
|
|
@@ -0,0 +1,261 @@
|
|||
import torch
|
||||
import math
|
||||
import os
|
||||
import time
|
||||
import copy
|
||||
import numpy as np
|
||||
|
||||
# import pynvml
|
||||
from lib.logger import get_logger
|
||||
from lib.loss_function import all_metrics
|
||||
|
||||
|
||||
class Trainer(object):
|
||||
def __init__(
|
||||
self,
|
||||
model,
|
||||
loss,
|
||||
optimizer,
|
||||
train_loader,
|
||||
val_loader,
|
||||
test_loader,
|
||||
scaler,
|
||||
args,
|
||||
lr_scheduler=None,
|
||||
):
|
||||
super(Trainer, self).__init__()
|
||||
self.model = model
|
||||
self.loss = loss
|
||||
self.optimizer = optimizer
|
||||
self.train_loader = train_loader
|
||||
self.val_loader = val_loader
|
||||
self.test_loader = test_loader
|
||||
self.scaler = scaler
|
||||
self.args = args
|
||||
self.lr_scheduler = lr_scheduler
|
||||
self.train_per_epoch = len(train_loader)
|
||||
if val_loader != None:
|
||||
self.val_per_epoch = len(val_loader)
|
||||
self.best_path = os.path.join(self.args["log_dir"], "best_model.pth")
|
||||
self.best_test_path = os.path.join(self.args["log_dir"], "best_test_model.pth")
|
||||
self.loss_figure_path = os.path.join(self.args["log_dir"], "loss.png")
|
||||
# log
|
||||
if os.path.isdir(args["log_dir"]) == False and not args["debug"]:
|
||||
os.makedirs(args["log_dir"], exist_ok=True)
|
||||
self.logger = get_logger(
|
||||
args["log_dir"], name=self.model.__class__.__name__, debug=args["debug"]
|
||||
)
|
||||
self.logger.info("Experiment log path in: {}".format(args["log_dir"]))
|
||||
|
||||
def val_epoch(self, epoch, val_dataloader):
|
||||
self.model.eval()
|
||||
total_val_loss = 0
|
||||
epoch_time = time.time()
|
||||
with torch.no_grad():
|
||||
for batch_idx, (data, target) in enumerate(val_dataloader):
|
||||
data = data
|
||||
label = target[..., : self.args["output_dim"]]
|
||||
output = self.model(data)
|
||||
if self.args["real_value"]:
|
||||
output = self.scaler.inverse_transform(output)
|
||||
loss = self.loss(output.cuda(), label)
|
||||
if not torch.isnan(loss):
|
||||
total_val_loss += loss.item()
|
||||
val_loss = total_val_loss / len(val_dataloader)
|
||||
self.logger.info(
|
||||
"Val Epoch {}: average Loss: {:.6f}, train time: {:.2f} s".format(
|
||||
epoch, val_loss, time.time() - epoch_time
|
||||
)
|
||||
)
|
||||
return val_loss
|
||||
|
||||
def test_epoch(self, epoch, test_dataloader):
|
||||
self.model.eval()
|
||||
total_test_loss = 0
|
||||
epoch_time = time.time()
|
||||
with torch.no_grad():
|
||||
for batch_idx, (data, target) in enumerate(test_dataloader):
|
||||
data = data
|
||||
label = target[..., : self.args["output_dim"]]
|
||||
output = self.model(data)
|
||||
if self.args["real_value"]:
|
||||
output = self.scaler.inverse_transform(output)
|
||||
loss = self.loss(output.cuda(), label)
|
||||
if not torch.isnan(loss):
|
||||
total_test_loss += loss.item()
|
||||
test_loss = total_test_loss / len(test_dataloader)
|
||||
self.logger.info(
|
||||
"test Epoch {}: average Loss: {:.6f}, train time: {:.2f} s".format(
|
||||
epoch, test_loss, time.time() - epoch_time
|
||||
)
|
||||
)
|
||||
return test_loss
|
||||
|
||||
def train_epoch(self, epoch):
|
||||
self.model.train()
|
||||
total_loss = 0
|
||||
epoch_time = time.time()
|
||||
for batch_idx, (data, target) in enumerate(self.train_loader):
|
||||
data = data
|
||||
label = target[..., : self.args["output_dim"]]
|
||||
self.optimizer.zero_grad()
|
||||
|
||||
output = self.model(data)
|
||||
if self.args["real_value"]:
|
||||
output = self.scaler.inverse_transform(output)
|
||||
|
||||
loss = self.loss(output.cuda(), label)
|
||||
loss.backward()
|
||||
|
||||
# add max grad clipping
|
||||
if self.args["grad_norm"]:
|
||||
torch.nn.utils.clip_grad_norm_(
|
||||
self.model.parameters(), self.args["max_grad_norm"]
|
||||
)
|
||||
self.optimizer.step()
|
||||
total_loss += loss.item()
|
||||
|
||||
# log information
|
||||
if (batch_idx + 1) % self.args["log_step"] == 0:
|
||||
self.logger.info(
|
||||
"Train Epoch {}: {}/{} Loss: {:.6f}".format(
|
||||
epoch, batch_idx + 1, self.train_per_epoch, loss.item()
|
||||
)
|
||||
)
|
||||
train_epoch_loss = total_loss / self.train_per_epoch
|
||||
self.logger.info(
|
||||
"Train Epoch {}: averaged Loss: {:.6f}, train time: {:.2f} s".format(
|
||||
epoch, train_epoch_loss, time.time() - epoch_time
|
||||
)
|
||||
)
|
||||
|
||||
# learning rate decay
|
||||
if self.args["lr_decay"]:
|
||||
self.lr_scheduler.step()
|
||||
return train_epoch_loss
|
||||
|
||||
def train(self):
|
||||
best_model = None
|
||||
best_test_model = None
|
||||
not_improved_count = 0
|
||||
best_loss = float("inf")
|
||||
best_test_loss = float("inf")
|
||||
valid_loss = []
|
||||
for epoch in range(0, self.args["epochs"]):
|
||||
train_epoch_loss = self.train_epoch(epoch)
|
||||
if self.val_loader == None:
|
||||
val_dataloader = self.test_loader
|
||||
else:
|
||||
val_dataloader = self.val_loader
|
||||
test_dataloader = self.test_loader
|
||||
|
||||
val_epoch_loss = self.val_epoch(epoch, val_dataloader)
|
||||
valid_loss.append(val_epoch_loss)
|
||||
|
||||
test_epoch_loss = self.test_epoch(epoch, test_dataloader)
|
||||
if train_epoch_loss > 1e6:
|
||||
self.logger.warning("Gradient explosion detected. Ending...")
|
||||
break
|
||||
|
||||
if val_epoch_loss < best_loss:
|
||||
best_loss = val_epoch_loss
|
||||
not_improved_count = 0
|
||||
best_state = True
|
||||
else:
|
||||
not_improved_count += 1
|
||||
best_state = False
|
||||
# early stop
|
||||
if self.args["early_stop"]:
|
||||
if not_improved_count == self.args["early_stop_patience"]:
|
||||
self.logger.info(
|
||||
"Validation performance didn't improve for {} epochs. "
|
||||
"Training stops.".format(self.args["early_stop_patience"])
|
||||
)
|
||||
break
|
||||
# save the best state
|
||||
if best_state == True:
|
||||
self.logger.info("Current best model saved!")
|
||||
best_model = copy.deepcopy(self.model.state_dict())
|
||||
|
||||
if test_epoch_loss < best_test_loss:
|
||||
best_test_loss = test_epoch_loss
|
||||
best_test_model = copy.deepcopy(self.model.state_dict())
|
||||
|
||||
# save the best model to file
|
||||
if not self.args["debug"]:
|
||||
torch.save(best_model, self.best_path)
|
||||
self.logger.info("Saving current best model to " + self.best_path)
|
||||
torch.save(best_test_model, self.best_test_path)
|
||||
self.logger.info("Saving current best model to " + self.best_test_path)
|
||||
|
||||
# test
|
||||
self.model.load_state_dict(best_model)
|
||||
self.test(self.model, self.args, self.test_loader, self.scaler, self.logger)
|
||||
|
||||
self.logger.info("This is best_test_model")
|
||||
self.model.load_state_dict(best_test_model)
|
||||
self.test(self.model, self.args, self.test_loader, self.scaler, self.logger)
|
||||
|
||||
def save_checkpoint(self):
|
||||
state = {
|
||||
"state_dict": self.model.state_dict(),
|
||||
"optimizer": self.optimizer.state_dict(),
|
||||
"config": self.args,
|
||||
}
|
||||
torch.save(state, self.best_path)
|
||||
self.logger.info("Saving current best model to " + self.best_path)
|
||||
|
||||
@staticmethod
|
||||
def test(model, args, data_loader, scaler, logger, path=None):
|
||||
if path != None:
|
||||
check_point = torch.load(path)
|
||||
state_dict = check_point["state_dict"]
|
||||
args = check_point["config"]
|
||||
model.load_state_dict(state_dict)
|
||||
model.to(args["device"])
|
||||
model.eval()
|
||||
y_pred = []
|
||||
y_true = []
|
||||
with torch.no_grad():
|
||||
for batch_idx, (data, target) in enumerate(data_loader):
|
||||
data = data
|
||||
label = target[..., : args["output_dim"]]
|
||||
output = model(data)
|
||||
y_true.append(label)
|
||||
y_pred.append(output)
|
||||
if args["real_value"]:
|
||||
y_pred = scaler.inverse_transform(torch.cat(y_pred, dim=0))
|
||||
y_true = torch.cat(y_true, dim=0)
|
||||
else:
|
||||
y_pred = torch.cat(y_pred, dim=0)
|
||||
y_true = torch.cat(y_true, dim=0)
|
||||
for t in range(y_true.shape[1]):
|
||||
mae, rmse, mape = all_metrics(
|
||||
y_pred[:, t, ...],
|
||||
y_true[:, t, ...],
|
||||
args["mae_thresh"],
|
||||
args["mape_thresh"],
|
||||
)
|
||||
logger.info(
|
||||
"Horizon {:02d}, MAE: {:.4f}, RMSE: {:.4f}, MAPE: {:.4f}".format(
|
||||
t + 1, mae, rmse, mape
|
||||
)
|
||||
)
|
||||
mae, rmse, mape = all_metrics(
|
||||
y_pred, y_true, args["mae_thresh"], args["mape_thresh"]
|
||||
)
|
||||
logger.info(
|
||||
"Average Horizon, MAE: {:.4f}, RMSE: {:.4f}, MAPE: {:.4f}".format(
|
||||
mae, rmse, mape
|
||||
)
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _compute_sampling_threshold(global_step, k):
|
||||
"""
|
||||
Computes the sampling probability for scheduled sampling using inverse sigmoid.
|
||||
:param global_step:
|
||||
:param k:
|
||||
:return:
|
||||
"""
|
||||
return k / (k + math.exp(global_step / k))
|
||||
|
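To make the inverse-sigmoid schedule concrete, a minimal sketch (the value k=2000 is illustrative only, not taken from this diff):

import math

def sampling_threshold(global_step, k):
    # same formula as Trainer._compute_sampling_threshold above
    return k / (k + math.exp(global_step / k))

# probability of using ground truth decays from ~1 toward 0 as training proceeds
print(sampling_threshold(0, 2000))       # ~0.9995
print(sampling_threshold(15000, 2000))   # ~0.53
print(sampling_threshold(25000, 2000))   # ~0.007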
|
@@ -0,0 +1,39 @@
if __name__ == "__main__":
    import os
    import subprocess

    # Kaggle dataset list
    datasets = {
        "hangzhou_taxi": "changyuheng/hz-taxi",
        "nyc_taxi": "new-york-city/nyc-taxi-trip-duration",
        "hangzhou_bike": "changyuheng/hz-bike",
    }

    # directory to save the downloads
    save_dir = "./datasets"
    os.makedirs(save_dir, exist_ok=True)

    # check the Kaggle API configuration
    kaggle_json = os.path.expanduser("~/.kaggle/kaggle.json")
    if not os.path.exists(kaggle_json):
        raise FileNotFoundError(
            f"{kaggle_json} not found; download your API key from the Kaggle settings page and place it there first."
        )

    # download each dataset in turn
    for name, kaggle_id in datasets.items():
        print(f"📥 Downloading {name} ({kaggle_id}) ...")
        cmd = [
            "kaggle",
            "datasets",
            "download",
            "-d",
            kaggle_id,
            "-p",
            os.path.join(save_dir, name),
            "--unzip",
        ]
        subprocess.run(cmd, check=True)
        print(f"✅ {name} download complete\n")

    print("🎉 All datasets downloaded to", save_dir)
@@ -1,7 +1,7 @@
 import torch
 import torch.nn as nn
 from model.model_selector import model_selector
-from utils.loss_function import masked_mae_loss
+from lib.loss_function import masked_mae_loss
 import random
 import numpy as np
 from datetime import datetime
@@ -4,10 +4,10 @@ import os
 import time
 import copy
 import numpy as np
-from utils.logger import get_logger
+from lib.logger import get_logger
 from lib.metrics import All_Metrics
 from lib.TrainInits import print_model_parameters
-from utils.training_stats import TrainingStats
+from lib.training_stats import TrainingStats
 
 
 class Trainer(object):
@@ -4,10 +4,10 @@ import os
 import time
 import copy
 import numpy as np
-from utils.logger import get_logger
+from lib.logger import get_logger
 from lib.metrics import All_Metrics
 from lib.TrainInits import print_model_parameters
-from utils.training_stats import TrainingStats
+from lib.training_stats import TrainingStats
 
 
 class Trainer(object):
run.py
@@ -3,7 +3,7 @@ import torch
 
 # import time
 from config.args_parser import parse_args
-import utils.initializer as init
+import lib.initializer as init
 from dataloader.loader_selector import get_dataloader
 from trainer.trainer_selector import select_trainer
 
@@ -60,7 +60,7 @@ def main():
 
 
 if __name__ == "__main__":
-    from utils.Download_data import check_and_download_data
+    from lib.Download_data import check_and_download_data
 
     data_complete = check_and_download_data()
     assert data_complete is not None, "Dataset download failed, please retry!"
@@ -5,9 +5,9 @@ import copy
 from tqdm import tqdm
 
 import torch
-from utils.logger import get_logger
-from utils.loss_function import all_metrics
-from utils.training_stats import TrainingStats
+from lib.logger import get_logger
+from lib.loss_function import all_metrics
+from lib.training_stats import TrainingStats
 
 
 class Trainer:
@@ -5,9 +5,9 @@ import copy
 from tqdm import tqdm
 
 import torch
-from utils.logger import get_logger
-from utils.loss_function import all_metrics
-from utils.training_stats import TrainingStats
+from lib.logger import get_logger
+from lib.loss_function import all_metrics
+from lib.training_stats import TrainingStats
 
 
 class Trainer:
@@ -5,9 +5,9 @@ import copy
 from tqdm import tqdm
 
 import torch
-from utils.logger import get_logger
-from utils.loss_function import all_metrics
-from utils.training_stats import TrainingStats
+from lib.logger import get_logger
+from lib.loss_function import all_metrics
+from lib.training_stats import TrainingStats
 
 
 class Trainer:
@@ -5,9 +5,9 @@ import copy
 from tqdm import tqdm
 
 import torch
-from utils.logger import get_logger
-from utils.loss_function import all_metrics
-from utils.training_stats import TrainingStats
+from lib.logger import get_logger
+from lib.loss_function import all_metrics
+from lib.training_stats import TrainingStats
 
 
 class Trainer:
@@ -1,5 +1,6 @@
import math
import os
import sys
import time
import copy
import torch.nn.functional as F
@@ -7,10 +8,10 @@ import torch
 from torch import nn
 
 from tqdm import tqdm
-from utils.logger import get_logger
-from utils.loss_function import all_metrics
+from lib.logger import get_logger
+from lib.loss_function import all_metrics
 from model.STMLP.STMLP import STMLP
-from utils.training_stats import TrainingStats
+from lib.training_stats import TrainingStats
 
 
 class Trainer:
@@ -4,8 +4,8 @@ import time
 import copy
 import psutil
 import torch
-from utils.logger import get_logger
-from utils.loss_function import all_metrics
+from lib.logger import get_logger
+from lib.loss_function import all_metrics
 
 
 class TrainingStats:
@@ -5,9 +5,9 @@ import copy
 from tqdm import tqdm
 
 import torch
-from utils.logger import get_logger
-from utils.loss_function import all_metrics
-from utils.training_stats import TrainingStats
+from lib.logger import get_logger
+from lib.loss_function import all_metrics
+from lib.training_stats import TrainingStats
 
 
 class Trainer:
@@ -5,9 +5,9 @@ import copy
 from tqdm import tqdm
 
 import torch
-from utils.logger import get_logger
-from utils.loss_function import all_metrics
-from utils.training_stats import TrainingStats
+from lib.logger import get_logger
+from lib.loss_function import all_metrics
+from lib.training_stats import TrainingStats
 
 
 class Trainer:
@@ -0,0 +1,80 @@
|
|||
import pandas as pd
|
||||
import numpy as np
|
||||
import os
|
||||
from statsmodels.tsa.stattools import adfuller
|
||||
from arch.unitroot import ADF
|
||||
|
||||
|
||||
def calculate_ADF(root_path, data_path):
|
||||
df_raw = pd.read_csv(os.path.join(root_path, data_path))
|
||||
cols = list(df_raw.columns)
|
||||
cols.remove("date")
|
||||
df_raw = df_raw[cols]
|
||||
adf_list = []
|
||||
for i in cols:
|
||||
df_data = df_raw[i]
|
||||
adf = adfuller(df_data, maxlag=1)
|
||||
print(adf)
|
||||
adf_list.append(adf)
|
||||
return np.array(adf_list)
|
||||
|
||||
|
||||
def calculate_target_ADF(root_path, data_path, target="OT"):
|
||||
df_raw = pd.read_csv(os.path.join(root_path, data_path))
|
||||
target_cols = target.split(",")
|
||||
# df_data = df_raw[target]
|
||||
df_raw = df_raw[target_cols]
|
||||
adf_list = []
|
||||
for i in target_cols:
|
||||
df_data = df_raw[i]
|
||||
adf = adfuller(df_data, maxlag=1)
|
||||
# print(adf)
|
||||
adf_list.append(adf)
|
||||
return np.array(adf_list)
|
||||
|
||||
|
||||
def archADF(root_path, data_path):
|
||||
df = pd.read_csv(os.path.join(root_path, data_path))
|
||||
cols = df.columns[1:]
|
||||
stats = 0
|
||||
for target_col in cols:
|
||||
series = df[target_col].values
|
||||
adf = ADF(series)
|
||||
stat = adf.stat
|
||||
stats += stat
|
||||
return stats / len(cols)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# * Exchange - result: -1.902402344564288 | report: -1.889
|
||||
ADFmetric = archADF(
|
||||
root_path="./dataset/exchange_rate/", data_path="exchange_rate.csv"
|
||||
)
|
||||
print("Exchange ADF metric", ADFmetric)
|
||||
|
||||
# * Illness - result: -5.33416661870624 | report: -5.406
|
||||
ADFmetric = archADF(
|
||||
root_path="./dataset/illness/", data_path="national_illness.csv"
|
||||
)
|
||||
print("Illness ADF metric", ADFmetric)
|
||||
|
||||
# * ETTm2 - result: -5.663628743471695 | report: -6.225
|
||||
ADFmetric = archADF(root_path="./dataset/ETT-small/", data_path="ETTm2.csv")
|
||||
print("ETTm2 ADF metric", ADFmetric)
|
||||
|
||||
# * Electricity - result: -8.44485821939281 | report: -8.483
|
||||
ADFmetric = archADF(root_path="./dataset/electricity/", data_path="electricity.csv")
|
||||
print("Electricity ADF metric", ADFmetric)
|
||||
|
||||
# * Traffic - result: -15.020978067839014 | report: -15.046
|
||||
ADFmetric = archADF(root_path="./dataset/traffic/", data_path="traffic.csv")
|
||||
print("Traffic ADF metric", ADFmetric)
|
||||
|
||||
# * Weather - result: -26.681433085204866 | report: -26.661
|
||||
ADFmetric = archADF(root_path="./dataset/weather/", data_path="weather.csv")
|
||||
print("Weather ADF metric", ADFmetric)
|
||||
|
||||
# print(ADFmetric)
|
||||
|
||||
# mean_ADFmetric = ADFmetric[:,0].mean()
|
||||
# print(mean_ADFmetric)
|
||||
|
|
@@ -0,0 +1,613 @@
|
|||
import numpy as np
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
def jitter(x, sigma=0.03):
|
||||
# https://arxiv.org/pdf/1706.00527.pdf
|
||||
return x + np.random.normal(loc=0.0, scale=sigma, size=x.shape)
|
||||
|
||||
|
||||
def scaling(x, sigma=0.1):
|
||||
# https://arxiv.org/pdf/1706.00527.pdf
|
||||
factor = np.random.normal(loc=1.0, scale=sigma, size=(x.shape[0], x.shape[2]))
|
||||
return np.multiply(x, factor[:, np.newaxis, :])
|
||||
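A minimal hedged example of composing the two augmentations above on a toy batch (the batch shape and sigma values are illustrative only, not taken from this diff):

import numpy as np

np.random.seed(0)
batch = np.random.randn(8, 24, 3)                      # (samples, time steps, channels)
aug = scaling(jitter(batch, sigma=0.03), sigma=0.1)    # additive noise, then per-channel rescaling
assert aug.shape == batch.shape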
|
||||
|
||||
def rotation(x):
|
||||
x = np.array(x)
|
||||
flip = np.random.choice([-1, 1], size=(x.shape[0], x.shape[2]))
|
||||
rotate_axis = np.arange(x.shape[2])
|
||||
np.random.shuffle(rotate_axis)
|
||||
return flip[:, np.newaxis, :] * x[:, :, rotate_axis]
|
||||
|
||||
|
||||
def permutation(x, max_segments=5, seg_mode="equal"):
|
||||
orig_steps = np.arange(x.shape[1])
|
||||
|
||||
num_segs = np.random.randint(1, max_segments, size=(x.shape[0]))
|
||||
|
||||
ret = np.zeros_like(x)
|
||||
for i, pat in enumerate(x):
|
||||
if num_segs[i] > 1:
|
||||
if seg_mode == "random":
|
||||
split_points = np.random.choice(
|
||||
x.shape[1] - 2, num_segs[i] - 1, replace=False
|
||||
)
|
||||
split_points.sort()
|
||||
splits = np.split(orig_steps, split_points)
|
||||
else:
|
||||
splits = np.array_split(orig_steps, num_segs[i])
|
||||
warp = np.concatenate(np.random.permutation(splits)).ravel()
|
||||
# ? Question: What is the point of making segments?
|
||||
# for i in range(len(splits)):
|
||||
# permute = np.random.permutation(splits[i])
|
||||
|
||||
ret[i] = pat[warp]
|
||||
else:
|
||||
ret[i] = pat
|
||||
return ret
|
||||
|
||||
|
||||
def magnitude_warp(x, sigma=0.2, knot=4):
|
||||
from scipy.interpolate import CubicSpline
|
||||
|
||||
orig_steps = np.arange(x.shape[1])
|
||||
|
||||
random_warps = np.random.normal(
|
||||
loc=1.0, scale=sigma, size=(x.shape[0], knot + 2, x.shape[2])
|
||||
)
|
||||
warp_steps = (
|
||||
np.ones((x.shape[2], 1)) * (np.linspace(0, x.shape[1] - 1.0, num=knot + 2))
|
||||
).T
|
||||
ret = np.zeros_like(x)
|
||||
for i, pat in enumerate(x):
|
||||
warper = np.array(
|
||||
[
|
||||
CubicSpline(warp_steps[:, dim], random_warps[i, :, dim])(orig_steps)
|
||||
for dim in range(x.shape[2])
|
||||
]
|
||||
).T
|
||||
ret[i] = pat * warper
|
||||
|
||||
return ret
|
||||
|
||||
|
||||
def time_warp(x, sigma=0.2, knot=4):
|
||||
from scipy.interpolate import CubicSpline
|
||||
|
||||
orig_steps = np.arange(x.shape[1])
|
||||
|
||||
random_warps = np.random.normal(
|
||||
loc=1.0, scale=sigma, size=(x.shape[0], knot + 2, x.shape[2])
|
||||
)
|
||||
warp_steps = (
|
||||
np.ones((x.shape[2], 1)) * (np.linspace(0, x.shape[1] - 1.0, num=knot + 2))
|
||||
).T
|
||||
|
||||
ret = np.zeros_like(x)
|
||||
for i, pat in enumerate(x):
|
||||
for dim in range(x.shape[2]):
|
||||
time_warp = CubicSpline(
|
||||
warp_steps[:, dim], warp_steps[:, dim] * random_warps[i, :, dim]
|
||||
)(orig_steps)
|
||||
scale = (x.shape[1] - 1) / time_warp[-1]
|
||||
ret[i, :, dim] = np.interp(
|
||||
orig_steps, np.clip(scale * time_warp, 0, x.shape[1] - 1), pat[:, dim]
|
||||
).T
|
||||
return ret
|
||||
|
||||
|
||||
def window_slice(x, reduce_ratio=0.9):
|
||||
# https://halshs.archives-ouvertes.fr/halshs-01357973/document
|
||||
target_len = np.ceil(reduce_ratio * x.shape[1]).astype(int)
|
||||
if target_len >= x.shape[1]:
|
||||
return x
|
||||
starts = np.random.randint(
|
||||
low=0, high=x.shape[1] - target_len, size=(x.shape[0])
|
||||
).astype(int)
|
||||
ends = (target_len + starts).astype(int)
|
||||
|
||||
ret = np.zeros_like(x)
|
||||
for i, pat in enumerate(x):
|
||||
for dim in range(x.shape[2]):
|
||||
ret[i, :, dim] = np.interp(
|
||||
np.linspace(0, target_len, num=x.shape[1]),
|
||||
np.arange(target_len),
|
||||
pat[starts[i] : ends[i], dim],
|
||||
).T
|
||||
return ret
|
||||
|
||||
|
||||
def window_warp(x, window_ratio=0.1, scales=[0.5, 2.0]):
|
||||
# https://halshs.archives-ouvertes.fr/halshs-01357973/document
|
||||
warp_scales = np.random.choice(scales, x.shape[0])
|
||||
warp_size = np.ceil(window_ratio * x.shape[1]).astype(int)
|
||||
window_steps = np.arange(warp_size)
|
||||
|
||||
window_starts = np.random.randint(
|
||||
low=1, high=x.shape[1] - warp_size - 1, size=(x.shape[0])
|
||||
).astype(int)
|
||||
window_ends = (window_starts + warp_size).astype(int)
|
||||
|
||||
ret = np.zeros_like(x)
|
||||
for i, pat in enumerate(x):
|
||||
for dim in range(x.shape[2]):
|
||||
start_seg = pat[: window_starts[i], dim]
|
||||
window_seg = np.interp(
|
||||
np.linspace(0, warp_size - 1, num=int(warp_size * warp_scales[i])),
|
||||
window_steps,
|
||||
pat[window_starts[i] : window_ends[i], dim],
|
||||
)
|
||||
end_seg = pat[window_ends[i] :, dim]
|
||||
warped = np.concatenate((start_seg, window_seg, end_seg))
|
||||
ret[i, :, dim] = np.interp(
|
||||
np.arange(x.shape[1]),
|
||||
np.linspace(0, x.shape[1] - 1.0, num=warped.size),
|
||||
warped,
|
||||
).T
|
||||
return ret
|
||||
|
||||
|
||||
def spawner(x, labels, sigma=0.05, verbose=0):
|
||||
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6983028/
|
||||
# use verbose=-1 to turn off warnings
|
||||
# use verbose=1 to print out figures
|
||||
|
||||
import utils.dtw as dtw
|
||||
|
||||
random_points = np.random.randint(low=1, high=x.shape[1] - 1, size=x.shape[0])
|
||||
window = np.ceil(x.shape[1] / 10.0).astype(int)
|
||||
orig_steps = np.arange(x.shape[1])
|
||||
l = np.argmax(labels, axis=1) if labels.ndim > 1 else labels
|
||||
|
||||
ret = np.zeros_like(x)
|
||||
# for i, pat in enumerate(tqdm(x)):
|
||||
for i, pat in enumerate(x):
|
||||
# guarantees that the same one isn't selected
|
||||
choices = np.delete(np.arange(x.shape[0]), i)
|
||||
# remove ones of different classes
|
||||
choices = np.where(l[choices] == l[i])[0]
|
||||
if choices.size > 0:
|
||||
random_sample = x[np.random.choice(choices)]
|
||||
# SPAWNER splits the path into two randomly
|
||||
path1 = dtw.dtw(
|
||||
pat[: random_points[i]],
|
||||
random_sample[: random_points[i]],
|
||||
dtw.RETURN_PATH,
|
||||
slope_constraint="symmetric",
|
||||
window=window,
|
||||
)
|
||||
path2 = dtw.dtw(
|
||||
pat[random_points[i] :],
|
||||
random_sample[random_points[i] :],
|
||||
dtw.RETURN_PATH,
|
||||
slope_constraint="symmetric",
|
||||
window=window,
|
||||
)
|
||||
combined = np.concatenate(
|
||||
(np.vstack(path1), np.vstack(path2 + random_points[i])), axis=1
|
||||
)
|
||||
if verbose:
|
||||
# print(random_points[i])
|
||||
dtw_value, cost, DTW_map, path = dtw.dtw(
|
||||
pat,
|
||||
random_sample,
|
||||
return_flag=dtw.RETURN_ALL,
|
||||
slope_constraint="symmetric",  # was an undefined name here; "symmetric" matches the dtw calls above
|
||||
window=window,
|
||||
)
|
||||
dtw.draw_graph1d(cost, DTW_map, path, pat, random_sample)
|
||||
dtw.draw_graph1d(cost, DTW_map, combined, pat, random_sample)
|
||||
mean = np.mean([pat[combined[0]], random_sample[combined[1]]], axis=0)
|
||||
for dim in range(x.shape[2]):
|
||||
ret[i, :, dim] = np.interp(
|
||||
orig_steps,
|
||||
np.linspace(0, x.shape[1] - 1.0, num=mean.shape[0]),
|
||||
mean[:, dim],
|
||||
).T
|
||||
else:
|
||||
# if verbose > -1:
|
||||
# print("There is only one pattern of class {}, skipping pattern average".format(l[i]))
|
||||
ret[i, :] = pat
|
||||
return jitter(ret, sigma=sigma)
|
||||
|
||||
|
||||
def wdba(
|
||||
x, labels, batch_size=6, slope_constraint="symmetric", use_window=True, verbose=0
|
||||
):
|
||||
# https://ieeexplore.ieee.org/document/8215569
|
||||
# use verbose = -1 to turn off warnings
|
||||
# slope_constraint is for DTW. "symmetric" or "asymmetric"
|
||||
x = np.array(x)
|
||||
import utils.dtw as dtw
|
||||
|
||||
if use_window:
|
||||
window = np.ceil(x.shape[1] / 10.0).astype(int)
|
||||
else:
|
||||
window = None
|
||||
orig_steps = np.arange(x.shape[1])
|
||||
l = np.argmax(labels, axis=1) if labels.ndim > 1 else labels
|
||||
|
||||
ret = np.zeros_like(x)
|
||||
# for i in tqdm(range(ret.shape[0])):
|
||||
for i in range(ret.shape[0]):
|
||||
# get the same class as i
|
||||
choices = np.where(l == l[i])[0]
|
||||
if choices.size > 0:
|
||||
# pick random intra-class pattern
|
||||
k = min(choices.size, batch_size)
|
||||
random_prototypes = x[np.random.choice(choices, k, replace=False)]
|
||||
|
||||
# calculate dtw between all
|
||||
dtw_matrix = np.zeros((k, k))
|
||||
for p, prototype in enumerate(random_prototypes):
|
||||
for s, sample in enumerate(random_prototypes):
|
||||
if p == s:
|
||||
dtw_matrix[p, s] = 0.0
|
||||
else:
|
||||
dtw_matrix[p, s] = dtw.dtw(
|
||||
prototype,
|
||||
sample,
|
||||
dtw.RETURN_VALUE,
|
||||
slope_constraint=slope_constraint,
|
||||
window=window,
|
||||
)
|
||||
|
||||
# get medoid
|
||||
medoid_id = np.argsort(np.sum(dtw_matrix, axis=1))[0]
|
||||
nearest_order = np.argsort(dtw_matrix[medoid_id])
|
||||
medoid_pattern = random_prototypes[medoid_id]
|
||||
|
||||
# start weighted DBA
|
||||
average_pattern = np.zeros_like(medoid_pattern)
|
||||
weighted_sums = np.zeros((medoid_pattern.shape[0]))
|
||||
for nid in nearest_order:
|
||||
if nid == medoid_id or dtw_matrix[medoid_id, nearest_order[1]] == 0.0:
|
||||
average_pattern += medoid_pattern
|
||||
weighted_sums += np.ones_like(weighted_sums)
|
||||
else:
|
||||
path = dtw.dtw(
|
||||
medoid_pattern,
|
||||
random_prototypes[nid],
|
||||
dtw.RETURN_PATH,
|
||||
slope_constraint=slope_constraint,
|
||||
window=window,
|
||||
)
|
||||
dtw_value = dtw_matrix[medoid_id, nid]
|
||||
warped = random_prototypes[nid, path[1]]
|
||||
weight = np.exp(
|
||||
np.log(0.5)
|
||||
* dtw_value
|
||||
/ dtw_matrix[medoid_id, nearest_order[1]]
|
||||
)
|
||||
average_pattern[path[0]] += weight * warped
|
||||
weighted_sums[path[0]] += weight
|
||||
|
||||
ret[i, :] = average_pattern / weighted_sums[:, np.newaxis]
|
||||
else:
|
||||
# if verbose > -1:
|
||||
# print("There is only one pattern of class {}, skipping pattern average".format(l[i]))
|
||||
ret[i, :] = x[i]
|
||||
return ret
|
||||
|
||||
|
||||
# Proposed
|
||||
|
||||
|
||||
def random_guided_warp(
|
||||
x,
|
||||
labels,
|
||||
slope_constraint="symmetric",
|
||||
use_window=True,
|
||||
dtw_type="normal",
|
||||
verbose=0,
|
||||
):
|
||||
# use verbose = -1 to turn off warnings
|
||||
# slope_constraint is for DTW. "symmetric" or "asymmetric"
|
||||
# dtw_type is for shapeDTW or DTW. "normal" or "shape"
|
||||
|
||||
import utils.dtw as dtw
|
||||
|
||||
if use_window:
|
||||
window = np.ceil(x.shape[1] / 10.0).astype(int)
|
||||
else:
|
||||
window = None
|
||||
orig_steps = np.arange(x.shape[1])
|
||||
l = np.argmax(labels, axis=1) if labels.ndim > 1 else labels
|
||||
|
||||
ret = np.zeros_like(x)
|
||||
# for i, pat in enumerate(tqdm(x)):
|
||||
for i, pat in enumerate(x):
|
||||
        # guarantees that the same series isn't selected
|
||||
choices = np.delete(np.arange(x.shape[0]), i)
|
||||
# remove ones of different classes
|
||||
choices = np.where(l[choices] == l[i])[0]
|
||||
if choices.size > 0:
|
||||
# pick random intra-class pattern
|
||||
random_prototype = x[np.random.choice(choices)]
|
||||
|
||||
if dtw_type == "shape":
|
||||
path = dtw.shape_dtw(
|
||||
random_prototype,
|
||||
pat,
|
||||
dtw.RETURN_PATH,
|
||||
slope_constraint=slope_constraint,
|
||||
window=window,
|
||||
)
|
||||
else:
|
||||
path = dtw.dtw(
|
||||
random_prototype,
|
||||
pat,
|
||||
dtw.RETURN_PATH,
|
||||
slope_constraint=slope_constraint,
|
||||
window=window,
|
||||
)
|
||||
|
||||
# Time warp
|
||||
warped = pat[path[1]]
|
||||
for dim in range(x.shape[2]):
|
||||
ret[i, :, dim] = np.interp(
|
||||
orig_steps,
|
||||
np.linspace(0, x.shape[1] - 1.0, num=warped.shape[0]),
|
||||
warped[:, dim],
|
||||
).T
|
||||
else:
|
||||
# if verbose > -1:
|
||||
# print("There is only one pattern of class {}, skipping timewarping".format(l[i]))
|
||||
ret[i, :] = pat
|
||||
return ret
|
||||
|
||||
|
||||
def random_guided_warp_shape(x, labels, slope_constraint="symmetric", use_window=True):
|
||||
return random_guided_warp(x, labels, slope_constraint, use_window, dtw_type="shape")
|
||||
|
||||
|
||||
def discriminative_guided_warp(
|
||||
x,
|
||||
labels,
|
||||
batch_size=6,
|
||||
slope_constraint="symmetric",
|
||||
use_window=True,
|
||||
dtw_type="normal",
|
||||
use_variable_slice=True,
|
||||
verbose=0,
|
||||
):
|
||||
# use verbose = -1 to turn off warnings
|
||||
# slope_constraint is for DTW. "symmetric" or "asymmetric"
|
||||
# dtw_type is for shapeDTW or DTW. "normal" or "shape"
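    # Discriminative guided warping: among a small batch of same-class
    # prototypes, pick the one whose average DTW distance to the negative
    # (other-class) prototypes exceeds its average distance to the remaining
    # positives by the largest margin, warp the series onto it, and optionally
    # window-slice the result in proportion to the amount of warping applied.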
|
||||
|
||||
import utils.dtw as dtw
|
||||
|
||||
if use_window:
|
||||
window = np.ceil(x.shape[1] / 10.0).astype(int)
|
||||
else:
|
||||
window = None
|
||||
orig_steps = np.arange(x.shape[1])
|
||||
l = np.argmax(labels, axis=1) if labels.ndim > 1 else labels
|
||||
|
||||
positive_batch = np.ceil(batch_size / 2).astype(int)
|
||||
negative_batch = np.floor(batch_size / 2).astype(int)
|
||||
|
||||
ret = np.zeros_like(x)
|
||||
warp_amount = np.zeros(x.shape[0])
|
||||
# for i, pat in enumerate(tqdm(x)):
|
||||
for i, pat in enumerate(x):
|
||||
        # guarantees that the same series isn't selected
|
||||
choices = np.delete(np.arange(x.shape[0]), i)
|
||||
|
||||
# remove ones of different classes
|
||||
positive = np.where(l[choices] == l[i])[0]
|
||||
negative = np.where(l[choices] != l[i])[0]
|
||||
|
||||
if positive.size > 0 and negative.size > 0:
|
||||
pos_k = min(positive.size, positive_batch)
|
||||
neg_k = min(negative.size, negative_batch)
|
||||
positive_prototypes = x[np.random.choice(positive, pos_k, replace=False)]
|
||||
negative_prototypes = x[np.random.choice(negative, neg_k, replace=False)]
|
||||
|
||||
# vector embedding and nearest prototype in one
|
||||
pos_aves = np.zeros((pos_k))
|
||||
neg_aves = np.zeros((pos_k))
|
||||
if dtw_type == "shape":
|
||||
for p, pos_prot in enumerate(positive_prototypes):
|
||||
for ps, pos_samp in enumerate(positive_prototypes):
|
||||
if p != ps:
|
||||
pos_aves[p] += (1.0 / (pos_k - 1.0)) * dtw.shape_dtw(
|
||||
pos_prot,
|
||||
pos_samp,
|
||||
dtw.RETURN_VALUE,
|
||||
slope_constraint=slope_constraint,
|
||||
window=window,
|
||||
)
|
||||
for ns, neg_samp in enumerate(negative_prototypes):
|
||||
neg_aves[p] += (1.0 / neg_k) * dtw.shape_dtw(
|
||||
pos_prot,
|
||||
neg_samp,
|
||||
dtw.RETURN_VALUE,
|
||||
slope_constraint=slope_constraint,
|
||||
window=window,
|
||||
)
|
||||
selected_id = np.argmax(neg_aves - pos_aves)
|
||||
path = dtw.shape_dtw(
|
||||
positive_prototypes[selected_id],
|
||||
pat,
|
||||
dtw.RETURN_PATH,
|
||||
slope_constraint=slope_constraint,
|
||||
window=window,
|
||||
)
|
||||
else:
|
||||
for p, pos_prot in enumerate(positive_prototypes):
|
||||
for ps, pos_samp in enumerate(positive_prototypes):
|
||||
if p != ps:
|
||||
pos_aves[p] += (1.0 / (pos_k - 1.0)) * dtw.dtw(
|
||||
pos_prot,
|
||||
pos_samp,
|
||||
dtw.RETURN_VALUE,
|
||||
slope_constraint=slope_constraint,
|
||||
window=window,
|
||||
)
|
||||
for ns, neg_samp in enumerate(negative_prototypes):
|
||||
neg_aves[p] += (1.0 / neg_k) * dtw.dtw(
|
||||
pos_prot,
|
||||
neg_samp,
|
||||
dtw.RETURN_VALUE,
|
||||
slope_constraint=slope_constraint,
|
||||
window=window,
|
||||
)
|
||||
selected_id = np.argmax(neg_aves - pos_aves)
|
||||
path = dtw.dtw(
|
||||
positive_prototypes[selected_id],
|
||||
pat,
|
||||
dtw.RETURN_PATH,
|
||||
slope_constraint=slope_constraint,
|
||||
window=window,
|
||||
)
|
||||
|
||||
# Time warp
|
||||
warped = pat[path[1]]
|
||||
warp_path_interp = np.interp(
|
||||
orig_steps,
|
||||
np.linspace(0, x.shape[1] - 1.0, num=warped.shape[0]),
|
||||
path[1],
|
||||
)
|
||||
warp_amount[i] = np.sum(np.abs(orig_steps - warp_path_interp))
|
||||
for dim in range(x.shape[2]):
|
||||
ret[i, :, dim] = np.interp(
|
||||
orig_steps,
|
||||
np.linspace(0, x.shape[1] - 1.0, num=warped.shape[0]),
|
||||
warped[:, dim],
|
||||
).T
|
||||
else:
|
||||
# if verbose > -1:
|
||||
# print("There is only one pattern of class {}".format(l[i]))
|
||||
ret[i, :] = pat
|
||||
warp_amount[i] = 0.0
|
||||
if use_variable_slice:
|
||||
max_warp = np.max(warp_amount)
|
||||
if max_warp == 0:
|
||||
# unchanged
|
||||
ret = window_slice(ret, reduce_ratio=0.9)
|
||||
else:
|
||||
for i, pat in enumerate(ret):
|
||||
                # Variable slicing
|
||||
ret[i] = window_slice(
|
||||
pat[np.newaxis, :, :],
|
||||
reduce_ratio=0.9 + 0.1 * warp_amount[i] / max_warp,
|
||||
)[0]
|
||||
return ret
|
||||
|
||||
|
||||
def discriminative_guided_warp_shape(
|
||||
x, labels, batch_size=6, slope_constraint="symmetric", use_window=True
|
||||
):
|
||||
return discriminative_guided_warp(
|
||||
x, labels, batch_size, slope_constraint, use_window, dtw_type="shape"
|
||||
)
|
||||
|
||||
|
||||
def run_augmentation(x, y, args):
|
||||
print("Augmenting %s" % args.data)
|
||||
np.random.seed(args.seed)
|
||||
x_aug = x
|
||||
y_aug = y
|
||||
if args.augmentation_ratio > 0:
|
||||
augmentation_tags = "%d" % args.augmentation_ratio
|
||||
for n in range(args.augmentation_ratio):
|
||||
x_temp, augmentation_tags = augment(x, y, args)
|
||||
x_aug = np.append(x_aug, x_temp, axis=0)
|
||||
y_aug = np.append(y_aug, y, axis=0)
|
||||
print("Round %d: %s done" % (n, augmentation_tags))
|
||||
if args.extra_tag:
|
||||
augmentation_tags += "_" + args.extra_tag
|
||||
else:
|
||||
augmentation_tags = args.extra_tag
|
||||
return x_aug, y_aug, augmentation_tags
|
||||
|
||||
|
||||
def run_augmentation_single(x, y, args):
|
||||
# print("Augmenting %s"%args.data)
|
||||
np.random.seed(args.seed)
|
||||
|
||||
x_aug = x
|
||||
y_aug = y
|
||||
|
||||
if len(x.shape) < 3:
|
||||
# Augmenting on the entire series: using the input data as "One Big Batch"
|
||||
# Before - (sequence_length, num_channels)
|
||||
# After - (1, sequence_length, num_channels)
|
||||
# Note: the 'sequence_length' here is actually the length of the entire series
|
||||
x_input = x[np.newaxis, :]
|
||||
elif len(x.shape) == 3:
|
||||
# Augmenting on the batch series: keep current dimension (batch_size, sequence_length, num_channels)
|
||||
x_input = x
|
||||
else:
|
||||
raise ValueError(
|
||||
"Input must be (batch_size, sequence_length, num_channels) dimensional"
|
||||
)
|
||||
|
||||
if args.augmentation_ratio > 0:
|
||||
augmentation_tags = "%d" % args.augmentation_ratio
|
||||
for n in range(args.augmentation_ratio):
|
||||
x_aug, augmentation_tags = augment(x_input, y, args)
|
||||
# print("Round %d: %s done"%(n, augmentation_tags))
|
||||
if args.extra_tag:
|
||||
augmentation_tags += "_" + args.extra_tag
|
||||
else:
|
||||
augmentation_tags = args.extra_tag
|
||||
|
||||
if len(x.shape) < 3:
|
||||
        # Revert to two dimensions in the whole-series augmentation scenario
|
||||
x_aug = x_aug.squeeze(0)
|
||||
return x_aug, y_aug, augmentation_tags
|
||||
|
||||
|
||||
def augment(x, y, args):
|
||||
import utils.augmentation as aug
|
||||
|
||||
augmentation_tags = ""
|
||||
if args.jitter:
|
||||
x = aug.jitter(x)
|
||||
augmentation_tags += "_jitter"
|
||||
if args.scaling:
|
||||
x = aug.scaling(x)
|
||||
augmentation_tags += "_scaling"
|
||||
if args.rotation:
|
||||
x = aug.rotation(x)
|
||||
augmentation_tags += "_rotation"
|
||||
if args.permutation:
|
||||
x = aug.permutation(x)
|
||||
augmentation_tags += "_permutation"
|
||||
if args.randompermutation:
|
||||
x = aug.permutation(x, seg_mode="random")
|
||||
augmentation_tags += "_randomperm"
|
||||
if args.magwarp:
|
||||
x = aug.magnitude_warp(x)
|
||||
augmentation_tags += "_magwarp"
|
||||
if args.timewarp:
|
||||
x = aug.time_warp(x)
|
||||
augmentation_tags += "_timewarp"
|
||||
if args.windowslice:
|
||||
x = aug.window_slice(x)
|
||||
augmentation_tags += "_windowslice"
|
||||
if args.windowwarp:
|
||||
x = aug.window_warp(x)
|
||||
augmentation_tags += "_windowwarp"
|
||||
if args.spawner:
|
||||
x = aug.spawner(x, y)
|
||||
augmentation_tags += "_spawner"
|
||||
if args.dtwwarp:
|
||||
x = aug.random_guided_warp(x, y)
|
||||
augmentation_tags += "_rgw"
|
||||
if args.shapedtwwarp:
|
||||
x = aug.random_guided_warp_shape(x, y)
|
||||
augmentation_tags += "_rgws"
|
||||
if args.wdba:
|
||||
x = aug.wdba(x, y)
|
||||
augmentation_tags += "_wdba"
|
||||
if args.discdtw:
|
||||
x = aug.discriminative_guided_warp(x, y)
|
||||
augmentation_tags += "_dgw"
|
||||
if args.discsdtw:
|
||||
x = aug.discriminative_guided_warp_shape(x, y)
|
||||
augmentation_tags += "_dgws"
|
||||
return x, augmentation_tags
|
||||
|
|
@@ -0,0 +1,258 @@
|
|||
__author__ = "Brian Iwana"
|
||||
|
||||
import numpy as np
|
||||
import math
|
||||
import sys
|
||||
|
||||
RETURN_VALUE = 0
|
||||
RETURN_PATH = 1
|
||||
RETURN_ALL = -1
|
||||
|
||||
|
||||
# Core DTW
|
||||
def _traceback(DTW, slope_constraint):
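    # Walk back from the bottom-right cell of the cumulative matrix towards the
    # origin, always moving to the predecessor allowed by the slope constraint
    # with the smallest cumulative cost; returns the aligned index arrays
    # (prototype indices, sample indices).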
|
||||
i, j = np.array(DTW.shape) - 1
|
||||
p, q = [i - 1], [j - 1]
|
||||
|
||||
if slope_constraint == "asymmetric":
|
||||
while i > 1:
|
||||
tb = np.argmin((DTW[i - 1, j], DTW[i - 1, j - 1], DTW[i - 1, j - 2]))
|
||||
|
||||
if tb == 0:
|
||||
i = i - 1
|
||||
elif tb == 1:
|
||||
i = i - 1
|
||||
j = j - 1
|
||||
elif tb == 2:
|
||||
i = i - 1
|
||||
j = j - 2
|
||||
|
||||
p.insert(0, i - 1)
|
||||
q.insert(0, j - 1)
|
||||
elif slope_constraint == "symmetric":
|
||||
while i > 1 or j > 1:
|
||||
tb = np.argmin((DTW[i - 1, j - 1], DTW[i - 1, j], DTW[i, j - 1]))
|
||||
|
||||
if tb == 0:
|
||||
i = i - 1
|
||||
j = j - 1
|
||||
elif tb == 1:
|
||||
i = i - 1
|
||||
elif tb == 2:
|
||||
j = j - 1
|
||||
|
||||
p.insert(0, i - 1)
|
||||
q.insert(0, j - 1)
|
||||
else:
|
||||
sys.exit("Unknown slope constraint %s" % slope_constraint)
|
||||
|
||||
return (np.array(p), np.array(q))
|
||||
|
||||
|
||||
def dtw(
|
||||
prototype,
|
||||
sample,
|
||||
return_flag=RETURN_VALUE,
|
||||
slope_constraint="asymmetric",
|
||||
window=None,
|
||||
):
|
||||
"""Computes the DTW of two sequences.
|
||||
:param prototype: np array [0..b]
|
||||
:param sample: np array [0..t]
|
||||
:param extended: bool
|
||||
"""
|
||||
p = prototype.shape[0]
|
||||
assert p != 0, "Prototype empty!"
|
||||
s = sample.shape[0]
|
||||
assert s != 0, "Sample empty!"
|
||||
|
||||
if window is None:
|
||||
window = s
|
||||
|
||||
cost = np.full((p, s), np.inf)
|
||||
for i in range(p):
|
||||
start = max(0, i - window)
|
||||
end = min(s, i + window) + 1
|
||||
cost[i, start:end] = np.linalg.norm(sample[start:end] - prototype[i], axis=1)
|
||||
|
||||
DTW = _cummulative_matrix(cost, slope_constraint, window)
|
||||
|
||||
if return_flag == RETURN_ALL:
|
||||
return DTW[-1, -1], cost, DTW[1:, 1:], _traceback(DTW, slope_constraint)
|
||||
elif return_flag == RETURN_PATH:
|
||||
return _traceback(DTW, slope_constraint)
|
||||
else:
|
||||
return DTW[-1, -1]
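# Example usage (illustrative sketch): both inputs are 2-D arrays of shape
# (length, channels); RETURN_VALUE yields the DTW distance, RETURN_PATH the
# aligned index arrays.
#   a = np.sin(np.linspace(0, 2 * np.pi, 100))[:, None]
#   b = np.cos(np.linspace(0, 2 * np.pi, 100))[:, None]
#   distance = dtw(a, b, RETURN_VALUE, slope_constraint="symmetric")
#   path_a, path_b = dtw(a, b, RETURN_PATH, slope_constraint="symmetric")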
|
||||
|
||||
|
||||
def _cummulative_matrix(cost, slope_constraint, window):
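    # Dynamic-programming accumulation of the cost matrix. The "symmetric"
    # constraint allows the usual three moves (diagonal, left, up); the
    # "asymmetric" constraint advances the prototype index at every step and
    # lets the sample index stay, advance by one, or skip one.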
|
||||
p = cost.shape[0]
|
||||
s = cost.shape[1]
|
||||
|
||||
# Note: DTW is one larger than cost and the original patterns
|
||||
DTW = np.full((p + 1, s + 1), np.inf)
|
||||
|
||||
DTW[0, 0] = 0.0
|
||||
|
||||
if slope_constraint == "asymmetric":
|
||||
for i in range(1, p + 1):
|
||||
if i <= window + 1:
|
||||
DTW[i, 1] = cost[i - 1, 0] + min(DTW[i - 1, 0], DTW[i - 1, 1])
|
||||
for j in range(max(2, i - window), min(s, i + window) + 1):
|
||||
DTW[i, j] = cost[i - 1, j - 1] + min(
|
||||
DTW[i - 1, j - 2], DTW[i - 1, j - 1], DTW[i - 1, j]
|
||||
)
|
||||
elif slope_constraint == "symmetric":
|
||||
for i in range(1, p + 1):
|
||||
for j in range(max(1, i - window), min(s, i + window) + 1):
|
||||
DTW[i, j] = cost[i - 1, j - 1] + min(
|
||||
DTW[i - 1, j - 1], DTW[i, j - 1], DTW[i - 1, j]
|
||||
)
|
||||
else:
|
||||
sys.exit("Unknown slope constraint %s" % slope_constraint)
|
||||
|
||||
return DTW
|
||||
|
||||
|
||||
def shape_dtw(
|
||||
prototype,
|
||||
sample,
|
||||
return_flag=RETURN_VALUE,
|
||||
slope_constraint="asymmetric",
|
||||
window=None,
|
||||
descr_ratio=0.05,
|
||||
):
|
||||
"""Computes the shapeDTW of two sequences.
|
||||
:param prototype: np array [0..b]
|
||||
:param sample: np array [0..t]
|
||||
:param extended: bool
|
||||
"""
|
||||
# shapeDTW
|
||||
# https://www.sciencedirect.com/science/article/pii/S0031320317303710
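    # Instead of matching single points, each time step is described by the raw
    # subsequence of length ~descr_ratio * series length centred on it (with
    # edge padding), and the DTW cost compares these local shape descriptors.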
|
||||
|
||||
p = prototype.shape[0]
|
||||
assert p != 0, "Prototype empty!"
|
||||
s = sample.shape[0]
|
||||
assert s != 0, "Sample empty!"
|
||||
|
||||
if window is None:
|
||||
window = s
|
||||
|
||||
p_feature_len = np.clip(np.round(p * descr_ratio), 5, 100).astype(int)
|
||||
s_feature_len = np.clip(np.round(s * descr_ratio), 5, 100).astype(int)
|
||||
|
||||
# padding
|
||||
p_pad_front = (np.ceil(p_feature_len / 2.0)).astype(int)
|
||||
p_pad_back = (np.floor(p_feature_len / 2.0)).astype(int)
|
||||
s_pad_front = (np.ceil(s_feature_len / 2.0)).astype(int)
|
||||
s_pad_back = (np.floor(s_feature_len / 2.0)).astype(int)
|
||||
|
||||
prototype_pad = np.pad(prototype, ((p_pad_front, p_pad_back), (0, 0)), mode="edge")
|
||||
sample_pad = np.pad(sample, ((s_pad_front, s_pad_back), (0, 0)), mode="edge")
|
||||
p_p = prototype_pad.shape[0]
|
||||
s_p = sample_pad.shape[0]
|
||||
|
||||
cost = np.full((p, s), np.inf)
|
||||
for i in range(p):
|
||||
for j in range(max(0, i - window), min(s, i + window)):
|
||||
cost[i, j] = np.linalg.norm(
|
||||
sample_pad[j : j + s_feature_len] - prototype_pad[i : i + p_feature_len]
|
||||
)
|
||||
|
||||
DTW = _cummulative_matrix(cost, slope_constraint=slope_constraint, window=window)
|
||||
|
||||
if return_flag == RETURN_ALL:
|
||||
return DTW[-1, -1], cost, DTW[1:, 1:], _traceback(DTW, slope_constraint)
|
||||
elif return_flag == RETURN_PATH:
|
||||
return _traceback(DTW, slope_constraint)
|
||||
else:
|
||||
return DTW[-1, -1]
|
||||
|
||||
|
||||
# Draw helpers
|
||||
def draw_graph2d(cost, DTW, path, prototype, sample):
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
plt.figure(figsize=(12, 8))
|
||||
# plt.subplots_adjust(left=.02, right=.98, bottom=.001, top=.96, wspace=.05, hspace=.01)
|
||||
|
||||
# cost
|
||||
plt.subplot(2, 3, 1)
|
||||
plt.imshow(cost.T, cmap=plt.cm.gray, interpolation="none", origin="lower")
|
||||
plt.plot(path[0], path[1], "y")
|
||||
plt.xlim((-0.5, cost.shape[0] - 0.5))
|
||||
plt.ylim((-0.5, cost.shape[0] - 0.5))
|
||||
|
||||
# dtw
|
||||
plt.subplot(2, 3, 2)
|
||||
plt.imshow(DTW.T, cmap=plt.cm.gray, interpolation="none", origin="lower")
|
||||
plt.plot(path[0] + 1, path[1] + 1, "y")
|
||||
plt.xlim((-0.5, DTW.shape[0] - 0.5))
|
||||
plt.ylim((-0.5, DTW.shape[0] - 0.5))
|
||||
|
||||
# prototype
|
||||
plt.subplot(2, 3, 4)
|
||||
plt.plot(prototype[:, 0], prototype[:, 1], "b-o")
|
||||
|
||||
# connection
|
||||
plt.subplot(2, 3, 5)
|
||||
for i in range(0, path[0].shape[0]):
|
||||
plt.plot(
|
||||
[prototype[path[0][i], 0], sample[path[1][i], 0]],
|
||||
[prototype[path[0][i], 1], sample[path[1][i], 1]],
|
||||
"y-",
|
||||
)
|
||||
plt.plot(sample[:, 0], sample[:, 1], "g-o")
|
||||
plt.plot(prototype[:, 0], prototype[:, 1], "b-o")
|
||||
|
||||
# sample
|
||||
plt.subplot(2, 3, 6)
|
||||
plt.plot(sample[:, 0], sample[:, 1], "g-o")
|
||||
|
||||
plt.tight_layout()
|
||||
plt.show()
|
||||
|
||||
|
||||
def draw_graph1d(cost, DTW, path, prototype, sample):
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
plt.figure(figsize=(12, 8))
|
||||
# plt.subplots_adjust(left=.02, right=.98, bottom=.001, top=.96, wspace=.05, hspace=.01)
|
||||
p_steps = np.arange(prototype.shape[0])
|
||||
s_steps = np.arange(sample.shape[0])
|
||||
|
||||
# cost
|
||||
plt.subplot(2, 3, 1)
|
||||
plt.imshow(cost.T, cmap=plt.cm.gray, interpolation="none", origin="lower")
|
||||
plt.plot(path[0], path[1], "y")
|
||||
plt.xlim((-0.5, cost.shape[0] - 0.5))
|
||||
plt.ylim((-0.5, cost.shape[0] - 0.5))
|
||||
|
||||
# dtw
|
||||
plt.subplot(2, 3, 2)
|
||||
plt.imshow(DTW.T, cmap=plt.cm.gray, interpolation="none", origin="lower")
|
||||
plt.plot(path[0] + 1, path[1] + 1, "y")
|
||||
plt.xlim((-0.5, DTW.shape[0] - 0.5))
|
||||
plt.ylim((-0.5, DTW.shape[0] - 0.5))
|
||||
|
||||
# prototype
|
||||
plt.subplot(2, 3, 4)
|
||||
plt.plot(p_steps, prototype[:, 0], "b-o")
|
||||
|
||||
# connection
|
||||
plt.subplot(2, 3, 5)
|
||||
for i in range(0, path[0].shape[0]):
|
||||
plt.plot(
|
||||
[path[0][i], path[1][i]],
|
||||
[prototype[path[0][i], 0], sample[path[1][i], 0]],
|
||||
"y-",
|
||||
)
|
||||
    plt.plot(s_steps, sample[:, 0], "g-o")
    plt.plot(p_steps, prototype[:, 0], "b-o")
|
||||
|
||||
# sample
|
||||
plt.subplot(2, 3, 6)
|
||||
plt.plot(s_steps, sample[:, 0], "g-o")
|
||||
|
||||
plt.tight_layout()
|
||||
plt.show()
|
||||
|
|
@@ -0,0 +1,185 @@
|
|||
from numpy import array, zeros, full, argmin, inf, ndim
|
||||
from scipy.spatial.distance import cdist
|
||||
from math import isinf
|
||||
|
||||
|
||||
def dtw(x, y, dist, warp=1, w=inf, s=1.0):
|
||||
"""
|
||||
Computes Dynamic Time Warping (DTW) of two sequences.
|
||||
|
||||
:param array x: N1*M array
|
||||
:param array y: N2*M array
|
||||
:param func dist: distance used as cost measure
|
||||
:param int warp: how many shifts are computed.
|
||||
:param int w: window size limiting the maximal distance between indices of matched entries |i,j|.
|
||||
:param float s: weight applied on off-diagonal moves of the path. As s gets larger, the warping path is increasingly biased towards the diagonal
|
||||
    Returns the minimum distance, the cost matrix, the accumulated cost matrix, and the warp path.
|
||||
"""
|
||||
assert len(x)
|
||||
assert len(y)
|
||||
assert isinf(w) or (w >= abs(len(x) - len(y)))
|
||||
assert s > 0
|
||||
r, c = len(x), len(y)
|
||||
if not isinf(w):
|
||||
D0 = full((r + 1, c + 1), inf)
|
||||
for i in range(1, r + 1):
|
||||
D0[i, max(1, i - w) : min(c + 1, i + w + 1)] = 0
|
||||
D0[0, 0] = 0
|
||||
else:
|
||||
D0 = zeros((r + 1, c + 1))
|
||||
D0[0, 1:] = inf
|
||||
D0[1:, 0] = inf
|
||||
D1 = D0[1:, 1:] # view
|
||||
for i in range(r):
|
||||
for j in range(c):
|
||||
if isinf(w) or (max(0, i - w) <= j <= min(c, i + w)):
|
||||
D1[i, j] = dist(x[i], y[j])
|
||||
C = D1.copy()
|
||||
jrange = range(c)
|
||||
for i in range(r):
|
||||
if not isinf(w):
|
||||
jrange = range(max(0, i - w), min(c, i + w + 1))
|
||||
for j in jrange:
|
||||
min_list = [D0[i, j]]
|
||||
for k in range(1, warp + 1):
|
||||
i_k = min(i + k, r)
|
||||
j_k = min(j + k, c)
|
||||
min_list += [D0[i_k, j] * s, D0[i, j_k] * s]
|
||||
D1[i, j] += min(min_list)
|
||||
if len(x) == 1:
|
||||
path = zeros(len(y)), range(len(y))
|
||||
elif len(y) == 1:
|
||||
path = range(len(x)), zeros(len(x))
|
||||
else:
|
||||
path = _traceback(D0)
|
||||
return D1[-1, -1], C, D1, path
|
||||
|
||||
|
||||
def accelerated_dtw(x, y, dist, warp=1):
|
||||
"""
|
||||
Computes Dynamic Time Warping (DTW) of two sequences in a faster way.
|
||||
Instead of iterating through each element and calculating each distance,
|
||||
this uses the cdist function from scipy (https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.cdist.html)
|
||||
|
||||
:param array x: N1*M array
|
||||
:param array y: N2*M array
|
||||
:param string or func dist: distance parameter for cdist. When string is given, cdist uses optimized functions for the distance metrics.
|
||||
If a string is passed, the distance function can be 'braycurtis', 'canberra', 'chebyshev', 'cityblock', 'correlation', 'cosine', 'dice', 'euclidean', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis', 'matching', 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'wminkowski', 'yule'.
|
||||
:param int warp: how many shifts are computed.
|
||||
    Returns the minimum distance, the cost matrix, the accumulated cost matrix, and the warp path.
|
||||
"""
|
||||
assert len(x)
|
||||
assert len(y)
|
||||
if ndim(x) == 1:
|
||||
x = x.reshape(-1, 1)
|
||||
if ndim(y) == 1:
|
||||
y = y.reshape(-1, 1)
|
||||
r, c = len(x), len(y)
|
||||
D0 = zeros((r + 1, c + 1))
|
||||
D0[0, 1:] = inf
|
||||
D0[1:, 0] = inf
|
||||
D1 = D0[1:, 1:]
|
||||
D0[1:, 1:] = cdist(x, y, dist)
|
||||
C = D1.copy()
|
||||
for i in range(r):
|
||||
for j in range(c):
|
||||
min_list = [D0[i, j]]
|
||||
for k in range(1, warp + 1):
|
||||
min_list += [D0[min(i + k, r), j], D0[i, min(j + k, c)]]
|
||||
D1[i, j] += min(min_list)
|
||||
if len(x) == 1:
|
||||
path = zeros(len(y)), range(len(y))
|
||||
elif len(y) == 1:
|
||||
path = range(len(x)), zeros(len(x))
|
||||
else:
|
||||
path = _traceback(D0)
|
||||
return D1[-1, -1], C, D1, path
|
||||
|
||||
|
||||
def _traceback(D):
|
||||
i, j = array(D.shape) - 2
|
||||
p, q = [i], [j]
|
||||
while (i > 0) or (j > 0):
|
||||
tb = argmin((D[i, j], D[i, j + 1], D[i + 1, j]))
|
||||
if tb == 0:
|
||||
i -= 1
|
||||
j -= 1
|
||||
elif tb == 1:
|
||||
i -= 1
|
||||
else: # (tb == 2):
|
||||
j -= 1
|
||||
p.insert(0, i)
|
||||
q.insert(0, j)
|
||||
return array(p), array(q)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
w = inf
|
||||
s = 1.0
|
||||
if 1: # 1-D numeric
|
||||
from sklearn.metrics.pairwise import manhattan_distances
|
||||
|
||||
x = [0, 0, 1, 1, 2, 4, 2, 1, 2, 0]
|
||||
y = [1, 1, 1, 2, 2, 2, 2, 3, 2, 0]
|
||||
dist_fun = manhattan_distances
|
||||
w = 1
|
||||
# s = 1.2
|
||||
elif 0: # 2-D numeric
|
||||
from sklearn.metrics.pairwise import euclidean_distances
|
||||
|
||||
x = [
|
||||
[0, 0],
|
||||
[0, 1],
|
||||
[1, 1],
|
||||
[1, 2],
|
||||
[2, 2],
|
||||
[4, 3],
|
||||
[2, 3],
|
||||
[1, 1],
|
||||
[2, 2],
|
||||
[0, 1],
|
||||
]
|
||||
y = [
|
||||
[1, 0],
|
||||
[1, 1],
|
||||
[1, 1],
|
||||
[2, 1],
|
||||
[4, 3],
|
||||
[4, 3],
|
||||
[2, 3],
|
||||
[3, 1],
|
||||
[1, 2],
|
||||
[1, 0],
|
||||
]
|
||||
dist_fun = euclidean_distances
|
||||
else: # 1-D list of strings
|
||||
from nltk.metrics.distance import edit_distance
|
||||
|
||||
# x = ['we', 'shelled', 'clams', 'for', 'the', 'chowder']
|
||||
# y = ['class', 'too']
|
||||
x = ["i", "soon", "found", "myself", "muttering", "to", "the", "walls"]
|
||||
y = ["see", "drown", "himself"]
|
||||
# x = 'we talked about the situation'.split()
|
||||
# y = 'we talked about the situation'.split()
|
||||
dist_fun = edit_distance
|
||||
dist, cost, acc, path = dtw(x, y, dist_fun, w=w, s=s)
|
||||
|
||||
    # Visualize
|
||||
from matplotlib import pyplot as plt
|
||||
|
||||
plt.imshow(cost.T, origin="lower", cmap=plt.cm.Reds, interpolation="nearest")
|
||||
plt.plot(path[0], path[1], "-o") # relation
|
||||
plt.xticks(range(len(x)), x)
|
||||
plt.yticks(range(len(y)), y)
|
||||
plt.xlabel("x")
|
||||
plt.ylabel("y")
|
||||
plt.axis("tight")
|
||||
if isinf(w):
|
||||
plt.title("Minimum distance: {}, slope weight: {}".format(dist, s))
|
||||
else:
|
||||
plt.title(
|
||||
"Minimum distance: {}, window widht: {}, slope weight: {}".format(
|
||||
dist, w, s
|
||||
)
|
||||
)
|
||||
plt.show()
|
||||
|
|
@@ -0,0 +1,111 @@
|
|||
# This source code is provided for the purposes of scientific reproducibility
|
||||
# under the following limited license from Element AI Inc. The code is an
|
||||
# implementation of the N-BEATS model (Oreshkin et al., N-BEATS: Neural basis
|
||||
# expansion analysis for interpretable time series forecasting,
|
||||
# https://arxiv.org/abs/1905.10437). The copyright to the source code is
|
||||
# licensed under the Creative Commons - Attribution-NonCommercial 4.0
|
||||
# International license (CC BY-NC 4.0):
|
||||
# https://creativecommons.org/licenses/by-nc/4.0/. Any commercial use (whether
|
||||
# for the benefit of third parties or internally in production) requires an
|
||||
# explicit license. The subject-matter of the N-BEATS model and associated
|
||||
# materials are the property of Element AI Inc. and may be subject to patent
|
||||
# protection. No license to patents is granted hereunder (whether express or
|
||||
# implied). Copyright © 2020 Element AI Inc. All rights reserved.
|
||||
|
||||
"""
|
||||
Loss functions for PyTorch.
|
||||
"""
|
||||
|
||||
import torch as t
|
||||
import torch.nn as nn
|
||||
import numpy as np
|
||||
import pdb
|
||||
|
||||
|
||||
def divide_no_nan(a, b):
|
||||
"""
|
||||
    a/b where any resulting NaN or Inf is replaced by 0.
|
||||
"""
|
||||
result = a / b
|
||||
result[result != result] = 0.0
|
||||
result[result == np.inf] = 0.0
|
||||
return result
|
||||
|
||||
|
||||
class mape_loss(nn.Module):
|
||||
def __init__(self):
|
||||
super(mape_loss, self).__init__()
|
||||
|
||||
def forward(
|
||||
self,
|
||||
insample: t.Tensor,
|
||||
freq: int,
|
||||
forecast: t.Tensor,
|
||||
target: t.Tensor,
|
||||
mask: t.Tensor,
|
||||
) -> t.float:
|
||||
"""
|
||||
MAPE loss as defined in: https://en.wikipedia.org/wiki/Mean_absolute_percentage_error
|
||||
|
||||
:param forecast: Forecast values. Shape: batch, time
|
||||
:param target: Target values. Shape: batch, time
|
||||
:param mask: 0/1 mask. Shape: batch, time
|
||||
:return: Loss value
|
||||
"""
|
||||
weights = divide_no_nan(mask, target)
|
||||
return t.mean(t.abs((forecast - target) * weights))
|
||||
|
||||
|
||||
class smape_loss(nn.Module):
|
||||
def __init__(self):
|
||||
super(smape_loss, self).__init__()
|
||||
|
||||
def forward(
|
||||
self,
|
||||
insample: t.Tensor,
|
||||
freq: int,
|
||||
forecast: t.Tensor,
|
||||
target: t.Tensor,
|
||||
mask: t.Tensor,
|
||||
) -> t.float:
|
||||
"""
|
||||
sMAPE loss as defined in https://robjhyndman.com/hyndsight/smape/ (Makridakis 1993)
|
||||
|
||||
:param forecast: Forecast values. Shape: batch, time
|
||||
:param target: Target values. Shape: batch, time
|
||||
:param mask: 0/1 mask. Shape: batch, time
|
||||
:return: Loss value
|
||||
"""
|
||||
return 200 * t.mean(
|
||||
divide_no_nan(
|
||||
t.abs(forecast - target), t.abs(forecast.data) + t.abs(target.data)
|
||||
)
|
||||
* mask
|
||||
)
|
||||
|
||||
|
||||
class mase_loss(nn.Module):
|
||||
def __init__(self):
|
||||
super(mase_loss, self).__init__()
|
||||
|
||||
def forward(
|
||||
self,
|
||||
insample: t.Tensor,
|
||||
freq: int,
|
||||
forecast: t.Tensor,
|
||||
target: t.Tensor,
|
||||
mask: t.Tensor,
|
||||
) -> t.float:
|
||||
"""
|
||||
MASE loss as defined in "Scaled Errors" https://robjhyndman.com/papers/mase.pdf
|
||||
|
||||
:param insample: Insample values. Shape: batch, time_i
|
||||
:param freq: Frequency value
|
||||
:param forecast: Forecast values. Shape: batch, time_o
|
||||
:param target: Target values. Shape: batch, time_o
|
||||
:param mask: 0/1 mask. Shape: batch, time_o
|
||||
:return: Loss value
|
||||
"""
|
||||
masep = t.mean(t.abs(insample[:, freq:] - insample[:, :-freq]), dim=1)
|
||||
masked_masep_inv = divide_no_nan(mask, masep[:, None])
|
||||
return t.mean(t.abs(target - forecast) * masked_masep_inv)
|
||||
|
|
@@ -0,0 +1,175 @@
|
|||
# This source code is provided for the purposes of scientific reproducibility
|
||||
# under the following limited license from Element AI Inc. The code is an
|
||||
# implementation of the N-BEATS model (Oreshkin et al., N-BEATS: Neural basis
|
||||
# expansion analysis for interpretable time series forecasting,
|
||||
# https://arxiv.org/abs/1905.10437). The copyright to the source code is
|
||||
# licensed under the Creative Commons - Attribution-NonCommercial 4.0
|
||||
# International license (CC BY-NC 4.0):
|
||||
# https://creativecommons.org/licenses/by-nc/4.0/. Any commercial use (whether
|
||||
# for the benefit of third parties or internally in production) requires an
|
||||
# explicit license. The subject-matter of the N-BEATS model and associated
|
||||
# materials are the property of Element AI Inc. and may be subject to patent
|
||||
# protection. No license to patents is granted hereunder (whether express or
|
||||
# implied). Copyright 2020 Element AI Inc. All rights reserved.
|
||||
|
||||
"""
|
||||
M4 Summary
|
||||
"""
|
||||
|
||||
from collections import OrderedDict
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from data_provider.m4 import M4Dataset
|
||||
from data_provider.m4 import M4Meta
|
||||
import os
|
||||
|
||||
|
||||
def group_values(values, groups, group_name):
|
||||
return np.array([v[~np.isnan(v)] for v in values[groups == group_name]])
|
||||
|
||||
|
||||
def mase(forecast, insample, outsample, frequency):
|
||||
return np.mean(np.abs(forecast - outsample)) / np.mean(
|
||||
np.abs(insample[:-frequency] - insample[frequency:])
|
||||
)
|
||||
|
||||
|
||||
def smape_2(forecast, target):
|
||||
denom = np.abs(target) + np.abs(forecast)
|
||||
    # divide by 1.0 instead of 0.0; when denom is zero the numerator is 0.0 anyway.
|
||||
denom[denom == 0.0] = 1.0
|
||||
return 200 * np.abs(forecast - target) / denom
|
||||
|
||||
|
||||
def mape(forecast, target):
|
||||
denom = np.abs(target)
|
||||
    # divide by 1.0 instead of 0.0; when denom is zero the numerator is 0.0 anyway.
|
||||
denom[denom == 0.0] = 1.0
|
||||
return 100 * np.abs(forecast - target) / denom
|
||||
|
||||
|
||||
class M4Summary:
|
||||
def __init__(self, file_path, root_path):
|
||||
self.file_path = file_path
|
||||
self.training_set = M4Dataset.load(training=True, dataset_file=root_path)
|
||||
self.test_set = M4Dataset.load(training=False, dataset_file=root_path)
|
||||
self.naive_path = os.path.join(root_path, "submission-Naive2.csv")
|
||||
|
||||
def evaluate(self):
|
||||
"""
|
||||
Evaluate forecasts using M4 test dataset.
|
||||
|
||||
:param forecast: Forecasts. Shape: timeseries, time.
|
||||
:return: sMAPE and OWA grouped by seasonal patterns.
|
||||
"""
|
||||
grouped_owa = OrderedDict()
|
||||
|
||||
naive2_forecasts = pd.read_csv(self.naive_path).values[:, 1:].astype(np.float32)
|
||||
naive2_forecasts = np.array([v[~np.isnan(v)] for v in naive2_forecasts])
|
||||
|
||||
model_mases = {}
|
||||
naive2_smapes = {}
|
||||
naive2_mases = {}
|
||||
grouped_smapes = {}
|
||||
grouped_mapes = {}
|
||||
for group_name in M4Meta.seasonal_patterns:
|
||||
file_name = self.file_path + group_name + "_forecast.csv"
|
||||
if os.path.exists(file_name):
|
||||
model_forecast = pd.read_csv(file_name).values
|
||||
|
||||
naive2_forecast = group_values(
|
||||
naive2_forecasts, self.test_set.groups, group_name
|
||||
)
|
||||
target = group_values(
|
||||
self.test_set.values, self.test_set.groups, group_name
|
||||
)
|
||||
# all timeseries within group have same frequency
|
||||
frequency = self.training_set.frequencies[
|
||||
self.test_set.groups == group_name
|
||||
][0]
|
||||
insample = group_values(
|
||||
self.training_set.values, self.test_set.groups, group_name
|
||||
)
|
||||
|
||||
model_mases[group_name] = np.mean(
|
||||
[
|
||||
mase(
|
||||
forecast=model_forecast[i],
|
||||
insample=insample[i],
|
||||
outsample=target[i],
|
||||
frequency=frequency,
|
||||
)
|
||||
for i in range(len(model_forecast))
|
||||
]
|
||||
)
|
||||
naive2_mases[group_name] = np.mean(
|
||||
[
|
||||
mase(
|
||||
forecast=naive2_forecast[i],
|
||||
insample=insample[i],
|
||||
outsample=target[i],
|
||||
frequency=frequency,
|
||||
)
|
||||
for i in range(len(model_forecast))
|
||||
]
|
||||
)
|
||||
|
||||
naive2_smapes[group_name] = np.mean(smape_2(naive2_forecast, target))
|
||||
grouped_smapes[group_name] = np.mean(
|
||||
smape_2(forecast=model_forecast, target=target)
|
||||
)
|
||||
grouped_mapes[group_name] = np.mean(
|
||||
mape(forecast=model_forecast, target=target)
|
||||
)
|
||||
|
||||
grouped_smapes = self.summarize_groups(grouped_smapes)
|
||||
grouped_mapes = self.summarize_groups(grouped_mapes)
|
||||
grouped_model_mases = self.summarize_groups(model_mases)
|
||||
grouped_naive2_smapes = self.summarize_groups(naive2_smapes)
|
||||
grouped_naive2_mases = self.summarize_groups(naive2_mases)
|
||||
for k in grouped_model_mases.keys():
|
||||
grouped_owa[k] = (
|
||||
grouped_model_mases[k] / grouped_naive2_mases[k]
|
||||
+ grouped_smapes[k] / grouped_naive2_smapes[k]
|
||||
) / 2
|
||||
|
||||
def round_all(d):
|
||||
return dict(map(lambda kv: (kv[0], np.round(kv[1], 3)), d.items()))
|
||||
|
||||
return (
|
||||
round_all(grouped_smapes),
|
||||
round_all(grouped_owa),
|
||||
round_all(grouped_mapes),
|
||||
round_all(grouped_model_mases),
|
||||
)
|
||||
|
||||
def summarize_groups(self, scores):
|
||||
"""
|
||||
Re-group scores respecting M4 rules.
|
||||
:param scores: Scores per group.
|
||||
:return: Grouped scores.
|
||||
"""
|
||||
scores_summary = OrderedDict()
|
||||
|
||||
def group_count(group_name):
|
||||
return len(np.where(self.test_set.groups == group_name)[0])
|
||||
|
||||
weighted_score = {}
|
||||
for g in ["Yearly", "Quarterly", "Monthly"]:
|
||||
weighted_score[g] = scores[g] * group_count(g)
|
||||
scores_summary[g] = scores[g]
|
||||
|
||||
others_score = 0
|
||||
others_count = 0
|
||||
for g in ["Weekly", "Daily", "Hourly"]:
|
||||
others_score += scores[g] * group_count(g)
|
||||
others_count += group_count(g)
|
||||
weighted_score["Others"] = others_score
|
||||
scores_summary["Others"] = others_score / others_count
|
||||
|
||||
average = np.sum(list(weighted_score.values())) / len(self.test_set.groups)
|
||||
scores_summary["Average"] = average
|
||||
|
||||
return scores_summary
|
||||
|
|
@@ -0,0 +1,28 @@
|
|||
import torch
|
||||
|
||||
|
||||
class TriangularCausalMask:
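    # Boolean upper-triangular mask of shape (B, 1, L, L); True marks future
    # positions that causal self-attention must not attend to.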
|
||||
def __init__(self, B, L, device="cpu"):
|
||||
mask_shape = [B, 1, L, L]
|
||||
with torch.no_grad():
|
||||
self._mask = torch.triu(
|
||||
torch.ones(mask_shape, dtype=torch.bool), diagonal=1
|
||||
).to(device)
|
||||
|
||||
@property
|
||||
def mask(self):
|
||||
return self._mask
|
||||
|
||||
|
||||
class ProbMask:
|
||||
def __init__(self, B, H, L, index, scores, device="cpu"):
|
||||
_mask = torch.ones(L, scores.shape[-1], dtype=torch.bool).to(device).triu(1)
|
||||
_mask_ex = _mask[None, None, :].expand(B, H, L, scores.shape[-1])
|
||||
indicator = _mask_ex[
|
||||
torch.arange(B)[:, None, None], torch.arange(H)[None, :, None], index, :
|
||||
].to(device)
|
||||
self._mask = indicator.view(scores.shape).to(device)
|
||||
|
||||
@property
|
||||
def mask(self):
|
||||
return self._mask
|
||||
|
|
@@ -0,0 +1,43 @@
|
|||
import numpy as np
|
||||
|
||||
|
||||
def RSE(pred, true):
|
||||
return np.sqrt(np.sum((true - pred) ** 2)) / np.sqrt(
|
||||
np.sum((true - true.mean()) ** 2)
|
||||
)
|
||||
|
||||
|
||||
def CORR(pred, true):
|
||||
u = ((true - true.mean(0)) * (pred - pred.mean(0))).sum(0)
|
||||
d = np.sqrt(((true - true.mean(0)) ** 2 * (pred - pred.mean(0)) ** 2).sum(0))
|
||||
return (u / d).mean(-1)
|
||||
|
||||
|
||||
def MAE(pred, true):
|
||||
return np.mean(np.abs(true - pred))
|
||||
|
||||
|
||||
def MSE(pred, true):
|
||||
return np.mean((true - pred) ** 2)
|
||||
|
||||
|
||||
def RMSE(pred, true):
|
||||
return np.sqrt(MSE(pred, true))
|
||||
|
||||
|
||||
def MAPE(pred, true):
|
||||
return np.mean(np.abs((true - pred) / true))
|
||||
|
||||
|
||||
def MSPE(pred, true):
|
||||
return np.mean(np.square((true - pred) / true))
|
||||
|
||||
|
||||
def metric(pred, true):
|
||||
mae = MAE(pred, true)
|
||||
mse = MSE(pred, true)
|
||||
rmse = RMSE(pred, true)
|
||||
mape = MAPE(pred, true)
|
||||
mspe = MSPE(pred, true)
|
||||
|
||||
return mae, mse, rmse, mape, mspe
|
||||
|
|
@@ -0,0 +1,74 @@
|
|||
def print_args(args):
|
||||
print("\033[1m" + "Basic Config" + "\033[0m")
|
||||
print(
|
||||
f" {'Task Name:':<20}{args.task_name:<20}{'Is Training:':<20}{args.is_training:<20}"
|
||||
)
|
||||
print(f" {'Model ID:':<20}{args.model_id:<20}{'Model:':<20}{args.model:<20}")
|
||||
print()
|
||||
|
||||
print("\033[1m" + "Data Loader" + "\033[0m")
|
||||
print(f" {'Data:':<20}{args.data:<20}{'Root Path:':<20}{args.root_path:<20}")
|
||||
print(
|
||||
f" {'Data Path:':<20}{args.data_path:<20}{'Features:':<20}{args.features:<20}"
|
||||
)
|
||||
print(f" {'Target:':<20}{args.target:<20}{'Freq:':<20}{args.freq:<20}")
|
||||
print(f" {'Checkpoints:':<20}{args.checkpoints:<20}")
|
||||
print()
|
||||
|
||||
if args.task_name in ["long_term_forecast", "short_term_forecast"]:
|
||||
print("\033[1m" + "Forecasting Task" + "\033[0m")
|
||||
print(
|
||||
f" {'Seq Len:':<20}{args.seq_len:<20}{'Label Len:':<20}{args.label_len:<20}"
|
||||
)
|
||||
print(
|
||||
f" {'Pred Len:':<20}{args.pred_len:<20}{'Seasonal Patterns:':<20}{args.seasonal_patterns:<20}"
|
||||
)
|
||||
print(f" {'Inverse:':<20}{args.inverse:<20}")
|
||||
print()
|
||||
|
||||
if args.task_name == "imputation":
|
||||
print("\033[1m" + "Imputation Task" + "\033[0m")
|
||||
print(f" {'Mask Rate:':<20}{args.mask_rate:<20}")
|
||||
print()
|
||||
|
||||
if args.task_name == "anomaly_detection":
|
||||
print("\033[1m" + "Anomaly Detection Task" + "\033[0m")
|
||||
print(f" {'Anomaly Ratio:':<20}{args.anomaly_ratio:<20}")
|
||||
print()
|
||||
|
||||
print("\033[1m" + "Model Parameters" + "\033[0m")
|
||||
print(f" {'Top k:':<20}{args.top_k:<20}{'Num Kernels:':<20}{args.num_kernels:<20}")
|
||||
print(f" {'Enc In:':<20}{args.enc_in:<20}{'Dec In:':<20}{args.dec_in:<20}")
|
||||
print(f" {'C Out:':<20}{args.c_out:<20}{'d model:':<20}{args.d_model:<20}")
|
||||
print(f" {'n heads:':<20}{args.n_heads:<20}{'e layers:':<20}{args.e_layers:<20}")
|
||||
print(f" {'d layers:':<20}{args.d_layers:<20}{'d FF:':<20}{args.d_ff:<20}")
|
||||
print(f" {'Moving Avg:':<20}{args.moving_avg:<20}{'Factor:':<20}{args.factor:<20}")
|
||||
print(f" {'Distil:':<20}{args.distil:<20}{'Dropout:':<20}{args.dropout:<20}")
|
||||
print(f" {'Embed:':<20}{args.embed:<20}{'Activation:':<20}{args.activation:<20}")
|
||||
print()
|
||||
|
||||
print("\033[1m" + "Run Parameters" + "\033[0m")
|
||||
print(f" {'Num Workers:':<20}{args.num_workers:<20}{'Itr:':<20}{args.itr:<20}")
|
||||
print(
|
||||
f" {'Train Epochs:':<20}{args.train_epochs:<20}{'Batch Size:':<20}{args.batch_size:<20}"
|
||||
)
|
||||
print(
|
||||
f" {'Patience:':<20}{args.patience:<20}{'Learning Rate:':<20}{args.learning_rate:<20}"
|
||||
)
|
||||
print(f" {'Des:':<20}{args.des:<20}{'Loss:':<20}{args.loss:<20}")
|
||||
print(f" {'Lradj:':<20}{args.lradj:<20}{'Use Amp:':<20}{args.use_amp:<20}")
|
||||
print()
|
||||
|
||||
print("\033[1m" + "GPU" + "\033[0m")
|
||||
print(f" {'Use GPU:':<20}{args.use_gpu:<20}{'GPU:':<20}{args.gpu:<20}")
|
||||
print(
|
||||
f" {'Use Multi GPU:':<20}{args.use_multi_gpu:<20}{'Devices:':<20}{args.devices:<20}"
|
||||
)
|
||||
print()
|
||||
|
||||
print("\033[1m" + "De-stationary Projector Params" + "\033[0m")
|
||||
p_hidden_dims_str = ", ".join(map(str, args.p_hidden_dims))
|
||||
print(
|
||||
f" {'P Hidden Dims:':<20}{p_hidden_dims_str:<20}{'P Hidden Layers:':<20}{args.p_hidden_layers:<20}"
|
||||
)
|
||||
print()
|
||||
|
|
@@ -0,0 +1,148 @@
|
|||
# From: gluonts/src/gluonts/time_feature/_base.py
|
||||
# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
# A copy of the License is located at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# or in the "license" file accompanying this file. This file is distributed
|
||||
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
|
||||
# express or implied. See the License for the specific language governing
|
||||
# permissions and limitations under the License.
|
||||
|
||||
from typing import List
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from pandas.tseries import offsets
|
||||
from pandas.tseries.frequencies import to_offset
|
||||
|
||||
|
||||
class TimeFeature:
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
|
||||
pass
|
||||
|
||||
def __repr__(self):
|
||||
return self.__class__.__name__ + "()"
|
||||
|
||||
|
||||
class SecondOfMinute(TimeFeature):
|
||||
"""Minute of hour encoded as value between [-0.5, 0.5]"""
|
||||
|
||||
def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
|
||||
return index.second / 59.0 - 0.5
|
||||
|
||||
|
||||
class MinuteOfHour(TimeFeature):
|
||||
"""Minute of hour encoded as value between [-0.5, 0.5]"""
|
||||
|
||||
def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
|
||||
return index.minute / 59.0 - 0.5
|
||||
|
||||
|
||||
class HourOfDay(TimeFeature):
|
||||
"""Hour of day encoded as value between [-0.5, 0.5]"""
|
||||
|
||||
def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
|
||||
return index.hour / 23.0 - 0.5
|
||||
|
||||
|
||||
class DayOfWeek(TimeFeature):
|
||||
"""Hour of day encoded as value between [-0.5, 0.5]"""
|
||||
|
||||
def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
|
||||
return index.dayofweek / 6.0 - 0.5
|
||||
|
||||
|
||||
class DayOfMonth(TimeFeature):
|
||||
"""Day of month encoded as value between [-0.5, 0.5]"""
|
||||
|
||||
def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
|
||||
return (index.day - 1) / 30.0 - 0.5
|
||||
|
||||
|
||||
class DayOfYear(TimeFeature):
|
||||
"""Day of year encoded as value between [-0.5, 0.5]"""
|
||||
|
||||
def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
|
||||
return (index.dayofyear - 1) / 365.0 - 0.5
|
||||
|
||||
|
||||
class MonthOfYear(TimeFeature):
|
||||
"""Month of year encoded as value between [-0.5, 0.5]"""
|
||||
|
||||
def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
|
||||
return (index.month - 1) / 11.0 - 0.5
|
||||
|
||||
|
||||
class WeekOfYear(TimeFeature):
|
||||
"""Week of year encoded as value between [-0.5, 0.5]"""
|
||||
|
||||
def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
|
||||
return (index.isocalendar().week - 1) / 52.0 - 0.5
|
||||
|
||||
|
||||
def time_features_from_frequency_str(freq_str: str) -> List[TimeFeature]:
|
||||
"""
|
||||
Returns a list of time features that will be appropriate for the given frequency string.
|
||||
Parameters
|
||||
----------
|
||||
freq_str
|
||||
Frequency string of the form [multiple][granularity] such as "12H", "5min", "1D" etc.
|
||||
"""
|
||||
|
||||
features_by_offsets = {
|
||||
offsets.YearEnd: [],
|
||||
offsets.QuarterEnd: [MonthOfYear],
|
||||
offsets.MonthEnd: [MonthOfYear],
|
||||
offsets.Week: [DayOfMonth, WeekOfYear],
|
||||
offsets.Day: [DayOfWeek, DayOfMonth, DayOfYear],
|
||||
offsets.BusinessDay: [DayOfWeek, DayOfMonth, DayOfYear],
|
||||
offsets.Hour: [HourOfDay, DayOfWeek, DayOfMonth, DayOfYear],
|
||||
offsets.Minute: [
|
||||
MinuteOfHour,
|
||||
HourOfDay,
|
||||
DayOfWeek,
|
||||
DayOfMonth,
|
||||
DayOfYear,
|
||||
],
|
||||
offsets.Second: [
|
||||
SecondOfMinute,
|
||||
MinuteOfHour,
|
||||
HourOfDay,
|
||||
DayOfWeek,
|
||||
DayOfMonth,
|
||||
DayOfYear,
|
||||
],
|
||||
}
|
||||
|
||||
offset = to_offset(freq_str)
|
||||
|
||||
for offset_type, feature_classes in features_by_offsets.items():
|
||||
if isinstance(offset, offset_type):
|
||||
return [cls() for cls in feature_classes]
|
||||
|
||||
supported_freq_msg = f"""
|
||||
Unsupported frequency {freq_str}
|
||||
The following frequencies are supported:
|
||||
Y - yearly
|
||||
alias: A
|
||||
M - monthly
|
||||
W - weekly
|
||||
D - daily
|
||||
B - business days
|
||||
H - hourly
|
||||
T - minutely
|
||||
alias: min
|
||||
S - secondly
|
||||
"""
|
||||
raise RuntimeError(supported_freq_msg)
|
||||
|
||||
|
||||
def time_features(dates, freq="h"):
|
||||
return np.vstack([feat(dates) for feat in time_features_from_frequency_str(freq)])
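# Example (illustrative): for an hourly DatetimeIndex this stacks HourOfDay,
# DayOfWeek, DayOfMonth and DayOfYear into an array of shape
# (num_features, num_timestamps).
#   idx = pd.date_range("2021-01-01", periods=24, freq="h")
#   feats = time_features(idx, freq="h")  # feats.shape == (4, 24)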
|
||||
|
|
@@ -0,0 +1,128 @@
|
|||
import os
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
import matplotlib.pyplot as plt
|
||||
import pandas as pd
|
||||
import math
|
||||
|
||||
plt.switch_backend("agg")
|
||||
|
||||
|
||||
def adjust_learning_rate(optimizer, epoch, args):
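    # Schedules: "type1" halves the rate every epoch, "type2" uses a fixed
    # epoch-to-rate table, "type3" keeps the initial rate for the first three
    # epochs and then decays it by 0.9 per epoch, and "cosine" anneals it over
    # args.train_epochs.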
|
||||
# lr = args.learning_rate * (0.2 ** (epoch // 2))
|
||||
if args.lradj == "type1":
|
||||
lr_adjust = {epoch: args.learning_rate * (0.5 ** ((epoch - 1) // 1))}
|
||||
elif args.lradj == "type2":
|
||||
lr_adjust = {2: 5e-5, 4: 1e-5, 6: 5e-6, 8: 1e-6, 10: 5e-7, 15: 1e-7, 20: 5e-8}
|
||||
elif args.lradj == "type3":
|
||||
lr_adjust = {
|
||||
epoch: args.learning_rate
|
||||
if epoch < 3
|
||||
else args.learning_rate * (0.9 ** ((epoch - 3) // 1))
|
||||
}
|
||||
elif args.lradj == "cosine":
|
||||
lr_adjust = {
|
||||
epoch: args.learning_rate
|
||||
/ 2
|
||||
* (1 + math.cos(epoch / args.train_epochs * math.pi))
|
||||
}
|
||||
if epoch in lr_adjust.keys():
|
||||
lr = lr_adjust[epoch]
|
||||
for param_group in optimizer.param_groups:
|
||||
param_group["lr"] = lr
|
||||
print("Updating learning rate to {}".format(lr))
|
||||
|
||||
|
||||
class EarlyStopping:
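    # Stops training after `patience` consecutive validation checks without an
    # improvement of more than `delta`, saving a checkpoint of the best model
    # seen so far.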
|
||||
def __init__(self, patience=7, verbose=False, delta=0):
|
||||
self.patience = patience
|
||||
self.verbose = verbose
|
||||
self.counter = 0
|
||||
self.best_score = None
|
||||
self.early_stop = False
|
||||
        self.val_loss_min = np.inf
|
||||
self.delta = delta
|
||||
|
||||
def __call__(self, val_loss, model, path):
|
||||
score = -val_loss
|
||||
if self.best_score is None:
|
||||
self.best_score = score
|
||||
self.save_checkpoint(val_loss, model, path)
|
||||
elif score < self.best_score + self.delta:
|
||||
self.counter += 1
|
||||
print(f"EarlyStopping counter: {self.counter} out of {self.patience}")
|
||||
if self.counter >= self.patience:
|
||||
self.early_stop = True
|
||||
else:
|
||||
self.best_score = score
|
||||
self.save_checkpoint(val_loss, model, path)
|
||||
self.counter = 0
|
||||
|
||||
def save_checkpoint(self, val_loss, model, path):
|
||||
if self.verbose:
|
||||
print(
|
||||
f"Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ..."
|
||||
)
|
||||
torch.save(model.state_dict(), path + "/" + "checkpoint.pth")
|
||||
self.val_loss_min = val_loss
|
||||
|
||||
|
||||
class dotdict(dict):
|
||||
"""dot.notation access to dictionary attributes"""
|
||||
|
||||
__getattr__ = dict.get
|
||||
__setattr__ = dict.__setitem__
|
||||
__delattr__ = dict.__delitem__
|
||||
|
||||
|
||||
class StandardScaler:
|
||||
def __init__(self, mean, std):
|
||||
self.mean = mean
|
||||
self.std = std
|
||||
|
||||
def transform(self, data):
|
||||
return (data - self.mean) / self.std
|
||||
|
||||
def inverse_transform(self, data):
|
||||
return (data * self.std) + self.mean
|
||||
|
||||
|
||||
def visual(true, preds=None, name="./pic/test.pdf"):
|
||||
"""
|
||||
Results visualization
|
||||
"""
|
||||
plt.figure()
|
||||
if preds is not None:
|
||||
plt.plot(preds, label="Prediction", linewidth=2)
|
||||
plt.plot(true, label="GroundTruth", linewidth=2)
|
||||
plt.legend()
|
||||
plt.savefig(name, bbox_inches="tight")
|
||||
|
||||
|
||||
def adjustment(gt, pred):
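    # Point-adjust evaluation for anomaly detection: once any point inside a
    # ground-truth anomaly segment is predicted as anomalous, every point of
    # that contiguous segment is marked as detected.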
|
||||
anomaly_state = False
|
||||
for i in range(len(gt)):
|
||||
if gt[i] == 1 and pred[i] == 1 and not anomaly_state:
|
||||
anomaly_state = True
|
||||
for j in range(i, 0, -1):
|
||||
if gt[j] == 0:
|
||||
break
|
||||
else:
|
||||
if pred[j] == 0:
|
||||
pred[j] = 1
|
||||
for j in range(i, len(gt)):
|
||||
if gt[j] == 0:
|
||||
break
|
||||
else:
|
||||
if pred[j] == 0:
|
||||
pred[j] = 1
|
||||
elif gt[i] == 0:
|
||||
anomaly_state = False
|
||||
if anomaly_state:
|
||||
pred[i] = 1
|
||||
return gt, pred
|
||||
|
||||
|
||||
def cal_accuracy(y_pred, y_true):
|
||||
return np.mean(y_pred == y_true)