Add files via upload
Commit: bfb8dc3614

@ -0,0 +1,128 @@
import numpy as np
import os
import torch


def data_provider(args):
    data = {}
    for category in ['train', 'val', 'test']:
        cat_data = np.load(os.path.join(args.root_path, args.data_path, category + '.npz'), allow_pickle=True)
        data['x_' + category] = torch.Tensor(cat_data['x'].astype(float)).to(torch.device(args.device))
        data['y_' + category] = torch.Tensor(cat_data['y'].astype(float)).to(torch.device(args.device))

    data['train_loader'] = Data_Loader(data['x_train'], data['y_train'], args.batch_size)
    data['val_loader'] = Data_Loader(data['x_val'], data['y_val'], args.batch_size)
    data['test_loader'] = Data_Loader(data['x_test'], data['y_test'], args.batch_size)
    train_loader = data['train_loader']
    vali_loader = data['val_loader']
    test_loader = data['test_loader']

    return train_loader, vali_loader, test_loader


class Data_Loader(object):
    def __init__(self, xs, ys, batch_size, pad_with_last_sample=False):
        """
        :param xs: input sequences
        :param ys: target sequences
        :param batch_size: batch size
        :param pad_with_last_sample: pad with the last sample to make the number of samples divisible by batch_size.
        """
        self.batch_size = batch_size
        self.current_ind = 0
        if pad_with_last_sample:
            num_padding = (batch_size - (len(xs) % batch_size)) % batch_size
            x_padding = np.repeat(xs[-1:], num_padding, axis=0)
            y_padding = np.repeat(ys[-1:], num_padding, axis=0)
            xs = np.concatenate([xs, x_padding], axis=0)
            ys = np.concatenate([ys, y_padding], axis=0)
        self.size = len(xs)
        self.num_batch = int(self.size // self.batch_size)
        self.xs = xs
        self.ys = ys

    def shuffle(self):
        permutation = np.random.permutation(self.size)
        xs, ys = self.xs[permutation], self.ys[permutation]
        self.xs = xs
        self.ys = ys

    def get_iterator(self):
        self.current_ind = 0

        def _wrapper():
            while self.current_ind < self.num_batch:
                start_ind = self.batch_size * self.current_ind
                end_ind = min(self.size, self.batch_size * (self.current_ind + 1))
                x_i = self.xs[start_ind: end_ind, ...]
                y_i = self.ys[start_ind: end_ind, ...]
                yield (x_i, y_i)
                self.current_ind += 1

        return _wrapper()


class StandardScaler():
    """
    Standardize the input.
    """

    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def transform(self, data):
        return (data - self.mean) / self.std

    def inverse_transform(self, data):
        return (data * self.std) + self.mean
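# --- Editor's usage sketch (not part of the commit) ---
# A minimal example of driving data_provider / Data_Loader, assuming the
# train/val/test .npz files (with 'x' and 'y' arrays) already exist under
# <root_path>/<data_path>/. The argument names mirror the fields read above;
# the per-batch tensor shapes depend on how the .npz files were generated
# (the DMD preprocessing script in this commit yields [B, num_nodes, seq_len, 3]).
from argparse import Namespace

args = Namespace(root_path='./data', data_path='solar-energy',
                 device='cpu', batch_size=16)
train_loader, vali_loader, test_loader = data_provider(args)
train_loader.shuffle()
for x, y in train_loader.get_iterator():
    break  # x, y are torch.Tensors for one batch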
@ -0,0 +1,330 @@
import os
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from utils.timefeatures import time_features
from utils.tools import convert_tsf_to_dataframe
import warnings
from pathlib import Path

warnings.filterwarnings('ignore')


class Dataset_Custom(Dataset):
    def __init__(self, root_path, flag='train', size=None,
                 features='S', data_path='ETTh1.csv',
                 target='OT', scale=True, timeenc=0, freq='h',
                 percent=10, max_len=-1, train_all=False):
        # size [seq_len, label_len, pred_len]
        # info
        if size is None:
            self.seq_len = 24 * 4 * 4
            self.label_len = 24 * 4
            self.pred_len = 24 * 4
        else:
            self.seq_len = size[0]
            self.label_len = size[1]
            self.pred_len = size[2]
        # init
        assert flag in ['train', 'test', 'val']
        type_map = {'train': 0, 'val': 1, 'test': 2}
        self.set_type = type_map[flag]

        self.features = features
        self.target = target
        self.scale = scale
        self.timeenc = timeenc
        self.freq = freq
        self.percent = percent

        self.root_path = root_path
        self.data_path = data_path
        self.__read_data__()

        self.enc_in = self.data_x.shape[-1]
        self.tot_len = len(self.data_x) - self.seq_len - self.pred_len + 1

    def __read_data__(self):
        self.scaler = StandardScaler()
        df_raw = pd.read_csv(os.path.join(self.root_path, self.data_path))

        '''
        df_raw.columns: ['date', ...(other features), target feature]
        '''
        cols = list(df_raw.columns)
        cols.remove(self.target)
        cols.remove('date')
        df_raw = df_raw[['date'] + cols + [self.target]]
        # print(cols)
        num_train = int(len(df_raw) * 0.7)
        num_test = int(len(df_raw) * 0.2)
        num_vali = len(df_raw) - num_train - num_test
        border1s = [0, num_train - self.seq_len, len(df_raw) - num_test - self.seq_len]
        border2s = [num_train, num_train + num_vali, len(df_raw)]
        border1 = border1s[self.set_type]
        border2 = border2s[self.set_type]

        if self.set_type == 0:
            border2 = (border2 - self.seq_len) * self.percent // 100 + self.seq_len

        if self.features == 'M' or self.features == 'MS':
            cols_data = df_raw.columns[1:]
            df_data = df_raw[cols_data]
        elif self.features == 'S':
            df_data = df_raw[[self.target]]

        if self.scale:
            train_data = df_data[border1s[0]:border2s[0]]
            self.scaler.fit(train_data.values)
            data = self.scaler.transform(df_data.values)
        else:
            data = df_data.values

        df_stamp = df_raw[['date']][border1:border2]
        df_stamp['date'] = pd.to_datetime(df_stamp.date)
        if self.timeenc == 0:
            df_stamp['month'] = df_stamp.date.apply(lambda row: row.month)
            df_stamp['day'] = df_stamp.date.apply(lambda row: row.day)
            df_stamp['weekday'] = df_stamp.date.apply(lambda row: row.weekday())
            df_stamp['hour'] = df_stamp.date.apply(lambda row: row.hour)
            data_stamp = df_stamp.drop(columns=['date']).values
        elif self.timeenc == 1:
            data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq)
            data_stamp = data_stamp.transpose(1, 0)

        self.data_x = data[border1:border2]
        self.data_y = data[border1:border2]
        self.data_stamp = data_stamp

    def __getitem__(self, index):
        feat_id = index // self.tot_len
        s_begin = index % self.tot_len

        s_end = s_begin + self.seq_len
        r_begin = s_end - self.label_len
        r_end = r_begin + self.label_len + self.pred_len
        seq_x = self.data_x[s_begin:s_end, feat_id:feat_id + 1]
        seq_y = self.data_y[r_begin:r_end, feat_id:feat_id + 1]
        seq_x_mark = self.data_stamp[s_begin:s_end]
        seq_y_mark = self.data_stamp[r_begin:r_end]

        return seq_x, seq_y, seq_x_mark, seq_y_mark

    def __len__(self):
        return (len(self.data_x) - self.seq_len - self.pred_len + 1) * self.enc_in

    def inverse_transform(self, data):
        return self.scaler.inverse_transform(data)


class Dataset_Pred(Dataset):
    def __init__(self, root_path, flag='pred', size=None,
                 features='S', data_path='ETTh1.csv',
                 target='OT', scale=True, inverse=False, timeenc=0, freq='15min', cols=None,
                 percent=None, train_all=False):
        # size [seq_len, label_len, pred_len]
        # info
        if size is None:
            self.seq_len = 24 * 4 * 4
            self.label_len = 24 * 4
            self.pred_len = 24 * 4
        else:
            self.seq_len = size[0]
            self.label_len = size[1]
            self.pred_len = size[2]
        # init
        assert flag in ['pred']

        self.features = features
        self.target = target
        self.scale = scale
        self.inverse = inverse
        self.timeenc = timeenc
        self.freq = freq
        self.cols = cols
        self.root_path = root_path
        self.data_path = data_path
        self.__read_data__()

    def __read_data__(self):
        self.scaler = StandardScaler()
        df_raw = pd.read_csv(os.path.join(self.root_path, self.data_path))
        '''
        df_raw.columns: ['date', ...(other features), target feature]
        '''
        if self.cols:
            cols = self.cols.copy()
            cols.remove(self.target)
        else:
            cols = list(df_raw.columns)
            cols.remove(self.target)
            cols.remove('date')
        df_raw = df_raw[['date'] + cols + [self.target]]
        border1 = len(df_raw) - self.seq_len
        border2 = len(df_raw)

        if self.features == 'M' or self.features == 'MS':
            cols_data = df_raw.columns[1:]
            df_data = df_raw[cols_data]
        elif self.features == 'S':
            df_data = df_raw[[self.target]]

        if self.scale:
            self.scaler.fit(df_data.values)
            data = self.scaler.transform(df_data.values)
        else:
            data = df_data.values

        tmp_stamp = df_raw[['date']][border1:border2]
        tmp_stamp['date'] = pd.to_datetime(tmp_stamp.date)
        pred_dates = pd.date_range(tmp_stamp.date.values[-1], periods=self.pred_len + 1, freq=self.freq)

        df_stamp = pd.DataFrame(columns=['date'])
        df_stamp.date = list(tmp_stamp.date.values) + list(pred_dates[1:])
        if self.timeenc == 0:
            df_stamp['month'] = df_stamp.date.apply(lambda row: row.month)
            df_stamp['day'] = df_stamp.date.apply(lambda row: row.day)
            df_stamp['weekday'] = df_stamp.date.apply(lambda row: row.weekday())
            df_stamp['hour'] = df_stamp.date.apply(lambda row: row.hour)
            df_stamp['minute'] = df_stamp.date.apply(lambda row: row.minute)
            df_stamp['minute'] = df_stamp.minute.map(lambda x: x // 15)
            data_stamp = df_stamp.drop(columns=['date']).values
        elif self.timeenc == 1:
            data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq)
            data_stamp = data_stamp.transpose(1, 0)

        self.data_x = data[border1:border2]
        if self.inverse:
            self.data_y = df_data.values[border1:border2]
        else:
            self.data_y = data[border1:border2]
        self.data_stamp = data_stamp

    def __getitem__(self, index):
        s_begin = index
        s_end = s_begin + self.seq_len
        r_begin = s_end - self.label_len
        r_end = r_begin + self.label_len + self.pred_len

        seq_x = self.data_x[s_begin:s_end]
        if self.inverse:
            seq_y = self.data_x[r_begin:r_begin + self.label_len]
        else:
            seq_y = self.data_y[r_begin:r_begin + self.label_len]
        seq_x_mark = self.data_stamp[s_begin:s_end]
        seq_y_mark = self.data_stamp[r_begin:r_end]

        return seq_x, seq_y, seq_x_mark, seq_y_mark

    def __len__(self):
        return len(self.data_x) - self.seq_len + 1

    def inverse_transform(self, data):
        return self.scaler.inverse_transform(data)


class Dataset_TSF(Dataset):
    def __init__(self, root_path, flag='train', size=None,
                 features='S', data_path=None,
                 target='OT', scale=True, timeenc=0, freq='Daily',
                 percent=10, max_len=-1, train_all=False):

        self.train_all = train_all

        self.seq_len = size[0]
        self.pred_len = size[2]
        type_map = {'train': 0, 'val': 1, 'test': 2}
        self.set_type = type_map[flag]

        self.percent = percent
        self.max_len = max_len
        if self.max_len == -1:
            self.max_len = 1e8

        self.root_path = root_path
        self.data_path = data_path
        self.timeseries = self.__read_data__()

    def __read_data__(self):
        df, frequency, forecast_horizon, contain_missing_values, contain_equal_length = \
            convert_tsf_to_dataframe(os.path.join(self.root_path, self.data_path))
        self.freq = frequency

        def dropna(x):
            return x[~np.isnan(x)]

        timeseries = [dropna(ts).astype(np.float32) for ts in df.series_value]

        self.tot_len = 0
        self.len_seq = []
        self.seq_id = []
        for i in range(len(timeseries)):
            res_len = max(self.pred_len + self.seq_len - timeseries[i].shape[0], 0)
            pad_zeros = np.zeros(res_len)
            timeseries[i] = np.hstack([pad_zeros, timeseries[i]])

            _len = timeseries[i].shape[0]
            train_len = _len - self.pred_len
            if self.train_all:
                border1s = [0, 0, train_len - self.seq_len]
                border2s = [train_len, train_len, _len]
            else:
                border1s = [0, train_len - self.seq_len - self.pred_len, train_len - self.seq_len]
                border2s = [train_len - self.pred_len, train_len, _len]
                border2s[0] = (border2s[0] - self.seq_len) * self.percent // 100 + self.seq_len
            # print("_len = {}".format(_len))

            curr_len = border2s[self.set_type] - max(border1s[self.set_type], 0) - self.pred_len - self.seq_len + 1
            curr_len = max(0, curr_len)

            self.len_seq.append(np.zeros(curr_len) + self.tot_len)
            self.seq_id.append(np.zeros(curr_len) + i)
            self.tot_len += curr_len

        self.len_seq = np.hstack(self.len_seq)
        self.seq_id = np.hstack(self.seq_id)

        return timeseries

    def __getitem__(self, index):
        len_seq = self.len_seq[index]
        seq_id = int(self.seq_id[index])
        index = index - int(len_seq)

        _len = self.timeseries[seq_id].shape[0]
        train_len = _len - self.pred_len
        if self.train_all:
            border1s = [0, 0, train_len - self.seq_len]
            border2s = [train_len, train_len, _len]
        else:
            border1s = [0, train_len - self.seq_len - self.pred_len, train_len - self.seq_len]
            border2s = [train_len - self.pred_len, train_len, _len]
            border2s[0] = (border2s[0] - self.seq_len) * self.percent // 100 + self.seq_len

        s_begin = index + border1s[self.set_type]
        s_end = s_begin + self.seq_len
        r_begin = s_end
        r_end = r_begin + self.pred_len
        if self.set_type == 2:
            s_end = -self.pred_len

        data_x = self.timeseries[seq_id][s_begin:s_end]
        data_y = self.timeseries[seq_id][r_begin:r_end]
        data_x = np.expand_dims(data_x, axis=-1)
        data_y = np.expand_dims(data_y, axis=-1)
        # if self.set_type == 2:
        #     print("data_x.shape = {}, data_y.shape = {}".format(data_x.shape, data_y.shape))

        return data_x, data_y, data_x, data_y

    def __len__(self):
        if self.set_type == 0:
            # return self.tot_len
            return min(self.max_len, self.tot_len)
        else:
            return self.tot_len
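# --- Editor's note (not part of the commit) ---
# Dataset_Custom flattens (feature, start-position) pairs into a single index:
# __len__ = tot_len * enc_in, and __getitem__ recovers the pair as
#   feat_id = index // tot_len, s_begin = index % tot_len,
# so each univariate channel is sampled independently. A tiny check:
tot_len, enc_in = 100, 7          # hypothetical values
index = 305
feat_id, s_begin = index // tot_len, index % tot_len
assert (feat_id, s_begin) == (3, 5)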
@ -0,0 +1,154 @@
import os
import numpy as np
import pandas as pd
from pydmd import DMD, MrDMD


def load_data(file_path):
    """
    Loads the solar energy data from a CSV file.

    Args:
        file_path (str): Path to the CSV file containing the solar energy data.

    Returns:
        np.ndarray: Data array with shape (num_samples, num_nodes, 1).
    """
    df = pd.read_csv(file_path, index_col='time')
    data = df.values
    return np.expand_dims(np.asarray(data), axis=-1)


def generate_offsets(seq_length_x, seq_length_y):
    """
    Generates the x and y offsets based on the given sequence lengths.

    Args:
        seq_length_x (int): Length of the input sequence.
        seq_length_y (int): Length of the output sequence.

    Returns:
        tuple: x_offsets, y_offsets arrays.
    """
    x_offsets = np.sort(np.concatenate((np.arange(-(seq_length_x - 1), 1, 1),)))
    y_offsets = np.sort(np.arange(1, seq_length_y + 1, 1))
    return x_offsets, y_offsets


def fit_dmd_model(data, svd_rank=-1, max_level=2, max_cycles=3):
    """
    Fits a DMD model to the input data.

    Args:
        data (np.ndarray): Input data for DMD model fitting.
        svd_rank (int): Rank of the singular value decomposition. Default is -1 for auto-selection.
        max_level (int): Maximum level for MrDMD. Default is 2.
        max_cycles (int): Maximum number of cycles for MrDMD. Default is 3.

    Returns:
        np.ndarray: Reconstructed data after DMD fitting.
    """
    base_dmd = DMD(svd_rank=svd_rank)
    dmd = MrDMD(dmd=base_dmd, max_level=max_level, max_cycles=max_cycles)
    dmd.fit(data.T)
    reconstructed = dmd.reconstructed_data.T
    return reconstructed


def prepare_data(data, x_offsets, y_offsets):
    """
    Prepares the input and output sequences from the given data.

    Args:
        data (np.ndarray): The input data array.
        x_offsets (np.ndarray): Offsets for the input sequence.
        y_offsets (np.ndarray): Offsets for the output sequence.

    Returns:
        tuple: x (input sequences), y (output sequences).
    """
    num_samples = data.shape[0]
    min_t = abs(min(x_offsets))
    max_t = abs(num_samples - abs(max(y_offsets)))  # Exclusive

    x, y = [], []
    for t in range(min_t, max_t):  # t is the index of the last observation.
        x.append(data[t + x_offsets, ...])
        y.append(data[t + y_offsets, ...])

    x = np.stack(x, axis=0, dtype='complex64')
    y = np.stack(y, axis=0, dtype='complex64')

    return x.transpose(0, 2, 1, 3), y.transpose(0, 2, 1, 3)


def split_data(x, y, train_ratio=0.7, val_ratio=0.2):
    """
    Splits the data into training, validation, and test sets.

    Args:
        x (np.ndarray): Input sequences.
        y (np.ndarray): Output sequences.
        train_ratio (float): Ratio of data for training. Default is 0.7.
        val_ratio (float): Ratio of data for validation. Default is 0.2.

    Returns:
        tuple: x_train, y_train, x_val, y_val, x_test, y_test
    """
    num_samples = x.shape[0]
    num_train = round(num_samples * train_ratio)
    num_val = round(num_samples * val_ratio)
    num_test = num_samples - num_train - num_val

    x_train, y_train = x[:num_train], y[:num_train]
    x_val, y_val = x[num_train:num_train + num_val], y[num_train:num_train + num_val]
    x_test, y_test = x[-num_test:], y[-num_test:]

    return x_train, y_train, x_val, y_val, x_test, y_test


def save_data(x, y, x_offsets, y_offsets, save_dir, dataset_type):
    """
    Saves the prepared data as compressed .npz files.

    Args:
        x (np.ndarray): Input sequences.
        y (np.ndarray): Output sequences.
        x_offsets (np.ndarray): x_offsets array.
        y_offsets (np.ndarray): y_offsets array.
        save_dir (str): Directory where the data will be saved.
        dataset_type (str): The type of dataset (train/val/test).
    """
    np.savez_compressed(
        os.path.join(save_dir, f"{dataset_type}.npz"),
        x=x,
        y=y,
        x_offsets=x_offsets.reshape(list(x_offsets.shape) + [1]),
        y_offsets=y_offsets.reshape(list(y_offsets.shape) + [1]),
    )


def main():
    # Configuration
    data_file = './Solar-energy_data.csv'
    save_dir = './solar-energy'
    seq_length_x, seq_length_y = 24, 24

    # Data loading and preprocessing
    data = load_data(data_file)
    x_offsets, y_offsets = generate_offsets(seq_length_x, seq_length_y)

    # DMD model fitting
    reconstructed = fit_dmd_model(data)

    # Prepare the final data for training
    feature_list = [data, reconstructed, data - reconstructed]
    data = np.concatenate(feature_list, axis=-1)

    # Prepare sequences
    x, y = prepare_data(data, x_offsets, y_offsets)

    # Split the data into train, val, test sets
    x_train, y_train, x_val, y_val, x_test, y_test = split_data(x, y)

    # Save the datasets
    for dataset_type, _x, _y in zip(["train", "val", "test"], [x_train, x_val, x_test], [y_train, y_val, y_test]):
        save_data(_x, _y, x_offsets, y_offsets, save_dir, dataset_type)

    print("Data preparation and saving completed!")


if __name__ == "__main__":
    main()
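# --- Editor's note (not part of the commit) ---
# generate_offsets builds relative indices around the "current" time step t:
# x uses the last seq_length_x observations (inclusive of t), y the next seq_length_y.
x_off, y_off = generate_offsets(3, 2)
# x_off == array([-2, -1, 0]), y_off == array([1, 2]),
# so a window anchored at t stacks data[t-2:t+1] as input and data[t+1:t+3] as target.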
@ -0,0 +1,105 @@
import torch
import torch.nn as nn
import torch.nn.functional as F


def gumbel_softmax(logits, tau=1, k=1000, hard=True):

    y_soft = F.gumbel_softmax(logits, tau, hard)

    if hard:
        # Build a hard mask from the relaxed samples
        _, indices = y_soft.topk(k, dim=0)  # select the top-k entries
        y_hard = torch.zeros_like(logits)
        y_hard.scatter_(0, indices, 1)
        return torch.squeeze(y_hard, dim=-1)
    return torch.squeeze(y_soft, dim=-1)


class Normalize(nn.Module):
    def __init__(self, num_features: int, eps=1e-5, affine=False, subtract_last=False, non_norm=False):
        """
        :param num_features: the number of features or channels
        :param eps: a value added for numerical stability
        :param affine: if True, RevIN has learnable affine parameters
        """
        super(Normalize, self).__init__()
        self.num_features = num_features
        self.eps = eps
        self.affine = affine
        self.subtract_last = subtract_last
        self.non_norm = non_norm
        if self.affine:
            self._init_params()

    def forward(self, x, mode: str):
        if mode == 'norm':
            self._get_statistics(x)
            x = self._normalize(x)
        elif mode == 'denorm':
            x = self._denormalize(x)
        else:
            raise NotImplementedError
        return x

    def _init_params(self):
        # initialize RevIN params: (C,)
        self.affine_weight = nn.Parameter(torch.ones(self.num_features))
        self.affine_bias = nn.Parameter(torch.zeros(self.num_features))

    def _get_statistics(self, x):
        dim2reduce = tuple(range(1, x.ndim - 1))
        if self.subtract_last:
            self.last = x[:, -1, :].unsqueeze(1)
        else:
            self.mean = torch.mean(x, dim=dim2reduce, keepdim=True).detach()
        self.stdev = torch.sqrt(torch.var(x, dim=dim2reduce, keepdim=True, unbiased=False) + self.eps).detach()

    def _normalize(self, x):
        if self.non_norm:
            return x
        if self.subtract_last:
            x = x - self.last
        else:
            x = x - self.mean
        x = x / self.stdev
        if self.affine:
            x = x * self.affine_weight
            x = x + self.affine_bias
        return x

    def _denormalize(self, x):
        if self.non_norm:
            return x
        if self.affine:
            x = x - self.affine_bias
            x = x / (self.affine_weight + self.eps * self.eps)
        x = x * self.stdev
        if self.subtract_last:
            x = x + self.last
        else:
            x = x + self.mean
        return x


class MultiLayerPerceptron(nn.Module):
    """Multi-Layer Perceptron with residual links."""

    def __init__(self, input_dim, hidden_dim) -> None:
        super().__init__()
        self.fc1 = nn.Conv2d(
            in_channels=input_dim, out_channels=hidden_dim, kernel_size=(1, 1), bias=True)
        self.fc2 = nn.Conv2d(
            in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=(1, 1), bias=True)
        self.act = nn.ReLU()
        self.drop = nn.Dropout(p=0.15)

    def forward(self, input_data: torch.Tensor) -> torch.Tensor:
        """
        input_data (torch.Tensor): input data with shape [B, D, N]
        """

        hidden = self.fc2(self.drop(self.act(self.fc1(input_data))))  # MLP
        hidden = hidden + input_data  # residual
        return hidden
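# --- Editor's note (not part of the commit) ---
# The Normalize module follows the RevIN pattern: statistics are computed per sample
# over the middle dimensions in 'norm' mode and reused to undo the scaling in 'denorm'.
norm = Normalize(num_features=1, affine=False)
x = torch.randn(8, 24, 1)          # [batch, time, features]
x_n = norm(x, 'norm')              # zero-mean, unit-variance per sample
x_back = norm(x_n, 'denorm')       # approximately recovers x
assert torch.allclose(x, x_back, atol=1e-4)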
@ -0,0 +1,98 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import Tensor
from math import sqrt


class ReplicationPad1d(nn.Module):
    def __init__(self, padding) -> None:
        super(ReplicationPad1d, self).__init__()
        self.padding = padding

    def forward(self, input: Tensor) -> Tensor:
        # Repeat the last value along the final dimension padding[-1] times.
        replicate_padding = input[:, :, :, -1].unsqueeze(-1).repeat(1, 1, 1, self.padding[-1])
        output = torch.cat([input, replicate_padding], dim=-1)
        return output


class TokenEmbedding(nn.Module):
    def __init__(self, c_in, d_model):
        super(TokenEmbedding, self).__init__()
        padding = 1
        self.tokenConv = nn.Conv1d(in_channels=c_in, out_channels=d_model,
                                   kernel_size=3, padding=padding, padding_mode='circular', bias=False)
        self.confusion_layer = nn.Linear(12, 1)
        # if air_quality
        # self.confusion_layer = nn.Linear(42, 1)

        for m in self.modules():
            if isinstance(m, nn.Conv1d):
                nn.init.kaiming_normal_(
                    m.weight, mode='fan_in', nonlinearity='leaky_relu')

    def forward(self, x):
        # x: [batch, nodes, vars, patch_num, patch_len]
        b, n, m, pn, pl = x.shape
        x = self.tokenConv(x.reshape(b * n, pl, m * pn))

        x = self.confusion_layer(x)
        return x.reshape(b, n, -1)


class PatchEmbedding(nn.Module):
    def __init__(self, d_model, patch_len, stride, dropout):
        super(PatchEmbedding, self).__init__()
        # Patching
        self.patch_len = patch_len
        self.stride = stride
        self.padding_patch_layer = ReplicationPad1d((0, stride))
        self.value_embedding = TokenEmbedding(patch_len, d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        n_vars = x.shape[2]
        x = self.padding_patch_layer(x)
        x = x.unfold(dimension=-1, size=self.patch_len, step=self.stride)
        x_value_embed = self.value_embedding(x)

        return self.dropout(x_value_embed), n_vars


class ReprogrammingLayer(nn.Module):
    def __init__(self, d_model, n_heads, d_keys=None, d_llm=None, attention_dropout=0.1):
        super(ReprogrammingLayer, self).__init__()

        d_keys = d_keys or (d_model // n_heads)

        self.query_projection = nn.Linear(d_model, d_keys * n_heads)
        self.key_projection = nn.Linear(d_llm, d_keys * n_heads)
        self.value_projection = nn.Linear(d_llm, d_keys * n_heads)
        self.out_projection = nn.Linear(d_keys * n_heads, d_llm)
        self.n_heads = n_heads
        self.dropout = nn.Dropout(attention_dropout)

    def forward(self, target_embedding, source_embedding, value_embedding):
        B, L, _ = target_embedding.shape
        S, _ = source_embedding.shape
        H = self.n_heads

        target_embedding = self.query_projection(target_embedding).view(B, L, H, -1)
        source_embedding = self.key_projection(source_embedding).view(S, H, -1)
        value_embedding = self.value_projection(value_embedding).view(S, H, -1)

        out = self.reprogramming(target_embedding, source_embedding, value_embedding)
        out = out.reshape(B, L, -1)

        return self.out_projection(out)

    def reprogramming(self, target_embedding, source_embedding, value_embedding):
        B, L, H, E = target_embedding.shape

        scale = 1. / sqrt(E)

        scores = torch.einsum("blhe,she->bhls", target_embedding, source_embedding)

        A = self.dropout(torch.softmax(scale * scores, dim=-1))
        reprogramming_embedding = torch.einsum("bhls,she->blhe", A, value_embedding)

        return reprogramming_embedding
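# --- Editor's shape sketch (not part of the commit) ---
# In ReprogrammingLayer, patch tokens (queries) attend over a small set of
# text-prototype embeddings (keys/values) and are mapped into the LLM width.
layer = ReprogrammingLayer(d_model=64, n_heads=1, d_llm=768)
patches = torch.randn(2, 10, 64)      # [batch, num_patches, d_model]
prototypes = torch.randn(1000, 768)   # [num_prototypes, d_llm]
out = layer(patches, prototypes, prototypes)
assert out.shape == (2, 10, 768)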
@ -0,0 +1,102 @@
import torch
import torch.nn as nn
import torch.nn.functional as F

from math import sqrt
from transformers import GPT2Model, GPT2Config

from einops import rearrange
from reprogramming import *
from normalizer import *


class repst(nn.Module):

    def __init__(self, configs, device):
        super(repst, self).__init__()

        self.device = device
        self.pred_len = configs.pred_len
        self.seq_len = configs.seq_len
        self.patch_len = configs.patch_len
        self.stride = configs.stride
        self.dropout = configs.dropout
        self.gpt_layers = configs.gpt_layers
        self.d_ff = configs.d_ff  # output mapping dimension

        self.d_model = configs.d_model
        self.n_heads = configs.n_heads
        self.d_keys = None
        self.d_llm = 768

        self.patch_nums = int((self.seq_len - self.patch_len) / self.stride + 2)
        self.head_nf = self.d_ff * self.patch_nums

        self.patch_embedding = PatchEmbedding(self.d_model, self.patch_len, self.stride, self.dropout)

        self.gpts = GPT2Model.from_pretrained('./GPT-2', output_attentions=True, output_hidden_states=True)
        self.gpts.h = self.gpts.h[:self.gpt_layers]

        self.gpts.apply(self.reset_parameters)

        self.word_embeddings = self.gpts.get_input_embeddings().weight.to(self.device)
        self.vocab_size = self.word_embeddings.shape[0]
        self.num_tokens = 1000
        self.n_vars = 5

        self.normalize_layers = Normalize(num_features=1, affine=False)
        self.mapping_layer = nn.Linear(self.vocab_size, 1)

        self.reprogramming_layer = ReprogrammingLayer(self.d_model, self.n_heads, self.d_keys, self.d_llm)

        self.out_mlp = nn.Sequential(
            nn.Linear(self.d_llm, 128),
            nn.ReLU(),
            nn.Linear(128, self.pred_len)
        )

        # Freeze the backbone except for the positional embeddings.
        for i, (name, param) in enumerate(self.gpts.named_parameters()):
            if 'wpe' in name:
                param.requires_grad = True
            else:
                param.requires_grad = False

    def reset_parameters(self, module):
        if hasattr(module, 'weight') and module.weight is not None:
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
        if hasattr(module, 'bias') and module.bias is not None:
            torch.nn.init.zeros_(module.bias)

    def forward(self, x):

        x_enc = self.normalize_layers(x, 'norm')

        x_enc = rearrange(x_enc, 'b n l m -> b n m l')
        enc_out, n_vars = self.patch_embedding(x_enc)

        embeddings = self.mapping_layer(self.word_embeddings.permute(1, 0)).permute(1, 0)
        masks = gumbel_softmax(self.mapping_layer.weight.data.permute(1, 0))
        source_embeddings = self.word_embeddings[masks == 1]

        enc_out = self.reprogramming_layer(enc_out, source_embeddings, source_embeddings)

        enc_out = self.gpts(inputs_embeds=enc_out).last_hidden_state

        dec_out = self.out_mlp(enc_out)

        outputs = dec_out.unsqueeze(dim=-1)
        outputs = outputs.repeat(1, 1, 1, n_vars)

        dec_out = self.normalize_layers(outputs, 'denorm')

        return dec_out
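# --- Editor's note (not part of the commit) ---
# With the defaults used in the training script (seq_len=24, patch_len=6, stride=7),
# ReplicationPad1d((0, stride)) pads the series to length seq_len + stride before
# unfolding, which is what the "+ 2" in the patch_nums formula accounts for:
seq_len, patch_len, stride = 24, 6, 7
patch_nums = int((seq_len - patch_len) / stride + 2)              # -> 4
padded_len = seq_len + stride                                     # -> 31
n_patches_from_unfold = (padded_len - patch_len) // stride + 1    # -> 4, consistent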
@ -0,0 +1,144 @@
from data_provider.data_factory import data_provider
from utils.former_tools import vali, test, masked_mae, EarlyStopping

from tqdm import tqdm

from models.repst import repst
import pickle
import numpy as np
import torch
import torch.nn as nn
from torch import optim
import os
import time
import warnings

import argparse
import random
import logging

warnings.filterwarnings('ignore')

fix_seed = 2023
random.seed(fix_seed)
torch.manual_seed(fix_seed)
np.random.seed(fix_seed)

parser = argparse.ArgumentParser(description='RePST')

parser.add_argument('--device', type=str, default='cuda:0')
parser.add_argument('--checkpoints', type=str, default='./checkpoints/')

parser.add_argument('--root_path', type=str, default='path_to_data')
parser.add_argument('--data_path', type=str, default='dataset_name')

parser.add_argument('--pred_len', type=int, default=24)
parser.add_argument('--seq_len', type=int, default=24)

parser.add_argument('--decay_fac', type=float, default=0.75)
parser.add_argument('--learning_rate', type=float, default=0.002)
parser.add_argument('--batch_size', type=int, default=16)
parser.add_argument('--num_workers', type=int, default=10)
parser.add_argument('--train_epochs', type=int, default=100)
parser.add_argument('--patience', type=int, default=20)

parser.add_argument('--gpt_layers', type=int, default=9)
parser.add_argument('--d_model', type=int, default=64)
parser.add_argument('--n_heads', type=int, default=1)
parser.add_argument('--d_ff', type=int, default=128)
parser.add_argument('--dropout', type=float, default=0.2)

parser.add_argument('--patch_len', type=int, default=6)
parser.add_argument('--stride', type=int, default=7)

parser.add_argument('--tmax', type=int, default=5)

args = parser.parse_args()
device = torch.device(args.device)

logging.basicConfig(filename="./log/{}.log".format(args.data_path), level=logging.INFO)
logging.info(args)

rmses = []
maes = []
mapes = []

train_loader, vali_loader, test_loader = data_provider(args)

time_now = time.time()

model = repst(args, device).to(device)
early_stopping = EarlyStopping(patience=args.patience, verbose=True)
params = model.parameters()
model_optim = torch.optim.Adam(params, lr=args.learning_rate)

# class SMAPE(nn.Module):
#     def __init__(self):
#         super(SMAPE, self).__init__()
#     def forward(self, pred, true):
#         return torch.mean(200 * torch.abs(pred - true) / (torch.abs(pred) + torch.abs(true) + 1e-8))
# criterion = SMAPE()
criterion = nn.MSELoss()

scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(model_optim, T_max=args.tmax, eta_min=1e-8)

path = "./checkpoints/{}_{}_{}".format(args.data_path, args.gpt_layers, args.learning_rate)
if not os.path.exists(path):
    os.makedirs(path)
for epoch in range(args.train_epochs):

    iter_count = 0
    train_loss = []
    epoch_time = time.time()
    train_loader.shuffle()
    model_optim.zero_grad()
    for i, (x, y) in enumerate(train_loader.get_iterator()):

        iter_count += 1

        x = x.to(device)
        y = y.to(device)

        outputs = model(x)
        outputs = outputs[..., 0]
        y = y[..., 0]

        loss = criterion(outputs, y)

        train_loss.append(loss.item())

        if i % 100 == 0:
            print("iters: {}, loss: {}, time_cost: {}".format(i + 1, np.average(train_loss[-100:]), time.time() - epoch_time))
            logging.info("iters: {}, loss: {}, time_cost: {}".format(i + 1, np.average(train_loss[-100:]), time.time() - epoch_time))

        loss.backward()

        model_optim.step()
        model_optim.zero_grad()

    logging.info("Epoch: {} cost time: {}".format(epoch, time.time() - epoch_time))
    print("Epoch: {} cost time: {}".format(epoch, time.time() - epoch_time))

    train_loss = np.average(train_loss)
    vali_loss = vali(model, vali_loader, criterion, args, device)
    scheduler.step()

    early_stopping(vali_loss, model, path)

    if (epoch + 1) % 1 == 0:

        print("------------------------------------")
        logging.info("------------------------------------")
        mae, mape, rmse = test(model, test_loader, args, device)
        log = 'On average over all horizons, Test MAE: {:.4f}, Test MAPE: {:.4f}, Test RMSE: {:.4f}'
        logging.info(log.format(mae, mape, rmse))
        print(log.format(mae, mape, rmse))
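# --- Editor's note (not part of the commit) ---
# The script is configured entirely through argparse; assuming it is saved as
# train.py (hypothetical file name), a typical invocation with placeholder paths
# would look like:
#   python train.py --root_path ./data --data_path solar-energy \
#                   --device cuda:0 --seq_len 24 --pred_len 24 --batch_size 16
# where ./data/solar-energy/{train,val,test}.npz are produced by the DMD
# preprocessing script included in this commit, and ./log/ must exist for logging.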
@ -0,0 +1,255 @@
import numpy as np
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
from tqdm import tqdm

from datetime import datetime
from distutils.util import strtobool
import pandas as pd

from utils.metrics import metric

plt.switch_backend('agg')


def adjust_learning_rate(optimizer, epoch, args):
    # lr = args.learning_rate * (0.2 ** (epoch // 2))
    # if args.decay_fac is None:
    #     args.decay_fac = 0.5
    # if args.lradj == 'type1':
    #     lr_adjust = {epoch: args.learning_rate * (args.decay_fac ** ((epoch - 1) // 1))}
    # elif args.lradj == 'type2':
    #     lr_adjust = {
    #         2: 5e-5, 4: 1e-5, 6: 5e-6, 8: 1e-6,
    #         10: 5e-7, 15: 1e-7, 20: 5e-8
    #     }
    if args.lradj == 'type1':
        lr_adjust = {epoch: args.learning_rate if epoch < 3 else args.learning_rate * (0.9 ** ((epoch - 3) // 1))}
    elif args.lradj == 'type2':
        lr_adjust = {epoch: args.learning_rate * (args.decay_fac ** ((epoch - 1) // 1))}
    elif args.lradj == 'type4':
        lr_adjust = {epoch: args.learning_rate * (args.decay_fac ** ((epoch) // 1))}
    else:
        args.learning_rate = 1e-4
        lr_adjust = {epoch: args.learning_rate if epoch < 3 else args.learning_rate * (0.9 ** ((epoch - 3) // 1))}
    print("lr_adjust = {}".format(lr_adjust))
    if epoch in lr_adjust.keys():
        lr = lr_adjust[epoch]
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
        print('Updating learning rate to {}'.format(lr))


class EarlyStopping:
    def __init__(self, patience=7, verbose=False, delta=0):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.inf
        self.delta = delta

    def __call__(self, val_loss, model, path):
        score = -val_loss
        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model, path)
        elif score < self.best_score + self.delta:
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model, path)
            self.counter = 0

    def save_checkpoint(self, val_loss, model, path):
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...')
        torch.save(model.state_dict(), path + '/' + 'checkpoint.pth')
        self.val_loss_min = val_loss


class dotdict(dict):
    """dot.notation access to dictionary attributes"""
    __getattr__ = dict.get
    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__


class StandardScaler():
    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def transform(self, data):
        return (data - self.mean) / self.std

    def inverse_transform(self, data):
        return (data * self.std) + self.mean


def vali(model, vali_loader, criterion, args, device):
    total_loss = []

    model.eval()

    with torch.no_grad():
        for i, (batch_x, batch_y) in enumerate(vali_loader.get_iterator()):
            outputs = model(batch_x)

            # encoder - decoder
            outputs = outputs[..., 0]
            batch_y = batch_y[..., 0]

            pred = outputs
            true = batch_y

            loss = masked_mae(pred, true, 0.0)

            total_loss.append(loss)
    total_loss = torch.mean(torch.tensor(total_loss))

    model.train()

    return total_loss


def MASE(x, freq, pred, true):
    masep = np.mean(np.abs(x[:, freq:] - x[:, :-freq]))
    return np.mean(np.abs(pred - true) / (masep + 1e-8))


def test(model, test_loader, args, device):
    preds = []
    trues = []

    model.eval()
    with torch.no_grad():
        for i, (batch_x, batch_y) in enumerate(test_loader.get_iterator()):

            outputs = model(batch_x)

            # encoder - decoder
            outputs = outputs[..., 0]
            batch_y = batch_y[..., 0]

            pred = outputs
            true = batch_y

            preds.append(pred)
            trues.append(true)

    # drop the last (possibly incomplete) batch so the tensors stack cleanly
    preds = torch.stack(preds[:-1])
    trues = torch.stack(trues[:-1])

    amae = []
    amape = []
    armse = []
    for i in range(args.pred_len):
        pred = preds[..., i]
        real = trues[..., i]

        m = metrics(pred, real)

        log = 'Evaluate best model on test data for horizon {:d}, Test MAE: {:.4f}, Test MAPE: {:.4f}, Test RMSE: {:.4f}'
        print(log.format(i + 1, m[0], m[1], m[2]))
        amae.append(m[0])
        amape.append(m[1])
        armse.append(m[2])

    return torch.mean(torch.tensor(amae)), torch.mean(torch.tensor(amape)), torch.mean(torch.tensor(armse))


def masked_mse(preds, labels, null_val=np.nan):
    if np.isnan(null_val):
        mask = ~torch.isnan(labels)
    else:
        mask = (labels != null_val)
    mask = mask.float()
    mask /= torch.mean(mask)
    mask = torch.where(torch.isnan(mask), torch.zeros_like(mask), mask)
    loss = (preds - labels) ** 2
    loss = loss * mask
    loss = torch.where(torch.isnan(loss), torch.zeros_like(loss), loss)
    return torch.mean(loss)


def masked_rmse(preds, labels, null_val=np.nan):
    return torch.sqrt(masked_mse(preds=preds, labels=labels, null_val=null_val))


def masked_mae(preds, labels, null_val=np.nan):
    if np.isnan(null_val):
        mask = ~torch.isnan(labels)
    else:
        mask = (labels != null_val)
    mask = mask.float()
    mask /= torch.mean(mask)
    mask = torch.where(torch.isnan(mask), torch.zeros_like(mask), mask)
    loss = torch.abs(preds - labels)
    loss = loss * mask
    loss = torch.where(torch.isnan(loss), torch.zeros_like(loss), loss)
    return torch.mean(loss)


def masked_mape(preds, labels, null_val=np.nan):
    if np.isnan(null_val):
        mask = ~torch.isnan(labels)
    else:
        mask = (labels != null_val)
    mask = mask.float()
    mask /= torch.mean(mask)
    mask = torch.where(torch.isnan(mask), torch.zeros_like(mask), mask)
    loss = torch.abs(preds - labels) / labels
    loss = loss * mask
    loss = torch.where(torch.isnan(loss), torch.zeros_like(loss), loss)
    return torch.mean(loss)


def metrics(pred, real):
    mae = masked_mae(pred, real, 0.0).item()
    mape = masked_mape(pred, real, 0.0).item()
    rmse = masked_rmse(pred, real, 0.0).item()
    return mae, mape, rmse


# # import numpy as np
# def cal_metrics(y_true, y_pred):
#     mse = torch.square(y_pred - y_true)
#     mse = torch.mean(mse)
#
#     # rmse = torch.square(np.abs(y_pred - y_true))
#     rmse = torch.sqrt(mse)
#
#     mae = torch.abs(y_pred - y_true)
#     mae = torch.mean(mae)
#     return rmse, 0, mae
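# --- Editor's note (not part of the commit) ---
# The masked metrics ignore entries equal to null_val (0.0 is passed above).
# Dividing the mask by its mean re-weights the remaining entries so the result
# is the mean error over valid positions only. A quick check:
labels = torch.tensor([1.0, 2.0, 0.0, 4.0])
preds = torch.tensor([1.5, 2.0, 3.0, 3.0])
# valid absolute errors: 0.5, 0.0, 1.0 over 3 positions -> masked MAE = 0.5
assert abs(masked_mae(preds, labels, 0.0).item() - 0.5) < 1e-6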
@ -0,0 +1,50 @@
import numpy as np


def RSE(pred, true):
    return np.sqrt(np.sum((true - pred) ** 2)) / np.sqrt(np.sum((true - true.mean()) ** 2))


def CORR(pred, true):
    u = ((true - true.mean(0)) * (pred - pred.mean(0))).sum(0)
    d = np.sqrt(((true - true.mean(0)) ** 2 * (pred - pred.mean(0)) ** 2).sum(0))
    return (u / d).mean(-1)


def MAE(pred, true):
    return np.mean(np.abs(pred - true))


def MSE(pred, true):
    return np.mean((pred - true) ** 2)


def RMSE(pred, true):
    return np.sqrt(MSE(pred, true))


def MAPE(pred, true):
    return np.mean(np.abs(100 * (pred - true) / (true + 1e-8)))


def MSPE(pred, true):
    return np.mean(np.square((pred - true) / (true + 1e-8)))


def SMAPE(pred, true):
    return np.mean(200 * np.abs(pred - true) / (np.abs(pred) + np.abs(true) + 1e-8))
    # return np.mean(200 * np.abs(pred - true) / (pred + true + 1e-8))


def ND(pred, true):
    return np.mean(np.abs(true - pred)) / np.mean(np.abs(true))


def metric(pred, true):
    mae = MAE(pred, true)
    mse = MSE(pred, true)
    rmse = RMSE(pred, true)
    mape = MAPE(pred, true)
    mspe = MSPE(pred, true)
    smape = SMAPE(pred, true)
    nd = ND(pred, true)

    return mae, mse, rmse, mape, mspe, smape, nd
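# --- Editor's note (not part of the commit) ---
# MAPE divides by the true values only, while SMAPE is symmetric in pred and true:
pred, true = np.array([110.0]), np.array([100.0])
# MAPE == 10.0 (percent), SMAPE == 200 * 10 / 210 ~= 9.52
print(MAPE(pred, true), SMAPE(pred, true))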
@ -0,0 +1,134 @@
from typing import List

import numpy as np
import pandas as pd
from pandas.tseries import offsets
from pandas.tseries.frequencies import to_offset


class TimeFeature:
    def __init__(self):
        pass

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        pass

    def __repr__(self):
        return self.__class__.__name__ + "()"


class SecondOfMinute(TimeFeature):
    """Second of minute encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return index.second / 59.0 - 0.5


class MinuteOfHour(TimeFeature):
    """Minute of hour encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return index.minute / 59.0 - 0.5


class HourOfDay(TimeFeature):
    """Hour of day encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return index.hour / 23.0 - 0.5


class DayOfWeek(TimeFeature):
    """Day of week encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return index.dayofweek / 6.0 - 0.5


class DayOfMonth(TimeFeature):
    """Day of month encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return (index.day - 1) / 30.0 - 0.5


class DayOfYear(TimeFeature):
    """Day of year encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return (index.dayofyear - 1) / 365.0 - 0.5


class MonthOfYear(TimeFeature):
    """Month of year encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return (index.month - 1) / 11.0 - 0.5


class WeekOfYear(TimeFeature):
    """Week of year encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return (index.isocalendar().week - 1) / 52.0 - 0.5


def time_features_from_frequency_str(freq_str: str) -> List[TimeFeature]:
    """
    Returns a list of time features that will be appropriate for the given frequency string.

    Parameters
    ----------
    freq_str
        Frequency string of the form [multiple][granularity] such as "12H", "5min", "1D" etc.
    """

    features_by_offsets = {
        offsets.YearEnd: [],
        offsets.QuarterEnd: [MonthOfYear],
        offsets.MonthEnd: [MonthOfYear],
        offsets.Week: [DayOfMonth, WeekOfYear],
        offsets.Day: [DayOfWeek, DayOfMonth, DayOfYear],
        offsets.BusinessDay: [DayOfWeek, DayOfMonth, DayOfYear],
        offsets.Hour: [HourOfDay, DayOfWeek, DayOfMonth, DayOfYear],
        offsets.Minute: [
            MinuteOfHour,
            HourOfDay,
            DayOfWeek,
            DayOfMonth,
            DayOfYear,
        ],
        offsets.Second: [
            SecondOfMinute,
            MinuteOfHour,
            HourOfDay,
            DayOfWeek,
            DayOfMonth,
            DayOfYear,
        ],
    }

    offset = to_offset(freq_str)

    for offset_type, feature_classes in features_by_offsets.items():
        if isinstance(offset, offset_type):
            return [cls() for cls in feature_classes]

    supported_freq_msg = f"""
    Unsupported frequency {freq_str}
    The following frequencies are supported:
        Y   - yearly
            alias: A
        M   - monthly
        W   - weekly
        D   - daily
        B   - business days
        H   - hourly
        T   - minutely
            alias: min
        S   - secondly
    """
    raise RuntimeError(supported_freq_msg)


def time_features(dates, freq='h'):
    return np.vstack([feat(dates) for feat in time_features_from_frequency_str(freq)])
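# --- Editor's note (not part of the commit) ---
# time_features stacks one row per feature, each scaled to roughly [-0.5, 0.5].
# For an hourly index it returns 4 rows: HourOfDay, DayOfWeek, DayOfMonth, DayOfYear.
idx = pd.date_range('2021-01-01', periods=48, freq='h')
feats = time_features(idx, freq='h')
assert feats.shape == (4, 48)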
@ -0,0 +1,379 @@
import numpy as np
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
from tqdm import tqdm

from datetime import datetime
from distutils.util import strtobool
import pandas as pd

from utils.metrics import metric

plt.switch_backend('agg')


def adjust_learning_rate(optimizer, epoch, args):
    # lr = args.learning_rate * (0.2 ** (epoch // 2))
    # if args.decay_fac is None:
    #     args.decay_fac = 0.5
    # if args.lradj == 'type1':
    #     lr_adjust = {epoch: args.learning_rate * (args.decay_fac ** ((epoch - 1) // 1))}
    # elif args.lradj == 'type2':
    #     lr_adjust = {
    #         2: 5e-5, 4: 1e-5, 6: 5e-6, 8: 1e-6,
    #         10: 5e-7, 15: 1e-7, 20: 5e-8
    #     }
    if args.lradj == 'type1':
        lr_adjust = {epoch: args.learning_rate if epoch < 3 else args.learning_rate * (0.9 ** ((epoch - 3) // 1))}
    elif args.lradj == 'type2':
        lr_adjust = {epoch: args.learning_rate * (args.decay_fac ** ((epoch - 1) // 1))}
    elif args.lradj == 'type4':
        lr_adjust = {epoch: args.learning_rate * (args.decay_fac ** ((epoch) // 1))}
    else:
        args.learning_rate = 1e-4
        lr_adjust = {epoch: args.learning_rate if epoch < 3 else args.learning_rate * (0.9 ** ((epoch - 3) // 1))}
    print("lr_adjust = {}".format(lr_adjust))
    if epoch in lr_adjust.keys():
        lr = lr_adjust[epoch]
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
        print('Updating learning rate to {}'.format(lr))


class EarlyStopping:
    def __init__(self, patience=7, verbose=False, delta=0):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.inf
        self.delta = delta

    def __call__(self, val_loss, model, path):
        score = -val_loss
        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model, path)
        elif score < self.best_score + self.delta:
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model, path)
            self.counter = 0

    def save_checkpoint(self, val_loss, model, path):
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...')
        torch.save(model.state_dict(), path + '/' + 'checkpoint.pth')
        self.val_loss_min = val_loss


class dotdict(dict):
    """dot.notation access to dictionary attributes"""
    __getattr__ = dict.get
    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__


class StandardScaler():
    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def transform(self, data):
        return (data - self.mean) / self.std

    def inverse_transform(self, data):
        return (data * self.std) + self.mean


def visual(true, preds=None, name='./pic/test.pdf'):
    """
    Results visualization
    """
    plt.figure()
    plt.plot(true, label='GroundTruth', linewidth=2)
    if preds is not None:
        plt.plot(preds, label='Prediction', linewidth=2)
    plt.legend()
    plt.savefig(name, bbox_inches='tight')


def convert_tsf_to_dataframe(
    full_file_path_and_name,
    replace_missing_vals_with="NaN",
    value_column_name="series_value",
):
    col_names = []
    col_types = []
    all_data = {}
    line_count = 0
    frequency = None
    forecast_horizon = None
    contain_missing_values = None
    contain_equal_length = None
    found_data_tag = False
    found_data_section = False
    started_reading_data_section = False

    with open(full_file_path_and_name, "r", encoding="cp1252") as file:
        for line in file:
            # Strip white space from start/end of line
            line = line.strip()

            if line:
                if line.startswith("@"):  # Read meta-data
                    if not line.startswith("@data"):
                        line_content = line.split(" ")
                        if line.startswith("@attribute"):
                            if (
                                len(line_content) != 3
                            ):  # Attributes have both name and type
                                raise Exception("Invalid meta-data specification.")

                            col_names.append(line_content[1])
                            col_types.append(line_content[2])
                        else:
                            if (
                                len(line_content) != 2
                            ):  # Other meta-data have only values
                                raise Exception("Invalid meta-data specification.")

                            if line.startswith("@frequency"):
                                frequency = line_content[1]
                            elif line.startswith("@horizon"):
                                forecast_horizon = int(line_content[1])
                            elif line.startswith("@missing"):
                                contain_missing_values = bool(
                                    strtobool(line_content[1])
                                )
                            elif line.startswith("@equallength"):
                                contain_equal_length = bool(strtobool(line_content[1]))

                    else:
                        if len(col_names) == 0:
                            raise Exception(
                                "Missing attribute section. Attribute section must come before data."
                            )

                        found_data_tag = True
                elif not line.startswith("#"):
                    if len(col_names) == 0:
                        raise Exception(
                            "Missing attribute section. Attribute section must come before data."
                        )
                    elif not found_data_tag:
                        raise Exception("Missing @data tag.")
                    else:
                        if not started_reading_data_section:
                            started_reading_data_section = True
                            found_data_section = True
                            all_series = []

                            for col in col_names:
                                all_data[col] = []

                        full_info = line.split(":")

                        if len(full_info) != (len(col_names) + 1):
                            raise Exception("Missing attributes/values in series.")

                        series = full_info[len(full_info) - 1]
                        series = series.split(",")

                        if len(series) == 0:
                            raise Exception(
                                "A given series should contains a set of comma separated numeric values. At least one numeric value should be there in a series. Missing values should be indicated with ? symbol"
                            )

                        numeric_series = []

                        for val in series:
                            if val == "?":
                                numeric_series.append(replace_missing_vals_with)
                            else:
                                numeric_series.append(float(val))

                        if numeric_series.count(replace_missing_vals_with) == len(
                            numeric_series
                        ):
                            raise Exception(
                                "All series values are missing. A given series should contains a set of comma separated numeric values. At least one numeric value should be there in a series."
                            )

                        all_series.append(pd.Series(numeric_series).array)

                        for i in range(len(col_names)):
                            att_val = None
                            if col_types[i] == "numeric":
                                att_val = int(full_info[i])
                            elif col_types[i] == "string":
                                att_val = str(full_info[i])
                            elif col_types[i] == "date":
                                att_val = datetime.strptime(
                                    full_info[i], "%Y-%m-%d %H-%M-%S"
                                )
                            else:
                                raise Exception(
                                    "Invalid attribute type."
                                )  # Currently, the code supports only numeric, string and date types. Extend this as required.

                            if att_val is None:
                                raise Exception("Invalid attribute value.")
                            else:
                                all_data[col_names[i]].append(att_val)

                line_count = line_count + 1

        if line_count == 0:
            raise Exception("Empty file.")
        if len(col_names) == 0:
            raise Exception("Missing attribute section.")
        if not found_data_section:
            raise Exception("Missing series information under data section.")

        all_data[value_column_name] = all_series
        loaded_data = pd.DataFrame(all_data)

        return (
            loaded_data,
            frequency,
            forecast_horizon,
            contain_missing_values,
            contain_equal_length,
        )


def vali(model, vali_loader, criterion, args, device):
    total_loss = []

    model.in_layer.eval()
    model.out_layer.eval()
    with torch.no_grad():
        for i, (batch_x, batch_y) in enumerate(vali_loader.get_iterator()):
            batch_x = torch.Tensor(batch_x).to(device)
            batch_y = torch.Tensor(batch_y).to(device)

            outputs = model(batch_x)

            # encoder - decoder
            outputs = outputs[:, -args.pred_len:, :]
            batch_y = batch_y[:, -args.pred_len:, :].to(device)

            pred = outputs.detach().cpu()
            true = batch_y.detach().cpu()

            loss = criterion(pred, true)

            total_loss.append(loss)
    total_loss = np.average(total_loss)

    model.in_layer.train()
    model.out_layer.train()
    return total_loss


def MASE(x, freq, pred, true):
    masep = np.mean(np.abs(x[:, freq:] - x[:, :-freq]))
    return np.mean(np.abs(pred - true) / (masep + 1e-8))


def test(model, test_loader, args, device):
    preds = []
    trues = []

    model.eval()
    with torch.no_grad():
        for i, (batch_x, batch_y) in enumerate(test_loader.get_iterator()):

            batch_x = torch.Tensor(batch_x).to(device)
            batch_y = torch.Tensor(batch_y)

            outputs = model(batch_x[:, -args.seq_len:, :])

            # encoder - decoder
            outputs = outputs[:, -args.pred_len:, :]
            batch_y = batch_y[:, -args.pred_len:, :].to(device)

            pred = outputs.detach().cpu().numpy()
            true = batch_y.detach().cpu().numpy()

            preds.append(pred)
            trues.append(true)

    preds = torch.Tensor(preds)
    trues = torch.Tensor(trues)

    amae = []
    amape = []
    armse = []
    for i in range(args.pred_len):
        pred = preds[:, :, i]
        real = trues[:, :, i]

        m = metrics(pred, real)

        amae.append(m[0])
        amape.append(m[1])
        armse.append(m[2])

    return np.mean(amae), np.mean(amape), np.mean(armse)


def masked_mse(preds, labels, null_val=np.nan):
    if np.isnan(null_val):
        mask = ~torch.isnan(labels)
    else:
        mask = (labels != null_val)
    mask = mask.float()
    mask /= torch.mean(mask)
    mask = torch.where(torch.isnan(mask), torch.zeros_like(mask), mask)
    loss = (preds - labels) ** 2
    loss = loss * mask
    loss = torch.where(torch.isnan(loss), torch.zeros_like(loss), loss)
    return torch.mean(loss)


def masked_rmse(preds, labels, null_val=np.nan):
    return torch.sqrt(masked_mse(preds=preds, labels=labels, null_val=null_val))


def masked_mae(preds, labels, null_val=np.nan):
    if np.isnan(null_val):
        mask = ~torch.isnan(labels)
    else:
        mask = (labels != null_val)
    mask = mask.float()
    mask /= torch.mean(mask)
    mask = torch.where(torch.isnan(mask), torch.zeros_like(mask), mask)
    loss = torch.abs(preds - labels)
    loss = loss * mask
    loss = torch.where(torch.isnan(loss), torch.zeros_like(loss), loss)
    return torch.mean(loss)


def masked_mape(preds, labels, null_val=np.nan):
    if np.isnan(null_val):
        mask = ~torch.isnan(labels)
    else:
        mask = (labels != null_val)
    mask = mask.float()
    mask /= torch.mean(mask)
    mask = torch.where(torch.isnan(mask), torch.zeros_like(mask), mask)
    loss = torch.abs(preds - labels) / labels
    loss = loss * mask
    loss = torch.where(torch.isnan(loss), torch.zeros_like(loss), loss)
    return torch.mean(loss)


def metrics(pred, real):
    mae = masked_mae(pred, real, 0.0).item()
    mape = masked_mape(pred, real, 0.0).item()
    rmse = masked_rmse(pred, real, 0.0).item()
    return mae, mape, rmse
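# --- Editor's note (not part of the commit) ---
# convert_tsf_to_dataframe parses the Monash .tsf layout: "@attribute" lines declare
# per-series columns, other "@" lines hold scalar metadata, and each data row lists
# the attribute values and the comma-separated series after a final colon.
# A minimal file accepted by the parser above (hypothetical content) would be:
#
#   @attribute series_name string
#   @attribute start_timestamp date
#   @frequency daily
#   @horizon 24
#   @missing false
#   @equallength true
#   @data
#   T1:2019-01-01 00-00-00:1.0,2.0,?,4.0
#
# Missing points are written as "?" and replaced with the replace_missing_vals_with
# placeholder before the series is stored in the returned DataFrame.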