379 lines
13 KiB
Python
379 lines
13 KiB
Python
import numpy as np
|
|
import torch
|
|
import torch.nn as nn
|
|
import matplotlib.pyplot as plt
|
|
from tqdm import tqdm
|
|
|
|
from datetime import datetime
|
|
from distutils.util import strtobool
|
|
import pandas as pd
|
|
|
|
from utils.metrics import metric
|
|
|
|
# Select Matplotlib's non-interactive Agg backend so figures are rendered
# straight to files and no GUI/display is needed.
plt.switch_backend('agg')
|
|
|
|
def adjust_learning_rate(optimizer, epoch, args):
    """Write the scheduled learning rate for ``epoch`` into ``optimizer``.

    The schedule is selected by ``args.lradj``:

    * ``'type1'``: ``args.learning_rate`` while ``epoch < 3``, afterwards
      ``args.learning_rate * 0.9 ** (epoch - 3)``.
    * ``'type2'``: ``args.learning_rate * args.decay_fac ** (epoch - 1)``.
    * ``'type4'``: ``args.learning_rate * args.decay_fac ** epoch``.
    * any other value: ``args.learning_rate`` is overwritten with ``1e-4``
      and the ``'type1'`` rule is applied.

    Exponents are floored with ``// 1``. The chosen rate is assigned to the
    ``'lr'`` entry of every group in ``optimizer.param_groups`` and printed.
    """
    schedule = args.lradj
    if schedule == 'type2':
        new_lr = args.learning_rate * (args.decay_fac ** ((epoch - 1) // 1))
    elif schedule == 'type4':
        new_lr = args.learning_rate * (args.decay_fac ** ((epoch) // 1))
    else:
        if schedule != 'type1':
            # Unknown schedule: fall back to a fixed base rate (mutates args).
            args.learning_rate = 1e-4
        if epoch < 3:
            new_lr = args.learning_rate
        else:
            new_lr = args.learning_rate * (0.9 ** ((epoch - 3) // 1))

    lr_adjust = {epoch: new_lr}
    print("lr_adjust = {}".format(lr_adjust))

    for group in optimizer.param_groups:
        group['lr'] = new_lr
    print('Updating learning rate to {}'.format(new_lr))
|
|
|
|
|
|
class EarlyStopping:
    """Track a validation loss and flag when it has stopped improving.

    Each call compares the negated loss against the best value seen so far.
    On improvement the model weights are checkpointed; otherwise a counter is
    incremented and ``early_stop`` becomes True once it reaches ``patience``.
    """

    def __init__(self, patience=7, verbose=False, delta=0):
        """Store the settings and reset the tracking state.

        Args:
            patience: number of non-improving calls before ``early_stop`` is set.
            verbose: when true, print a message whenever a checkpoint is saved.
            delta: margin added to the best score in the improvement comparison.
        """
        self.patience = patience
        self.verbose = verbose
        self.delta = delta
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.inf

    def __call__(self, val_loss, model, path):
        """Record ``val_loss``; checkpoint ``model`` under ``path`` on improvement."""
        score = -val_loss
        first_call = self.best_score is None

        if not first_call and score < self.best_score + self.delta:
            # No improvement: count it and possibly request a stop.
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
            return

        self.best_score = score
        self.save_checkpoint(val_loss, model, path)
        if not first_call:
            self.counter = 0

    def save_checkpoint(self, val_loss, model, path):
        """Save ``model.state_dict()`` to ``<path>/checkpoint.pth`` and remember the loss."""
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...')
        weights = model.state_dict()
        torch.save(weights, path + '/' + 'checkpoint.pth')
        self.val_loss_min = val_loss
|
|
|
|
|
|
class dotdict(dict):
    """Dictionary whose keys can also be read, set and deleted as attributes.

    The attribute hooks are bound directly to the matching ``dict`` methods,
    so ``d.key`` behaves like ``d.get('key')``, ``d.key = v`` like
    ``d['key'] = v`` and ``del d.key`` like ``del d['key']``.
    """
    # Only consulted when normal attribute lookup fails; a missing key yields
    # None (dict.get) instead of raising AttributeError.
    __getattr__ = dict.get
    # Attribute assignment stores the value under the same-named key.
    __setattr__ = dict.__setitem__
    # Attribute deletion removes the key; a missing key raises KeyError.
    __delattr__ = dict.__delitem__
|
|
|
|
|
|
class StandardScaler:
    """Standardise data with a fixed, caller-supplied mean and standard deviation."""

    def __init__(self, mean, std):
        """Remember the statistics used by both transforms."""
        self.mean = mean
        self.std = std

    def transform(self, data):
        """Return ``data`` shifted by the mean and divided by the std."""
        centred = data - self.mean
        return centred / self.std

    def inverse_transform(self, data):
        """Undo :meth:`transform`: multiply by the std, then add the mean."""
        rescaled = data * self.std
        return rescaled + self.mean
|
|
|
|
|
|
def visual(true, preds=None, name='./pic/test.pdf'):
    """Plot the ground truth (and optionally predictions) and save the figure.

    Args:
        true: values plotted as the 'GroundTruth' line.
        preds: optional values plotted as the 'Prediction' line.
        name: output path handed to ``plt.savefig``.

    The figure is closed after saving (even if plotting or saving fails), so
    repeated calls do not accumulate open pyplot figures.
    """
    fig = plt.figure()
    try:
        plt.plot(true, label='GroundTruth', linewidth=2)
        if preds is not None:
            plt.plot(preds, label='Prediction', linewidth=2)
        plt.legend()
        plt.savefig(name, bbox_inches='tight')
    finally:
        # pyplot keeps every figure alive until it is explicitly closed.
        plt.close(fig)
|
|
|
|
|
|
def _tsf_parse_bool(text):
    """Parse a TSF boolean flag using the spellings ``distutils``' strtobool accepted."""
    lowered = text.lower()
    if lowered in ("y", "yes", "t", "true", "on", "1"):
        return True
    if lowered in ("n", "no", "f", "false", "off", "0"):
        return False
    raise ValueError("invalid truth value %r" % (lowered,))


def _tsf_parse_series(series_text, replace_missing_vals_with):
    """Turn the comma separated value field of one data line into a pandas array."""
    if not series_text:
        # str.split never returns an empty list, so test the raw text instead.
        raise Exception(
            "A given series should contains a set of comma separated numeric values. At least one numeric value should be there in a series. Missing values should be indicated with ? symbol"
        )

    values = []
    missing_count = 0
    for token in series_text.split(","):
        if token == "?":
            missing_count += 1
            values.append(replace_missing_vals_with)
        else:
            values.append(float(token))

    # Count the "?" markers themselves, so a genuine value that happens to
    # equal the replacement is not mistaken for a missing one.
    if missing_count == len(values):
        raise Exception(
            "All series values are missing. A given series should contains a set of comma separated numeric values. At least one numeric value should be there in a series."
        )

    return pd.Series(values).array


def _tsf_parse_attribute(raw_value, col_type):
    """Convert one attribute field according to its declared TSF column type."""
    if col_type == "numeric":
        return int(raw_value)
    if col_type == "string":
        return str(raw_value)
    if col_type == "date":
        return datetime.strptime(raw_value, "%Y-%m-%d %H-%M-%S")
    # Only numeric, string and date types are supported. Extend this as required.
    raise Exception("Invalid attribute type.")


def convert_tsf_to_dataframe(
    full_file_path_and_name,
    replace_missing_vals_with="NaN",
    value_column_name="series_value",
):
    """Load a ``.tsf`` time series file into a pandas DataFrame.

    The file is read with the ``cp1252`` encoding. Blank lines are ignored,
    lines starting with ``#`` are skipped, ``@...`` lines carry meta-data and
    every line after ``@data`` holds ``:``-separated attribute values followed
    by a comma separated series.

    Args:
        full_file_path_and_name: path of the file to read.
        replace_missing_vals_with: value stored for every ``?`` in a series.
        value_column_name: name of the DataFrame column holding the series.

    Returns:
        A tuple ``(loaded_data, frequency, forecast_horizon,
        contain_missing_values, contain_equal_length)``. ``loaded_data`` has
        one column per ``@attribute`` plus ``value_column_name``; each of the
        other four is ``None`` when its tag is absent from the file.

    Raises:
        Exception: if the file has no non-blank lines, lacks the attribute
            section, the ``@data`` tag or any series, or a line is malformed.
        ValueError: if a number, date or boolean flag cannot be parsed.
    """
    col_names = []
    col_types = []
    all_data = {}
    all_series = []
    line_count = 0
    frequency = None
    forecast_horizon = None
    contain_missing_values = None
    contain_equal_length = None
    found_data_tag = False
    found_data_section = False

    with open(full_file_path_and_name, "r", encoding="cp1252") as file:
        for raw_line in file:
            # Strip white space from start/end of line
            line = raw_line.strip()
            if not line:
                continue
            line_count += 1

            if line.startswith("@"):  # Read meta-data
                if line.startswith("@data"):
                    if not col_names:
                        raise Exception(
                            "Missing attribute section. Attribute section must come before data."
                        )
                    found_data_tag = True
                    continue

                line_content = line.split(" ")
                if line.startswith("@attribute"):
                    # Attributes have both name and type
                    if len(line_content) != 3:
                        raise Exception("Invalid meta-data specification.")
                    col_names.append(line_content[1])
                    col_types.append(line_content[2])
                    continue

                # Other meta-data have only values
                if len(line_content) != 2:
                    raise Exception("Invalid meta-data specification.")

                if line.startswith("@frequency"):
                    frequency = line_content[1]
                elif line.startswith("@horizon"):
                    forecast_horizon = int(line_content[1])
                elif line.startswith("@missing"):
                    contain_missing_values = _tsf_parse_bool(line_content[1])
                elif line.startswith("@equallength"):
                    contain_equal_length = _tsf_parse_bool(line_content[1])
                continue

            if line.startswith("#"):
                continue

            # Anything else is a data line.
            if not col_names:
                raise Exception(
                    "Missing attribute section. Attribute section must come before data."
                )
            if not found_data_tag:
                raise Exception("Missing @data tag.")

            if not found_data_section:
                found_data_section = True
                for col in col_names:
                    all_data[col] = []

            full_info = line.split(":")
            if len(full_info) != (len(col_names) + 1):
                raise Exception("Missing attributes/values in series.")

            all_series.append(
                _tsf_parse_series(full_info[-1], replace_missing_vals_with)
            )

            for col_name, col_type, raw_value in zip(col_names, col_types, full_info):
                all_data[col_name].append(_tsf_parse_attribute(raw_value, col_type))

    if line_count == 0:
        raise Exception("Empty file.")
    if not col_names:
        raise Exception("Missing attribute section.")
    if not found_data_section:
        raise Exception("Missing series information under data section.")

    all_data[value_column_name] = all_series
    loaded_data = pd.DataFrame(all_data)

    return (
        loaded_data,
        frequency,
        forecast_horizon,
        contain_missing_values,
        contain_equal_length,
    )
|
|
|
|
|
|
def vali(model, vali_loader, criterion, args, device):
    """Return the average of ``criterion`` over the batches of ``vali_loader``.

    ``model.in_layer`` and ``model.out_layer`` are switched to eval mode for
    the pass and back to train mode afterwards. Batches come from
    ``vali_loader.get_iterator()``; outputs and targets are cut to their last
    ``args.pred_len`` entries along the second dimension, and the loss is
    computed on CPU copies.
    """
    batch_losses = []

    model.in_layer.eval()
    model.out_layer.eval()
    with torch.no_grad():
        for batch_x, batch_y in vali_loader.get_iterator():
            inputs = torch.Tensor(batch_x).to(device)
            targets = torch.Tensor(batch_y).to(device)

            forecast = model(inputs)

            # Keep only the prediction horizon.
            forecast = forecast[:, -args.pred_len:, :]
            targets = targets[:, -args.pred_len:, :].to(device)

            batch_losses.append(
                criterion(forecast.detach().cpu(), targets.detach().cpu())
            )
    mean_loss = np.average(batch_losses)

    model.in_layer.train()
    model.out_layer.train()
    return mean_loss
|
|
|
|
def MASE(x, freq, pred, true):
    """Mean absolute error of ``pred`` scaled by the lag-``freq`` naive error of ``x``.

    The scale is the mean absolute difference between ``x`` and itself shifted
    by ``freq`` positions along the second axis; ``1e-8`` is added to it to
    avoid division by zero.
    """
    seasonal_diff = x[:, freq:] - x[:, :-freq]
    masep = np.mean(np.abs(seasonal_diff))
    scaled_error = np.abs(pred - true) / (masep + 1e-8)
    return np.mean(scaled_error)
|
|
|
|
def test(model, test_loader, args, device):
    """Evaluate ``model`` on ``test_loader`` and return mean MAE, MAPE and RMSE.

    Each batch from ``test_loader.get_iterator()`` is fed through the model
    using its last ``args.seq_len`` input entries; outputs and targets are cut
    to their last ``args.pred_len`` entries along the second dimension. All
    batches are stacked into one tensor, ``metrics`` is evaluated once per
    horizon index and the three scores are averaged over the horizon.

    The model is put in eval mode and left there.
    """
    pred_batches = []
    true_batches = []

    model.eval()
    with torch.no_grad():
        for batch_x, batch_y in test_loader.get_iterator():
            inputs = torch.Tensor(batch_x).to(device)
            targets = torch.Tensor(batch_y)

            forecast = model(inputs[:, -args.seq_len:, :])

            # Keep only the prediction horizon.
            forecast = forecast[:, -args.pred_len:, :]
            targets = targets[:, -args.pred_len:, :].to(device)

            pred_batches.append(forecast.detach().cpu().numpy())
            true_batches.append(targets.detach().cpu().numpy())

    # Stacking the per-batch arrays adds a leading batch-index dimension, so
    # index i on dim 2 below selects horizon step i of every batch.
    preds = torch.Tensor(pred_batches)
    trues = torch.Tensor(true_batches)

    amae = []
    amape = []
    armse = []
    for step in range(args.pred_len):
        step_scores = metrics(preds[:, :, step], trues[:, :, step])
        amae.append(step_scores[0])
        amape.append(step_scores[1])
        armse.append(step_scores[2])

    return np.mean(amae), np.mean(amape), np.mean(armse)
|
|
|
|
|
|
|
|
def masked_mse(preds, labels, null_val=np.nan):
    """Mean squared error that ignores entries whose label equals ``null_val``.

    With the default ``null_val`` (NaN) the ignored entries are the NaN
    labels. Valid entries are re-weighted by the inverse of the valid
    fraction, so the final mean over all elements equals the mean over valid
    ones. NaNs produced along the way are replaced by zero.
    """
    valid = ~torch.isnan(labels) if np.isnan(null_val) else (labels != null_val)
    weights = valid.float()
    weights = weights / torch.mean(weights)
    # 0/0 when nothing is valid -> NaN; treat those weights as zero.
    weights = torch.where(torch.isnan(weights), torch.zeros_like(weights), weights)

    squared_error = (preds - labels) ** 2
    weighted = squared_error * weights
    weighted = torch.where(torch.isnan(weighted), torch.zeros_like(weighted), weighted)
    return torch.mean(weighted)
|
|
|
|
def masked_rmse(preds, labels, null_val=np.nan):
    """Root of ``masked_mse`` evaluated with the same arguments."""
    mse = masked_mse(preds=preds, labels=labels, null_val=null_val)
    return torch.sqrt(mse)
|
|
|
|
|
|
def masked_mae(preds, labels, null_val=np.nan):
    """Mean absolute error that ignores entries whose label equals ``null_val``.

    With the default ``null_val`` (NaN) the ignored entries are the NaN
    labels. Valid entries are re-weighted by the inverse of the valid
    fraction, so the final mean over all elements equals the mean over valid
    ones. NaNs produced along the way are replaced by zero.
    """
    valid = ~torch.isnan(labels) if np.isnan(null_val) else (labels != null_val)
    weights = valid.float()
    weights = weights / torch.mean(weights)
    # 0/0 when nothing is valid -> NaN; treat those weights as zero.
    weights = torch.where(torch.isnan(weights), torch.zeros_like(weights), weights)

    abs_error = torch.abs(preds - labels)
    weighted = abs_error * weights
    weighted = torch.where(torch.isnan(weighted), torch.zeros_like(weighted), weighted)
    return torch.mean(weighted)
|
|
|
|
|
|
def masked_mape(preds, labels, null_val=np.nan):
    """Mean absolute percentage error ignoring entries whose label equals ``null_val``.

    With the default ``null_val`` (NaN) the ignored entries are the NaN
    labels. Valid entries are re-weighted by the inverse of the valid
    fraction, so the final mean over all elements equals the mean over valid
    ones. NaNs produced along the way (for example by a masked zero label)
    are replaced by zero.

    The error is divided by ``|labels|`` so negative targets contribute a
    positive percentage error; for non-negative labels the result is the same
    as dividing by ``labels``.

    Returns:
        A scalar tensor holding the masked MAPE as a fraction (not multiplied
        by 100).
    """
    if np.isnan(null_val):
        mask = ~torch.isnan(labels)
    else:
        mask = (labels != null_val)
    mask = mask.float()
    mask /= torch.mean(mask)
    # 0/0 when nothing is valid -> NaN; treat those weights as zero.
    mask = torch.where(torch.isnan(mask), torch.zeros_like(mask), mask)

    loss = torch.abs(preds - labels) / torch.abs(labels)
    loss = loss * mask
    # inf * 0 from masked zero labels becomes NaN; drop it.
    loss = torch.where(torch.isnan(loss), torch.zeros_like(loss), loss)
    return torch.mean(loss)
|
|
|
|
|
|
def metrics(pred, real):
    """Return ``(mae, mape, rmse)`` as Python floats, treating 0.0 targets as missing."""
    null_val = 0.0
    scores = [
        score_fn(pred, real, null_val).item()
        for score_fn in (masked_mae, masked_mape, masked_rmse)
    ]
    return scores[0], scores[1], scores[2]