# Source: REPST/utils/tools.py (Python; listed as 379 lines, 13 KiB in the
# repository viewer this file was copied from).

import numpy as np
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
from tqdm import tqdm
from datetime import datetime
from distutils.util import strtobool
import pandas as pd
from utils.metrics import metric
plt.switch_backend('agg')
def adjust_learning_rate(optimizer, epoch, args):
    """Set the learning rate of every optimizer parameter group for ``epoch``.

    The schedule is selected by ``args.lradj``:

    * ``'type1'``: keep ``args.learning_rate`` while ``epoch < 3``, then
      multiply it by ``0.9 ** (epoch - 3)``.
    * ``'type2'``: ``args.learning_rate * decay_fac ** (epoch - 1)``.
    * ``'type4'``: ``args.learning_rate * decay_fac ** epoch``.
    * anything else: overwrite ``args.learning_rate`` with ``1e-4`` (this
      mutates ``args``) and apply the ``'type1'`` schedule.

    ``decay_fac`` is read from ``args.decay_fac``; when the attribute is
    missing or ``None`` it falls back to ``0.5`` instead of failing with a
    ``TypeError``.

    Args:
        optimizer: Object exposing ``param_groups``, an iterable of dicts
            whose ``'lr'`` entry is overwritten.
        epoch: Current epoch number.
        args: Namespace-like object providing ``lradj``, ``learning_rate``
            and, optionally, ``decay_fac``.
    """
    schedule = args.lradj
    if schedule in ('type2', 'type4'):
        decay_fac = getattr(args, 'decay_fac', None)
        if decay_fac is None:
            decay_fac = 0.5
        # 'type2' starts decaying one epoch later than 'type4'.
        exponent = (epoch - 1) // 1 if schedule == 'type2' else epoch // 1
        lr = args.learning_rate * (decay_fac ** exponent)
    else:
        if schedule != 'type1':
            # Unknown schedule names fall back to a fixed base rate.
            args.learning_rate = 1e-4
        base_lr = args.learning_rate
        lr = base_lr if epoch < 3 else base_lr * (0.9 ** ((epoch - 3) // 1))

    lr_adjust = {epoch: lr}
    print("lr_adjust = {}".format(lr_adjust))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    print('Updating learning rate to {}'.format(lr))
class EarlyStopping:
    """Stop training once the validation loss stops improving.

    Each call compares the negated validation loss against the best score
    seen so far. An improvement saves a checkpoint and resets the counter;
    anything else increments the counter, and ``early_stop`` becomes True
    once the counter reaches ``patience``.
    """

    def __init__(self, patience=7, verbose=False, delta=0):
        """
        Args:
            patience: Number of consecutive non-improving calls tolerated.
            verbose: When True, print a message each time a checkpoint is saved.
            delta: Margin added to the best score before comparing.
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        # np.Inf was removed in NumPy 2.0; np.inf works on every version.
        self.val_loss_min = np.inf
        self.delta = delta

    def __call__(self, val_loss, model, path):
        """Record ``val_loss`` and checkpoint ``model`` under ``path`` on improvement."""
        score = -val_loss
        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model, path)
        elif score < self.best_score + self.delta:
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model, path)
            self.counter = 0

    def save_checkpoint(self, val_loss, model, path):
        """Write ``model.state_dict()`` to ``<path>/checkpoint.pth`` and remember ``val_loss``."""
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...')
        torch.save(model.state_dict(), path + '/' + 'checkpoint.pth')
        self.val_loss_min = val_loss
class dotdict(dict):
    """Dictionary whose entries can also be read, set and deleted as attributes."""

    def __getattr__(self, name):
        # Only reached when normal attribute lookup fails; a missing key
        # yields None instead of raising AttributeError.
        return dict.get(self, name)

    def __setattr__(self, name, value):
        # Attribute assignment stores an item rather than an instance attribute.
        dict.__setitem__(self, name, value)

    def __delattr__(self, name):
        # Deleting an absent key raises KeyError, exactly like ``del d[name]``.
        dict.__delitem__(self, name)
class StandardScaler:
    """Standardise data with a fixed mean and standard deviation."""

    def __init__(self, mean, std):
        """Store the statistics used by both transforms."""
        self.mean, self.std = mean, std

    def transform(self, data):
        """Return ``data`` shifted by the mean and divided by the std."""
        centered = data - self.mean
        return centered / self.std

    def inverse_transform(self, data):
        """Undo ``transform``: multiply by the std, then add the mean."""
        rescaled = data * self.std
        return rescaled + self.mean
def visual(true, preds=None, name='./pic/test.pdf'):
    """Plot the ground truth (and optionally predictions) and save the figure.

    Args:
        true: Sequence plotted under the label 'GroundTruth'.
        preds: Optional sequence plotted under the label 'Prediction'.
        name: Output path handed to ``plt.savefig``.
    """
    fig = plt.figure()
    try:
        plt.plot(true, label='GroundTruth', linewidth=2)
        if preds is not None:
            plt.plot(preds, label='Prediction', linewidth=2)
        plt.legend()
        plt.savefig(name, bbox_inches='tight')
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # without this, repeated calls accumulate open figures.
        plt.close(fig)
def _tsf_parse_bool(text):
    """Map a .tsf boolean flag to ``bool`` (same vocabulary as ``strtobool``)."""
    lowered = text.lower()
    if lowered in ("y", "yes", "t", "true", "on", "1"):
        return True
    if lowered in ("n", "no", "f", "false", "off", "0"):
        return False
    raise ValueError("invalid truth value %r" % (lowered,))


def _tsf_parse_series(raw_values, replace_missing_vals_with):
    """Turn the comma-separated value field of one series into a list."""
    # str.split never returns an empty list, so emptiness has to be checked
    # on the raw field itself.
    if not raw_values.strip():
        raise Exception(
            "A given series should contains a set of comma separated numeric values. At least one numeric value should be there in a series. Missing values should be indicated with ? symbol"
        )
    tokens = raw_values.split(",")
    # Count the "?" markers directly so that a replacement value equal to a
    # real observation (e.g. 0.0) is not mistaken for a missing entry.
    if all(token == "?" for token in tokens):
        raise Exception(
            "All series values are missing. A given series should contains a set of comma separated numeric values. At least one numeric value should be there in a series."
        )
    return [
        replace_missing_vals_with if token == "?" else float(token)
        for token in tokens
    ]


def _tsf_parse_attribute(raw_value, col_type):
    """Convert one attribute field according to its declared column type."""
    if col_type == "numeric":
        return int(raw_value)
    if col_type == "string":
        return str(raw_value)
    if col_type == "date":
        return datetime.strptime(raw_value, "%Y-%m-%d %H-%M-%S")
    # Only numeric, string and date types are supported; extend as required.
    raise Exception("Invalid attribute type.")


def convert_tsf_to_dataframe(
    full_file_path_and_name,
    replace_missing_vals_with="NaN",
    value_column_name="series_value",
):
    """Load a ``.tsf`` time-series file into a pandas DataFrame.

    The file is read with the ``cp1252`` encoding. Lines starting with ``#``
    are skipped, ``@attribute <name> <type>`` lines declare the per-series
    columns, other ``@<tag> <value>`` lines carry meta-data, and every
    non-comment line after ``@data`` is one series written as
    ``attr_1:...:attr_n:v1,v2,...`` where ``?`` marks a missing value.

    Args:
        full_file_path_and_name: Path of the file to read.
        replace_missing_vals_with: Value stored in place of each ``?``.
        value_column_name: Name of the DataFrame column holding the series.

    Returns:
        A tuple ``(loaded_data, frequency, forecast_horizon,
        contain_missing_values, contain_equal_length)``. The four meta-data
        entries are ``None`` when the corresponding tag is absent.

    Raises:
        Exception: On malformed meta-data, a missing attribute section or
            ``@data`` tag, a series with the wrong number of fields, an
            empty or all-missing series, an unsupported attribute type, or
            an empty file.
        ValueError: When a value, attribute or boolean flag cannot be parsed.
    """
    col_names = []
    col_types = []
    all_data = {}
    all_series = []
    line_count = 0
    frequency = None
    forecast_horizon = None
    contain_missing_values = None
    contain_equal_length = None
    found_data_tag = False
    found_data_section = False

    with open(full_file_path_and_name, "r", encoding="cp1252") as file:
        for raw_line in file:
            line = raw_line.strip()
            if not line:
                continue
            line_count += 1

            if line.startswith("@"):
                if line.startswith("@data"):
                    if not col_names:
                        raise Exception(
                            "Missing attribute section. Attribute section must come before data."
                        )
                    found_data_tag = True
                    continue

                line_content = line.split(" ")
                if line.startswith("@attribute"):
                    # Attributes have both a name and a type.
                    if len(line_content) != 3:
                        raise Exception("Invalid meta-data specification.")
                    col_names.append(line_content[1])
                    col_types.append(line_content[2])
                    continue

                # Every other meta-data tag carries exactly one value.
                if len(line_content) != 2:
                    raise Exception("Invalid meta-data specification.")
                if line.startswith("@frequency"):
                    frequency = line_content[1]
                elif line.startswith("@horizon"):
                    forecast_horizon = int(line_content[1])
                elif line.startswith("@missing"):
                    contain_missing_values = _tsf_parse_bool(line_content[1])
                elif line.startswith("@equallength"):
                    contain_equal_length = _tsf_parse_bool(line_content[1])
                continue

            if line.startswith("#"):
                continue

            # Anything else is a series line.
            if not col_names:
                raise Exception(
                    "Missing attribute section. Attribute section must come before data."
                )
            if not found_data_tag:
                raise Exception("Missing @data tag.")
            if not found_data_section:
                found_data_section = True
                for col in col_names:
                    all_data[col] = []

            full_info = line.split(":")
            if len(full_info) != (len(col_names) + 1):
                raise Exception("Missing attributes/values in series.")

            numeric_series = _tsf_parse_series(
                full_info[-1], replace_missing_vals_with
            )
            all_series.append(pd.Series(numeric_series).array)

            # zip stops after the attribute fields, leaving out the values.
            for col_name, col_type, raw_value in zip(col_names, col_types, full_info):
                all_data[col_name].append(_tsf_parse_attribute(raw_value, col_type))

    if line_count == 0:
        raise Exception("Empty file.")
    if not col_names:
        raise Exception("Missing attribute section.")
    if not found_data_section:
        raise Exception("Missing series information under data section.")

    all_data[value_column_name] = all_series
    loaded_data = pd.DataFrame(all_data)
    return (
        loaded_data,
        frequency,
        forecast_horizon,
        contain_missing_values,
        contain_equal_length,
    )
def vali(model, vali_loader, criterion, args, device):
    """Return the average validation loss over ``vali_loader``.

    ``model.in_layer`` and ``model.out_layer`` are put in eval mode for the
    duration of the pass and switched back to train mode afterwards, even
    when a batch raises.

    Args:
        model: Callable module exposing ``in_layer`` and ``out_layer``.
        vali_loader: Object whose ``get_iterator()`` yields ``(batch_x, batch_y)``.
        criterion: Callable ``criterion(pred, true)`` returning a scalar loss.
        args: Namespace providing ``pred_len``.
        device: Device the inputs are moved to before the forward pass.

    Returns:
        ``np.average`` of the per-batch losses (NaN when no batch was yielded).
    """
    batch_losses = []
    model.in_layer.eval()
    model.out_layer.eval()
    try:
        with torch.no_grad():
            for batch_x, batch_y in vali_loader.get_iterator():
                batch_x = torch.Tensor(batch_x).to(device)
                # Targets are only compared on the CPU, so they never need
                # to visit the device.
                batch_y = torch.Tensor(batch_y)

                outputs = model(batch_x)

                # Keep only the last pred_len steps along dimension 1.
                pred = outputs[:, -args.pred_len:, :].detach().cpu()
                true = batch_y[:, -args.pred_len:, :]

                # Store plain floats rather than tensors.
                batch_losses.append(float(criterion(pred, true)))
    finally:
        model.in_layer.train()
        model.out_layer.train()
    return np.average(batch_losses)
def MASE(x, freq, pred, true):
    """Mean absolute error of ``pred`` scaled by the lag-``freq`` naive error on ``x``.

    The scale is the mean absolute difference between ``x`` and itself
    shifted by ``freq`` positions along axis 1; ``1e-8`` is added to it to
    avoid division by zero.
    """
    lagged_diff = x[:, freq:] - x[:, :-freq]
    naive_scale = np.mean(np.abs(lagged_diff))
    abs_error = np.abs(pred - true)
    return np.mean(abs_error / (naive_scale + 1e-8))
def test(model, test_loader, args, device):
    """Evaluate ``model`` on ``test_loader`` and average the metrics over horizons.

    Predictions and targets are truncated to the last ``args.pred_len``
    steps along dimension 1, gathered across batches, and scored separately
    for each of those steps with ``metrics``; the per-step MAE, MAPE and
    RMSE are then averaged.

    Batches are concatenated along the batch dimension, so a smaller final
    batch is supported.

    Args:
        model: Callable module; it is switched to eval mode and left there.
        test_loader: Object whose ``get_iterator()`` yields ``(batch_x, batch_y)``.
        args: Namespace providing ``seq_len`` and ``pred_len``.
        device: Device the inputs are moved to before the forward pass.

    Returns:
        Tuple ``(mean MAE, mean MAPE, mean RMSE)``.

    Raises:
        ValueError: If ``test_loader`` yields no batches.
    """
    pred_batches = []
    true_batches = []

    model.eval()
    with torch.no_grad():
        for batch_x, batch_y in test_loader.get_iterator():
            batch_x = torch.Tensor(batch_x).to(device)
            batch_y = torch.Tensor(batch_y)

            # Only the last seq_len input steps are fed to the model.
            outputs = model(batch_x[:, -args.seq_len:, :])

            pred_batches.append(
                outputs[:, -args.pred_len:, :].detach().cpu().float()
            )
            true_batches.append(batch_y[:, -args.pred_len:, :])

    if not pred_batches:
        raise ValueError("test_loader yielded no batches")

    preds = torch.cat(pred_batches, dim=0)
    trues = torch.cat(true_batches, dim=0)

    amae = []
    amape = []
    armse = []
    for step in range(args.pred_len):
        # Dimension 1 is the prediction step after concatenating on dim 0.
        mae, mape, rmse = metrics(preds[:, step], trues[:, step])
        amae.append(mae)
        amape.append(mape)
        armse.append(rmse)
    return np.mean(amae), np.mean(amape), np.mean(armse)
def masked_mse(preds, labels, null_val=np.nan):
    """Mean squared error that ignores label entries equal to ``null_val``.

    Valid entries are re-weighted by the inverse of the valid fraction, so
    the mean over all entries equals the mean over the valid ones. NaNs
    produced along the way are replaced by zero.
    """
    valid = ~torch.isnan(labels) if np.isnan(null_val) else (labels != null_val)
    weights = valid.float()
    weights = weights / weights.mean()
    # An all-masked input gives 0/0 weights; treat those as zero weight.
    weights = torch.where(torch.isnan(weights), torch.zeros_like(weights), weights)

    weighted = (preds - labels).pow(2) * weights
    weighted = torch.where(torch.isnan(weighted), torch.zeros_like(weighted), weighted)
    return weighted.mean()
def masked_rmse(preds, labels, null_val=np.nan):
    """Square root of ``masked_mse`` for the same arguments."""
    mse = masked_mse(preds=preds, labels=labels, null_val=null_val)
    return torch.sqrt(mse)
def masked_mae(preds, labels, null_val=np.nan):
    """Mean absolute error that ignores label entries equal to ``null_val``.

    Valid entries are re-weighted by the inverse of the valid fraction, so
    the mean over all entries equals the mean over the valid ones. NaNs
    produced along the way are replaced by zero.
    """
    valid = ~torch.isnan(labels) if np.isnan(null_val) else (labels != null_val)
    weights = valid.float()
    weights = weights / weights.mean()
    # An all-masked input gives 0/0 weights; treat those as zero weight.
    weights = torch.where(torch.isnan(weights), torch.zeros_like(weights), weights)

    weighted = (preds - labels).abs() * weights
    weighted = torch.where(torch.isnan(weighted), torch.zeros_like(weighted), weighted)
    return weighted.mean()
def masked_mape(preds, labels, null_val=np.nan):
    """Mean absolute percentage error that ignores labels equal to ``null_val``.

    Each error is ``|preds - labels| / |labels|``. The absolute value in the
    denominator keeps the ratio non-negative for negative labels, so errors
    of opposite sign cannot cancel. Valid entries are re-weighted by the
    inverse of the valid fraction, and NaNs produced along the way (for
    example a masked zero label) are replaced by zero.

    Returns:
        A scalar tensor holding the masked MAPE as a fraction (not percent).
    """
    if np.isnan(null_val):
        mask = ~torch.isnan(labels)
    else:
        mask = (labels != null_val)
    mask = mask.float()
    mask /= torch.mean(mask)
    # An all-masked input gives 0/0 weights; treat those as zero weight.
    mask = torch.where(torch.isnan(mask), torch.zeros_like(mask), mask)
    loss = torch.abs(preds - labels) / torch.abs(labels)
    loss = loss * mask
    loss = torch.where(torch.isnan(loss), torch.zeros_like(loss), loss)
    return torch.mean(loss)
def metrics(pred, real):
    """Return ``(mae, mape, rmse)`` as Python floats, masking labels equal to 0.0."""
    mae, mape, rmse = (
        score_fn(pred, real, 0.0).item()
        for score_fn in (masked_mae, masked_mape, masked_rmse)
    )
    return mae, mape, rmse