from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from tqdm import tqdm

from utils.metrics import metric

try:
    from distutils.util import strtobool
except ImportError:
    # distutils is no longer part of the standard library on Python 3.12+;
    # provide a drop-in replacement so the TSF parser keeps working.
    def strtobool(val):
        """Convert a truth-value string to 1 or 0.

        Accepts y/yes/t/true/on/1 and n/no/f/false/off/0 (case-insensitive).

        Raises:
            ValueError: if ``val`` is not one of the recognised strings.
        """
        val = val.lower()
        if val in ('y', 'yes', 't', 'true', 'on', '1'):
            return 1
        if val in ('n', 'no', 'f', 'false', 'off', '0'):
            return 0
        raise ValueError("invalid truth value %r" % (val,))

plt.switch_backend('agg')


def adjust_learning_rate(optimizer, epoch, args):
    """Set the learning rate of every param group in ``optimizer`` for ``epoch``.

    The schedule is selected by ``args.lradj``:

    * ``'type1'``: ``args.learning_rate`` while ``epoch < 3``, then multiplied
      by ``0.9 ** (epoch - 3)``.
    * ``'type2'``: ``args.learning_rate * args.decay_fac ** (epoch - 1)``.
    * ``'type4'``: ``args.learning_rate * args.decay_fac ** epoch``.
    * anything else: ``args.learning_rate`` is overwritten with ``1e-4`` and
      the ``'type1'`` schedule is applied to that value.

    Args:
        optimizer: object exposing ``param_groups`` (a list of dicts with an
            ``'lr'`` entry).
        epoch: current epoch number.
        args: namespace with ``lradj``, ``learning_rate`` and (for type2/type4)
            ``decay_fac``. NOTE: mutated in the fallback branch.
    """
    if args.lradj == 'type1':
        lr = args.learning_rate if epoch < 3 else args.learning_rate * (0.9 ** ((epoch - 3) // 1))
    elif args.lradj == 'type2':
        lr = args.learning_rate * (args.decay_fac ** ((epoch - 1) // 1))
    elif args.lradj == 'type4':
        lr = args.learning_rate * (args.decay_fac ** ((epoch) // 1))
    else:
        # Unknown schedule: fall back to a fixed base rate. This deliberately
        # keeps the original side effect of overwriting args.learning_rate.
        args.learning_rate = 1e-4
        lr = args.learning_rate if epoch < 3 else args.learning_rate * (0.9 ** ((epoch - 3) // 1))

    # Kept as a single-entry dict so the log line is unchanged.
    lr_adjust = {epoch: lr}
    print("lr_adjust = {}".format(lr_adjust))

    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    print('Updating learning rate to {}'.format(lr))


class EarlyStopping:
    """Track validation loss, checkpoint on improvement, flag when to stop.

    After ``patience`` consecutive calls without an improvement of at least
    ``delta``, ``early_stop`` is set to True.
    """

    def __init__(self, patience=7, verbose=False, delta=0):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.inf
        self.delta = delta

    def __call__(self, val_loss, model, path):
        """Record ``val_loss``; save ``model`` under ``path`` if it improved."""
        # Scores are negated losses so that "higher is better".
        score = -val_loss
        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model, path)
        elif score < self.best_score + self.delta:
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model, path)
            self.counter = 0

    def save_checkpoint(self, val_loss, model, path):
        """Write ``model.state_dict()`` to ``<path>/checkpoint.pth``."""
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), path + '/' + 'checkpoint.pth')
        self.val_loss_min = val_loss


class dotdict(dict):
    """dot.notation access to dictionary attributes"""
    # Missing attributes resolve to None (dict.get), not AttributeError.
    __getattr__ = dict.get
    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__


class StandardScaler():
    """Standardise data with a fixed, caller-supplied mean and std."""

    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def transform(self, data):
        """Return ``(data - mean) / std``."""
        return (data - self.mean) / self.std

    def inverse_transform(self, data):
        """Return ``data * std + mean`` (inverse of :meth:`transform`)."""
        return (data * self.std) + self.mean


def visual(true, preds=None, name='./pic/test.pdf'):
    """
    Results visualization

    Plots ``true`` (and ``preds`` when given) and saves the figure to ``name``.
    """
    fig = plt.figure()
    try:
        plt.plot(true, label='GroundTruth', linewidth=2)
        if preds is not None:
            plt.plot(preds, label='Prediction', linewidth=2)
        plt.legend()
        plt.savefig(name, bbox_inches='tight')
    finally:
        # Release the figure; otherwise every call leaves one open in pyplot.
        plt.close(fig)


def convert_tsf_to_dataframe(
    full_file_path_and_name,
    replace_missing_vals_with="NaN",
    value_column_name="series_value",
):
    """Parse a ``.tsf`` time-series file into a pandas DataFrame.

    The file is read with cp1252 encoding. Lines starting with ``@`` carry
    metadata (``@attribute name type``, ``@frequency``, ``@horizon``,
    ``@missing``, ``@equallength``, ``@data``), lines starting with ``#`` are
    ignored, and every other non-blank line after ``@data`` is one series of
    the form ``attr1:attr2:...:v1,v2,...`` where ``?`` marks a missing value.

    Args:
        full_file_path_and_name: path of the file to read.
        replace_missing_vals_with: value substituted for ``?`` entries.
        value_column_name: name of the DataFrame column holding the series.

    Returns:
        Tuple ``(loaded_data, frequency, forecast_horizon,
        contain_missing_values, contain_equal_length)``; the metadata entries
        are ``None`` when the corresponding tag is absent.

    Raises:
        Exception: on malformed metadata, missing sections, or bad series rows.
        ValueError: if a series value or a truth-value tag cannot be parsed.
    """
    col_names = []
    col_types = []
    all_data = {}
    line_count = 0
    frequency = None
    forecast_horizon = None
    contain_missing_values = None
    contain_equal_length = None
    found_data_tag = False
    found_data_section = False
    started_reading_data_section = False

    with open(full_file_path_and_name, "r", encoding="cp1252") as file:
        for line in file:
            # Strip white space from start/end of line
            line = line.strip()
            if not line:
                continue

            if line.startswith("@"):
                # Read meta-data
                if not line.startswith("@data"):
                    line_content = line.split(" ")
                    if line.startswith("@attribute"):
                        # Attributes have both name and type
                        if len(line_content) != 3:
                            raise Exception("Invalid meta-data specification.")

                        col_names.append(line_content[1])
                        col_types.append(line_content[2])
                    else:
                        # Other meta-data have only values
                        if len(line_content) != 2:
                            raise Exception("Invalid meta-data specification.")

                        if line.startswith("@frequency"):
                            frequency = line_content[1]
                        elif line.startswith("@horizon"):
                            forecast_horizon = int(line_content[1])
                        elif line.startswith("@missing"):
                            contain_missing_values = bool(
                                strtobool(line_content[1])
                            )
                        elif line.startswith("@equallength"):
                            contain_equal_length = bool(strtobool(line_content[1]))
                else:
                    if len(col_names) == 0:
                        raise Exception(
                            "Missing attribute section. Attribute section must come before data."
                        )

                    found_data_tag = True
            elif not line.startswith("#"):
                if len(col_names) == 0:
                    raise Exception(
                        "Missing attribute section. Attribute section must come before data."
                    )
                elif not found_data_tag:
                    raise Exception("Missing @data tag.")
                else:
                    if not started_reading_data_section:
                        # First data row: set up the per-column accumulators.
                        started_reading_data_section = True
                        found_data_section = True
                        all_series = []

                        for col in col_names:
                            all_data[col] = []

                    full_info = line.split(":")

                    if len(full_info) != (len(col_names) + 1):
                        raise Exception("Missing attributes/values in series.")

                    # The last ':'-separated field holds the comma-separated values.
                    series = full_info[len(full_info) - 1]
                    series = series.split(",")

                    if len(series) == 0:
                        raise Exception(
                            "A given series should contains a set of comma separated numeric values. At least one numeric value should be there in a series. Missing values should be indicated with ? symbol"
                        )

                    numeric_series = []

                    for val in series:
                        if val == "?":
                            numeric_series.append(replace_missing_vals_with)
                        else:
                            numeric_series.append(float(val))

                    if numeric_series.count(replace_missing_vals_with) == len(
                        numeric_series
                    ):
                        raise Exception(
                            "All series values are missing. A given series should contains a set of comma separated numeric values. At least one numeric value should be there in a series."
                        )

                    all_series.append(pd.Series(numeric_series).array)

                    for i in range(len(col_names)):
                        att_val = None
                        if col_types[i] == "numeric":
                            att_val = int(full_info[i])
                        elif col_types[i] == "string":
                            att_val = str(full_info[i])
                        elif col_types[i] == "date":
                            att_val = datetime.strptime(
                                full_info[i], "%Y-%m-%d %H-%M-%S"
                            )
                        else:
                            # Currently, the code supports only numeric, string
                            # and date types. Extend this as required.
                            raise Exception(
                                "Invalid attribute type."
                            )

                        if att_val is None:
                            raise Exception("Invalid attribute value.")
                        else:
                            all_data[col_names[i]].append(att_val)

            line_count = line_count + 1

        if line_count == 0:
            raise Exception("Empty file.")
        if len(col_names) == 0:
            raise Exception("Missing attribute section.")
        if not found_data_section:
            raise Exception("Missing series information under data section.")

        all_data[value_column_name] = all_series
        loaded_data = pd.DataFrame(all_data)

        return (
            loaded_data,
            frequency,
            forecast_horizon,
            contain_missing_values,
            contain_equal_length,
        )


def vali(model, vali_loader, criterion, args, device):
    """Return the average ``criterion`` loss over ``vali_loader``.

    ``model.in_layer`` and ``model.out_layer`` are put in eval mode for the
    pass and switched back to train mode afterwards, even if the pass raises.

    Args:
        model: callable model exposing ``in_layer`` and ``out_layer`` modules.
        vali_loader: object whose ``get_iterator()`` yields ``(batch_x, batch_y)``.
        criterion: callable ``criterion(pred, true)`` returning a scalar loss.
        args: namespace providing ``pred_len``.
        device: device the inputs are moved to before the forward pass.
    """
    total_loss = []
    model.in_layer.eval()
    model.out_layer.eval()
    try:
        with torch.no_grad():
            for batch_x, batch_y in vali_loader.get_iterator():
                batch_x = torch.Tensor(batch_x).to(device)
                batch_y = torch.Tensor(batch_y)

                outputs = model(batch_x)

                # encoder - decoder: keep only the last pred_len steps
                outputs = outputs[:, -args.pred_len:, :]
                batch_y = batch_y[:, -args.pred_len:, :]

                # The loss is evaluated on CPU, so the labels never need to
                # visit the device.
                pred = outputs.detach().cpu()
                true = batch_y.detach().cpu()

                loss = criterion(pred, true)
                # Store plain floats so np.average does not have to coerce a
                # list of 0-d tensors.
                total_loss.append(float(loss))
        total_loss = np.average(total_loss)
    finally:
        model.in_layer.train()
        model.out_layer.train()
    return total_loss


def MASE(x, freq, pred, true):
    """Mean absolute error of ``pred`` vs ``true`` scaled by the seasonal-naive error of ``x``.

    The scale is the mean absolute difference between ``x`` and itself shifted
    by ``freq`` along axis 1; ``1e-8`` guards against division by zero.
    """
    masep = np.mean(np.abs(x[:, freq:] - x[:, :-freq]))
    return np.mean(np.abs(pred - true) / (masep + 1e-8))


def test(model, test_loader, args, device):
    """Evaluate ``model`` on ``test_loader`` and return mean MAE, MAPE and RMSE.

    Metrics are computed per forecast step (with zero-valued labels masked out)
    and then averaged over the ``args.pred_len`` steps.

    Args:
        model: model exposing ``eval()`` and callable on a batch tensor.
        test_loader: object whose ``get_iterator()`` yields ``(batch_x, batch_y)``.
            All batches must have the same shape because they are stacked.
        args: namespace providing ``seq_len`` and ``pred_len``.
        device: device the inputs are moved to before the forward pass.

    Returns:
        Tuple ``(mae, mape, rmse)`` of averaged metrics.
    """
    preds = []
    trues = []

    model.eval()
    with torch.no_grad():
        for batch_x, batch_y in test_loader.get_iterator():
            batch_x = torch.Tensor(batch_x).to(device)
            batch_y = torch.Tensor(batch_y)

            outputs = model(batch_x[:, -args.seq_len:, :])

            # encoder - decoder: keep only the last pred_len steps
            outputs = outputs[:, -args.pred_len:, :]
            batch_y = batch_y[:, -args.pred_len:, :]

            preds.append(outputs.detach().cpu().numpy())
            trues.append(batch_y.detach().cpu().numpy())

    # Stack into one ndarray first: building a tensor straight from a list of
    # ndarrays is the slow path and emits a UserWarning.
    preds = torch.as_tensor(np.stack(preds), dtype=torch.float32)
    trues = torch.as_tensor(np.stack(trues), dtype=torch.float32)

    amae = []
    amape = []
    armse = []
    for i in range(args.pred_len):
        # After stacking, axis 2 is the time axis of each batch.
        pred = preds[:, :, i]
        real = trues[:, :, i]
        step_scores = metrics(pred, real)
        amae.append(step_scores[0])
        amape.append(step_scores[1])
        armse.append(step_scores[2])
    return np.mean(amae), np.mean(amape), np.mean(armse)


def _null_mask(labels, null_val):
    """Return a float mask for ``labels`` normalised to mean 1 over valid entries."""
    if np.isnan(null_val):
        mask = ~torch.isnan(labels)
    else:
        mask = (labels != null_val)
    mask = mask.float()
    mask /= torch.mean(mask)
    # If every entry is masked, the division above yields NaN; zero those out.
    return torch.where(torch.isnan(mask), torch.zeros_like(mask), mask)


def _masked_mean(loss, labels, null_val):
    """Average element-wise ``loss`` with entries where ``labels`` is null zeroed."""
    loss = loss * _null_mask(labels, null_val)
    loss = torch.where(torch.isnan(loss), torch.zeros_like(loss), loss)
    return torch.mean(loss)


def masked_mse(preds, labels, null_val=np.nan):
    """Mean squared error ignoring entries where ``labels`` equals ``null_val``."""
    return _masked_mean((preds - labels) ** 2, labels, null_val)


def masked_rmse(preds, labels, null_val=np.nan):
    """Square root of :func:`masked_mse`."""
    return torch.sqrt(masked_mse(preds=preds, labels=labels, null_val=null_val))


def masked_mae(preds, labels, null_val=np.nan):
    """Mean absolute error ignoring entries where ``labels`` equals ``null_val``."""
    return _masked_mean(torch.abs(preds - labels), labels, null_val)


def masked_mape(preds, labels, null_val=np.nan):
    """Mean absolute percentage error ignoring entries where ``labels`` equals ``null_val``.

    The ratio is ``|preds - labels| / labels`` (not ``|labels|``), as in the
    original implementation.
    """
    return _masked_mean(torch.abs(preds - labels) / labels, labels, null_val)


def metrics(pred, real):
    """Return ``(mae, mape, rmse)`` as Python floats, masking zero-valued labels."""
    mae = masked_mae(pred, real, 0.0).item()
    mape = masked_mape(pred, real, 0.0).item()
    rmse = masked_rmse(pred, real, 0.0).item()
    return mae, mape, rmse