In [1]:
import os
import sys
import math
import torch

# Resolve the project root as the directory two levels above the current working
# directory, then make it the working directory.
# NOTE: the path is derived from the current working directory, so executing this
# cell again in the same kernel moves two further levels up.
_launch_dir = os.path.abspath(os.curdir)
PROJECT_DIR = os.path.abspath(f"{_launch_dir}/../..")
os.chdir(PROJECT_DIR)

import numpy as np
from tqdm import tqdm
from basicts.data import TimeSeriesForecastingDataset
from basicts.utils import get_regular_settings
from basicts.scaler import ZScoreScaler


# Metric used to calculate the similarity; get_similarity_matrix accepts
# "cosine", "mse" and "mae".
metric = "cosine"
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Dataset under analysis. Assign exactly one name here: an earlier revision of this
# cell first assigned "METR-LA" and then immediately overwrote it with "ETTh1".
DATA_NAME = "ETTh1"
BATCH_SIZE = 8

# Remaining settings are looked up from the dataset's regular settings.
regular_settings = get_regular_settings(DATA_NAME)
INPUT_LEN = regular_settings['INPUT_LEN']  # Length of input sequence
OUTPUT_LEN = regular_settings['OUTPUT_LEN']  # Length of output sequence
TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO']  # Train/Validation/Test split ratios
RESCALE = regular_settings['RESCALE']  # Whether to rescale the data
NULL_VAL = regular_settings['NULL_VAL']  # Null value in the data
NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL']  # Whether to normalize each channel


## Utilities

In [2]:
# similarity computation
def cosine_similarity(x, y):
    """Pairwise cosine similarity between the row vectors of `x` and `y`.

    Vectors are taken along the last axis; any leading axes are treated as
    batch dimensions, so both ``(batch, n, d)`` and plain ``(n, d)`` inputs work.

    Args:
        x: tensor of shape ``(..., n, d)``.
        y: tensor of shape ``(..., m, d)``.

    Returns:
        Tensor of shape ``(..., n, m)`` whose ``[..., i, j]`` entry is the dot
        product of ``x[..., i, :]`` and ``y[..., j, :]`` divided by the product
        of their L2 norms.
    """
    # denominator: outer product of the L2 norms; the 1e-7 offset keeps the
    # division finite when a vector is all zeros
    l2_x = torch.norm(x, dim=-1, p=2) + 1e-7
    l2_y = torch.norm(y, dim=-1, p=2) + 1e-7
    l2_n = l2_x.unsqueeze(-1) * l2_y.unsqueeze(-2)
    # numerator: all pairwise dot products
    l2_d = torch.matmul(x, y.transpose(-1, -2))
    return l2_d / l2_n

def get_similarity_matrix(data, metric):
    """Compare every row vector of `data` with every other one using `metric`.

    Args:
        data: tensor whose last two axes are (vectors, features).
        metric: one of "cosine", "mse" or "mae".

    Returns:
        The pairwise matrix for each batch element. "cosine" yields cosine
        similarities. "mse" and "mae" yield ``torch.cdist`` distances with
        ``p=2`` and ``p=1`` respectively, i.e. smaller values mean more similar.

    Raises:
        NotImplementedError: if `metric` is not one of the supported names.
    """
    if metric == "cosine":
        sim = cosine_similarity(data, data)
    elif metric == "mse":
        sim = torch.cdist(data, data, p=2)
    elif metric == "mae":
        sim = torch.cdist(data, data, p=1)
    else:
        raise NotImplementedError(
            f"Unsupported metric {metric!r}; expected 'cosine', 'mse' or 'mae'."
        )
    return sim

In [3]:
# Dataset arguments taken from the experiment settings defined earlier.
dataset_param = dict(
    dataset_name=DATA_NAME,
    train_val_test_ratio=TRAIN_VAL_TEST_RATIO,
    input_len=INPUT_LEN,
    output_len=OUTPUT_LEN,
)
# Build the 'train' split and wrap it in a loader that walks it in order (no shuffling).
dataset = TimeSeriesForecastingDataset(mode='train', **dataset_param)
dataloader = torch.utils.data.DataLoader(
    dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=3,
)

# Scaler configured from the same dataset settings; it is applied to every batch later on.
scaler_param = dict(
    dataset_name=DATA_NAME,
    train_ratio=TRAIN_VAL_TEST_RATIO[0],
    norm_each_channel=NORM_EACH_CHANNEL,
    rescale=RESCALE,
)
scaler = ZScoreScaler(**scaler_param)


## Generate Similarity Matrix

In [4]:
# get similarity matrices

# Per-batch similarity matrices; inference() appends to these lists.
history_adjs_all = []
future_adjs_all = []
def inference(dataloader):
    """Compute history and future similarity matrices for every batch.

    Each batch must provide 'inputs' and 'target' entries. Both are passed
    through the module-level `scaler`, reduced to their first feature channel
    and transposed before `get_similarity_matrix` is applied with the
    module-level `metric`.

    Args:
        dataloader: iterable of batches.

    Returns:
        None. One matrix per batch is appended to the module-level lists
        `history_adjs_all` and `future_adjs_all`.
    """
    for batch in tqdm(dataloader):
        future_data, history_data = batch['target'], batch['inputs']
        future_data = scaler.transform(future_data)
        history_data = scaler.transform(history_data)
        history_data = history_data[..., 0].transpose(1, 2) # batch_size, num_nodes, history_seq_len
        future_data = future_data[..., 0].transpose(1, 2) # batch_size, num_nodes, future_seq_len
        history_adjs = get_similarity_matrix(history_data, metric) # batch_size, num_nodes, num_nodes
        future_adjs = get_similarity_matrix(future_data, metric) # batch_size, num_nodes, num_nodes
        history_adjs_all.append(history_adjs)
        future_adjs_all.append(future_adjs)

# `dataloader` wraps the 'train' split, so this analyses the training data.
# To analyse another split, build a dataloader for that split and pass it here;
# the split is selected by the dataloader, not by a mode name.
inference(dataloader)


100%|██████████| 997/997 [00:02<00:00, 412.47it/s]


In [5]:
# stack the per-batch similarity matrices into one tensor each
history_similarity = torch.cat(history_adjs_all, dim=0).detach().cpu() # num_samples, num_nodes, num_nodes
future_similarity = torch.cat(future_adjs_all, dim=0).detach().cpu() # num_samples, num_nodes, num_nodes
# L: number of samples, N: number of nodes (each matrix is N x N, so the first
# two dimensions are all that is needed)
L, N = future_similarity.shape[:2]
print(future_similarity.shape)

torch.Size([7969, 7, 7])


## Get Spatial Indistinguishability Ratio

In [6]:
# Thresholds: e_u is the lower bound applied to the history similarity,
# e_l is the upper bound applied to the future similarity.
e_l = 0.4
e_u = 0.9

# 1.0 where the history similarity exceeds e_u, 0.0 elsewhere
history_similarity_filtered = (history_similarity > e_u).to(history_similarity.dtype)
# 1.0 where the future similarity is below e_l, 0.0 elsewhere
future_similarity_filtered = (future_similarity < e_l).to(future_similarity.dtype)
# 1.0 only for entries that satisfy both conditions
overlap = torch.mul(history_similarity_filtered, future_similarity_filtered)


In [7]:
# overlap ratio: flagged entries as a fraction of all L * N * N matrix entries
overlap_ratio = torch.div(overlap.sum(), L * N * N)
# shown multiplied by 1000
print(1000 * overlap_ratio)

tensor(3.8568)


In [8]:
# indistinguishability ratio: flagged entries as a fraction of the entries whose
# history similarity passed the e_u threshold
indistinguishability_ratio = torch.div(overlap.sum(), history_similarity_filtered.sum())
# shown multiplied by 1000
print(1000 * indistinguishability_ratio)


tensor(15.7748)
