import os

import numpy as np
import pandas as pd
import torch
from fastdtw import fastdtw
from tqdm import tqdm

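# Each entry maps a node count to [traffic signal file (.npz), distance edge-list file (.csv)].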
files = {
    358: ["PEMS03/PEMS03.npz", "PEMS03/PEMS03.csv"],
    307: ["PEMS04/PEMS04.npz", "PEMS04/PEMS04.csv"],
    883: ["PEMS07/PEMS07.npz", "PEMS07/PEMS07.csv"],
    170: ["PEMS08/PEMS08.npz", "PEMS08/PEMS08.csv"],
    # 'pemsbay': ['PEMSBAY/pems_bay.npz', 'PEMSBAY/distance.csv'],
    # 'pemsD7M': ['PEMSD7M/PEMSD7M.npz', 'PEMSD7M/distance.csv'],
    # 'pemsD7L': ['PEMSD7L/PEMSD7L.npz', 'PEMSD7L/distance.csv']
}


def get_A_hat(args):
    """Read the data and generate the spatial adjacency matrix and the semantic adjacency matrix (via DTW).

    Args:
        args: dict with the keys
            num_nodes: int, number of sensors; selects the dataset in `files`
            sigma1: float, default=0.1, Gaussian kernel width for the semantic matrix
            sigma2: float, default=10, Gaussian kernel width for the spatial matrix
            thres1: float, default=0.6, threshold for the semantic matrix
            thres2: float, default=0.5, threshold for the spatial matrix
            device: device the returned tensors are moved to

    Returns:
        dtw_matrix: tensor, degree-normalized semantic adjacency matrix
        sp_matrix: tensor, degree-normalized spatial adjacency matrix
    """
    filepath = "./data/"
    num_node = args["num_nodes"]
    file = files[num_node]
    filename = file[0][:6]  # e.g. "PEMS03"

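    # Load the raw signals (time x node x channel) and z-score normalize each channel.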
    data = np.load(filepath + file[0])["data"]
    num_node = data.shape[1]
    mean_value = np.mean(data, axis=(0, 1)).reshape(1, 1, -1)
    std_value = np.std(data, axis=(0, 1)).reshape(1, 1, -1)
    data = (data - mean_value) / std_value

    # Compute the DTW distances; if a cached file exists it is loaded directly below.
    if not os.path.exists(
        f"data/PEMS0{filename[-1]}/PEMS0{filename[-1]}_dtw_distance.npy"
    ):
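        # Average the series over whole days (24 * 12 = 288 samples per day at
        # 5-minute resolution) so DTW compares the mean daily profile of each node.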
        data_mean = np.mean(
            [
                data[:, :, 0][24 * 12 * i : 24 * 12 * (i + 1)]
                for i in range(data.shape[0] // (24 * 12))
            ],
            axis=0,
        )
        data_mean = data_mean.squeeze().T
        dtw_distance = np.zeros((num_node, num_node))
        for i in tqdm(range(num_node)):
            for j in range(i, num_node):
                dtw_distance[i][j] = fastdtw(data_mean[i], data_mean[j], radius=6)[0]
        for i in range(num_node):
            for j in range(i):
                dtw_distance[i][j] = dtw_distance[j][i]
        np.save(
            f"data/PEMS0{filename[-1]}/PEMS0{filename[-1]}_dtw_distance.npy",
            dtw_distance,
        )

    dist_matrix = np.load(
        f"data/PEMS0{filename[-1]}/PEMS0{filename[-1]}_dtw_distance.npy"
    )

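    # Semantic adjacency: z-score the DTW distances, map them through a Gaussian
    # kernel exp(-d^2 / sigma1^2), and keep an edge wherever the similarity
    # exceeds thres1.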
    mean = np.mean(dist_matrix)
    std = np.std(dist_matrix)
    dist_matrix = (dist_matrix - mean) / std
    sigma = args["sigma1"]
    dist_matrix = np.exp(-(dist_matrix**2) / sigma**2)
    dtw_matrix = np.zeros_like(dist_matrix)
    dtw_matrix[dist_matrix > args["thres1"]] = 1

    # Compute the spatial distances; if a cached file exists it is loaded directly below.
    if not os.path.exists(
        f"data/PEMS0{filename[-1]}/PEMS0{filename[-1]}_spatial_distance.npy"
    ):
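        # Build a dense node-to-node distance matrix from the CSV edge list
        # (from, to, distance); missing edges stay at +inf, so their Gaussian
        # kernel weight below becomes 0.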
        if num_node == 358:
            # PEMS03 lists its sensor ids in a .txt file; build an id -> index mapping.
            with open(f"data/PEMS0{filename[-1]}/PEMS0{filename[-1]}.txt", "r") as f:
                id_dict = {
                    int(i): idx for idx, i in enumerate(f.read().strip().split("\n"))
                }
            # Read the CSV with pandas, skipping the header row.
            df = pd.read_csv(filepath + file[1], skiprows=1, header=None)
            dist_matrix = np.zeros((num_node, num_node)) + float("inf")
            for _, row in df.iterrows():
                start = int(id_dict[row[0]])
                end = int(id_dict[row[1]])
                dist_matrix[start][end] = float(row[2])
                dist_matrix[end][start] = float(row[2])
            np.save(
                f"data/PEMS0{filename[-1]}/PEMS0{filename[-1]}_spatial_distance.npy",
                dist_matrix,
            )
        else:
            # Read the CSV with pandas, skipping the header row.
            df = pd.read_csv(filepath + file[1], skiprows=1, header=None)
            dist_matrix = np.zeros((num_node, num_node)) + float("inf")
            for _, row in df.iterrows():
                start = int(row[0])
                end = int(row[1])
                dist_matrix[start][end] = float(row[2])
                dist_matrix[end][start] = float(row[2])
            np.save(
                f"data/PEMS0{filename[-1]}/PEMS0{filename[-1]}_spatial_distance.npy",
                dist_matrix,
            )

    # Load the cached spatial distances so the cache-hit path works as well.
    dist_matrix = np.load(
        f"data/PEMS0{filename[-1]}/PEMS0{filename[-1]}_spatial_distance.npy"
    )

    # Spatial adjacency: z-score the finite distances, apply a Gaussian kernel
    # with sigma2, and drop weights below thres2.
    std = np.std(dist_matrix[dist_matrix != float("inf")])
    mean = np.mean(dist_matrix[dist_matrix != float("inf")])
    dist_matrix = (dist_matrix - mean) / std
    sigma = args["sigma2"]
    sp_matrix = np.exp(-(dist_matrix**2) / sigma**2)
    sp_matrix[sp_matrix < args["thres2"]] = 0

    return get_normalized_adj(dtw_matrix).to(args["device"]), get_normalized_adj(
        sp_matrix
    ).to(args["device"])


def get_normalized_adj(A):
    """
    Returns a tensor, the degree normalized adjacency matrix.
    """
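    # Computes A_reg = (alpha / 2) * (I + D^{-1/2} A D^{-1/2}) with alpha = 0.8.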
    alpha = 0.8
    D = np.array(np.sum(A, axis=1)).reshape((-1,))
    D[D <= 10e-5] = 10e-5  # Prevent infs
    diag = np.reciprocal(np.sqrt(D))
    A_wave = np.multiply(np.multiply(diag.reshape((-1, 1)), A), diag.reshape((1, -1)))
    A_reg = alpha / 2 * (np.eye(A.shape[0]) + A_wave)
    return torch.from_numpy(A_reg.astype(np.float32))


if __name__ == "__main__":
    config = {
        "sigma1": 0.1,
        "sigma2": 10,
        "thres1": 0.6,
        "thres2": 0.5,
        "device": "cuda:0" if torch.cuda.is_available() else "cpu",
    }

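    # Precompute (and cache) the DTW and spatial distance matrices for
    # PEMS03 (358 nodes), PEMS08 (170 nodes) and PEMS07 (883 nodes).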
    for nodes in [358, 170, 883]:
        args = {"num_nodes": nodes, **config}
        get_A_hat(args)
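# A minimal usage sketch (not part of the pipeline above): calling get_A_hat for a
# single dataset and checking the returned adjacency shapes.
#
#   A_se, A_sp = get_A_hat({"num_nodes": 170, "sigma1": 0.1, "sigma2": 10,
#                           "thres1": 0.6, "thres2": 0.5, "device": "cpu"})
#   assert A_se.shape == A_sp.shape == (170, 170)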