Add BJTaxi support

commit 475a4788cd (parent b7ea73bc92)
.vscode/launch.json

@@ -4,13 +4,6 @@
     // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
     "version": "0.2.0",
     "configurations": [
-        {
-            "name": "Python Debugger: Current File",
-            "type": "debugpy",
-            "request": "launch",
-            "program": "${file}",
-            "console": "integratedTerminal"
-        },
         {
             "name": "STID_PEMS-BAY",
             "type": "debugpy",
@@ -35,6 +28,14 @@
             "console": "integratedTerminal",
             "args": "--config ./config/REPST/PEMSD8.yaml"
         },
+        {
+            "name": "REPST-BJTaxi-InFlow",
+            "type": "debugpy",
+            "request": "launch",
+            "program": "run.py",
+            "console": "integratedTerminal",
+            "args": "--config ./config/REPST/BJTaxi-Inflow.yaml"
+        },
         {
             "name": "REPST-PEMSBAY",
             "type": "debugpy",
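The new launch entry is equivalent to running `python run.py --config ./config/REPST/BJTaxi-Inflow.yaml` from the repository root. A minimal sketch of the entry point this configuration assumes — only the `--config` flag is confirmed by the launch entry; the argparse wiring and YAML loading below are assumptions, since run.py itself is not part of this commit:

# Hypothetical sketch of the run.py entry point targeted by the launch config.
import argparse
import yaml  # PyYAML

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--config", required=True, help="path to a YAML config file")
    args = parser.parse_args()
    with open(args.config, "r", encoding="utf-8") as f:
        config = yaml.safe_load(f)
    print(config["basic"]["dataset"])  # "BJTaxi-InFlow" for the new config

if __name__ == "__main__":
    main()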
config/REPST/BJTaxi-Inflow.yaml (new file)

@@ -0,0 +1,60 @@
+basic:
+  dataset: "BJTaxi-InFlow"
+  mode: "train"
+  device: "cuda:0"
+  model: "REPST"
+  seed: 2023
+
+data:
+  add_day_in_week: false
+  add_time_in_day: false
+  column_wise: false
+  days_per_week: 7
+  default_graph: true
+  horizon: 24
+  lag: 24
+  normalizer: std
+  num_nodes: 1024
+  steps_per_day: 48
+  test_ratio: 0.2
+  tod: false
+  val_ratio: 0.2
+  sample: 1
+  input_dim: 1
+  batch_size: 16
+
+model:
+  pred_len: 24
+  seq_len: 24
+  patch_len: 6
+  stride: 7
+  dropout: 0.2
+  gpt_layers: 9
+  d_ff: 128
+  gpt_path: ./GPT-2
+  d_model: 64
+  n_heads: 1
+  input_dim: 1
+  word_num: 1000
+
+train:
+  batch_size: 16
+  early_stop: true
+  early_stop_patience: 15
+  epochs: 100
+  grad_norm: false
+  loss_func: mae
+  lr_decay: true
+  lr_decay_rate: 0.3
+  lr_decay_step: "5,20,40,70"
+  lr_init: 0.003
+  max_grad_norm: 5
+  real_value: true
+  weight_decay: 0
+  debug: false
+  output_dim: 1
+  log_step: 100
+  plot: false
+  mae_thresh: None
+  mape_thresh: 0.001
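Two values here are worth cross-checking. `num_nodes: 1024` is the flattened 32×32 TaxiBJ grid produced by `read_BeijingTaxi()` below, and `patch_len: 6` with `stride: 7` yields three patches per 24-step window under the usual PatchTST-style formula — whether REPST pads the tail of the sequence is an assumption:

# Patch count implied by the model section, assuming the common
# (seq_len - patch_len) // stride + 1 formula with no end padding.
seq_len, patch_len, stride = 24, 6, 7
num_patches = (seq_len - patch_len) // stride + 1
print(num_patches)  # 3; the last patch covers steps 14-19, so steps 20-23 go unused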
Dataset loader (load_st_dataset)

@@ -55,6 +55,10 @@ def load_st_dataset(config):
         case "SD":
             data_path = os.path.join("./data/SD/data.npz")
             data = np.load(data_path)["data"][:, :, 0].astype(np.float32)
+        case "BJTaxi-InFlow":
+            data = read_BeijingTaxi()[:, :, 0:1].astype(np.float32)
+        case "BJTaxi-OutFlow":
+            data = read_BeijingTaxi()[:, :, 1:2].astype(np.float32)
         case _:
             raise ValueError(f"Unsupported dataset: {dataset}")
 
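The `0:1` and `1:2` slices keep the trailing channel axis, so both cases return a 3-D array of shape (T, 1024, 1), consistent with `input_dim: 1` in the new config. A quick shape check against a stand-in array:

import numpy as np

# Stand-in for read_BeijingTaxi() output: (time, 32*32 nodes, 2 flow channels).
all_data = np.zeros((100, 1024, 2), dtype=np.float32)

inflow = all_data[:, :, 0:1]   # (100, 1024, 1) - slicing keeps the channel axis
outflow = all_data[:, :, 1:2]  # (100, 1024, 1)
assert inflow.shape == (100, 1024, 1)  # plain all_data[:, :, 0] would drop to 2-D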
@@ -65,3 +69,16 @@ def load_st_dataset(config):
     print("Loading the %s dataset... " % dataset)
     # return data[::sample]
     return data
+
+def read_BeijingTaxi():
+    files = ["TaxiBJ2013.npy", "TaxiBJ2014.npy", "TaxiBJ2015.npy",
+             "TaxiBJ2016_1.npy", "TaxiBJ2016_2.npy"]
+    all_data = []
+    for file in files:
+        data_path = os.path.join(f"./data/BeijingTaxi/{file}")
+        data = np.load(data_path)
+        all_data.append(data)
+    all_data = np.concatenate(all_data, axis=0)
+    time_num = all_data.shape[0]
+    all_data = all_data.transpose(0, 2, 3, 1).reshape(time_num, 32*32, 2)
+    return all_data
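The `transpose(0, 2, 3, 1)` implies the TaxiBJ `.npy` files are stored channels-first as (T, 2, 32, 32); moving the two flow channels last and flattening the 32×32 grid yields the (T, 1024, 2) layout the loader slices above. A shape walk-through under that layout assumption:

import numpy as np

# Assumed on-disk layout of one TaxiBJ file: (time, 2 channels, 32, 32 grid).
raw = np.zeros((7, 2, 32, 32), dtype=np.float32)

grid_last = raw.transpose(0, 2, 3, 1)                # (7, 32, 32, 2)
nodes = grid_last.reshape(raw.shape[0], 32 * 32, 2)  # (7, 1024, 2)
assert nodes.shape == (7, 1024, 2)
# Node k maps back to grid cell (k // 32, k % 32) under C-order flattening.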
Data download utility (check_and_download_data)

@@ -4,12 +4,8 @@ from tqdm import tqdm
 import kagglehub
 import py7zr
 
-# ---------- 1. Load the structure JSON ----------
-def load_structure_json(path="utils/dataset.json"):
-    with open(path, "r", encoding="utf-8") as f:
-        return json.load(f)
 
-# ---------- 2. Check integrity ----------
+# ---------- 1. Check integrity ----------
 def detect_data_integrity(data_dir, expected):
     missing_list = []
     if not os.path.isdir(data_dir):
@@ -53,7 +49,7 @@ def detect_data_integrity(data_dir, expected):
     # print(f"Missing datasets: {missing_list}")
     return missing_list
 
-# ---------- 3. Download and extract the 7z archive ----------
+# ---------- 2. Download and extract the 7z archive ----------
 def download_and_extract(url, dst_dir, max_retries=3):
     os.makedirs(dst_dir, exist_ok=True)
     filename = os.path.basename(urlsplit(url).path) or "download.7z"
@@ -77,7 +73,7 @@ def download_and_extract(url, dst_dir, max_retries=3):
     if attempt==max_retries: raise RuntimeError("Download or extraction failed")
     print("Error, retrying...", e)
 
-# ---------- 4. Download Kaggle data ----------
+# ---------- 3. Download Kaggle data ----------
 def download_kaggle_data(base_dir, dataset):
     try:
         print(f"Downloading kaggle dataset : {dataset}")
@@ -86,7 +82,7 @@ def download_kaggle_data(base_dir, dataset):
     except Exception as e:
         print("Kaggle download failed:", dataset, e)
 
-# ---------- 5. Download GitHub data ----------
+# ---------- 4. Download GitHub data ----------
 def download_github_data(file_path, save_dir):
     if not os.path.exists(save_dir):
         os.makedirs(save_dir)
@@ -136,13 +132,13 @@ def rearrange_dir():
 
 # ---------- 6. Main flow ----------
 def check_and_download_data():
+    # Load the structure file and detect missing datasets
     cwd = os.getcwd()
     data_dir = os.path.join(cwd,"data")
-    file_tree = load_structure_json()
-    # Run the check once to collect all missing items
+    with open("utils/dataset.json", "r", encoding="utf-8") as f:
+        file_tree = json.load(f)
     missing_list = detect_data_integrity(data_dir, file_tree)
-    print(f"Missing datasets: {missing_list}")
+    # print(f"Missing datasets: {missing_list}")
 
     # Check and download the adj data
     if "adj" in missing_list:
@@ -167,7 +163,6 @@ def check_and_download_data():
     missing_list = detect_data_integrity(data_dir, file_tree)
 
     # Check and download the pems, bay, metr-la, solar-energy data
-    # Define the mapping from dataset names to Kaggle datasets
     kaggle_map = {
         "PEMS03": "elmahy/pems-dataset",
         "PEMS04": "elmahy/pems-dataset",
@@ -178,7 +173,6 @@ def check_and_download_data():
         "SolarEnergy": "wangshaoqi/solar-energy"
     }
 
-    # Check whether any datasets need to be downloaded from Kaggle
     # Deduplicate the Kaggle download slugs first to avoid fetching the same dataset twice
     downloaded_kaggle_datasets = set()
 
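The deduplication comment and `downloaded_kaggle_datasets` set suggest a download loop along these lines — the loop itself falls outside this diff, and `kaggle_map`, `missing_list`, `data_dir`, and `download_kaggle_data` are the names defined above:

# Hypothetical continuation of check_and_download_data(): PEMS03 and PEMS04
# share one Kaggle slug, so each slug is fetched at most once.
for name in missing_list:
    slug = kaggle_map.get(name)
    if slug and slug not in downloaded_kaggle_datasets:
        download_kaggle_data(data_dir, slug)
        downloaded_kaggle_datasets.add(slug)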