TrafficWheel/lib/dl_other_dataset.py

40 lines
1.1 KiB
Python

"""Download the auxiliary traffic datasets from Kaggle via the ``kaggle`` CLI."""

import os
import subprocess

# Kaggle dataset slugs, keyed by the sub-directory each one is unpacked into.
DATASETS = {
    "hangzhou_taxi": "changyuheng/hz-taxi",
    "nyc_taxi": "new-york-city/nyc-taxi-trip-duration",
    "hangzhou_bike": "changyuheng/hz-bike",
}

# Root directory that receives one sub-directory per dataset.
SAVE_DIR = "./datasets"


def kaggle_config_path(environ=None):
    """Return the path where the Kaggle CLI is expected to find ``kaggle.json``.

    Args:
        environ: Mapping of environment variables to consult; defaults to
            ``os.environ``.

    Returns:
        ``$KAGGLE_CONFIG_DIR/kaggle.json`` when ``KAGGLE_CONFIG_DIR`` is set
        to a non-empty value, otherwise the expanded ``~/.kaggle/kaggle.json``.
    """
    env = os.environ if environ is None else environ
    config_dir = env.get("KAGGLE_CONFIG_DIR")
    if config_dir:
        return os.path.join(config_dir, "kaggle.json")
    return os.path.expanduser("~/.kaggle/kaggle.json")


def has_kaggle_env_credentials(environ=None):
    """Return True when both ``KAGGLE_USERNAME`` and ``KAGGLE_KEY`` are non-empty.

    Args:
        environ: Mapping of environment variables to consult; defaults to
            ``os.environ``.
    """
    env = os.environ if environ is None else environ
    return bool(env.get("KAGGLE_USERNAME") and env.get("KAGGLE_KEY"))


def build_download_command(kaggle_id, target_dir):
    """Return the ``kaggle`` CLI argv that downloads and unzips one dataset.

    Args:
        kaggle_id: Dataset slug in ``owner/dataset`` form.
        target_dir: Directory passed to the CLI's ``-p`` option.
    """
    return [
        "kaggle",
        "datasets",
        "download",
        "-d",
        kaggle_id,
        "-p",
        target_dir,
        "--unzip",
    ]


def main():
    """Download every dataset in ``DATASETS`` into ``SAVE_DIR``.

    Raises:
        FileNotFoundError: If no Kaggle credentials are found, i.e. neither
            the ``KAGGLE_USERNAME``/``KAGGLE_KEY`` environment variables nor
            a ``kaggle.json`` file is present.
        subprocess.CalledProcessError: If a ``kaggle`` invocation exits with
            a non-zero status; the remaining datasets are not attempted.
    """
    os.makedirs(SAVE_DIR, exist_ok=True)

    # Fail early with a readable message instead of letting the CLI abort.
    # Credentials supplied through the environment are accepted as well, so
    # setups without a kaggle.json file (CI, containers) are not rejected.
    kaggle_json = kaggle_config_path()
    if not has_kaggle_env_credentials() and not os.path.exists(kaggle_json):
        raise FileNotFoundError(
            f"未找到 {kaggle_json},请先在 Kaggle 设置中下载并放置 API Key。"
        )

    # Download the datasets one after another.
    for name, kaggle_id in DATASETS.items():
        print(f"📥 正在下载 {name} ({kaggle_id}) ...")
        cmd = build_download_command(kaggle_id, os.path.join(SAVE_DIR, name))
        # check=True: stop at the first failed download rather than continue.
        subprocess.run(cmd, check=True)
        print(f"{name} 下载完成\n")
    print("🎉 所有数据集已下载到", SAVE_DIR)


if __name__ == "__main__":
    main()