TrafficWheel/lib/dl_other_dataset.py

32 lines
1.0 KiB
Python

import os
import subprocess


def kaggle_config_path(environ=None):
    """Return the path where ``kaggle.json`` is expected.

    Args:
        environ: Mapping of environment variables to consult. Defaults to
            ``os.environ``; a plain dict can be passed instead.

    Returns:
        ``<KAGGLE_CONFIG_DIR>/kaggle.json`` when ``KAGGLE_CONFIG_DIR`` is set
        to a non-empty value, otherwise ``~/.kaggle/kaggle.json`` with ``~``
        expanded.
    """
    env = os.environ if environ is None else environ
    config_dir = env.get("KAGGLE_CONFIG_DIR")
    if config_dir:
        return os.path.join(config_dir, "kaggle.json")
    return os.path.expanduser("~/.kaggle/kaggle.json")


def has_kaggle_credentials(environ=None):
    """Tell whether Kaggle API credentials appear to be configured.

    Credentials count as configured when both ``KAGGLE_USERNAME`` and
    ``KAGGLE_KEY`` are set to non-empty values, or when the file returned by
    ``kaggle_config_path`` exists.

    Args:
        environ: Mapping of environment variables to consult. Defaults to
            ``os.environ``.

    Returns:
        ``True`` if either credential source is present, ``False`` otherwise.
    """
    env = os.environ if environ is None else environ
    # Environment-variable credentials need both halves to be usable.
    if env.get("KAGGLE_USERNAME") and env.get("KAGGLE_KEY"):
        return True
    return os.path.exists(kaggle_config_path(env))


def main():
    """Download every listed Kaggle dataset into ``./datasets/<name>``.

    Each dataset is fetched by running ``kaggle datasets download`` with
    ``--unzip`` as a subprocess, one dataset after another.

    Raises:
        FileNotFoundError: If no Kaggle API credentials are found (see
            ``has_kaggle_credentials``).
        subprocess.CalledProcessError: If a ``kaggle`` invocation exits with
            a non-zero status; the datasets after it are not attempted.
    """
    # Kaggle datasets: local sub-directory name -> Kaggle dataset identifier.
    datasets = {
        "hangzhou_taxi": "changyuheng/hz-taxi",
        "nyc_taxi": "new-york-city/nyc-taxi-trip-duration",
        "hangzhou_bike": "changyuheng/hz-bike",
    }

    # Directory that receives the downloads (relative to the working dir).
    save_dir = "./datasets"
    os.makedirs(save_dir, exist_ok=True)

    # Check the Kaggle API configuration before starting any download.
    if not has_kaggle_credentials():
        kaggle_json = kaggle_config_path()
        raise FileNotFoundError(
            f"未找到 {kaggle_json},请先在 Kaggle 设置中下载并放置 API Key。"
        )

    # Download the datasets one by one.
    for name, kaggle_id in datasets.items():
        print(f"📥 正在下载 {name} ({kaggle_id}) ...")
        cmd = [
            "kaggle", "datasets", "download", "-d", kaggle_id,
            "-p", os.path.join(save_dir, name), "--unzip",
        ]
        # check=True turns a non-zero exit status into CalledProcessError.
        subprocess.run(cmd, check=True)
        print(f"{name} 下载完成\n")

    print("🎉 所有数据集已下载到", save_dir)


if __name__ == '__main__':
    main()