# Listing metadata captured with this file: Python, 32 lines, 1.0 KiB.
"""Download a fixed set of Kaggle datasets with the ``kaggle`` command-line tool.

Each dataset listed in ``DATASETS`` is downloaded and unzipped into its own
sub-directory of ``SAVE_DIR``. Nothing runs on import; the work happens only
when the file is executed as a script.
"""

import os
import shutil
import subprocess
from typing import List, Mapping, Optional

# Kaggle datasets to fetch: local sub-directory name -> "<owner>/<dataset>" slug.
DATASETS = {
    "hangzhou_taxi": "changyuheng/hz-taxi",
    # NOTE(review): "NYC Taxi Trip Duration" is widely known as a Kaggle
    # competition; confirm this slug resolves with `kaggle datasets download`.
    "nyc_taxi": "new-york-city/nyc-taxi-trip-duration",
    "hangzhou_bike": "changyuheng/hz-bike",
}

# Root directory that receives one sub-directory per dataset.
SAVE_DIR = "./datasets"


def kaggle_config_path(environ: Optional[Mapping[str, str]] = None) -> str:
    """Return the path where the ``kaggle.json`` credential file is expected.

    Args:
        environ: Environment mapping to consult; defaults to ``os.environ``.

    Returns:
        ``<KAGGLE_CONFIG_DIR>/kaggle.json`` when that variable is set and
        non-empty, otherwise the expanded ``~/.kaggle/kaggle.json``.
    """
    env = os.environ if environ is None else environ
    config_dir = env.get("KAGGLE_CONFIG_DIR")
    if config_dir:
        return os.path.join(config_dir, "kaggle.json")
    return os.path.expanduser("~/.kaggle/kaggle.json")


def has_kaggle_credentials(environ: Optional[Mapping[str, str]] = None) -> bool:
    """Report whether Kaggle API credentials appear to be configured.

    Credentials count as present when both ``KAGGLE_USERNAME`` and
    ``KAGGLE_KEY`` are set to non-empty values, or when the credential file
    returned by :func:`kaggle_config_path` exists.

    Args:
        environ: Environment mapping to consult; defaults to ``os.environ``.
    """
    env = os.environ if environ is None else environ
    if env.get("KAGGLE_USERNAME") and env.get("KAGGLE_KEY"):
        return True
    return os.path.exists(kaggle_config_path(env))


def build_download_command(kaggle_id: str, target_dir: str) -> List[str]:
    """Build the ``kaggle`` CLI argument list for one dataset download.

    Args:
        kaggle_id: Dataset slug passed to ``-d``.
        target_dir: Directory passed to ``-p``.

    Returns:
        The argument list, suitable for ``subprocess.run`` without a shell.
    """
    return [
        "kaggle", "datasets", "download", "-d", kaggle_id,
        "-p", target_dir, "--unzip",
    ]


def main() -> None:
    """Validate the environment, then download every dataset in ``DATASETS``.

    Raises:
        FileNotFoundError: If no Kaggle credentials are configured, or the
            ``kaggle`` executable is not on ``PATH``.
        subprocess.CalledProcessError: If a download command exits non-zero;
            the remaining datasets are not attempted.
    """
    # Validate everything before touching the filesystem, so a misconfigured
    # machine does not end up with an empty download directory.
    if not has_kaggle_credentials():
        kaggle_json = kaggle_config_path()
        raise FileNotFoundError(f"未找到 {kaggle_json},请先在 Kaggle 设置中下载并放置 API Key。")
    if shutil.which("kaggle") is None:
        # Without this check the failure would surface as a bare
        # FileNotFoundError from subprocess, easy to confuse with the one above.
        raise FileNotFoundError("未找到 kaggle 命令,请先安装 Kaggle CLI(pip install kaggle)。")

    os.makedirs(SAVE_DIR, exist_ok=True)

    for name, kaggle_id in DATASETS.items():
        print(f"📥 正在下载 {name} ({kaggle_id}) ...")
        target_dir = os.path.join(SAVE_DIR, name)
        # check=True keeps the script fail-fast on the first failed download.
        subprocess.run(build_download_command(kaggle_id, target_dir), check=True)
        print(f"✅ {name} 下载完成\n")

    print("🎉 所有数据集已下载到", SAVE_DIR)


if __name__ == '__main__':
    main()