# File listing metadata: 40 lines, 1.1 KiB, Python
if __name__ == "__main__":
    # Download a fixed set of Kaggle datasets into ./datasets/<name>/ by
    # shelling out to the `kaggle` command-line tool.
    import os
    import subprocess

    # Kaggle datasets to fetch: local sub-folder name -> Kaggle dataset id.
    datasets = {
        "hangzhou_taxi": "changyuheng/hz-taxi",
        "nyc_taxi": "new-york-city/nyc-taxi-trip-duration",
        "hangzhou_bike": "changyuheng/hz-bike",
    }

    # Directory the downloads are saved under (created if missing).
    save_dir = "./datasets"
    os.makedirs(save_dir, exist_ok=True)

    # Check the Kaggle API configuration before starting any download.
    # The Kaggle CLI accepts credentials either from a kaggle.json file or
    # from the KAGGLE_USERNAME / KAGGLE_KEY environment variables, and the
    # folder holding kaggle.json can be relocated with KAGGLE_CONFIG_DIR.
    # Only fail when none of those is available, so valid setups are not
    # rejected by this pre-check.
    config_dir = os.environ.get("KAGGLE_CONFIG_DIR")
    if config_dir:
        kaggle_json = os.path.join(config_dir, "kaggle.json")
    else:
        kaggle_json = os.path.expanduser("~/.kaggle/kaggle.json")

    has_env_credentials = bool(
        os.environ.get("KAGGLE_USERNAME") and os.environ.get("KAGGLE_KEY")
    )
    if not has_env_credentials and not os.path.exists(kaggle_json):
        raise FileNotFoundError(
            f"未找到 {kaggle_json},请先在 Kaggle 设置中下载并放置 API Key。"
        )

    # Download each dataset in turn.
    for name, kaggle_id in datasets.items():
        print(f"📥 正在下载 {name} ({kaggle_id}) ...")
        cmd = [
            "kaggle",
            "datasets",
            "download",
            "-d",
            kaggle_id,
            "-p",
            os.path.join(save_dir, name),
            "--unzip",
        ]
        # check=True raises subprocess.CalledProcessError on a non-zero exit
        # status, so the first failing download stops the script.
        subprocess.run(cmd, check=True)
        print(f"✅ {name} 下载完成\n")

    print("🎉 所有数据集已下载到", save_dir)