"""Download a fixed set of Kaggle datasets into ``./datasets/<name>``.

Each dataset is fetched by invoking the ``kaggle`` command-line tool via
``subprocess`` and unzipped into its own sub-directory. Nothing runs on
import; the downloads only happen when the file is executed as a script.
"""

import os
import subprocess

# Local folder name -> Kaggle dataset identifier ("owner/slug").
DATASETS = {
    "hangzhou_taxi": "changyuheng/hz-taxi",
    "nyc_taxi": "new-york-city/nyc-taxi-trip-duration",
    "hangzhou_bike": "changyuheng/hz-bike",
}

# Root directory that receives one sub-directory per dataset.
SAVE_DIR = "./datasets"


def kaggle_credentials_path(environ=None):
    """Return the path where the ``kaggle.json`` API key file is expected.

    Args:
        environ: Mapping of environment variables; defaults to ``os.environ``.

    Returns:
        ``$KAGGLE_CONFIG_DIR/kaggle.json`` when that variable is set and
        non-empty, otherwise ``~/.kaggle/kaggle.json`` (user-expanded).
    """
    environ = os.environ if environ is None else environ
    config_dir = environ.get("KAGGLE_CONFIG_DIR") or os.path.expanduser("~/.kaggle")
    return os.path.join(config_dir, "kaggle.json")


def has_kaggle_credentials(environ=None):
    """Report whether Kaggle API credentials appear to be configured.

    Credentials count as present when both ``KAGGLE_USERNAME`` and
    ``KAGGLE_KEY`` are set to non-empty values, or when the ``kaggle.json``
    file exists at the location given by :func:`kaggle_credentials_path`.

    Args:
        environ: Mapping of environment variables; defaults to ``os.environ``.

    Returns:
        ``True`` if either credential source is available, else ``False``.
    """
    environ = os.environ if environ is None else environ
    # Environment-variable credentials need no key file on disk, so a
    # file-only check would wrongly reject such setups (CI, containers).
    if environ.get("KAGGLE_USERNAME") and environ.get("KAGGLE_KEY"):
        return True
    return os.path.exists(kaggle_credentials_path(environ))


def build_download_command(kaggle_id, target_dir):
    """Build the ``kaggle`` CLI argument list for one dataset download.

    Args:
        kaggle_id: Dataset identifier passed to ``-d``.
        target_dir: Destination directory passed to ``-p``.

    Returns:
        The argument list, suitable for ``subprocess.run`` without a shell.
    """
    return [
        "kaggle", "datasets", "download",
        "-d", kaggle_id,
        "-p", target_dir,
        "--unzip",
    ]


def main():
    """Download every dataset in ``DATASETS`` below ``SAVE_DIR``.

    Raises:
        FileNotFoundError: If no Kaggle credentials are configured.
        subprocess.CalledProcessError: If a ``kaggle`` invocation exits with
            a non-zero status (``check=True``).
    """
    os.makedirs(SAVE_DIR, exist_ok=True)

    # Fail early with a clear message instead of letting the first CLI
    # call fail on missing credentials.
    if not has_kaggle_credentials():
        kaggle_json = kaggle_credentials_path()
        raise FileNotFoundError(f"未找到 {kaggle_json},请先在 Kaggle 设置中下载并放置 API Key。")

    # Download the datasets one after another; the first failure aborts.
    for name, kaggle_id in DATASETS.items():
        print(f"📥 正在下载 {name} ({kaggle_id}) ...")
        cmd = build_download_command(kaggle_id, os.path.join(SAVE_DIR, name))
        subprocess.run(cmd, check=True)
        print(f"✅ {name} 下载完成\n")

    print("🎉 所有数据集已下载到", SAVE_DIR)


if __name__ == '__main__':
    main()