Spaces:
Paused
Paused
lanny xu
committed on
Commit
·
e427a94
1
Parent(s):
69629dd
delete files
Browse files- KAGGLE_CHECK_OLLAMA.py +0 -181
- KAGGLE_FIX_OLLAMA_CONNECTION.py +0 -233
- KAGGLE_LOAD_OLLAMA.py +0 -268
- KAGGLE_QUICK_START.py +0 -197
- KAGGLE_SAVE_OLLAMA.py +0 -282
KAGGLE_CHECK_OLLAMA.py
DELETED
|
@@ -1,181 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
Kaggle Ollama 备份与加载 - 快速验证脚本
|
| 3 |
-
|
| 4 |
-
这个脚本帮助你验证 Ollama 和模型的位置,确保备份方案正确
|
| 5 |
-
|
| 6 |
-
在 Kaggle Notebook 中运行此脚本,检查环境
|
| 7 |
-
"""
|
| 8 |
-
|
| 9 |
-
import os
|
| 10 |
-
import subprocess
|
| 11 |
-
import shutil
|
| 12 |
-
|
| 13 |
-
print("="*70)
|
| 14 |
-
print("🔍 Kaggle Ollama 环境检查")
|
| 15 |
-
print("="*70)
|
| 16 |
-
|
| 17 |
-
# ==================== 检查 Ollama 安装 ====================
|
| 18 |
-
print("\n📍 步骤 1: 检查 Ollama 安装位置")
|
| 19 |
-
|
| 20 |
-
ollama_bin = shutil.which('ollama')
|
| 21 |
-
if ollama_bin:
|
| 22 |
-
print(f" ✅ Ollama 已安装")
|
| 23 |
-
print(f" 📂 位置: {ollama_bin}")
|
| 24 |
-
|
| 25 |
-
# 检查文件信息
|
| 26 |
-
file_size = os.path.getsize(ollama_bin) / (1024**2)
|
| 27 |
-
print(f" 📊 大小: {file_size:.2f} MB")
|
| 28 |
-
|
| 29 |
-
# 检查版本
|
| 30 |
-
version_result = subprocess.run(['ollama', '--version'], capture_output=True, text=True)
|
| 31 |
-
if version_result.returncode == 0:
|
| 32 |
-
print(f" 📌 版本: {version_result.stdout.strip()}")
|
| 33 |
-
else:
|
| 34 |
-
print(" ❌ Ollama 未安装")
|
| 35 |
-
print(" 💡 请先运行安装:")
|
| 36 |
-
print(" !curl -fsSL https://ollama.com/install.sh | sh")
|
| 37 |
-
|
| 38 |
-
# ==================== 检查 Ollama 服务 ====================
|
| 39 |
-
print("\n📍 步骤 2: 检查 Ollama 服务状态")
|
| 40 |
-
|
| 41 |
-
ps_check = subprocess.run(['pgrep', '-f', 'ollama serve'], capture_output=True)
|
| 42 |
-
if ps_check.returncode == 0:
|
| 43 |
-
print(" ✅ Ollama 服务正在运行")
|
| 44 |
-
else:
|
| 45 |
-
print(" ⚠️ Ollama 服务未运行")
|
| 46 |
-
print(" 💡 请启动服务:")
|
| 47 |
-
print(" import subprocess, time")
|
| 48 |
-
print(" subprocess.Popen(['ollama', 'serve'])")
|
| 49 |
-
print(" time.sleep(15)")
|
| 50 |
-
|
| 51 |
-
# ==================== 检查模型位置 ====================
|
| 52 |
-
print("\n📍 步骤 3: 检查模型存储位置")
|
| 53 |
-
|
| 54 |
-
possible_dirs = [
|
| 55 |
-
"~/.ollama",
|
| 56 |
-
"/root/.ollama",
|
| 57 |
-
"~/.ollama/models",
|
| 58 |
-
"/root/.ollama/models"
|
| 59 |
-
]
|
| 60 |
-
|
| 61 |
-
found_dirs = []
|
| 62 |
-
for dir_path in possible_dirs:
|
| 63 |
-
expanded_path = os.path.expanduser(dir_path)
|
| 64 |
-
if os.path.exists(expanded_path):
|
| 65 |
-
# 计算目录大小
|
| 66 |
-
total_size = 0
|
| 67 |
-
file_count = 0
|
| 68 |
-
|
| 69 |
-
for dirpath, dirnames, filenames in os.walk(expanded_path):
|
| 70 |
-
for filename in filenames:
|
| 71 |
-
fp = os.path.join(dirpath, filename)
|
| 72 |
-
if os.path.exists(fp):
|
| 73 |
-
total_size += os.path.getsize(fp)
|
| 74 |
-
file_count += 1
|
| 75 |
-
|
| 76 |
-
size_gb = total_size / (1024**3)
|
| 77 |
-
print(f"\n ✅ 找到: {expanded_path}")
|
| 78 |
-
print(f" 📊 大小: {size_gb:.2f} GB")
|
| 79 |
-
print(f" 📁 文件数: {file_count}")
|
| 80 |
-
|
| 81 |
-
# 显示目录结构
|
| 82 |
-
print(f" 📂 内容:")
|
| 83 |
-
for item in os.listdir(expanded_path)[:10]: # 只显示前10个
|
| 84 |
-
item_path = os.path.join(expanded_path, item)
|
| 85 |
-
if os.path.isdir(item_path):
|
| 86 |
-
print(f" • {item}/ (目录)")
|
| 87 |
-
else:
|
| 88 |
-
size = os.path.getsize(item_path) / (1024**2)
|
| 89 |
-
print(f" • {item} ({size:.2f} MB)")
|
| 90 |
-
|
| 91 |
-
found_dirs.append((expanded_path, size_gb))
|
| 92 |
-
|
| 93 |
-
if not found_dirs:
|
| 94 |
-
print("\n ❌ 未找到模型目录")
|
| 95 |
-
print(" 💡 请先下载模型:")
|
| 96 |
-
print(" !ollama pull mistral")
|
| 97 |
-
|
| 98 |
-
# ==================== 检查已下载的模型 ====================
|
| 99 |
-
print("\n📍 步骤 4: 检查已下载的模型")
|
| 100 |
-
|
| 101 |
-
if ollama_bin and ps_check.returncode == 0:
|
| 102 |
-
list_result = subprocess.run(['ollama', 'list'], capture_output=True, text=True)
|
| 103 |
-
if list_result.returncode == 0:
|
| 104 |
-
print("\n 已下载的模型:")
|
| 105 |
-
print(" " + "-"*60)
|
| 106 |
-
print(" " + list_result.stdout)
|
| 107 |
-
else:
|
| 108 |
-
print(" ⚠️ 无法获取模型列表")
|
| 109 |
-
print(" 请确保 Ollama 服务正在运行")
|
| 110 |
-
else:
|
| 111 |
-
print(" ⚠️ Ollama 服务未运行,无法检查模型")
|
| 112 |
-
|
| 113 |
-
# ==================== 推荐备份方案 ====================
|
| 114 |
-
print("\n" + "="*70)
|
| 115 |
-
print("📋 推荐备份方案")
|
| 116 |
-
print("="*70)
|
| 117 |
-
|
| 118 |
-
if found_dirs:
|
| 119 |
-
# 选择最大的目录(通常是完整的 .ollama 目录)
|
| 120 |
-
backup_dir = max(found_dirs, key=lambda x: x[1])[0]
|
| 121 |
-
backup_size = max(found_dirs, key=lambda x: x[1])[1]
|
| 122 |
-
|
| 123 |
-
print(f"\n推荐备份目录: {backup_dir}")
|
| 124 |
-
print(f"预计压缩包大小: ~{backup_size:.2f} GB")
|
| 125 |
-
|
| 126 |
-
print(f"\n💾 备份步骤:")
|
| 127 |
-
print(f"""
|
| 128 |
-
1. 使用 KAGGLE_SAVE_OLLAMA.py 脚本
|
| 129 |
-
exec(open('KAGGLE_SAVE_OLLAMA.py').read())
|
| 130 |
-
|
| 131 |
-
2. 脚本会自动:
|
| 132 |
-
• 找到 Ollama 二进制文件: {ollama_bin if ollama_bin else '未找到'}
|
| 133 |
-
• 打包模型目录: {backup_dir}
|
| 134 |
-
• 生成压缩包: /kaggle/working/ollama_backup/
|
| 135 |
-
|
| 136 |
-
3. 下载并创建 Dataset:
|
| 137 |
-
• 在 Notebook 右侧 Output 下载 ollama_backup 目录
|
| 138 |
-
• 访问 https://www.kaggle.com/datasets 创建 Dataset
|
| 139 |
-
• 上传 ollama 和 ollama_models.tar.gz
|
| 140 |
-
|
| 141 |
-
4. 后续使用:
|
| 142 |
-
• 添加 Dataset 到 Notebook
|
| 143 |
-
• 运行 KAGGLE_LOAD_OLLAMA.py
|
| 144 |
-
• 40-50秒完成加载!
|
| 145 |
-
""")
|
| 146 |
-
|
| 147 |
-
# 估算上传时间
|
| 148 |
-
upload_time_min = int(backup_size * 2) # 假设 2 分钟/GB
|
| 149 |
-
upload_time_max = int(backup_size * 5) # 假设 5 分钟/GB
|
| 150 |
-
|
| 151 |
-
print(f"⏱️ 预计时间:")
|
| 152 |
-
print(f" • 压缩时间: {int(backup_size * 0.5)}-{int(backup_size)} 分钟")
|
| 153 |
-
print(f" • 下载时间: {int(backup_size * 1)}-{int(backup_size * 3)} 分钟(取决于网络)")
|
| 154 |
-
print(f" • 上传时间: {upload_time_min}-{upload_time_max} 分钟(取决于网络)")
|
| 155 |
-
print(f" • 首次总计: ~{int(backup_size * 4)}-{int(backup_size * 10)} 分钟(一次性)")
|
| 156 |
-
print(f" • 后续加载: 40-50 秒(每次)")
|
| 157 |
-
|
| 158 |
-
else:
|
| 159 |
-
print("\n⚠️ 未找到模型目录,无法提供备份方案")
|
| 160 |
-
print("请先安装 Ollama 并下载模型")
|
| 161 |
-
|
| 162 |
-
# ==================== 环境摘要 ====================
|
| 163 |
-
print("\n" + "="*70)
|
| 164 |
-
print("📊 环境摘要")
|
| 165 |
-
print("="*70)
|
| 166 |
-
|
| 167 |
-
print(f"""
|
| 168 |
-
Ollama 安装: {'✅ 是' if ollama_bin else '❌ 否'}
|
| 169 |
-
Ollama 服务: {'✅ 运行中' if ps_check.returncode == 0 else '❌ 未运行'}
|
| 170 |
-
模型目录: {'✅ 找到 ' + str(len(found_dirs)) + ' 个' if found_dirs else '❌ 未找到'}
|
| 171 |
-
已下载模型: {'✅ 有' if ollama_bin and ps_check.returncode == 0 else '⚠️ 无法确认'}
|
| 172 |
-
|
| 173 |
-
准备就绪: {'✅ 可以开始备份' if (ollama_bin and found_dirs) else '❌ 请先完成安装和模型下载'}
|
| 174 |
-
""")
|
| 175 |
-
|
| 176 |
-
if ollama_bin and found_dirs:
|
| 177 |
-
print("💡 下一步: 运行 KAGGLE_SAVE_OLLAMA.py 开始备份")
|
| 178 |
-
else:
|
| 179 |
-
print("💡 下一步: 完成 Ollama 安装和模型下载")
|
| 180 |
-
|
| 181 |
-
print("\n" + "="*70)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
KAGGLE_FIX_OLLAMA_CONNECTION.py
DELETED
|
@@ -1,233 +0,0 @@
|
|
| 1 |
-
#!/usr/bin/env python3
|
| 2 |
-
"""
|
| 3 |
-
Kaggle Ollama 连接问题诊断和修复脚本
|
| 4 |
-
解决 GraphRAG 异步处理时的连接错误
|
| 5 |
-
"""
|
| 6 |
-
|
| 7 |
-
import subprocess
|
| 8 |
-
import time
|
| 9 |
-
import requests
|
| 10 |
-
import os
|
| 11 |
-
|
| 12 |
-
def check_ollama_service():
|
| 13 |
-
"""检查 Ollama 服务状态"""
|
| 14 |
-
print("="*70)
|
| 15 |
-
print("🔍 Ollama 服务诊断")
|
| 16 |
-
print("="*70)
|
| 17 |
-
|
| 18 |
-
# 1. 检查进程
|
| 19 |
-
print("\n1️⃣ 检查 Ollama 进程...")
|
| 20 |
-
ps_check = subprocess.run(['pgrep', '-f', 'ollama serve'], capture_output=True)
|
| 21 |
-
|
| 22 |
-
if ps_check.returncode == 0:
|
| 23 |
-
print(" ✅ Ollama 进程正在运行")
|
| 24 |
-
pids = ps_check.stdout.decode().strip().split('\n')
|
| 25 |
-
print(f" 📊 进程 PID: {', '.join(pids)}")
|
| 26 |
-
else:
|
| 27 |
-
print(" ❌ Ollama 进程未运行")
|
| 28 |
-
return False
|
| 29 |
-
|
| 30 |
-
# 2. 检查端口
|
| 31 |
-
print("\n2️⃣ 检查端口 11434...")
|
| 32 |
-
port_check = subprocess.run(
|
| 33 |
-
['netstat', '-tuln'],
|
| 34 |
-
capture_output=True,
|
| 35 |
-
text=True
|
| 36 |
-
)
|
| 37 |
-
|
| 38 |
-
if '11434' in port_check.stdout:
|
| 39 |
-
print(" ✅ 端口 11434 已监听")
|
| 40 |
-
else:
|
| 41 |
-
print(" ❌ 端口 11434 未监听")
|
| 42 |
-
return False
|
| 43 |
-
|
| 44 |
-
# 3. 测试 API 连接
|
| 45 |
-
print("\n3️⃣ 测试 API 连接...")
|
| 46 |
-
try:
|
| 47 |
-
response = requests.get('http://localhost:11434/api/tags', timeout=5)
|
| 48 |
-
if response.status_code == 200:
|
| 49 |
-
print(" ✅ API 连接正常")
|
| 50 |
-
models = response.json().get('models', [])
|
| 51 |
-
print(f" 📦 可用模型: {len(models)}")
|
| 52 |
-
for model in models:
|
| 53 |
-
print(f" • {model.get('name', 'unknown')}")
|
| 54 |
-
return True
|
| 55 |
-
else:
|
| 56 |
-
print(f" ❌ API 返回错误: {response.status_code}")
|
| 57 |
-
return False
|
| 58 |
-
except Exception as e:
|
| 59 |
-
print(f" ❌ API 连接失败: {e}")
|
| 60 |
-
return False
|
| 61 |
-
|
| 62 |
-
def start_ollama_service():
|
| 63 |
-
"""启动 Ollama 服务"""
|
| 64 |
-
print("\n"+"="*70)
|
| 65 |
-
print("🚀 启动 Ollama 服务")
|
| 66 |
-
print("="*70)
|
| 67 |
-
|
| 68 |
-
# 先杀死可能存在的僵尸进程
|
| 69 |
-
print("\n1️⃣ 清理旧进程...")
|
| 70 |
-
subprocess.run(['pkill', '-9', 'ollama'], capture_output=True)
|
| 71 |
-
time.sleep(2)
|
| 72 |
-
|
| 73 |
-
# 启动服务
|
| 74 |
-
print("\n2️⃣ 启动新服务...")
|
| 75 |
-
process = subprocess.Popen(
|
| 76 |
-
['ollama', 'serve'],
|
| 77 |
-
stdout=subprocess.PIPE,
|
| 78 |
-
stderr=subprocess.PIPE,
|
| 79 |
-
env=os.environ.copy()
|
| 80 |
-
)
|
| 81 |
-
|
| 82 |
-
print(f" ✅ 服务进程已启动 (PID: {process.pid})")
|
| 83 |
-
|
| 84 |
-
# 等待服务就绪
|
| 85 |
-
print("\n3️⃣ 等待服务就绪...")
|
| 86 |
-
max_wait = 30
|
| 87 |
-
for i in range(max_wait):
|
| 88 |
-
try:
|
| 89 |
-
response = requests.get('http://localhost:11434/api/tags', timeout=2)
|
| 90 |
-
if response.status_code == 200:
|
| 91 |
-
print(f" ✅ 服务就绪!(耗时 {i+1} 秒)")
|
| 92 |
-
return True
|
| 93 |
-
except:
|
| 94 |
-
pass
|
| 95 |
-
|
| 96 |
-
if i < max_wait - 1:
|
| 97 |
-
print(f" ⏳ 等待中... ({i+1}/{max_wait})", end='\r')
|
| 98 |
-
time.sleep(1)
|
| 99 |
-
|
| 100 |
-
print(f"\n ⚠️ 服务启动超时,但可能仍在初始化中")
|
| 101 |
-
return False
|
| 102 |
-
|
| 103 |
-
def test_generation():
|
| 104 |
-
"""测试生成功能"""
|
| 105 |
-
print("\n"+"="*70)
|
| 106 |
-
print("🧪 测试文本生成")
|
| 107 |
-
print("="*70)
|
| 108 |
-
|
| 109 |
-
print("\n ℹ️ 首次调用会加载模型到内存,需要 30-60 秒...")
|
| 110 |
-
print(" ⏳ 请耐心等待...\n")
|
| 111 |
-
|
| 112 |
-
try:
|
| 113 |
-
response = requests.post(
|
| 114 |
-
'http://localhost:11434/api/generate',
|
| 115 |
-
json={
|
| 116 |
-
"model": "mistral",
|
| 117 |
-
"prompt": "Say 'Hello' in one word",
|
| 118 |
-
"stream": False
|
| 119 |
-
},
|
| 120 |
-
timeout=120 # 增加到 120 秒,首次加载模型需要时间
|
| 121 |
-
)
|
| 122 |
-
|
| 123 |
-
if response.status_code == 200:
|
| 124 |
-
result = response.json()
|
| 125 |
-
print(f" ✅ 生成成功")
|
| 126 |
-
print(f" 📝 响应: {result.get('response', '')[:100]}")
|
| 127 |
-
return True
|
| 128 |
-
else:
|
| 129 |
-
print(f" ❌ 生成失败: {response.status_code}")
|
| 130 |
-
return False
|
| 131 |
-
except requests.exceptions.Timeout:
|
| 132 |
-
print(f" ⚠️ 生成超时(但这可能是模型加载中)")
|
| 133 |
-
print(f" 💡 建议:再等待 30 秒后重试")
|
| 134 |
-
return False
|
| 135 |
-
except Exception as e:
|
| 136 |
-
print(f" ❌ 生成错误: {e}")
|
| 137 |
-
return False
|
| 138 |
-
|
| 139 |
-
def main():
|
| 140 |
-
"""主函数"""
|
| 141 |
-
print("\n" + "="*70)
|
| 142 |
-
print("🔧 Kaggle Ollama 连接问题修复工具")
|
| 143 |
-
print("="*70)
|
| 144 |
-
print("\n解决问题: Cannot connect to host localhost:11434")
|
| 145 |
-
print("场景: GraphRAG 异步批处理时")
|
| 146 |
-
|
| 147 |
-
# 检查服务
|
| 148 |
-
is_running = check_ollama_service()
|
| 149 |
-
|
| 150 |
-
if not is_running:
|
| 151 |
-
print("\n⚠️ Ollama 服务未正常运行,正在修复...")
|
| 152 |
-
start_ollama_service()
|
| 153 |
-
|
| 154 |
-
# 再次检查
|
| 155 |
-
print("\n"+"="*70)
|
| 156 |
-
print("🔍 验证修复结果")
|
| 157 |
-
print("="*70)
|
| 158 |
-
is_running = check_ollama_service()
|
| 159 |
-
|
| 160 |
-
# 测试生成
|
| 161 |
-
if is_running:
|
| 162 |
-
test_generation()
|
| 163 |
-
|
| 164 |
-
# 输出建议
|
| 165 |
-
print("\n"+"="*70)
|
| 166 |
-
print("💡 使用建议")
|
| 167 |
-
print("="*70)
|
| 168 |
-
|
| 169 |
-
if is_running:
|
| 170 |
-
if test_generation():
|
| 171 |
-
print("""
|
| 172 |
-
✅ Ollama 服务完全就绪!现在可以运行 GraphRAG 了
|
| 173 |
-
|
| 174 |
-
📝 在 Kaggle Notebook 中运行:
|
| 175 |
-
|
| 176 |
-
from document_processor import DocumentProcessor
|
| 177 |
-
from graph_indexer import GraphRAGIndexer
|
| 178 |
-
|
| 179 |
-
# 初始化
|
| 180 |
-
processor = DocumentProcessor()
|
| 181 |
-
vectorstore, retriever, doc_splits = processor.setup_knowledge_base(
|
| 182 |
-
enable_graphrag=True
|
| 183 |
-
)
|
| 184 |
-
|
| 185 |
-
# GraphRAG 索引(异步处理)
|
| 186 |
-
indexer = GraphRAGIndexer(
|
| 187 |
-
enable_async=True, # 启用异步
|
| 188 |
-
async_batch_size=8 # 并发处理 8 个文档
|
| 189 |
-
)
|
| 190 |
-
|
| 191 |
-
graph = indexer.index_documents(doc_splits)
|
| 192 |
-
""")
|
| 193 |
-
else:
|
| 194 |
-
print("""
|
| 195 |
-
⚠️ Ollama 服务运行中,但模型可能还在加载
|
| 196 |
-
|
| 197 |
-
💡 解决方案:
|
| 198 |
-
|
| 199 |
-
1. 等待 30-60 秒让模型完全加载
|
| 200 |
-
2. 再次运行此脚本验证
|
| 201 |
-
3. 或者直接运行一次简单测试:
|
| 202 |
-
!curl http://localhost:11434/api/generate -d '{
|
| 203 |
-
"model": "mistral",
|
| 204 |
-
"prompt": "Hello",
|
| 205 |
-
"stream": false
|
| 206 |
-
}'
|
| 207 |
-
|
| 208 |
-
4. 如果上述测试成功,就可以运行 GraphRAG 了
|
| 209 |
-
""")
|
| 210 |
-
else:
|
| 211 |
-
print("""
|
| 212 |
-
❌ Ollama 服务仍然异常
|
| 213 |
-
|
| 214 |
-
🔧 手动修复步骤:
|
| 215 |
-
|
| 216 |
-
1. 在 Kaggle Notebook 新单元格运行:
|
| 217 |
-
!pkill -9 ollama
|
| 218 |
-
!ollama serve &
|
| 219 |
-
|
| 220 |
-
2. 等待 15 秒后,运行:
|
| 221 |
-
!curl http://localhost:11434/api/tags
|
| 222 |
-
|
| 223 |
-
3. 如果成功,重新运行此脚本验证
|
| 224 |
-
|
| 225 |
-
4. 如果失败,检查 Ollama 是否正确安装:
|
| 226 |
-
!which ollama
|
| 227 |
-
!ollama --version
|
| 228 |
-
""")
|
| 229 |
-
|
| 230 |
-
print("="*70)
|
| 231 |
-
|
| 232 |
-
if __name__ == "__main__":
|
| 233 |
-
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
KAGGLE_LOAD_OLLAMA.py
DELETED
|
@@ -1,268 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
Kaggle Ollama 加载脚本
|
| 3 |
-
从 Kaggle Dataset 快速加载 Ollama 和模型,无需重新下载
|
| 4 |
-
|
| 5 |
-
前置条件:
|
| 6 |
-
1. 已使用 KAGGLE_SAVE_OLLAMA.py 创建备份
|
| 7 |
-
2. 已在 Kaggle 上传 Dataset
|
| 8 |
-
3. 已在 Notebook 中添加该 Dataset
|
| 9 |
-
|
| 10 |
-
使用方法:
|
| 11 |
-
在 Kaggle Notebook 第一个单元格运行:
|
| 12 |
-
exec(open('/kaggle/working/adaptive_RAG/KAGGLE_LOAD_OLLAMA.py').read())
|
| 13 |
-
"""
|
| 14 |
-
|
| 15 |
-
import os
|
| 16 |
-
import subprocess
|
| 17 |
-
import tarfile
|
| 18 |
-
import shutil
|
| 19 |
-
import time
|
| 20 |
-
|
| 21 |
-
print("="*70)
|
| 22 |
-
print("📦 从 Dataset 加载 Ollama(快速启动)")
|
| 23 |
-
print("="*70)
|
| 24 |
-
|
| 25 |
-
# ==================== 配置 ====================
|
| 26 |
-
# 修改为你的 Dataset 名称
|
| 27 |
-
# 常见名称: ollama-mistral-backup, ollama-phi-backup, ollama-backup 等
|
| 28 |
-
DATASET_NAME = "ollama-mistral-backup" # 👈 修改这里为你的实际 Dataset 名称
|
| 29 |
-
DATASET_PATH = f"/kaggle/input/{DATASET_NAME}"
|
| 30 |
-
|
| 31 |
-
print(f"💡 提示: 如果 Dataset 不存在,请检查:")
|
| 32 |
-
print(f" 1. Dataset 是否已添加到 Notebook")
|
| 33 |
-
print(f" 2. Dataset 名称是否正确")
|
| 34 |
-
print(f" 3. 可用的 Datasets:")
|
| 35 |
-
import os
|
| 36 |
-
if os.path.exists("/kaggle/input"):
|
| 37 |
-
available = os.listdir("/kaggle/input")
|
| 38 |
-
if available:
|
| 39 |
-
for ds in available:
|
| 40 |
-
print(f" • {ds}")
|
| 41 |
-
else:
|
| 42 |
-
print(f" (无)")
|
| 43 |
-
print()
|
| 44 |
-
|
| 45 |
-
print(f"\n📋 配置:")
|
| 46 |
-
print(f" Dataset 路径: {DATASET_PATH}")
|
| 47 |
-
|
| 48 |
-
# ==================== 检查 Dataset ====================
|
| 49 |
-
print(f"\n🔍 步骤 1/5: 检查 Dataset...")
|
| 50 |
-
|
| 51 |
-
if not os.path.exists(DATASET_PATH):
|
| 52 |
-
print(f" ❌ Dataset 不存在: {DATASET_PATH}")
|
| 53 |
-
print(f"\n💡 请检查:")
|
| 54 |
-
print(f" 1. Dataset 是否已添加到 Notebook")
|
| 55 |
-
print(f" 2. Dataset 名称是否正确")
|
| 56 |
-
print(f" 3. 可用的 Datasets:")
|
| 57 |
-
|
| 58 |
-
if os.path.exists("/kaggle/input"):
|
| 59 |
-
for item in os.listdir("/kaggle/input"):
|
| 60 |
-
print(f" • {item}")
|
| 61 |
-
|
| 62 |
-
print(f"\n📝 如何添加 Dataset:")
|
| 63 |
-
print(f" 1. 点击右侧 'Add data' 按钮")
|
| 64 |
-
print(f" 2. 选择 'Your Datasets'")
|
| 65 |
-
print(f" 3. 找到你的 ollama 备份 Dataset")
|
| 66 |
-
print(f" 4. 点击 'Add'")
|
| 67 |
-
|
| 68 |
-
exit(1)
|
| 69 |
-
|
| 70 |
-
print(f" ✅ Dataset 存在")
|
| 71 |
-
|
| 72 |
-
# 列出 Dataset 内容
|
| 73 |
-
print(f"\n Dataset 内容:")
|
| 74 |
-
for item in os.listdir(DATASET_PATH):
|
| 75 |
-
item_path = os.path.join(DATASET_PATH, item)
|
| 76 |
-
if os.path.isfile(item_path):
|
| 77 |
-
size = os.path.getsize(item_path)
|
| 78 |
-
size_str = f"{size / (1024**3):.2f} GB" if size > 1024**3 else f"{size / (1024**2):.2f} MB"
|
| 79 |
-
print(f" • {item}: {size_str}")
|
| 80 |
-
|
| 81 |
-
# ==================== 安装 Ollama 二进制文件 ====================
|
| 82 |
-
print(f"\n🔧 步骤 2/5: 安装 Ollama 二进制文件...")
|
| 83 |
-
|
| 84 |
-
ollama_bin_source = os.path.join(DATASET_PATH, "ollama")
|
| 85 |
-
|
| 86 |
-
if os.path.exists(ollama_bin_source):
|
| 87 |
-
# 先停止可能正在运行的 Ollama 服务
|
| 88 |
-
print(f" 🛑 检查并停止现有 Ollama 进程...")
|
| 89 |
-
subprocess.run(['pkill', '-9', 'ollama'], capture_output=True)
|
| 90 |
-
time.sleep(2)
|
| 91 |
-
|
| 92 |
-
# 复制到系统路径
|
| 93 |
-
ollama_bin_dest = "/usr/local/bin/ollama"
|
| 94 |
-
|
| 95 |
-
try:
|
| 96 |
-
shutil.copy2(ollama_bin_source, ollama_bin_dest)
|
| 97 |
-
|
| 98 |
-
# 设置执行权限
|
| 99 |
-
os.chmod(ollama_bin_dest, 0o755)
|
| 100 |
-
|
| 101 |
-
print(f" ✅ Ollama 已安装到: {ollama_bin_dest}")
|
| 102 |
-
|
| 103 |
-
# 验证版本
|
| 104 |
-
version_result = subprocess.run(['ollama', '--version'], capture_output=True, text=True)
|
| 105 |
-
if version_result.returncode == 0:
|
| 106 |
-
print(f" 📌 {version_result.stdout.strip()}")
|
| 107 |
-
except OSError as e:
|
| 108 |
-
if "Text file busy" in str(e):
|
| 109 |
-
print(f" ⚠️ 文件被占用,尝试强制停止...")
|
| 110 |
-
subprocess.run(['killall', '-9', 'ollama'], capture_output=True)
|
| 111 |
-
time.sleep(3)
|
| 112 |
-
# 重试
|
| 113 |
-
shutil.copy2(ollama_bin_source, ollama_bin_dest)
|
| 114 |
-
os.chmod(ollama_bin_dest, 0o755)
|
| 115 |
-
print(f" ✅ Ollama 已安装(重试成功)")
|
| 116 |
-
else:
|
| 117 |
-
raise
|
| 118 |
-
else:
|
| 119 |
-
print(f" ❌ 未找到 Ollama 二进制文件")
|
| 120 |
-
exit(1)
|
| 121 |
-
|
| 122 |
-
# ==================== 解压模型文件 ====================
|
| 123 |
-
print(f"\n📦 步骤 3/5: 恢复模型文件...")
|
| 124 |
-
|
| 125 |
-
models_archive = os.path.join(DATASET_PATH, "ollama_models.tar.gz")
|
| 126 |
-
ollama_home = os.path.expanduser("~")
|
| 127 |
-
|
| 128 |
-
# 检查是否有压缩包
|
| 129 |
-
if os.path.exists(models_archive):
|
| 130 |
-
# 情况1: 有压缩包,需要解压
|
| 131 |
-
print(f" 找到模型压缩包: {os.path.getsize(models_archive) / (1024**3):.2f} GB")
|
| 132 |
-
print(f" 📦 开始解压(这可能需要 10-30 秒)...")
|
| 133 |
-
|
| 134 |
-
start_time = time.time()
|
| 135 |
-
|
| 136 |
-
with tarfile.open(models_archive, 'r:gz') as tar:
|
| 137 |
-
tar.extractall(ollama_home) # 会自动创建 ~/.ollama 目录
|
| 138 |
-
|
| 139 |
-
elapsed = time.time() - start_time
|
| 140 |
-
print(f" ✅ 解压完成(耗时: {int(elapsed)}秒)")
|
| 141 |
-
|
| 142 |
-
else:
|
| 143 |
-
# 情况2: 没有压缩包,检查是否已解压
|
| 144 |
-
print(f" ⚠️ 未找到压缩包,检查是否有解压后的文件...")
|
| 145 |
-
|
| 146 |
-
# 检查常见的解压后文件/目录
|
| 147 |
-
possible_sources = [
|
| 148 |
-
os.path.join(DATASET_PATH, ".ollama"), # 直接在根目录
|
| 149 |
-
os.path.join(DATASET_PATH, "ollama_model", ".ollama"), # 在 ollama_model 文件夹内(嵌套结构)
|
| 150 |
-
os.path.join(DATASET_PATH, "ollama_models", ".ollama"), # 在 ollama_models 文件夹内
|
| 151 |
-
os.path.join(DATASET_PATH, "ollama"), # 备用路径
|
| 152 |
-
os.path.join(DATASET_PATH, "models") # 备用路径
|
| 153 |
-
]
|
| 154 |
-
|
| 155 |
-
found = False
|
| 156 |
-
for source in possible_sources:
|
| 157 |
-
if os.path.exists(source):
|
| 158 |
-
print(f" ✅ 找到解压后的目录: {source}")
|
| 159 |
-
|
| 160 |
-
# 确定目标目录
|
| 161 |
-
if source.endswith(".ollama"):
|
| 162 |
-
# 直接复制整个 .ollama 目录
|
| 163 |
-
dest = os.path.join(ollama_home, ".ollama")
|
| 164 |
-
else:
|
| 165 |
-
# 创建 .ollama/models 目录
|
| 166 |
-
dest = os.path.join(ollama_home, ".ollama", "models")
|
| 167 |
-
os.makedirs(os.path.dirname(dest), exist_ok=True)
|
| 168 |
-
|
| 169 |
-
print(f" 📋 复制到: {dest}")
|
| 170 |
-
|
| 171 |
-
# 复制文件
|
| 172 |
-
if os.path.isdir(source):
|
| 173 |
-
shutil.copytree(source, dest, dirs_exist_ok=True)
|
| 174 |
-
else:
|
| 175 |
-
shutil.copy2(source, dest)
|
| 176 |
-
|
| 177 |
-
found = True
|
| 178 |
-
break
|
| 179 |
-
|
| 180 |
-
if not found:
|
| 181 |
-
print(f" ❌ 未找到模型文件")
|
| 182 |
-
print(f"\n Dataset 内容:")
|
| 183 |
-
for item in os.listdir(DATASET_PATH):
|
| 184 |
-
print(f" • {item}")
|
| 185 |
-
exit(1)
|
| 186 |
-
|
| 187 |
-
# 检查模型目录
|
| 188 |
-
models_dir = os.path.join(ollama_home, ".ollama")
|
| 189 |
-
if os.path.exists(models_dir):
|
| 190 |
-
total_size = sum(
|
| 191 |
-
os.path.getsize(os.path.join(dirpath, filename))
|
| 192 |
-
for dirpath, dirnames, filenames in os.walk(models_dir)
|
| 193 |
-
for filename in filenames
|
| 194 |
-
)
|
| 195 |
-
print(f" 📊 模型总大小: {total_size / (1024**3):.2f} GB")
|
| 196 |
-
else:
|
| 197 |
-
print(f" ❌ 未找到模型压缩包")
|
| 198 |
-
exit(1)
|
| 199 |
-
|
| 200 |
-
# ==================== 启动 Ollama 服务 ====================
|
| 201 |
-
print(f"\n🚀 步骤 4/5: 启动 Ollama 服务...")
|
| 202 |
-
|
| 203 |
-
# 检查是否已运行
|
| 204 |
-
ps_check = subprocess.run(['pgrep', '-f', 'ollama serve'], capture_output=True)
|
| 205 |
-
|
| 206 |
-
if ps_check.returncode == 0:
|
| 207 |
-
print(f" ✅ Ollama 服务已在运行")
|
| 208 |
-
else:
|
| 209 |
-
print(f" 🔄 启动服务...")
|
| 210 |
-
subprocess.Popen(
|
| 211 |
-
['ollama', 'serve'],
|
| 212 |
-
stdout=subprocess.PIPE,
|
| 213 |
-
stderr=subprocess.PIPE
|
| 214 |
-
)
|
| 215 |
-
|
| 216 |
-
print(f" ⏳ 等待服务启动(15秒)...")
|
| 217 |
-
time.sleep(15)
|
| 218 |
-
|
| 219 |
-
# 验证服务
|
| 220 |
-
import requests
|
| 221 |
-
try:
|
| 222 |
-
response = requests.get('http://localhost:11434/api/tags', timeout=10)
|
| 223 |
-
if response.status_code == 200:
|
| 224 |
-
print(f" ✅ Ollama 服务运行正常")
|
| 225 |
-
except Exception as e:
|
| 226 |
-
print(f" ⚠️ 服务验证失败: {e}")
|
| 227 |
-
print(f" 但可能仍在启动中...")
|
| 228 |
-
|
| 229 |
-
# ==================== 验证模型 ====================
|
| 230 |
-
print(f"\n✅ 步骤 5/5: 验证模型...")
|
| 231 |
-
|
| 232 |
-
list_result = subprocess.run(['ollama', 'list'], capture_output=True, text=True)
|
| 233 |
-
print(f"\n 可用模型:")
|
| 234 |
-
print(f" {list_result.stdout}")
|
| 235 |
-
|
| 236 |
-
# ==================== 完成 ====================
|
| 237 |
-
print("="*70)
|
| 238 |
-
print("✅ Ollama 加载完成!")
|
| 239 |
-
print("="*70)
|
| 240 |
-
|
| 241 |
-
print(f"\n📊 加载总结:")
|
| 242 |
-
print(f" • Ollama 服务: ✅ 运行中")
|
| 243 |
-
print(f" • 模型: ✅ 已加载")
|
| 244 |
-
print(f" • 总耗时: < 1 分钟")
|
| 245 |
-
|
| 246 |
-
print(f"\n💡 对比:")
|
| 247 |
-
print(f" • 传统方式: 5-10 分钟(重新下载)")
|
| 248 |
-
print(f" • Dataset 方式: < 1 分钟(直接加载)")
|
| 249 |
-
print(f" • 节省时间: 约 90%!")
|
| 250 |
-
|
| 251 |
-
print(f"\n🧪 快速测试:")
|
| 252 |
-
print(f" 在新单元格运行:")
|
| 253 |
-
print(f" !ollama run mistral 'Hi, respond in one word'")
|
| 254 |
-
|
| 255 |
-
print(f"\n📝 下一步:")
|
| 256 |
-
print(f" 继续运行你的 GraphRAG 索引:")
|
| 257 |
-
print(f"""
|
| 258 |
-
from document_processor import DocumentProcessor
|
| 259 |
-
from graph_indexer import GraphRAGIndexer
|
| 260 |
-
|
| 261 |
-
processor = DocumentProcessor()
|
| 262 |
-
vectorstore, retriever, doc_splits = processor.setup_knowledge_base(enable_graphrag=True)
|
| 263 |
-
|
| 264 |
-
indexer = GraphRAGIndexer(async_batch_size=8)
|
| 265 |
-
graph = indexer.index_documents(doc_splits)
|
| 266 |
-
""")
|
| 267 |
-
|
| 268 |
-
print("\n" + "="*70)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
KAGGLE_QUICK_START.py
DELETED
|
@@ -1,197 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
Kaggle 快速启动脚本 - 避免重复下载大模型
|
| 3 |
-
使用优化的小模型配置,大幅减少启动时间
|
| 4 |
-
|
| 5 |
-
使用方法:
|
| 6 |
-
在 Kaggle Notebook 第一个单元格运行:
|
| 7 |
-
exec(open('/kaggle/working/adaptive_RAG/KAGGLE_QUICK_START.py').read())
|
| 8 |
-
"""
|
| 9 |
-
|
| 10 |
-
import os
|
| 11 |
-
import subprocess
|
| 12 |
-
import sys
|
| 13 |
-
import time
|
| 14 |
-
|
| 15 |
-
print("🚀 Kaggle 快速启动(优化版)")
|
| 16 |
-
print("="*70)
|
| 17 |
-
|
| 18 |
-
# ==================== 配置区域 ====================
|
| 19 |
-
REPO_URL = "https://github.com/LannyCodes/adaptive_RAG.git"
|
| 20 |
-
PROJECT_DIR = "/kaggle/working/adaptive_RAG"
|
| 21 |
-
|
| 22 |
-
# 模型选择(根据需求修改)
|
| 23 |
-
# "phi" - 1.6GB, 2-3分钟下载,质量好 ⭐⭐⭐⭐ (推荐)
|
| 24 |
-
# "tinyllama" - 600MB, 1分钟下载,质量中等 ⭐⭐⭐
|
| 25 |
-
# "qwen:0.5b" - 350MB, 30秒下载,质量较低 ⭐⭐
|
| 26 |
-
# "mistral" - 4GB, 5-10分钟下载,质量最好 ⭐⭐⭐⭐⭐ (慢)
|
| 27 |
-
|
| 28 |
-
PREFERRED_MODEL = "phi" # 👈 修改这里选择模型
|
| 29 |
-
|
| 30 |
-
print(f"\n📌 配置:")
|
| 31 |
-
print(f" • 仓库: {REPO_URL}")
|
| 32 |
-
print(f" • 模型: {PREFERRED_MODEL}")
|
| 33 |
-
print()
|
| 34 |
-
|
| 35 |
-
# ==================== 步骤 1: 克隆项目 ====================
|
| 36 |
-
print("📦 步骤 1/6: 克隆项目...")
|
| 37 |
-
|
| 38 |
-
os.chdir('/kaggle/working')
|
| 39 |
-
|
| 40 |
-
if os.path.exists(PROJECT_DIR):
|
| 41 |
-
print(" ✅ 项目已存在")
|
| 42 |
-
else:
|
| 43 |
-
result = subprocess.run(['git', 'clone', REPO_URL], capture_output=True, text=True)
|
| 44 |
-
if result.returncode == 0:
|
| 45 |
-
print(" ✅ 项目克隆成功")
|
| 46 |
-
else:
|
| 47 |
-
print(f" ❌ 克隆失败: {result.stderr}")
|
| 48 |
-
sys.exit(1)
|
| 49 |
-
|
| 50 |
-
os.chdir(PROJECT_DIR)
|
| 51 |
-
|
| 52 |
-
# ==================== 步骤 2: 修改配置使用小模型 ====================
|
| 53 |
-
print("\n⚙️ 步骤 2/6: 优化模型配置...")
|
| 54 |
-
|
| 55 |
-
config_file = 'config.py'
|
| 56 |
-
|
| 57 |
-
with open(config_file, 'r', encoding='utf-8') as f:
|
| 58 |
-
content = f.read()
|
| 59 |
-
|
| 60 |
-
# 替换模型配置
|
| 61 |
-
if 'LOCAL_LLM = "mistral"' in content:
|
| 62 |
-
content = content.replace(
|
| 63 |
-
'LOCAL_LLM = "mistral"',
|
| 64 |
-
f'LOCAL_LLM = "{PREFERRED_MODEL}" # Kaggle优化: 使用更小的模型'
|
| 65 |
-
)
|
| 66 |
-
|
| 67 |
-
with open(config_file, 'w', encoding='utf-8') as f:
|
| 68 |
-
f.write(content)
|
| 69 |
-
|
| 70 |
-
print(f" ✅ 已切换到 {PREFERRED_MODEL} 模型")
|
| 71 |
-
else:
|
| 72 |
-
print(f" ℹ️ 配置已是优化模式")
|
| 73 |
-
|
| 74 |
-
# ==================== 步骤 3: 检查并安装 Ollama ====================
|
| 75 |
-
print("\n🔧 步骤 3/6: 检查 Ollama...")
|
| 76 |
-
|
| 77 |
-
ollama_check = subprocess.run(['which', 'ollama'], capture_output=True)
|
| 78 |
-
|
| 79 |
-
if ollama_check.returncode == 0:
|
| 80 |
-
print(" ✅ Ollama 已安装")
|
| 81 |
-
else:
|
| 82 |
-
print(" 📥 安装 Ollama...")
|
| 83 |
-
subprocess.run('curl -fsSL https://ollama.com/install.sh | sh', shell=True)
|
| 84 |
-
time.sleep(3)
|
| 85 |
-
print(" ✅ Ollama 安装完成")
|
| 86 |
-
|
| 87 |
-
# 验证安装
|
| 88 |
-
version_result = subprocess.run(['ollama', '--version'], capture_output=True, text=True)
|
| 89 |
-
if version_result.returncode == 0:
|
| 90 |
-
print(f" 📌 {version_result.stdout.strip()}")
|
| 91 |
-
|
| 92 |
-
# ==================== 步骤 4: 启动 Ollama 服务 ====================
|
| 93 |
-
print("\n🚀 步骤 4/6: 启动 Ollama 服务...")
|
| 94 |
-
|
| 95 |
-
# 检查是否已运行
|
| 96 |
-
ps_check = subprocess.run(['pgrep', '-f', 'ollama serve'], capture_output=True)
|
| 97 |
-
|
| 98 |
-
if ps_check.returncode == 0:
|
| 99 |
-
print(" ✅ Ollama 服务已运行")
|
| 100 |
-
else:
|
| 101 |
-
print(" 🔄 启动服务...")
|
| 102 |
-
subprocess.Popen(['ollama', 'serve'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
| 103 |
-
time.sleep(15)
|
| 104 |
-
|
| 105 |
-
# 验证
|
| 106 |
-
import requests
|
| 107 |
-
try:
|
| 108 |
-
response = requests.get('http://localhost:11434/api/tags', timeout=10)
|
| 109 |
-
if response.status_code == 200:
|
| 110 |
-
print(" ✅ 服务运行正常")
|
| 111 |
-
except:
|
| 112 |
-
print(" ⚠️ 服务验证失败,但可能仍在启动中...")
|
| 113 |
-
|
| 114 |
-
# ==================== 步骤 5: 下载优化的模型 ====================
|
| 115 |
-
print(f"\n📦 步骤 5/6: 下载 {PREFERRED_MODEL} 模型...")
|
| 116 |
-
|
| 117 |
-
# 检查模型是否已存在
|
| 118 |
-
list_result = subprocess.run(['ollama', 'list'], capture_output=True, text=True)
|
| 119 |
-
|
| 120 |
-
if PREFERRED_MODEL in list_result.stdout:
|
| 121 |
-
print(f" ✅ {PREFERRED_MODEL} 模型已存在")
|
| 122 |
-
else:
|
| 123 |
-
# 显示预计时间
|
| 124 |
-
time_estimates = {
|
| 125 |
-
"qwen:0.5b": "约30秒",
|
| 126 |
-
"tinyllama": "约1分钟",
|
| 127 |
-
"phi": "约2-3分钟",
|
| 128 |
-
"mistral": "约5-10分钟"
|
| 129 |
-
}
|
| 130 |
-
|
| 131 |
-
estimated_time = time_estimates.get(PREFERRED_MODEL, "未知")
|
| 132 |
-
|
| 133 |
-
print(f" 📥 开始下载(预计时间: {estimated_time})...")
|
| 134 |
-
print(f" ⏳ 请稍候...")
|
| 135 |
-
|
| 136 |
-
start_time = time.time()
|
| 137 |
-
|
| 138 |
-
pull_result = subprocess.run(
|
| 139 |
-
['ollama', 'pull', PREFERRED_MODEL],
|
| 140 |
-
capture_output=True,
|
| 141 |
-
text=True
|
| 142 |
-
)
|
| 143 |
-
|
| 144 |
-
elapsed = time.time() - start_time
|
| 145 |
-
|
| 146 |
-
if pull_result.returncode == 0:
|
| 147 |
-
print(f" ✅ 模型下载完成(耗时: {int(elapsed)}秒)")
|
| 148 |
-
else:
|
| 149 |
-
print(f" ⚠️ 下载警告: {pull_result.stderr[:200]}")
|
| 150 |
-
|
| 151 |
-
# ==================== 步骤 6: 安装 Python 依赖 ====================
|
| 152 |
-
print("\n📦 步骤 6/6: 安装 Python 依赖...")
|
| 153 |
-
|
| 154 |
-
subprocess.run([sys.executable, '-m', 'pip', 'install', '-r', 'requirements_graphrag.txt', '-q'])
|
| 155 |
-
subprocess.run([sys.executable, '-m', 'pip', 'install', '-U',
|
| 156 |
-
'langchain', 'langchain-core', 'langchain-community',
|
| 157 |
-
'langchain-text-splitters', '-q'])
|
| 158 |
-
|
| 159 |
-
print(" ✅ 依赖安装完成")
|
| 160 |
-
|
| 161 |
-
# ==================== 设置 Python 路径 ====================
|
| 162 |
-
if PROJECT_DIR not in sys.path:
|
| 163 |
-
sys.path.insert(0, PROJECT_DIR)
|
| 164 |
-
|
| 165 |
-
# ==================== 完成 ====================
|
| 166 |
-
print("\n" + "="*70)
|
| 167 |
-
print("✅ 环境准备完成!")
|
| 168 |
-
print("="*70)
|
| 169 |
-
|
| 170 |
-
print(f"\n📊 配置摘要:")
|
| 171 |
-
print(f" • 工作目录: {os.getcwd()}")
|
| 172 |
-
print(f" • 使用模型: {PREFERRED_MODEL}")
|
| 173 |
-
print(f" • Python路径: 已添加")
|
| 174 |
-
|
| 175 |
-
# 显示模型对比
|
| 176 |
-
print(f"\n📌 模型选择说明:")
|
| 177 |
-
print(" • phi (当前) - 平衡速度和质量,推荐日常使用")
|
| 178 |
-
print(" • tinyllama - 最快下载,适合快速测试")
|
| 179 |
-
print(" • mistral - 质量最高,但下载慢(不推荐Kaggle)")
|
| 180 |
-
|
| 181 |
-
print(f"\n💡 下一步:")
|
| 182 |
-
print(" 1. 开始 GraphRAG 索引:")
|
| 183 |
-
print(" from document_processor import DocumentProcessor")
|
| 184 |
-
print(" from graph_indexer import GraphRAGIndexer")
|
| 185 |
-
print(" ")
|
| 186 |
-
print(" doc_processor = DocumentProcessor()")
|
| 187 |
-
print(" vectorstore, retriever, doc_splits = doc_processor.setup_knowledge_base(enable_graphrag=True)")
|
| 188 |
-
print(" ")
|
| 189 |
-
print(" indexer = GraphRAGIndexer()")
|
| 190 |
-
print(" graph = indexer.index_documents(doc_splits, batch_size=3)")
|
| 191 |
-
print()
|
| 192 |
-
print(" 2. 如需切换模型,修改脚本顶部的 PREFERRED_MODEL 变量")
|
| 193 |
-
|
| 194 |
-
print("\n⚠️ 提示:")
|
| 195 |
-
print(f" • 当前使用 {PREFERRED_MODEL} 模型,比 Mistral 快 {2 if PREFERRED_MODEL == 'phi' else 5}x")
|
| 196 |
-
print(" • 会话结束后仍需重新下载(但速度已大幅提升)")
|
| 197 |
-
print(" • 如需最佳质量,本地开发时可用 Mistral")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
KAGGLE_SAVE_OLLAMA.py
DELETED
|
@@ -1,282 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
Kaggle Ollama 保存脚本
|
| 3 |
-
将 Ollama 和模型保存到 Kaggle Dataset,下次直接使用
|
| 4 |
-
|
| 5 |
-
使用步骤:
|
| 6 |
-
1. 首次运行: 安装 Ollama 和下载模型后,运行本脚本保存
|
| 7 |
-
2. 后续使用: 使用 KAGGLE_LOAD_OLLAMA.py 从 Dataset 加载
|
| 8 |
-
|
| 9 |
-
注意: 需要手动创建 Kaggle Dataset 并上传
|
| 10 |
-
"""
|
| 11 |
-
|
| 12 |
-
import os
|
| 13 |
-
import subprocess
|
| 14 |
-
import shutil
|
| 15 |
-
import tarfile
|
| 16 |
-
import time
|
| 17 |
-
from pathlib import Path
|
| 18 |
-
|
| 19 |
-
print("="*70)
|
| 20 |
-
print("💾 Kaggle Ollama 保存工具")
|
| 21 |
-
print("="*70)
|
| 22 |
-
|
| 23 |
-
# ==================== 配置 ====================
|
| 24 |
-
OUTPUT_DIR = "/kaggle/working/ollama_backup"
|
| 25 |
-
MODEL_NAME = "mistral" # 或者 "phi", "tinyllama" 等
|
| 26 |
-
|
| 27 |
-
print(f"\n📋 配置:")
|
| 28 |
-
print(f" 模型: {MODEL_NAME}")
|
| 29 |
-
print(f" 输出目录: {OUTPUT_DIR}")
|
| 30 |
-
|
| 31 |
-
# ==================== 步骤 1: 创建输出目录 ====================
|
| 32 |
-
print(f"\n📁 步骤 1/4: 创建备份目录...")
|
| 33 |
-
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
| 34 |
-
print(f" ✅ 目录创建成功")
|
| 35 |
-
|
| 36 |
-
# ==================== 步骤 2: 备份 Ollama 二进制文件 ====================
|
| 37 |
-
print(f"\n📦 步骤 2/4: 备份 Ollama 二进制文件...")
|
| 38 |
-
|
| 39 |
-
ollama_bin = shutil.which('ollama')
|
| 40 |
-
if ollama_bin:
|
| 41 |
-
print(f" 找到 Ollama: {ollama_bin}")
|
| 42 |
-
|
| 43 |
-
# 复制二进制文件
|
| 44 |
-
shutil.copy2(ollama_bin, os.path.join(OUTPUT_DIR, 'ollama'))
|
| 45 |
-
print(f" ✅ Ollama 二进制文件已备份")
|
| 46 |
-
else:
|
| 47 |
-
print(f" ❌ 未找到 Ollama,请先安装")
|
| 48 |
-
exit(1)
|
| 49 |
-
|
| 50 |
-
# ==================== 步骤 3: 备份模型文件 ====================
|
| 51 |
-
print(f"\n🤖 步骤 3/4: 备份 {MODEL_NAME} 模型...")
|
| 52 |
-
|
| 53 |
-
# Ollama 模型存储位置(可能在不同位置)
|
| 54 |
-
possible_model_dirs = [
|
| 55 |
-
os.path.expanduser("~/.ollama/models"),
|
| 56 |
-
"/root/.ollama/models",
|
| 57 |
-
os.path.expanduser("~/.ollama")
|
| 58 |
-
]
|
| 59 |
-
|
| 60 |
-
ollama_models_dir = None
|
| 61 |
-
for dir_path in possible_model_dirs:
|
| 62 |
-
if os.path.exists(dir_path) and os.path.isdir(dir_path):
|
| 63 |
-
# 检查是否有内容
|
| 64 |
-
if os.listdir(dir_path):
|
| 65 |
-
ollama_models_dir = os.path.dirname(dir_path) if dir_path.endswith('models') else dir_path
|
| 66 |
-
break
|
| 67 |
-
|
| 68 |
-
if ollama_models_dir and os.path.exists(ollama_models_dir):
|
| 69 |
-
print(f" 找到模型目录: {ollama_models_dir}")
|
| 70 |
-
|
| 71 |
-
# 计算目录大小
|
| 72 |
-
total_size = sum(
|
| 73 |
-
os.path.getsize(os.path.join(dirpath, filename))
|
| 74 |
-
for dirpath, dirnames, filenames in os.walk(ollama_models_dir)
|
| 75 |
-
for filename in filenames
|
| 76 |
-
)
|
| 77 |
-
print(f" 模型总大小: {total_size / (1024**3):.2f} GB")
|
| 78 |
-
|
| 79 |
-
# 创建压缩包(整个 .ollama 目录)
|
| 80 |
-
models_archive = os.path.join(OUTPUT_DIR, 'ollama_models.tar.gz')
|
| 81 |
-
print(f" 📦 创建压缩包(这可能需要几分钟)...")
|
| 82 |
-
print(f" 正在压缩: {ollama_models_dir}")
|
| 83 |
-
|
| 84 |
-
start_time = time.time()
|
| 85 |
-
with tarfile.open(models_archive, 'w:gz') as tar:
|
| 86 |
-
tar.add(ollama_models_dir, arcname='.ollama')
|
| 87 |
-
|
| 88 |
-
elapsed = time.time() - start_time
|
| 89 |
-
archive_size = os.path.getsize(models_archive) / (1024**3)
|
| 90 |
-
|
| 91 |
-
print(f" ✅ 压缩完成")
|
| 92 |
-
print(f" 耗时: {int(elapsed)}秒")
|
| 93 |
-
print(f" 压缩包大小: {archive_size:.2f} GB")
|
| 94 |
-
else:
|
| 95 |
-
print(f" ❌ 未找到模型目录")
|
| 96 |
-
print(f" 请先运行: ollama pull {MODEL_NAME}")
|
| 97 |
-
exit(1)
|
| 98 |
-
|
| 99 |
-
# ==================== 步骤 4: 生成说明文件 ====================
|
| 100 |
-
print(f"\n📝 步骤 4/4: 生成说明文件...")
|
| 101 |
-
|
| 102 |
-
readme_content = f"""# Ollama 备份包
|
| 103 |
-
|
| 104 |
-
## 内容
|
| 105 |
-
- `ollama`: Ollama 二进制文件
|
| 106 |
-
- `ollama_models.tar.gz`: 模型文件压缩包(包含 {MODEL_NAME})
|
| 107 |
-
|
| 108 |
-
## 备份信息
|
| 109 |
-
- 备份时间: {time.strftime('%Y-%m-%d %H:%M:%S')}
|
| 110 |
-
- 模型: {MODEL_NAME}
|
| 111 |
-
- 压缩包大小: {archive_size:.2f} GB
|
| 112 |
-
|
| 113 |
-
## 使用方法
|
| 114 |
-
|
| 115 |
-
### 1. 创建 Kaggle Dataset
|
| 116 |
-
|
| 117 |
-
1. 下载此目录中的所有文件到本地
|
| 118 |
-
2. 在 Kaggle 网站创建新 Dataset:
|
| 119 |
-
- 访问: https://www.kaggle.com/datasets
|
| 120 |
-
- 点击 "New Dataset"
|
| 121 |
-
- 上传 `ollama` 和 `ollama_models.tar.gz`
|
| 122 |
-
- 命名为: `ollama-{MODEL_NAME}-backup`
|
| 123 |
-
- 设置为 Private
|
| 124 |
-
- 点击 "Create"
|
| 125 |
-
|
| 126 |
-
### 2. 在 Notebook 中加载
|
| 127 |
-
|
| 128 |
-
在 Kaggle Notebook 中:
|
| 129 |
-
|
| 130 |
-
1. 添加 Dataset:
|
| 131 |
-
- 点击右侧 "Add data" → "Your Datasets"
|
| 132 |
-
- 选择你创建的 `ollama-{MODEL_NAME}-backup`
|
| 133 |
-
|
| 134 |
-
2. 运行加载脚本:
|
| 135 |
-
```python
|
| 136 |
-
# 使用项目中的 KAGGLE_LOAD_OLLAMA.py
|
| 137 |
-
exec(open('/kaggle/working/adaptive_RAG/KAGGLE_LOAD_OLLAMA.py').read())
|
| 138 |
-
```
|
| 139 |
-
|
| 140 |
-
### 3. 验证
|
| 141 |
-
|
| 142 |
-
```bash
|
| 143 |
-
# 检查 Ollama
|
| 144 |
-
ollama --version
|
| 145 |
-
|
| 146 |
-
# 检查模型
|
| 147 |
-
ollama list
|
| 148 |
-
|
| 149 |
-
# 测试运行
|
| 150 |
-
ollama run {MODEL_NAME} "Hello"
|
| 151 |
-
```
|
| 152 |
-
|
| 153 |
-
## 文件大小参考
|
| 154 |
-
|
| 155 |
-
不同模型的压缩包大小(近似值):
|
| 156 |
-
- qwen:0.5b: ~350 MB
|
| 157 |
-
- tinyllama: ~600 MB
|
| 158 |
-
- phi: ~1.6 GB
|
| 159 |
-
- mistral: ~4 GB
|
| 160 |
-
- llama2:7b: ~3.8 GB
|
| 161 |
-
|
| 162 |
-
## 注意事项
|
| 163 |
-
|
| 164 |
-
1. ⚠️ Dataset 大小限制:
|
| 165 |
-
- 免费用户: 每个 Dataset 最大 20GB
|
| 166 |
-
- 需要确保压缩包 < 20GB
|
| 167 |
-
|
| 168 |
-
2. ⚠️ 上传时间:
|
| 169 |
-
- 取决于你的网络速度
|
| 170 |
-
- 4GB 文件可能需要 10-30 分钟
|
| 171 |
-
|
| 172 |
-
3. ✅ 优势:
|
| 173 |
-
- 只需上传一次
|
| 174 |
-
- 每次 Notebook 启动时直接加载(秒级)
|
| 175 |
-
- 节省大量时间
|
| 176 |
-
|
| 177 |
-
## 故障排除
|
| 178 |
-
|
| 179 |
-
### 问题: 上传失败
|
| 180 |
-
解决: 检查网络连接,或分多次上传
|
| 181 |
-
|
| 182 |
-
### 问题: Dataset 过大
|
| 183 |
-
解决: 使用更小的模型(如 phi 或 tinyllama)
|
| 184 |
-
|
| 185 |
-
### 问题: 加载后 Ollama 无法运行
|
| 186 |
-
解决: 检查文件权限,运行 `chmod +x /usr/local/bin/ollama`
|
| 187 |
-
"""
|
| 188 |
-
|
| 189 |
-
readme_file = os.path.join(OUTPUT_DIR, 'README.md')
|
| 190 |
-
with open(readme_file, 'w', encoding='utf-8') as f:
|
| 191 |
-
f.write(readme_content)
|
| 192 |
-
|
| 193 |
-
print(f" ✅ 说明文件已生成")
|
| 194 |
-
|
| 195 |
-
# ==================== 生成加载脚本(供参考) ====================
|
| 196 |
-
loader_script = os.path.join(OUTPUT_DIR, 'load_example.py')
|
| 197 |
-
with open(loader_script, 'w', encoding='utf-8') as f:
|
| 198 |
-
f.write(f'''"""
|
| 199 |
-
示例: 从 Kaggle Dataset 加载 Ollama
|
| 200 |
-
"""
|
| 201 |
-
import os
|
| 202 |
-
import subprocess
|
| 203 |
-
import tarfile
|
| 204 |
-
import shutil
|
| 205 |
-
|
| 206 |
-
# Dataset 路径(根据你的 Dataset 名称修改)
|
| 207 |
-
DATASET_PATH = "/kaggle/input/ollama-{MODEL_NAME}-backup"
|
| 208 |
-
|
| 209 |
-
print("📦 从 Dataset 加载 Ollama...")
|
| 210 |
-
|
| 211 |
-
# 1. 复制 Ollama 二进制文件
|
| 212 |
-
ollama_bin = os.path.join(DATASET_PATH, "ollama")
|
| 213 |
-
if os.path.exists(ollama_bin):
|
| 214 |
-
shutil.copy2(ollama_bin, "/usr/local/bin/ollama")
|
| 215 |
-
os.chmod("/usr/local/bin/ollama", 0o755)
|
| 216 |
-
print("✅ Ollama 二进制文件已安装")
|
| 217 |
-
|
| 218 |
-
# 2. 解压模型文件
|
| 219 |
-
models_archive = os.path.join(DATASET_PATH, "ollama_models.tar.gz")
|
| 220 |
-
if os.path.exists(models_archive):
|
| 221 |
-
print("📦 解压模型文件...")
|
| 222 |
-
with tarfile.open(models_archive, 'r:gz') as tar:
|
| 223 |
-
tar.extractall(os.path.expanduser("~/.ollama"))
|
| 224 |
-
print("✅ 模型已解压")
|
| 225 |
-
|
| 226 |
-
# 3. 启动 Ollama 服务
|
| 227 |
-
print("🚀 启动 Ollama 服务...")
|
| 228 |
-
subprocess.Popen(['ollama', 'serve'])
|
| 229 |
-
import time
|
| 230 |
-
time.sleep(15)
|
| 231 |
-
|
| 232 |
-
print("✅ Ollama 已准备就绪!")
|
| 233 |
-
print("\\n验证:")
|
| 234 |
-
subprocess.run(['ollama', 'list'])
|
| 235 |
-
''')
|
| 236 |
-
|
| 237 |
-
print(f" ✅ 示例脚本已生成")
|
| 238 |
-
|
| 239 |
-
# ==================== 显示文件列表 ====================
|
| 240 |
-
print(f"\n📊 备份内容:")
|
| 241 |
-
for item in os.listdir(OUTPUT_DIR):
|
| 242 |
-
item_path = os.path.join(OUTPUT_DIR, item)
|
| 243 |
-
size = os.path.getsize(item_path)
|
| 244 |
-
size_str = f"{size / (1024**3):.2f} GB" if size > 1024**3 else f"{size / (1024**2):.2f} MB"
|
| 245 |
-
print(f" • {item}: {size_str}")
|
| 246 |
-
|
| 247 |
-
# ==================== 后续步骤说明 ====================
|
| 248 |
-
print("\n" + "="*70)
|
| 249 |
-
print("✅ 备份完成!")
|
| 250 |
-
print("="*70)
|
| 251 |
-
|
| 252 |
-
print(f"\n📋 后续步骤:")
|
| 253 |
-
print(f"""
|
| 254 |
-
1. 下载备份文件到本地:
|
| 255 |
-
在 Kaggle Notebook 右侧 Output 中下载 {OUTPUT_DIR} 目录
|
| 256 |
-
|
| 257 |
-
2. 创建 Kaggle Dataset:
|
| 258 |
-
• 访问: https://www.kaggle.com/datasets
|
| 259 |
-
• 点击 "New Dataset"
|
| 260 |
-
• 上传以下文件:
|
| 261 |
-
- ollama (二进制文件)
|
| 262 |
-
- ollama_models.tar.gz (模型压缩包)
|
| 263 |
-
• 命名: ollama-{MODEL_NAME}-backup
|
| 264 |
-
• 点击 "Create"
|
| 265 |
-
|
| 266 |
-
3. 下次使用:
|
| 267 |
-
• 在 Notebook 中添加你的 Dataset
|
| 268 |
-
• 运行 KAGGLE_LOAD_OLLAMA.py 脚本
|
| 269 |
-
• 即可秒级加载,无需重新下载!
|
| 270 |
-
|
| 271 |
-
⏱️ 时间对比:
|
| 272 |
-
• 传统方式: 每次启动需要 5-10 分钟下载
|
| 273 |
-
• Dataset 方式: 每次启动只需 10-20 秒加载
|
| 274 |
-
• 节省时间: 每次节省 5-10 分钟!
|
| 275 |
-
|
| 276 |
-
💡 提示:
|
| 277 |
-
• 上传 Dataset 是一次性工作
|
| 278 |
-
• 之后每次 Notebook 启动都能快速加载
|
| 279 |
-
• 强烈推荐!
|
| 280 |
-
""")
|
| 281 |
-
|
| 282 |
-
print("\n查看详细说明: cat {}/README.md".format(OUTPUT_DIR))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|