将 OpenClaw(一个 Python 爬虫框架)与 Nuxt.js 应用集成,常见有四种方案:API 集成、服务器端集成、Docker 容器化部署,以及基于 BullMQ 的任务队列。

方案一:API 集成(推荐)
部署 OpenClaw 为独立服务
# openclaw_api.py — standalone OpenClaw crawl service.
# Run with: uvicorn openclaw_api:app --port 8000
from fastapi import FastAPI
from pydantic import BaseModel
from openclaw import OpenClaw

app = FastAPI()
claw = OpenClaw()


class CrawlRequest(BaseModel):
    # JSON body sent by the Nuxt client: target URL plus optional CSS selector.
    url: str
    selector: str | None = None


# POST (not GET): the Nuxt plugin sends the parameters as a JSON body,
# and a GET endpoint would never receive them.
@app.post("/api/crawl")
async def crawl_data(req: CrawlRequest):
    """Crawl `req.url` (optionally scoped to `req.selector`) and wrap the result."""
    result = await claw.crawl(req.url, selector=req.selector)
    return {"data": result}
Nuxt.js 中调用 API
// plugins/openclaw.js
// Registers a `$crawl` helper on the Nuxt app that proxies to the OpenClaw API.
export default defineNuxtPlugin(() => {
  const { public: publicConfig } = useRuntimeConfig()

  // POSTs { url, ...options } to the crawl endpoint. Resolves to the crawled
  // data, or null on failure (errors are logged, never thrown to the caller).
  const crawl = async (url, options = {}) => {
    const endpoint = `${publicConfig.openclawApi}/api/crawl`
    try {
      const response = await $fetch(endpoint, {
        method: 'POST',
        body: { url, ...options }
      })
      return response.data
    } catch (error) {
      console.error('Crawl error:', error)
      return null
    }
  }

  return { provide: { crawl } }
})
// nuxt.config.js
// Exposes the OpenClaw API base URL to client and server code;
// override with the OPENCLAW_API_URL environment variable.
const fallbackApiUrl = 'http://localhost:8000'

export default defineNuxtConfig({
  runtimeConfig: {
    public: {
      openclawApi: process.env.OPENCLAW_API_URL || fallbackApiUrl
    }
  }
})
方案二:通过服务器端集成
使用 Nuxt.js 服务器路由
// server/api/crawl.post.js
// Bridges Nuxt to the Python OpenClaw runner via a child process.
import { execFile } from 'node:child_process'
import { promisify } from 'node:util'

const execFileAsync = promisify(execFile)

export default defineEventHandler(async (event) => {
  const { url, selector } = await readBody(event)

  // Validate before spawning anything: the original template-string exec()
  // (`python3 ... '${url}' '${selector}'`) was shell-injectable through
  // user-controlled url/selector values.
  let parsed
  try {
    parsed = new URL(url)
  } catch {
    setResponseStatus(event, 400)
    return { error: 'Invalid URL' }
  }
  if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
    setResponseStatus(event, 400)
    return { error: 'Invalid URL' }
  }

  try {
    // execFile passes arguments as an array — no shell, no injection.
    // Only pass the selector argument when one was actually provided
    // (the old code passed the literal string "undefined" otherwise).
    const args = ['openclaw_runner.py', url]
    if (selector) args.push(selector)
    const { stdout } = await execFileAsync('python3', args)
    return JSON.parse(stdout)
  } catch (error) {
    return { error: error.message }
  }
})
Python 脚本
# openclaw_runner.py
# CLI bridge invoked by the Nuxt server route: crawls one URL and prints
# the result as JSON on stdout so the Node side can JSON.parse it.
import sys
import json
import asyncio

from openclaw import OpenClaw


async def main():
    # argv[1] = target URL (required); argv[2] = optional CSS selector.
    if len(sys.argv) < 2:
        print(json.dumps({"error": "usage: openclaw_runner.py URL [SELECTOR]"}))
        sys.exit(2)
    url = sys.argv[1]
    selector = sys.argv[2] if len(sys.argv) > 2 else None

    claw = OpenClaw()
    try:
        result = await claw.crawl(url, selector=selector)
    except Exception as exc:
        # Still emit valid JSON on failure, so the caller's JSON.parse
        # succeeds and the error message reaches the HTTP response.
        print(json.dumps({"error": str(exc)}))
        sys.exit(1)

    print(json.dumps(result))


if __name__ == "__main__":
    asyncio.run(main())
方案三:Docker 容器化部署
# Dockerfile — multi-stage build for the two halves of the stack.

# Stage 1: build the Nuxt application.
FROM node:18-alpine AS nuxt-builder
WORKDIR /app
COPY nuxt-app/ .
RUN npm ci && npm run build

# Stage 2: the OpenClaw API service.
FROM python:3.10-slim AS openclaw
WORKDIR /app
COPY openclaw-api/ .
RUN pip install -r requirements.txt
# Serve the FastAPI app (same command as the local `uvicorn` run above).
CMD ["uvicorn", "openclaw_api:app", "--host", "0.0.0.0", "--port", "8000"]

# Alternatively, run both services together with Docker Compose (below).
# docker-compose.yml
# Runs the Nuxt front end and the OpenClaw API as linked services.
# (Reconstructed with proper YAML nesting — the flattened original was invalid.)
version: '3.8'
services:
  nuxt-app:
    build: ./nuxt-app
    ports:
      - "3000:3000"
    environment:
      # Service-name DNS inside the compose network.
      OPENCLAW_API_URL: "http://openclaw-api:8000"
    depends_on:
      - openclaw-api

  openclaw-api:
    build: ./openclaw-api
    ports:
      - "8000:8000"
方案四:使用 BullMQ 实现任务队列
创建爬虫任务队列
# worker.py
# BullMQ worker: consumes crawl jobs queued by the Nuxt API route.
import asyncio

from bullmq import Worker
from openclaw import OpenClaw


async def process_crawl(job):
    """Crawl the job's URL; the return value is stored by BullMQ as the job result."""
    url = job.data["url"]
    claw = OpenClaw()
    return await claw.crawl(url)


async def main():
    # Connection options must match the Node producer side
    # (connection: { host: 'redis', port: 6379 }).
    worker = Worker(
        "crawl-queue",
        process_crawl,
        {"connection": {"host": "redis", "port": 6379}},
    )
    # The worker starts consuming on construction; the original script
    # exited immediately because nothing kept the event loop alive.
    try:
        while True:
            await asyncio.sleep(3600)
    except asyncio.CancelledError:
        await worker.close()


if __name__ == "__main__":
    asyncio.run(main())
Nuxt.js API 路由
// server/api/crawl.post.js
// Enqueues a crawl job on the BullMQ queue instead of crawling inline.
import { Queue } from 'bullmq'

// Module scope: one shared queue/Redis connection per server process.
const queue = new Queue('crawl-queue', {
  connection: { host: 'redis', port: 6379 }
})

export default defineEventHandler(async (event) => {
  const { url } = await readBody(event)

  // Validate before enqueueing — consistent with the input-sanitization
  // guidance elsewhere in this setup; never queue a malformed URL.
  try {
    new URL(url)
  } catch {
    setResponseStatus(event, 400)
    return { error: 'Invalid URL' }
  }

  const job = await queue.add('crawl', { url })
  return {
    jobId: job.id,
    status: 'queued'
  }
})
最佳实践建议
安全性考虑
// 验证和清理输入
const isValidUrl = (url) => {
try {
new URL(url)
return true
} catch {
return false
}
}
// Set a request rate limit.
// NOTE(review): `rateLimit` is not defined in this snippet — presumably the
// express-rate-limit middleware factory; it must be imported and wired into
// the server's middleware chain for this to take effect. Confirm at the call site.
rateLimit({
windowMs: 15 * 60 * 1000, // 15-minute window
max: 100 // at most 100 requests per IP per window
})
缓存策略
// Cache crawl results with Nuxt's (unstorage-backed) useStorage.
export default defineEventHandler(async (event) => {
  // Bug fix: the original referenced `url` without ever reading it
  // from the request, so every lookup threw a ReferenceError.
  const { url } = await readBody(event)
  const cacheKey = `crawl:${url}`

  const cached = await useStorage().getItem(cacheKey)
  if (cached) return cached

  const data = await crawlFunction(url)
  // ttl (seconds) is honored by TTL-aware storage drivers (e.g. redis).
  await useStorage().setItem(cacheKey, data, { ttl: 3600 })
  return data
})
环境配置
# .env — runtime configuration for the Nuxt/OpenClaw integration.
OPENCLAW_API_URL=http://localhost:8000
# Request timeout in milliseconds.
CRAWL_TIMEOUT=30000
# Maximum link depth the crawler follows.
MAX_CRAWL_DEPTH=2
性能优化
- SSR 和静态生成

  // 在构建时预爬取数据
  export default defineNuxtConfig({
    nitro: {
      prerender: { crawlLinks: true }
    }
  })
- 增量静态再生(ISR)

  // 定期更新爬取的数据
  export default defineNuxtConfig({
    routeRules: {
      '/dynamic-data': {
        swr: 3600, // 每小时重新验证
        prerender: true
      }
    }
  })
错误处理
// Unified error handling: every response carries a `success` flag;
// failures answer HTTP 500 with the error's message and code.
export default defineEventHandler(async (event) => {
  try {
    return { success: true, data: await crawlData(event) }
  } catch (err) {
    setResponseStatus(event, 500)
    const { message, code } = err
    return { success: false, error: message, code }
  }
})
选择哪种方案取决于:
- 项目规模:小项目用方案一,大型项目用方案三或四
- 实时性要求:实时要求高用方案一,可延迟用方案四
- 团队熟悉度:熟悉Docker用方案三,熟悉队列用方案四
- 部署环境:Serverless环境需特殊适配
版权声明:除非特别标注,否则均为本站原创文章,转载时请以链接形式注明文章出处。