🌐 OpenClaw浏览器自动化全攻略

让AI替你浏览网页、填写表单、截图抓取

世界上有一种偷懒叫"让AI帮你点网页",它就像雇了一个不知疲倦的实习生。浏览器自动化是OpenClaw最强大的技能之一——它能看懂网页、点击按钮、填写表单、截图保存。

📋 browser工具操作一览

# Overview of browser() tool calls, grouped by category.

# Basic operations
browser(action="status")          # Show the browser status
browser(action="start")           # Start the browser
browser(action="stop")            # Shut the browser down
browser(action="tabs")            # List all tabs
browser(action="open", url="...") # Open a web page
browser(action="navigate", url="...")  # Navigate to a URL
browser(action="focus", targetId="...") # Focus a tab
browser(action="close", targetId="...") # Close a tab

# Page interaction
browser(action="snapshot")        # Get a structural snapshot of the page
browser(action="screenshot")      # Take a screenshot
browser(action="act", kind="click", ref="btn-login")     # Click
browser(action="act", kind="type", ref="input-name", text="张三")  # Type text
browser(action="act", kind="fill", ref="input-email", text="a@b.com")  # Fill a field
browser(action="act", kind="press", key="Enter")  # Press a key
browser(action="act", kind="hover", ref="menu-item")  # Hover
browser(action="act", kind="select", ref="dropdown", values=["option1"])  # Choose from a dropdown

# Advanced operations
browser(action="pdf")             # Export as PDF
browser(action="console")         # Fetch console logs
browser(action="upload")          # Upload a file
browser(action="evaluate", fn="document.title")  # Run JavaScript

🎯 核心操作:Snapshot + Act

浏览器自动化的核心流程:snapshot看页面 → act做操作

Snapshot详解

# Get a structural snapshot of the page
result = browser(action="snapshot", snapshotFormat="aria")
# Returns the page's ARIA tree, including element reference IDs

# Example output:
# [heading] "Log in" ref=h1
# [textbox] "Email" ref=e1
# [textbox] "Password" ref=e2
# [button] "Log in" ref=e3
# [link] "Forgot password" ref=e4

# refs="role" (the default) locates elements by role + name
result = browser(action="snapshot", refs="role")

# refs="aria" yields stable ARIA ref IDs
result = browser(action="snapshot", refs="aria")

Act交互详解

🖱️ 点击操作

# Click a button
browser(action="act", kind="click", ref="e3")

# Double-click
browser(action="act", kind="click", ref="e3", doubleClick=True)

# Click while holding modifier keys
browser(action="act", kind="click", ref="link", modifiers=["Ctrl", "Shift"])

⌨️ 输入操作

# Quick typing (suited to short text)
browser(action="act", kind="type", ref="e1", text="hello")

# Fill a form field (suited to long text; clears the field before filling)
browser(action="act", kind="fill", ref="e1", text="user@example.com")

# Type character by character (simulates real typing)
browser(action="act", kind="type", ref="e1", text="password123", slowly=True)

# Key presses
browser(action="act", kind="press", key="Enter")
browser(action="act", kind="press", key="Tab")
browser(action="act", kind="press", keys=["Control", "a"])  # Ctrl+A: select all

🖱️ 鼠标操作

# Hover
browser(action="act", kind="hover", ref="menu-item")

# Drag and drop
browser(action="act", kind="drag", startRef="item1", endRef="drop-zone")

# Resize
browser(action="act", kind="resize", ref="window", width=800, height=600)

📋 下拉选择

# Single selection
browser(action="act", kind="select", ref="country", values=["CN"])

# Multiple selection
browser(action="act", kind="select", ref="skills", values=["Python", "AI"])

🚀 实战案例1:自动登录

# 自动登录流程
def auto_login(url, email, password):
    """Log in through a web form and report whether it appeared to succeed.

    Opens ``url``, fills the email and password fields, clicks the login
    button, waits a fixed three seconds, and then inspects a fresh snapshot.

    Args:
        url: Address of the login page to open.
        email: Value filled into the ``email-input`` element.
        password: Value filled into the ``password-input`` element.

    Returns:
        "✅ 登录成功" when the text of the post-login snapshot contains
        "dashboard" (case-insensitive), otherwise "❌ 登录失败".
    """
    # Imported here because the surrounding file never imports ``time``.
    import time

    # 1. Open the login page
    browser(action="open", url=url)

    # 2. Snapshot the page structure before interacting with it.
    # NOTE(review): the refs used below are hard-coded example IDs; on a
    # real page they presumably have to be read from this snapshot - confirm.
    browser(action="snapshot")

    # 3. Fill in the email
    browser(action="act", kind="fill", ref="email-input", text=email)

    # 4. Fill in the password
    browser(action="act", kind="fill", ref="password-input", text=password)

    # 5. Click the login button
    browser(action="act", kind="click", ref="login-button")

    # 6. Give the next page a fixed amount of time to load
    time.sleep(3)

    # 7. Verify the login by looking for "dashboard" in the new snapshot
    result = browser(action="snapshot")
    if "dashboard" in result.text.lower():
        return "✅ 登录成功"
    return "❌ 登录失败"

🚀 实战案例2:网页数据采集

# 采集Hacker News热门帖子
def scrape_hackernews():
    """Collect the first ten posts from the Hacker News front page.

    Demonstrates two extraction approaches: parsing a page snapshot and
    running JavaScript in the page. Only the JavaScript result is returned.

    Returns:
        Whatever ``browser(action="evaluate", ...)`` returns for the
        extraction script, which maps each post row to an object with
        ``title``, ``link`` and ``rank``.
    """
    browser(action="open", url="https://news.ycombinator.com/")

    # Approach 1: parse the posts out of a page snapshot.
    # The parsed list is shown for illustration only and is not returned.
    snapshot = browser(action="snapshot")
    posts = parse_snapshot_for_posts(snapshot)

    # Approach 2: extract the data directly with JavaScript.
    extract_posts_js = """
        Array.from(document.querySelectorAll('.athing'))
            .slice(0, 10)
            .map(row => ({
                title: row.querySelector('.titleline > a').textContent,
                link: row.querySelector('.titleline > a').href,
                rank: row.querySelector('.rank').textContent
            }))
    """
    return browser(action="evaluate", fn=extract_posts_js)

# 采集竞品定价页面
def scrape_pricing_page(url):
    """Archive a screenshot of a pricing page and extract its plan data.

    Args:
        url: Address of the pricing page to open.

    Returns:
        Whatever ``browser(action="evaluate", ...)`` returns for the
        extraction script, which maps each ``.plan-card`` element to an
        object with ``name``, ``price`` and a ``features`` list.
    """
    # Imported here because the surrounding file never imports ``datetime``.
    import datetime

    browser(action="open", url=url)

    # Keep a dated full-page screenshot for the record. The ``{date}``
    # placeholder must be substituted explicitly; as a plain string it would
    # be used literally and every run would target the same file name.
    screenshot_path = "/screenshots/pricing-{date}.png".format(
        date=datetime.date.today().isoformat()
    )
    browser(action="screenshot",
            path=screenshot_path,
            fullPage=True)

    # Extract the pricing information
    pricing = browser(action="evaluate", fn="""
        Array.from(document.querySelectorAll('.plan-card'))
            .map(card => ({
                name: card.querySelector('h3').textContent,
                price: card.querySelector('.price').textContent,
                features: Array.from(card.querySelectorAll('li'))
                    .map(li => li.textContent)
            }))
    """)

    return pricing

🚀 实战案例3:表单自动填写

# 自动提交表单
def submit_contact_form(url, form_data):
    """Fill in and submit a contact form, then report the outcome.

    Args:
        url: Address of the page containing the form.
        form_data: Mapping of field label to value. The keys ``"country"``
            and ``"agree"`` are handled by a dropdown and a checkbox
            respectively; every other key is filled as text.

    Returns:
        "提交成功" when the text of the post-submit snapshot contains
        "success" (case-insensitive), otherwise "提交失败".
    """
    # Imported here because the surrounding file never imports ``time``.
    import time

    browser(action="open", url=url)

    # These keys have dedicated widgets below; pushing them through the
    # generic text fill as well would handle them twice.
    widget_fields = {"country", "agree"}

    # Fill the plain fields one by one
    for field, value in form_data.items():
        if field in widget_fields:
            continue
        # A fresh snapshot is taken for every field, as in the original flow.
        snapshot = browser(action="snapshot", compact=True)
        target_ref = find_ref_for_label(snapshot, field)

        # Fields without a matching ref are skipped silently (best effort).
        if target_ref:
            browser(action="act", kind="fill", ref=target_ref, text=value)

    # Dropdown selection
    if "country" in form_data:
        browser(action="act", kind="select",
                ref="country-select", values=[form_data["country"]])

    # Tick the checkbox only when the caller actually agreed; a falsy value
    # such as False must not trigger the click.
    if form_data.get("agree"):
        browser(action="act", kind="click", ref="checkbox-agree")

    # Submit
    browser(action="act", kind="click", ref="submit-button", submit=True)

    # Wait a fixed two seconds, then check the page for a success marker
    time.sleep(2)
    result = browser(action="snapshot")
    return "提交成功" if "success" in result.text.lower() else "提交失败"

📸 截图与PDF

# Full-page screenshot
browser(action="screenshot", 
        path="/screenshots/homepage.png",
        fullPage=True)

# Screenshot of a single region
browser(action="screenshot",
        ref="main-content",
        path="/screenshots/content.png")

# Export as PDF
browser(action="pdf",
        path="/reports/page.pdf")

# Wait for the "Loading..." text to disappear, then take the screenshot
browser(action="act", kind="wait", textGone="Loading...")
browser(action="screenshot", path="/screenshots/loaded.png")

⚡ 性能与稳定性技巧

🎯 ref稳定性
  • 优先使用refs="aria"获得稳定的ARIA ref ID
  • 跨调用保持相同ref:snapshot返回的ref在同一tab内稳定
  • 避免用CSS selector定位动态元素,优先语义化ref
# ✅ Good practice: keep context with targetId
# First snapshot
result1 = browser(action="snapshot", targetId="tab-123")
ref = result1.refs["submit-btn"]

# Reuse the same targetId for follow-up actions
browser(action="act", kind="click", ref=ref, targetId="tab-123")

# ✅ Good practice: efficient snapshots
browser(action="snapshot", mode="efficient")  # Only fetch what changed
browser(action="snapshot", maxChars=5000)     # Cap the returned length

# ✅ Good practice: avoid an unconditional act:wait
# Bad: waits indefinitely
browser(action="act", kind="wait")  

# Good: wait for specific text to disappear
browser(action="act", kind="wait", textGone="加载中...")

# Good: wait a fixed amount of time
time.sleep(2)  # Simple but effective
⚠️ 常见坑点
  • 未等待页面加载完成就snapshot,拿到空白页
  • 跨tab操作忘记指定targetId
  • popup对话框阻塞后续操作,需要close
  • 无限滚动页面需要多次scroll才能加载完
  • 反爬虫检测:设置合理的User-Agent和延迟

🔧 Profile选择

Profile | 说明 | 适用场景
默认(无profile) | OpenClaw隔离浏览器 | 日常自动化任务
profile="user" | 用户本地浏览器 | 需要登录态/cookie
# Use the user's browser (keeps the logged-in state)
browser(action="open", url="https://gmail.com", profile="user", target="host")

# Use the isolated browser (clean environment)
browser(action="open", url="https://example.com")