Language Examples
cURL Examples
Copy
# Open an application (like Firefox): move to its icon, then double-click it
curl -X POST http://localhost:9990/computer-use \
  -H "Content-Type: application/json" \
  -d '{"action": "move_mouse", "coordinates": {"x": 100, "y": 950}}'

curl -X POST http://localhost:9990/computer-use \
  -H "Content-Type: application/json" \
  -d '{"action": "click_mouse", "button": "left", "clickCount": 2}'

# Wait for application to open (duration presumably in milliseconds; 2000
# matches the browser-launch wait used in the other examples)
curl -X POST http://localhost:9990/computer-use \
  -H "Content-Type: application/json" \
  -d '{"action": "wait", "duration": 2000}'

# Type URL in address bar
curl -X POST http://localhost:9990/computer-use \
  -H "Content-Type: application/json" \
  -d '{"action": "type_text", "text": "https://example.com"}'

# Press Enter to navigate
curl -X POST http://localhost:9990/computer-use \
  -H "Content-Type: application/json" \
  -d '{"action": "type_keys", "keys": ["enter"]}'
Python Examples
Copy
import requests
import time
def control_computer(action, **params):
    """Send one action to the computer-use endpoint and return its JSON reply.

    Args:
        action: Name of the action to perform, e.g. ``"move_mouse"``.
        **params: Extra fields placed in the JSON payload next to ``action``.

    Returns:
        The response body decoded from JSON.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within the timeout.
    """
    url = "http://localhost:9990/computer-use"
    data = {"action": action, **params}
    # requests waits indefinitely by default; bound the call so a hung
    # service cannot block the automation script forever.
    response = requests.post(url, json=data, timeout=60)
    # Fail loudly on HTTP errors instead of handing an error body back to the
    # caller as if it were a successful result.
    response.raise_for_status()
    return response.json()
def fill_web_form():
    """Fill in and submit a login-style form with mouse and keyboard actions.

    Clicks at a fixed screen position, types a username and a password,
    tabs to the login button, presses Enter, waits, and prints a
    confirmation message. The coordinates and credentials are placeholders.
    """
    # Navigate to a form (e.g., login form)
    control_computer("move_mouse", coordinates={"x": 500, "y": 300})
    control_computer("click_mouse", button="left")

    # Type username
    control_computer("type_text", text="user@example.com")

    # Tab to password field
    control_computer("type_keys", keys=["tab"])

    # Type password
    control_computer("type_text", text="secure_password")

    # Tab to login button
    control_computer("type_keys", keys=["tab"])

    # Press Enter to submit
    control_computer("type_keys", keys=["enter"])

    # Wait for page to load
    control_computer("wait", duration=2000)

    print("Form submitted successfully")
# Run the automation
# fill_web_form()
JavaScript/Node.js Examples
Copy
const axios = require('axios');
/**
 * POST a single action to the computer-use endpoint.
 *
 * @param {string} action - Name of the action to perform.
 * @param {object} [params={}] - Extra fields merged into the request body.
 * @returns {Promise<*>} The `data` property of the axios response.
 * @throws Re-throws any error from the request after logging its message.
 */
async function controlComputer(action, params = {}) {
  const endpoint = "http://localhost:9990/computer-use";
  const payload = { action, ...params };

  try {
    const { data: body } = await axios.post(endpoint, payload);
    return body;
  } catch (err) {
    // Log for visibility, then let the caller decide how to recover
    console.error('Error:', err.message);
    throw err;
  }
}
/**
 * Open a browser from its dock icon, replace the address bar contents
 * with `url`, and press Enter.
 *
 * Screen coordinates are fixed placeholders for the dock icon and URL bar.
 *
 * @param {string} url - Address typed into the URL bar.
 * @returns {Promise<void>}
 */
async function navigateToWebsite(url) {
  // Move the pointer to (x, y) and left-click there
  const clickAt = async (x, y) => {
    await controlComputer("move_mouse", { coordinates: { x, y } });
    await controlComputer("click_mouse", { button: "left" });
  };
  // Send one press_keys request for a single key in the given direction
  const sendKey = (key, press) =>
    controlComputer("press_keys", { keys: [key], press });
  // Press a key down and release it again
  const tapKey = async (key) => {
    await sendKey(key, "down");
    await sendKey(key, "up");
  };
  const pause = (duration) => controlComputer("wait", { duration });

  console.log(`Navigating to ${url}...`);

  // Open Firefox/Chrome by clicking on dock icon, then give it time to start
  await clickAt(100, 950);
  await pause(2000);

  // Focus the URL bar (usually near the top)
  await clickAt(400, 60);

  // Select all existing text: hold Ctrl, tap A, release Ctrl
  await sendKey("ctrl", "down");
  await tapKey("a");
  await sendKey("ctrl", "up");

  // Type the URL over the selection and press Enter to navigate
  await controlComputer("type_text", { text: url });
  await tapKey("enter");

  // Wait for page to load
  await pause(3000);

  console.log("Navigation completed");
}
// Usage
// navigateToWebsite("https://example.com").catch(console.error);
Common Automation Scenarios
Browser Automation Workflow
This example demonstrates a complete browser workflow, opening a site and interacting with it:
import requests
import time
def control_computer(action, **params):
    """Send one action to the computer-use endpoint and return its JSON reply.

    Args:
        action: Name of the action to perform, e.g. ``"screenshot"``.
        **params: Extra fields placed in the JSON payload next to ``action``.

    Returns:
        The response body decoded from JSON.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within the timeout.
    """
    url = "http://localhost:9990/computer-use"
    data = {"action": action, **params}
    # requests waits indefinitely by default; bound the call so a hung
    # service cannot block the workflow forever.
    response = requests.post(url, json=data, timeout=60)
    # Fail loudly on HTTP errors instead of handing an error body back to the
    # caller as if it were a successful result.
    response.raise_for_status()
    return response.json()
def browser_workflow():
    """Run a sample browser session: open, navigate, click, scroll, search.

    All screen coordinates are placeholders and must be adjusted to the
    actual desktop layout. Pauses between steps use ``time.sleep``.
    """
    # Open browser (assuming browser icon is at position x=100, y=960)
    control_computer("move_mouse", coordinates={"x": 100, "y": 960})
    control_computer("click_mouse", button="left")
    time.sleep(3)  # Wait for browser to open

    # Type URL and navigate. Keys are sent as a ``keys`` list, matching the
    # payload shape used by the other keyboard examples.
    control_computer("type_text", text="https://example.com")
    control_computer("type_keys", keys=["enter"])
    time.sleep(2)  # Wait for page to load

    # Take screenshot of the loaded page
    screenshot = control_computer("screenshot")

    # Click on a link (coordinates would need to be adjusted for your target)
    control_computer("move_mouse", coordinates={"x": 300, "y": 400})
    control_computer("click_mouse", button="left")
    time.sleep(2)

    # Scroll down
    # NOTE(review): confirm the scroll field names against the API reference.
    control_computer("scroll", direction="down", amount=500)

    # Fill a search box
    control_computer("move_mouse", coordinates={"x": 600, "y": 200})
    control_computer("click_mouse", button="left")
    control_computer("type_text", text="search query")
    control_computer("type_keys", keys=["enter"])


browser_workflow()
Form Filling Workflow
This example shows a complete form-filling process:
const axios = require("axios");
/**
 * POST a single action to the computer-use endpoint.
 *
 * @param {string} action - Name of the action to perform.
 * @param {object} [params={}] - Extra fields merged into the request body.
 * @returns {Promise<*>} The `data` property of the axios response.
 */
async function controlComputer(action, params = {}) {
  const endpoint = "http://localhost:9990/computer-use";
  const payload = { action, ...params };
  const { data: body } = await axios.post(endpoint, payload);
  return body;
}
/**
 * Open a form page in the browser, fill in name, email and message,
 * submit it, and take a screenshot afterwards.
 *
 * Screen coordinates are placeholders for the browser icon and the
 * first form field.
 *
 * @returns {Promise<void>}
 */
async function fillForm() {
  // Press a key and release it again. press_keys takes a `keys` array plus
  // a `press` direction, so every key press is a down/up pair.
  const tapKey = async (key) => {
    await controlComputer("press_keys", { keys: [key], press: "down" });
    await controlComputer("press_keys", { keys: [key], press: "up" });
  };

  // Navigate to form page
  await controlComputer("move_mouse", { coordinates: { x: 100, y: 960 } });
  await controlComputer("click_mouse", { button: "left" });
  await controlComputer("wait", { duration: 3000 });
  await controlComputer("type_text", { text: "https://example.com/form" });
  await tapKey("enter");
  await controlComputer("wait", { duration: 2000 });

  // Fill form
  // Name field
  await controlComputer("move_mouse", { coordinates: { x: 400, y: 250 } });
  await controlComputer("click_mouse", { button: "left" });
  // Type the value
  await controlComputer("type_text", { text: "John Doe" });

  // Email field (tab to next field)
  await tapKey("tab");
  await controlComputer("type_text", { text: "john.doe@example.com" });

  // Message field (tab to next field)
  await tapKey("tab");
  await controlComputer("type_text", {
    text: "This is an automated message sent using Bytebot's Computer Use API",
    delay: 30,
  });

  // Submit form: tab to the submit control, then press Enter
  await tapKey("tab");
  await tapKey("enter");

  // Take screenshot of confirmation page
  await controlComputer("wait", { duration: 2000 });
  const screenshot = await controlComputer("screenshot");
  console.log("Form submitted successfully");
}

fillForm().catch(console.error);
Automation Framework Integration
You can create a reusable automation framework with Bytebot:
import requests
import time
import json
class BytebotDriver:
    """A Selenium-like driver for Bytebot.

    Wraps the ``/computer-use`` endpoint in small helper methods for mouse,
    keyboard, scroll and screenshot actions.
    """

    def __init__(self, base_url="http://localhost:9990"):
        """Store the base URL of the service (no trailing slash expected)."""
        self.base_url = base_url

    def control_computer(self, action, **params):
        """Send one action to the service and return the decoded JSON reply.

        Args:
            action: Name of the action to perform.
            **params: Extra fields placed in the JSON payload.

        Returns:
            The response body decoded from JSON.

        Raises:
            requests.HTTPError: If the server answers with a 4xx/5xx status.
            requests.Timeout: If no response arrives within the timeout.
        """
        url = f"{self.base_url}/computer-use"
        data = {"action": action, **params}
        # Bound the call so a hung service cannot block the script forever.
        response = requests.post(url, json=data, timeout=60)
        # Surface HTTP errors instead of returning an error body as a result.
        response.raise_for_status()
        return response.json()

    def open_browser(self, browser_icon_coords):
        """Open a browser by clicking its icon"""
        self.control_computer("move_mouse", coordinates=browser_icon_coords)
        self.control_computer("click_mouse", button="left")
        time.sleep(3)  # Wait for browser to open

    def navigate_to(self, url):
        """Navigate to a URL in the browser"""
        self.control_computer("type_text", text=url)
        self.press_key("enter")
        time.sleep(2)  # Wait for page to load

    def click_element(self, coords):
        """Click an element at the specified coordinates"""
        self.control_computer("move_mouse", coordinates=coords)
        self.control_computer("click_mouse", button="left")

    def type_text(self, text):
        """Type text at the current cursor position"""
        self.control_computer("type_text", text=text)

    def press_key(self, key, modifiers=None):
        """Press and release a keyboard key with optional modifiers.

        Args:
            key: Name of the key to press, e.g. ``"enter"``.
            modifiers: Optional modifier key name, or a sequence of names,
                held down while ``key`` is pressed.
        """
        if not modifiers:
            held = []
        elif isinstance(modifiers, str):
            held = [modifiers]
        else:
            held = list(modifiers)
        # press_keys takes a ``keys`` list plus a ``press`` direction, so a
        # key press is a "down" request followed by an "up" request.
        keys = [*held, key]
        self.control_computer("press_keys", keys=keys, press="down")
        # Release in reverse order so modifiers are let go last.
        self.control_computer("press_keys", keys=keys[::-1], press="up")

    def take_screenshot(self):
        """Take a screenshot of the desktop"""
        return self.control_computer("screenshot")

    def scroll(self, direction, amount):
        """Scroll in the specified direction"""
        # NOTE(review): confirm the scroll field names against the API reference.
        self.control_computer("scroll", direction=direction, amount=amount)
# Example usage: drive a short browser session through BytebotDriver.
# The driver targets the default base URL, http://localhost:9990.
driver = BytebotDriver()
# Click the browser icon at these screen coordinates and wait for it to open
driver.open_browser({"x": 100, "y": 960})
# Type the address and press Enter
driver.navigate_to("https://example.com")
# Click at a fixed position on the loaded page, then type into it
driver.click_element({"x": 300, "y": 400})
driver.type_text("Hello Bytebot!")
driver.press_key("enter")
# Capture the desktop; the return value is the decoded JSON response
result = driver.take_screenshot()
# NOTE(review): assumes the screenshot response has a "success" key — confirm against the API
print(f"Screenshot captured: {result['success']}")