This page provides practical examples of how to use the Bytebot REST API in different programming languages and scenarios.

Language Examples

cURL Examples

# Open an application (like Firefox): move the pointer to its icon, then double-click
curl -X POST http://localhost:9990/computer-use \
  -H "Content-Type: application/json" \
  -d '{"action": "move_mouse", "coordinates": {"x": 100, "y": 950}}'

curl -X POST http://localhost:9990/computer-use \
  -H "Content-Type: application/json" \
  -d '{"action": "click_mouse", "button": "left", "numClicks": 2}'

# Wait for application to open
curl -X POST http://localhost:9990/computer-use \
  -H "Content-Type: application/json" \
  -d '{"action": "wait", "duration": 150}'

# Type URL in address bar
curl -X POST http://localhost:9990/computer-use \
  -H "Content-Type: application/json" \
  -d '{"action": "type_text", "text": "https://example.com"}'

# Press Enter to navigate
curl -X POST http://localhost:9990/computer-use \
  -H "Content-Type: application/json" \
  -d '{"action": "type_keys", "keys": ["enter"]}'

Python Examples

import requests
import time

def control_computer(action, **params):
    """Send one action to the Bytebot computer-use endpoint.

    Args:
        action: Action name, sent as the "action" field of the request body.
        **params: Extra fields merged into the JSON request body.

    Returns:
        The response body decoded from JSON.
    """
    url = "http://localhost:9990/computer-use"
    data = {"action": action, **params}
    response = requests.post(url, json=data)
    return response.json()

def fill_web_form():
    """Fill in and submit a login-style form using keyboard navigation.

    Clicks at a fixed screen position to focus the form, types a username
    and a password separated by Tab presses, tabs once more and presses
    Enter, then waits before printing a confirmation message. The
    coordinates and credentials are placeholders to adapt to your target.
    """
    # Navigate to a form (e.g., login form)
    control_computer("move_mouse", coordinates={"x": 500, "y": 300})
    control_computer("click_mouse", button="left")

    # Type username
    control_computer("type_text", text="user@example.com")

    # Tab to password field
    control_computer("type_keys", keys=["tab"])

    # Type password
    control_computer("type_text", text="secure_password")

    # Tab to login button
    control_computer("type_keys", keys=["tab"])

    # Press Enter to submit
    control_computer("type_keys", keys=["enter"])

    # Wait for page to load
    control_computer("wait", duration=2000)

    print("Form submitted successfully")

# Run the automation

# fill_web_form()

JavaScript/Node.js Examples

const axios = require('axios');

/**
 * POST a single action to the Bytebot computer-use endpoint.
 *
 * @param {string} action - Action name, sent as the `action` field.
 * @param {object} [params={}] - Extra fields spread into the request body.
 * @returns {Promise<*>} The `data` property of the axios response.
 * @throws Re-throws any error from the request after logging its message.
 */
async function controlComputer(action, params = {}) {
  const endpoint = "http://localhost:9990/computer-use";
  const payload = { action, ...params };

  try {
    const reply = await axios.post(endpoint, payload);
    return reply.data;
  } catch (err) {
    // Log for visibility, but leave the decision to the caller.
    console.error('Error:', err.message);
    throw err;
  }
}

/**
 * Open a browser from its dock icon and load `url` in it.
 *
 * Every step is sent through `controlComputer`, strictly in sequence. The
 * screen coordinates are fixed examples and need adjusting for your desktop.
 *
 * @param {string} url - Address typed into the browser's URL bar.
 * @returns {Promise<void>}
 */
async function navigateToWebsite(url) {
  console.log(`Navigating to ${url}...`);

  // Move the pointer to (x, y) and left-click there.
  const clickAt = async (x, y) => {
    await controlComputer("move_mouse", { coordinates: { x, y } });
    await controlComputer("click_mouse", { button: "left" });
  };
  // Send a single key-down or key-up event.
  const sendKey = (key, press) =>
    controlComputer("press_keys", { keys: [key], press });
  const pause = (duration) => controlComputer("wait", { duration });

  // Launch the browser (Firefox/Chrome) from the dock and let it start
  await clickAt(100, 950);
  await pause(2000);

  // Focus the URL bar (usually near the top)
  await clickAt(400, 60);

  // Select any existing text: hold Ctrl, tap A, release Ctrl
  const selectAll = [
    ["ctrl", "down"],
    ["a", "down"],
    ["a", "up"],
    ["ctrl", "up"],
  ];
  for (const [key, press] of selectAll) {
    await sendKey(key, press);
  }

  // Enter the address and submit it
  await controlComputer("type_text", { text: url });
  await sendKey("enter", "down");
  await sendKey("enter", "up");

  // Give the page time to load
  await pause(3000);

  console.log("Navigation completed");
}

// Usage
// navigateToWebsite("https://example.com").catch(console.error);

Common Automation Scenarios

Browser Automation Workflow

This example demonstrates a complete browser workflow, opening a site and interacting with it:

import requests
import time

def control_computer(action, **params):
    """POST a single action to the local Bytebot computer-use endpoint.

    Args:
        action: Action name, sent as the "action" field of the request body.
        **params: Extra fields added to the JSON body after "action".

    Returns:
        The response body decoded from JSON.
    """
    payload = {"action": action}
    payload.update(params)
    reply = requests.post("http://localhost:9990/computer-use", json=payload)
    return reply.json()

def browser_workflow():
    """Run a sample browser session: open, navigate, click, scroll, search.

    All coordinates are placeholders and must be adjusted for your screen
    layout. Each step is sent through control_computer(); fixed sleeps give
    the browser and pages time to respond.
    """
    # Open browser (assuming browser icon is at position x=100, y=960)
    control_computer("move_mouse", coordinates={"x": 100, "y": 960})
    control_computer("click_mouse", button="left")
    time.sleep(3)  # Wait for browser to open

    # Type URL and navigate. Key actions take a list under "keys", matching
    # the other examples on this page.
    control_computer("type_text", text="https://example.com")
    control_computer("type_keys", keys=["enter"])
    time.sleep(2)  # Wait for page to load

    # Take screenshot of the loaded page
    control_computer("screenshot")

    # Click on a link (coordinates would need to be adjusted for your target)
    control_computer("move_mouse", coordinates={"x": 300, "y": 400})
    control_computer("click_mouse", button="left")
    time.sleep(2)

    # Scroll down
    control_computer("scroll", direction="down", amount=500)

    # Fill a search box
    control_computer("move_mouse", coordinates={"x": 600, "y": 200})
    control_computer("click_mouse", button="left")
    control_computer("type_text", text="search query")
    control_computer("type_keys", keys=["enter"])

browser_workflow()

Form Filling Workflow

This example shows a complete form-filling process:

const axios = require("axios");

/**
 * POST a single action to the Bytebot computer-use endpoint.
 *
 * @param {string} action - Action name, sent as the `action` field.
 * @param {object} [params={}] - Extra fields spread into the request body.
 * @returns {Promise<*>} The `data` property of the axios response.
 */
async function controlComputer(action, params = {}) {
  const endpoint = "http://localhost:9990/computer-use";
  const reply = await axios.post(endpoint, { action, ...params });
  return reply.data;
}

/**
 * Open a form page in the browser, fill three fields, and submit the form.
 *
 * Every step is sent through `controlComputer`, strictly in sequence. The
 * screen coordinates and field order are examples; adjust them to your page.
 *
 * @returns {Promise<void>}
 */
async function fillForm() {
  // Tap one key: key-down followed by key-up, using the same
  // `{ keys, press }` shape for every key so the requests are consistent.
  const tapKey = async (key) => {
    await controlComputer("press_keys", { keys: [key], press: "down" });
    await controlComputer("press_keys", { keys: [key], press: "up" });
  };

  // Navigate to form page
  await controlComputer("move_mouse", { coordinates: { x: 100, y: 960 } });
  await controlComputer("click_mouse", { button: "left" });
  await controlComputer("wait", { duration: 3000 });
  await controlComputer("type_text", { text: "https://example.com/form" });
  await tapKey("enter");
  await controlComputer("wait", { duration: 2000 });

  // Fill form
  // Name field
  await controlComputer("move_mouse", { coordinates: { x: 400, y: 250 } });
  await controlComputer("click_mouse", { button: "left" });

  // Type the value
  await controlComputer("type_text", { text: "John Doe" });

  // Email field (tab to next field)
  await tapKey("tab");
  await controlComputer("type_text", { text: "user@example.com" });

  // Message field (tab to next field)
  await tapKey("tab");
  await controlComputer("type_text", {
    text: "This is an automated message sent using Bytebot's Computer Use API",
    delay: 30,
  });

  // Submit form
  await tapKey("tab");
  await tapKey("enter");

  // Take screenshot of confirmation page
  await controlComputer("wait", { duration: 2000 });
  await controlComputer("screenshot");

  console.log("Form submitted successfully");
}

fillForm().catch(console.error);

Automation Framework Integration

You can create a reusable automation framework with Bytebot:

import requests
import time
import json

class BytebotDriver:
    """A Selenium-like driver for Bytebot.

    Wraps the computer-use endpoint in small helper methods. Elements are
    addressed by screen coordinates ({"x": ..., "y": ...}), not selectors.
    """

    def __init__(self, base_url="http://localhost:9990"):
        """Store the base URL of the Bytebot server (no trailing slash)."""
        self.base_url = base_url

    def control_computer(self, action, **params):
        """POST one action to <base_url>/computer-use and return the JSON reply.

        Args:
            action: Action name, sent as the "action" field.
            **params: Extra fields merged into the JSON request body.
        """
        url = f"{self.base_url}/computer-use"
        data = {"action": action, **params}
        response = requests.post(url, json=data)
        return response.json()

    def open_browser(self, browser_icon_coords):
        """Open a browser by clicking its icon"""
        self.control_computer("move_mouse", coordinates=browser_icon_coords)
        self.control_computer("click_mouse", button="left")
        time.sleep(3)  # Wait for browser to open

    def navigate_to(self, url):
        """Navigate to a URL in the browser"""
        self.control_computer("type_text", text=url)
        self.press_key("enter")
        time.sleep(2)  # Wait for page to load

    def click_element(self, coords):
        """Click an element at the specified coordinates"""
        self.control_computer("move_mouse", coordinates=coords)
        self.control_computer("click_mouse", button="left")

    def type_text(self, text):
        """Type text at the current cursor position"""
        self.control_computer("type_text", text=text)

    def press_key(self, key, modifiers=None):
        """Press a keyboard key with optional modifiers.

        Args:
            key: Name of the key to press, e.g. "enter".
            modifiers: Optional modifier name or list of names (e.g.
                ["ctrl"]) held while the key is pressed.

        Sends "press_keys" twice with the same key list: once with
        press="down" and once with press="up", mirroring the request shape
        used by the other press_keys examples on this page.
        """
        if modifiers is None:
            held = []
        elif isinstance(modifiers, str):
            # A bare string would otherwise be split into characters.
            held = [modifiers]
        else:
            held = list(modifiers)
        keys = [*held, key]
        self.control_computer("press_keys", keys=keys, press="down")
        self.control_computer("press_keys", keys=keys, press="up")

    def take_screenshot(self):
        """Take a screenshot of the desktop"""
        return self.control_computer("screenshot")

    def scroll(self, direction, amount):
        """Scroll in the specified direction"""
        self.control_computer("scroll", direction=direction, amount=amount)

# Example usage
# Uses the default base URL (http://localhost:9990).
driver = BytebotDriver()
# Coordinates are placeholders; adjust them to your own screen layout.
driver.open_browser({"x": 100, "y": 960})
driver.navigate_to("https://example.com")
driver.click_element({"x": 300, "y": 400})
driver.type_text("Hello Bytebot!")
driver.press_key("enter")
result = driver.take_screenshot()
# Assumes the JSON response carries a "success" field; verify against the API.
print(f"Screenshot captured: {result['success']}")