Skip to main content

Documentation Index

Fetch the complete documentation index at: https://docs.bytebot.ai/llms.txt

Use this file to discover all available pages before exploring further.

Basic Examples

Here are some practical examples of how to use the Computer Use API in different programming languages.

Using cURL

# Move the pointer onto the Firefox/Chrome icon in the dock
curl --request POST http://localhost:9990/computer-use \
  --header "Content-Type: application/json" \
  --data '{"action": "move_mouse", "coordinates": {"x": 100, "y": 960}}'

# Left-click once (no coordinates are sent with this request)
curl --request POST http://localhost:9990/computer-use \
  --header "Content-Type: application/json" \
  --data '{"action": "click_mouse", "button": "left", "clickCount": 1}'

Python Examples

import requests
import json
import base64
import time
from io import BytesIO
from PIL import Image

def control_computer(action, **params):
    """Send one action to the Computer Use endpoint and return the reply.

    Args:
        action: Action name placed in the request's "action" field,
            e.g. "move_mouse".
        **params: Extra fields merged into the JSON request body.

    Returns:
        The response body decoded from JSON.
    """
    url = "http://localhost:9990/computer-use"
    data = {"action": action, **params}
    response = requests.post(url, json=data)
    return response.json()

# Open a web browser by clicking an icon

control_computer("move_mouse", coordinates={"x": 100, "y": 960})
# clickCount is sent explicitly, matching the cURL click_mouse example
control_computer("click_mouse", button="left", clickCount=1)

# Wait for the browser to open

control_computer("wait", duration=2000)

# Type a URL

control_computer("type_text", text="https://example.com")
control_computer("press_keys", key="enter")

JavaScript/Node.js Examples

const axios = require('axios');

/**
 * POST a single action to the Computer Use endpoint.
 *
 * Request failures are not thrown: they are logged and reported to the
 * caller as `{ success: false, error }` instead.
 *
 * @param {string} action - Action name, e.g. "move_mouse".
 * @param {object} [params={}] - Extra fields merged into the request body.
 * @returns {Promise<object>} The response body, or the failure object above.
 */
async function controlComputer(action, params = {}) {
  const endpoint = "http://localhost:9990/computer-use";
  const payload = { action, ...params };

  try {
    const { data: body } = await axios.post(endpoint, payload);
    return body;
  } catch (err) {
    console.error('Error:', err.message);
    return { success: false, error: err.message };
  }
}

// Example: Automate opening an application and typing
/**
 * Example automation: launch a text editor from its icon and type a line.
 *
 * `controlComputer` reports request failures as `{ success: false, error }`
 * rather than throwing, so each step's result is checked here. Without that
 * check the catch block below could never run and success would be logged
 * even when a request failed.
 *
 * @returns {Promise<void>}
 */
async function automateTextEditor() {
  // Run one action and turn a reported failure into an exception.
  const step = async (action, params) => {
    const result = await controlComputer(action, params);
    if (result && result.success === false) {
      throw new Error(`${action} failed: ${result.error || "unknown error"}`);
    }
    return result;
  };

  try {
    // Open text editor by clicking its icon
    await step("move_mouse", { coordinates: { x: 150, y: 960 } });
    // clickCount is sent explicitly, matching the cURL click_mouse example
    await step("click_mouse", { button: "left", clickCount: 1 });

    // Wait for it to open
    await step("wait", { duration: 2000 });

    // Type some text
    await step("type_text", {
      text: "This is an automated test using Node.js and Bytebot",
      delay: 30
    });

    console.log("Automation completed successfully");
  } catch (error) {
    console.error("Automation failed:", error);
  }
}

automateTextEditor();

File Operations

Writing Files

This example shows how to write a file to the desktop environment:
import requests
import base64

def write_file(path, content):
    """Write text to a file in the desktop environment via the API.

    Args:
        path: Destination path sent in the request's "path" field.
        content: Text to write; it is UTF-8 encoded and then base64
            encoded before being sent in the "data" field.

    Returns:
        The response body decoded from JSON.
    """
    # The request carries the file contents as base64 text
    raw_bytes = content.encode('utf-8')
    payload = {
        "action": "write_file",
        "path": path,
        "data": base64.b64encode(raw_bytes).decode('utf-8'),
    }
    reply = requests.post("http://localhost:9990/computer-use", json=payload)
    return reply.json()

# Absolute path: write a text file
hello_result = write_file("/home/user/hello.txt", "Hello, Bytebot!")
print(hello_result)  # {'success': True, 'message': 'File written successfully...'}

# Relative path: write to the desktop
report_result = write_file("report.txt", "Daily report content")
print(report_result)  # File will be written to /home/user/Desktop/report.txt

Reading Files

This example shows how to read a file from the desktop environment:
import requests
import base64

def read_file(path):
    """Read a file from the desktop environment via the API.

    Args:
        path: Path of the file to read, sent in the request's "path" field.

    Returns:
        When the response's 'success' field is truthy, a dict with the
        decoded text under 'content' plus the 'name', 'size' and
        'mediaType' fields from the response. Otherwise the response body
        is returned unchanged.
    """
    payload = {"action": "read_file", "path": path}
    reply = requests.post(
        "http://localhost:9990/computer-use", json=payload
    ).json()

    # Guard clause: an unsuccessful response is passed straight through
    if not reply['success']:
        return reply

    # The file contents arrive base64 encoded; decode them as UTF-8 text
    text = base64.b64decode(reply['data']).decode('utf-8')
    return {
        'content': text,
        'name': reply['name'],
        'size': reply['size'],
        'mediaType': reply['mediaType'],
    }

# Read a text file
file_data = read_file("/home/user/hello.txt")

# read_file returns the raw API response (without 'content') when the read
# fails, so check before indexing to avoid a KeyError.
if 'content' in file_data:
    print(f"Content: {file_data['content']}")
    print(f"Size: {file_data['size']} bytes")
    print(f"Type: {file_data['mediaType']}")
else:
    print(f"Read failed: {file_data}")

Automation Recipes

Browser Automation

This example demonstrates how to automate browser interactions:
import requests
import time

def control_computer(action, **params):
    """POST one action to the Computer Use endpoint and return its reply.

    Args:
        action: Action name placed in the request's "action" field.
        **params: Extra fields merged into the JSON request body.

    Returns:
        The response body decoded from JSON.
    """
    # Insert "action" first so the body keeps the same key order
    payload = {"action": action}
    payload.update(params)
    reply = requests.post("http://localhost:9990/computer-use", json=payload)
    return reply.json()

def automate_browser():
    """Open a browser, load example.com, click a link, and scroll down.

    The icon and link coordinates are examples and must be adjusted for the
    target screen. Fixed sleeps give the UI time to react between steps.

    Returns:
        The response from the "screenshot" action taken after the page load.
    """
    # Open browser (assuming browser icon is at position x=100, y=960)
    control_computer("move_mouse", coordinates={"x": 100, "y": 960})
    # clickCount is sent explicitly, matching the cURL click_mouse example
    control_computer("click_mouse", button="left", clickCount=1)
    time.sleep(3)  # Wait for browser to open

    # Type URL
    control_computer("type_text", text="https://example.com")
    control_computer("press_keys", key="enter")
    time.sleep(2)  # Wait for page to load

    # Take screenshot of the loaded page
    screenshot = control_computer("screenshot")

    # Click on a link (coordinates would need to be adjusted for your target)
    control_computer("move_mouse", coordinates={"x": 300, "y": 400})
    control_computer("click_mouse", button="left", clickCount=1)
    time.sleep(2)

    # Scroll down
    control_computer("scroll", direction="down", scrollCount=5)

    # Hand the capture back to the caller instead of discarding it
    return screenshot

# Run the browser automation recipe
automate_browser()

Form Filling Automation

This example shows how to automate filling out a form in a web application:
const axios = require("axios");

/**
 * POST a single action to the Computer Use endpoint.
 *
 * Request errors are not caught here; they reject the returned promise.
 *
 * @param {string} action - Action name, e.g. "move_mouse".
 * @param {object} [params={}] - Extra fields merged into the request body.
 * @returns {Promise<object>} The response body.
 */
async function controlComputer(action, params = {}) {
  const { data: body } = await axios.post(
    "http://localhost:9990/computer-use",
    { action, ...params }
  );
  return body;
}

/**
 * Fill in and submit a three-field form (name, email, message).
 *
 * Focus is moved between fields with the Tab key, and the form is submitted
 * by pressing Enter on the submit button. The first field's coordinates are
 * an example and must be adjusted for the target page.
 *
 * @returns {Promise<void>} Rejects if any request fails.
 */
async function fillForm() {
  // Click first input field
  await controlComputer("move_mouse", { coordinates: { x: 400, y: 300 } });
  // clickCount is sent explicitly, matching the cURL click_mouse example
  await controlComputer("click_mouse", { button: "left", clickCount: 1 });

  // Type name
  await controlComputer("type_text", { text: "John Doe" });

  // Tab to next field
  await controlComputer("press_keys", { key: "tab" });

  // Type email
  await controlComputer("type_text", { text: "john.doe@example.com" });

  // Tab to next field
  await controlComputer("press_keys", { key: "tab" });

  // Type message
  await controlComputer("type_text", {
    text: "This is an automated message sent using Bytebot's Computer Use API",
    delay: 30,
  });

  // Tab to submit button
  await controlComputer("press_keys", { key: "tab" });

  // Press Enter to submit
  await controlComputer("press_keys", { key: "enter" });
}

fillForm().catch(console.error);

Integration with Testing Frameworks

The Computer Use API can be integrated with popular testing frameworks:

Selenium Alternative

Bytebot can serve as an alternative to Selenium for web testing:
import requests
import time
import json

class BytebotWebDriver:
    """Minimal Selenium-style wrapper around the Computer Use API.

    Targets are addressed by screen coordinates (dicts such as
    {"x": 300, "y": 400}) rather than by element selectors.
    """

    def __init__(self, base_url="http://localhost:9990"):
        """Store the base URL of the server that exposes /computer-use."""
        self.base_url = base_url

    def control_computer(self, action, **params):
        """Send one action to the API and return the decoded JSON reply.

        Args:
            action: Action name placed in the request's "action" field.
            **params: Extra fields merged into the JSON request body.
        """
        url = f"{self.base_url}/computer-use"
        data = {"action": action, **params}
        response = requests.post(url, json=data)
        return response.json()

    def _click_at(self, coords):
        """Move the pointer to coords and left-click once."""
        self.control_computer("move_mouse", coordinates=coords)
        # clickCount is sent explicitly, matching the cURL click_mouse example
        self.control_computer("click_mouse", button="left", clickCount=1)

    def open_browser(self, browser_icon_coords):
        """Click the browser icon at the given coordinates, then pause."""
        self._click_at(browser_icon_coords)
        time.sleep(3)  # Wait for browser to open

    def navigate_to(self, url):
        """Type url, press enter, then pause for the page to load."""
        self.control_computer("type_text", text=url)
        self.control_computer("press_keys", key="enter")
        time.sleep(2)  # Wait for page to load

    def click_element(self, coords):
        """Left-click at the given screen coordinates."""
        self._click_at(coords)

    def type_text(self, text):
        """Send a type_text action carrying text."""
        self.control_computer("type_text", text=text)

    def press_keys(self, key, modifiers=None):
        """Send a press_keys action for key.

        Args:
            key: Value for the request's "key" field.
            modifiers: Optional value for the "modifiers" field; the field
                is left out of the request when this is empty or None.
        """
        params = {"key": key}
        if modifiers:
            params["modifiers"] = modifiers
        self.control_computer("press_keys", **params)

    def take_screenshot(self):
        """Request a screenshot and return the API's reply."""
        return self.control_computer("screenshot")

# Usage example: drive a short browser session through the wrapper
bot = BytebotWebDriver()
bot.open_browser({"x": 100, "y": 960})  # click the browser icon
bot.navigate_to("https://example.com")  # type the URL and press enter
bot.click_element({"x": 300, "y": 400})  # click at a position on the page
bot.type_text("Hello Bytebot!")  # type some text