Basic Examples

Here are some practical examples of how to use the Computer Use API in different programming languages.

Using cURL

# Hover the pointer over the Firefox/Chrome icon in the dock...
curl --request POST http://localhost:9990/computer-use \
  --header "Content-Type: application/json" \
  --data '{"action": "move_mouse", "coordinates": {"x": 100, "y": 960}}'

# ...then left-click once (this request carries no coordinates of its own)
curl --request POST http://localhost:9990/computer-use \
  --header "Content-Type: application/json" \
  --data '{"action": "click_mouse", "button": "left", "numClicks": 1}'

Python Examples

import requests
import json
import base64
import time
from io import BytesIO
from PIL import Image

def control_computer(action, **params):
    """Send one action to the Computer Use endpoint and return the decoded reply.

    Args:
        action: Name of the action; sent as the "action" field of the JSON body.
        **params: Extra fields merged into the JSON body next to "action".

    Returns:
        The response body parsed as JSON.
    """
    url = "http://localhost:9990/computer-use"
    data = {"action": action, **params}
    response = requests.post(url, json=data)
    return response.json()

# Launch a web browser: hover over its icon, then left-click
control_computer(action="move_mouse", coordinates={"x": 100, "y": 960})
control_computer(action="click_mouse", button="left")

# Give the browser time to open
control_computer(action="wait", duration=2000)

# Enter the address, then confirm with Enter
control_computer(action="type_text", text="https://example.com")
control_computer(action="press_keys", key="enter")

JavaScript/Node.js Examples

const axios = require('axios');

/**
 * Posts a single action to the Computer Use endpoint.
 *
 * @param {string} action - Action name, sent as the `action` field of the body.
 * @param {object} [params={}] - Additional fields spread into the request body.
 * @returns {Promise<*>} The `data` property of the axios response. If the
 *   request throws, the error message is logged and
 *   `{ success: false, error: <message> }` is returned instead.
 */
async function controlComputer(action, params = {}) {
  const endpoint = "http://localhost:9990/computer-use";
  const payload = { action, ...params };

  try {
    return (await axios.post(endpoint, payload)).data;
  } catch (err) {
    // Failures are reported through the return value, not rethrown
    console.error('Error:', err.message);
    return { success: false, error: err.message };
  }
}

/**
 * Example: automate opening an application and typing into it.
 *
 * Every step goes through controlComputer(), which reports a failed request by
 * resolving to `{ success: false, error }` rather than throwing. Each result is
 * therefore checked here, so a failed step stops the sequence and is logged as
 * a failure instead of being followed by the success message.
 *
 * @returns {Promise<void>} Settles after the sequence finished or a failure
 *   was logged; failures are not rethrown.
 */
async function automateTextEditor() {
  // Runs one action and turns a reported failure into an exception
  const runStep = async (action, params) => {
    const result = await controlComputer(action, params);
    if (result && result.success === false) {
      throw new Error(`${action} failed: ${result.error}`);
    }
    return result;
  };

  try {
    // Open text editor by clicking its icon
    await runStep("move_mouse", { coordinates: { x: 150, y: 960 } });
    await runStep("click_mouse", { button: "left" });

    // Wait for it to open
    await runStep("wait", { duration: 2000 });

    // Type some text
    await runStep("type_text", {
      text: "This is an automated test using Node.js and Bytebot",
      delay: 30,
    });

    console.log("Automation completed successfully");
  } catch (error) {
    console.error("Automation failed:", error);
  }
}

automateTextEditor();

Automation Recipes

Browser Automation

This example demonstrates how to automate browser interactions:

import requests
import time

def control_computer(action, **params):
    """Post one action to the Computer Use endpoint and return the JSON reply.

    The request body is a JSON object holding "action" followed by every
    keyword argument passed in ``params``.
    """
    payload = dict(action=action, **params)
    reply = requests.post("http://localhost:9990/computer-use", json=payload)
    return reply.json()

def automate_browser():
    """Open a browser, load example.com, capture it, follow a link and scroll.

    All coordinates are example values and need adjusting for the target
    screen layout.

    Returns:
        Whatever control_computer("screenshot") returned for the loaded page.
    """
    # Open browser (assuming browser icon is at position x=100, y=960)
    control_computer("move_mouse", coordinates={"x": 100, "y": 960})
    control_computer("click_mouse", button="left")
    time.sleep(3)  # Wait for browser to open

    # Type URL
    control_computer("type_text", text="https://example.com")
    control_computer("press_keys", key="enter")
    time.sleep(2)  # Wait for page to load

    # Take screenshot of the loaded page
    screenshot = control_computer("screenshot")

    # Click on a link (coordinates would need to be adjusted for your target)
    control_computer("move_mouse", coordinates={"x": 300, "y": 400})
    control_computer("click_mouse", button="left")
    time.sleep(2)

    # Scroll down
    control_computer("scroll", direction="down", amount=500)

    # Hand the capture back to the caller instead of discarding it
    return screenshot

automate_browser()

Form Filling Automation

This example shows how to automate filling out a form in a web application:

const axios = require("axios");

/**
 * Posts a single action to the Computer Use endpoint.
 *
 * @param {string} action - Action name, sent as the `action` field of the body.
 * @param {object} [params={}] - Additional fields spread into the request body.
 * @returns {Promise<*>} The `data` property of the axios response. Request
 *   errors are not caught here and reject the returned promise.
 */
async function controlComputer(action, params = {}) {
  const payload = { action, ...params };
  const reply = await axios.post("http://localhost:9990/computer-use", payload);
  return reply.data;
}

/**
 * Fills a three-field form (name, email, message) and submits it.
 *
 * The first field is focused with a mouse click at example coordinates; the
 * remaining fields and the submit button are reached with the Tab key.
 *
 * @returns {Promise<void>} Rejects if any controlComputer() call rejects.
 */
async function fillForm() {
  // Click first input field
  await controlComputer("move_mouse", { coordinates: { x: 400, y: 300 } });
  await controlComputer("click_mouse", { button: "left" });

  // Type name
  await controlComputer("type_text", { text: "John Doe" });

  // Tab to next field
  await controlComputer("press_keys", { key: "tab" });

  // Type email (a syntactically valid placeholder address)
  await controlComputer("type_text", { text: "john.doe@example.com" });

  // Tab to next field
  await controlComputer("press_keys", { key: "tab" });

  // Type message
  await controlComputer("type_text", {
    text: "This is an automated message sent using Bytebot's Computer Use API",
    delay: 30,
  });

  // Tab to submit button
  await controlComputer("press_keys", { key: "tab" });

  // Press Enter to submit
  await controlComputer("press_keys", { key: "enter" });
}

fillForm().catch(console.error);

Integration with Testing Frameworks

The Computer Use API can be integrated with popular testing frameworks:

Selenium Alternative

Bytebot can serve as an alternative to Selenium for web testing:

import requests
import time
import json

class BytebotWebDriver:
    """Small driver-style wrapper around the Computer Use HTTP endpoint.

    Every method posts one or more actions to ``<base_url>/computer-use``.
    Elements are addressed by screen coordinates, passed as dicts such as
    ``{"x": 100, "y": 960}``.
    """

    def __init__(self, base_url="http://localhost:9990"):
        """Remember the server address.

        Args:
            base_url: Address that "/computer-use" is appended to. A trailing
                slash is tolerated.
        """
        self.base_url = base_url

    def control_computer(self, action, **params):
        """Post a single action and return the JSON-decoded response.

        Args:
            action: Name of the action; sent as the "action" field.
            **params: Extra fields merged into the JSON body.
        """
        # Strip a trailing slash so "http://host/" does not yield "//computer-use"
        url = f"{self.base_url.rstrip('/')}/computer-use"
        data = {"action": action, **params}
        response = requests.post(url, json=data)
        return response.json()

    def open_browser(self, browser_icon_coords, wait=3):
        """Click the browser icon, then pause for ``wait`` seconds while it opens."""
        self.control_computer("move_mouse", coordinates=browser_icon_coords)
        self.control_computer("click_mouse", button="left")
        time.sleep(wait)  # Wait for browser to open

    def navigate_to(self, url, wait=2):
        """Type ``url``, press Enter, then pause for ``wait`` seconds while it loads."""
        self.control_computer("type_text", text=url)
        self.control_computer("press_keys", key="enter")
        time.sleep(wait)  # Wait for page to load

    def click_element(self, coords):
        """Move the pointer to ``coords`` and left-click."""
        self.control_computer("move_mouse", coordinates=coords)
        self.control_computer("click_mouse", button="left")

    def type_text(self, text):
        """Send ``text`` as a "type_text" action."""
        self.control_computer("type_text", text=text)

    def press_keys(self, key, modifiers=None):
        """Send a "press_keys" action for ``key``.

        ``modifiers`` is included in the request only when it is truthy.
        """
        params = {"key": key}
        if modifiers:
            params["modifiers"] = modifiers
        self.control_computer("press_keys", **params)

    def take_screenshot(self):
        """Request a screenshot and return the decoded response."""
        return self.control_computer("screenshot")

# Usage example: launch the browser, load a page, click a spot, then type
driver = BytebotWebDriver(base_url="http://localhost:9990")
driver.open_browser(browser_icon_coords={"x": 100, "y": 960})
driver.navigate_to(url="https://example.com")
driver.click_element(coords={"x": 300, "y": 400})
driver.type_text(text="Hello Bytebot!")