Documentation Index
Fetch the complete documentation index at: https://docs.bytebot.ai/llms.txt
Use this file to discover all available pages before exploring further.
Basic Examples
Here are some practical examples of how to use the Computer Use API in different programming languages.
Using cURL
# Move to Firefox/Chrome icon in the dock and click it
# Step 1: move the pointer to the icon's position (x=100, y=960)
curl -X POST http://localhost:9990/computer-use \
-H "Content-Type: application/json" \
-d '{"action": "move_mouse", "coordinates": {"x": 100, "y": 960}}'
# Step 2: send a single left click (this request carries no coordinates)
curl -X POST http://localhost:9990/computer-use \
-H "Content-Type: application/json" \
-d '{"action": "click_mouse", "button": "left", "clickCount": 1}'
Python Examples
import requests
import json
import base64
import time
from io import BytesIO
from PIL import Image
def control_computer(action, **params):
    """Send one action to the Computer Use API and return the decoded reply.

    Args:
        action: Name of the action to perform, e.g. ``"move_mouse"``.
        **params: Extra fields merged into the JSON request body next to
            ``action``.

    Returns:
        The JSON body of the HTTP response, as parsed by ``response.json()``.
    """
    url = "http://localhost:9990/computer-use"
    data = {"action": action, **params}
    response = requests.post(url, json=data)
    return response.json()
# Open a web browser by clicking an icon, wait for it to open, then type a
# URL and confirm it. The steps are issued strictly in the order listed.
for action, params in (
    ("move_mouse", {"coordinates": {"x": 100, "y": 960}}),
    ("click_mouse", {"button": "left"}),
    ("wait", {"duration": 2000}),  # wait for the browser to open
    ("type_text", {"text": "https://example.com"}),
    ("press_keys", {"key": "enter"}),
):
    control_computer(action, **params)
JavaScript/Node.js Examples
const axios = require('axios');
/**
 * POST a single action to the Computer Use endpoint.
 *
 * Failures are not thrown: the error message is logged and an object of the
 * form { success: false, error } is returned instead.
 *
 * @param {string} action - Name of the action to perform.
 * @param {object} [params={}] - Extra fields sent alongside `action`.
 * @returns {Promise<object>} The response body, or the failure object.
 */
async function controlComputer(action, params = {}) {
  const endpoint = "http://localhost:9990/computer-use";
  const payload = { action, ...params };
  try {
    const { data: body } = await axios.post(endpoint, payload);
    return body;
  } catch (err) {
    const { message } = err;
    console.error('Error:', message);
    return { success: false, error: message };
  }
}
// Example: Automate opening an application and typing
/**
 * Open the text editor by clicking its icon, wait for it, then type a line.
 *
 * controlComputer() does not throw on failure; it returns
 * { success: false, error }. Each step's result is therefore checked here so
 * that a failed step stops the sequence and is reported, instead of the
 * remaining steps running blindly and success being logged regardless.
 */
async function automateTextEditor() {
  // Run one step and turn a reported failure into an exception.
  const runStep = async (action, params) => {
    const result = await controlComputer(action, params);
    if (result && result.success === false) {
      throw new Error(`${action} failed: ${result.error || "unknown error"}`);
    }
    return result;
  };

  try {
    // Open text editor by clicking its icon
    await runStep("move_mouse", { coordinates: { x: 150, y: 960 } });
    await runStep("click_mouse", { button: "left" });

    // Wait for it to open
    await runStep("wait", { duration: 2000 });

    // Type some text
    await runStep("type_text", {
      text: "This is an automated test using Node.js and Bytebot",
      delay: 30
    });

    console.log("Automation completed successfully");
  } catch (error) {
    console.error("Automation failed:", error);
  }
}

automateTextEditor();
File Operations
Writing Files
These examples show how to write files to the desktop environment:
import requests
import base64
def write_file(path, content):
    """Write text to a file in the desktop environment via the API.

    The text is UTF-8 encoded and then base64 encoded before being sent as the
    ``data`` field of a ``write_file`` action.

    Args:
        path: Destination path of the file.
        content: Text to write (a ``str``).

    Returns:
        The JSON body of the HTTP response, as parsed by ``response.json()``.
    """
    url = "http://localhost:9990/computer-use"

    # Encode content to base64; the trailing decode turns the base64 bytes
    # into a str so the payload can be serialised as JSON.
    encoded_content = base64.b64encode(content.encode('utf-8')).decode('utf-8')

    data = {
        "action": "write_file",
        "path": path,
        "data": encoded_content,
    }
    response = requests.post(url, json=data)
    return response.json()
# Write two text files and print the API response for each.
for path, content in (
    # Absolute path
    ("/home/user/hello.txt", "Hello, Bytebot!"),
    # Write to desktop (relative path):
    # file will be written to /home/user/Desktop/report.txt
    ("report.txt", "Daily report content"),
):
    result = write_file(path, content)
    print(result)  # e.g. {'success': True, 'message': 'File written successfully...'}
Reading Files
These examples show how to read files from the desktop environment:
import requests
import base64
def read_file(path):
    """Read a file from the desktop environment via the Computer Use API.

    Posts a ``read_file`` action for ``path``. When the response reports
    success, its base64 ``data`` field is decoded as UTF-8 text.

    Args:
        path: Path of the file to read.

    Returns:
        On success, a dict with ``content`` (the decoded text) plus ``name``,
        ``size`` and ``mediaType`` copied from the response. Otherwise the
        response JSON is returned unchanged.
    """
    url = "http://localhost:9990/computer-use"
    data = {
        "action": "read_file",
        "path": path,
    }
    response = requests.post(url, json=data)
    result = response.json()

    # .get() rather than ['success']: a response that carries no 'success'
    # key is treated as a failure and handed back, instead of raising
    # KeyError before the caller can inspect it.
    if not result.get('success'):
        return result

    # Decode the base64 content
    content = base64.b64decode(result['data']).decode('utf-8')
    return {
        'content': content,
        'name': result['name'],
        'size': result['size'],
        'mediaType': result['mediaType'],
    }
# Read a text file
file_data = read_file("/home/user/hello.txt")

# read_file() only adds the 'content' key when the read succeeded; on failure
# it returns the raw API response, so indexing 'content' unconditionally
# would raise KeyError.
if 'content' in file_data:
    print(f"Content: {file_data['content']}")
    print(f"Size: {file_data['size']} bytes")
    print(f"Type: {file_data['mediaType']}")
else:
    print(f"Read failed: {file_data}")
Automation Recipes
Browser Automation
This example demonstrates how to automate browser interactions:
import requests
import time
def control_computer(action, **params):
    """POST a single action to the Computer Use API.

    Args:
        action: Name of the action, sent as the ``action`` field.
        **params: Additional fields placed in the same JSON object.

    Returns:
        The response body decoded from JSON.
    """
    url = "http://localhost:9990/computer-use"
    data = {"action": action, **params}
    response = requests.post(url, json=data)
    return response.json()
def automate_browser():
    """Drive a short browser session and return the screenshot response.

    Steps: click the browser icon, type a URL and press Enter, take a
    screenshot, click a link, then scroll down. Fixed ``time.sleep`` pauses
    give the UI time to react between steps.

    Returns:
        The API response of the ``screenshot`` action. It is returned so the
        captured result is available to the caller rather than discarded.
    """
    # Open browser (assuming browser icon is at position x=100, y=960)
    control_computer("move_mouse", coordinates={"x": 100, "y": 960})
    control_computer("click_mouse", button="left")
    time.sleep(3)  # Wait for browser to open

    # Type URL
    control_computer("type_text", text="https://example.com")
    control_computer("press_keys", key="enter")
    time.sleep(2)  # Wait for page to load

    # Take screenshot of the loaded page
    screenshot = control_computer("screenshot")

    # Click on a link (coordinates would need to be adjusted for your target)
    control_computer("move_mouse", coordinates={"x": 300, "y": 400})
    control_computer("click_mouse", button="left")
    time.sleep(2)

    # Scroll down
    control_computer("scroll", direction="down", scrollCount=5)

    return screenshot


automate_browser()
Form Filling Automation
This example shows how to automate filling out a form in a web application:
const axios = require("axios");
/**
 * POST one action to the Computer Use endpoint and resolve with the response
 * body. Errors from the request are not caught here; they reject the
 * returned promise.
 *
 * @param {string} action - Name of the action to perform.
 * @param {object} [params={}] - Extra fields sent alongside `action`.
 * @returns {Promise<object>} The response body.
 */
async function controlComputer(action, params = {}) {
  const endpoint = "http://localhost:9990/computer-use";
  const payload = { action, ...params };
  const { data: body } = await axios.post(endpoint, payload);
  return body;
}
/**
 * Fill in a three-field form (name, e-mail, message) and submit it.
 *
 * The first field is focused with a mouse click; every later field, and the
 * submit button, is reached with the Tab key.
 */
async function fillForm() {
  // Click first input field
  await controlComputer("move_mouse", { coordinates: { x: 400, y: 300 } });
  await controlComputer("click_mouse", { button: "left" });

  // Type name
  await controlComputer("type_text", { text: "John Doe" });

  // Tab to next field
  await controlComputer("press_keys", { key: "tab" });

  // Type email. The published example contained the e-mail obfuscation
  // placeholder "[email protected]" here, which is not a valid address;
  // a reserved example.com address is used instead.
  await controlComputer("type_text", { text: "john.doe@example.com" });

  // Tab to next field
  await controlComputer("press_keys", { key: "tab" });

  // Type message
  await controlComputer("type_text", {
    text: "This is an automated message sent using Bytebot's Computer Use API",
    delay: 30,
  });

  // Tab to submit button
  await controlComputer("press_keys", { key: "tab" });

  // Press Enter to submit
  await controlComputer("press_keys", { key: "enter" });
}

fillForm().catch(console.error);
Integration with Testing Frameworks
The Computer Use API can be integrated with popular testing frameworks:
Selenium Alternative
Bytebot can serve as an alternative to Selenium for web testing:
import requests
import time
import json
class BytebotWebDriver:
    """Small WebDriver-style wrapper around the Computer Use API.

    Each public method sends one or more actions as POST requests to
    ``<base_url>/computer-use``.
    """

    def __init__(self, base_url="http://localhost:9990"):
        """Store the root URL of the service; no request is made here."""
        self.base_url = base_url

    def _endpoint(self):
        """Return the action URL, tolerating a trailing slash in ``base_url``."""
        # Without the rstrip, "http://host:9990/" would give "//computer-use".
        return f"{self.base_url.rstrip('/')}/computer-use"

    def control_computer(self, action, **params):
        """POST ``action`` plus ``params`` as JSON and return the decoded reply."""
        data = {"action": action, **params}
        response = requests.post(self._endpoint(), json=data)
        return response.json()

    def open_browser(self, browser_icon_coords):
        """Click the browser icon at ``browser_icon_coords``, then pause 3 s."""
        self.control_computer("move_mouse", coordinates=browser_icon_coords)
        self.control_computer("click_mouse", button="left")
        time.sleep(3)  # Wait for browser to open

    def navigate_to(self, url):
        """Type ``url``, press Enter, then pause 2 s for the page to load."""
        self.control_computer("type_text", text=url)
        self.control_computer("press_keys", key="enter")
        time.sleep(2)  # Wait for page to load

    def click_element(self, coords):
        """Move the pointer to ``coords`` and left-click."""
        self.control_computer("move_mouse", coordinates=coords)
        self.control_computer("click_mouse", button="left")

    def type_text(self, text):
        """Send a ``type_text`` action with ``text``."""
        self.control_computer("type_text", text=text)

    def press_keys(self, key, modifiers=None):
        """Send a ``press_keys`` action for ``key``.

        ``modifiers`` is only included in the request when it is truthy.
        """
        params = {"key": key}
        if modifiers:
            params["modifiers"] = modifiers
        self.control_computer("press_keys", **params)

    def take_screenshot(self):
        """Send a ``screenshot`` action and return the API response."""
        return self.control_computer("screenshot")
# Usage example: open the browser, load a page, click into it and type.
browser_icon = {"x": 100, "y": 960}
page_target = {"x": 300, "y": 400}

driver = BytebotWebDriver()
driver.open_browser(browser_icon)
driver.navigate_to("https://example.com")
driver.click_element(page_target)
driver.type_text("Hello Bytebot!")