> ## Documentation Index
> Fetch the complete documentation index at: https://docs.bytebot.ai/llms.txt
> Use this file to discover all available pages before exploring further.

# Computer Use API Examples

> Code examples for common automation scenarios using the Bytebot API

## Basic Examples

Here are some practical examples of how to use the Computer Use API in different programming languages.

### Using cURL

```bash Opening a Web Browser theme={null}
# Move to Firefox/Chrome icon in the dock and click it
curl -X POST http://localhost:9990/computer-use \
  -H "Content-Type: application/json" \
  -d '{"action": "move_mouse", "coordinates": {"x": 100, "y": 960}}'

curl -X POST http://localhost:9990/computer-use \
  -H "Content-Type: application/json" \
  -d '{"action": "click_mouse", "button": "left", "clickCount": 1}'
```

```bash Taking and Saving a Screenshot theme={null}
# Take a screenshot
response=$(curl -s -X POST http://localhost:9990/computer-use \
  -H "Content-Type: application/json" \
  -d '{"action": "screenshot"}')

# Extract the base64 image data and save to a file
echo $response | jq -r '.data.image' | base64 -d > screenshot.png
```

```bash Typing and Keyboard Shortcuts theme={null}
# Type text in a text editor
curl -X POST http://localhost:9990/computer-use \
  -H "Content-Type: application/json" \
  -d '{"action": "type_text", "text": "Hello, this is an automated test!", "delay": 30}'

# Press Ctrl+S to save
curl -X POST http://localhost:9990/computer-use \
  -H "Content-Type: application/json" \
  -d '{"action": "press_keys", "key": "s", "modifiers": ["control"]}'
```

### Python Examples

```python Basic Automation theme={null}
import requests
import json
import base64
import time
from io import BytesIO
from PIL import Image

def control_computer(action, **params):
    url = "http://localhost:9990/computer-use"
    data = {"action": action, **params}
    response = requests.post(url, json=data)
    return response.json()

# Open a web browser by clicking an icon
control_computer("move_mouse", coordinates={"x": 100, "y": 960})
control_computer("click_mouse", button="left")

# Wait for the browser to open
control_computer("wait", duration=2000)

# Type a URL
control_computer("type_text", text="https://example.com")
control_computer("press_keys", key="enter")
```

```python Screenshot and Analysis theme={null}
import requests
import json
import base64
import cv2
import numpy as np
from PIL import Image
from io import BytesIO

def take_screenshot():
    """Capture the desktop and return it as an OpenCV-ready BGR array.

    Returns None when the API response reports success == False.
    """
    url = "http://localhost:9990/computer-use"
    data = {"action": "screenshot"}
    response = requests.post(url, json=data)
    result = response.json()  # parse the response body once

    if result["success"]:
        img_data = base64.b64decode(result["data"]["image"])
        # Normalise to 3-channel RGB so an alpha channel never reaches OpenCV
        image = Image.open(BytesIO(img_data)).convert("RGB")
        # Pillow decodes to RGB; OpenCV (cvtColor, imwrite) assumes BGR order
        return cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    return None

# Take a screenshot
img = take_screenshot()

# Convert to grayscale for analysis
if img is not None:
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Save the screenshot
    cv2.imwrite("screenshot.png", img)

    # Perform image analysis (example: find edges)
    edges = cv2.Canny(gray, 100, 200)
    cv2.imwrite("edges.png", edges)
```

```python Web Form Automation theme={null}
import requests
import time

def control_computer(action, **params):
    url = "http://localhost:9990/computer-use"
    data = {"action": action, **params}
    response = requests.post(url, json=data)
    return response.json()

def fill_web_form(form_fields):
    """Click the first field, type every value (Tab between fields), then press Enter.

    form_fields is a list of dicts with "x", "y" and "value" keys.
    An empty list is a no-op.
    """
    if not form_fields:
        return  # nothing to fill in or submit

    # Click on the first form field; send only x/y, not the field's "value"
    first = form_fields[0]
    control_computer("move_mouse", coordinates={"x": first["x"], "y": first["y"]})
    control_computer("click_mouse", button="left")

    # Fill out each field
    for i, field in enumerate(form_fields):
        # Input the field value
        control_computer("type_text", text=field["value"])

        # If not the last field, press Tab to move to next field
        if i < len(form_fields) - 1:
            control_computer("press_keys", key="tab")
            time.sleep(0.5)

    # Submit the form by pressing Enter
    control_computer("press_keys", key="enter")

# Example form fields with coordinates and values
form_fields = [
    {"x": 500, "y": 300, "value": "John Doe"},
    {"x": 500, "y":
350, "value": "john@example.com"},
    {"x": 500, "y": 400, "value": "Password123"}
]

fill_web_form(form_fields)
```

### JavaScript/Node.js Examples

```javascript Basic Automation theme={null}
const axios = require('axios');

async function controlComputer(action, params = {}) {
  const url = "http://localhost:9990/computer-use";
  const data = { action, ...params };

  try {
    const response = await axios.post(url, data);
    return response.data;
  } catch (error) {
    console.error('Error:', error.message);
    return { success: false, error: error.message };
  }
}

// Example: Automate opening an application and typing
async function automateTextEditor() {
  try {
    // Open text editor by clicking its icon
    await controlComputer("move_mouse", { coordinates: { x: 150, y: 960 } });
    await controlComputer("click_mouse", { button: "left" });

    // Wait for it to open
    await controlComputer("wait", { duration: 2000 });

    // Type some text
    await controlComputer("type_text", {
      text: "This is an automated test using Node.js and Bytebot",
      delay: 30
    });

    console.log("Automation completed successfully");
  } catch (error) {
    console.error("Automation failed:", error);
  }
}

automateTextEditor();
```

```javascript Advanced: Screenshot Comparison theme={null}
const axios = require('axios');
const fs = require('fs');
const { createCanvas, loadImage } = require('canvas');
const pixelmatch = require('pixelmatch');

async function controlComputer(action, params = {}) {
  const url = "http://localhost:9990/computer-use";
  const data = { action, ...params };

  try {
    const response = await axios.post(url, data);
    return response.data;
  } catch (error) {
    console.error('Error:', error.message);
    return { success: false, error: error.message };
  }
}

async function compareScreenshots() {
  try {
    // Take first screenshot
    const screenshot1 = await controlComputer("screenshot");

    // Do some actions
    await controlComputer("move_mouse", { coordinates: { x: 500, y: 500 } });
    await controlComputer("click_mouse", { button: "left" });
    await
controlComputer("wait", { duration: 1000 });

    // Take second screenshot
    const screenshot2 = await controlComputer("screenshot");

    // Compare screenshots
    if (screenshot1.success && screenshot2.success) {
      const img1Data = Buffer.from(screenshot1.data.image, 'base64');
      const img2Data = Buffer.from(screenshot2.data.image, 'base64');

      fs.writeFileSync('screenshot1.png', img1Data);
      fs.writeFileSync('screenshot2.png', img2Data);

      // Now you could load and compare these images
      // This requires additional image comparison libraries
      console.log('Screenshots saved for comparison');
    }
  } catch (error) {
    console.error("Screenshot comparison failed:", error);
  }
}

compareScreenshots();
```

## File Operations

### Writing Files

These examples show how to write files to the desktop environment:

```python Python theme={null}
import requests
import base64

def write_file(path, content):
    url = "http://localhost:9990/computer-use"

    # Encode content to base64
    encoded_content = base64.b64encode(content.encode('utf-8')).decode('utf-8')

    data = {
        "action": "write_file",
        "path": path,
        "data": encoded_content
    }

    response = requests.post(url, json=data)
    return response.json()

# Write a text file
result = write_file("/home/user/hello.txt", "Hello, Bytebot!")
print(result)  # {'success': True, 'message': 'File written successfully...'}

# Write to desktop (relative path)
result = write_file("report.txt", "Daily report content")
print(result)  # File will be written to /home/user/Desktop/report.txt
```

```javascript JavaScript theme={null}
const axios = require('axios');

async function writeFile(path, content) {
  const url = "http://localhost:9990/computer-use";

  // Encode content to base64
  const encodedContent = Buffer.from(content, 'utf-8').toString('base64');

  const data = {
    action: "write_file",
    path: path,
    data: encodedContent
  };

  const response = await axios.post(url, data);
  return response.data;
}

// Write a text file
writeFile("/home/user/notes.txt", "Meeting notes...")
  .then(result =>
console.log(result))
  .catch(error => console.error(error));

// Write HTML file to desktop
const htmlContent = '<html><body><h1>Hello</h1></body></html>';
writeFile("index.html", htmlContent)
  .then(result => console.log("HTML file created"));
```

### Reading Files

These examples show how to read files from the desktop environment:

```python Python theme={null}
import requests
import base64

def read_file(path):
    url = "http://localhost:9990/computer-use"

    data = {
        "action": "read_file",
        "path": path
    }

    response = requests.post(url, json=data)
    result = response.json()

    if result['success']:
        # Decode the base64 content
        content = base64.b64decode(result['data']).decode('utf-8')
        return {
            'content': content,
            'name': result['name'],
            'size': result['size'],
            'mediaType': result['mediaType']
        }
    else:
        return result

# Read a text file
file_data = read_file("/home/user/hello.txt")
print(f"Content: {file_data['content']}")
print(f"Size: {file_data['size']} bytes")
print(f"Type: {file_data['mediaType']}")
```

```javascript JavaScript theme={null}
const axios = require('axios');

async function readFile(path) {
  const url = "http://localhost:9990/computer-use";

  const data = {
    action: "read_file",
    path: path
  };

  const response = await axios.post(url, data);
  const result = response.data;

  if (result.success) {
    // Decode the base64 content
    const content = Buffer.from(result.data, 'base64').toString('utf-8');
    return {
      content: content,
      name: result.name,
      size: result.size,
      mediaType: result.mediaType
    };
  } else {
    throw new Error(result.message);
  }
}

// Read a file from desktop
readFile("report.txt")
  .then(fileData => {
    console.log(`Content: ${fileData.content}`);
    console.log(`Size: ${fileData.size} bytes`);
    console.log(`Type: ${fileData.mediaType}`);
  })
  .catch(error => console.error("Error reading file:", error));
```

## Automation Recipes

### Browser Automation

This example demonstrates how to automate browser interactions:

```python theme={null}
import requests
import time

def control_computer(action, **params):
    url = "http://localhost:9990/computer-use"
    data = {"action": action, **params}
    response = requests.post(url, json=data)
    return response.json()

def automate_browser():
    # Open browser (assuming browser icon is at position x=100, y=960)
    control_computer("move_mouse", coordinates={"x": 100, "y": 960})
    control_computer("click_mouse", button="left")
    time.sleep(3)  # Wait for browser to open

    # Type URL
    control_computer("type_text", text="https://example.com")
    control_computer("press_keys", key="enter")
    time.sleep(2)  # Wait for page to load

    # Take screenshot of the loaded page
    screenshot = control_computer("screenshot")

    # Click on a link (coordinates would need to be adjusted for your target)
    control_computer("move_mouse", coordinates={"x": 300, "y": 400})
    control_computer("click_mouse", button="left")
    time.sleep(2)

    # Scroll down
    control_computer("scroll", direction="down", scrollCount=5)

automate_browser()
```

### Form Filling Automation

This example shows how to automate filling out a form in a web application:

```javascript theme={null}
const axios = require("axios");

async function controlComputer(action, params = {}) {
  const url = "http://localhost:9990/computer-use";
  const data = { action, ...params };
  const response = await axios.post(url, data);
  return response.data;
}

async function fillForm() {
  // Click first input field
  await controlComputer("move_mouse", { coordinates: { x: 400, y: 300 } });
  await controlComputer("click_mouse", { button: "left" });

  // Type name
  await controlComputer("type_text", { text: "John Doe" });

  // Tab to next field
  await controlComputer("press_keys", { key: "tab" });

  // Type email
  await controlComputer("type_text", { text: "john@example.com" });

  // Tab to next field
  await controlComputer("press_keys", { key: "tab" });

  // Type message
  await controlComputer("type_text", {
    text: "This is an automated message sent using Bytebot's Computer Use API",
    delay: 30,
  });

  // Tab to submit button
  await controlComputer("press_keys", { key: "tab" });

  // Press Enter to submit
  await controlComputer("press_keys", { key: "enter" });
}

fillForm().catch(console.error);
```

## Integration with Testing Frameworks

The Computer Use API can be integrated with popular testing frameworks:

### Selenium Alternative

Bytebot can serve as an alternative to Selenium for web testing:

```python theme={null}
import requests
import time
import json

class BytebotWebDriver:
    def __init__(self, base_url="http://localhost:9990"):
        self.base_url = base_url

    def control_computer(self, action, **params):
        url = f"{self.base_url}/computer-use"
        data = {"action": action, **params}
        response = requests.post(url, json=data)
        return response.json()

    def open_browser(self, browser_icon_coords):
        self.control_computer("move_mouse", coordinates=browser_icon_coords)
        self.control_computer("click_mouse", button="left")
        time.sleep(3)  # Wait for browser to open

    def navigate_to(self, url):
        self.control_computer("type_text", text=url)
        self.control_computer("press_keys", key="enter")
        time.sleep(2)  # Wait for page to load

    def click_element(self, coords):
        self.control_computer("move_mouse", coordinates=coords)
        self.control_computer("click_mouse", button="left")

    def type_text(self, text):
        self.control_computer("type_text", text=text)

    def press_keys(self, key, modifiers=None):
        params = {"key": key}
        if modifiers:
            params["modifiers"] = modifiers
        self.control_computer("press_keys", **params)

    def take_screenshot(self):
        return self.control_computer("screenshot")

# Usage example
driver = BytebotWebDriver()
driver.open_browser({"x": 100, "y": 960})
driver.navigate_to("https://example.com")
driver.click_element({"x": 300, "y": 400})
driver.type_text("Hello Bytebot!")
```