> ## Documentation Index
> Fetch the complete documentation index at: https://docs.bytebot.ai/llms.txt
> Use this file to discover all available pages before exploring further.

# Computer Use API Examples

> Code examples for common automation scenarios using the Bytebot API

## Basic Examples

Here are some practical examples of how to use the Computer Use API in different programming languages.

### Using cURL

<CodeGroup>
  ```bash Opening a Web Browser theme={null}
  # Hover the pointer over the Firefox/Chrome icon in the dock...
  curl --request POST http://localhost:9990/computer-use \
    --header "Content-Type: application/json" \
    --data '{"action": "move_mouse", "coordinates": {"x": 100, "y": 960}}'

  # ...then left-click it once
  curl --request POST http://localhost:9990/computer-use \
    --header "Content-Type: application/json" \
    --data '{"action": "click_mouse", "button": "left", "clickCount": 1}'

  ```

  ```bash Taking and Saving a Screenshot theme={null}
  # Take a screenshot (-s hides curl's progress meter so only the JSON is captured)
  response=$(curl -s -X POST http://localhost:9990/computer-use \
    -H "Content-Type: application/json" \
    -d '{"action": "screenshot"}')

  # Extract the base64 image data and save it to a file.
  # The expansion is quoted so the shell neither word-splits nor globs the JSON,
  # and printf is used because echo may interpret backslashes in some shells.
  printf '%s\n' "$response" | jq -r '.data.image' | base64 -d > screenshot.png
  ```

  ```bash Typing and Keyboard Shortcuts theme={null}
  # Type a sentence into the focused text editor
  curl --request POST http://localhost:9990/computer-use \
    --header "Content-Type: application/json" \
    --data '{"action": "type_text", "text": "Hello, this is an automated test!", "delay": 30}'

  # Send Ctrl+S to save
  curl --request POST http://localhost:9990/computer-use \
    --header "Content-Type: application/json" \
    --data '{"action": "press_keys", "key": "s", "modifiers": ["control"]}'
  ```
</CodeGroup>

### Python Examples

<CodeGroup>
  ```python Basic Automation theme={null}
  import requests
  import json
  import base64
  import time
  from io import BytesIO
  from PIL import Image

  def control_computer(action, **params):
      """Send one action to the Computer Use API and return the decoded reply.

      Args:
          action: Name of the action to perform (e.g. "move_mouse").
          **params: Extra fields merged into the JSON request body.

      Returns:
          The JSON-decoded response body.
      """
      url = "http://localhost:9990/computer-use"
      data = {"action": action, **params}
      response = requests.post(url, json=data)
      return response.json()

  # Open a web browser: hover over its icon, then left-click
  control_computer("move_mouse", coordinates={"x": 100, "y": 960})
  control_computer("click_mouse", button="left")

  # Wait for the browser to open
  control_computer("wait", duration=2000)

  # Type a URL and confirm it with Enter
  control_computer("type_text", text="https://example.com")
  control_computer("press_keys", key="enter")

  ```

  ```python Screenshot and Analysis theme={null}
  import requests
  import json
  import base64
  import cv2
  import numpy as np
  from PIL import Image
  from io import BytesIO

  def take_screenshot():
      """Capture the desktop and return it as a NumPy array.

      Returns:
          ``np.array`` of the Pillow-decoded screenshot (channel order is
          whatever Pillow produced, not OpenCV's BGR), or ``None`` when the
          response's ``success`` field is falsy.
      """
      url = "http://localhost:9990/computer-use"
      data = {"action": "screenshot"}
      response = requests.post(url, json=data)

      # Decode the JSON body once: it carries the whole base64 image, so
      # parsing it a second time is wasted work.
      result = response.json()
      if not result["success"]:
          return None

      img_data = base64.b64decode(result["data"]["image"])
      image = Image.open(BytesIO(img_data))
      return np.array(image)

  # Take a screenshot
  img = take_screenshot()

  if img is not None:
      # The array comes from Pillow (RGB channel order) while OpenCV assumes
      # BGR, so name the source order explicitly when converting to grayscale.
      gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

      # Save the screenshot; swap to BGR first so red and blue are not
      # exchanged in the written file
      cv2.imwrite("screenshot.png", cv2.cvtColor(img, cv2.COLOR_RGB2BGR))

      # Perform image analysis (example: find edges)
      edges = cv2.Canny(gray, 100, 200)
      cv2.imwrite("edges.png", edges)
  ```

  ```python Web Form Automation theme={null}
  import requests
  import time

  def control_computer(action, **params):
      """POST a single action (plus any extra fields) and return the parsed JSON."""
      payload = {"action": action}
      payload.update(params)
      reply = requests.post("http://localhost:9990/computer-use", json=payload)
      return reply.json()

  def fill_web_form(form_fields):
      """Fill a form by clicking its first field, typing each value, then pressing Enter.

      Args:
          form_fields: List of dicts with "x", "y" and "value" keys. Only the
              first entry's coordinates are clicked; later fields are reached
              with Tab, in list order.
      """
      if not form_fields:
          # Nothing to type; bail out rather than indexing an empty list.
          return

      # Click on the first form field. Send only x/y so the entry's "value"
      # does not end up inside the coordinates payload.
      first = form_fields[0]
      control_computer("move_mouse", coordinates={"x": first["x"], "y": first["y"]})
      control_computer("click_mouse", button="left")

      last_index = len(form_fields) - 1
      for i, field in enumerate(form_fields):
          # Input the field value
          control_computer("type_text", text=field["value"])

          # Tab moves focus to the next field; skip it after the last one
          if i < last_index:
              control_computer("press_keys", key="tab")
              time.sleep(0.5)

      # Submit the form by pressing Enter
      control_computer("press_keys", key="enter")

  # Example form fields with coordinates and values.
  # fill_web_form() clicks only the first entry's position; the remaining
  # fields are reached with Tab, in list order.
  form_fields = [
      {"x": 500, "y": 300, "value": "John Doe"},
      {"x": 500, "y": 350, "value": "john@example.com"},
      {"x": 500, "y": 400, "value": "Password123"}
  ]

  # Run the example
  fill_web_form(form_fields)
  ```
</CodeGroup>

### JavaScript/Node.js Examples

<CodeGroup>
  ```javascript Basic Automation theme={null}
  const axios = require('axios');

  /**
   * POST a single action to the Computer Use API.
   * Request failures are logged and reported as `{ success: false, error }`
   * instead of being thrown.
   */
  async function controlComputer(action, params = {}) {
    const endpoint = "http://localhost:9990/computer-use";
    const payload = { action, ...params };

    try {
      const reply = await axios.post(endpoint, payload);
      return reply.data;
    } catch (err) {
      console.error('Error:', err.message);
      return { success: false, error: err.message };
    }
  }

  // Example: launch a text editor from its icon and type into it
  async function automateTextEditor() {
    try {
      // Hover the editor's icon, then left-click to open it
      const iconPosition = { x: 150, y: 960 };
      await controlComputer("move_mouse", { coordinates: iconPosition });
      await controlComputer("click_mouse", { button: "left" });

      // Wait for it to open
      await controlComputer("wait", { duration: 2000 });

      // Type some text
      const message = "This is an automated test using Node.js and Bytebot";
      await controlComputer("type_text", { text: message, delay: 30 });

      console.log("Automation completed successfully");
    } catch (err) {
      console.error("Automation failed:", err);
    }
  }

  // Run the example; automateTextEditor() catches and logs its own errors
  automateTextEditor();

  ```

  ```javascript Advanced: Screenshot Comparison theme={null}
  const axios = require('axios');
  const fs = require('fs');
  const { createCanvas, loadImage } = require('canvas');
  const pixelmatch = require('pixelmatch');

  /**
   * Send one action to the Computer Use API and resolve with the response body.
   * On a request error, logs it and resolves with `{ success: false, error }`.
   */
  async function controlComputer(action, params = {}) {
    const endpoint = "http://localhost:9990/computer-use";
    const body = { action, ...params };

    let reply;
    try {
      reply = await axios.post(endpoint, body);
    } catch (err) {
      console.error('Error:', err.message);
      return { success: false, error: err.message };
    }
    return reply.data;
  }

  /**
   * Capture the screen before and after a click and save both captures as
   * screenshot1.png / screenshot2.png. Nothing is written unless both
   * captures report success.
   */
  async function compareScreenshots() {
    try {
      // Take first screenshot
      const before = await controlComputer("screenshot");

      // Do some actions
      await controlComputer("move_mouse", { coordinates: { x: 500, y: 500 } });
      await controlComputer("click_mouse", { button: "left" });
      await controlComputer("wait", { duration: 1000 });

      // Take second screenshot
      const after = await controlComputer("screenshot");

      // Both captures are needed for a comparison
      if (!(before.success && after.success)) {
        return;
      }

      const beforePng = Buffer.from(before.data.image, 'base64');
      const afterPng = Buffer.from(after.data.image, 'base64');

      fs.writeFileSync('screenshot1.png', beforePng);
      fs.writeFileSync('screenshot2.png', afterPng);

      // Now you could load and compare these images
      // This requires additional image comparison libraries
      console.log('Screenshots saved for comparison');
    } catch (err) {
      console.error("Screenshot comparison failed:", err);
    }
  }

  // Run the example; compareScreenshots() catches and logs its own errors
  compareScreenshots();
  ```
</CodeGroup>

## File Operations

### Writing Files

These examples show how to write files to the desktop environment:

<CodeGroup>
  ```python Python theme={null}
  import requests
  import base64

  def write_file(path, content):
      """Write text to a file in the desktop environment through the API.

      Args:
          path: Destination path, sent as the request's "path" field.
          content: Text to write; UTF-8 encoded, then base64 encoded for transport.

      Returns:
          The JSON-decoded API response.
      """
      raw_bytes = content.encode('utf-8')
      payload = {
          "action": "write_file",
          "path": path,
          "data": base64.b64encode(raw_bytes).decode('utf-8'),
      }
      reply = requests.post("http://localhost:9990/computer-use", json=payload)
      return reply.json()

  # Write a text file using an absolute path
  result = write_file("/home/user/hello.txt", "Hello, Bytebot!")
  print(result)  # {'success': True, 'message': 'File written successfully...'}

  # Write to desktop (relative path)
  result = write_file("report.txt", "Daily report content")
  print(result)  # File will be written to /home/user/Desktop/report.txt
  ```

  ```javascript JavaScript theme={null}
  const axios = require('axios');

  /**
   * Write text to a file in the desktop environment.
   * The content is base64-encoded for transport; resolves with the API
   * response body. Request errors are not caught here.
   */
  async function writeFile(path, content) {
    const endpoint = "http://localhost:9990/computer-use";
    const payload = {
      action: "write_file",
      path,
      data: Buffer.from(content, 'utf-8').toString('base64'),
    };

    const { data: body } = await axios.post(endpoint, payload);
    return body;
  }

  // Write a text file
  writeFile("/home/user/notes.txt", "Meeting notes...")
    .then(result => console.log(result))
    .catch(error => console.error(error));

  // Write HTML file to desktop
  const htmlContent = '<html><body><h1>Hello</h1></body></html>';
  writeFile("index.html", htmlContent)
    .then(() => console.log("HTML file created"))
    // writeFile() does not catch request errors, so handle the rejection here too
    .catch(error => console.error(error));
  ```
</CodeGroup>

### Reading Files

These examples show how to read files from the desktop environment:

<CodeGroup>
  ```python Python theme={null}
  import requests
  import base64

  def read_file(path):
      """Read a file from the desktop environment through the API.

      Args:
          path: Path of the file to read, sent as the request's "path" field.

      Returns:
          On success, a dict with the UTF-8 decoded 'content' plus the
          response's 'name', 'size' and 'mediaType'. Otherwise the raw
          JSON-decoded response is returned unchanged.
      """
      payload = {"action": "read_file", "path": path}
      result = requests.post("http://localhost:9990/computer-use", json=payload).json()

      if not result['success']:
          return result

      # The file body arrives base64-encoded
      return {
          'content': base64.b64decode(result['data']).decode('utf-8'),
          'name': result['name'],
          'size': result['size'],
          'mediaType': result['mediaType']
      }

  # Read a text file
  file_data = read_file("/home/user/hello.txt")
  if 'content' in file_data:
      print(f"Content: {file_data['content']}")
      print(f"Size: {file_data['size']} bytes")
      print(f"Type: {file_data['mediaType']}")
  else:
      # read_file() hands back the raw API response when the read fails,
      # which is not the decoded-file dict, so report it instead of indexing it
      print(f"Read failed: {file_data}")
  ```

  ```javascript JavaScript theme={null}
  const axios = require('axios');

  /**
   * Read a file from the desktop environment.
   * Resolves with `{ content, name, size, mediaType }`, where `content` is the
   * base64 payload decoded as UTF-8; throws `Error(result.message)` when the
   * API response is not successful.
   */
  async function readFile(path) {
    const endpoint = "http://localhost:9990/computer-use";
    const payload = { action: "read_file", path };

    const { data: result } = await axios.post(endpoint, payload);

    if (!result.success) {
      throw new Error(result.message);
    }

    // Decode the base64 content
    const content = Buffer.from(result.data, 'base64').toString('utf-8');
    const { name, size, mediaType } = result;
    return { content, name, size, mediaType };
  }

  // Read a file from desktop; readFile() throws when the API reports failure,
  // so that error (and any request error) lands in .catch below
  readFile("report.txt")
    .then(fileData => {
      console.log(`Content: ${fileData.content}`);
      console.log(`Size: ${fileData.size} bytes`);
      console.log(`Type: ${fileData.mediaType}`);
    })
    .catch(error => console.error("Error reading file:", error));
  ```
</CodeGroup>

## Automation Recipes

### Browser Automation

This example demonstrates how to automate browser interactions:

```python theme={null}
import requests
import time

def control_computer(action, **params):
    """POST one action with its parameters and return the JSON-decoded reply."""
    payload = {"action": action}
    payload.update(params)
    reply = requests.post("http://localhost:9990/computer-use", json=payload)
    return reply.json()

def automate_browser():
    """Open a browser, load example.com, capture it, click a link and scroll down."""

    def click_at(x, y):
        # Move the pointer to (x, y), then left-click
        control_computer("move_mouse", coordinates={"x": x, "y": y})
        control_computer("click_mouse", button="left")

    # Open browser (assuming browser icon is at position x=100, y=960)
    click_at(100, 960)
    time.sleep(3)  # Wait for browser to open

    # Type URL and confirm with Enter
    control_computer("type_text", text="https://example.com")
    control_computer("press_keys", key="enter")
    time.sleep(2)  # Wait for page to load

    # Take screenshot of the loaded page
    screenshot = control_computer("screenshot")

    # Click on a link (coordinates would need to be adjusted for your target)
    click_at(300, 400)
    time.sleep(2)

    # Scroll down
    control_computer("scroll", direction="down", scrollCount=5)

# Run the recipe
automate_browser()
```

### Form Filling Automation

This example shows how to automate filling out a form in a web application:

```javascript theme={null}
const axios = require("axios");

/**
 * POST one action to the Computer Use API and resolve with the response body.
 * Request errors are not caught here; they propagate to the caller.
 */
async function controlComputer(action, params = {}) {
  const endpoint = "http://localhost:9990/computer-use";
  const { data: body } = await axios.post(endpoint, { action, ...params });
  return body;
}

/**
 * Fill a three-field form (name, email, message) and submit it,
 * moving between fields with Tab.
 */
async function fillForm() {
  const pressKey = (key) => controlComputer("press_keys", { key });
  const typeText = (text, extra = {}) =>
    controlComputer("type_text", { text, ...extra });

  // Click first input field
  await controlComputer("move_mouse", { coordinates: { x: 400, y: 300 } });
  await controlComputer("click_mouse", { button: "left" });

  // Name, then Tab to the next field
  await typeText("John Doe");
  await pressKey("tab");

  // Email, then Tab to the next field
  await typeText("john@example.com");
  await pressKey("tab");

  // Message
  await typeText(
    "This is an automated message sent using Bytebot's Computer Use API",
    { delay: 30 },
  );

  // Tab to submit button, then press Enter to submit
  await pressKey("tab");
  await pressKey("enter");
}

// Run the recipe; fillForm() does not catch errors itself, so log any rejection here
fillForm().catch(console.error);
```

## Integration with Testing Frameworks

The Computer Use API can be integrated with popular testing frameworks:

### Selenium Alternative

Bytebot can serve as an alternative to Selenium for web testing:

```python theme={null}
import requests
import time
import json

class BytebotWebDriver:
    """Minimal Selenium-style wrapper around the Computer Use HTTP API.

    Each method turns a browser-level operation into one or more
    ``POST {base_url}/computer-use`` actions. Screen positions are passed
    through as given (e.g. ``{"x": 100, "y": 960}``).
    """

    def __init__(self, base_url="http://localhost:9990"):
        """Store the API root; no request is made here."""
        self.base_url = base_url

    def control_computer(self, action, **params):
        """Send one action with its parameters and return the JSON-decoded reply."""
        payload = {"action": action}
        payload.update(params)
        reply = requests.post(f"{self.base_url}/computer-use", json=payload)
        return reply.json()

    def open_browser(self, browser_icon_coords):
        """Left-click the browser icon, then pause 3 seconds while it opens."""
        self.control_computer("move_mouse", coordinates=browser_icon_coords)
        self.control_computer("click_mouse", button="left")
        time.sleep(3)

    def navigate_to(self, url):
        """Type *url*, press Enter, then pause 2 seconds for the page to load."""
        self.control_computer("type_text", text=url)
        self.control_computer("press_keys", key="enter")
        time.sleep(2)

    def click_element(self, coords):
        """Move the pointer to *coords* and left-click."""
        self.control_computer("move_mouse", coordinates=coords)
        self.control_computer("click_mouse", button="left")

    def type_text(self, text):
        """Type *text* at the current focus."""
        self.control_computer("type_text", text=text)

    def press_keys(self, key, modifiers=None):
        """Press *key*; "modifiers" is sent only when a truthy value is given."""
        extra = {"modifiers": modifiers} if modifiers else {}
        self.control_computer("press_keys", key=key, **extra)

    def take_screenshot(self):
        """Request a screenshot and return the API's reply."""
        return self.control_computer("screenshot")

# Usage example: open the browser from its icon, load a page,
# click a point on it, then type text
driver = BytebotWebDriver()
driver.open_browser({"x": 100, "y": 960})
driver.navigate_to("https://example.com")
driver.click_element({"x": 300, "y": 400})
driver.type_text("Hello Bytebot!")
```
