POST /boxes/{boxId}/actions/elements/detect
JavaScript
import GboxSDK from "gbox-sdk";

// SDK client used by the example below.
const gboxSDK = new GboxSDK({
  // This is the default and can be omitted
  apiKey: process.env.GBOX_API_KEY
});

/**
 * End-to-end example of the detect-elements action.
 *
 * Creates a Linux box, opens https://gbox.ai in a browser tab, runs element
 * detection, logs the returned screenshot info and the number of detected
 * elements, then clicks the element with id "1".
 *
 * If detection returns no elements, a warning is logged and no click is
 * attempted.
 *
 * @returns {Promise<void>} Resolves once the click (or the early exit) is done.
 */
async function main() {
  const box = await gboxSDK.create({ type: "linux" });

  await box.browser.openTab({
    url: "https://gbox.ai",
  });

  const { screenshot, elements } = await box.action.elements.detect({
    screenshot: {
      outputFormat: "storageKey",
    },
  });

  console.info(`Screenshot: ${JSON.stringify(screenshot, null, 2)}`);

  const detectedElements = elements.list();
  console.info(`Detected elements length: ${detectedElements.length}`);

  // Nothing was detected, so there is no valid click target; bail out instead
  // of passing an undefined target to the click action.
  if (detectedElements.length === 0) {
    console.warn("No elements detected; skipping click.");
    return;
  }

  // You can send the screenshot to an LLM or Agent to decide which element to click
  // NOTE(review): assumes detected ids start at "1", as in the sample response — confirm.
  const firstElement = elements.get("1");
  await box.action.click({
    // here we just click the first element
    target: firstElement,
  });

  console.info(
    `Clicked element: ${JSON.stringify(firstElement, null, 2)}`
  );
}

// Run the example; report any failure and signal it through the exit code
// instead of leaving the promise rejection unhandled.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
{
  "screenshot": {
    "source": {
      "uri": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUA...",
      "presignedUrl": "https://example.com/xxxxx/xxxxx/xxxxx"
    },
    "marked": {
      "uri": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUA...",
      "presignedUrl": "https://example.com/xxxxx/xxxxx/xxxxx"
    }
  },
  "elements": [
    {
      "id": "1",
      "source": "chromium",
      "type": "button",
      "path": "#root > table > tbody > tr:nth-child(1) > td:nth-child(1) > div > button",
      "width": 100,
      "height": 50,
      "x": 100,
      "y": 100,
      "centerX": 150,
      "centerY": 125,
      "label": "Click me"
    }
  ]
}

Authorizations

Authorization
string
header
required

Enter your API Key in the format: Bearer <token>. Get it from https://gbox.ai

Path Parameters

boxId
string
required

Box ID

Example:

"c9bdc193-b54b-4ddb-a035-5ac0c598d32d"

Body

application/json

Detect UI elements action configuration

screenshot
object

Options for the screenshot taken during element detection.

Example:
{
"outputFormat": "base64",
"presignedExpiresIn": "30m"
}

Response

200 - application/json

Result containing original screenshot, annotated screenshot, and detected elements

screenshot
object
required

Screenshots from the detection run: the original image (`source`) and the annotated image (`marked`).

elements
Detected Element · object[]
required

Detected UI elements