> ## Documentation Index
> Fetch the complete documentation index at: https://docs.gbox.ai/llms.txt
> Use this file to discover all available pages before exploring further.

# Scroll

> Performs a scroll action. Supports both advanced scroll with coordinates and simple scroll with direction.



## OpenAPI

````yaml post /boxes/{boxId}/actions/scroll
# OpenAPI 3.0 document describing the GBOX scroll action endpoint.
openapi: 3.0.0
info:
  title: GBOX Open API
  description: GBOX Open API Documentation
  # Quoted so the version stays a string instead of being parsed as a float.
  version: '1.0'
  contact: {}
servers:
  - url: https://gbox.ai/api/v1
    description: Production Server
# No document-wide security requirement is declared here; the scroll
# operation under `paths` lists its own `security` entry.
security: []
tags: []
paths:
  # POST operation that performs a scroll inside the box identified by `boxId`.
  /boxes/{boxId}/actions/scroll:
    post:
      tags:
        - UI Action
      summary: Scroll
      description: >-
        Performs a scroll action. Supports both advanced scroll with coordinates
        and simple scroll with direction.
      operationId: UIActionController_scroll
      parameters:
        # Required path parameter; the example value is UUID-shaped.
        - name: boxId
          required: true
          in: path
          description: Box ID
          schema:
            example: c9bdc193-b54b-4ddb-a035-5ac0c598d32d
            type: string
      requestBody:
        required: true
        content:
          application/json:
            schema:
              # The body must match exactly one of the two request shapes.
              oneOf:
                - $ref: '#/components/schemas/ScrollAdvanced'
                - $ref: '#/components/schemas/ScrollSimple'
            # Named request examples: one advanced (coordinate-based) body and
            # three simple (direction-based) bodies.
            examples:
              ScrollAdvanced:
                summary: Advanced scroll
                value:
                  x: 100
                  # 'y' is quoted because YAML 1.1 parsers read a bare y as a boolean.
                  'y': 100
                  scrollX: 0
                  scrollY: 100
              ScrollSimpleUp:
                summary: Simple scroll up
                value:
                  direction: up
              # `distance` given as a number (ScrollSimple.distance documents it as pixels).
              ScrollWithDistance:
                summary: Scroll with custom distance
                value:
                  direction: up
                  duration: 500ms
                  distance: 200
              # `distance` given as one of the predefined enum values.
              ScrollWithEnumDistance:
                summary: Scroll with enum distance
                value:
                  direction: up
                  duration: 500ms
                  distance: medium
      # Only the 200 response is documented for this operation.
      responses:
        '200':
          description: Scroll action result with actual parameters used
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ScrollActionResult'
      # Requires the `bearer` scheme defined under components.securitySchemes.
      security:
        - bearer: []
      # SDK usage samples for JavaScript, Python and Go. Each sample creates an
      # Android box and then issues one advanced (coordinate-based) scroll and
      # one simple (direction-based) scroll.
      x-codeSamples:
        - lang: JavaScript
          source: |-
            import GboxSDK from "gbox-sdk";

            const gboxSDK = new GboxSDK({
              apiKey: process.env["GBOX_API_KEY"] // This is the default and can be omitted
            });

            async function main() {
              const box = await gboxSDK.create({ type: "android" });

              // Scroll action (advanced) - with coordinates
              await box.action.scroll({
                x: 400,
                y: 300,
                scrollX: 0,
                scrollY: -100
              });

              // Scroll action (simple) - with direction
              await box.action.scroll({
                direction: "up",
                duration: "500ms",
                distance: "medium"
              });
            }

            main();
        - lang: Python
          source: |-
            import os
            from gbox_sdk import GboxSDK


            def main():
                gbox_sdk = GboxSDK(api_key=os.environ["GBOX_API_KEY"])

                # Create Android box
                box = gbox_sdk.create(type="android")

                # Scroll action (advanced) - with coordinates
                box.action.scroll(x=400, y=300, scroll_x=0, scroll_y=-100)

                # Scroll action (simple) - with direction
                box.action.scroll(
                    direction="up",
                    duration="500ms",
                    distance="medium"
                )


            if __name__ == "__main__":
                main()
        # The Go sample stays a double-quoted scalar so its tab indentation is
        # carried as explicit \t escapes. The unused "fmt" import was removed:
        # Go rejects programs with unused imports, so the sample did not compile.
        - lang: Go
          source: "package main\n\nimport (\n\t\"context\"\n\t\"log\"\n\t\"os\"\n\n\t\"github.com/gbox/gbox-sdk-go\"\n)\n\nfunc main() {\n\tgboxSDK := gbox.NewGboxSDK(os.Getenv(\"GBOX_API_KEY\"))\n\n\tbox, err := gboxSDK.Create(context.Background(), gbox.CreateBoxRequest{\n\t\tType: \"android\",\n\t})\n\tif err != nil {\n\t\tlog.Fatalf(\"Failed to create box: %v\", err)\n\t}\n\n\t// Scroll action (advanced) - with coordinates\n\terr = box.Action.Scroll(context.Background(), gbox.ScrollRequest{\n\t\tX:       400,\n\t\tY:       300,\n\t\tScrollX: 0,\n\t\tScrollY: -100,\n\t})\n\tif err != nil {\n\t\tlog.Fatalf(\"Failed to perform scroll action: %v\", err)\n\t}\n\n\t// Scroll action (simple) - with direction\n\terr = box.Action.Scroll(context.Background(), gbox.ScrollRequest{\n\t\tDirection: \"up\",\n\t\tDuration:  \"500ms\",\n\t\tDistance:  \"medium\",\n\t})\n\tif err != nil {\n\t\tlog.Fatalf(\"Failed to perform scroll action: %v\", err)\n\t}\n}"
components:
  schemas:
    # Request body for a coordinate-based scroll. The four numeric fields
    # (scrollX, scrollY, x, y) are required; `options` is optional.
    ScrollAdvanced:
      type: object
      properties:
        # Scroll deltas: the sign of each value selects the direction, as
        # spelled out in the field descriptions.
        scrollX:
          type: number
          description: >-
            Horizontal scroll amount. Positive values scroll content rightward
            (reveals content on the right), negative values scroll content
            leftward (reveals content on the left).
          example: 0
        scrollY:
          type: number
          description: >-
            Vertical scroll amount. Positive values scroll content downward
            (reveals content below), negative values scroll content upward
            (reveals content above).
          example: -100
        # Coordinates of the position the scroll is performed from.
        x:
          type: number
          description: X coordinate of the scroll position
          example: 400
        # Quoted because YAML 1.1 parsers read a bare y as a boolean.
        'y':
          type: number
          description: Y coordinate of the scroll position
          example: 300
        # Shared action options; the `$ref` is wrapped in `allOf` so that the
        # description and example can sit beside it.
        options:
          description: >-
            Action options. When `options.screenshot` is provided, ALL
            deprecated screenshot fields (outputFormat, presignedExpiresIn,
            screenshotDelay, screenshotRange, includeScreenshot) will be
            completely ignored.
          example:
            screenshot:
              outputFormat: base64
              presignedExpiresIn: 30m
              delay: 500ms
              phases:
                - before
                - after
          allOf:
            - $ref: '#/components/schemas/ActionCommonOptions'
      title: Scroll Advanced
      description: >-
        Advanced scroll action configuration. The scroll will be performed from
        the specified coordinates with the given scroll amounts. Use positive
        scrollY to scroll content downward (reveal content below), negative
        scrollY to scroll content upward (reveal content above). Use positive
        scrollX to scroll content rightward (reveal content on the right),
        negative scrollX to scroll content leftward (reveal content on the
        left).
      required:
        - scrollX
        - scrollY
        - x
        - 'y'
    # Request body for a direction-based scroll. Only `direction` is required;
    # duration, distance, location and options are optional refinements.
    ScrollSimple:
      type: object
      properties:
        # NOTE(review): ScrollAdvanced describes scrolling content *downward*
        # as revealing content below, whereas here 'up' (content moves upward)
        # is said to reveal content below. Confirm that both descriptions use
        # the same convention before relying on either wording.
        direction:
          type: string
          description: >-
            Direction to scroll. The scroll will be performed from the center of
            the screen towards this direction. 'up' scrolls content upward
            (reveals content below), 'down' scrolls content downward (reveals
            content above), 'left' scrolls content leftward (reveals content on
            the right), 'right' scrolls content rightward (reveals content on
            the left).
          enum:
            - up
            - down
            - left
            - right
          example: up
        # Duration string made of a number plus a unit suffix (ms, s, m, h).
        duration:
          type: string
          description: >-
            Duration of the scroll


            Supported time units: ms (milliseconds), s (seconds), m (minutes), h
            (hours)

            Example formats: "500ms", "30s", "5m", "1h"

            Default: 500ms
          example: 500ms
          default: 500ms
          title: ScrollDuration
        # Either a pixel count or one of the named presets.
        distance:
          description: >-
            Distance of the scroll. Can be either a number (in pixels) or a
            predefined enum value (tiny, short, medium, long). If not provided,
            the scroll will be performed from the center of the screen to the
            screen edge.
          oneOf:
            - type: number
              example: 300
            - type: string
              enum:
                - tiny
                - short
                - medium
                - long
              example: medium
        # Optional natural-language origin; overrides the screen-center default.
        location:
          type: string
          description: >-
            Natural language description of the location where the scroll should
            originate. If not provided, the scroll will be performed from the
            center of the screen.
          example: Side bar
        # Shared action options; the `$ref` is wrapped in `allOf` so that the
        # description and example can sit beside it.
        options:
          description: >-
            Action options. When `options.screenshot` is provided, ALL
            deprecated screenshot fields (outputFormat, presignedExpiresIn,
            screenshotDelay, screenshotRange, includeScreenshot) will be
            completely ignored.
          example:
            screenshot:
              outputFormat: base64
              presignedExpiresIn: 30m
              delay: 500ms
              phases:
                - before
                - after
          allOf:
            - $ref: '#/components/schemas/ActionCommonOptions'
      title: Scroll Simple
      description: >-
        Simple scroll action configuration. The scroll will be performed
        towards the specified direction, starting from the center of the
        screen unless `location` is provided.
      required:
        - direction
    # Response body of the scroll operation. `message`, `actionId` and `actual`
    # are required; `screenshot` is optional.
    ScrollActionResult:
      type: object
      properties:
        message:
          type: string
          description: message
          example: Action executed successfully
        # The example value is UUID-shaped.
        actionId:
          type: string
          description: >-
            Unique identifier for each action. Use this ID to locate the action
            and report issues.
          example: c9bdc193-b54b-4ddb-a035-5ac0c598d32d
        # Not listed under `required`; the example shows the trace, before and
        # after entries, each carrying a `uri`.
        screenshot:
          description: >-
            Optional screenshot data. Only present when screenshots are
            requested via options.screenshot.phases or deprecated fields
          example:
            trace:
              uri: data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUA...
            before:
              uri: data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUA...
            after:
              uri: data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUA...
          allOf:
            - $ref: '#/components/schemas/ActionResultScreenshot'
        # Reports the coordinates and scroll amounts that were used, in the
        # same four-field shape as an advanced scroll request.
        actual:
          description: Actual parameters used when executing the scroll action
          example:
            x: 400
            'y': 300
            scrollX: 0
            scrollY: -100
          allOf:
            - $ref: '#/components/schemas/ScrollActionActual'
      title: Scroll Action Result
      description: Result of scroll action execution with actual parameters used
      required:
        - message
        - actionId
        - actual
    # Options shared by action requests: screenshot behaviour and the model
    # used to resolve natural-language targets. No property is required.
    ActionCommonOptions:
      type: object
      properties:
        screenshot:
          description: >-
            Screenshot options. Can be a boolean to enable/disable screenshots,
            or an object to configure screenshot options.
          # NOTE(review): OpenAPI 3.0 ignores properties placed beside `$ref`,
          # so the `example` on the first branch may not be picked up by
          # tooling. Other schemas in this file wrap the `$ref` in `allOf`
          # instead; confirm whether this one should too.
          oneOf:
            - $ref: '#/components/schemas/ActionScreenshotOptions'
              example:
                outputFormat: base64
                presignedExpiresIn: 30m
                delay: 500ms
                phases:
                  - before
                  - after
            - type: boolean
              example: true
        # The description now states the same default as the `default` keyword
        # ('gelato'); it previously named 'uitars', which is not an enum value.
        model:
          type: string
          description: >-
            Model to use for natural-language target resolution. Defaults to
            'gelato'.
          enum:
            - gpt-5
            - gpt-4o
            - gelato
            - ui-tars
            - openai-computer-use
          default: gelato
      title: Action Common Options
      description: Action common options
    # Container for the screenshots returned with an action result. None of
    # the three entries is marked required.
    ActionResultScreenshot:
      type: object
      properties:
        # Each entry references its own schema through `allOf`, which lets a
        # description and example sit beside the reference.
        trace:
          description: URI of the screenshot before the action with operation trace
          example:
            uri: data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUA...
          allOf:
            - $ref: '#/components/schemas/ActionResultOperationTrace'
        before:
          description: URI of the screenshot before the action
          example:
            uri: data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUA...
          allOf:
            - $ref: '#/components/schemas/ActionResultScreenshotBefore'
        after:
          description: URI of the screenshot after the action
          example:
            uri: data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUA...
          allOf:
            - $ref: '#/components/schemas/ActionResultScreenshotAfter'
      title: Action Result Screenshot
      description: Complete screenshot result with operation trace, before and after images
    # Parameters reported back after a scroll was executed. All four numeric
    # fields are required.
    ScrollActionActual:
      type: object
      properties:
        x:
          type: number
          description: X coordinate of the scroll position
          example: 400
        # Quoted because YAML 1.1 parsers read a bare y as a boolean.
        'y':
          type: number
          description: Y coordinate of the scroll position
          example: 300
        scrollX:
          type: number
          description: Horizontal scroll amount
          example: 0
        scrollY:
          type: number
          description: Vertical scroll amount
          example: -100
      title: Scroll Action Actual Parameters
      description: Actual parameters used when executing the scroll action
      required:
        - x
        - 'y'
        - scrollX
        - scrollY
    # Object form of `options.screenshot`: output format, presigned URL
    # lifetime, post-action delay and which phases to capture. No property is
    # required; every field except `phases` is a string.
    ActionScreenshotOptions:
      type: object
      properties:
        outputFormat:
          type: string
          enum:
            - base64
            - storageKey
          description: Type of the URI. Default is base64.
          default: base64
          example: base64
        # Duration string; documented as relevant only for `storageKey` output.
        presignedExpiresIn:
          type: string
          description: >-
            Presigned url expires in. Only takes effect when outputFormat is
            storageKey.


            Supported time units: ms (milliseconds), s (seconds), m (minutes), h
            (hours)

            Example formats: "500ms", "30s", "5m", "1h"

            Default: 30m
          example: 30m
          default: 30m
          title: PresignedExpiresIn
        # Step 3 of the description names this property (`delay`) rather than
        # the deprecated `screenshotDelay` request field.
        delay:
          type: string
          description: >-
            Delay after performing the action, before taking the final
            screenshot.


            Execution flow:

            1. Take screenshot before action

            2. Perform the action

            3. Wait for `delay` (this parameter)

            4. Take screenshot after action


            Example: '500ms' means wait 500ms after the action before capturing
            the final screenshot.


            Supported time units: ms (milliseconds), s (seconds), m (minutes), h
            (hours)

            Example formats: "500ms", "30s", "5m", "1h"

            Default: 500ms

            Maximum allowed: 30s
          example: 500ms
          default: 500ms
          title: Delay
        # Array of phase names; the default lists all three phases.
        phases:
          type: array
          description: >-
            Specify which screenshot phases to capture.


            Available options:

            - before: Screenshot before the action

            - after: Screenshot after the action

            - trace: Screenshot with operation trace


            Default captures all three phases. Can specify one or multiple in an
            array.

            If empty array is provided, no screenshots will be taken.
          default:
            - before
            - after
            - trace
          example:
            - before
            - after
          items:
            type: string
            enum:
              - before
              - after
              - trace
      title: Action Screenshot Options
      description: Action screenshot options
    # Trace screenshot entry: a single required `uri` string.
    ActionResultOperationTrace:
      type: object
      properties:
        # The example is a truncated base64 data URI.
        uri:
          type: string
          description: URI of the screenshot with operation trace
          example: data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUA...
      title: Action Result Screenshot Operation Trace
      description: Screenshot with action operation trace
      required:
        - uri
    # Screenshot taken before the action: `uri` is required, `presignedUrl`
    # is optional.
    ActionResultScreenshotBefore:
      type: object
      properties:
        # The example is a truncated base64 data URI.
        uri:
          type: string
          description: URI of the screenshot before the action
          example: data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUA...
        presignedUrl:
          type: string
          description: Presigned url of the screenshot before the action
          example: https://example.com/xxxxx/xxxxx/xxxxx
      title: Action Result Screenshot Before
      description: Screenshot taken before action execution
      required:
        - uri
    # Screenshot taken after the action: `uri` is required, `presignedUrl`
    # is optional.
    ActionResultScreenshotAfter:
      type: object
      properties:
        # The example is a truncated base64 data URI.
        uri:
          type: string
          description: URI of the screenshot after the action
          example: data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUA...
        # The description now says "after"; it previously said "before", which
        # contradicted this schema's title and its `uri` description.
        presignedUrl:
          type: string
          description: Presigned url of the screenshot after the action
          example: https://example.com/xxxxx/xxxxx/xxxxx
      title: Action Result Screenshot After
      description: Screenshot taken after action execution
      required:
        - uri
  # Authentication schemes; `bearer` is the name the scroll operation's
  # `security` requirement refers to.
  securitySchemes:
    # HTTP bearer-token authentication.
    bearer:
      scheme: bearer
      bearerFormat: JWT
      type: http
      description: >-
        Enter your API Key in the format: Bearer <token>. Get it from
        https://gbox.ai

````