> ## Documentation Index
> Fetch the complete documentation index at: https://docs.gbox.ai/llms.txt
> Use this file to discover all available pages before exploring further.

# Swipe

> Performs a swipe in the specified direction



## OpenAPI

````yaml post /boxes/{boxId}/actions/swipe
openapi: 3.0.0
info:
  title: GBOX Open API
  description: GBOX Open API Documentation
  version: '1.0'
  contact: {}
servers:
  - url: https://gbox.ai/api/v1
    description: Production Server
security: []
tags: []
paths:
  /boxes/{boxId}/actions/swipe:
    post:
      tags:
        - UI Action
      summary: Swipe
      description: Performs a swipe in the specified direction
      operationId: UIActionController_swipe
      # Path parameter identifying which box receives the swipe.
      parameters:
        - name: boxId
          required: true
          in: path
          description: Box ID
          schema:
            example: c9bdc193-b54b-4ddb-a035-5ac0c598d32d
            type: string
      # JSON body: one of the direction-based (SwipeSimple) or point-to-point
      # (SwipeAdvanced) request shapes.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              oneOf:
                - $ref: '#/components/schemas/SwipeSimple'
                - $ref: '#/components/schemas/SwipeAdvanced'
            # Named request examples: the first three use the SwipeSimple
            # shape, the last one uses the SwipeAdvanced shape.
            examples:
              SwipeSimpleUp:
                summary: Swipe up
                value:
                  direction: up
              SwipeWithDistance:
                summary: Swipe with custom distance
                value:
                  direction: up
                  duration: 500ms
                  distance: 200
              SwipeWithEnumDistance:
                summary: Swipe with enum distance
                value:
                  direction: up
                  duration: 500ms
                  distance: medium
              SwipeWithCustomPath:
                summary: Swipe with custom path
                value:
                  start:
                    x: 100
                    'y': 150
                  end:
                    x: 400
                    'y': 300
      # Only a 200 response is documented; its body is SwipeActionResult.
      responses:
        '200':
          description: Swipe action result with actual parameters used
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/SwipeActionResult'
      # Requires the `bearer` security scheme.
      security:
        - bearer: []
      x-codeSamples:
        - lang: JavaScript
          source: |-
            import GboxSDK from "gbox-sdk";

            const gboxSDK = new GboxSDK({
              apiKey: process.env["GBOX_API_KEY"] // This is the default and can be omitted
            });

            async function main() {
              const box = await gboxSDK.create({ type: "android" });

              // Swipe up
              await box.action.swipe({
                direction: "up",
                duration: "200ms"
              });

              // Swipe with enum distance
              await box.action.swipe({
                direction: "up",
                distance: "medium",
                duration: "500ms"
              });

              // Swipe with custom pixel distance
              await box.action.swipe({
                direction: "up",
                distance: 300,
                duration: "500ms"
              });

              // Swipe with natural language location
              await box.action.swipe({
                direction: "up",
                distance: 300,
                duration: "500ms",
                location: "screen bottom"
              });

              // Swipe with custom path
              await box.action.swipe({
                start: { x: 100, y: 150 },
                end: { x: 400, y: 300 },
                duration: "200ms"
              });

              // Swipe with natural language start/end
              await box.action.swipe({
                start: "screen bottom",
                end: "screen top",
                duration: "200ms"
              });
            }

            main();
        - lang: Python
          source: |-
            import os
            from gbox_sdk import GboxSDK


            def main():
                gbox_sdk = GboxSDK(api_key=os.environ["GBOX_API_KEY"])  # This is the default and can be omitted

                # Create Android box
                box = gbox_sdk.create(type="android")

                # Swipe up
                box.action.swipe(direction="up", duration="200ms")

                # Swipe with enum distance
                box.action.swipe(direction="up", distance="medium", duration="500ms")

                # Swipe with custom pixel distance
                box.action.swipe(direction="up", distance=300, duration="500ms")

                # Swipe with natural language location
                box.action.swipe(direction="up", distance=300, duration="500ms", location="screen bottom")

                # Swipe with custom path
                box.action.swipe(
                    start={"x": 100, "y": 150},
                    end={"x": 400, "y": 300},
                    duration="200ms"
                )

                # Swipe with natural language start/end
                box.action.swipe(
                    start="screen bottom",
                    end="screen top",
                    duration="200ms"
                )


            if __name__ == "__main__":
                main()
        - lang: Go
          source: "package main\n\nimport (\n\t\"context\"\n\t\"fmt\"\n\t\"log\"\n\t\"os\"\n\n\t\"github.com/gbox/gbox-sdk-go\"\n)\n\nfunc main() {\n\tgboxSDK := gbox.NewGboxSDK(os.Getenv(\"GBOX_API_KEY\"))\n\n\tbox, err := gboxSDK.Create(context.Background(), gbox.CreateBoxRequest{\n\t\tType: \"android\",\n\t})\n\tif err != nil {\n\t\tlog.Fatalf(\"Failed to create box: %v\", err)\n\t}\n\n\t// Swipe up\n\terr = box.Action.Swipe(context.Background(), gbox.SwipeRequest{\n\t\tDirection: \"up\",\n\t\tDuration:  \"200ms\",\n\t})\n\tif err != nil {\n\t\tlog.Fatalf(\"Failed to perform swipe action: %v\", err)\n\t}\n\n\t// Swipe with enum distance\n\terr = box.Action.Swipe(context.Background(), gbox.SwipeRequest{\n\t\tDirection: \"up\",\n\t\tDistance:  \"medium\",\n\t\tDuration:  \"500ms\",\n\t})\n\tif err != nil {\n\t\tlog.Fatalf(\"Failed to perform swipe action: %v\", err)\n\t}\n\n\t// Swipe with custom pixel distance\n\terr = box.Action.Swipe(context.Background(), gbox.SwipeRequest{\n\t\tDirection: \"up\",\n\t\tDistance:  300,\n\t\tDuration:  \"500ms\",\n\t})\n\tif err != nil {\n\t\tlog.Fatalf(\"Failed to perform swipe action: %v\", err)\n\t}\n\n\t// Swipe with natural language location\n\terr = box.Action.Swipe(context.Background(), gbox.SwipeRequest{\n\t\tDirection: \"up\",\n\t\tDistance:  300,\n\t\tDuration:  \"500ms\",\n\t\tLocation:  \"screen bottom\",\n\t})\n\tif err != nil {\n\t\tlog.Fatalf(\"Failed to perform swipe action: %v\", err)\n\t}\n\n\t// Swipe with custom path\n\terr = box.Action.Swipe(context.Background(), gbox.SwipeRequest{\n\t\tStart:    gbox.Point{X: 100, Y: 150},\n\t\tEnd:      gbox.Point{X: 400, Y: 300},\n\t\tDuration: \"200ms\",\n\t})\n\tif err != nil {\n\t\tlog.Fatalf(\"Failed to perform swipe action: %v\", err)\n\t}\n}"
components:
  schemas:
    # Direction-based swipe request body; only `direction` is required.
    SwipeSimple:
      type: object
      properties:
        # NOTE(review): `location` below can change where the swipe
        # originates, so "from the center of the screen" here appears to
        # describe the default only; confirm the intended wording.
        direction:
          type: string
          description: >-
            Direction to swipe. The gesture will be performed from the center of
            the screen towards this direction.
          enum:
            - up
            - down
            - left
            - right
            - upLeft
            - upRight
            - downLeft
            - downRight
          example: up
        # Duration string made of a number plus a unit suffix; defaults to 500ms.
        duration:
          type: string
          description: >-
            Duration of the swipe


            Supported time units: ms (milliseconds), s (seconds), m (minutes), h
            (hours)

            Example formats: "500ms", "30s", "5m", "1h"

            Default: 500ms
          example: 500ms
          default: 500ms
          title: SwipeDuration
        # Either a pixel count (number) or one of the named distance presets.
        distance:
          description: >-
            Distance of the swipe. Can be either a number (in pixels) or a
            predefined enum value (tiny, short, medium, long). If not provided,
            the swipe will be performed from the center of the screen to the
            screen edge
          oneOf:
            - type: number
              example: 300
            - type: string
              enum:
                - tiny
                - short
                - medium
                - long
              example: medium
        # Optional natural-language origin for the swipe.
        location:
          type: string
          description: >-
            Natural language description of the location where the swipe should
            originate. If not provided, the swipe will be performed from the
            center of the screen.
          example: Chrome App
        # References ActionCommonOptions through allOf, with a local
        # description and example.
        options:
          description: >-
            Action options. When `options.screenshot` is provided, ALL
            deprecated screenshot fields (outputFormat, presignedExpiresIn,
            screenshotDelay, screenshotRange, includeScreenshot) will be
            completely ignored.
          example:
            screenshot:
              outputFormat: base64
              presignedExpiresIn: 30m
              delay: 500ms
              phases:
                - before
                - after
          allOf:
            - $ref: '#/components/schemas/ActionCommonOptions'
      title: Swipe Simple
      description: >-
        Simple swipe action configuration. The gesture will be performed from
        the center of the screen towards the specified direction.
      required:
        - direction
    # Point-to-point swipe request body; `start` and `end` are both required
    # and each may be a coordinate object or a natural-language string.
    SwipeAdvanced:
      type: object
      properties:
        # NOTE(review): `example` sits beside `$ref` in the oneOf entries
        # below; OpenAPI 3.0 tooling may ignore keys placed next to `$ref`.
        # Confirm these examples render (other properties in this file wrap
        # the reference in `allOf` instead).
        start:
          description: Start point of the swipe path (coordinates or natural language)
          oneOf:
            - $ref: '#/components/schemas/SwipePath'
              example:
                x: 100
                'y': 150
            - type: string
              example: Top-left corner of the screen
        end:
          description: End point of the swipe path (coordinates or natural language)
          oneOf:
            - $ref: '#/components/schemas/SwipePath'
              example:
                x: 400
                'y': 300
            - type: string
              example: Bottom-right corner of the screen
        # Duration string made of a number plus a unit suffix; defaults to 500ms.
        duration:
          type: string
          description: >-
            Duration of the swipe


            Supported time units: ms (milliseconds), s (seconds), m (minutes), h
            (hours)

            Example formats: "500ms", "30s", "5m", "1h"

            Default: 500ms
          example: 500ms
          default: 500ms
          title: SwipeDuration
        # References ActionCommonOptions through allOf, with a local
        # description and example.
        options:
          description: >-
            Action options. When `options.screenshot` is provided, ALL
            deprecated screenshot fields (outputFormat, presignedExpiresIn,
            screenshotDelay, screenshotRange, includeScreenshot) will be
            completely ignored.
          example:
            screenshot:
              outputFormat: base64
              presignedExpiresIn: 30m
              delay: 500ms
              phases:
                - before
                - after
          allOf:
            - $ref: '#/components/schemas/ActionCommonOptions'
      title: Swipe Advanced
      description: >-
        Swipe action configuration. The gesture will start from the specified
        start point and move towards the end point.
      required:
        - start
        - end
    # Response body of the swipe action. `message`, `actionId` and `actual`
    # are required; `screenshot` is optional.
    SwipeActionResult:
      type: object
      properties:
        message:
          type: string
          description: message
          example: Action executed successfully
        actionId:
          type: string
          description: >-
            Unique identifier for each action. Use this ID to locate the action
            and report issues.
          example: c9bdc193-b54b-4ddb-a035-5ac0c598d32d
        # References ActionResultScreenshot through allOf, with a local
        # description and example.
        screenshot:
          description: >-
            Optional screenshot data. Only present when screenshots are
            requested via options.screenshot.phases or deprecated fields
          example:
            trace:
              uri: data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUA...
            before:
              uri: data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUA...
            after:
              uri: data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUA...
          allOf:
            - $ref: '#/components/schemas/ActionResultScreenshot'
        # References SwipeActionActual through allOf: the start, end and
        # duration reported for the executed swipe.
        actual:
          description: Actual parameters used when executing the swipe action
          example:
            start:
              x: 540
              'y': 1000
            end:
              x: 540
              'y': 500
            duration: 500ms
          allOf:
            - $ref: '#/components/schemas/SwipeActionActual'
      title: Swipe Action Result
      description: Result of swipe action execution with actual parameters used
      required:
        - message
        - actionId
        - actual
    # Options shared by action requests; every property is optional.
    ActionCommonOptions:
      type: object
      properties:
        # Either a full ActionScreenshotOptions object or a plain boolean.
        screenshot:
          description: >-
            Screenshot options. Can be a boolean to enable/disable screenshots,
            or an object to configure screenshot options.
          oneOf:
            - $ref: '#/components/schemas/ActionScreenshotOptions'
              example:
                outputFormat: base64
                presignedExpiresIn: 30m
                delay: 500ms
                phases:
                  - before
                  - after
            - type: boolean
              example: true
        # The prose default is kept in sync with the machine-readable
        # `default` below, which must be one of the enum values.
        model:
          type: string
          description: >-
            Model to use for natural-language target resolution. Defaults to
            'gelato'.
          enum:
            - gpt-5
            - gpt-4o
            - gelato
            - ui-tars
            - openai-computer-use
          default: gelato
      title: Action Common Options
      description: Action common options
    # A single point with required numeric `x` and `y`; referenced for both
    # the start and the end of a swipe.
    SwipePath:
      type: object
      properties:
        x:
          type: number
          description: Start/end x coordinate of the swipe path
          example: 100
        # `y` is quoted so YAML 1.1 parsers do not read the key as a boolean.
        'y':
          type: number
          description: Start/end y coordinate of the swipe path
          example: 150
      title: Swipe Path
      description: Swipe path
      required:
        - x
        - 'y'
    # Container for the trace/before/after screenshot entries. None of the
    # three properties is required.
    ActionResultScreenshot:
      type: object
      properties:
        trace:
          description: URI of the screenshot before the action with operation trace
          example:
            uri: data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUA...
          allOf:
            - $ref: '#/components/schemas/ActionResultOperationTrace'
        before:
          description: URI of the screenshot before the action
          example:
            uri: data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUA...
          allOf:
            - $ref: '#/components/schemas/ActionResultScreenshotBefore'
        after:
          description: URI of the screenshot after the action
          example:
            uri: data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUA...
          allOf:
            - $ref: '#/components/schemas/ActionResultScreenshotAfter'
      title: Action Result Screenshot
      description: Complete screenshot result with operation trace, before and after images
    # Parameters reported back for the executed swipe. `start`, `end` and
    # `duration` are all required; both points reference SwipePath.
    SwipeActionActual:
      type: object
      properties:
        start:
          description: Start point of the swipe
          example:
            x: 540
            'y': 1000
          allOf:
            - $ref: '#/components/schemas/SwipePath'
        end:
          description: End point of the swipe
          example:
            x: 540
            'y': 500
          allOf:
            - $ref: '#/components/schemas/SwipePath'
        # Duration string made of a number plus a unit suffix.
        duration:
          type: string
          description: >-
            Duration of the swipe


            Supported time units: ms (milliseconds), s (seconds), m (minutes), h
            (hours)

            Example formats: "500ms", "30s", "5m", "1h"

            Default: 500ms
          example: 500ms
          default: 500ms
          title: SwipeDuration
      title: Swipe Action Actual Parameters
      description: Actual parameters used when executing the swipe action
      required:
        - start
        - end
        - duration
    # Object form of the `screenshot` option; every property is optional and
    # carries a default.
    ActionScreenshotOptions:
      type: object
      properties:
        outputFormat:
          type: string
          enum:
            - base64
            - storageKey
          description: Type of the URI. Default is base64.
          default: base64
          example: base64
        presignedExpiresIn:
          type: string
          description: >-
            Presigned url expires in. Only takes effect when outputFormat is
            storageKey.


            Supported time units: ms (milliseconds), s (seconds), m (minutes), h
            (hours)

            Example formats: "500ms", "30s", "5m", "1h"

            Default: 30m
          example: 30m
          default: 30m
          title: PresignedExpiresIn
        # The description names this property `delay`; `screenshotDelay` is
        # the deprecated top-level field, not this parameter.
        delay:
          type: string
          description: >-
            Delay after performing the action, before taking the final
            screenshot.


            Execution flow:

            1. Take screenshot before action

            2. Perform the action

            3. Wait for delay (this parameter)

            4. Take screenshot after action


            Example: '500ms' means wait 500ms after the action before capturing
            the final screenshot.


            Supported time units: ms (milliseconds), s (seconds), m (minutes), h
            (hours)

            Example formats: "500ms", "30s", "5m", "1h"

            Default: 500ms

            Maximum allowed: 30s
          example: 500ms
          default: 500ms
          title: Delay
        # Array of phase names; the default lists all three.
        phases:
          type: array
          description: >-
            Specify which screenshot phases to capture.


            Available options:

            - before: Screenshot before the action

            - after: Screenshot after the action

            - trace: Screenshot with operation trace


            Default captures all three phases. Can specify one or multiple in an
            array.

            If empty array is provided, no screenshots will be taken.
          default:
            - before
            - after
            - trace
          example:
            - before
            - after
          items:
            type: string
            enum:
              - before
              - after
              - trace
      title: Action Screenshot Options
      description: Action screenshot options
    # Trace screenshot entry: a single required `uri` string.
    ActionResultOperationTrace:
      type: object
      properties:
        uri:
          type: string
          description: URI of the screenshot with operation trace
          example: data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUA...
      title: Action Result Screenshot Operation Trace
      description: Screenshot with action operation trace
      required:
        - uri
    # Pre-action screenshot entry: required `uri`, optional `presignedUrl`.
    ActionResultScreenshotBefore:
      type: object
      properties:
        uri:
          type: string
          description: URI of the screenshot before the action
          example: data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUA...
        presignedUrl:
          type: string
          description: Presigned url of the screenshot before the action
          example: https://example.com/xxxxx/xxxxx/xxxxx
      title: Action Result Screenshot Before
      description: Screenshot taken before action execution
      required:
        - uri
    # Post-action screenshot entry: required `uri`, optional `presignedUrl`.
    ActionResultScreenshotAfter:
      type: object
      properties:
        uri:
          type: string
          description: URI of the screenshot after the action
          example: data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUA...
        presignedUrl:
          type: string
          description: Presigned url of the screenshot after the action
          example: https://example.com/xxxxx/xxxxx/xxxxx
      title: Action Result Screenshot After
      description: Screenshot taken after action execution
      required:
        - uri
  # HTTP bearer authentication scheme named `bearer`; the swipe operation's
  # `security` entry refers to it by this name.
  securitySchemes:
    bearer:
      scheme: bearer
      bearerFormat: JWT
      type: http
      description: >-
        Enter your API Key in the format: Bearer <token>. Get it from
        https://gbox.ai

````