> ## Documentation Index
> Fetch the complete documentation index at: https://docs.kadoa.com/llms.txt
> Use this file to discover all available pages before exploring further.

# Start crawl

> Create a crawling configuration and start a session in one operation (equivalent to v4/crawl)



## OpenAPI

````yaml post /v4/crawl/
# OpenAPI description of the Kadoa "Start crawl" endpoint (POST /v4/crawl/).
openapi: 3.0.3
info:
  title: Kadoa API
  # Version of this API description, not of the OpenAPI specification
  # (that one is the `openapi` key above).
  version: 3.0.0
  contact:
    name: Support
    email: support@kadoa.com
servers:
  - url: https://api.kadoa.com
# Empty top-level requirement list: no security scheme applies by default.
# Operations that need authentication declare their own `security` entry.
security: []
paths:
  /v4/crawl/:
    # Creates a crawl configuration and starts a session in a single call.
    post:
      tags:
        - Crawler
      summary: Start crawl
      # NOTE(review): this path is itself /v4/crawl/, so "equivalent to
      # v4/crawl" reads as self-referential - confirm which endpoint is meant.
      description: >-
        Create a crawling configuration and start a session in one operation
        (equivalent to v4/crawl)
      parameters: []
      # `required` is not set on the request body, so OpenAPI treats the body
      # as optional. NOTE(review): confirm whether the server accepts that.
      requestBody:
        description: Body
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/StartCrawlerSessionRequest'
      responses:
        # Each response description names the outcome rather than repeating
        # the status code, so rendered docs and generated clients carry
        # something readable.
        '200':
          description: 'OK - crawl session start response'
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/StartCrawlerSessionResponse'
        # All error responses below share the same inline envelope: `error`
        # and `message` are required; `details` declares no `type`, so any
        # JSON value (including null) is accepted there.
        '400':
          description: 'Bad Request - the request was rejected as invalid'
          content:
            application/json:
              schema:
                type: object
                properties:
                  error:
                    type: boolean
                    description: Indicates an error occurred
                  message:
                    type: string
                    description: Error message
                  details:
                    nullable: true
                    description: Additional error details (e.g., validation errors)
                required:
                  - error
                  - message
        '401':
          description: 'Unauthorized - missing or invalid API key'
          content:
            application/json:
              schema:
                type: object
                properties:
                  error:
                    type: boolean
                    description: Indicates an error occurred
                  message:
                    type: string
                    description: Error message
                  details:
                    nullable: true
                    description: Additional error details (e.g., validation errors)
                required:
                  - error
                  - message
        '404':
          description: 'Not Found'
          content:
            application/json:
              schema:
                type: object
                properties:
                  error:
                    type: boolean
                    description: Indicates an error occurred
                  message:
                    type: string
                    description: Error message
                  details:
                    nullable: true
                    description: Additional error details (e.g., validation errors)
                required:
                  - error
                  - message
        '500':
          description: 'Internal Server Error'
          content:
            application/json:
              schema:
                type: object
                properties:
                  error:
                    type: boolean
                    description: Indicates an error occurred
                  message:
                    type: string
                    description: Error message
                  details:
                    nullable: true
                    description: Additional error details (e.g., validation errors)
                required:
                  - error
                  - message
      # Overrides the empty top-level `security`: this operation requires the
      # ApiKeyAuth scheme defined under components/securitySchemes.
      security:
        - ApiKeyAuth: []
components:
  schemas:
    # Request body for POST /v4/crawl/. The schema lists no `required`
    # properties, so every field below is optional as far as validation goes.
    # NOTE(review): presumably at least one of `url` / `startUrls` must be
    # sent - confirm against the server-side validation.
    StartCrawlerSessionRequest:
      type: object
      properties:
        url:
          type: string
          format: uri
          description: Single URL to start crawling (for backward compatibility)
        startUrls:
          type: array
          items:
            type: string
            format: uri
            minLength: 1
          description: List of URLs for crawling
        # NOTE(review): the descriptions of both path filters mention a
        # JSON-stringified array, but the schema only declares `type: array` -
        # confirm whether a string is really accepted.
        pathsFilterIn:
          type: array
          items:
            type: string
            minLength: 1
          description: >-
            Regex patterns to include specific full URLs. Accepts array or
            JSON-stringified array of regex source strings.
        pathsFilterOut:
          type: array
          items:
            type: string
            minLength: 1
          description: >-
            Regex patterns to exclude specific full URLs. Accepts array or
            JSON-stringified array of regex source strings.
        proxyType:
          type: string
          nullable: true
          description: Type of proxy to use
        proxyCountry:
          type: string
          nullable: true
          description: Country for proxy selection
        # The five limits below are `type: number` with `minimum: 1`, so
        # fractional values pass schema validation.
        # NOTE(review): if only whole numbers are meaningful, `integer` would
        # be tighter - confirm before changing.
        timeout:
          type: number
          minimum: 1
          description: Timeout in milliseconds
        maxDepth:
          type: number
          minimum: 1
          description: Maximum crawling depth
        maxPages:
          type: number
          minimum: 1
          description: Maximum number of pages to crawl
        maxMatches:
          type: number
          minimum: 1
          description: Maximum number of matched pages to crawl before stopping
        concurrency:
          type: number
          minimum: 1
          description: Number of concurrent crawlers
        strictDomain:
          default: true
          type: boolean
          description: Whether to stay within the same domain
        loadImages:
          default: true
          type: boolean
          description: Whether to load images during crawling
        safeMode:
          default: false
          type: boolean
          description: Enable safe mode for crawling
        callbackUrl:
          type: string
          format: uri
          nullable: true
          description: Webhook URL for completion notifications
        processDuringCrawl:
          type: boolean
          description: Whether to run preprocessing and extraction during the crawl phase
        # `mode`, `worker` and `proxy` are enumerated strings. The
        # `additionalProperties` schema carries only `nullable: true` and no
        # `type`, so extra keys with values of any type are allowed.
        crawlMethod:
          type: object
          properties:
            mode:
              default: auto
              type: string
              enum:
                - auto
                - browser
              description: >-
                Worker selection mode. 'auto' (default) picks cheapest
                compatible worker, 'browser' forces browser.
            worker:
              type: string
              enum:
                - auto
                - curl-cffi
                - patchright
                - camoufox
                - cloakbrowser
                - brightdata
              description: >-
                Explicit Scrape V2 worker override. BrightData is blocked in
                crawler runtime.
            proxy:
              type: string
              enum:
                - auto
                - dc
                - resi
                - isp
              description: Explicit proxy routing override for Scrape V2
          additionalProperties:
            nullable: true
          description: Crawl method configuration
        # Bounded to the closed interval [0, 1].
        matchThreshold:
          type: number
          minimum: 0
          maximum: 1
          description: Match threshold override for blueprint filtering
        # Array of field definitions. Four string properties are declared per
        # item, none required, and extra keys of any type are allowed.
        blueprint:
          type: array
          items:
            type: object
            properties:
              name:
                type: string
              description:
                type: string
              selector:
                type: string
              type:
                type: string
            additionalProperties:
              nullable: true
            description: Blueprint field definition
          description: Blueprint fields applied during crawling
        # Closed object (`additionalProperties: false`): only the properties
        # listed here are accepted. `schema`, `entity` and the items of
        # `xhrExtractorConfigs` declare no `type`, so their shape is not
        # constrained by this document.
        extractionOptions:
          type: object
          properties:
            extractions:
              type: object
              additionalProperties:
                type: boolean
              description: Feature toggles for available extraction helpers
            schema:
              nullable: true
              description: Schema definition for structured extractions
            entity:
              nullable: true
              description: Entity metadata used for extraction mappings
            mainContextSelector:
              type: string
              description: CSS selector anchoring focus-driven extractors
            xhrExtractorConfigs:
              type: array
              items:
                nullable: true
              description: XHR extractor configuration entries
          additionalProperties: false
          description: Extraction-related options derived from legacy launch summary
        # Closed object (`additionalProperties: false`). The browser-action
        # arrays and `navigationExploration` hold free-form objects whose keys
        # are not described here.
        navigationOptions:
          type: object
          properties:
            browserActions:
              type: array
              items:
                type: object
                additionalProperties:
                  nullable: true
              description: >-
                Ordered list of scripted browser actions executed after
                navigation
            preBrowserActions:
              type: array
              items:
                type: object
                additionalProperties:
                  nullable: true
              description: Browser actions performed before the main action chain
            scrollHtml:
              type: boolean
              description: Enable HTML capture while scrolling through the page
            scrollHtmlTimeout:
              type: number
              minimum: 0
              description: Maximum scroll duration in milliseconds
            visualHtml:
              type: boolean
              description: Enable visual HTML capture for navigation heuristics
            navigationStrategy:
              type: string
              description: Primary navigation strategy identifier
            navigationStrategies:
              type: array
              items:
                type: string
              description: Fallback navigation strategy order
            limit:
              type: number
              minimum: 0
              description: Maximum number of navigation steps
            disableNavigation:
              type: boolean
              description: Skip automated navigation heuristics
            ignoreIframes:
              type: boolean
              description: Disable iframe traversal during navigation
            navigationExploration:
              type: object
              additionalProperties:
                nullable: true
              description: Navigation exploration tuning payload
            loadHtmlOnly:
              type: boolean
              description: Skip scripted interactions and only capture HTML
            acceptCookies:
              type: boolean
              description: Force cookie acceptance automation
            # Accepts either a boolean or a string (exactly one must match).
            cachedCookieAccept:
              oneOf:
                - type: boolean
                - type: string
              description: Reuse cached cookie acceptance flow
          additionalProperties: false
          description: Navigation-related options derived from legacy launch summary
        # Closed object (`additionalProperties: false`).
        artifactOptions:
          type: object
          properties:
            screenshot:
              type: boolean
              description: Capture standard screenshot
            screenshotFull:
              type: boolean
              description: Capture full-page screenshot
            screenshotCache:
              type: boolean
              description: Re-use cached screenshot when available
            screenshotPublic:
              type: boolean
              description: Make captured screenshot publicly accessible
            screenshotLink:
              type: string
              description: Stored screenshot link for follow-up processing
            archivePdf:
              type: boolean
              description: Generate archival PDF for crawled pages
          additionalProperties: false
          description: Artifact capture options derived from legacy launch summary
        rawMode:
          type: boolean
          description: Whether this is a raw data mode crawl
        # Unlike the other *Options objects above, no `additionalProperties`
        # restriction is declared here, and the four flags carry no
        # per-field descriptions.
        outputOptions:
          type: object
          properties:
            includeHtml:
              type: boolean
            includeMarkdown:
              type: boolean
            includeScreenshots:
              type: boolean
            includeJson:
              type: boolean
          description: Output options for raw mode
        # The descriptions of the next three fields mark them as internal.
        jobId:
          type: string
          description: 'Internal: Job ID for workflow tracking'
        dataKey:
          type: string
          description: 'Internal: Data key for Parquet storage path'
        billingSource:
          type: string
          description: 'Internal: Billing source identifier'
      title: StartCrawlerSessionRequest
      description: >-
        Schema for starting a crawling session with support for both single URL
        and multiple URLs
    # Body of the 200 response of POST /v4/crawl/.
    StartCrawlerSessionResponse:
      type: object
      properties:
        message:
          type: string
          description: Response message
        sessionId:
          type: string
          description: Session ID
        # Not listed under `required`, so it may be absent from the response.
        configId:
          type: string
          description: Config ID (included when creating config and starting session)
        # Required and nullable: the key is always present, and its value is
        # either a string or null.
        error:
          type: string
          description: Error message if any
          nullable: true
      required:
        - message
        - sessionId
        - error
      title: StartCrawlerSessionResponse
      description: Response schema for starting a crawling session
  securitySchemes:
    # API key passed in the `x-api-key` request header. Operations opt in by
    # listing `ApiKeyAuth` in their `security` requirement.
    ApiKeyAuth:
      type: apiKey
      in: header
      name: x-api-key
      description: API key for authentication

````