# Specification version of this document; quoted so every parser reads a string.
openapi: '3.1.0'
info:
  title: OpenFPGA Inference API
  version: '1.0.0-beta'
  # Markdown overview shown by documentation renderers. Kept as a literal block
  # (`|`) so the fenced code samples keep their line breaks.
  description: |
    OpenAI-compatible inference API powered by FPGA hardware acceleration.
    Drop-in replacement for OpenAI's Chat Completions API — just change the base URL and API key.

    Base URL: https://api.openfpga.ai/v1

    ## Authentication
    All requests require a Bearer token in the Authorization header:
    ```
    Authorization: Bearer ofpga_sk_...
    ```

    ## OpenAI SDK Compatibility
    ```python
    from openai import OpenAI

    client = OpenAI(
        base_url="https://api.openfpga.ai/v1",
        api_key="ofpga_sk_..."
    )

    response = client.chat.completions.create(
        model="llama-3.1-8b-fpga",
        messages=[{"role": "user", "content": "Hello!"}]
    )
    ```
  contact:
    name: OpenFPGA
    url: 'https://openfpga.ai'
  license:
    name: Proprietary

# Base URLs that the relative keys under `paths` are resolved against.
servers:
  - description: Production
    url: 'https://api.openfpga.ai/v1'
  - description: Local development
    url: 'http://localhost:5000/v1'

# Document-wide security requirement: the `bearerAuth` scheme with no scopes.
security:
  - bearerAuth: []

paths:
  # Chat completion endpoint. The media type of a successful response follows
  # the request's `stream` flag (see ChatCompletionRequest.stream).
  /chat/completions:
    post:
      operationId: createChatCompletion
      summary: Create a chat completion
      description: |
        Creates a model response for the given conversation. Compatible with
        OpenAI's Chat Completions API format.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ChatCompletionRequest'
            examples:
              basic:
                summary: Basic chat completion
                value:
                  model: "llama-3.1-8b-fpga"
                  messages:
                    - role: "user"
                      content: "Explain FPGAs in one sentence."
              streaming:
                summary: Streaming chat completion
                value:
                  model: "llama-3.1-8b-fpga"
                  messages:
                    - role: "system"
                      content: "You are a helpful assistant."
                    - role: "user"
                      content: "What is FPGA inference?"
                  stream: true
                  max_tokens: 512
      responses:
        '200':
          # One status code covers both modes, so the description must not
          # claim the response is non-streaming only.
          description: >-
            Chat completion result. Returned as a single JSON object when
            `stream` is false (the default), or as a server-sent events stream
            when `stream` is true.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ChatCompletionResponse'
            text/event-stream:
              schema:
                # The schema describes ONE event payload; the body is a sequence
                # of such events. OpenAPI 3.1 allows keywords next to `$ref`.
                description: Server-sent events stream of ChatCompletionChunk objects
                $ref: '#/components/schemas/ChatCompletionChunk'
        '400':
          description: Invalid request
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
        '401':
          description: Invalid or missing API key
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
        '429':
          description: Rate limit exceeded
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
              example:
                error:
                  message: "Rate limit exceeded. Please retry after 1 second."
                  type: "rate_limit_error"
                  code: "rate_limit_exceeded"
        '503':
          description: Service unavailable (no FPGA capacity)
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'

  # Model catalogue: responds with a ModelListResponse.
  /models:
    get:
      operationId: listModels
      summary: List available models
      description: >-
        Returns a list of models currently available on FPGA hardware.
      responses:
        '200':
          description: List of models
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ModelListResponse'

  # Single-model lookup by the ID given in the path.
  /models/{model_id}:
    get:
      operationId: getModel
      summary: Get model details
      description: Returns details about a specific model including FPGA performance metrics.
      parameters:
        - name: model_id
          in: path
          required: true
          schema:
            type: string
          example: "llama-3.1-8b-fpga"
      responses:
        '200':
          description: Model details
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Model'
        '404':
          description: Model not found
          # Declared with the same error envelope as the other error responses
          # in this document so clients can parse the body.
          # NOTE(review): assumes the server returns ErrorResponse here — confirm.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'

  # --- Portal API (non-OpenAI, for dashboard) ---

  # API-key collection: GET lists existing keys, POST issues a new one.
  /portal/api-keys:
    get:
      tags:
        - Portal
      operationId: listApiKeys
      summary: List API keys
      description: >-
        Returns all API keys for the authenticated user. Keys are masked except
        the last 4 characters.
      responses:
        '200':
          description: List of API keys
          content:
            application/json:
              schema:
                # Envelope object wrapping the array of key summaries.
                type: object
                properties:
                  data:
                    type: array
                    items:
                      $ref: '#/components/schemas/ApiKeyInfo'
    post:
      tags:
        - Portal
      operationId: createApiKey
      summary: Create a new API key
      description: |
        Creates a new API key. The full key is returned ONLY in this response.
        Store it securely — it cannot be retrieved again.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              required:
                - name
              properties:
                name:
                  description: Human-readable label for this key
                  type: string
                  example: 'production-backend'
      responses:
        '201':
          description: API key created
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ApiKeyCreated'

  # Single API key addressed by the `key_id` path parameter; DELETE only.
  /portal/api-keys/{key_id}:
    delete:
      tags:
        - Portal
      operationId: revokeApiKey
      summary: Revoke an API key
      description: >-
        Permanently revokes an API key. This cannot be undone.
      parameters:
        - in: path
          name: key_id
          required: true
          schema:
            type: string
      responses:
        '200':
          description: Key revoked
          content:
            application/json:
              schema:
                # Confirmation body carrying a single `message` string.
                type: object
                properties:
                  message:
                    type: string
                    example: 'API key revoked'

  # Usage report; both date filters are optional query parameters.
  /portal/usage:
    get:
      tags:
        - Portal
      operationId: getUsage
      summary: Get usage statistics
      description: >-
        Returns usage stats for the authenticated user over a time range.
      parameters:
        # Example dates stay quoted so YAML does not parse them as date values.
        - in: query
          name: start_date
          schema:
            type: string
            format: date
          example: '2026-03-01'
        - in: query
          name: end_date
          schema:
            type: string
            format: date
          example: '2026-03-15'
      responses:
        '200':
          description: Usage statistics
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/UsageResponse'

components:
  securitySchemes:
    # HTTP bearer-token scheme named by the top-level `security` requirement.
    bearerAuth:
      type: http
      scheme: bearer
      # Quoted because the value contains `*`.
      bearerFormat: 'ofpga_sk_*'

  schemas:
    # ---- OpenAI-compatible schemas ----

    # Request body of POST /chat/completions. Only `model` and `messages` are
    # required; every other property is optional and most declare a default.
    ChatCompletionRequest:
      type: object
      required:
        - model
        - messages
      properties:
        model:
          description: Model ID.
          type: string
          example: 'llama-3.1-8b-fpga'
        messages:
          type: array
          minItems: 1
          items:
            $ref: '#/components/schemas/Message'
        temperature:
          description: Sampling temperature.
          type: number
          minimum: 0
          maximum: 2
          default: 0.7
        top_p:
          description: Nucleus sampling parameter.
          type: number
          minimum: 0
          maximum: 1
          default: 1
        max_tokens:
          description: Maximum tokens to generate.
          type: integer
          minimum: 1
          maximum: 8192
          default: 1024
        stream:
          description: If true, returns a stream of SSE events.
          type: boolean
          default: false
        stop:
          description: Stop sequences.
          # Accepts either a single string or a list of strings.
          oneOf:
            - type: string
            - type: array
              items:
                type: string
        frequency_penalty:
          type: number
          minimum: -2
          maximum: 2
          default: 0
        presence_penalty:
          type: number
          minimum: -2
          maximum: 2
          default: 0
        n:
          description: Number of completions. Currently only 1 is supported.
          type: integer
          # minimum == maximum pins the only accepted value to 1.
          minimum: 1
          maximum: 1
          default: 1
        user:
          description: Unique identifier for the end-user (for abuse tracking).
          type: string

    # One conversation turn; used by ChatCompletionRequest.messages and by
    # ChatCompletionResponse.choices[].message.
    Message:
      type: object
      required:
        - role
        - content
      properties:
        role:
          type: string
          enum:
            - system
            - user
            - assistant
        content:
          type: string

    # JSON body of a successful POST /chat/completions (application/json).
    ChatCompletionResponse:
      type: object
      properties:
        id:
          description: Unique completion ID
          type: string
          example: 'ofpga-chat-abc123'
        object:
          type: string
          enum:
            - chat.completion
        created:
          description: Unix timestamp
          type: integer
        model:
          type: string
          example: 'llama-3.1-8b-fpga'
        choices:
          type: array
          items:
            type: object
            properties:
              index:
                type: integer
              message:
                $ref: '#/components/schemas/Message'
              finish_reason:
                type: string
                enum:
                  - stop
                  - length
                  - content_filter
        usage:
          $ref: '#/components/schemas/Usage'
        # OpenFPGA extension
        x_openfpga:
          description: FPGA-specific performance metadata
          type: object
          properties:
            hardware:
              type: string
              example: 'xilinx-alveo-u280'
            latency_ms:
              description: End-to-end inference latency in milliseconds
              type: number
              example: 12.4
            tokens_per_second:
              type: number
              example: 185.3
            power_watts:
              description: Estimated power consumption for this request
              type: number
              example: 28.5

    # Payload of one server-sent event in a streaming chat completion.
    ChatCompletionChunk:
      type: object
      description: A single chunk in a streaming response (SSE data field)
      properties:
        id:
          type: string
        object:
          type: string
          enum: [chat.completion.chunk]
        created:
          type: integer
        model:
          type: string
        choices:
          type: array
          items:
            type: object
            properties:
              index:
                type: integer
              # Incremental piece of the message carried by this chunk.
              delta:
                type: object
                properties:
                  role:
                    type: string
                  content:
                    type: string
              finish_reason:
                # OpenAPI 3.1 follows JSON Schema 2020-12, which has no
                # `nullable` keyword; nullability is expressed as a type union.
                # 'null' is quoted so YAML keeps it a string, not a null value.
                type:
                  - string
                  - 'null'

    # Token counters; referenced by ChatCompletionResponse.usage.
    Usage:
      properties:
        prompt_tokens:
          type: integer
        completion_tokens:
          type: integer
        total_tokens:
          type: integer
      type: object

    # Model descriptor returned by GET /models/{model_id} and used as the item
    # type of ModelListResponse.data.
    Model:
      type: object
      properties:
        id:
          type: string
          example: 'llama-3.1-8b-fpga'
        object:
          type: string
          enum:
            - model
        created:
          type: integer
        owned_by:
          type: string
          example: 'openfpga'
        # OpenFPGA extensions
        x_openfpga:
          type: object
          properties:
            display_name:
              type: string
              example: 'Llama 3.1 8B'
            base_model:
              type: string
              example: 'meta-llama/Llama-3.1-8B-Instruct'
            context_length:
              type: integer
              example: 131072
            pricing:
              type: object
              properties:
                input_per_million:
                  description: Price per 1M input tokens in USD
                  type: number
                  example: 0.06
                output_per_million:
                  description: Price per 1M output tokens in USD
                  type: number
                  example: 0.08
            performance:
              type: object
              properties:
                avg_latency_ms:
                  type: number
                  example: 8.5
                throughput_tps:
                  description: Average tokens per second
                  type: number
                  example: 185
                power_watts:
                  type: number
                  example: 28
            hardware:
              type: string
              example: 'Xilinx Alveo U280'
            status:
              type: string
              enum:
                - available
                - beta
                - coming_soon
              example: 'beta'

    # Response body of GET /models: a `list` object whose `data` holds Models.
    ModelListResponse:
      type: object
      properties:
        object:
          type: string
          enum:
            - list
        data:
          type: array
          items:
            $ref: '#/components/schemas/Model'

    # ---- Portal schemas ----

    # Summary of an existing API key as listed by GET /portal/api-keys. It has
    # no full-key property, only the masked `key_hint`.
    ApiKeyInfo:
      type: object
      properties:
        id:
          type: string
          example: "key_abc123"
        name:
          type: string
          example: "production-backend"
        key_hint:
          type: string
          description: Masked key showing only prefix and last 4 chars
          example: "ofpga_sk_...a1b2"
        created_at:
          type: string
          format: date-time
        last_used_at:
          # OpenAPI 3.1 follows JSON Schema 2020-12, which has no `nullable`
          # keyword; nullability is expressed as a type union. 'null' is quoted
          # so YAML keeps it a string, not a null value.
          type:
            - string
            - 'null'
          format: date-time

    # Response body of POST /portal/api-keys; the only schema in this document
    # that carries the full `key` value.
    ApiKeyCreated:
      type: object
      properties:
        id:
          type: string
        name:
          type: string
        key:
          description: 'Full API key. Store securely — shown only once.'
          type: string
          example: 'ofpga_sk_live_a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6'
        created_at:
          type: string
          format: date-time

    # Response body of GET /portal/usage: overall totals plus a `daily` array
    # whose entries each carry a calendar `date`.
    UsageResponse:
      type: object
      properties:
        # Totals
        total_requests:
          type: integer
        total_tokens:
          type: integer
        total_prompt_tokens:
          type: integer
        total_completion_tokens:
          type: integer
        total_cost_usd:
          type: number
        # Breakdown entries
        daily:
          type: array
          items:
            type: object
            properties:
              date:
                format: date
                type: string
              requests:
                type: integer
              tokens:
                type: integer
              cost_usd:
                type: number
              avg_latency_ms:
                type: number

    # Error envelope referenced by the error responses in this document; all
    # details sit under a single `error` object.
    ErrorResponse:
      type: object
      properties:
        error:
          type: object
          properties:
            message:
              type: string
            type:
              type: string
              enum:
                - invalid_request_error
                - authentication_error
                - rate_limit_error
                - server_error
            code:
              type: string