{
  "openapi": "3.1.0",
  "info": {
    "title": "OpenFPGA Inference Gateway",
    "version": "1.0.0",
    "description": "FPGA-accelerated AI inference API. OpenAI-compatible drop-in replacement — change the base URL, keep everything else the same. Runs open-source LLMs on Intel Agilex FPGA hardware for deterministic latency and lower energy per token.",
    "contact": {
      "name": "OpenFPGA Support",
      "email": "support@openfpga.ai",
      "url": "https://openfpga.ai"
    },
    "license": {
      "name": "Proprietary",
      "url": "https://openfpga.ai/terms"
    }
  },
  "servers": [
    {
      "url": "https://app.openfpga.ai/api/v1",
      "description": "Production"
    }
  ],
  "security": [
    {
      "BearerAuth": []
    }
  ],
  "paths": {
    "/chat/completions": {
      "post": {
        "operationId": "createChatCompletion",
        "summary": "Create a chat completion",
        "description": "Generates a model response for the given conversation. Supports streaming via SSE, function calling via the tools parameter, and structured JSON output. OpenAI-compatible — use any OpenAI SDK with base_url set to https://app.openfpga.ai/api/v1.",
        "tags": ["Chat"],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/ChatCompletionRequest"
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "Chat completion response. If stream is false, returns a single JSON object. If stream is true, returns a stream of SSE events.",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ChatCompletionResponse"
                }
              },
              "text/event-stream": {
                "schema": {
                  "type": "string",
                  "description": "Server-Sent Events stream. Each event is a JSON object with choices[0].delta containing incremental content."
                }
              }
            }
          },
          "400": {
            "description": "Invalid request — malformed body, missing required fields, or unsupported parameter values.",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ErrorResponse"
                }
              }
            }
          },
          "401": {
            "description": "Authentication failed — missing, invalid, or expired API key.",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ErrorResponse"
                }
              }
            }
          },
          "404": {
            "description": "Model not found — the requested model ID does not exist.",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ErrorResponse"
                }
              }
            }
          },
          "429": {
            "description": "Rate limit exceeded. Check the Retry-After header for seconds to wait.",
            "headers": {
              "Retry-After": {
                "schema": {
                  "type": "integer"
                },
                "description": "Seconds to wait before retrying."
              },
              "X-RateLimit-Limit": {
                "schema": {
                  "type": "integer"
                },
                "description": "Maximum requests per minute."
              },
              "X-RateLimit-Remaining": {
                "schema": {
                  "type": "integer"
                },
                "description": "Requests remaining in current window."
              },
              "X-RateLimit-Reset": {
                "schema": {
                  "type": "integer"
                },
                "description": "Unix timestamp when the rate limit resets."
              }
            },
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ErrorResponse"
                }
              }
            }
          },
          "500": {
            "description": "Internal server error. Safe to retry with exponential backoff.",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ErrorResponse"
                }
              }
            }
          }
        }
      }
    },
    "/embeddings": {
      "post": {
        "operationId": "createEmbedding",
        "summary": "Create embeddings",
        "description": "Generate text embeddings. OpenAI-compatible.",
        "tags": ["Embeddings"],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/EmbeddingRequest"
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "Embedding response containing a list of embedding vectors.",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/EmbeddingResponse"
                }
              }
            }
          },
          "400": {
            "description": "Invalid request — missing or malformed input.",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ErrorResponse"
                }
              }
            }
          },
          "401": {
            "description": "Authentication failed — missing, invalid, or expired API key.",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ErrorResponse"
                }
              }
            }
          },
          "402": {
            "description": "Payment required. This endpoint supports x402 micropayments. Include an X-PAYMENT header with a valid payment proof, or use a Bearer API key.",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ErrorResponse"
                }
              }
            }
          },
          "429": {
            "description": "Rate limit exceeded.",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ErrorResponse"
                }
              }
            }
          },
          "500": {
            "description": "Internal server error. Safe to retry with exponential backoff.",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ErrorResponse"
                }
              }
            }
          }
        }
      }
    },
    "/models": {
      "get": {
        "operationId": "listModels",
        "summary": "List available models",
        "description": "Returns a list of all models currently available for inference on OpenFPGA. Use the model id value in chat completion requests.",
        "tags": ["Models"],
        "responses": {
          "200": {
            "description": "List of available models.",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ModelListResponse"
                }
              }
            }
          },
          "401": {
            "description": "Authentication failed.",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ErrorResponse"
                }
              }
            }
          }
        }
      }
    },
    "/models/{model_id}": {
      "get": {
        "operationId": "getModel",
        "summary": "Get model details",
        "description": "Returns details about a specific model, including context length, supported features, and pricing.",
        "tags": ["Models"],
        "parameters": [
          {
            "name": "model_id",
            "in": "path",
            "required": true,
            "description": "The model identifier, e.g. llama-3.1-8b-instruct",
            "schema": {
              "type": "string"
            }
          }
        ],
        "responses": {
          "200": {
            "description": "Model details.",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/Model"
                }
              }
            }
          },
          "404": {
            "description": "Model not found.",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ErrorResponse"
                }
              }
            }
          }
        }
      }
    }
  },
  "components": {
    "securitySchemes": {
      "BearerAuth": {
        "type": "http",
        "scheme": "bearer",
        "description": "API key from https://app.openfpga.ai. Pass as: Authorization: Bearer YOUR_API_KEY"
      }
    },
    "schemas": {
      "ChatCompletionRequest": {
        "type": "object",
        "required": ["model", "messages"],
        "properties": {
          "model": {
            "type": "string",
            "description": "Model ID to use for inference. Use GET /v1/models for available options.",
            "example": "llama-3.1-8b-instruct"
          },
          "messages": {
            "type": "array",
            "description": "Conversation history. Each message has a role (system, user, assistant, or tool) and content.",
            "items": {
              "$ref": "#/components/schemas/ChatMessage"
            },
            "minItems": 1
          },
          "temperature": {
            "type": "number",
            "minimum": 0,
            "maximum": 2,
            "default": 1,
            "description": "Sampling temperature. Higher values produce more random output."
          },
          "top_p": {
            "type": "number",
            "minimum": 0,
            "maximum": 1,
            "default": 1,
            "description": "Nucleus sampling threshold."
          },
          "max_tokens": {
            "type": "integer",
            "minimum": 1,
            "description": "Maximum number of tokens to generate."
          },
          "stream": {
            "type": "boolean",
            "default": false,
            "description": "If true, returns a stream of SSE events with incremental content."
          },
          "stop": {
            "oneOf": [
              { "type": "string" },
              { "type": "array", "items": { "type": "string" }, "maxItems": 4 }
            ],
            "description": "Up to 4 sequences where the model will stop generating."
          },
          "tools": {
            "type": "array",
            "description": "List of tools (functions) the model may call. The model will output a tool_calls array when it decides to invoke one.",
            "items": {
              "$ref": "#/components/schemas/Tool"
            }
          },
          "tool_choice": {
            "oneOf": [
              { "type": "string", "enum": ["none", "auto", "required"] },
              {
                "type": "object",
                "properties": {
                  "type": { "type": "string", "enum": ["function"] },
                  "function": {
                    "type": "object",
                    "properties": {
                      "name": { "type": "string" }
                    },
                    "required": ["name"]
                  }
                }
              }
            ],
            "description": "Controls function calling behavior. 'auto' lets the model decide, 'required' forces a tool call, 'none' disables."
          },
          "response_format": {
            "type": "object",
            "properties": {
              "type": {
                "type": "string",
                "enum": ["text", "json_object"],
                "description": "Set to 'json_object' to enforce valid JSON output."
              }
            },
            "description": "Output format. Use json_object for structured outputs."
          },
          "n": {
            "type": "integer",
            "minimum": 1,
            "maximum": 1,
            "default": 1,
            "description": "Number of completions to generate. Currently only 1 is supported."
          },
          "user": {
            "type": "string",
            "description": "Optional end-user identifier for abuse tracking."
          }
        }
      },
      "ChatMessage": {
        "type": "object",
        "required": ["role", "content"],
        "properties": {
          "role": {
            "type": "string",
            "enum": ["system", "user", "assistant", "tool"],
            "description": "The role of the message author."
          },
          "content": {
            "oneOf": [
              { "type": "string" },
              { "type": "null" }
            ],
            "description": "Message content. Null when assistant returns tool_calls."
          },
          "name": {
            "type": "string",
            "description": "Optional name for the participant."
          },
          "tool_calls": {
            "type": "array",
            "description": "Tool calls generated by the model (assistant messages only).",
            "items": {
              "$ref": "#/components/schemas/ToolCall"
            }
          },
          "tool_call_id": {
            "type": "string",
            "description": "The ID of the tool call this message is responding to (tool messages only)."
          }
        }
      },
      "Tool": {
        "type": "object",
        "required": ["type", "function"],
        "properties": {
          "type": {
            "type": "string",
            "enum": ["function"],
            "description": "The type of tool. Currently only 'function' is supported."
          },
          "function": {
            "$ref": "#/components/schemas/FunctionDefinition"
          }
        }
      },
      "FunctionDefinition": {
        "type": "object",
        "required": ["name"],
        "properties": {
          "name": {
            "type": "string",
            "description": "The name of the function."
          },
          "description": {
            "type": "string",
            "description": "A description of what the function does. Used by the model to decide when to call it."
          },
          "parameters": {
            "type": "object",
            "description": "JSON Schema object describing the function's parameters.",
            "additionalProperties": true
          }
        }
      },
      "ToolCall": {
        "type": "object",
        "properties": {
          "id": {
            "type": "string",
            "description": "Unique identifier for this tool call."
          },
          "type": {
            "type": "string",
            "enum": ["function"]
          },
          "function": {
            "type": "object",
            "properties": {
              "name": {
                "type": "string",
                "description": "The name of the function to call."
              },
              "arguments": {
                "type": "string",
                "description": "JSON string of function arguments."
              }
            }
          }
        }
      },
      "ChatCompletionResponse": {
        "type": "object",
        "properties": {
          "id": {
            "type": "string",
            "description": "Unique identifier for this completion.",
            "example": "chatcmpl-abc123"
          },
          "object": {
            "type": "string",
            "enum": ["chat.completion"],
            "example": "chat.completion"
          },
          "created": {
            "type": "integer",
            "description": "Unix timestamp of when the completion was created."
          },
          "model": {
            "type": "string",
            "description": "The model used for this completion.",
            "example": "llama-3.1-8b-instruct"
          },
          "choices": {
            "type": "array",
            "items": {
              "$ref": "#/components/schemas/ChatCompletionChoice"
            }
          },
          "usage": {
            "$ref": "#/components/schemas/Usage"
          }
        }
      },
      "ChatCompletionChoice": {
        "type": "object",
        "properties": {
          "index": {
            "type": "integer"
          },
          "message": {
            "$ref": "#/components/schemas/ChatMessage"
          },
          "finish_reason": {
            "type": "string",
            "enum": ["stop", "length", "tool_calls", "content_filter"],
            "description": "Why the model stopped generating. 'stop' = natural end or stop sequence, 'length' = max_tokens reached, 'tool_calls' = model wants to call a function."
          }
        }
      },
      "Usage": {
        "type": "object",
        "properties": {
          "prompt_tokens": {
            "type": "integer",
            "description": "Tokens in the input messages."
          },
          "completion_tokens": {
            "type": "integer",
            "description": "Tokens generated by the model."
          },
          "total_tokens": {
            "type": "integer",
            "description": "Total tokens (prompt + completion)."
          }
        }
      },
      "Model": {
        "type": "object",
        "properties": {
          "id": {
            "type": "string",
            "description": "Model identifier. Use this in chat completion requests.",
            "example": "llama-3.1-8b-instruct"
          },
          "object": {
            "type": "string",
            "enum": ["model"],
            "example": "model"
          },
          "created": {
            "type": "integer",
            "description": "Unix timestamp of when the model was added."
          },
          "owned_by": {
            "type": "string",
            "example": "openfpga"
          }
        }
      },
      "EmbeddingRequest": {
        "type": "object",
        "required": ["input"],
        "properties": {
          "input": {
            "oneOf": [
              { "type": "string" },
              { "type": "array", "items": { "type": "string" } }
            ],
            "description": "Input text to embed. Can be a single string or an array of strings."
          },
          "model": {
            "type": "string",
            "description": "Model ID to use for embeddings.",
            "default": "text-embedding-3-small",
            "example": "text-embedding-3-small"
          },
          "encoding_format": {
            "type": "string",
            "enum": ["float", "base64"],
            "default": "float",
            "description": "The format to return the embeddings in."
          }
        }
      },
      "EmbeddingResponse": {
        "type": "object",
        "properties": {
          "object": {
            "type": "string",
            "enum": ["list"],
            "example": "list"
          },
          "data": {
            "type": "array",
            "items": {
              "$ref": "#/components/schemas/EmbeddingObject"
            }
          },
          "model": {
            "type": "string",
            "description": "The model used for the embeddings.",
            "example": "text-embedding-3-small"
          },
          "usage": {
            "type": "object",
            "properties": {
              "prompt_tokens": {
                "type": "integer",
                "description": "Tokens in the input text."
              },
              "total_tokens": {
                "type": "integer",
                "description": "Total tokens used."
              }
            }
          }
        }
      },
      "EmbeddingObject": {
        "type": "object",
        "properties": {
          "object": {
            "type": "string",
            "enum": ["embedding"],
            "example": "embedding"
          },
          "embedding": {
            "type": "array",
            "items": {
              "type": "number"
            },
            "description": "The embedding vector."
          },
          "index": {
            "type": "integer",
            "description": "The index of the input this embedding corresponds to."
          }
        }
      },
      "ModelListResponse": {
        "type": "object",
        "properties": {
          "object": {
            "type": "string",
            "enum": ["list"]
          },
          "data": {
            "type": "array",
            "items": {
              "$ref": "#/components/schemas/Model"
            }
          }
        }
      },
      "ErrorResponse": {
        "type": "object",
        "properties": {
          "error": {
            "type": "object",
            "properties": {
              "code": {
                "type": "string",
                "description": "Machine-readable error code.",
                "enum": [
                  "INVALID_API_KEY",
                  "RATE_LIMIT_EXCEEDED",
                  "MODEL_NOT_FOUND",
                  "INVALID_REQUEST",
                  "SERVER_ERROR"
                ],
                "example": "INVALID_API_KEY"
              },
              "message": {
                "type": "string",
                "description": "Human-readable error description with actionable guidance.",
                "example": "The API key provided is invalid. Get a valid key at https://app.openfpga.ai"
              },
              "type": {
                "type": "string",
                "description": "Error category.",
                "enum": [
                  "authentication_error",
                  "rate_limit_error",
                  "not_found_error",
                  "invalid_request_error",
                  "server_error"
                ]
              }
            },
            "required": ["code", "message", "type"]
          }
        }
      }
    }
  },
  "tags": [
    {
      "name": "Chat",
      "description": "Chat completion endpoints. Generate text responses from conversations."
    },
    {
      "name": "Embeddings",
      "description": "Generate text embeddings from input strings."
    },
    {
      "name": "Models",
      "description": "List and inspect available models."
    }
  ]
}