{
  "name": "OpenFPGA Inference Gateway",
  "description": "FPGA-accelerated AI inference API. OpenAI-compatible drop-in replacement for any provider — change the base URL, keep everything else the same. Runs open-source LLMs on Intel Agilex FPGA hardware for deterministic latency, lower energy per token, and consistent performance without GPU queuing delays. Supports chat completions, function calling, structured JSON output, and streaming.",
  "url": "https://app.openfpga.ai/a2a",
  "provider": {
    "organization": "OpenFPGA",
    "url": "https://openfpga.ai"
  },
  "version": "0.1.0",
  "documentationUrl": "https://app.openfpga.ai/#/docs",
  "capabilities": {
    "streaming": true,
    "pushNotifications": false,
    "stateTransitionHistory": false
  },
  "securitySchemes": {
    "apiKey": {
      "type": "apiKey",
      "in": "header",
      "name": "Authorization",
      "description": "Bearer token. Get your API key at https://app.openfpga.ai/keys — a dedicated agent credential portal. Agents are first-class citizens: request a key scoped to your agent's identity and permissions."
    }
  },
  "security": [
    {
      "apiKey": []
    }
  ],
  "defaultInputModes": ["application/json", "text/plain"],
  "defaultOutputModes": ["application/json", "text/event-stream"],
  "skills": [
    {
      "id": "chat-completion",
      "name": "Chat Completion",
      "description": "Generate text responses from conversations using FPGA-accelerated open-source LLMs. Supports system/user/assistant/tool message roles, temperature and top_p sampling, stop sequences, and max_tokens limits. OpenAI-compatible request and response format.",
      "tags": ["inference", "llm", "chat", "text-generation", "openai-compatible"],
      "examples": [
        "Generate a response to a user question using llama-3.1-8b-instruct",
        "{\"model\": \"llama-3.1-8b-instruct\", \"messages\": [{\"role\": \"user\", \"content\": \"Explain FPGAs in one sentence.\"}]}"
      ],
      "inputModes": ["application/json"],
      "outputModes": ["application/json", "text/event-stream"]
    },
    {
      "id": "function-calling",
      "name": "Function Calling",
      "description": "Invoke tools and functions through the model. Pass a tools array with function definitions — the model returns tool_calls when it decides to invoke a function. Supports auto, required, and none tool_choice modes. Use this to chain inference with external tools in agentic workflows.",
      "tags": ["inference", "tools", "function-calling", "agents", "openai-compatible"],
      "examples": [
        "Call a weather function based on user input",
        "{\"model\": \"llama-3.1-8b-instruct\", \"messages\": [{\"role\": \"user\", \"content\": \"What's the weather in SF?\"}], \"tools\": [{\"type\": \"function\", \"function\": {\"name\": \"get_weather\", \"parameters\": {\"type\": \"object\", \"properties\": {\"location\": {\"type\": \"string\"}}}}}]}"
      ],
      "inputModes": ["application/json"],
      "outputModes": ["application/json", "text/event-stream"]
    },
    {
      "id": "structured-output",
      "name": "Structured JSON Output",
      "description": "Enforce valid JSON output from the model by setting response_format to json_object. Guarantees parseable JSON responses for deterministic agent pipelines and data extraction workflows.",
      "tags": ["inference", "json", "structured-output", "openai-compatible"],
      "examples": [
        "Extract structured data from text as JSON",
        "{\"model\": \"llama-3.1-8b-instruct\", \"messages\": [{\"role\": \"user\", \"content\": \"Extract the name and age from: John is 30 years old.\"}], \"response_format\": {\"type\": \"json_object\"}}"
      ],
      "inputModes": ["application/json"],
      "outputModes": ["application/json"]
    },
    {
      "id": "list-models",
      "name": "List Available Models",
      "description": "Returns all models currently available for inference on OpenFPGA. Use the model id in chat completion requests. New models (MiniMax, Qwen, Kimi, and others) are added over time without API changes.",
      "tags": ["models", "discovery", "openai-compatible"],
      "examples": [
        "What models are available on OpenFPGA?",
        "GET /v1/models"
      ],
      "inputModes": ["text/plain"],
      "outputModes": ["application/json"]
    }
  ],
  "supportsAuthenticatedExtendedCard": true
}
