Get Started

Building a demo or prototyping an MVP but don’t want to pay API costs just to validate an idea?

OpenRouter's free tier is generous for early development, but free models come with maintenance trade-offs. They can get rate limited, hit capacity, or disappear without notice, leaving you juggling fallbacks instead of shipping.

We maintain a live-updated list of available free models so you don't have to track availability yourself. Choose a use case and sort order, fetch the list from our API, and pass the model IDs to OpenRouter. It will automatically try each model in the order you specified until one responds. No need to manage fallbacks or check which models are currently working.

Preview Your Live Model List

Configure use case and sorting to preview the live, health-scored list your app will fetch dynamically.

1. Set Up OpenRouter

OpenRouter provides a unified API for accessing many LLM providers. Sign up for free and create an API key.

2. Get Your API Key

Sign in with GitHub to create your API key. All keys share a per-user limit of 200 requests per 24 hours (with SDK caching, this is plenty).
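
The helper you'll copy in step 3 reads this key from the FREE_MODELS_API_KEY environment variable. A minimal sketch of wiring that up in a Node/TypeScript project - the dotenv import is just one assumed way to load a local .env file; any mechanism that populates process.env works:

// Optional: load FREE_MODELS_API_KEY from a local .env file (assumes the dotenv package)
import 'dotenv/config';

// Fail fast if the key the step 3 helper expects is missing
if (!process.env.FREE_MODELS_API_KEY) {
  throw new Error('FREE_MODELS_API_KEY is not set');
}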

3. Copy free-llm-router.ts

This helper fetches free model IDs from our API, reports both successes and issues back, and handles caching automatically. It's a single file with no dependencies.

/**
 * Free LLM Router helper with built-in 15-minute caching
 * Set FREE_MODELS_API_KEY in your environment.
 *
 * Caching behavior:
 *   - In-memory cache with 15-minute TTL (matches server refresh rate)
 *   - Cache is per-instance (resets on serverless cold starts)
 *   - Use { cache: 'no-store' } to bypass cache (mirrors fetch semantics)
 *   - Falls back to stale cache on API errors (resilient to outages)
 *
 * Usage:
 *   const ids = await getModelIds(['tools']);
 *   const fresh = await getModelIds(['chat'], 'contextLength', 5, { maxErrorRate: 20, timeRange: '24h', myReports: true, cache: 'no-store' });
 */

const API = 'https://free-LLM-router.pages.dev/api/v1';
const API_KEY = process.env.FREE_MODELS_API_KEY;

/**
 * Type definitions for SDK parameters.
 * IMPORTANT: Keep these in sync with src/lib/api-definitions.ts
 * - UseCase: see VALID_USE_CASES
 * - Sort: see VALID_SORTS
 * - TimeRange: see VALID_TIME_RANGES
 */
type UseCase = 'chat' | 'vision' | 'tools' | 'longContext' | 'reasoning';
type Sort = 'contextLength' | 'maxOutput' | 'capable' | 'leastIssues' | 'newest';
type CacheMode = 'default' | 'no-store';
type TimeRange = '15m' | '30m' | '1h' | '6h' | '24h' | '7d' | '30d' | 'all';

// In-memory cache - 15 minute TTL (matches server refresh rate)
// NOTE: Cache is per-instance and resets on serverless cold starts
const CACHE_TTL = 15 * 60 * 1000; // 15 minutes in milliseconds
const cache = new Map<string, { data: string[]; timestamp: number }>();

/**
 * Get available free model IDs with optional filtering and sorting.
 * Default sort is 'contextLength' (largest context window first).
 * Defaults: maxErrorRate is undefined (no filtering), timeRange is unset (the API defaults to '1h'), myReports is false.
 */
export async function getModelIds(
  useCase?: UseCase[],
  sort: Sort = 'contextLength',
  topN?: number,
  options?: {
    cache?: CacheMode;
    maxErrorRate?: number;
    timeRange?: TimeRange;
    myReports?: boolean;
  }
): Promise<string[]> {
  // Sort useCase array for deterministic cache keys (avoid fragmentation);
  // treat an empty array the same as no filter
  const normalizedUseCase = useCase?.length ? [...useCase].sort() : undefined;

  // Generate cache key from normalized params
  const cacheKey = JSON.stringify({
    useCase: normalizedUseCase,
    sort,
    topN,
    maxErrorRate: options?.maxErrorRate,
    timeRange: options?.timeRange,
    myReports: options?.myReports,
  });

  const cached = cache.get(cacheKey);
  const cacheMode = options?.cache ?? 'default';

  // Return cached data if fresh and cache is enabled
  if (cacheMode === 'default' && cached && Date.now() - cached.timestamp < CACHE_TTL) {
    return cached.data;
  }

  // Fetch fresh data
  try {
    const params = new URLSearchParams({ sort });
    if (normalizedUseCase) params.set('useCase', normalizedUseCase.join(','));
    if (topN) params.set('topN', String(topN));
    if (options?.maxErrorRate !== undefined) {
      params.set('maxErrorRate', String(options.maxErrorRate));
    }
    if (options?.timeRange) {
      params.set('timeRange', options.timeRange);
    }
    if (options?.myReports) {
      params.set('myReports', 'true');
    }

    const res = await fetch(`${API}/models/ids?${params}`, {
      headers: { Authorization: `Bearer ${API_KEY}` },
    });
    if (!res.ok) throw new Error(`Free LLM Router API returned ${res.status}`);
    const { ids } = await res.json();

    // Store in cache
    cache.set(cacheKey, { data: ids, timestamp: Date.now() });

    return ids;
  } catch (error) {
    // Fall back to stale cache if available (resilient to API outages)
    if (cached) {
      // Only log in development to avoid serverless noise
      if (process.env.NODE_ENV !== 'production') {
        console.warn('API request failed, using stale cached data', error);
      }
      return cached.data;
    }
    throw error;
  }
}

// Report issues to help improve model health data.
// This does NOT count towards your rate limit - you're contributing!
export function reportIssue(
  modelId: string,
  issue: 'error' | 'rate_limited' | 'unavailable',
  details?: string
) {
  fetch(`${API}/models/feedback`, {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${API_KEY}`,
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({ modelId, issue, details }),
  }).catch(() => {}); // Fire-and-forget, don't block on errors
}

// Report successful model usage to improve health metrics.
// This does NOT count towards your rate limit - you're contributing!
export function reportSuccess(modelId: string, details?: string) {
  fetch(`${API}/models/feedback`, {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${API_KEY}`,
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({ modelId, success: true, details }),
  }).catch(() => {}); // Fire-and-forget, don't block on errors
}

// Helper: detect issue type from HTTP status code
export function issueFromStatus(status: number): 'rate_limited' | 'unavailable' | 'error' {
  if (status === 429) return 'rate_limited';
  if (status === 503) return 'unavailable';
  return 'error';
}

4. Use It

This is the exact `getModelIds` call for your current use case, sort, and top N.

// This is how you fetch free model IDs
getModelIds([], 'contextLength', 5, { maxErrorRate: 20, timeRange: '1h' })

Loop through models until one succeeds. Free models may be rate-limited, so we try multiple and optionally fall back to stable models you trust. See Code Examples for more patterns.

// Assumes `client` is an OpenAI-compatible SDK client pointed at OpenRouter and
// `messages` is your request payload (see the Chatbot example under Code Examples).
// 1. Fetch free models and try each until one succeeds
try {
  const freeModels = await getModelIds([], 'contextLength', 5, { maxErrorRate: 20, timeRange: '1h' });

  // 2. (Optional) Add stable fallback models you trust (usually paid)
  const stableFallback = ['anthropic/claude-3.5-sonnet'];
  const models = [...freeModels, ...stableFallback];

  // 3. Try models until one succeeds
  for (const id of models) {
    try {
      const res = await client.chat.completions.create({ model: id, messages });
      reportSuccess(id); // Helps improve health metrics
      return res;
    } catch (e) {
      const status = e.status || e.response?.status;
      reportIssue(id, issueFromStatus(status), e.message); // Helps improve health metrics
    }
  }
} catch {
  // API unavailable - fall back to hardcoded models
  // E.g. return await client.chat.completions.create({ model: 'anthropic/claude-3.5-sonnet', messages });
}
throw new Error('All models failed');

Query Parameters

Customize your requests by combining these parameters. All parameters are optional and can be mixed and matched.

useCase

Select models by use case. Pass one or more as a comma-separated list: ?useCase=vision,tools

Value        Description
chat         Text-to-text models optimized for conversation
vision       Models that accept image inputs
tools        Models that support function/tool calling
longContext  Models with 100k+ token context windows
reasoning    Models with advanced reasoning capabilities (e.g., o1, QwQ, DeepSeek R1)
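
For example, asking for models that can handle both image inputs and tool calling looks like this with the step 3 helper (an illustrative sketch, not the only way to combine values):

// Pass multiple use cases as an array; the helper joins them into a comma-separated list
const visionToolModels = await getModelIds(['vision', 'tools']);
// Raw API equivalent: GET /api/v1/models/ids?useCase=vision,tools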

sort

Control the order models are returned. This determines fallback priority when iterating through the list. Example: ?sort=contextLength

Value          Label                  Description
contextLength  Context Length         Largest context window first - best for long documents
maxOutput      Max Output             Highest output token limit first - best for long-form generation
capable        Most Capable           Most supported features first - good default
leastIssues    Least Reported Issues  Fewest user-reported issues first - best for stability
newest         Newest First           Most recently added models first - best for trying new models
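
Because this order doubles as fallback priority, a stability-first setup might sort by leastIssues; a small sketch using the step 3 helper (values are illustrative):

// Prefer models with the fewest reported issues and keep only the top 3 as fallbacks
const stableFirst = await getModelIds(['chat'], 'leastIssues', 3);
// Raw API equivalent: GET /api/v1/models/ids?useCase=chat&sort=leastIssues&topN=3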

topN

Return only the top N models based on sort order. Range: 1-100. Default: unlimited. Example: ?topN=10

maxErrorRate

Exclude models with error rate above this percentage (0-100). Error rate = errors / (errors + successes). Example: ?maxErrorRate=20 excludes models with more than 20% error rate.

timeRange

Time window for calculating error rates. Options: 15m, 30m, 1h, 6h, 24h, 7d, 30d, all. Default: 1h.

myReports

When set to true, calculate error rates from only your own reported issues instead of all community reports. Requires API key authentication. Default: false. Example: ?myReports=true
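
These parameters compose freely. As one illustrative combination through the step 3 helper - a strict health filter computed only from your own reports:

// Chat models, stability-first, top 5, excluding anything above 10% error rate
// over the last hour, counting only issues reported by this API key
const ids = await getModelIds(['chat'], 'leastIssues', 5, {
  maxErrorRate: 10,
  timeRange: '1h',
  myReports: true,
});
// Raw API equivalent:
// GET /api/v1/models/ids?useCase=chat&sort=leastIssues&topN=5&maxErrorRate=10&timeRange=1h&myReports=true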

API Reference

Complete reference for all available endpoints. See Query Parameters for parameter details.

GET /api/v1/models/ids

Lightweight endpoint returning only model IDs. Fast and small payload - use this in production.

Query Parameters

Parameter     Type     Description
useCase       string   Comma-separated: chat, vision, tools, longContext, reasoning
sort          string   One of: contextLength, maxOutput, capable, leastIssues, newest
topN          number   Return top N models based on sort order (1-100)
maxErrorRate  number   Exclude models with error rate above this percentage (0-100)
timeRange     string   Time window for error rates: 15m, 30m, 1h, 6h, 24h, 7d, 30d, all. Default: 1h.
myReports     boolean  If true, calculate error rates from only your own reports (requires API key). Default: false.

Response

Field  Type      Description
ids    string[]  Array of model IDs
count  number    Number of IDs returned

Errors

  • 500 - Server error

Cache-Control: private, max-age=60 - Responses are cached for 60 seconds at the HTTP layer and 15 minutes in the SDK.

Request

An API key is required to send requests.

curl "https://free-LLM-router.pages.dev/api/v1/models/ids?sort=contextLength&topN=5&maxErrorRate=20&timeRange=1h" \
  -H "Authorization: Bearer YOUR_API_KEY"

Response

{
  "ids": [
    "google/gemini-2.0-flash-exp:free",
    "meta-llama/llama-3.3-70b-instruct:free",
    "deepseek/deepseek-chat:free"
  ],
  "count": 15
}

GET /api/v1/models/full

Full model objects with metadata, feedback counts, and timestamps. Use for browsing or debugging.

Query Parameters

Same parameters as /models/ids: useCase, sort, topN, maxErrorRate, timeRange, and myReports.

See /models/ids documentation above for parameter details.

Response

Field           Type      Description
models          Model[]   Full model objects with all metadata
feedbackCounts  object    Per-model feedback: issue counts, success count, and error rate (% of failed requests)
lastUpdated     string    ISO 8601 timestamp of last sync
useCases        string[]  Applied use case values
sort            string    Applied sort value
count           number    Total number of models returned

Cache-Control: private, max-age=60 - Responses are cached for 60 seconds at the HTTP layer and 15 minutes in the SDK.

Request

An API key is required to send requests.

curl "https://free-LLM-router.pages.dev/api/v1/models/full?sort=contextLength&topN=5&maxErrorRate=20&timeRange=1h" \
  -H "Authorization: Bearer YOUR_API_KEY"

Response

{
  "models": [
    {
      "id": "google/gemini-2.0-flash-exp:free",
      "name": "Gemini 2.0 Flash",
      "contextLength": 1000000,
      "maxCompletionTokens": 8192,
      "description": "...",
      "inputModalities": ["text", "image"],
      "outputModalities": ["text"],
      "supportedParameters": ["tools", "reasoning"]
    }
  ],
  "feedbackCounts": { ... },
  "lastUpdated": "2024-12-29T10:00:00Z",
  "filters": ["vision"],
  "sort": "contextLength",
  "count": 15
}
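
The step 3 helper only wraps /models/ids. If you want these richer objects, a minimal fetch sketch against this endpoint (assuming the same FREE_MODELS_API_KEY environment variable and only the fields documented above):

// Browse full model metadata for debugging - not needed for the normal fallback loop
const res = await fetch(
  'https://free-LLM-router.pages.dev/api/v1/models/full?useCase=vision&sort=contextLength&topN=5',
  { headers: { Authorization: `Bearer ${process.env.FREE_MODELS_API_KEY}` } }
);
const { models, lastUpdated, count } = await res.json();

console.log(`${count} models, last synced ${lastUpdated}`);
for (const m of models) {
  console.log(`${m.id}: ${m.contextLength} context, supports ${m.supportedParameters.join(', ')}`);
}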

POST /api/v1/models/feedback

Report model feedback: successes or issues (rate limiting, errors, unavailability). Does not count towards your rate limit.

Request Body

Parameter  Type     Required     Description
modelId    string   Yes          The model ID to report
success    boolean  No           Set to true to report a successful request. If omitted, an issue is reported (the issue field is then required).
issue      string   Conditional  Required when success is false or omitted. One of: rate_limited, unavailable, error
details    string   No           Optional description of the issue
dryRun     boolean  No           If true, validates the request but doesn't save it (for testing)

Response

Field     Type     Description
received  boolean  Whether feedback was recorded

Errors

  • 400 - Missing modelId or invalid issue type
  • 500 - Server error

Request

An API key is required to send requests.

curl -X POST https://free-LLM-router.pages.dev/api/v1/models/feedback \
  -H "Authorization: Bearer YOUR_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "modelId": "google/gemini-2.0-flash-exp:free",
    "success": true,
    "dryRun": true
  }'

Response

{ "received": true }

Code Examples

Ready-to-use patterns for common use cases.

One-off API Call

Simple single prompt completion - perfect for scripts, CLI tools, or serverless functions.

import { getModelIds, reportSuccess, reportIssue, issueFromStatus } from './free-llm-router';

const prompt = 'Summarize this article in 3 bullet points: ...';

try {
  // Get top 3 models with both chat and vision capabilities
  // SDK has built-in 15-min cache, so this won't hit the API on every call
  const models = await getModelIds(['chat', 'vision'], 'capable', 3);

  // Try each model until one succeeds
  for (const id of models) {
    try {
      const res = await fetch('https://openrouter.ai/api/v1/chat/completions', {
        method: 'POST',
        headers: {
          'Authorization': `Bearer ${process.env.OPENROUTER_API_KEY}`,
          'Content-Type': 'application/json',
        },
        body: JSON.stringify({
          model: id,
          messages: [{ role: 'user', content: prompt }],
        }),
      });
      if (!res.ok) {
        // Report the right issue type - free, doesn't use quota
        reportIssue(id, issueFromStatus(res.status), `HTTP ${res.status}`);
        continue;
      }
      const data = await res.json();
      console.log(data.choices[0].message.content);
      // Report success - helps other users know this model works!
      reportSuccess(id);
      break; // Success - exit loop
    } catch (e) {
      reportIssue(id, 'error', e.message); // Free - doesn't use quota
    }
  }
} catch {
  // API unavailable - handle gracefully
  console.error('Failed to fetch models');
}

Chatbot

Multi-turn conversation with message history - ideal for chat interfaces.

import { getModelIds, reportSuccess, reportIssue, issueFromStatus } from './free-llm-router';
import OpenAI from 'openai';

// OpenAI SDK works with OpenRouter's API
const client = new OpenAI({
  baseURL: 'https://openrouter.ai/api/v1',
  apiKey: process.env.OPENROUTER_API_KEY,
});

// Store conversation history for multi-turn chat
const messages: OpenAI.ChatCompletionMessageParam[] = [];

async function chat(userMessage: string) {
  messages.push({ role: 'user', content: userMessage });

  try {
    // SDK has built-in 15-min cache, so this won't hit the API on every call
    const models = await getModelIds(['chat'], 'capable', 5);

    for (const id of models) {
      try {
        const res = await client.chat.completions.create({
          model: id,
          messages, // Include full history
        });
        const reply = res.choices[0].message.content;
        messages.push({ role: 'assistant', content: reply });
        // Report success - helps other users know this model works!
        reportSuccess(id);
        return reply;
      } catch (e) {
        // Report with correct issue type - free, doesn't use quota
        reportIssue(id, issueFromStatus(e.status), e.message);
      }
    }
  } catch {
    // API unavailable
  }
  throw new Error('All models failed');
}

Tool Calling

Let the model call functions - for agents, data fetching, or structured outputs.

import { getModelIds, reportIssue, issueFromStatus } from './free-llm-router';
import { createOpenAI } from '@ai-sdk/openai';
import { generateText, tool } from 'ai';
import { z } from 'zod';

// Vercel AI SDK with OpenRouter
const openrouter = createOpenAI({
  baseURL: 'https://openrouter.ai/api/v1',
  apiKey: process.env.OPENROUTER_API_KEY,
});

// Define tools with Zod schemas
const tools = {
  getWeather: tool({
    description: 'Get current weather for a location',
    parameters: z.object({ location: z.string() }),
    execute: async ({ location }) => `72°F and sunny in ${location}`,
  }),
};

async function askWithTools(prompt: string) {
  try {
    // Filter for models that support tool calling
    // SDK has built-in 15-min cache, so this won't hit the API on every call
    const models = await getModelIds(['tools'], 'capable', 3);

    for (const id of models) {
      try {
        const { text, toolCalls } = await generateText({
          model: openrouter(id),
          prompt,
          tools,
        });
        return { text, toolCalls };
      } catch (e) {
        // Report with correct issue type - free, doesn't use quota
        reportIssue(id, issueFromStatus(e.status), e.message);
      }
    }
  } catch {
    // API unavailable
  }
  throw new Error('All models failed');
}