Get Started
Building a demo or prototyping an MVP but don’t want to pay API costs just to validate an idea?
OpenRouter's free tier is generous for early development, but free models come with maintenance trade-offs: they can get rate limited, hit capacity, or disappear without notice, leaving you juggling fallbacks instead of shipping.
We maintain a live-updated list of available free models so you don't have to track availability yourself. Set your use case and sorting preferences, fetch the list from our API, and pass the model IDs to OpenRouter; it will try each model in the order you specified until one responds. No need to manage fallbacks or check which models are currently working.
Preview Your Live Model List
Configure use case and sorting to preview the live, health-scored list your app will fetch dynamically.
Set Up OpenRouter
OpenRouter provides a unified API for accessing many LLM providers. Sign up for free and create an API key.
Get Your API Key
Sign in with GitHub to create your API key. All keys share a per-user limit of 200 requests per 24 hours; with the SDK's 15-minute cache, a long-running instance makes at most 96 list fetches per day per parameter combination, comfortably within the limit.
Copy free-llm-router.ts
This helper fetches free model IDs from our API, lets you report successes and issues back, and handles caching automatically. It's a single file with no dependencies.
/**
* Free LLM Router helper with built-in 15-minute caching
* Set FREE_MODELS_API_KEY in your environment.
*
* Caching behavior:
* - In-memory cache with 15-minute TTL (matches server refresh rate)
* - Cache is per-instance (resets on serverless cold starts)
* - Use { cache: 'no-store' } to bypass cache (mirrors fetch semantics)
* - Falls back to stale cache on API errors (resilient to outages)
*
* Usage:
* const ids = await getModelIds(['tools']);
* const fresh = await getModelIds(['chat'], 'contextLength', 5, { maxErrorRate: 20, timeRange: '24h', myReports: true, cache: 'no-store' });
*/
const API = 'https://free-LLM-router.pages.dev/api/v1';
const API_KEY = process.env.FREE_MODELS_API_KEY;
/**
* Type definitions for SDK parameters.
* IMPORTANT: Keep these in sync with src/lib/api-definitions.ts
* - UseCase: see VALID_USE_CASES
* - Sort: see VALID_SORTS
* - TimeRange: see VALID_TIME_RANGES
*/
type UseCase = 'chat' | 'vision' | 'tools' | 'longContext' | 'reasoning';
type Sort = 'contextLength' | 'maxOutput' | 'capable' | 'leastIssues' | 'newest';
type CacheMode = 'default' | 'no-store';
type TimeRange = '15m' | '30m' | '1h' | '6h' | '24h' | '7d' | '30d' | 'all';
// In-memory cache - 15 minute TTL (matches server refresh rate)
// NOTE: Cache is per-instance and resets on serverless cold starts
const CACHE_TTL = 15 * 60 * 1000; // 15 minutes in milliseconds
const cache = new Map<string, { data: string[]; timestamp: number }>();
/**
* Get available free model IDs with optional filtering and sorting.
* Default sort is 'contextLength' (largest context window first).
 * maxErrorRate, timeRange, and myReports are omitted unless provided, so the API defaults apply (no error-rate filter, timeRange '1h', myReports false).
*/
export async function getModelIds(
useCase?: UseCase[],
sort: Sort = 'contextLength',
topN?: number,
options?: {
cache?: CacheMode;
maxErrorRate?: number;
timeRange?: TimeRange;
myReports?: boolean;
}
): Promise<string[]> {
  // Sort useCase array for deterministic cache keys (avoid fragmentation);
  // an empty array is treated like undefined (no use-case filter)
  const normalizedUseCase = useCase?.length ? [...useCase].sort() : undefined;
// Generate cache key from normalized params
const cacheKey = JSON.stringify({
useCase: normalizedUseCase,
sort,
topN,
maxErrorRate: options?.maxErrorRate,
timeRange: options?.timeRange,
myReports: options?.myReports,
});
const cached = cache.get(cacheKey);
const cacheMode = options?.cache ?? 'default';
// Return cached data if fresh and cache is enabled
if (cacheMode === 'default' && cached && Date.now() - cached.timestamp < CACHE_TTL) {
return cached.data;
}
// Fetch fresh data
try {
const params = new URLSearchParams({ sort });
if (normalizedUseCase) params.set('useCase', normalizedUseCase.join(','));
if (topN) params.set('topN', String(topN));
if (options?.maxErrorRate !== undefined) {
params.set('maxErrorRate', String(options.maxErrorRate));
}
if (options?.timeRange) {
params.set('timeRange', options.timeRange);
}
if (options?.myReports) {
params.set('myReports', 'true');
}
    const res = await fetch(`${API}/models/ids?${params}`, {
      headers: { Authorization: `Bearer ${API_KEY}` },
    });
    if (!res.ok) throw new Error(`Free LLM Router API error: HTTP ${res.status}`);
    const { ids } = await res.json();
// Store in cache
cache.set(cacheKey, { data: ids, timestamp: Date.now() });
return ids;
} catch (error) {
// Fall back to stale cache if available (resilient to API outages)
if (cached) {
// Only log in development to avoid serverless noise
if (process.env.NODE_ENV !== 'production') {
console.warn('API request failed, using stale cached data', error);
}
return cached.data;
}
throw error;
}
}
// Report issues to help improve model health data.
// This does NOT count towards your rate limit - you're contributing!
export function reportIssue(
modelId: string,
issue: 'error' | 'rate_limited' | 'unavailable',
details?: string
) {
fetch(`${API}/models/feedback`, {
method: 'POST',
headers: {
'Authorization': `Bearer ${API_KEY}`,
'Content-Type': 'application/json',
},
body: JSON.stringify({ modelId, issue, details }),
}).catch(() => {}); // Fire-and-forget, don't block on errors
}
// Report successful model usage to improve health metrics.
// This does NOT count towards your rate limit - you're contributing!
export function reportSuccess(modelId: string, details?: string) {
fetch(`${API}/models/feedback`, {
method: 'POST',
headers: {
'Authorization': `Bearer ${API_KEY}`,
'Content-Type': 'application/json',
},
body: JSON.stringify({ modelId, success: true, details }),
}).catch(() => {}); // Fire-and-forget, don't block on errors
}
// Helper: detect issue type from HTTP status code
export function issueFromStatus(status: number): 'rate_limited' | 'unavailable' | 'error' {
if (status === 429) return 'rate_limited';
if (status === 503) return 'unavailable';
return 'error';
}
Use It
This is the exact `getModelIds` call for your current use case, sort, and top N.
// This is how you fetch free model IDs
getModelIds([], 'contextLength', 5, { maxErrorRate: 20, timeRange: '1h' })

Loop through models until one succeeds. Free models may be rate-limited, so we try multiple and optionally fall back to stable models you trust. See Code Examples for more patterns.
// 1. Fetch free models and try each until one succeeds
try {
const freeModels = await getModelIds([], 'contextLength', 5, { maxErrorRate: 20, timeRange: '1h' });
// 2. (Optional) Add stable fallback models you trust (usually paid)
const stableFallback = ['anthropic/claude-3.5-sonnet'];
const models = [...freeModels, ...stableFallback];
// 3. Try models until one succeeds
for (const id of models) {
try {
const res = await client.chat.completions.create({ model: id, messages });
reportSuccess(id); // Helps improve health metrics
return res;
} catch (e) {
const status = e.status || e.response?.status;
reportIssue(id, issueFromStatus(status), e.message); // Helps improve health metrics
}
}
} catch {
// API unavailable - fall back to hardcoded models
// E.g. return await client.chat.completions.create({ model: 'anthropic/claude-3.5-sonnet', messages });
}
throw new Error('All models failed');

Query Parameters
Customize your requests by combining these parameters. All parameters are optional and can be mixed and matched.
useCase
Select models by use case. Pass one or more as a comma-separated list: ?useCase=vision,tools
| Value | Description |
|---|---|
| chat | Text-to-text models optimized for conversation |
| vision | Models that accept image inputs |
| tools | Models that support function/tool calling |
| longContext | Models with 100k+ token context windows |
| reasoning | Models with advanced reasoning capabilities (e.g., o1, QwQ, DeepSeek R1) |
sort
Control the order models are returned. This determines fallback priority when iterating through the list. Example: ?sort=contextLength
| Value | Label | Description |
|---|---|---|
| contextLength | Context Length | Largest context window first - best for long documents |
| maxOutput | Max Output | Highest output token limit first - best for long-form generation |
| capable | Most Capable | Most supported features first - good default |
| leastIssues | Least Reported Issues | Fewest user-reported issues first - best for stability |
| newest | Newest First | Most recently added models first - best for trying new models |
topN
Return only the top N models based on sort order. Range: 1-100. Default: unlimited. Example: ?topN=10
maxErrorRate
Exclude models with error rate above this percentage (0-100). Error rate = errors / (errors + successes). Example: ?maxErrorRate=20 excludes models with more than 20% error rate, e.g. a model with 5 errors and 15 successes (25% error rate).
timeRange
Time window for calculating error rates. Options: 15m, 30m, 1h, 6h, 24h, 7d, 30d, all. Default: 1h.
myReports
When set to true, calculate error rates from only your own reported issues instead of all community reports. Requires API key authentication. Default: false. Example: ?myReports=true
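All of these map one-to-one onto the `getModelIds` helper above, which builds the query string for you. A minimal sketch combining them (the parameter values and logged output are illustrative):

import { getModelIds } from './free-llm-router';

// Roughly equivalent to:
// GET /api/v1/models/ids?sort=leastIssues&useCase=tools,vision&topN=10&maxErrorRate=20&timeRange=7d&myReports=true
// (the helper sorts the useCase list alphabetically to keep cache keys stable)
const ids = await getModelIds(
  ['vision', 'tools'],  // useCase: image inputs AND function/tool calling
  'leastIssues',        // sort: fewest reported issues first (fallback priority)
  10,                   // topN: keep only the 10 best matches
  { maxErrorRate: 20, timeRange: '7d', myReports: true } // drop models above 20% error rate, judged by your own reports over 7 days
);
console.log(ids); // e.g. ['meta-llama/llama-3.3-70b-instruct:free', ...]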
API Reference
Complete reference for all available endpoints. See Query Parameters for parameter details.
/api/v1/models/ids
Lightweight endpoint returning only model IDs. Fast and small payload - use this in production.
Query Parameters
| Parameter | Type | Description |
|---|---|---|
| useCase | string | Comma-separated: chat, vision, tools, longContext, reasoning |
| sort | string | One of: contextLength, maxOutput, capable, leastIssues, newest |
| topN | number | Return top N models based on sort order (1-100) |
| maxErrorRate | number | Exclude models with error rate above this percentage (0-100) |
| timeRange | string | Time window for error rates: 15m, 30m, 1h, 6h, 24h, 7d, 30d, all. Default: 1h. |
| myReports | boolean | If true, calculate error rates from only your own reports (requires API key). Default: false. |
Response
| Field | Type | Description |
|---|---|---|
| ids | string[] | Array of model IDs |
| count | number | Number of IDs returned |
Errors
500 - Server error
Cache-Control: private, max-age=60 - Responses are cached for 60 seconds at the HTTP layer and 15 minutes in the SDK.
Request
An API key is required to send requests.
curl "https://free-LLM-router.pages.dev/api/v1/models/ids?sort=contextLength&topN=5&maxErrorRate=20&timeRange=1h" \
  -H "Authorization: Bearer YOUR_API_KEY"

Response
{
"ids": [
"google/gemini-2.0-flash-exp:free",
"meta-llama/llama-3.3-70b-instruct:free",
"deepseek/deepseek-chat:free"
],
"count": 15
}/api/v1/models/full
Full model objects with metadata, feedback counts, and timestamps. Use for browsing or debugging.
Query Parameters
Same parameters as /models/ids: useCase, sort, topN, maxErrorRate, timeRange, and myReports.
See /models/ids documentation above for parameter details.
Response
| Field | Type | Description |
|---|---|---|
| models | Model[] | Full model objects with all metadata |
| feedbackCounts | object | Per-model feedback: issue counts, success count, and error rate (percentage of failed requests) |
| lastUpdated | string | ISO 8601 timestamp of last sync |
| useCases | string[] | Applied use case values |
| sort | string | Applied sort value |
| count | number | Total number of models returned |
Cache-Control: private, max-age=60 - Responses are cached for 60 seconds at the HTTP layer and 15 minutes in the SDK.
Request
An API key is required to send requests.
curl "https://free-LLM-router.pages.dev/api/v1/models/full?sort=contextLength&topN=5&maxErrorRate=20&timeRange=1h" \
  -H "Authorization: Bearer YOUR_API_KEY"

Response
{
"models": [
{
"id": "google/gemini-2.0-flash-exp:free",
"name": "Gemini 2.0 Flash",
"contextLength": 1000000,
"maxCompletionTokens": 8192,
"description": "...",
"inputModalities": ["text", "image"],
"outputModalities": ["text"],
"supportedParameters": ["tools", "reasoning"]
}
],
"feedbackCounts": { ... },
"lastUpdated": "2024-12-29T10:00:00Z",
"filters": ["vision"],
"sort": "contextLength",
"count": 15
}/api/v1/models/feedback
Report model feedback: successes or issues (rate limiting, errors, unavailability). Does not count towards your rate limit.
Request Body
| Parameter | Type | Required | Description |
|---|---|---|---|
| modelId | string | Yes | The model ID to report |
| success | boolean | No | Set to true to report a successful request. If omitted, reports an issue (requires the issue field). |
| issue | string | Conditional | Required when success is false or omitted. One of: rate_limited, unavailable, error |
| details | string | No | Optional description of the issue |
| dryRun | boolean | No | If true, validates request but doesn't save (for testing) |
Response
| Field | Type | Description |
|---|---|---|
| received | boolean | Whether feedback was recorded |
Errors
400 - Missing modelId or invalid issue type
500 - Server error
Request
An API key is required to send requests.
curl -X POST https://free-LLM-router.pages.dev/api/v1/models/feedback \
-H "Authorization: Bearer YOUR_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"modelId": "google/gemini-2.0-flash-exp:free",
"success": true,
"dryRun": true
}'

Response

{ "received": true }

Code Examples
Ready-to-use patterns for common use cases.
One-off API Call
Simple single prompt completion - perfect for scripts, CLI tools, or serverless functions.
import { getModelIds, reportSuccess, reportIssue, issueFromStatus } from './free-llm-router';
const prompt = 'Summarize this article in 3 bullet points: ...';
try {
// Get top 3 models with both chat and vision capabilities
// SDK has built-in 15-min cache, so this won't hit the API on every call
const models = await getModelIds(['chat', 'vision'], 'capable', 3);
// Try each model until one succeeds
for (const id of models) {
try {
const res = await fetch('https://openrouter.ai/api/v1/chat/completions', {
method: 'POST',
headers: {
'Authorization': `Bearer ${process.env.OPENROUTER_API_KEY}`,
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: id,
messages: [{ role: 'user', content: prompt }],
}),
});
if (!res.ok) {
// Report the right issue type - free, doesn't use quota
reportIssue(id, issueFromStatus(res.status), `HTTP ${res.status}`);
continue;
}
const data = await res.json();
console.log(data.choices[0].message.content);
// Report success - helps other users know this model works!
reportSuccess(id);
break; // Success - exit loop
} catch (e) {
reportIssue(id, 'error', e.message); // Free - doesn't use quota
}
}
} catch {
// API unavailable - handle gracefully
console.error('Failed to fetch models');
}

Chatbot
Multi-turn conversation with message history - ideal for chat interfaces.
import { getModelIds, reportSuccess, reportIssue, issueFromStatus } from './free-llm-router';
import OpenAI from 'openai';
// OpenAI SDK works with OpenRouter's API
const client = new OpenAI({
baseURL: 'https://openrouter.ai/api/v1',
apiKey: process.env.OPENROUTER_API_KEY,
});
// Store conversation history for multi-turn chat
const messages: OpenAI.ChatCompletionMessageParam[] = [];
async function chat(userMessage: string) {
messages.push({ role: 'user', content: userMessage });
try {
// SDK has built-in 15-min cache, so this won't hit the API on every call
const models = await getModelIds(['chat'], 'capable', 5);
for (const id of models) {
try {
const res = await client.chat.completions.create({
model: id,
messages, // Include full history
});
const reply = res.choices[0].message.content;
messages.push({ role: 'assistant', content: reply });
// Report success - helps other users know this model works!
reportSuccess(id);
return reply;
} catch (e) {
// Report with correct issue type - free, doesn't use quota
reportIssue(id, issueFromStatus(e.status), e.message);
}
}
} catch {
// API unavailable
}
throw new Error('All models failed');
}

Tool Calling
Let the model call functions - for agents, data fetching, or structured outputs.
import { getModelIds, reportIssue, issueFromStatus } from './free-llm-router';
import { createOpenAI } from '@ai-sdk/openai';
import { generateText, tool } from 'ai';
import { z } from 'zod';
// Vercel AI SDK with OpenRouter
const openrouter = createOpenAI({
baseURL: 'https://openrouter.ai/api/v1',
apiKey: process.env.OPENROUTER_API_KEY,
});
// Define tools with Zod schemas
const tools = {
getWeather: tool({
description: 'Get current weather for a location',
parameters: z.object({ location: z.string() }),
execute: async ({ location }) => `72°F and sunny in ${location}`,
}),
};
async function askWithTools(prompt: string) {
try {
// Filter for models that support tool calling
// SDK has built-in 15-min cache, so this won't hit the API on every call
const models = await getModelIds(['tools'], 'capable', 3);
for (const id of models) {
try {
const { text, toolCalls } = await generateText({
model: openrouter(id),
prompt,
tools,
});
return { text, toolCalls };
} catch (e) {
// Report with correct issue type - free, doesn't use quota
reportIssue(id, issueFromStatus(e.status), e.message);
}
}
} catch {
// API unavailable
}
throw new Error('All models failed');
}
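A quick usage sketch for askWithTools above; the prompt and the logged fields are illustrative:

// The model may answer directly or invoke the getWeather tool defined above
const { text, toolCalls } = await askWithTools('What is the weather in Tokyo?');
console.log(text);      // the model's text response (may be empty if it only issued tool calls)
console.log(toolCalls); // any tool invocations, e.g. getWeather with { location: 'Tokyo' }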