API Security

Protect your LLM API with authentication, rate limiting, and access control.

Overview

Security measures for exposed LLM APIs:

  • Authentication - Verify client identity
  • Rate limiting - Prevent abuse and overload
  • Access control - Restrict endpoints and models
  • Logging - Audit access patterns
  • Encryption - Protect data in transit

Threat Model

Threat                 Risk     Mitigation
Unauthorized access    High     Authentication
Resource exhaustion    High     Rate limiting
Data exfiltration      Medium   Logging, access control
Prompt injection       Medium   Input validation
Model theft            Low      Endpoint restrictions

Authentication

API Key Authentication

Using Caddy as a reverse proxy:

# Caddyfile
:8080 {
    @valid_key header Authorization "Bearer {$API_KEY}"

    handle @valid_key {
        reverse_proxy localhost:11434
    }

    handle {
        respond "Unauthorized" 401
    }
}

# Set environment variable
export API_KEY=your-secret-key

# Start Caddy
caddy run --config Caddyfile
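
A quick smoke test of this setup (assuming API_KEY is exported as above):

# No key: expect 401
curl -i http://localhost:8080/v1/models

# Correct key: expect 200 from Ollama
curl -i -H "Authorization: Bearer $API_KEY" http://localhost:8080/v1/models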

Basic Authentication

:8080 {
    basicauth {
        user1 $2a$14$...  # bcrypt hash
        user2 $2a$14$...
    }
    reverse_proxy localhost:11434
}

Generate password hash:

caddy hash-password
# Enter password when prompted
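
For scripting, the hash can also be generated non-interactively (note the password then lands in shell history, so prefer the prompt for real secrets):

caddy hash-password --plaintext 'changeme'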

nginx Basic Auth

# /etc/nginx/conf.d/llm.conf
server {
    listen 8080;

    auth_basic "LLM API";
    auth_basic_user_file /etc/nginx/.htpasswd;

    location / {
        proxy_pass http://localhost:11434;
    }
}

# Create password file
sudo htpasswd -c /etc/nginx/.htpasswd user1
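
Add further users without -c, which would otherwise overwrite the file:

sudo htpasswd /etc/nginx/.htpasswd user2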

Rate Limiting

Caddy Rate Limiting

Using the rate_limit plugin (a third-party module, not included in standard Caddy builds):

{
    order rate_limit before reverse_proxy
}

:8080 {
    rate_limit {
        zone dynamic {
            key {remote_host}
            events 60
            window 1m
        }
    }

    reverse_proxy localhost:11434
}
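
The plugin must be compiled in; a typical build with xcaddy:

# Build Caddy with the rate-limit module
xcaddy build --with github.com/mholt/caddy-ratelimit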

nginx Rate Limiting

# In the http context (nginx.conf or a conf.d include)
limit_req_zone $binary_remote_addr zone=llm_limit:10m rate=10r/s;
limit_conn_zone $binary_remote_addr zone=llm_conn:10m;

server {
    listen 8080;

    # Rate limit requests; return 429 instead of the default 503
    limit_req zone=llm_limit burst=20 nodelay;
    limit_req_status 429;

    # Limit concurrent connections
    limit_conn llm_conn 5;
    limit_conn_status 429;

    location / {
        proxy_pass http://localhost:11434;
    }
}

Traefik Rate Limiting
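
Applied as Docker Compose labels on the Ollama container (assuming a router named ollama already exists):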

labels:
  - "traefik.http.middlewares.ratelimit.ratelimit.average=10"
  - "traefik.http.middlewares.ratelimit.ratelimit.burst=20"
  - "traefik.http.middlewares.ratelimit.ratelimit.period=1s"
  - "traefik.http.routers.ollama.middlewares=ratelimit"

Endpoint Restrictions

Allow Only Inference Endpoints

server {
    listen 8080;

    # Allow chat/completion endpoints
    location ~ ^/v1/(chat/completions|completions|models|embeddings)$ {
        proxy_pass http://localhost:11434;
    }

    # Allow health checks
    location /health {
        proxy_pass http://localhost:11434;
    }

    # Block everything else
    location / {
        return 403;
    }
}

Block Administrative Endpoints

# Block Ollama management endpoints (inside the server block)
location ~ ^/api/(pull|push|delete|copy|create)$ {
    return 403;
}

Caddy Endpoint Filtering

:8080 {
    @allowed {
        path /v1/chat/completions
        path /v1/completions
        path /v1/models
        path /v1/embeddings
        path /health
    }

    handle @allowed {
        reverse_proxy localhost:11434
    }

    handle {
        respond "Forbidden" 403
    }
}

Request Validation

Size Limits

# Limit request body size
client_max_body_size 1m;

# Limit header size
large_client_header_buffers 4 16k;
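
To verify the body limit, send an oversized payload and expect a 413 (a sketch; adjust host and auth to your setup):

# 2 MB of zeros against a 1 MB limit: expect 413
head -c 2097152 /dev/zero | curl -s -o /dev/null -w "%{http_code}\n" \
    --data-binary @- http://localhost:8080/v1/chat/completions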

Timeout Protection

# Bound hung upstream connections without cutting off slow LLM generations
proxy_read_timeout 120s;
proxy_connect_timeout 10s;
proxy_send_timeout 60s;

Logging

nginx Access Logs

log_format llm_json escape=json '{'
    '"time":"$time_iso8601",'
    '"remote_addr":"$remote_addr",'
    '"request":"$request",'
    '"status":"$status",'
    '"body_bytes_sent":"$body_bytes_sent",'
    '"request_time":"$request_time",'
    '"user":"$remote_user"'
'}';

access_log /var/log/nginx/llm_access.log llm_json;

Caddy Logging

:8080 {
    log {
        output file /var/log/caddy/llm_access.log
        format json
    }

    reverse_proxy localhost:11434
}
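
Caddy writes structured JSON, so the same jq techniques apply (field names follow Caddy's access-log schema, e.g. request.remote_ip in recent versions):

# Top client IPs from Caddy access logs
jq -r '.request.remote_ip' /var/log/caddy/llm_access.log | sort | uniq -c | sort -rn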

Log Analysis

# Recent requests
tail -f /var/log/nginx/llm_access.log | jq .

# Top users
jq -r '.remote_addr' /var/log/nginx/llm_access.log | sort | uniq -c | sort -rn

# Failed requests
jq 'select(.status != "200")' /var/log/nginx/llm_access.log
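
Aggregating over time helps spot bursts (assumes the llm_json format above; logs are chronological, so uniq works without sorting):

# Requests per hour, bucketed on the ISO8601 timestamp
jq -r '.time' /var/log/nginx/llm_access.log | cut -c1-13 | uniq -c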

Full Secure Setup

Docker Compose with Caddy

version: '3.8'

services:
  caddy:
    image: caddy:latest  # use a build that includes the rate_limit plugin
    container_name: caddy
    ports:
      - "8080:8080"
    volumes:
      - ./Caddyfile:/etc/caddy/Caddyfile:ro
      - caddy_data:/data
    environment:
      - API_KEY=${API_KEY}
    restart: unless-stopped

  ollama:
    image: ollama/ollama
    container_name: ollama
    volumes:
      - /tank/ai/models/ollama:/root/.ollama
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    restart: unless-stopped

volumes:
caddy_data:

# Caddyfile
{
    # rate_limit is a third-party plugin; requires a custom Caddy build
    order rate_limit before reverse_proxy
}

:8080 {
    # Logging
    log {
        output file /data/access.log
        format json
    }

    # API key authentication
    @valid_key header Authorization "Bearer {$API_KEY}"

    handle @valid_key {
        # Rate limiting per IP (the plugin responds 429 when exceeded)
        rate_limit {
            zone dynamic {
                key {remote_host}
                events 60
                window 1m
            }
        }

        # Only allow safe endpoints
        @allowed {
            path /v1/chat/completions
            path /v1/completions
            path /v1/models
            path /v1/embeddings
            path /health
            path /  # Ollama liveness check at root
        }

        handle @allowed {
            reverse_proxy ollama:11434
        }

        handle {
            respond "Forbidden" 403
        }
    }

    handle {
        respond "Unauthorized" 401
    }
}
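
Bring the stack up with the key supplied via the environment (e.g. an .env file next to the compose file):

# Generate a key and start the stack
echo "API_KEY=$(openssl rand -hex 32)" > .env
docker compose up -d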

nginx Full Configuration

# /etc/nginx/conf.d/llm-secure.conf

# Rate limiting zones
limit_req_zone $binary_remote_addr zone=llm_burst:10m rate=10r/s;
limit_conn_zone $binary_remote_addr zone=llm_conn:10m;

# Upstream
upstream ollama {
    server 127.0.0.1:11434;
    keepalive 32;
}

server {
    listen 8080;
    server_name _;

    # Logging (llm_json format from the Logging section)
    access_log /var/log/nginx/llm_access.log llm_json;
    error_log /var/log/nginx/llm_error.log;

    # Rate limiting; return 429 instead of the default 503
    limit_req zone=llm_burst burst=20 nodelay;
    limit_req_status 429;
    limit_conn llm_conn 10;
    limit_conn_status 429;

    # Size limits
    client_max_body_size 1m;

    # Timeouts
    proxy_read_timeout 120s;
    proxy_connect_timeout 10s;

    # Basic auth
    auth_basic "LLM API";
    auth_basic_user_file /etc/nginx/.htpasswd;

    # Allowed endpoints
    location ~ ^/v1/(chat/completions|completions|models|embeddings)$ {
        proxy_pass http://ollama;
        proxy_http_version 1.1;
        proxy_set_header Connection "";
        proxy_set_header Host $host;
        proxy_buffering off;  # For streaming
    }

    location /health {
        auth_basic off;  # Allow unauthenticated health checks
        proxy_pass http://ollama;
    }

    location / {
        return 403;
    }
}
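
Validate and apply the configuration:

sudo nginx -t && sudo systemctl reload nginx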

Testing Security

Test Authentication

# Without auth (should fail)
curl -I http://localhost:8080/v1/models
# Expect: 401 Unauthorized

# With auth
curl -H "Authorization: Bearer your-key" http://localhost:8080/v1/models
# Expect: 200 OK

Test Rate Limiting

# Send many requests
for i in {1..50}; do
    curl -s -o /dev/null -w "%{http_code}\n" \
         -H "Authorization: Bearer your-key" \
         http://localhost:8080/v1/models
done
# Should see 429 responses after limit

Test Endpoint Restrictions

# Try blocked endpoint
curl -H "Authorization: Bearer your-key" \
     -X POST http://localhost:8080/api/pull \
     -d '{"name": "malicious-model"}'
# Expect: 403 Forbidden

Monitoring

Failed Auth Attempts

# grep for 401 responses
grep '"status":"401"' /var/log/nginx/llm_access.log | wc -l

Rate Limited Requests

# grep for 429 responses
grep '"status":"429"' /var/log/nginx/llm_access.log

Unusual Patterns

# Large request counts from a single IP (JSON logs)
jq -r '.remote_addr' /var/log/nginx/llm_access.log | \
    sort | uniq -c | sort -rn | head -20

See Also