Rate Limiting
Understand API rate limits, quotas, and best practices for handling limits gracefully
Rate Limiting
The invoice management system API implements rate limiting to ensure fair usage and maintain system performance. This guide explains the rate limiting rules, how to monitor your usage, and best practices for handling limits.
Rate Limit Overview
Default Limits
- Requests per minute: 1,000 requests
- Requests per hour: 10,000 requests
- Requests per day: 100,000 requests
Premium Tier Limits
Organizations with premium tiers may have higher limits:
- Premium: 5,000 requests/minute, 50,000 requests/hour
- Enterprise: 10,000 requests/minute, 100,000 requests/hour
Rate Limit Headers
Every API response includes rate limit information in the headers:
X-RateLimit-Limit: 1000
X-RateLimit-Remaining: 999
X-RateLimit-Reset: 2024-01-15T15:30:00Z
X-RateLimit-Window: 60
Header Descriptions
| Header | Description |
|---|---|
| X-RateLimit-Limit | Maximum requests allowed in the current window |
| X-RateLimit-Remaining | Number of requests remaining in the current window |
| X-RateLimit-Reset | Time when the rate limit window resets |
| X-RateLimit-Window | Duration of the rate limit window in seconds |
Rate Limit Exceeded Response
When you exceed the rate limit, you'll receive a 429 Too Many Requests response:
{
"error": "Rate Limit Exceeded",
"message": "Too many requests. Please try again later.",
"details": {
"retry_after": 60,
"limit": 1000,
"remaining": 0,
"reset_time": "2024-01-15T15:30:00Z",
"window": 60
}
}
The response also includes a Retry-After header indicating how long to wait before making another request.
Monitoring Rate Limits
JavaScript Example
class RateLimitMonitor {
  /**
   * Tracks the API's rate-limit state from the X-RateLimit-* response
   * headers so callers can throttle themselves before hitting a 429.
   */
  constructor() {
    this.limits = {};
  }

  /**
   * Records the rate-limit headers from a fetch Response.
   * Missing headers are stored as null instead of NaN / epoch dates,
   * so the query methods below degrade gracefully.
   */
  updateFromResponse(response) {
    const header = (name) => response.headers.get(name);
    // Number.parseInt with an explicit radix; null stays null.
    const toInt = (value) => (value === null ? null : Number.parseInt(value, 10));
    const resetHeader = header('X-RateLimit-Reset');

    this.limits = {
      limit: toInt(header('X-RateLimit-Limit')),
      remaining: toInt(header('X-RateLimit-Remaining')),
      reset: resetHeader ? new Date(resetHeader) : null,
      window: toInt(header('X-RateLimit-Window'))
    };
  }

  /** True when 10 or fewer requests remain in the current window. */
  shouldWait() {
    // Guard against "no data yet": null/undefined must not satisfy <= 10.
    return this.limits.remaining != null && this.limits.remaining <= 10;
  }

  /** Milliseconds until the window resets (0 if unknown or already past). */
  getWaitTime() {
    if (!this.limits.reset) return 0;
    return Math.max(0, this.limits.reset.getTime() - Date.now());
  }

  /** Percentage of the window's quota consumed so far (0 if unknown). */
  getUsagePercentage() {
    if (!this.limits.limit || this.limits.remaining == null) return 0;
    return ((this.limits.limit - this.limits.remaining) / this.limits.limit) * 100;
  }
}
// Usage
const monitor = new RateLimitMonitor();
async function makeApiRequest(url, options) {
const response = await fetch(url, options);
monitor.updateFromResponse(response);
console.log(`Rate limit usage: ${monitor.getUsagePercentage().toFixed(1)}%`);
if (monitor.shouldWait()) {
console.warn('Approaching rate limit. Consider slowing down requests.');
}
return response;
}Python Example
import time
import requests
from datetime import datetime
class RateLimitMonitor:
    """Tracks API rate-limit state parsed from X-RateLimit-* response headers."""

    def __init__(self):
        # All values are unknown until the first response is recorded.
        self.limit = None
        self.remaining = None
        self.reset = None
        self.window = None

    def update_from_response(self, response):
        """Record the rate-limit headers from an HTTP response object."""
        self.limit = int(response.headers.get('X-RateLimit-Limit', 0))
        self.remaining = int(response.headers.get('X-RateLimit-Remaining', 0))
        reset_str = response.headers.get('X-RateLimit-Reset')
        if reset_str:
            # 'Z' suffix is not accepted by fromisoformat on older Pythons;
            # normalize to an explicit UTC offset. The result is timezone-aware.
            self.reset = datetime.fromisoformat(reset_str.replace('Z', '+00:00'))
        self.window = int(response.headers.get('X-RateLimit-Window', 60))

    def should_wait(self):
        """True when 10 or fewer requests remain in the current window."""
        return self.remaining is not None and self.remaining <= 10

    def get_wait_time(self):
        """Seconds until the window resets (0 if unknown or already past)."""
        if not self.reset:
            return 0
        # self.reset is timezone-aware, so "now" must be aware too:
        # subtracting a naive datetime.now() from an aware datetime
        # raises TypeError. Use the reset's own tzinfo for "now".
        now = datetime.now(self.reset.tzinfo)
        return max(0, (self.reset - now).total_seconds())

    def get_usage_percentage(self):
        """Percentage of the window's quota consumed so far (0 if unknown)."""
        if not self.limit or self.remaining is None:
            return 0
        return ((self.limit - self.remaining) / self.limit) * 100
# Usage
monitor = RateLimitMonitor()
def make_api_request(url, **kwargs):
response = requests.get(url, **kwargs)
monitor.update_from_response(response)
print(f"Rate limit usage: {monitor.get_usage_percentage():.1f}%")
if monitor.should_wait():
print("Approaching rate limit. Consider slowing down requests.")
return responseHandling Rate Limits
Exponential Backoff
Implement exponential backoff when rate limits are exceeded:
async function requestWithBackoff(url, options, maxRetries = 3) {
for (let attempt = 0; attempt < maxRetries; attempt++) {
try {
const response = await fetch(url, options);
if (response.status === 429) {
const errorData = await response.json();
const retryAfter = errorData.details?.retry_after || 60;
const backoffTime = Math.min(retryAfter * 1000, Math.pow(2, attempt) * 1000);
console.log(`Rate limited. Waiting ${backoffTime}ms before retry ${attempt + 1}/${maxRetries}`);
await new Promise(resolve => setTimeout(resolve, backoffTime));
continue;
}
return response;
} catch (error) {
if (attempt === maxRetries - 1) throw error;
const backoffTime = Math.pow(2, attempt) * 1000;
console.log(`Request failed. Retrying in ${backoffTime}ms...`);
await new Promise(resolve => setTimeout(resolve, backoffTime));
}
}
}Request Queuing
Implement a request queue to manage API calls:
class ApiRequestQueue {
  /**
   * Serializes API calls so they are dispatched no faster than a fixed
   * rate. Requests run one at a time, in FIFO order.
   *
   * @param {number} [requestsPerSecond] Maximum dispatch rate (default 10).
   */
  constructor(requestsPerSecond = 10) {
    this.queue = [];
    this.processing = false;
    this.interval = 1000 / requestsPerSecond; // minimum gap between dispatches, ms
    this.lastRequestTime = 0;
  }

  /**
   * Adds a request factory to the queue. The returned promise settles
   * with the factory's result once its turn comes up.
   */
  async enqueue(requestFn) {
    return new Promise((resolve, reject) => {
      this.queue.push({ requestFn, resolve, reject });
      this.processQueue();
    });
  }

  /** Drains the queue, pacing each dispatch by `this.interval`. */
  async processQueue() {
    if (this.processing || this.queue.length === 0) return;
    this.processing = true;

    while (this.queue.length > 0) {
      const elapsed = Date.now() - this.lastRequestTime;
      const waitMs = this.interval - elapsed;
      if (waitMs > 0) {
        await new Promise((resolve) => setTimeout(resolve, waitMs));
      }

      const job = this.queue.shift();
      try {
        job.resolve(await job.requestFn());
      } catch (err) {
        job.reject(err);
      }
      this.lastRequestTime = Date.now();
    }

    this.processing = false;
  }
}
// Usage
const apiQueue = new ApiRequestQueue(15); // 15 requests per second
async function queuedApiRequest(url, options) {
return apiQueue.enqueue(() => fetch(url, options));
}Best Practices
1. Monitor Usage Proactively
function setupRateLimitAlerts(monitor) {
setInterval(() => {
const usage = monitor.getUsagePercentage();
if (usage > 80) {
console.warn(`High rate limit usage: ${usage.toFixed(1)}%`);
// Send alert to monitoring system
}
if (usage > 95) {
console.error(`Critical rate limit usage: ${usage.toFixed(1)}%`);
// Implement emergency throttling
}
}, 30000); // Check every 30 seconds
}2. Implement Caching
Reduce API calls by caching responses:
class ApiCache {
  /**
   * In-memory cache with a fixed time-to-live per entry.
   *
   * @param {number} [ttl] Entry lifetime in ms (default 300000 = 5 minutes).
   */
  constructor(ttl = 300000) {
    this.cache = new Map();
    this.ttl = ttl;
  }

  /** Returns the cached value, or null if absent or expired (expired entries are evicted). */
  get(key) {
    const entry = this.cache.get(key);
    if (entry === undefined) return null;

    if (Date.now() > entry.expiry) {
      this.cache.delete(key);
      return null;
    }
    return entry.data;
  }

  /** Stores a value with an expiry of now + ttl. */
  set(key, data) {
    const expiry = Date.now() + this.ttl;
    this.cache.set(key, { data, expiry });
  }

  /** Drops every cached entry. */
  clear() {
    this.cache.clear();
  }
}
// Usage
const cache = new ApiCache();
async function cachedApiRequest(url, options) {
const cacheKey = `${url}-${JSON.stringify(options)}`;
const cached = cache.get(cacheKey);
if (cached) {
console.log('Returning cached response');
return cached;
}
const response = await fetch(url, options);
const data = await response.json();
cache.set(cacheKey, data);
return data;
}3. Batch Requests When Possible
Instead of making multiple individual requests, batch them when the API supports it:
// Instead of multiple individual requests
const users = await Promise.all([
fetchUser('user-1'),
fetchUser('user-2'),
fetchUser('user-3')
]);
// Use pagination to get multiple items in one request
const usersResponse = await fetch('/api/integrations/users?per_page=100');
const users = usersResponse.data;4. Use Webhooks for Real-time Updates
Instead of polling for changes, use webhooks when available:
// Avoid frequent polling
setInterval(async () => {
const updates = await fetchUpdates();
processUpdates(updates);
}, 30000); // Every 30 seconds
// Use webhooks instead
app.post('/webhook/updates', (req, res) => {
const updates = req.body;
processUpdates(updates);
res.status(200).send('OK');
});Rate Limit Strategies by Use Case
High-Volume Data Sync
For applications that need to sync large amounts of data:
class DataSyncer {
constructor(apiClient, batchSize = 50) {
this.apiClient = apiClient;
this.batchSize = batchSize;
}
async syncUsers() {
let page = 1;
let hasMore = true;
while (hasMore) {
const response = await this.apiClient.getUsers({
page,
per_page: this.batchSize
});
await this.processUsers(response.data);
hasMore = page < response.meta.last_page;
page++;
// Add delay between batches to respect rate limits
if (hasMore) {
await new Promise(resolve => setTimeout(resolve, 1000));
}
}
}
}Real-time Applications
For applications that need real-time updates:
class RealTimeSync {
constructor(apiClient) {
this.apiClient = apiClient;
this.updateQueue = [];
this.processing = false;
}
queueUpdate(update) {
this.updateQueue.push(update);
this.processUpdates();
}
async processUpdates() {
if (this.processing) return;
this.processing = true;
while (this.updateQueue.length > 0) {
const batch = this.updateQueue.splice(0, 10); // Process 10 at a time
await Promise.all(
batch.map(update => this.apiClient.updateUser(update))
);
// Small delay between batches
if (this.updateQueue.length > 0) {
await new Promise(resolve => setTimeout(resolve, 100));
}
}
this.processing = false;
}
}Troubleshooting Rate Limit Issues
Common Issues and Solutions
-
Sudden rate limit spikes
- Check for infinite loops in your code
- Verify that retry logic isn't causing cascading requests
- Monitor for concurrent request patterns
-
Inconsistent rate limiting
- Rate limits are per API key, not per application instance
- Multiple servers using the same API key share the limit
- Consider using different API keys for different services
-
Rate limits lower than expected
- Check your organization's premium tier status
- Verify API key permissions and associated limits
- Contact support if limits seem incorrect
Next Steps
- Error Handling - Handle rate limit errors properly
- Code Examples - See complete implementation examples
- Authentication - Understand API key management