Skip to main content

Error Handling

import { KugelAudio } from 'kugelaudio';
import {
  KugelAudioError,
  AuthenticationError,
  RateLimitError,
  InsufficientCreditsError,
  ValidationError,
  NotFoundError,
  ConnectionError,
} from 'kugelaudio';

// Assumes `client` is an already-constructed KugelAudio instance.
try {
  const audio = await client.tts.generate({ text: 'Hello!' });
} catch (error) {
  // Test the specific subclasses first: every SDK error extends
  // KugelAudioError, so the base-class check has to come last or it
  // would shadow all of the branches above it.
  if (error instanceof AuthenticationError) {
    console.error('Invalid API key');
  } else if (error instanceof RateLimitError) {
    console.error('Rate limit exceeded, please wait');
  } else if (error instanceof InsufficientCreditsError) {
    console.error('Not enough credits, please top up');
  } else if (error instanceof NotFoundError) {
    console.error('Voice, model, or dictionary not found');
  } else if (error instanceof ValidationError) {
    console.error(`Invalid request: ${error.message}`);
  } else if (error instanceof ConnectionError) {
    console.error('Failed to connect to server');
  } else if (error instanceof KugelAudioError) {
    console.error(`API error: ${error.message}`);
  } else {
    // Not an SDK error (for example a bug in the calling code):
    // rethrow so it is not silently swallowed.
    throw error;
  }
}
All errors extend KugelAudioError and carry a machine-readable code. The package also exports the ErrorCodes and WsCloseCodes constant maps for matching specific codes when you need finer-grained handling than the classes above.

KugelAudioOptions

/** Region selector accepted by `KugelAudioOptions.region`. */
type Region = 'eu' | 'us' | 'global';

/**
 * Client configuration: credentials, endpoints, and connection timing.
 * Only `apiKey` is required; every other field is optional.
 * NOTE(review): presumably passed when constructing the client — confirm.
 */
interface KugelAudioOptions {
  apiKey: string;                   // Required (can be prefixed with 'eu-' for EU)
  isMasterKey?: boolean;            // Treat apiKey as a master key (server-side)
  isToken?: boolean;                // Treat apiKey as a JWT token
  orgId?: number;                   // Org to bill against (token auth)
  region?: Region;                  // 'eu' selects the direct EU endpoint
  apiUrl?: string;                  // Default: https://api.kugelaudio.com
  ttsUrl?: string;                  // Default: same as apiUrl
  timeout?: number;                 // Timeout in milliseconds. Default: 60000
  keepalivePingInterval?: number | null;  // Interval in milliseconds. Default: 20000; 0 or null disables
}

GenerateOptions

/**
 * Per-request synthesis options. Only `text` is required.
 * NOTE(review): presumably the argument of `client.tts.generate()` — confirm.
 */
interface GenerateOptions {
  text: string;              // Required: Text to synthesize
  modelId?: string;          // Default: 'kugel-3'
  voiceId?: number;          // Optional: Voice ID
  cfgScale?: number;         // Default: 2.0
  temperature?: number;      // Sampling variance 0.0-1.0; default ~0.5
  maxNewTokens?: number;     // Default: 2048
  sampleRate?: number;       // Default: 24000
  outputFormat?: string;     // 'pcm_24000' | 'ulaw_8000' | 'alaw_8000' | ...
  normalize?: boolean;       // Default: true - Enable text normalization (pair with `language`)
  language?: string;         // ISO 639-1 code for normalization (e.g., 'en', 'de');
                             // set it whenever known, since normalizing without it
                             // may produce incorrect results
  wordTimestamps?: boolean;  // Default: false - Request word-level timestamps
  speed?: number;            // Playback speed 0.8-1.2 (default 1.0); pitch-preserving
  projectId?: number;        // Project-scoped features (master-key callers)
  dictionaryIds?: number[];  // Per-request dictionary selection:
                             //   omitted -> all active dictionaries
                             //   []      -> none
                             //   list    -> exactly those (incl. inactive),
                             //              bypassing the language filter
}
Using normalize: true without language may cause incorrect normalizations. Always specify language when you know it.

AudioChunk

/** One piece of streamed audio, as delivered to the `onChunk` callbacks. */
interface AudioChunk {
  audio: string;       // Base64-encoded audio; PCM16 unless `encoding` reports G.711
  encoding: string;    // 'pcm_s16le' | 'mulaw' | 'alaw' (G.711 when outputFormat set)
  index: number;       // Chunk index (0-based)
  sampleRate: number;  // Sample rate (24000); NOTE(review): presumably differs for
                       // formats such as 'ulaw_8000' — confirm
  samples: number;     // Number of samples in chunk
}

WordTimestamp

/** Timing and text-position information for a single word of the generated audio. */
interface WordTimestamp {
  word: string;      // The word
  startMs: number;   // Start time in milliseconds
  endMs: number;     // End time in milliseconds
  charStart: number; // Character start index in original text
  charEnd: number;   // Character end index in original text
                     // NOTE(review): inclusive vs. exclusive is not stated — confirm
  score: number;     // Alignment confidence score (0.0 - 1.0)
}

AudioResponse

/** Complete generated audio together with its size, timing, and word-level metadata. */
interface AudioResponse {
  audio: ArrayBuffer;              // Complete PCM16 audio
  sampleRate: number;              // Sample rate (24000)
  samples: number;                 // Total samples
  durationMs: number;              // Duration in milliseconds
  generationMs: number;            // Generation time in milliseconds
  rtf: number;                     // Real-time factor
                                   // NOTE(review): presumably generationMs / durationMs — confirm
  wordTimestamps: WordTimestamp[];  // Per-word timing (when wordTimestamps: true)
}

GenerationStats

/** Summary statistics passed to `StreamCallbacks.onFinal`. */
interface GenerationStats {
  final: true;            // Always the literal `true`
  chunks: number;         // Number of chunks generated
  totalSamples: number;   // Total samples generated
  durationMs: number;     // Audio duration in ms
  generationMs: number;   // Generation time in ms
  rtf: number;           // Real-time factor
  usage?: SessionUsage;  // Per-request usage (audio time + charge); undefined if not reported
}

SessionUsage

Per-conversation usage for billing your own customers. Available on StreamingSession.lastUsage (per session), MultiContextSession.usageFor(...) and the onContextClosed callback (per context), and GenerationStats.usage (per one-shot stream() request).
/**
 * Usage figures for billing: how much audio was generated and what it cost.
 * `audioSeconds` is always reported; `costCents` is `null` (with
 * `costAvailable` false) when the charge could not be determined.
 */
interface SessionUsage {
  audioSeconds: number;    // Audio generated (the unit we bill on)
  costCents: number | null; // Actual charge in EUR cents; null if undetermined
  currency?: string;       // Currency of costCents ('eur'); set only when costCents is
                           // non-null (TODO confirm: the original note was cut off here)
  characters?: number;     // Input characters; omitted on multi-context per-context usage
  modelId?: string;        // Model that produced the audio
  costAvailable: boolean;  // true when an authoritative charge was returned
}
costCents is null (and costAvailable is false) when the charge cannot be determined at session end — e.g. a transient billing error or an internal session. It is never a misleading 0. audioSeconds is always reported.

StreamCallbacks

Used with the one-shot client.tts.stream() endpoint:
/**
 * Optional event handlers for the one-shot `client.tts.stream()` endpoint.
 * Every handler may be omitted.
 */
interface StreamCallbacks {
  onOpen?: () => void;                                       // No arguments; presumably fires once the stream is open — confirm
  onChunk?: (chunk: AudioChunk) => void;                     // Receives an audio chunk
  onWordTimestamps?: (timestamps: WordTimestamp[]) => void;  // Receives word timings; presumably only when requested — confirm
  onFinal?: (stats: GenerationStats) => void;                // Receives the final generation statistics
  onError?: (error: Error) => void;                          // Receives the error
  onClose?: () => void;                                      // No arguments; presumably fires when the stream closes — confirm
}

StreamConfig

Configuration for client.tts.streamingSession() (LLM integration endpoint):
/**
 * Configuration for `client.tts.streamingSession()` (LLM integration endpoint).
 * All fields are optional.
 */
interface StreamConfig {
  voiceId?: number;
  modelId?: string;           // Default: 'kugel-3'
  cfgScale?: number;
  temperature?: number;       // Sampling variance 0.0-1.0; default ~0.5
  maxNewTokens?: number;
  sampleRate?: number;
  outputFormat?: string;      // Combined codec + rate token
  flushTimeoutMs?: number;    // NOTE(review): presumably idle time in ms before buffered text is flushed — confirm
  maxBufferLength?: number;   // NOTE(review): presumably an upper bound on buffered text — confirm unit
  normalize?: boolean;
  language?: string;          // ISO 639-1 code — specify to avoid auto-detect latency
  wordTimestamps?: boolean;
  /**
   * Minimum buffer sizes (chars) before each successive chunk is auto-emitted.
   * Smaller = lower TTFA; larger = better prosody context.
   * Default: [5, 80, 150, 250]
   */
  chunkLengthSchedule?: number[];
  /**
   * When true, start generating at the very first clean sentence boundary.
   * Equivalent to ElevenLabs auto_mode=true. Lowest possible TTFA.
   */
  autoMode?: boolean;
  speed?: number;             // Playback speed 0.8-1.2 (default 1.0); pitch-preserving
  dictionaryIds?: number[];   // Per-session dictionary selection (see GenerateOptions)
}

StreamingSessionCallbacks

/**
 * Optional event handlers for a streaming session. Every handler may be omitted.
 * NOTE(review): presumably used with `client.tts.streamingSession()` — confirm.
 */
interface StreamingSessionCallbacks {
  // Receives an audio chunk.
  onChunk?: (chunk: AudioChunk) => void;
  // Receives a chunk id, its audio length in seconds, and a generation time
  // (presumably milliseconds, judging by the `genMs` name — confirm).
  onChunkComplete?: (chunkId: number, audioSeconds: number, genMs: number) => void;
  // End of audio for the turn (ElevenLabs isFinal equivalent) — fires after
  // the last audio frame, right before onSessionClosed. Not on barge-in.
  onFinal?: (totalAudioSeconds: number, totalTextChunks: number, totalAudioChunks: number) => void;
  // Takes the same three totals as onFinal; onFinal fires right before it.
  onSessionClosed?: (totalAudioSeconds: number, totalTextChunks: number, totalAudioChunks: number) => void;
  // Receives a chunk id and the text associated with it.
  onGenerationStarted?: (chunkId: number, text: string) => void;
  // Receives word timings.
  onWordTimestamps?: (timestamps: WordTimestamp[]) => void;
  // Receives the error.
  onError?: (error: Error) => void;
}

Model

/** Metadata describing a model: identity, size label, and input/output limits. */
interface Model {
  id: string;             // e.g. 'kugel-3'
  name: string;           // Human-readable name
  description: string;    // Model description
  parameters: string;     // Parameter-count label (e.g. '7B')
  maxInputLength: number; // Maximum input characters
  sampleRate: number;     // Output sample rate
}

VoiceListResponse

Paginated response from voices.list():
/** One page of results from `voices.list()`, plus the paging values that produced it. */
interface VoiceListResponse {
  voices: Voice[];   // Voices on this page
  total: number;     // Total number of matching voices
  limit: number;     // Page size used
  offset: number;    // Offset used
}

Voice

/** Allowed values for `Voice.category`. */
type VoiceCategory = 'premade' | 'cloned' | 'designed' | 'conversational' | 'narrative' | 'narrative_story' | 'characters';
/** Allowed values for `Voice.sex`. */
type VoiceSex = 'male' | 'female' | 'neutral';
/** Allowed values for `Voice.age`. */
type VoiceAge = 'young' | 'middle_aged' | 'old';
/** Quality tiers. Not referenced by the `Voice` interface below. */
type VoiceQuality = 'low' | 'mid' | 'high';

/** A voice entry. */
interface Voice {
  id: number;                    // Voice ID
  name: string;                  // Voice name
  description?: string;          // Description
  category?: VoiceCategory;
  sex?: VoiceSex;
  age?: VoiceAge;
  supportedLanguages: string[];  // ['en', 'de', ...]
  sampleText?: string;           // Text used for sample generation
  avatarUrl?: string;            // Avatar image URL
  sampleUrl?: string;            // Sample audio URL
  isPublic: boolean;
  verified: boolean;
}

VoiceDetail

Extended voice information (returned by create, update, get, publish, generateSample):
/**
 * Extended voice information, returned by create, update, get, publish and
 * generateSample. `category`, `age`, `sex` and `quality` are typed as plain
 * strings here.
 */
interface VoiceDetail {
  id: number;
  name: string;
  description: string;
  generativeVoiceDescription: string;
  supportedLanguages: string[];
  category: string;
  age?: string;
  sex?: string;
  quality: string;                 // 'low' | 'mid' | 'high'
  isPublic: boolean;
  verified: boolean;
  pendingVerification: boolean;
  sampleUrl?: string;
  avatarUrl?: string;
  sampleText: string;
}

VoiceReference

/**
 * NOTE(review): presumably a reference audio sample attached to a voice — the
 * page does not describe this type, so confirm against the SDK.
 */
interface VoiceReference {
  id: number;
  voiceId: number;        // presumably the ID of the voice this reference belongs to — confirm
  name: string;
  referenceText: string;  // presumably the transcript of the reference audio — confirm
  s3Path: string;
  audioUrl?: string;      // Optional; may be absent
  isGenerated: boolean;
}

CreateVoiceOptions

/**
 * Fields accepted when creating a voice. `name` and `sex` are required;
 * everything else is optional.
 */
interface CreateVoiceOptions {
  name: string;                          // Required
  sex: string;                           // Required
  description?: string;
  category?: string;
  age?: string;
  quality?: string;
  supportedLanguages?: string[];
  isPublic?: boolean;
  sampleText?: string;
  referenceFiles?: Array<File | Blob>;   // presumably reference audio for the new voice — confirm
}

UpdateVoiceOptions

/**
 * Fields accepted when updating a voice. Every field is optional.
 * NOTE(review): presumably only the supplied fields are changed — confirm.
 */
interface UpdateVoiceOptions {
  name?: string;
  description?: string;
  category?: string;
  age?: string;
  sex?: string;
  quality?: string;
  supportedLanguages?: string[];
  isPublic?: boolean;
  sampleText?: string;
}

Dictionary types live on the Dictionaries page; multi-context types live on the Streaming Sessions page.