# Synthesize one utterance. Every keyword is spelled out for illustration;
# the trailing comments note each option's range or default.
audio = client.tts.generate(
    text="Hello, this is a test of the KugelAudio text-to-speech system.",
    model_id="kugel-3",     # canonical production model (full list: /models)
    voice_id=1071,          # optional: pin a specific voice by ID
    cfg_scale=2.0,          # guidance scale, 1.0 to 5.0
    temperature=None,       # sampling variance, 0.0 to 1.0; None defers to the server default (~0.5)
    max_new_tokens=2048,    # upper bound on generated tokens
    sample_rate=24000,      # output sample rate
    normalize=True,         # text normalization enabled (the default)
    language="en",          # normalization language (see /sdks/python/normalization)
    word_timestamps=False,  # word-level timestamps not requested (the default)
    speed=1.0,              # playback speed, 0.8 to 1.2 (pitch-preserving WSOLA)
)

# --- Inspect the result -----------------------------------------------------
print(f"Duration: {audio.duration_seconds:.2f}s")
print(f"Samples: {audio.samples}")
print(f"Sample rate: {audio.sample_rate} Hz")
print(f"Generation time: {audio.generation_ms:.0f}ms")
print(f"RTF: {audio.rtf:.2f}")  # RTF = real-time factor

# --- Export the audio -------------------------------------------------------
# Write a WAV file to disk.
audio.save("output.wav")

# Raw PCM bytes (no container header).
pcm_data = audio.audio

# The same audio as WAV bytes, header included.
wav_bytes = audio.to_wav_bytes()

# Samples as float32 values in [-1.0, 1.0].
samples = audio.to_float32()

# Write headerless PCM to disk instead of WAV.
audio.save("output.pcm", format="raw")