What is Sample Rate?
The sample rate (or sampling rate) is the number of audio samples captured per second. It is measured in Hz (hertz) or kHz (kilohertz):
| Sample Rate | Typical Use |
|---|---|
| 8,000 Hz | Telephony, voice (minimum quality) |
| 22,050 Hz | Legacy web multimedia |
| 44,100 Hz | Audio CD, standard music |
| 48,000 Hz | Professional video, DAW, broadcast |
| 88,200 Hz | High-resolution mastering |
| 96,000 Hz | Studio audio, Hi-Res |
| 192,000 Hz | DVD-Audio, Blu-ray audio, high-resolution recording |
The Nyquist–Shannon sampling theorem states that to faithfully reproduce frequencies up to F Hz, we must sample at a rate of at least 2F Hz. At 44,100 Hz we can therefore reproduce frequencies up to 22,050 Hz, which is above the human hearing limit of ~20 kHz.
Why Convert?
- 44,100 → 48,000 Hz: for video projects (most video editors and video-oriented DAW sessions work at 48 kHz)
- 48,000 → 44,100 Hz: export audio for CD or music distribution
- Any → 22,050 Hz: reduce file size for web/mobile apps
- Mass downsampling: prepare datasets for ML/AI (speech recognition, etc.)
Installation
pip install scipy soundfile librosa numpy
For MP3 and compressed formats:
pip install pydub
# macOS: brew install ffmpeg
# Ubuntu: sudo apt install ffmpeg
# Windows: download ffmpeg and add to PATH
Method 1: scipy.signal.resample_poly (Recommended)
resample_poly uses polyphase filters — the best quality/speed tradeoff:
import soundfile as sf
import numpy as np
from scipy import signal
from math import gcd
def convert_sample_rate(input_path, output_path, target_sr):
    """
    Convert the sample rate of an audio file.

    Works with WAV, FLAC, OGG, AIFF and any other format that soundfile
    can both read and write.

    Args:
        input_path: Path of the audio file to read.
        output_path: Path of the audio file to write.
        target_sr: Desired sample rate in Hz (integer).

    Returns:
        None. The result is written to ``output_path``.
    """
    import shutil
    from pathlib import Path

    # always_2d=True → shape is always (samples, channels), even for mono
    audio, source_sr = sf.read(input_path, always_2d=True)

    if source_sr == target_sr:
        print(f"Sample rate is already {target_sr} Hz — no conversion needed")
        if input_path != output_path:
            same_container = (Path(input_path).suffix.lower()
                              == Path(output_path).suffix.lower())
            if same_container:
                # Identical container: a byte-for-byte copy is lossless
                shutil.copy2(input_path, output_path)
            else:
                # Different extension: re-encode so the file content
                # matches the name it is saved under
                sf.write(output_path, audio, source_sr)
        return

    # Reduce the ratio to lowest terms for the polyphase filter
    divisor = gcd(source_sr, target_sr)
    up = target_sr // divisor
    down = source_sr // divisor
    print(f"Converting: {source_sr} Hz → {target_sr} Hz "
          f"(ratio {up}/{down})")

    # Resample every channel in one call along the sample axis
    audio_out = signal.resample_poly(
        audio,
        up, down,
        axis=0,
        window=('kaiser', 5.0),  # Kaiser window: high quality
    )

    # Clip to prevent overflow distortion
    audio_out = np.clip(audio_out, -1.0, 1.0)
    sf.write(output_path, audio_out, target_sr)

    dur_orig = len(audio) / source_sr
    dur_conv = len(audio_out) / target_sr
    print(f" Duration: {dur_orig:.3f}s → {dur_conv:.3f}s")
    print(f" Saved: {output_path}")
# Common conversions: (input file, output file, target rate in Hz)
for src_file, dst_file, rate_hz in (
    ('recording_44k.wav', 'recording_48k.wav', 48000),
    ('music_48k.flac', 'music_44k.flac', 44100),
    ('podcast_44k.wav', 'podcast_22k.wav', 22050),
    ('hires_96k.wav', 'cd_44k.wav', 44100),
):
    convert_sample_rate(src_file, dst_file, rate_hz)
Method 2: librosa (Best for Music Analysis)
import librosa
import soundfile as sf
import numpy as np
def resample_with_librosa(input_path, output_path, target_sr):
    """
    Resample an audio file using librosa — ideal when the audio is
    already being loaded for music analysis.

    Args:
        input_path: Path of the audio file to read.
        output_path: Path of the audio file to write.
        target_sr: Desired sample rate in Hz.

    Returns:
        None. The result is written to ``output_path``.
    """
    # sr=None → keep the file's original sample rate
    # mono=False → keep all channels; multi-channel audio is channels-first
    audio, source_sr = librosa.load(input_path, sr=None, mono=False)

    if source_sr == target_sr:
        print("Sample rate already correct")
        # Still produce output_path so callers can rely on it existing
        if input_path != output_path:
            sf.write(output_path, audio.T, source_sr)
        return

    # librosa resamples along the last axis, so mono (samples,) and
    # multi-channel (channels, samples) arrays take the same call
    audio_out = librosa.resample(audio, orig_sr=source_sr,
                                 target_sr=target_sr)

    # soundfile expects (samples, channels); .T leaves a 1-D array unchanged
    sf.write(output_path, audio_out.T, target_sr)
    print(f"librosa: {source_sr}→{target_sr} Hz → {output_path}")
Batch Conversion
from pathlib import Path
def batch_convert_sr(directory, target_sr, output_format='wav',
                     pattern='*.wav', output_dir=None):
    """
    Convert all audio files in a directory to the target sample rate.

    Args:
        directory: Folder to scan for input files.
        target_sr: Desired sample rate in Hz (integer).
        output_format: File extension used for the written files.
        pattern: Glob pattern selecting the input files.
        output_dir: Folder for the results; created if missing. When
            omitted, results are written next to the inputs.

    Returns:
        dict with the counters ``'converted'``, ``'already_correct'``
        and ``'errors'``.

    Outputs are named ``<stem>_<target_sr>hz.<output_format>``. When
    ``output_dir`` is given, files already at the target rate are written
    there too, so the output folder holds the complete collection.
    """
    folder = Path(directory)
    files = sorted(folder.glob(pattern))

    if output_dir:
        dest = Path(output_dir)
        dest.mkdir(parents=True, exist_ok=True)
    else:
        dest = folder

    results = {'converted': 0, 'already_correct': 0, 'errors': 0}
    print(f"Processing {len(files)} files → {target_sr} Hz")

    for file in files:
        # Best-effort batch: one unreadable file must not stop the others
        try:
            src_sr = sf.info(str(file)).samplerate
            out_name = dest / f"{file.stem}_{target_sr}hz.{output_format}"

            if src_sr == target_sr:
                if output_dir:
                    # Pass the audio through so output_dir is complete
                    audio, _ = sf.read(str(file), always_2d=True)
                    sf.write(str(out_name), audio, target_sr)
                print(f" ✓ (no change) {file.name}")
                results['already_correct'] += 1
                continue

            audio, _ = sf.read(str(file), always_2d=True)

            # Reduce the ratio to lowest terms for the polyphase filter
            div = gcd(src_sr, target_sr)
            up = target_sr // div
            down = src_sr // div

            # Resample every channel in one call along the sample axis
            resampled = signal.resample_poly(
                audio, up, down,
                axis=0,
                window=('kaiser', 5.0),
            )
            audio_out = np.clip(resampled, -1.0, 1.0)
            sf.write(str(out_name), audio_out, target_sr)

            print(f" ✓ {src_sr}→{target_sr} Hz {file.name}")
            results['converted'] += 1
        except Exception as e:
            print(f" ✗ Error in {file.name}: {e}")
            results['errors'] += 1

    print(f"\nResults: {results['converted']} converted, "
          f"{results['already_correct']} unchanged, "
          f"{results['errors']} errors")
    return results
# Examples
# Video project: bring every 44.1 kHz WAV up to 48 kHz
batch_convert_sr(
    directory='audio/music/',
    target_sr=48000,
    pattern='*.wav',
    output_dir='audio/48k/',
)
# Whisper/ML dataset: bring every FLAC down to 16 kHz
batch_convert_sr(
    directory='recordings/',
    target_sr=16000,
    pattern='*.flac',
    output_dir='dataset_16k/',
)
Converting MP3 with pydub
from pydub import AudioSegment
from pathlib import Path
def mp3_to_wav_with_sr(input_mp3, output_wav, target_sr=44100):
    """Decode an MP3, write it as WAV at ``target_sr`` Hz, and return the output path."""
    decoded = AudioSegment.from_mp3(input_mp3)
    resampled = decoded.set_frame_rate(target_sr)
    resampled.export(output_wav, format='wav')
    print(f"MP3→WAV {target_sr} Hz: {output_wav}")
    return output_wav
def normalize_collection_sr(directory, target_sr=44100):
    """
    Normalize all MP3/FLAC/OGG/WAV files in a directory to the same sample rate.
    Useful for preparing ML datasets or audio projects.

    Args:
        directory: Folder whose audio files are processed (not recursive).
        target_sr: Desired sample rate in Hz.

    Returns:
        None. Each file whose rate differs is written next to the original
        as ``<stem>_<target_sr>hz.wav``; files already at the target rate
        are left untouched.

    Note:
        The output name is built from the stem only, so inputs sharing a
        stem (``a.mp3`` and ``a.flac``) map to the same output file and
        the later one overwrites the earlier one.
    """
    folder = Path(directory)
    formats = {'.mp3': 'mp3', '.flac': 'flac', '.ogg': 'ogg', '.wav': 'wav'}

    # Snapshot and sort the listing before writing: the loop creates new
    # files in this folder, and iterating a directory while it changes
    # gives no guarantee about whether the new files are visited
    candidates = sorted(
        entry for entry in folder.iterdir()
        if entry.suffix.lower() in formats
    )

    for file in candidates:
        # Best-effort: report a failing file and carry on with the rest
        try:
            fmt = formats[file.suffix.lower()]
            audio = AudioSegment.from_file(str(file), format=fmt)
            sr = audio.frame_rate

            if sr == target_sr:
                print(f" ✓ (already {target_sr} Hz): {file.name}")
                continue

            out_name = file.stem + f'_{target_sr}hz.wav'
            out_path = folder / out_name
            audio.set_frame_rate(target_sr).export(str(out_path), format='wav')
            print(f" {sr}→{target_sr} Hz: {file.name} → {out_name}")
        except Exception as e:
            print(f" ✗ Error: {file.name}: {e}")
Resampling Quality Comparison
def compare_resampling_methods(audio, source_sr, target_sr):
    """
    Run three resampling algorithms on the same signal and print, for
    each one, the number of output samples and the RMS level.

    Args:
        audio: Array of samples to resample.
        source_sr: Sample rate of ``audio`` in Hz (integer).
        target_sr: Desired sample rate in Hz (integer).

    Returns:
        The output of the Kaiser-window polyphase resampler.
    """
    # Ratio in lowest terms for the polyphase resamplers
    common = gcd(source_sr, target_sr)
    up, down = target_sr // common, source_sr // common

    # The FFT method takes an explicit output length instead of a ratio
    n_target = int(len(audio) * target_sr / source_sr)

    outputs = {
        # Polyphase filter, Kaiser window (high quality)
        'Kaiser': signal.resample_poly(audio, up, down,
                                       window=('kaiser', 5.0)),
        # Polyphase filter, Hann window (smoother)
        'Hann': signal.resample_poly(audio, up, down, window='hann'),
        # FFT-based resample (slower but accurate)
        'FFT': signal.resample(audio, n_target),
    }

    for name, result in outputs.items():
        rms = np.sqrt(np.mean(result**2))
        print(f" {name:6s}: {len(result):,} samples, RMS={rms:.4f}")

    # Kaiser usually provides the best quality/speed balance
    return outputs['Kaiser']
Sample Rate Selection Guide
| Project Type | Recommended SR | Reason |
|---|---|---|
| Music for Spotify/Apple Music | 44,100 Hz | Distribution standard |
| Video soundtrack | 48,000 Hz | Frame sync (25/30 fps) |
| Podcast/voice | 44,100 or 48,000 Hz | DAW compatibility |
| ML dataset (speech) | 16,000 Hz | Whisper, wav2vec 2.0, HuBERT |
| Game sound effects | 44,100 Hz | DirectX/OpenAL standard |
| Professional broadcast | 48,000 Hz | AES5 preferred sampling frequency, standard in EBU broadcast practice |
| Audiophile mastering | 96,000+ Hz | Hi-Res Audio |
Conclusion
For sample rate conversion in Python, scipy.signal.resample_poly with Kaiser window offers the best quality/speed balance. librosa is ideal if you're already doing music analysis. For MP3 and compressed formats, pydub (with FFmpeg) simplifies the workflow. Always clip to [-1.0, 1.0] after resampling to prevent overflow distortion, and choose the target sample rate based on the audio's final destination (video, music, ML, or broadcast).
Related conversions
Audio format pairs that come up most often: