# Generate a spectrogram from an audio file with Praat/Parselmouth.
# Written for fun; the code is mostly auto-generated.
!pip install praat-parselmouth
import librosa
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import wavfile
from scipy.signal import spectrogram as scipy_spectrogram, windows, lfilter
import parselmouth
import os
def generate_python_spectrogram(
    sample_rate,
    samples,
    output_path,
    freq_range=(0, 5000),
    window_length=0.005,
    dynamic_range=50,
    dynamic_compression=0.3
):
    """
    Generates a Praat-like spectrogram using pure Python libraries.

    Args:
        sample_rate: Sampling frequency of ``samples`` in Hz.
        samples: NumPy array of audio samples. A 2-D array is averaged
            over axis 1 to obtain a single channel.
        output_path: Where the rendered figure is saved (300 dpi).
        freq_range: ``(low, high)`` limits in Hz applied to the y-axis.
        window_length: Analysis window duration in seconds.
        dynamic_range: Span in dB kept below the reference level.
        dynamic_compression: The normalised image is raised to the power
            ``1 - dynamic_compression`` before display.

    Raises:
        ValueError: If the analysis window is shorter than one sample or
            ``dynamic_range`` is not positive.
    """
    frequencies, times, compressed_Sxx = _praat_like_intensity(
        sample_rate, samples, window_length, dynamic_range,
        dynamic_compression
    )

    # Plotting
    fig, ax = plt.subplots(figsize=(14, 7))
    try:
        ax.imshow(
            compressed_Sxx, interpolation='bilinear', origin='lower',
            aspect='auto',
            extent=(times.min(), times.max(),
                    frequencies.min(), frequencies.max()),
            cmap='gray_r'
        )
        ax.set_ylim(freq_range)
        ax.set_xlabel("Time (s)")
        ax.set_ylabel("Frequency (Hz)")
        ax.set_title("Python-Generated Phonetic Spectrogram")
        fig.tight_layout()
        fig.savefig(output_path, dpi=300)
    finally:
        # Close this specific figure even when saving fails, so repeated
        # calls do not accumulate open figures.
        plt.close(fig)
    print(f"Python spectrogram saved to {output_path}")


def _praat_like_intensity(
    sample_rate, samples, window_length, dynamic_range, dynamic_compression
):
    """Return ``(frequencies, times, image)`` with ``image`` scaled to [0, 1]."""
    if dynamic_range <= 0:
        raise ValueError("dynamic_range must be positive")

    if samples.ndim > 1:
        samples = samples.mean(axis=1)

    # Pre-emphasis: first-order filter y[n] = x[n] - k * x[n-1], with k
    # derived from a 50 Hz corner frequency.
    pre_emphasis_from = 50
    k = np.exp(-2 * np.pi * pre_emphasis_from / sample_rate)
    emphasized_samples = lfilter([1, -k], 1, samples)

    # Spectrogram Calculation
    nperseg = int(window_length * sample_rate)
    if nperseg < 1:
        raise ValueError(
            "window_length is shorter than one sample at this sample_rate"
        )
    # 1 ms time step. Clamped to one sample because int() truncates to 0
    # below 1 kHz, which would make noverlap == nperseg (rejected by SciPy).
    hop_length = max(1, int(0.001 * sample_rate))
    noverlap = nperseg - hop_length
    window = windows.gaussian(nperseg, std=nperseg / 8)
    frequencies, times, Sxx = scipy_spectrogram(
        emphasized_samples, fs=sample_rate, window=window,
        nperseg=nperseg, noverlap=noverlap, scaling='density'
    )

    # Power -> dB; the floor keeps log10 away from zero-valued bins.
    Sxx_db = 10 * np.log10(np.maximum(Sxx, 1e-10))
    # The reference level is the 99.9th percentile rather than the maximum
    # (presumably so a few extreme bins do not set the scale).
    max_db = np.percentile(Sxx_db, 99.9)
    min_db = max_db - dynamic_range
    clipped_Sxx_db = np.clip(Sxx_db, min_db, max_db)

    # Dynamic Compression: map to [0, 1], then apply a power law; an
    # exponent below 1 raises the lower values.
    normalized_Sxx = (clipped_Sxx_db - min_db) / dynamic_range
    compressed_Sxx = normalized_Sxx ** (1 - dynamic_compression)
    return frequencies, times, compressed_Sxx
def generate_praat_spectrogram(
    audio_path,
    output_path,
    freq_range=(0, 5000),
    window_length=0.005,
    dynamic_range=50
):
    """
    Generates a spectrogram using the Praat engine via Parselmouth.

    The audio at ``audio_path`` is decoded with librosa at its native
    sample rate, written to a temporary WAV file, and that file is opened
    by Praat. (Presumably the WAV round-trip exists because Praat cannot
    open every format librosa can decode -- confirm before removing it.)

    Args:
        audio_path: Path of the audio file to analyse.
        output_path: Where the rendered figure is saved (300 dpi).
        freq_range: ``(low, high)`` limits in Hz for the y-axis; the upper
            bound is also passed to Praat as ``maximum_frequency``.
        window_length: Analysis window duration in seconds.
        dynamic_range: Span in dB shown below the loudest bin.
    """
    import tempfile

    # Decode the file that was actually requested instead of depending on
    # module-level `samples` / `sample_rate` globals.
    samples, sample_rate = librosa.load(audio_path, sr=None)

    # A unique temp file avoids overwriting (and then deleting) an
    # unrelated "temp_audio.wav" in the working directory.
    fd, temp_wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        # float32 is a dtype wavfile.write can store directly.
        wavfile.write(
            temp_wav_path, int(sample_rate), samples.astype(np.float32)
        )
        snd = parselmouth.Sound(temp_wav_path)
        # The time_step is 0.001 for a 1000Hz analysis rate
        spectrogram = snd.to_spectrogram(
            window_length=window_length,
            maximum_frequency=freq_range[1],
            time_step=0.001,
            window_shape=parselmouth.SpectralAnalysisWindowShape.GAUSSIAN
        )

        # Extract data for plotting
        X, Y = spectrogram.x_grid(), spectrogram.y_grid()
        # Floor at the smallest positive float so zero-power bins become a
        # very low finite dB value instead of -inf (and a runtime warning).
        power = np.maximum(spectrogram.as_array(), np.finfo(float).tiny)
        sg_db = 10 * np.log10(power)

        # Plotting with the same dynamic range and colormap
        fig, ax = plt.subplots(figsize=(14, 7))
        try:
            # The dynamic range is applied here through the lower colour
            # limit, relative to the loudest bin.
            ax.pcolormesh(
                X, Y, sg_db,
                vmin=sg_db.max() - dynamic_range,
                cmap='gray_r',
                shading='auto'
            )
            ax.set_ylim(freq_range)
            ax.set_xlabel("Time (s)")
            ax.set_ylabel("Frequency (Hz)")
            ax.set_title("Praat-Generated Spectrogram (via Parselmouth)")
            fig.tight_layout()
            fig.savefig(output_path, dpi=300)
        finally:
            # Close this figure even if rendering or saving fails.
            plt.close(fig)
        print(f"Praat spectrogram saved to {output_path}")
    finally:
        # Clean up the temporary WAV file
        if os.path.exists(temp_wav_path):
            os.remove(temp_wav_path)
# --- Main Execution ---
# Destinations for the two rendered images.
python_spec_path, praat_spec_path = (
    "python_libri1_spectrogram.png",
    "praat_libri1_spectrogram.png",
)
# Resolve librosa's 'libri1' example recording and decode it;
# sr=None keeps the file's own sample rate.
audio_file_path = librosa.ex('libri1')
samples, sample_rate = librosa.load(audio_file_path, sr=None)
# Render the pure-Python version first, then the Praat version.
generate_python_spectrogram(
    sample_rate=sample_rate,
    samples=samples,
    output_path=python_spec_path,
)
generate_praat_spectrogram(
    audio_path=audio_file_path,
    output_path=praat_spec_path,
)