Source code for multivoice.register_voice

# src/multivoice/register_voice.py

import argparse
import os
import pickle
import torchaudio
from speechbrain.inference.speaker import SpeakerRecognition


EMBED_FILE = "speaker_vectors.pkl"


class SpeakerVerifierCLI:
    """Register speaker voice embeddings and persist them to ``EMBED_FILE``.

    Attributes:
        speakers (dict): Maps user IDs to their stored speaker embeddings.
        verifier: The SpeechBrain ``SpeakerRecognition`` model used to
            compute embeddings.
    """

    # Sample rate (Hz) the audio is resampled to before it is encoded.
    TARGET_SAMPLE_RATE = 16000
    # Shortest recording, in seconds, accepted for registration.
    MIN_DURATION_SECONDS = 2

    def __init__(self):
        """Load stored embeddings and the pretrained speaker recognition model."""
        self.speakers = self.load_embeddings()
        self.verifier = SpeakerRecognition.from_hparams(
            source="speechbrain/spkrec-ecapa-voxceleb",
            savedir="pretrained_models/spkrec-ecapa-voxceleb",
        )

    def load_embeddings(self):
        """
        Loads speaker embeddings from a file if it exists.

        Returns:
            dict: A dictionary of user IDs mapped to their speaker embeddings.
                Returns an empty dictionary if the file does not exist.
        """
        if os.path.exists(EMBED_FILE):
            # SECURITY: unpickling can execute arbitrary code. EMBED_FILE must
            # only ever be a file written by save_embeddings(), never one
            # obtained from an untrusted source.
            with open(EMBED_FILE, "rb") as f:
                return pickle.load(f)
        return {}

    def save_embeddings(self):
        """
        Saves the current speaker embeddings to a file.
        """
        with open(EMBED_FILE, "wb") as f:
            pickle.dump(self.speakers, f)

    @staticmethod
    def _has_min_duration(num_samples, sample_rate):
        """Return True if ``num_samples`` at ``sample_rate`` Hz lasts long enough.

        Args:
            num_samples (int): Number of samples per channel in the recording.
            sample_rate (int): Sample rate of the recording in Hz.

        Returns:
            bool: True when the recording is at least ``MIN_DURATION_SECONDS`` long.
        """
        return num_samples >= SpeakerVerifierCLI.MIN_DURATION_SECONDS * sample_rate

    def extract_embedding(self, audio_path):
        """
        Extracts a speaker embedding from an audio file.

        The recording must be at least two seconds long at its own sample
        rate. Multi-channel audio is averaged down to one channel and the
        signal is resampled to 16 kHz before it is encoded.

        Args:
            audio_path (str): The path to the audio file.

        Returns:
            numpy.ndarray or None: A numpy array containing the speaker
                embedding, or None if the recording is too short or
                extraction fails.
        """
        try:
            # torchaudio.load returns a (channels, samples) tensor by default.
            signal, fs = torchaudio.load(audio_path)

            # Measure the duration against the file's own sample rate; a fixed
            # sample count is only "2 seconds" when the file is already 16 kHz.
            if not self._has_min_duration(signal.shape[1], fs):
                print("Voice is too short, at least 2 seconds required.")
                return None

            # encode_batch treats the first dimension as the batch, so a
            # stereo file would otherwise yield one embedding per channel.
            if signal.shape[0] > 1:
                signal = signal.mean(dim=0, keepdim=True)

            if fs != self.TARGET_SAMPLE_RATE:
                signal = torchaudio.functional.resample(
                    signal, fs, self.TARGET_SAMPLE_RATE
                )

            return self.verifier.encode_batch(signal).squeeze(0).detach().cpu().numpy()
        except Exception as e:
            # Best-effort by design: any load/encode failure is reported and
            # signalled to the caller as None rather than raised.
            print(f"Error extracting embedding: {e}")
            return None

    def register_voice(self, file_path, user_id):
        """
        Registers a new speaker by extracting their voice embedding and storing it.

        An existing entry for the same ``user_id`` is overwritten. Nothing is
        stored or written to disk when the embedding cannot be extracted.

        Args:
            file_path (str): The path to the audio file for registration.
            user_id (str): The user ID for the speaker being registered.
        """
        emb = self.extract_embedding(file_path)
        if emb is None:
            print("Failed to extract speaker embedding.")
            return

        self.speakers[user_id] = emb
        self.save_embeddings()
        print(f"Registered speaker: {user_id}")
def main():
    """
    Command-line entry point: register a speaker from an audio file.

    Reads two positional arguments, FILE_PATH and USER_ID, from the command
    line, then builds a SpeakerVerifierCLI and registers that voice under the
    given user ID.
    """
    cli_parser = argparse.ArgumentParser(description="Register a New Voice/File")

    # (dest, metavar, help) for each positional argument, in command-line order.
    positional_specs = (
        ("file_path", "FILE_PATH", "Path to the audio file for registration"),
        ("user_id", "USER_ID", "User ID for the speaker being registered"),
    )
    for dest, placeholder, description in positional_specs:
        cli_parser.add_argument(dest, metavar=placeholder, help=description)

    parsed = cli_parser.parse_args()

    SpeakerVerifierCLI().register_voice(parsed.file_path, parsed.user_id)
# Run the registration CLI only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__": main()