# src/multivoice/register_voice.py
import argparse
import os
import pickle
import torchaudio
from speechbrain.inference.speaker import SpeakerRecognition
EMBED_FILE = "speaker_vectors.pkl"
# Speaker registration helper (keeps voice embeddings in a pickle file on disk).
class SpeakerVerifierCLI:
    """
    Registers speakers by extracting a voice embedding from an audio file
    and persisting it to ``EMBED_FILE``.

    Attributes:
        speakers (dict): User IDs mapped to their stored speaker embeddings.
        verifier: The pretrained SpeechBrain ``SpeakerRecognition`` model.
    """

    # Sample rate the embedding model is fed with. The
    # speechbrain/spkrec-ecapa-voxceleb model card states it was trained on
    # 16 kHz single-channel audio; adjust if a different model is loaded.
    MODEL_SAMPLE_RATE = 16000

    def __init__(self):
        # Load existing speaker embeddings and initialize the speaker recognition model.
        self.speakers = self.load_embeddings()
        self.verifier = SpeakerRecognition.from_hparams(
            source="speechbrain/spkrec-ecapa-voxceleb",
            savedir="pretrained_models/spkrec-ecapa-voxceleb",
        )

    def load_embeddings(self):
        """
        Loads speaker embeddings from a file if it exists.

        Returns:
            dict: A dictionary of user IDs mapped to their speaker embeddings.
            Returns an empty dictionary if the file does not exist.
        """
        # SECURITY NOTE: pickle.load executes arbitrary code embedded in the
        # file. EMBED_FILE must only ever be a file written by this tool.
        try:
            with open(EMBED_FILE, "rb") as f:
                return pickle.load(f)
        except FileNotFoundError:
            # No registry yet: start with an empty one.
            return {}

    def save_embeddings(self):
        """
        Saves the current speaker embeddings to a file.

        The data is written to a temporary sibling file first and then moved
        over ``EMBED_FILE``, so an interrupted write cannot leave a truncated
        registry behind.
        """
        tmp_path = f"{EMBED_FILE}.tmp"
        with open(tmp_path, "wb") as f:
            pickle.dump(self.speakers, f)
        os.replace(tmp_path, EMBED_FILE)

    def extract_embedding(self, file_path):
        """
        Extracts a speaker embedding from an audio file.

        The audio is downmixed to a single channel and resampled to
        ``MODEL_SAMPLE_RATE`` before being passed to the verifier model.

        Args:
            file_path (str): The path to the audio file.

        Returns:
            The embedding as a flattened CPU tensor, or None if the audio
            could not be loaded or contains no samples.
            NOTE(review): the storage format is an assumption; confirm it
            matches whatever code later reads ``EMBED_FILE``.
        """
        try:
            signal, sample_rate = torchaudio.load(file_path)
        except (OSError, RuntimeError, ValueError) as exc:
            print(f"Could not load audio file '{file_path}': {exc}")
            return None

        if signal.numel() == 0:
            print(f"Audio file '{file_path}' contains no samples.")
            return None

        # torchaudio.load returns (channels, time); average channels to mono
        # so the model sees a single utterance rather than one per channel.
        if signal.shape[0] > 1:
            signal = signal.mean(dim=0, keepdim=True)

        if sample_rate != self.MODEL_SAMPLE_RATE:
            signal = torchaudio.functional.resample(
                signal, sample_rate, self.MODEL_SAMPLE_RATE
            )

        embedding = self.verifier.encode_batch(signal)
        # Detach and move to CPU so the pickled value does not depend on a
        # particular device or carry autograd state.
        return embedding.detach().cpu().flatten()

    def register_voice(self, file_path, user_id):
        """
        Registers a new speaker by extracting their voice embedding and storing it.

        An existing entry for the same user ID is overwritten.

        Args:
            file_path (str): The path to the audio file for registration.
            user_id (str): The user ID for the speaker being registered.
        """
        emb = self.extract_embedding(file_path)
        if emb is not None:
            self.speakers[user_id] = emb
            self.save_embeddings()
            print(f"Registered speaker: {user_id}")
        else:
            print("Failed to extract speaker embedding.")
# Command-line entry point.
def main():
    """
    Command-line entry point.

    Reads two positional arguments, FILE_PATH and USER_ID, then registers
    the voice in that audio file under the given user ID.
    """
    cli_parser = argparse.ArgumentParser(description="Register a New Voice/File")

    # (argument name, placeholder shown in usage, help text)
    positionals = (
        ("file_path", "FILE_PATH", "Path to the audio file for registration"),
        ("user_id", "USER_ID", "User ID for the speaker being registered"),
    )
    for arg_name, placeholder, description in positionals:
        cli_parser.add_argument(arg_name, metavar=placeholder, help=description)

    # Parse first so usage errors are reported before the model is loaded.
    parsed = cli_parser.parse_args()

    SpeakerVerifierCLI().register_voice(parsed.file_path, parsed.user_id)


if __name__ == "__main__":
    main()