Source code for multivoice.stt

# src/rp/stt.py

import logging
import os
import sys
import tempfile

# Progress bars stay silenced unless a debug/verbose flag appears on the
# command line. This runs at import time, before argument parsing.
disable_tqdm = not any(
    flag in ("--debug", "-D", "--verbose", "-v") for flag in sys.argv
)

if disable_tqdm:
    # Exported through the environment so it is in place before later imports
    os.environ["TQDM_DISABLE"] = "1"

# Crufty workaround for disabling tqdm and logging level...
import speechbrain

# Default logging setup for the module: only ERROR and above, timestamped.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.ERROR,
)


def setup_logging(args):
    """
    Set up logging based on command line arguments.

    The root logger level is chosen as follows: DEBUG when ``args.debug`` is
    truthy, INFO when ``args.verbose`` is greater than zero, ERROR otherwise.

    Args:
        args (argparse.Namespace): The parsed command line arguments. Must
            provide the ``debug`` and ``verbose`` attributes; a ``verbose``
            of ``None`` is treated as zero.
    """
    if args.debug:
        log_level = logging.DEBUG
    elif (args.verbose or 0) > 0:
        log_level = logging.INFO
    else:
        log_level = logging.ERROR

    # Installs a handler with our format only if the root logger has none yet.
    logging.basicConfig(
        level=log_level, format="%(asctime)s - %(levelname)s - %(message)s"
    )
    # basicConfig() does nothing once the root logger already has handlers
    # (the module-level call has installed one), so the level must be set
    # explicitly for the command line flags to take effect.
    logging.getLogger().setLevel(log_level)
    # NOTE: logging.disable(log_level) was removed on purpose. It suppresses
    # every record at or below the given level, which silenced exactly the
    # messages the selected level was meant to show.
def main():
    """
    Entry point for the speech-to-text command.

    Parses the command line, configures logging, runs audio processing
    inside a freshly created temporary directory, and writes the outputs.
    The temporary directory is handed to ``cleanup`` whether or not
    processing or writing raises.
    """
    # Deferred imports: tqdm has to be disabled before these modules load
    from multivoice.lib.stt_args import parse_arguments
    from multivoice.lib.stt_process_audio import process_audio
    from multivoice.lib.stt_write_outputs import write_outputs, cleanup

    cli_args = parse_arguments()
    setup_logging(cli_args)

    # Scratch directory for the intermediate files of this run
    work_dir = tempfile.mkdtemp(prefix="stt_")
    try:
        # Speech segments metadata returned by the audio processing step
        segments = process_audio(cli_args, work_dir)
        # Emit the outputs derived from the processed data
        write_outputs(segments, cli_args)
    finally:
        logging.debug("Cleanup")
        # Remove the temporary files
        cleanup(work_dir)
# Run the command only when executed as a script, not when imported.
if __name__ == "__main__":
    main()