# src/rp/stt.py
import logging
import os
import sys
import tempfile
# Progress bars are silenced by default; any debug/verbose flag on the command
# line keeps them. sys.argv is inspected directly because this runs before the
# arguments are parsed.
disable_tqdm = not any(
    flag in sys.argv for flag in ("--debug", "-D", "--verbose", "-v")
)
if disable_tqdm:
    os.environ["TQDM_DISABLE"] = "1"

# Crufty workaround for disabling tqdm and logging level: speechbrain is
# imported here, after TQDM_DISABLE has (possibly) been set and before logging
# is configured below.
import speechbrain

# Baseline logging configuration: ERROR threshold with a timestamped format.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s", level=logging.ERROR
)
[docs]
def setup_logging(args):
    """
    Set up logging based on command line arguments.

    ``args.debug`` selects DEBUG, a positive ``args.verbose`` selects INFO,
    and otherwise only ERROR and above are emitted.

    Args:
        args (argparse.Namespace): The parsed command line arguments. Must
            provide ``debug`` (truthy enables debug output) and ``verbose``
            (a count or flag; ``None`` is treated as 0).
    """
    if args.debug:
        log_level = logging.DEBUG
    elif (args.verbose or 0) > 0:  # tolerate verbose=None (no -v given)
        log_level = logging.INFO
    else:
        log_level = logging.ERROR

    # basicConfig() only installs a handler when the root logger has none; if
    # logging was already configured it does nothing, including not changing
    # the level.
    logging.basicConfig(
        level=log_level, format="%(asctime)s - %(levelname)s - %(message)s"
    )
    # Apply the requested threshold explicitly so it takes effect even when
    # basicConfig() above was a no-op.
    logging.getLogger().setLevel(log_level)
    # logging.disable(n) suppresses records of severity n *and below* on every
    # logger, so pass one less than the requested level: everything quieter
    # than the threshold is silenced while the threshold itself stays visible.
    logging.disable(log_level - 1)
[docs]
def main():
    """
    Entry point for the speech-to-text command line tool.

    Parses the command line, configures logging, runs audio processing with a
    freshly created scratch directory, writes the outputs, and always passes
    the scratch directory to ``cleanup`` afterwards, even if processing or
    writing raised.
    """
    # Crufty workaround: these imports are deferred because disabling tqdm
    # needs to happen before they are loaded.
    from multivoice.lib.stt_args import parse_arguments
    from multivoice.lib.stt_process_audio import process_audio
    from multivoice.lib.stt_write_outputs import write_outputs, cleanup

    options = parse_arguments()
    setup_logging(options)

    # Scratch directory for intermediate files produced during processing.
    scratch_dir = tempfile.mkdtemp(prefix="stt_")
    try:
        # process_audio returns the speech segments metadata, which is fed
        # straight into the output writer.
        write_outputs(process_audio(options, scratch_dir), options)
    finally:
        logging.debug("Cleanup")
        # Runs on success and on failure alike.
        cleanup(scratch_dir)
# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()