Source code for multivoice.lib.stt_args

# src/multivoice/lib/stt_args.py

import argparse
from torch import cuda
from multivoice.lib.stt_langs import whisper_langs
from multivoice._version import __version__



[docs]
def parse_arguments(argv=None) -> argparse.Namespace:
    """
    Parse command-line arguments for the Speech-to-Text (STT) module.

    Builds an ``argparse.ArgumentParser`` exposing the STT options (source
    audio file, batch size, device, language, Whisper model name, output
    directory, stemming / numeral-suppression switches, verbosity, version)
    and parses the supplied argument list with it.

    Args:
        argv: Optional sequence of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, which is
            the behavior of the original zero-argument call.

    Returns:
        Namespace: An object containing all the parsed arguments. Attribute
        names are ``audio``, ``batch_size``, ``debug``, ``device``,
        ``language``, ``model_name``, ``output_dir``, ``stemming``,
        ``suppress_numerals`` and ``verbose``.

    Raises:
        SystemExit: Raised by argparse on invalid arguments, or after
            printing help (``-h``) or the version (``-V``).
    """
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "-a", "--audio", help="Name of the source audio file", required=True
    )

    parser.add_argument(
        "--batch-size",
        type=int,
        dest="batch_size",
        default=8,
        help="Batch size for batched inference, reduce if you run out of memory, "
        "set to 0 for original whisper longform inference (default: 8)",
    )

    parser.add_argument(
        "-D",
        "--debug",
        help="Debugging",
        action="store_true",
    )

    # The default is resolved once, while the parser is being built.
    parser.add_argument(
        "--device",
        dest="device",
        default="cuda" if cuda.is_available() else "cpu",
        help="if you have a GPU use 'cuda', otherwise 'cpu' (default: cuda, if available)",
    )

    # Omitting the flag leaves ``language`` as None. Any explicit value is
    # validated against ``whisper_langs``.
    # NOTE(review): whether the literal string "None" is accepted depends on
    # the contents of whisper_langs -- confirm against stt_langs.
    parser.add_argument(
        "--language",
        type=str,
        default=None,
        choices=whisper_langs,
        help="Language spoken in the audio, specify None to perform language detection (default: None)",
    )

    parser.add_argument(
        "--model",
        dest="model_name",
        default="large-v3",
        help="Name of the Whisper model to use (default: large-v3)",
    )

    # The help text previously claimed the default was the audio file's
    # directory, but the actual default "." is the current working directory.
    parser.add_argument(
        "-o",
        "--output",
        dest="output_dir",
        default=".",
        type=str,
        help="Output directory for the final .txt and .srt files (default: current directory)",
    )

    # store_false: ``stemming`` stays True unless --no-stem is given.
    parser.add_argument(
        "--no-stem",
        action="store_false",
        dest="stemming",
        default=True,
        help="Disables source separation. "
        "This helps with long files that don't contain a lot of music.",
    )

    parser.add_argument(
        "--suppress_numerals",
        action="store_true",
        dest="suppress_numerals",
        default=False,
        help="Suppresses Numerical Digits. "
        "This helps the diarization accuracy but converts all digits into written text.",
    )

    # action="count": each repetition of -v increments the value (e.g. -vv -> 2).
    parser.add_argument(
        "-v",
        "--verbose",
        help="Increase output verbosity",
        action="count",
        default=0,
    )

    parser.add_argument(
        "-V",
        "--version",
        help="Show version",
        action="version",
        version=f"{__version__}",
    )

    # argv=None makes argparse read sys.argv[1:], matching the original call.
    return parser.parse_args(argv)