# src/multivoice/lib/stt_speaker_mapping.py
import nltk


def get_word_ts_anchor(s, e, option="start"):
    """
    Determine the anchor time for a word based on the specified option.

    Args:
        s (int): Start time of the word in milliseconds.
        e (int): End time of the word in milliseconds.
        option (str): The option to determine the anchor time ('start', 'mid', or 'end').

    Returns:
        int or float: The anchor time for the word based on the option.
    """
    if option == "end":
        return e
    elif option == "mid":
        return (s + e) / 2
    return s
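
# Illustrative anchor choices for a hypothetical word spanning 1000-2000 ms:
#
#     get_word_ts_anchor(1000, 2000)          # 1000   ("start", the default)
#     get_word_ts_anchor(1000, 2000, "mid")   # 1500.0
#     get_word_ts_anchor(1000, 2000, "end")   # 2000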


def get_words_speaker_mapping(wrd_ts, spk_ts, word_anchor_option="start"):
    """
    Map each word to its corresponding speaker based on time segments.

    Args:
        wrd_ts (list): A list of dictionaries containing word timestamps (in seconds) and text.
        spk_ts (list): A list of lists containing speaker start and end times (in milliseconds) and speaker IDs.
        word_anchor_option (str): The option to determine the anchor time for words ('start', 'mid', or 'end').

    Returns:
        list: A list of dictionaries mapping each word to its speaker, with timestamps (in milliseconds) and text.
    """
    s, e, sp = spk_ts[0]
    wrd_pos, turn_idx = 0, 0
    wrd_spk_mapping = []
    for wrd_dict in wrd_ts:
        ws, we, wrd = (
            int(wrd_dict["start"] * 1000),
            int(wrd_dict["end"] * 1000),
            wrd_dict["text"],
        )
        wrd_pos = get_word_ts_anchor(ws, we, word_anchor_option)
        # Advance through speaker turns until the word anchor falls inside the
        # current turn; clamp to the last turn so trailing words are not dropped.
        while wrd_pos > float(e):
            turn_idx += 1
            turn_idx = min(turn_idx, len(spk_ts) - 1)
            s, e, sp = spk_ts[turn_idx]
            if turn_idx == len(spk_ts) - 1:
                e = get_word_ts_anchor(ws, we, option="end")
        wrd_spk_mapping.append(
            {"word": wrd, "start_time": ws, "end_time": we, "speaker": sp}
        )
    return wrd_spk_mapping
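
# Illustrative call (hypothetical values): word timestamps arrive in seconds,
# speaker turns in milliseconds, and the returned mapping is in milliseconds.
#
#     get_words_speaker_mapping(
#         [{"start": 0.10, "end": 0.45, "text": "hello"}],
#         [[0, 1500, 0]],
#     )
#     # -> [{"word": "hello", "start_time": 100, "end_time": 450, "speaker": 0}]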


def get_sentences_speaker_mapping(word_speaker_mapping, spk_ts):
    """
    Group words into sentences and map each sentence to its speaker.

    Args:
        word_speaker_mapping (list): A list of dictionaries mapping words to speakers with timestamps.
        spk_ts (list): A list of lists containing speaker start and end times and speaker IDs.

    Returns:
        list: A list of dictionaries, each representing a sentence with the speaker, start time, end time, and text.
    """
    sentence_checker = nltk.tokenize.PunktSentenceTokenizer().text_contains_sentbreak
    s, e, spk = spk_ts[0]
    prev_spk = spk
    snts = []
    snt = {"speaker": f"Speaker {spk}", "start_time": s, "end_time": e, "text": ""}
    for wrd_dict in word_speaker_mapping:
        wrd, spk = wrd_dict["word"], wrd_dict["speaker"]
        s, e = wrd_dict["start_time"], wrd_dict["end_time"]
        # Start a new sentence when the speaker changes or when appending the word
        # would cross a sentence break according to the Punkt tokenizer.
        if spk != prev_spk or sentence_checker(snt["text"] + " " + wrd):
            snts.append(snt)
            snt = {
                "speaker": f"Speaker {spk}",
                "start_time": s,
                "end_time": e,
                "text": "",
            }
        else:
            snt["end_time"] = e
        snt["text"] += wrd + " "
        prev_spk = spk
    snts.append(snt)
    return snts
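
# Each returned entry is one sentence attributed to one speaker, e.g.
# (hypothetical values; note the trailing space left by word concatenation):
#
#     {"speaker": "Speaker 0", "start_time": 0, "end_time": 900, "text": "Hello there. "}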


def get_speaker_aware_transcript(sentences_speaker_mapping, f):
    """
    Write a speaker-aware transcript to the provided file object.

    Args:
        sentences_speaker_mapping (list): A list of dictionaries representing sentences with speakers and timestamps.
        f (file object): The file object where the transcript will be written.
    """
    previous_speaker = sentences_speaker_mapping[0]["speaker"]
    f.write(f"{previous_speaker}: ")

    for sentence_dict in sentences_speaker_mapping:
        speaker = sentence_dict["speaker"]
        sentence = sentence_dict["text"]

        # If this speaker doesn't match the previous one, start a new paragraph
        if speaker != previous_speaker:
            f.write(f"\n\n{speaker}: ")
            previous_speaker = speaker

        # No matter what, write the current sentence
        f.write(sentence + " ")
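

if __name__ == "__main__":
    # Minimal end-to-end sketch with invented data: two hypothetical speaker
    # turns and a handful of hypothetical word timestamps. Word times are in
    # seconds (as produced by the STT stage); speaker turns are in milliseconds.
    import sys

    demo_word_ts = [
        {"start": 0.10, "end": 0.45, "text": "Hello"},
        {"start": 0.50, "end": 0.90, "text": "there."},
        {"start": 1.60, "end": 1.95, "text": "Hi,"},
        {"start": 2.00, "end": 2.40, "text": "how"},
        {"start": 2.45, "end": 2.80, "text": "are"},
        {"start": 2.85, "end": 3.20, "text": "you?"},
    ]
    demo_spk_ts = [[0, 1500, 0], [1500, 3500, 1]]

    words = get_words_speaker_mapping(demo_word_ts, demo_spk_ts)
    sentences = get_sentences_speaker_mapping(words, demo_spk_ts)
    get_speaker_aware_transcript(sentences, sys.stdout)
    sys.stdout.write("\n")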