Пример #1
0
    def compute_cost_tensor(
        self, turns_list: List[List[Turn]]
    ) -> Tuple[np.ndarray, Dict[Tuple[int, int], np.ndarray]]:
        """
        Compute the N-dimensional cost tensor for a list of N hypotheses.

        For every unordered pair (i, j) of hypotheses, a 2-D matrix of
        negative pairwise speaker overlaps is computed and expanded with
        singleton axes on every dimension except i and j, so that all
        pairwise matrices broadcast against each other when summed.

        Parameters:
            turns_list: list of N hypotheses, each a list of Turn objects.

        Returns:
            A tuple (cost_tensor, pairwise_costs) where cost_tensor is the
            N-dimensional broadcast sum of all pairwise cost matrices, and
            pairwise_costs maps each pair (i, j), i < j, to its expanded
            cost matrix.
        """
        N = len(turns_list)
        pairwise_costs = {}

        # When a hypothesis has only one speaker, some expanded matrices end
        # up with adjacent dummy dimensions and the broadcast sum below
        # fails, so we fall back to explicit iterative addition.
        has_single_speaker = False

        for i, ref_turns in enumerate(turns_list):
            for j, sys_turns in enumerate(turns_list):
                if j <= i:
                    continue  # only consider unordered pairs (i < j)
                cost = []
                ref_groups = {
                    key: list(group)
                    for key, group in groupby(ref_turns,
                                              lambda x: x.speaker_id)
                }
                sys_groups = {
                    key: list(group)
                    for key, group in groupby(sys_turns,
                                              lambda x: x.speaker_id)
                }

                if len(ref_groups) == 1 or len(sys_groups) == 1:
                    has_single_speaker = True

                # Cost of pairing two speakers is their negative total
                # overlap, so minimizing cost maximizes overlap.
                for ref_spk_id in sorted(ref_groups.keys()):
                    cur_row = []
                    ref_spk_turns = ref_groups[ref_spk_id]
                    for sys_spk_id in sorted(sys_groups.keys()):
                        sys_spk_turns = sys_groups[sys_spk_id]
                        total_overlap = compute_spk_overlap(
                            ref_spk_turns, sys_spk_turns)
                        cur_row.append(-1 * total_overlap)
                    cost.append(cur_row)

                # The expand_dims is for easy broadcasting: singleton axes
                # everywhere except dimensions i and j.
                new_axis = [a for a in range(N) if a not in (i, j)]
                pairwise_costs[(i, j)] = np.expand_dims(
                    np.array(cost), axis=tuple(new_axis))

        if has_single_speaker:
            # iterate and add since numpy cannot broadcast with 2 dummy dimensions
            vals = list(pairwise_costs.values())
            cost_tensor = vals[0]
            for val in vals[1:]:
                cost_tensor = np.add(cost_tensor, val)
        else:
            # Use the builtin sum() so numpy broadcasting is applied pairwise;
            # np.sum(list(...)) tries to stack the differently-shaped arrays
            # into one array and fails instead of broadcasting.
            cost_tensor = sum(pairwise_costs.values())
        return cost_tensor, pairwise_costs
Пример #2
0
    def __map_hungarian(ref_turns: List[Turn],
                        sys_turns: List[Turn]) -> Dict[Tuple[int, int], int]:
        """
        Use Hungarian algorithm for label mapping for 2 system special case.

        Returns a dict mapping (system index, speaker index) to a common
        integer label, where system 0 is `ref_turns` and system 1 is
        `sys_turns`; speaker indices follow the sorted order of speaker ids.
        Speakers left unmatched (different speaker counts) get fresh labels.
        """
        cost_matrix = []
        ref_groups = {
            key: list(group)
            for key, group in groupby(ref_turns, lambda x: x.speaker_id)
        }
        sys_groups = {
            key: list(group)
            for key, group in groupby(sys_turns, lambda x: x.speaker_id)
        }
        # Cost of pairing two speakers is their negative total overlap, so
        # minimizing assignment cost maximizes overlap.
        for ref_spk_id in sorted(ref_groups.keys()):
            cur_row = []
            ref_spk_turns = ref_groups[ref_spk_id]
            for sys_spk_id in sorted(sys_groups.keys()):
                sys_spk_turns = sys_groups[sys_spk_id]
                total_overlap = _compute_spk_overlap(ref_spk_turns,
                                                     sys_spk_turns)
                cur_row.append(-1 * total_overlap)
            cost_matrix.append(cur_row)

        cost_matrix = np.array(cost_matrix)
        row_ind, col_ind = linear_sum_assignment(cost_matrix)

        # Keep track of remaining row or col indices
        row_indices_remaining = list(range(cost_matrix.shape[0]))
        col_indices_remaining = list(range(cost_matrix.shape[1]))
        label_mapping = {}

        # Matched (row, col) pairs share one label.
        for label, (r, c) in enumerate(zip(row_ind, col_ind)):
            label_mapping[(0, r)] = label
            row_indices_remaining.remove(r)
            label_mapping[(1, c)] = label
            col_indices_remaining.remove(c)

        # len(row_ind) equals the old `i + 1` but does not raise NameError
        # when the assignment is empty.
        next_label = len(row_ind)

        # Assign fresh labels to remaining row indices
        for r in row_indices_remaining:
            label_mapping[(0, r)] = next_label
            next_label += 1

        # Assign fresh labels to remaining col indices
        for c in col_indices_remaining:
            label_mapping[(1, c)] = next_label
            next_label += 1

        return label_mapping
Пример #3
0
    def __map_pair(
        self, ref_turns: List[Turn], sys_turns: List[Turn]
    ) -> Dict[Tuple[int, str], int]:
        """
        Map the speakers of two hypotheses to a common label space with the
        Hungarian algorithm over pairwise (negative) speaker overlaps.

        Returns a dict keyed by (system index, speaker id) -- system 0 is
        `ref_turns`, system 1 is `sys_turns` -- whose values are common
        integer labels. Speakers left unmatched (when the two hypotheses
        have different speaker counts) receive fresh labels.
        """
        ref_groups = {
            key: list(group)
            for key, group in groupby(ref_turns, lambda x: x.speaker_id)
        }
        sys_groups = {
            key: list(group)
            for key, group in groupby(sys_turns, lambda x: x.speaker_id)
        }
        ref_keys = sorted(ref_groups.keys())
        sys_keys = sorted(sys_groups.keys())
        M, N = len(ref_keys), len(sys_keys)

        # Negative overlap, so minimizing assignment cost maximizes overlap.
        cost_matrix = np.zeros((M, N))
        for i, ref_spk_id in enumerate(ref_keys):
            ref_spk_turns = ref_groups[ref_spk_id]
            for j, sys_spk_id in enumerate(sys_keys):
                sys_spk_turns = sys_groups[sys_spk_id]
                total_overlap = compute_spk_overlap(ref_spk_turns, sys_spk_turns)
                cost_matrix[i, j] = -1 * total_overlap

        row_ind, col_ind = linear_sum_assignment(cost_matrix)

        label_mapping = {}
        # Matched (row, col) pairs share one label.
        for label, (r, c) in enumerate(zip(row_ind, col_ind)):
            label_mapping[(0, ref_keys[r])] = label
            label_mapping[(1, sys_keys[c])] = label

        # len(row_ind) equals the old `i + 1` but does not raise NameError
        # when the assignment is empty.
        next_label = len(row_ind)

        # Assign fresh labels to unmatched speakers, in ascending index
        # order (same order as the original remaining-index lists).
        assigned_rows, assigned_cols = set(row_ind), set(col_ind)
        for r in range(M):
            if r not in assigned_rows:
                label_mapping[(0, ref_keys[r])] = next_label
                next_label += 1
        for c in range(N):
            if c not in assigned_cols:
                label_mapping[(1, sys_keys[c])] = next_label
                next_label += 1

        return label_mapping
Пример #4
0
 def __validate_global_mapping(self) -> bool:
     """Return True iff every (system index, speaker id) pair present in
     the sorted turns lists has an entry in the global label mapping."""
     return all(
         (sys_idx, spk_id) in self.global_mapping
         for sys_idx, turns in enumerate(self.sorted_turns_list)
         for spk_id in {turn.speaker_id for turn in turns}
     )
Пример #5
0
def main(
        input_rttms: List[click.Path],
        output_rttm: click.Path,
        uem_file: click.Path,
        channel: int,
        random_seed: int,
        **kwargs,  # these are passed directly to combine_turns_list() method
) -> None:
    """Apply the DOVER-Lap algorithm on the input RTTM files."""

    # Seed both RNGs globally so hypothesis shuffling is reproducible.
    random.seed(random_seed)
    np.random.seed(random_seed)

    # Load hypothesis speaker turns from the input files.
    info("Loading speaker turns from input RTTMs...", file=sys.stderr)
    turns_list = load_rttms(input_rttms)

    if uem_file is not None:
        info("Loading universal evaluation map...", file=sys.stderr)
        uem = load_uem(uem_file)

        # Restrict each hypothesis to the UEM scoring regions.
        info(
            "Trimming reference speaker turns to UEM scoring regions...",
            file=sys.stderr,
        )
        turns_list = [trim_turns(turns, uem) for turns in turns_list]

    info("Merging overlapping speaker turns...", file=sys.stderr)
    turns_list = [merge_turns(turns) for turns in turns_list]

    # Bucket each hypothesis's turns by file id.
    file_to_turns_list = {}
    for turns in turns_list:
        for fid, turn_group in groupby(turns, lambda x: x.file_id):
            file_to_turns_list.setdefault(fid, []).append(list(turn_group))

    # Run the DOVER-Lap combination per file.
    file_to_out_turns = {}
    for file_id, per_file_turns in file_to_turns_list.items():
        info("Processing file {}..".format(file_id), file=sys.stderr)
        # We shuffle so that the hypothesis order is randomized
        random.shuffle(per_file_turns)
        file_to_out_turns[file_id] = DOVERLap.combine_turns_list(
            per_file_turns, file_id, **kwargs)

    # Write the combined turns of all files to the output RTTM.
    write_rttm(output_rttm,
               sum(file_to_out_turns.values(), []),
               channel=channel)
Пример #6
0
def get_speaker_keys(
        turns_list: List[List[Turn]]) -> Dict[Tuple[int, int], str]:
    """
    Returns a dictionary which maps a file id (relative) and speaker id (relative)
    to absolute speaker id. Relative speaker indices follow the sorted order
    of the distinct speaker ids within each hypothesis.
    """
    return {
        (file_idx, spk_idx): spk_id
        for file_idx, turns in enumerate(turns_list)
        for spk_idx, spk_id in enumerate(
            sorted({turn.speaker_id for turn in turns}))
    }
Пример #7
0
    def get_mapped_turns_list(
        cls,
        turns_list: List[List[Turn]],
        file_id: str,
        method: Optional[str] = "greedy",
        sort_first: Optional[bool] = False,
        second_maximal: Optional[bool] = False,
    ) -> Tuple[List[List[Turn]], np.ndarray]:
        """
        This function takes turns list from all RTTMs and applies an n-dimensional
        matching approximation algorithm to map all to a common label space.

        Parameters:
            turns_list: one list of Turn objects per input hypothesis.
            file_id: file id assigned to every output turn.
            method: "hungarian" or "greedy"; the Hungarian mapper is always
                used when exactly 2 hypotheses are given.
            sort_first: forwarded to HungarianMap.
            second_maximal: forwarded to GreedyMap.

        Returns:
            Tuple of (mapped turns list, hypothesis weights). The weights
            come from the mapper's compute_mapping() (assumed np.ndarray --
            confirm against the mapper classes).

        Raises:
            ValueError: if `method` is not recognized.
        """
        if (len(turns_list) == 2) or (method == "hungarian"):
            # We replace the original turns list with one sorted by average DER
            hungarian_map = HungarianMap(sort_first=sort_first)
            label_mapping, weights = hungarian_map.compute_mapping(turns_list)
            turns_list = hungarian_map.sorted_turns_list

        elif method == "greedy":
            greedy_map = GreedyMap(second_maximal=second_maximal)
            label_mapping, weights = greedy_map.compute_mapping(turns_list)

        else:
            # Fail fast: the original fell through and raised NameError on
            # `label_mapping` below for unrecognized methods.
            raise ValueError("Unknown mapping method: {}".format(method))

        # Relabel every turn using the computed (hypothesis, speaker) mapping.
        mapped_turns_list = []
        for i, turns in enumerate(turns_list):
            spk_groups = {
                key: list(group)
                for key, group in groupby(turns, lambda x: x.speaker_id)
            }
            mapped_turns = []
            for spk_id in spk_groups.keys():
                new_spk_id = label_mapping[(i, spk_id)]
                for turn in spk_groups[spk_id]:
                    mapped_turns.append(
                        Turn(
                            turn.onset,
                            turn.offset,
                            speaker_id=new_spk_id,
                            file_id=file_id,
                        )
                    )
            mapped_turns_list.append(mapped_turns)

        return mapped_turns_list, weights
Пример #8
0
    def get_mapped_turns_list(
            cls,
            turns_list: List[List[Turn]],
            file_id: str,
            run_second_maximal: Optional[bool] = False
    ) -> Tuple[List[List[Turn]], np.ndarray]:
        """
        This function takes turns list from all RTTMs and applies an n-dimensional
        matching approximation algorithm to map all to a common label space.

        Returns a tuple of (mapped turns list, hypothesis ranks). Ranks are
        produced by cls.__get_ranks from the per-hypothesis weights (assumed
        to be an array-like -- confirm against __get_ranks).
        """
        N = len(turns_list)  # number of input hypotheses
        if N == 2:
            # if only 2 inputs need to be combined, we use the Hungarian algorithm
            # since it is provably optimal. Also, we assign both the systems
            # equal weight to prevent the voting to be dominated by one method.
            # NOTE: must go through `cls` -- this is a classmethod, so the
            # original `self.__map_hungarian` raised NameError here.
            label_mapping = cls.__map_hungarian(*turns_list)
            weights = np.array([0.5, 0.5])

        else:
            pairwise_costs = {}

            # When a hypothesis has only one speaker, some expanded matrices
            # end up with adjacent dummy dimensions and the broadcast sum
            # fails; we detect that case and add iteratively instead.
            has_single_speaker = False

            for i, ref_turns in enumerate(turns_list):
                for j, sys_turns in enumerate(turns_list):
                    if j <= i:
                        continue  # only unordered pairs (i < j)
                    cost = []
                    ref_groups = {
                        key: list(group)
                        for key, group in groupby(ref_turns,
                                                  lambda x: x.speaker_id)
                    }
                    sys_groups = {
                        key: list(group)
                        for key, group in groupby(sys_turns,
                                                  lambda x: x.speaker_id)
                    }

                    if len(ref_groups) == 1 or len(sys_groups) == 1:
                        has_single_speaker = True

                    # Cost of pairing two speakers is their negative total
                    # overlap, so minimizing cost maximizes overlap.
                    for ref_spk_id in sorted(ref_groups.keys()):
                        cur_row = []
                        ref_spk_turns = ref_groups[ref_spk_id]
                        for sys_spk_id in sorted(sys_groups.keys()):
                            sys_spk_turns = sys_groups[sys_spk_id]
                            total_overlap = cls.__compute_spk_overlap(
                                ref_spk_turns, sys_spk_turns)
                            cur_row.append(-1 * total_overlap)
                        cost.append(cur_row)

                    # The expand_dims is for easy broadcasting: singleton
                    # axes everywhere except dimensions i and j.
                    new_axis = [a for a in range(N) if a not in (i, j)]
                    pairwise_costs[(i, j)] = np.expand_dims(
                        np.array(cost), axis=tuple(new_axis))

            if has_single_speaker:
                # iterate and add since numpy cannot broadcast with 2 dummy dimensions
                vals = list(pairwise_costs.values())
                cost_tensor = vals[0]
                for val in vals[1:]:
                    cost_tensor = np.add(cost_tensor, val)
            else:
                # Builtin sum() adds the matrices pairwise with broadcasting;
                # np.sum(list(...)) tries to stack the differently-shaped
                # arrays into one array and fails instead of broadcasting.
                cost_tensor = sum(pairwise_costs.values())

            # The weight of each hypothesis is computed by computing its total
            # overlap with all other hypotheses
            weights = np.zeros(N, dtype=float)
            for i in range(N):
                cur_pairwise_costs = [
                    np.squeeze(x) for x in pairwise_costs.values()
                    if x.shape[i] != 1
                ]
                weights[i] = -1 * sum(np.sum(x) for x in cur_pairwise_costs)

            label_mapping = cls.__apply_maximal_matching(
                cost_tensor, run_second_maximal)

        # Get mapped speaker labels using the mapping
        mapped_turns_list = []
        for i, turns in enumerate(turns_list):
            spk_groups = {
                key: list(group)
                for key, group in groupby(turns, lambda x: x.speaker_id)
            }
            mapped_turns = []
            for j, spk_id in enumerate(spk_groups.keys()):
                new_spk_id = label_mapping[(i, j)]
                for turn in spk_groups[spk_id]:
                    mapped_turns.append(
                        Turn(
                            turn.onset,
                            turn.offset,
                            speaker_id=new_spk_id,
                            file_id=file_id,
                        ))
            mapped_turns_list.append(mapped_turns)

        ranks = cls.__get_ranks(weights)
        return mapped_turns_list, ranks