Пример #1
0
def ctc_beam_search_decoder(probs_seq,
                            vocabulary,
                            beam_size,
                            cutoff_prob=1.0,
                            cutoff_top_n=40,
                            blank_id=0,
                            ext_scoring_func=None):
    """Wrapper for the SWIG CTC beam search decoder.

    :param probs_seq: 2-D sequence of probability distributions, one row per
                      time step, each row being the normalized probabilities
                      over the vocabulary and the blank. An object exposing
                      ``tolist()`` (e.g. a numpy array) is converted to
                      nested lists first; anything else is passed through
                      unchanged.
    :type probs_seq: 2-D list or array-like with ``tolist()``
    :param vocabulary: Vocabulary list.
    :type vocabulary: list
    :param beam_size: Width for beam search.
    :type beam_size: int
    :param cutoff_prob: Cutoff probability in pruning,
                        default 1.0, no pruning.
    :type cutoff_prob: float
    :param cutoff_top_n: Cutoff number in pruning, only the top cutoff_top_n
                         characters with highest probs in vocabulary will be
                         used in beam search, default 40.
    :type cutoff_top_n: int
    :param blank_id: Index of the blank label, default 0.
    :type blank_id: int
    :param ext_scoring_func: External scoring function for
                             partially decoded sentence, e.g. word count
                             or language model.
    :type ext_scoring_func: callable
    :return: List of (log probability, sentence) tuples in the order
             produced by the underlying decoder (documented as descending
             probability).
    :rtype: list
    """
    # The docstring has always advertised a 2-D list, but a plain list has no
    # ``tolist``; only convert when the input actually provides it.
    to_list = getattr(probs_seq, "tolist", None)
    probs_list = to_list() if callable(to_list) else probs_seq

    beam_results = swig_decoders.ctc_beam_search_decoder(
        probs_list, vocabulary, beam_size, cutoff_prob, cutoff_top_n,
        blank_id, ext_scoring_func)

    # Keep only the first two fields of every result as a plain tuple.
    return [(res[0], res[1]) for res in beam_results]
def ctc_beam_search_decoder(probs_seq,
                            vocabulary,
                            beam_size,
                            cutoff_prob=1.0,
                            cutoff_top_n=40,
                            ext_scoring_func=None):
    """Wrapper for the CTC Beam Search Decoder.

    :param probs_seq: 2-D sequence of probability distributions over each
                      time step, with each element being a list of
                      normalized probabilities over vocabulary and blank.
                      An object exposing ``tolist()`` (e.g. a numpy array)
                      is converted to nested lists first; anything else is
                      passed through unchanged.
    :type probs_seq: 2-D list or array-like with ``tolist()``
    :param vocabulary: Vocabulary list.
    :type vocabulary: list
    :param beam_size: Width for beam search.
    :type beam_size: int
    :param cutoff_prob: Cutoff probability in pruning,
                        default 1.0, no pruning.
    :type cutoff_prob: float
    :param cutoff_top_n: Cutoff number in pruning, only top cutoff_top_n
                         characters with highest probs in vocabulary will be
                         used in beam search, default 40.
    :type cutoff_top_n: int
    :param ext_scoring_func: External scoring function for
                             partially decoded sentence, e.g. word count
                             or language model.
    :type ext_scoring_func: callable
    :return: List of (log probability, sentence) tuples in the order
             produced by the underlying decoder (documented as descending
             probability).
    :rtype: list
    """
    # The docstring has always advertised a 2-D list, but a plain list has no
    # ``tolist``; only convert when the input actually provides it.
    to_list = getattr(probs_seq, "tolist", None)
    probs_list = to_list() if callable(to_list) else probs_seq

    beam_results = swig_decoders.ctc_beam_search_decoder(
        probs_list, vocabulary, beam_size, cutoff_prob, cutoff_top_n,
        ext_scoring_func)

    # Keep only the first two fields of every result as a plain tuple.
    # NOTE: an earlier Python 2.7 variant additionally decoded the sentence
    # with ``res[1].decode('utf-8')``; the sentence is returned as is here.
    return [(res[0], res[1]) for res in beam_results]