Пример #1
0
def _build_tree_from_nps(tokens, nps):
    '''
    Build a flat NP-chunk nltk Tree from a token list and its noun phrases.

    tokens: list of token strings; each one is lowercased before matching
    nps: list of noun phrase strings; each is split on whitespace as given
         (not lowercased) and looked up in the tokens that are still
         unconsumed, in list order

    Returns Tree('S', children) where every child is either a plain
    (lowercased) token string or a Tree('NP', phrase_tokens) node, one
    per entry of nps.
    '''
    remaining = [tok.lower() for tok in tokens]
    phrase_token_lists = [phrase.split() for phrase in nps]

    # Walk the phrases in order, consuming tokens as we go: whatever comes
    # before a phrase is kept as loose tokens, while the phrase itself is
    # kept as a nested list so it can be told apart from plain strings below.
    pieces = []
    for phrase_tokens in phrase_token_lists:
        # NOTE(review): subsequence presumably returns the start offset of
        # phrase_tokens inside remaining -- confirm, and check what it
        # returns when the phrase is not found.
        start = subsequence(phrase_tokens, remaining)
        pieces += remaining[:start]
        pieces.append(phrase_tokens)
        # Later phrases are only searched for after the end of this one.
        remaining = remaining[start + len(phrase_tokens):]

    # Tokens left over after the last phrase.
    pieces += remaining

    children = [
        piece if isinstance(piece, str) else Tree('NP', piece)
        for piece in pieces
    ]
    return Tree('S', children)