def _build_tree_from_nps(tokens, nps):
    '''
    Build an nltk Tree that marks the given noun phrases inside a sentence.

    tokens: list of token strings; they are lowercased before matching.
    nps: list of noun-phrase strings; each is split on whitespace into
         its own token list.

    Returns a Tree labelled 'S' whose children are plain token strings
    and, for every noun phrase, a Tree labelled 'NP' holding that
    phrase's tokens.

    Phrases are consumed in the order given, and each one is searched for
    only in the tokens that follow the previous phrase's match.
    '''
    remaining = [tok.lower() for tok in tokens]
    phrase_token_lists = [phrase.split() for phrase in nps]

    # Flat sequence mixing token strings with token lists (one list per
    # noun phrase), in sentence order.
    pieces = []
    for phrase_tokens in phrase_token_lists:
        # NOTE(review): `subsequence` is defined elsewhere; it is presumably
        # the start index of `phrase_tokens` within `remaining` -- confirm
        # what it returns when the phrase is absent. Also note that only
        # the sentence tokens are lowercased here, not the phrases.
        start = subsequence(phrase_tokens, remaining)
        pieces.extend(remaining[:start])
        pieces.append(phrase_tokens)
        # Continue the search after the phrase that was just placed.
        remaining = remaining[start + len(phrase_tokens):]
    pieces.extend(remaining)

    # Strings stay as leaves; every non-string piece becomes an NP subtree.
    children = [
        piece if isinstance(piece, str) else Tree('NP', piece)
        for piece in pieces
    ]
    return Tree('S', children)