def extract_process(texts):
    """Gather the extraction results of every non-empty text into one list.

    Each truthy entry of ``texts`` is passed, together with the trie stored
    under ``_global['trie']``, to ``extract_new_string``; whatever it returns
    is appended to the result in input order.

    Args:
        texts: Iterable of texts. Falsy entries (``None``, ``""``) are skipped.

    Returns:
        A flat list holding the items returned by ``extract_new_string`` for
        all processed texts.
    """
    shared_trie = _global['trie']
    collected = []
    for entry in texts:
        if entry:
            collected.extend(extract_new_string(shared_trie, entry))
    return collected
def doc_extract(texts):
    """Map extracted words to the document id of the text that yielded them.

    For every text in which ``DOCID_RE`` finds a match, the words returned by
    ``extract_new_string`` are examined. A word is recorded only if it is a
    member of ``_global['words_set']`` and has no truthy id stored yet, so
    the earliest qualifying text wins.

    Args:
        texts: Iterable of texts. Falsy entries (``None``, ``""``) are
            skipped, matching ``extract_process``.

    Returns:
        A dict mapping word -> full ``DOCID_RE`` match (``Match.group()``) of
        the text the word was taken from.
    """
    found = {}
    trie = _global['trie']
    words_set = _global['words_set']
    for text in texts:
        # Skip None / empty entries up front instead of handing them to the
        # regex search, which cannot process a None value.
        if not text:
            continue
        doc = DOCID_RE.search(text)
        if not doc:
            continue
        # The matched id is the same for every word of this text.
        doc_id = doc.group()
        for word in extract_new_string(trie, text):
            # Truthiness test on purpose (not ``word in found``): an entry
            # stored with an empty id counts as unset and may be overwritten
            # by a later text.
            if not found.get(word) and word in words_set:
                found[word] = doc_id
    return found