Пример #1
0
def extract_process(texts):
    """Gather the extraction results of every non-empty text into one list.

    Each truthy entry of ``texts`` is passed, together with the trie stored
    under ``_global['trie']``, to ``extract_new_string``; the items that call
    produces are appended, in order, to a single flat result list.

    Args:
        texts: Iterable of texts. Falsy entries (for example ``""`` or
            ``None``) are skipped without calling the extractor.

    Returns:
        A list holding the concatenated extraction results of all texts,
        in input order. Empty when no text produced anything.
    """
    trie = _global['trie']
    collected = []
    for text in texts:
        if text:
            # extend() consumes whatever iterable the extractor returns,
            # exactly like the in-place ``+=`` on a list.
            collected.extend(extract_new_string(trie, text))
    return collected
Пример #2
0
def doc_extract(texts):
    """Map extracted words to the ``DOCID_RE`` match of the text they came from.

    For every text in which ``DOCID_RE.search`` finds a match, the words
    returned by ``extract_new_string`` (using ``_global['trie']``) are
    inspected. A word is recorded only if it is a member of
    ``_global['words_set']`` and has no truthy value stored yet, so the first
    text that supplies a truthy match string for a word wins.

    Args:
        texts: Iterable of texts to scan. Texts without a ``DOCID_RE`` match
            are skipped entirely.

    Returns:
        A dict from word to the matched string (``match.group()``) of the
        text it was recorded from.
    """
    trie = _global['trie']
    words_set = _global['words_set']
    word_to_doc = {}
    for text in texts:
        match = DOCID_RE.search(text)
        if not match:
            continue
        for word in extract_new_string(trie, text):
            # Keep an existing truthy entry; a falsy stored value (e.g. an
            # empty match string) may still be replaced by a later text.
            if word_to_doc.get(word):
                continue
            if word in words_set:
                word_to_doc[word] = match.group()
    return word_to_doc