Пример #1
0
 def _insertfull(iterable):
     """Collect the normalized characters of every word in ``iterable``.

     Each word is normalized first, then each of its characters is
     normalized again on its own before being added to ``chars``.
     ``chars`` and ``valid_chars`` are resolved from the surrounding
     scope, which is not visible in this snippet.
     """
     for word in iterable:
         for ch in Dictionary.normalize(word):
             ch = Dictionary.normalize(ch)
             # A falsy ``valid_chars`` disables filtering entirely.
             if not valid_chars or ch in valid_chars:
                 chars.add(ch)
Пример #2
0
 def _insert(iterable):
     """Add the normalized characters of each sequence in ``iterable``.

     Every element of every sequence is normalized individually and added
     to ``chars``. ``chars`` and ``valid_chars`` are resolved from the
     surrounding scope, which is not visible in this snippet.
     """
     for group in iterable:
         for raw in group:
             ch = Dictionary.normalize(raw)
             # A falsy ``valid_chars`` means "accept everything".
             if not valid_chars or ch in valid_chars:
                 chars.add(ch)
Пример #3
0
def index_embedding_words(embedding_file):
    """Put all the words in embedding_file into a set.

    Only the first space-separated field of each line is used, and it is
    normalized with ``Dictionary.normalize`` before being stored.

    Args:
        embedding_file: path of the text file to read, one entry per line.

    Returns:
        A set with the normalized first field of every line.
    """
    # Decode explicitly instead of relying on the platform's locale default,
    # which fails or mis-decodes non-ASCII words on non-UTF-8 systems.
    # NOTE(review): assumes the embedding file is UTF-8 encoded - confirm.
    with open(embedding_file, encoding="utf-8") as f:
        return {
            Dictionary.normalize(line.rstrip().split(' ')[0])
            for line in f
        }
Пример #4
0
def index_embedding_chars(char_embedding_file):
    """Put all the chars in char_embedding_file into a set.

    Only the first space-separated field of each line is used, and it is
    normalized with ``Dictionary.normalize`` before being stored.

    Args:
        char_embedding_file: path of the text file to read, one entry per
            line.

    Returns:
        A set with the normalized first field of every line.
    """
    # Decode explicitly instead of relying on the platform's locale default,
    # which fails or mis-decodes non-ASCII characters on non-UTF-8 systems.
    # NOTE(review): assumes the embedding file is UTF-8 encoded - confirm.
    with open(char_embedding_file, encoding="utf-8") as f:
        return {
            Dictionary.normalize(line.rstrip().split(' ')[0])
            for line in f
        }
Пример #5
0
def top_question_words(args, examples, word_dict):
    """Return the most frequent in-dictionary question words.

    Every token of each example's ``'question'`` entry is normalized and
    counted only if the normalized form is contained in ``word_dict``.
    The ``args.tune_partial`` most common ``(word, count)`` pairs are
    returned.
    """
    tally = Counter()
    for example in examples:
        for token in example['question']:
            normalized = Dictionary.normalize(token)
            if normalized not in word_dict:
                continue
            tally[normalized] += 1
    return tally.most_common(args.tune_partial)
Пример #6
0
def index_embedding_words(embedding_file):
    """Put all the words in embedding_file into a set.

    The file is read as UTF-8, one entry per line; the first
    space-separated field of each line is normalized with
    ``Dictionary.normalize`` and collected.

    Reading is best-effort: if the file cannot be opened, read or decoded,
    a message with the number of lines read so far is printed and the
    words collected up to that point are returned. Any other exception
    propagates to the caller.

    Args:
        embedding_file: path of the text file to read.

    Returns:
        A set of normalized words (possibly partial or empty on failure).
    """
    words = set()
    counter = 0  # number of lines successfully read so far
    try:
        with open(embedding_file, encoding="utf-8") as f:
            for counter, line in enumerate(f, start=1):
                words.add(Dictionary.normalize(line.rstrip().split(' ')[0]))
    except (OSError, UnicodeError) as err:
        # Format the counter instead of concatenating it: "str + int" raises
        # TypeError inside the handler and hides the real error.
        print("An exception occurred on the {} word: {!r}".format(counter, err))

    return words
Пример #7
0
 def _insert(iterable):
     """Add the normalized form of every item in ``iterable`` to ``words``.

     ``words`` and ``valid_words`` are resolved from the surrounding scope,
     which is not visible in this snippet.
     """
     for raw in iterable:
         token = Dictionary.normalize(raw)
         # A falsy ``valid_words`` means "accept everything".
         if not valid_words or token in valid_words:
             words.add(token)