Exemplo n.º 1
0
def int_words(words):
    """Encode a sentence as a padded sequence of dictionary values.

    The pickled object at ``../Model/wordDictionary.pkl`` is loaded on every
    call and its ``one_hot_dict`` attribute is used as the lookup table.
    ``words`` is split with ``Helper.splitSentence``; each token found in the
    table is replaced by its table value, and each token that is absent is
    replaced by ``0``.  The encoded list is then passed through
    ``pad_sequence`` with ``Meta.max_string_len``.

    The number of absent tokens and the total token count are reported via
    ``Helper.debug`` after padding.

    Args:
        words: The raw text handed to ``Helper.splitSentence``.

    Returns:
        Whatever ``pad_sequence`` returns for the encoded token list.
    """
    # NOTE: pickle.load runs code embedded in the file; the path is a fixed
    # project file, so it must stay under the project's control.
    with open('../Model/wordDictionary.pkl', 'rb') as handle:
        vocabulary = pickle.load(handle).one_hot_dict

    encoded = []
    unknown = 0
    for token in Helper.splitSentence(words):
        if token in vocabulary:
            encoded.append(vocabulary[token])
            continue
        # Tokens missing from the table are encoded as 0 and counted.
        encoded.append(0)
        unknown += 1

    # Taken before padding so the count reflects tokens, not padded length.
    seen = len(encoded)
    padded = pad_sequence(encoded, Meta.max_string_len)
    Helper.debug('[WARNING] dismiss: %d\ttotal: %d' % (unknown, seen))
    return padded
Exemplo n.º 2
0
 def one_hot(self, filename, label):
     """Encode the ``text`` column of a CSV file and record it under ``label``.

     Every row's ``text`` field is split with ``Helper.splitSentence``.  Each
     kept word is converted with ``self.wordDictionary.lookup`` and the
     resulting list is appended to ``self.X``; ``label`` is appended to
     ``self.Y`` once per row, so both lists stay aligned.

     Words containing ``'http'`` are skipped.  Whenever
     ``self.wordDictionary.isFull()`` returns true, the current word is
     logged as missing and skipped as well.  After the file is processed the
     value of ``self.wordDictionary.count()`` is logged.

     Args:
         filename: Path of a UTF-8 encoded CSV file whose header row
             contains a ``text`` column.
         label: Value appended to ``self.Y`` for every row of the file.

     Raises:
         OSError: If ``filename`` cannot be opened.
         KeyError: If the CSV rows have no ``text`` column.
     """
     # The csv module requires newline='' so that line breaks inside quoted
     # fields reach the parser untouched instead of being translated by the
     # text layer.
     with open(filename, "r", encoding='utf-8', newline='') as f:
         for row in csv.DictReader(f):
             features = []
             for word in Helper.splitSentence(row['text']):
                 # Punctuation is intentionally left on the words (stripping
                 # it was disabled in the original code).
                 if 'http' in word:
                     continue
                 # NOTE(review): the fullness check runs before lookup(), so
                 # once the dictionary is full every word is dropped -
                 # presumably including words it already knows; confirm.
                 if self.wordDictionary.isFull():
                     Helper.debug("[WARNING] Missing word:" + word)
                     continue
                 features.append(self.wordDictionary.lookup(word))
             self.X.append(features)
             self.Y.append(label)
     Helper.debug("[INFORMATION] Total One Hot: %d" % self.wordDictionary.count())
Exemplo n.º 3
0
 def one_hot(self, filename, label):
     """Read a labelled CSV file and append its encoded rows to the dataset.

     For each CSV row the ``text`` field is tokenised by
     ``Helper.splitSentence`` and each accepted token is mapped through
     ``self.wordDictionary.lookup``.  One encoded list per row is appended
     to ``self.X`` and ``label`` is appended to ``self.Y`` alongside it.

     A token is rejected when it contains ``'http'``, or when
     ``self.wordDictionary.isFull()`` is true at the time the token is
     reached (in which case it is also logged as missing).  The final
     ``self.wordDictionary.count()`` is logged once the file is closed.

     Args:
         filename: Path of a UTF-8 encoded CSV file with a header row that
             includes a ``text`` column.
         label: Value stored in ``self.Y`` for each row read.

     Raises:
         OSError: If ``filename`` cannot be opened.
         KeyError: If a row lacks the ``text`` column.
     """
     dictionary = self.wordDictionary
     # newline='' is what the csv module documents for reader input: it
     # keeps embedded line breaks in quoted fields intact for the parser.
     with open(filename, "r", encoding='utf-8', newline='') as f:
         reader = csv.DictReader(f)
         for row in reader:
             encoded = []
             for word in Helper.splitSentence(row['text']):
                 # Words are used as-is; punctuation stripping is disabled.
                 if 'http' in word:
                     # Skip anything that contains 'http'.
                     pass
                 elif dictionary.isFull():
                     # NOTE(review): checked before lookup(), so a full
                     # dictionary rejects every word - presumably also ones
                     # it already contains; confirm this is intended.
                     Helper.debug("[WARNING] Missing word:" + word)
                 else:
                     encoded.append(dictionary.lookup(word))
             self.X.append(encoded)
             self.Y.append(label)
     Helper.debug("[INFORMATION] Total One Hot: %d" %
                  dictionary.count())