def calculate(self, w, seed):
    """Return the pointwise mutual information (PMI) of ``w`` and ``seed``.

    PMI is computed as ``log2(P(w, seed) / (P(w) * P(seed)))``, where each
    probability is the number of entries of ``self.train_X`` that contain
    the word(s), divided by ``self.train_size``.

    Args:
        w: Word whose association with ``seed`` is measured.
        seed: Seed word to compare against.

    Returns:
        The PMI value in bits (log base 2).

    Raises:
        ZeroDivisionError: If ``self.train_size`` is 0, or if ``w`` or
            ``seed`` occurs in no training entry.
        ValueError: If both words occur but never in the same entry
            (``log2(0)`` is undefined).
    """
    # NOTE(review): presumably each entry of train_X is a list of tokens, so
    # ``in`` is an exact-token test; with plain strings it would be a
    # substring test -- confirm against the data loader.
    w_count = 0
    seed_count = 0
    w_seed_count = 0
    # A single pass collects the marginal and joint document frequencies.
    for sent in self.train_X:
        has_w = w in sent
        has_seed = seed in sent
        if has_w:
            w_count += 1  # entries containing w
        if has_seed:
            seed_count += 1  # entries containing seed
        if has_w and has_seed:
            w_seed_count += 1  # entries containing both w and seed

    p_w = w_count / self.train_size
    p_seed = seed_count / self.train_size
    # The joint probability is normalised once, exactly like the marginals.
    # Dividing by train_size twice would shift every PMI value by
    # -log2(train_size).
    p_w_seed = w_seed_count / self.train_size

    return math.log2(p_w_seed / (p_w * p_seed))


if __name__ == '__main__':
    data = SST(fine_grained=False)
    pmi = PMI(data, 0.5)
    pmi_1 = pmi.calculate("movie", "good")
    pmi_2 = pmi.calculate("movie", "bad")
    # Difference between the association with "good" and with "bad".
    lex_w = pmi_1 - pmi_2
def test(self): self.model.eval() X = self._get_feature_vec(self.data.test["sents"]) for i in range(X[0].shape[0]): outputs = [] for j in range(self.class_size): outputs.append(self.model(torch.FloatTensor(X[j][i])).squeeze()) # wv+b for all class in pytorch probabilities = list(torch.softmax(torch.stack(outputs), dim=0)) # get the probability for each class max_prob = max(probabilities) max_prob_idx = probabilities.index(max_prob) # the max output as the predicted class c c = self.class_dict[max_prob_idx] if c == 0: c_string = "Very negative" elif c == 1: c_string = "Negative" elif c == 2: c_string = "Neutural" elif c == 3: c_string = "Positive" else: assert c == 4 c_string = "Very positive" print(' '.join(self.data.test["sents"][i]),": ",c_string) if __name__ == '__main__': data = SST(fine_grained=True) classifier = Multiclass_log_linear(data,0.2) classifier.train() classifier.test()