def load_mr():
    """Load the MR movie-review polarity corpus from `rt-polaritydata/`.

    Reads the negative and positive review files, cleans each line with
    `clean_str`, and returns the combined data with per-example labels.

    Returns:
        (train_data, labels): `train_data` is a numpy array of cleaned
        sentences (positives first, then negatives); `labels` is a parallel
        numpy array of 1.0 for positive and 0.0 for negative examples.

    Note:
        The original code called `str(line)` on raw bytes, which embeds the
        literal `b'...'` prefix and escaped `\\n` into every sentence. Fixed
        here by decoding explicitly before cleaning.
    """
    def _read_cleaned(path):
        # One cleaned sentence per line; decode bytes -> str explicitly.
        # errors='ignore' keeps behavior robust to stray non-UTF-8 bytes
        # that exist in the original MR corpus.
        with open(path, 'rb') as fh:
            return [clean_str(raw.decode('utf-8', errors='ignore')) for raw in fh]

    train_neg = _read_cleaned('rt-polaritydata/rt-polarity.neg')
    train_pos = _read_cleaned('rt-polaritydata/rt-polarity.pos')

    # Positives first, then negatives — label order below must match.
    train_data = np.concatenate((train_pos, train_neg))
    labels = np.concatenate((np.ones(len(train_pos)), np.zeros(len(train_neg))))
    return train_data, labels
def load_data(positive_data_file, negative_data_file):
    """Load sentences and one-hot labels from two polarity files.

    Args:
        positive_data_file: path to a text file with one positive example
            per line.
        negative_data_file: path to a text file with one negative example
            per line.

    Returns:
        [x_text, y]: `x_text` is the list of cleaned sentences (positives
        first, then negatives); `y` is a numpy array of one-hot labels,
        `[0, 1]` for positive and `[1, 0]` for negative examples.
    """
    # Context managers fix the original's leaked file handles
    # (open(...).readlines() never closed the files).
    with open(positive_data_file, "r") as fh:
        positive_examples = [s.strip() for s in fh]
    with open(negative_data_file, "r") as fh:
        negative_examples = [s.strip() for s in fh]
    # Split by words
    x_text = positive_examples + negative_examples
    x_text = [preprocessing.clean_str(sent) for sent in x_text]
    # x_text = [sent.split(' ') for sent in x_text]
    # Generate labels — order matches x_text (positives first).
    positive_labels = [[0, 1] for _ in positive_examples]
    negative_labels = [[1, 0] for _ in negative_examples]
    y = np.concatenate([positive_labels, negative_labels], 0)
    return [x_text, y]
# Periodically pull tweets, score their emotion, and plot the two emotion
# series over time (blue = sadness, red = fear).
step_size = 60  # how often graph is updated (in seconds)
num_iterations = 30  # how long you want the graph to be

x = []    # iteration index per sample (plot x-axis)
lib = []  # sadness score per iteration
rep = []  # fear score per iteration
for i in range(num_iterations):
    # Timestamp doubles as the tweet-dump file suffix written by
    # get_all_tweets and read back below.
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    # "place:5635c19c2b5078d1" restricts the search to a fixed location.
    get_all_tweets("place:5635c19c2b5078d1 (coronavirus)", timestamp)
    print("Saved tweets for", timestamp)
    tweets = pd.read_csv("covid-19_" + timestamp + "_tweets.csv")
    tweets = tweets["text"]
    all_tweets = " ".join(tweets)
    all_tweets = preprocessing.clean_str(all_tweets)
    sentences = preprocessing.sequence_text(all_tweets)
    print("Preprocessed tweets")
    # get emotion - CHANGE THIS FOR POLITICAL OR TOXICITY
    emos = get_sentimeant.emotion(sentences)
    sadness = emos[0]
    fear = emos[1]
    x.append(i)
    lib.append(sadness)
    rep.append(fear)
    time.sleep(step_size)
plt.plot(x, lib, 'blue', x, rep, 'red')
# Evaluate a previously saved CNN on held-out test sentences, converting each
# sentence into a fixed-length sequence of vocabulary indices.
# NOTE(review): the loop body appears to continue past this chunk (the wrapped
# `input` Variable is built but not yet consumed here).
import torch
import varPack as D
import preprocessing
from torch.autograd import Variable

# Load the entire serialized model object (saved via torch.save(model, ...)).
CNN = torch.load('./saveEntireCNN')

# One test sentence per line, cleaned with the project's preprocessing.
test_data = list(open('../data/testData.txt', "r").readlines())
test_data = [preprocessing.clean_str(sent) for sent in test_data]
# NOTE(review): `f` is opened but never closed in this chunk — presumably
# closed later; confirm, or use a `with` block.
f = open('result.txt', 'w')
got_right = 0  # running count of correct predictions (updated later, presumably)
for i, sent in enumerate(test_data):
    # Input
    words_in_sent = []
    words_in_sent.extend(sent.split())
    s = len(words_in_sent)
    # Sentences longer than the model's fixed input length are skipped entirely.
    if (s > D.max_sent_len):
        continue
    # NOTE(review): `input` shadows the builtin of the same name.
    input = []
    for word in words_in_sent:
        try:
            input.extend([D.word_to_ix[word]])
        except KeyError:
            # Out-of-vocabulary words map to the dedicated '_UNSEEN_' index.
            print("No Such Word in dictionary, zero padding")
            input.extend([D.word_to_ix['_UNSEEN_']])
    # Right-pad with the '_ZEROS_' index up to the fixed sentence length.
    for _ in range(D.max_sent_len - s):
        input.extend([D.word_to_ix['_ZEROS_']])
    # Wrap Input inside the Variable (placed on GPU — requires CUDA).
    input = Variable(torch.cuda.LongTensor(input))