def check_bag_of_words():
    """Check ``p1.bag_of_words`` on a two-sentence corpus and log the verdict.

    The check has two stages:

    1. The result must be a ``dict`` (subclasses are accepted) whose values,
       once sorted, are exactly ``0 .. len(result) - 1``.
    2. The sorted keys are compared with the expected vocabulary: the full
       vocabulary only triggers a warning (stopwords were kept), the
       stopword-free vocabulary passes, and anything else fails.

    Returns:
        None. Every outcome is reported through ``log``.
    """
    ex_name = "Bag of words"
    texts = [
        "He loves to walk on the beach",
        "There is nothing better",
    ]

    try:
        res = p1.bag_of_words(texts)
    except NotImplementedError:
        log(red("FAIL"), ex_name, ": not implemented")
        return

    # isinstance() also accepts dict subclasses (defaultdict, OrderedDict),
    # which are perfectly valid word -> index mappings.
    if not isinstance(res, dict):
        log(red("FAIL"), ex_name, ": does not return a dict, type: ", type(res))
        return

    # Every index in 0 .. len(res) - 1 has to be used exactly once.
    vals = sorted(res.values())
    exp_vals = list(range(len(res)))
    if vals != exp_vals:
        log(red("FAIL"), ex_name, ": wrong set of indices. Expected: ", exp_vals, " got ", vals)
        return

    log(green("PASS"), ex_name, "")

    keys = sorted(res)
    exp_keys = ['beach', 'better', 'he', 'is', 'loves', 'nothing', 'on', 'the', 'there', 'to', 'walk']
    stop_keys = ['beach', 'better', 'loves', 'nothing', 'walk']

    if keys == exp_keys:
        # Full vocabulary: correct, but the stopwords are still present.
        log(yellow("WARN"), ex_name, ": does not remove stopwords:", [k for k in keys if k not in stop_keys])
    elif keys == stop_keys:
        log(green("PASS"), ex_name, " stopwords removed")
    else:
        log(red("FAIL"), ex_name, ": keys are missing:", [k for k in stop_keys if k not in keys], " or are unexpected:", [k for k in keys if k not in stop_keys])
#-------------------------------------------------------------------------------
# Data loading. This section does not need to be modified.
#-------------------------------------------------------------------------------

# Load the three review splits.
train_data = utils.load_data('reviews_train.tsv')
val_data = utils.load_data('reviews_val.tsv')
test_data = utils.load_data('reviews_test.tsv')

# Split each data set into a tuple of texts and a parallel tuple of labels.
train_texts, train_labels = zip(*[(row['text'], row['sentiment']) for row in train_data])
val_texts, val_labels = zip(*[(row['text'], row['sentiment']) for row in val_data])
test_texts, test_labels = zip(*[(row['text'], row['sentiment']) for row in test_data])

# The dictionary comes from the training texts only and is reused for every split.
dictionary = p1.bag_of_words(train_texts)

train_bow_features = p1.extract_bow_feature_vectors(train_texts, dictionary)
val_bow_features = p1.extract_bow_feature_vectors(val_texts, dictionary)
test_bow_features = p1.extract_bow_feature_vectors(test_texts, dictionary)

#-------------------------------------------------------------------------------
# Problem 5 (currently disabled)
#-------------------------------------------------------------------------------

# toy_features, toy_labels = toy_data = utils.load_toy_data('toy_data.tsv')
#
#
# T = 200
# L = 0.2
#
#
# thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)
import utils
import numpy as np

#-------------------------------------------------------------------------------
# Data loading. Nothing in this section needs to be edited.
#-------------------------------------------------------------------------------

train_data = utils.load_data('reviews_train.tsv')
val_data = utils.load_data('reviews_val.tsv')
test_data = utils.load_data('reviews_test.tsv')

# Each split is unzipped into (texts, labels) tuples.
train_texts, train_labels = zip(
    *map(lambda review: (review['text'], review['sentiment']), train_data)
)
val_texts, val_labels = zip(
    *map(lambda review: (review['text'], review['sentiment']), val_data)
)
test_texts, test_labels = zip(
    *map(lambda review: (review['text'], review['sentiment']), test_data)
)

# One dictionary, derived from the training texts, is shared by all three
# feature extractions.
dictionary = p1.bag_of_words(train_texts)
train_bow_features = p1.extract_bow_feature_vectors(train_texts, dictionary)
val_bow_features = p1.extract_bow_feature_vectors(val_texts, dictionary)
test_bow_features = p1.extract_bow_feature_vectors(test_texts, dictionary)

#-------------------------------------------------------------------------------
# Problem 5 (disabled)
#-------------------------------------------------------------------------------

# toy_features, toy_labels = toy_data = utils.load_toy_data('toy_data.tsv')

# T = 1000
# L = 0.2

# thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)
#-------------------------------------------------------------------------------
# Data loading
#-------------------------------------------------------------------------------

# All input files are read from the ``data/`` directory.
train_data = utils.load_data('data/reviews_train.tsv')
val_data = utils.load_data('data/reviews_val.tsv')
test_data = utils.load_data('data/reviews_test.tsv')

# Unzip every split into a tuple of texts and a tuple of sentiment labels.
train_texts, train_labels = zip(
    *[(entry['text'], entry['sentiment']) for entry in train_data]
)
val_texts, val_labels = zip(
    *[(entry['text'], entry['sentiment']) for entry in val_data]
)
test_texts, test_labels = zip(
    *[(entry['text'], entry['sentiment']) for entry in test_data]
)

# The dictionary is computed from the training texts and applied to all splits.
dictionary = p1.bag_of_words(train_texts)

train_bow_features = p1.extract_bow_feature_vectors(train_texts, dictionary)
val_bow_features = p1.extract_bow_feature_vectors(val_texts, dictionary)
test_bow_features = p1.extract_bow_feature_vectors(test_texts, dictionary)

#-------------------------------------------------------------------------------
# Problem 5
#-------------------------------------------------------------------------------

# toy_data keeps the loader's result as returned; the same value is also
# unpacked into features and labels.
toy_features, toy_labels = toy_data = utils.load_toy_data('data/toy_data.tsv')

T = 10
L = 0.2

thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)
import utils
import numpy as np

#-------------------------------------------------------------------------------
# Interactive review classifier: trains pegasos on the training reviews, then
# repeatedly reads a review from standard input and prints the predicted
# sentiment.
#-------------------------------------------------------------------------------

train_data = utils.load_data('reviews_train.tsv')

# Whitespace-separated stopword list. The context manager guarantees the file
# is closed even when reading fails.
with open('stopwords.txt') as stopwords_file:
    stopwords_data = stopwords_file.read().split()

train_texts, train_labels = zip(*((sample['text'], sample['sentiment']) for sample in train_data))

dictionary = p1.bag_of_words(train_texts, stopwords_data)
train_bow_features = p1.extract_bow_feature_vectors(train_texts, dictionary)

T = 25
L = 0.01
thetas_pegasos = p1.pegasos(train_bow_features, train_labels, T, L)

while True:
    try:
        input_texts = input('Input your review: ')
    except (EOFError, KeyboardInterrupt):
        # End of input or Ctrl-C: leave the prompt loop without a traceback.
        break

    # NOTE(review): a placeholder text is placed in front of the user's review
    # and only the last prediction is read -- presumably a workaround for
    # single-row input; confirm against extract_bow_feature_vectors/classify.
    input_bow_features = p1.extract_bow_feature_vectors(['blah', input_texts], dictionary)
    output = p1.classify(input_bow_features, thetas_pegasos[0], thetas_pegasos[1])

    print('_______________________________________________________________')
    if output[-1] == 1:
        print('This is a positive review!')
    else:
        print('This is a negative review')
import project1 as p1
import utils

#-------------------------------------------------------------------------------
# Data loading. No edits are required in this section.
#-------------------------------------------------------------------------------

train_data = utils.load_data('reviews_train.tsv')
val_data = utils.load_data('reviews_val.tsv')
test_data = utils.load_data('reviews_test.tsv')

# Pair up text and sentiment per review, then transpose into two tuples.
train_texts, train_labels = zip(
    *map(lambda item: (item['text'], item['sentiment']), train_data)
)
val_texts, val_labels = zip(
    *map(lambda item: (item['text'], item['sentiment']), val_data)
)
test_texts, test_labels = zip(
    *map(lambda item: (item['text'], item['sentiment']), test_data)
)

# The training texts define the dictionary used for every feature matrix.
dictionary = p1.bag_of_words(train_texts)

train_bow_features = p1.extract_bow_feature_vectors(train_texts, dictionary)
val_bow_features = p1.extract_bow_feature_vectors(val_texts, dictionary)
test_bow_features = p1.extract_bow_feature_vectors(test_texts, dictionary)

#
#-------------------------------------------------------------------------------
# Section 1.7
#-------------------------------------------------------------------------------

toy_features, toy_labels = toy_data = utils.load_toy_data('toy_data.tsv')

T = 5
L = 10

# Both perceptron variants run on the same toy data for the same T.
thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)
thetas_avg_perceptron = p1.average_perceptron(toy_features, toy_labels, T)
# NOTE(review): train_data is used below but assigned outside this fragment.
val_data = utils.load_data('reviews_val.tsv')
test_data = utils.load_data('reviews_test.tsv')
# stopwords = pd.read_csv('stopwords.txt', header=None).to_numpy()

# Transpose each split into a tuple of texts and a tuple of labels.
train_texts, train_labels = zip(
    *[(review['text'], review['sentiment']) for review in train_data]
)
val_texts, val_labels = zip(
    *[(review['text'], review['sentiment']) for review in val_data]
)
test_texts, test_labels = zip(
    *[(review['text'], review['sentiment']) for review in test_data]
)

# Passed to bag_of_words as its second argument. It stays empty because the
# loop that would populate it is commented out.
stop_dict = dict()
# for i in range(len(stopwords)):
#     if stopwords[i][0] not in stop_dict:
#         stop_dict[stopwords[i][0]] = len(stop_dict)

dictionary = p1.bag_of_words(train_texts, stop_dict)

train_bow_features = p1.extract_bow_feature_vectors(train_texts, dictionary)
val_bow_features = p1.extract_bow_feature_vectors(val_texts, dictionary)
test_bow_features = p1.extract_bow_feature_vectors(test_texts, dictionary)

#-------------------------------------------------------------------------------
# Problem 9
#-------------------------------------------------------------------------------

#-------------------------------------------------------------------------------
# Problem 5
#-------------------------------------------------------------------------------
#-------------------------------------------------------------------------------
# Data loading. Leave this section as it is.
#-------------------------------------------------------------------------------

train_data = utils.load_data('reviews_train.tsv')
val_data = utils.load_data('reviews_val.tsv')
test_data = utils.load_data('reviews_test.tsv')

# (text, sentiment) pairs are transposed into separate text and label tuples.
train_texts, train_labels = zip(
    *[(rec['text'], rec['sentiment']) for rec in train_data]
)
val_texts, val_labels = zip(
    *[(rec['text'], rec['sentiment']) for rec in val_data]
)
test_texts, test_labels = zip(
    *[(rec['text'], rec['sentiment']) for rec in test_data]
)

# The dictionary is derived from the training texts and shared by all splits.
dictionary = p1.bag_of_words(train_texts)

train_bow_features = p1.extract_bow_feature_vectors(train_texts, dictionary)
val_bow_features = p1.extract_bow_feature_vectors(val_texts, dictionary)
test_bow_features = p1.extract_bow_feature_vectors(test_texts, dictionary)

#-------------------------------------------------------------------------------
# Problem 5
#-------------------------------------------------------------------------------

toy_features, toy_labels = toy_data = utils.load_toy_data('toy_data.tsv')

T = 10
L = 0.2

# The perceptron run itself is disabled; only the toy data is loaded.
# thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)
import project1 as p1
import utils

#-------------------------------------------------------------------------------
# Data loading. This part is not meant to be changed.
#-------------------------------------------------------------------------------

train_data = utils.load_data('reviews_train.tsv')
val_data = utils.load_data('reviews_val.tsv')
test_data = utils.load_data('reviews_test.tsv')

# Turn each list of reviews into a texts tuple and a labels tuple.
train_texts, train_labels = zip(
    *map(lambda r: (r['text'], r['sentiment']), train_data)
)
val_texts, val_labels = zip(
    *map(lambda r: (r['text'], r['sentiment']), val_data)
)
test_texts, test_labels = zip(
    *map(lambda r: (r['text'], r['sentiment']), test_data)
)

# A single dictionary (from the training texts) feeds all feature extractions.
dictionary = p1.bag_of_words(train_texts)

train_bow_features = p1.extract_bow_feature_vectors(train_texts, dictionary)
val_bow_features = p1.extract_bow_feature_vectors(val_texts, dictionary)
test_bow_features = p1.extract_bow_feature_vectors(test_texts, dictionary)

# -------------------------------------------------------------------------------
# Section 1.7 (disabled)
# -------------------------------------------------------------------------------

# toy_features, toy_labels = toy_data = utils.load_toy_data('toy_data.tsv')
#
# T = 10
# L = 0.2
#
# thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)
# thetas_avg_perceptron = p1.average_perceptron(toy_features, toy_labels, T)
#-------------------------------------------------------------------------------
# Data loading. There is nothing to edit in this section.
#-------------------------------------------------------------------------------

train_data = utils.load_data('reviews_train.tsv')
val_data = utils.load_data('reviews_val.tsv')
test_data = utils.load_data('reviews_test.tsv')

# Every split is separated into its texts and the matching sentiment labels.
train_texts, train_labels = zip(
    *[(doc['text'], doc['sentiment']) for doc in train_data]
)
val_texts, val_labels = zip(
    *[(doc['text'], doc['sentiment']) for doc in val_data]
)
test_texts, test_labels = zip(
    *[(doc['text'], doc['sentiment']) for doc in test_data]
)

# Only the training texts contribute to the dictionary; validation and test
# texts are featurized with that same dictionary.
dictionary = p1.bag_of_words(train_texts)

train_bow_features = p1.extract_bow_feature_vectors(train_texts, dictionary)
val_bow_features = p1.extract_bow_feature_vectors(val_texts, dictionary)
test_bow_features = p1.extract_bow_feature_vectors(test_texts, dictionary)

#-------------------------------------------------------------------------------
# Problem 5
#-------------------------------------------------------------------------------

toy_features, toy_labels = toy_data = utils.load_toy_data('toy_data.tsv')

T = 10
L = 0.2

thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)