Exemplo n.º 1
0
def check_extract_bow_feature_vectors():
    """Check ``p1.extract_bow_feature_vectors`` on a two-sentence example.

    The dictionary maps "he", "loves", "her", "really" to columns 0..3.
    The outcome is reported through ``log`` and nothing is returned:

    * FAIL if the function raises ``NotImplementedError``, does not return
      a numpy array, or returns an array whose shape differs from the
      expected ``(2, 4)``.
    * PASS once type and shape are correct, followed by a second line:
      WARN when the matrix holds binary indicators, PASS when it holds word
      counts ("really" occurs twice in the second text), FAIL for any
      other content.
    """
    ex_name = "Extract bow feature vectors"
    texts = ["He loves her ", "He really really loves her"]
    keys = ["he", "loves", "her", "really"]
    dictionary = {k: i for i, k in enumerate(keys)}
    # Expected matrix when each feature is a 0/1 presence indicator.
    exp_res = np.array([[1, 1, 1, 0], [1, 1, 1, 1]])
    # Expected matrix when each feature is an occurrence count.
    non_bin_res = np.array([[1, 1, 1, 0], [1, 1, 1, 2]])

    try:
        res = p1.extract_bow_feature_vectors(texts, dictionary)
    except NotImplementedError:
        log(red("FAIL"), ex_name, ": not implemented")
        return

    if not isinstance(res, np.ndarray):
        log(red("FAIL"), ex_name, ": does not return a numpy array, type: ",
            type(res))
        return
    # Compare the full shape, not just the number of rows: a result with the
    # wrong number of columns must be reported here rather than breaking the
    # element-wise comparisons below.
    if res.shape != exp_res.shape:
        log(red("FAIL"), ex_name, ": expected an array of shape ",
            exp_res.shape, " but got array of shape", res.shape)
        return

    log(green("PASS"), ex_name)

    if (res == exp_res).all():
        log(yellow("WARN"), ex_name, ": uses binary indicators as features")
    elif (res == non_bin_res).all():
        log(green("PASS"), ex_name, ": correct non binary features")
    else:
        log(red("FAIL"), ex_name, ": unexpected feature matrix")
Exemplo n.º 2
0
#-------------------------------------------------------------------------------

# Load the three review splits in train / validation / test order.
train_data, val_data, test_data = (
    utils.load_data(path)
    for path in ('reviews_train.tsv', 'reviews_val.tsv', 'reviews_test.tsv')
)

# Split every dataset into parallel tuples of texts and sentiment labels.
train_texts, train_labels = zip(
    *[(row['text'], row['sentiment']) for row in train_data])
val_texts, val_labels = zip(
    *[(row['text'], row['sentiment']) for row in val_data])
test_texts, test_labels = zip(
    *[(row['text'], row['sentiment']) for row in test_data])

# The dictionary is built from the training texts only and then reused to
# featurize all three splits.
dictionary = p1.bag_of_words(train_texts)

train_bow_features, val_bow_features, test_bow_features = (
    p1.extract_bow_feature_vectors(texts, dictionary)
    for texts in (train_texts, val_texts, test_texts)
)

#-------------------------------------------------------------------------------
# Problem 5
#-------------------------------------------------------------------------------

# toy_features, toy_labels = toy_data = utils.load_toy_data('toy_data.tsv')
# #
# T = 200
# L = 0.2
# #
# thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)
# thetas_avg_perceptron = p1.average_perceptron(toy_features, toy_labels, T)
# thetas_pegasos = p1.pegasos(toy_features, toy_labels, T, L)
Exemplo n.º 3
0
#-------------------------------------------------------------------------------

# Load the three review splits in train / validation / test order.
train_data, val_data, test_data = (
    utils.load_data(path)
    for path in ('data/reviews_train.tsv',
                 'data/reviews_val.tsv',
                 'data/reviews_test.tsv')
)

# Split every dataset into parallel tuples of texts and sentiment labels.
train_texts, train_labels = zip(
    *[(row['text'], row['sentiment']) for row in train_data])
val_texts, val_labels = zip(
    *[(row['text'], row['sentiment']) for row in val_data])
test_texts, test_labels = zip(
    *[(row['text'], row['sentiment']) for row in test_data])

# The dictionary is built from the training texts only and then reused to
# featurize all three splits.
dictionary = p1.bag_of_words(train_texts)

train_bow_features, val_bow_features, test_bow_features = (
    p1.extract_bow_feature_vectors(texts, dictionary)
    for texts in (train_texts, val_texts, test_texts)
)

#-------------------------------------------------------------------------------
# Problem 5
#-------------------------------------------------------------------------------

# Keep the loader's full return value as toy_data and also unpack it.
toy_data = utils.load_toy_data('data/toy_data.tsv')
toy_features, toy_labels = toy_data

# T is handed to all three algorithms; L only to pegasos.
T, L = 10, 0.2

# Run the three learning algorithms on the toy data set.
thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)
thetas_avg_perceptron = p1.average_perceptron(toy_features, toy_labels, T)
thetas_pegasos = p1.pegasos(toy_features, toy_labels, T, L)
import numpy as np

#-------------------------------------------------------------------------------
# Data loading. There is no need to edit code in this section.
#-------------------------------------------------------------------------------

train_data = utils.load_data('reviews_train.tsv')

# Read the whitespace-separated stop words. The context manager closes the
# file even if reading raises.
with open('stopwords.txt') as Arquivo:
    stopwords_data = Arquivo.read().split()

train_texts, train_labels = zip(*((sample['text'], sample['sentiment']) for sample in train_data))

# Build the dictionary from the training texts and the stop-word list, then
# featurize the training texts with it.
dictionary = p1.bag_of_words(train_texts, stopwords_data)
train_bow_features = p1.extract_bow_feature_vectors(train_texts, dictionary)

# T and L are the two extra arguments handed to p1.pegasos.
T = 25
L = 0.01
thetas_pegasos = p1.pegasos(train_bow_features, train_labels, T, L)
run = 1  # NOTE(review): not read anywhere in the visible code

# Prompt for reviews until the input stream ends or the user interrupts.
while True:
    try:
        input_texts = input('Input your review: ')
    except (EOFError, KeyboardInterrupt):
        # Leave the loop cleanly instead of dying with a traceback.
        break
    # 'blah' is a placeholder first text; only the prediction for the last
    # text (the user's review) is inspected below.
    # NOTE(review): presumably the placeholder works around single-text
    # input handling in p1 -- confirm.
    input_bow_features = p1.extract_bow_feature_vectors(['blah', input_texts], dictionary)
    output = p1.classify(input_bow_features, thetas_pegasos[0], thetas_pegasos[1])
    print('_______________________________________________________________')
    if output[-1] == 1:
        # NOTE(review): 'possitive' is misspelled; message text left unchanged.
        print('This is a possitive review!')
    else:
        print('This is a negative review')
Exemplo n.º 5
0
#-------------------------------------------------------------------------------
# Data loading. There is no need to edit code in this section.
#-------------------------------------------------------------------------------

# Load the three review splits in train / validation / test order.
train_data, val_data, test_data = (
    utils.load_data(path)
    for path in ('reviews_train.tsv', 'reviews_val.tsv', 'reviews_test.tsv')
)

# Split every dataset into parallel tuples of texts and sentiment labels.
train_texts, train_labels = zip(
    *[(row['text'], row['sentiment']) for row in train_data])
val_texts, val_labels = zip(
    *[(row['text'], row['sentiment']) for row in val_data])
test_texts, test_labels = zip(
    *[(row['text'], row['sentiment']) for row in test_data])

# The dictionary is built from the training texts only and then reused to
# featurize all three splits.
dictionary = p1.bag_of_words(train_texts)

train_bow_features, val_bow_features, test_bow_features = (
    p1.extract_bow_feature_vectors(texts, dictionary)
    for texts in (train_texts, val_texts, test_texts)
)
#
#-------------------------------------------------------------------------------
# Section 1.7
#-------------------------------------------------------------------------------
# Keep the loader's full return value as toy_data and also unpack it.
toy_data = utils.load_toy_data('toy_data.tsv')
toy_features, toy_labels = toy_data

# T is handed to all three algorithms; L only to the passive-aggressive one.
T, L = 5, 10

# Run the three learning algorithms on the toy data set.
thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)
thetas_avg_perceptron = p1.average_perceptron(toy_features, toy_labels, T)
thetas_avg_pa = p1.average_passive_aggressive(toy_features, toy_labels, T, L)
Exemplo n.º 6
0
#-------------------------------------------------------------------------------

# Load the three review splits in train / validation / test order.
train_data, val_data, test_data = (
    utils.load_data(path)
    for path in ('reviews_train.tsv', 'reviews_val.tsv', 'reviews_test.tsv')
)

# Split every dataset into parallel tuples of texts and sentiment labels.
train_texts, train_labels = zip(
    *[(row['text'], row['sentiment']) for row in train_data])
val_texts, val_labels = zip(
    *[(row['text'], row['sentiment']) for row in val_data])
test_texts, test_labels = zip(
    *[(row['text'], row['sentiment']) for row in test_data])

# The dictionary is built from the training texts only and then reused to
# featurize all three splits.
dictionary = p1.bag_of_words(train_texts)

train_bow_features, val_bow_features, test_bow_features = (
    p1.extract_bow_feature_vectors(texts, dictionary)
    for texts in (train_texts, val_texts, test_texts)
)

#-------------------------------------------------------------------------------
# Problem 5
#-------------------------------------------------------------------------------

# Keep the loader's full return value as toy_data and also unpack it.
toy_data = utils.load_toy_data('toy_data.tsv')
toy_features, toy_labels = toy_data

# T is handed to all three algorithms; L only to pegasos.
T, L = 10, 0.2

# Run the three learning algorithms on the toy data set.
thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)
thetas_avg_perceptron = p1.average_perceptron(toy_features, toy_labels, T)
thetas_pegasos = p1.pegasos(toy_features, toy_labels, T, L)
Exemplo n.º 7
0
# -------------------------------------------------------------------------------
# Data loading. There is no need to edit code in this section.
# -------------------------------------------------------------------------------

# Load the three review splits in train / validation / test order.
train_data, val_data, test_data = (
    utils.load_data(path)
    for path in ('reviews_train.tsv', 'reviews_val.tsv', 'reviews_test.tsv')
)

# Split every dataset into parallel tuples of texts and sentiment labels.
train_texts, train_labels = zip(
    *[(row['text'], row['sentiment']) for row in train_data])
val_texts, val_labels = zip(
    *[(row['text'], row['sentiment']) for row in val_data])
test_texts, test_labels = zip(
    *[(row['text'], row['sentiment']) for row in test_data])

# The dictionary is built from the training texts only and then reused to
# featurize all three splits.
dictionary = p1.bag_of_words(train_texts)

# Every split is featurized with count=True.
train_bow_features, val_bow_features, test_bow_features = (
    p1.extract_bow_feature_vectors(texts, dictionary, count=True)
    for texts in (train_texts, val_texts, test_texts)
)


class TestAlgoComparison(unittest.TestCase):
    def test_algorithm_compare(self):
        # -------------------------------------------------------------------------------
        # # Problem 5
        # #-------------------------------------------------------------------------------

        toy_features, toy_labels = toy_data = utils.load_toy_data('toy_data.tsv')

        T = 100
        L = 0.2