Example #1
import codecs

import enchant
import numpy as np
from pymongo import MongoClient

# Module-level spell-checker assumed by the snippet; the source page never
# shows where `d` comes from.
d = enchant.Dict("en_US")


def predict_svr(clf, data):
    # get_score and get_average_kappa come from the surrounding repo and are
    # not shown on the source page.
    feature_vector = []
    scores = []
    fo = codecs.open(data, encoding='utf-8')
    lines = fo.readlines()
    fo.close()
    client = MongoClient('localhost', 27017)
    db = client['nlprokz']
    glove = db.glove

    for each_line in lines:
        row = each_line.rstrip('\n').split('\t')
        essay_set = int(row[1])
        scores.append(get_score(essay_set, row))
        # Note: this tokenizes the whole TSV line, metadata columns included,
        # not just the essay text.
        words = each_line.lower().split()
        # Replace unrecognized tokens with the spell-checker's top suggestion.
        word_list = []
        for word in words:
            if not d.check(word):
                suggest = d.suggest(word)
                if len(suggest) > 0:
                    word_list.append(suggest[0])
            else:
                word_list.append(word)
        # Average the 300-dimensional GloVe vectors of the essay's words.
        essay_vector = np.zeros(300)
        for doc in glove.find({"gram": {"$in": word_list}}):
            word_vector = np.array([float(n) for n in doc['glove_vector']])
            if len(word_vector) == 300:
                essay_vector += word_vector
        feature_vector.append(essay_vector / len(words))

    predictions = clf.predict(np.array(feature_vector))
    return get_average_kappa(np.array(scores), np.array(predictions))
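
None of these snippets defines get_average_kappa. Essay scoring on the ASAP dataset is conventionally evaluated with quadratic weighted kappa, so a minimal stand-in could wrap scikit-learn's cohen_kappa_score; the per-essay-set averaging implied by the name, and the rounding of continuous SVR outputs, are assumptions:

import numpy as np
from sklearn.metrics import cohen_kappa_score

def get_average_kappa(targets, predictions):
    # SVR outputs are continuous; kappa needs integer labels, so round first.
    targets = np.asarray(targets).astype(int)
    predictions = np.rint(np.asarray(predictions)).astype(int)
    return cohen_kappa_score(targets, predictions, weights='quadratic')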
Example #2
def predict_svr(clf, data):
    feature_vector = []
    scores = []
    fo = codecs.open(data, encoding='utf-8')
    lines = fo.readlines()
    fo.close()

    line = 0
    for each_line in lines:
        row = each_line.rstrip('\n').split('\t')
        vector = []
        # Ignore the heading line.
        if line < 1:
            line += 1
            continue
        if line % 50 == 0:
            print('Validation sample: ' + str(line))
        # Essay is defined elsewhere in the repo; its .features object holds
        # the numeric features. Sorting the attribute names keeps the feature
        # order deterministic across essays.
        e = Essay(row, store_score=True)
        f = e.features
        for i in sorted(f.__dict__.keys()):
            vector.append(f.__dict__[i])
        scores.append(e.score)
        feature_vector.append(vector)
        line += 1

    predictions = clf.predict(np.array(feature_vector))
    return get_average_kappa(np.array(scores), np.array(predictions))
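
Example #2 never shows the Essay class; only its interface matters here: a numeric .score and a .features object whose __dict__ holds numeric attributes. A hypothetical stand-in (the class internals and the ASAP-style column indices are assumptions):

class EssayFeatures(object):
    """Hypothetical feature bag; the real repo computes many more features."""
    def __init__(self, text):
        words = text.split()
        self.word_count = len(words)
        self.avg_word_len = (
            sum(len(w) for w in words) / float(len(words)) if words else 0.0)


class Essay(object):
    """Hypothetical stand-in matching the interface used above."""
    def __init__(self, row, store_score=False):
        self.features = EssayFeatures(row[2])  # row[2]: essay text (assumed)
        self.score = int(row[6]) if store_score else None  # row[6]: human score (assumed)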
Example #3
def predict_sklearn_random_forest(model, test_data_dump):
    test_data = util.load_object(test_data_dump)
    # Last column holds the target score; the rest are features.
    targets = test_data[:, -1]
    predictions = model.predict(test_data[:, :-1])
    return get_average_kappa(targets, predictions)
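
util.load_object is also left undefined; a plausible pickle-based pair, assuming the dumps were written with the matching helper (both names beyond load_object itself are assumptions):

import pickle

def save_object(obj, path):
    with open(path, 'wb') as f:
        pickle.dump(obj, f, protocol=pickle.HIGHEST_PROTOCOL)

def load_object(path):
    with open(path, 'rb') as f:
        return pickle.load(f)

The [:, -1] slicing above implies the dumps are 2-D NumPy arrays; pickle round-trips those unchanged.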
Example #4
def predict_random_forest(model, test_data_dump):
    test_data = util.load_object(test_data_dump)
    predictions = []
    targets = []
    # Row-by-row variant of the previous example; scikit-learn expects a 2-D
    # array, hence the reshape. The vectorized version above is faster.
    for sample in test_data:
        targets.append(sample[-1])
        predictions.append(model.predict(sample[:-1].reshape(1, -1))[0])
    return get_average_kappa(targets, predictions)
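
The snippets show only prediction; a hypothetical driver for the two random-forest helpers above (the regressor choice and the dump file names are assumptions):

from sklearn.ensemble import RandomForestRegressor

# `util` is the repo's own module used by the snippets above.
train_data = util.load_object('train_features.pkl')  # hypothetical file name
model = RandomForestRegressor(n_estimators=100)
model.fit(train_data[:, :-1], train_data[:, -1])
print(predict_sklearn_random_forest(model, 'test_features.pkl'))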
Example #5
def predict_svr(clf, data):
    # Same GloVe-averaging approach as Example #1.
    feature_vector = []
    scores = []
    fo = codecs.open(data, encoding='utf-8')
    lines = fo.readlines()
    fo.close()
    client = MongoClient('localhost', 27017)
    db = client['nlprokz']
    glove = db.glove

    for each_line in lines:
        row = each_line.rstrip('\n').split('\t')
        essay_set = int(row[1])
        scores.append(get_score(essay_set, row))
        words = each_line.lower().split()
        word_list = []
        for word in words:
            if not d.check(word):
                suggest = d.suggest(word)
                if len(suggest) > 0:
                    word_list.append(suggest[0])
            else:
                word_list.append(word)
        essay_vector = np.zeros(300)
        for doc in glove.find({"gram": {"$in": word_list}}):
            word_vector = np.array([float(n) for n in doc['glove_vector']])
            if len(word_vector) == 300:
                essay_vector += word_vector
        feature_vector.append(essay_vector / len(words))

    predictions = clf.predict(np.array(feature_vector))
    return get_average_kappa(np.array(scores), np.array(predictions))
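
The find() query in Examples #1 and #5 implies glove documents of the form {'gram': <token>, 'glove_vector': [<300 numeric strings>]}. A hedged sketch of a loader that would populate the collection from a standard 300-d GloVe text file (the file name, batching, and index are assumptions; only the field names come from the query):

from pymongo import MongoClient

def load_glove_into_mongo(path='glove.840B.300d.txt'):  # hypothetical file name
    glove = MongoClient('localhost', 27017)['nlprokz'].glove
    batch = []
    with open(path, encoding='utf-8') as f:
        for line in f:
            parts = line.rstrip('\n').split(' ')
            # Vectors are stored as strings, matching the float(n) conversion
            # in predict_svr above.
            batch.append({'gram': parts[0], 'glove_vector': parts[1:]})
            if len(batch) == 10000:
                glove.insert_many(batch)
                batch = []
    if batch:
        glove.insert_many(batch)
    glove.create_index('gram')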
Example #6
def predict_svr(clf, test_data_dump):
    test_data = util.load_object(test_data_dump)
    targets = test_data[:, -1]
    predictions = clf.predict(test_data[:, :-1])
    return get_average_kappa(targets, predictions)
    # NOTE: everything below this return is unreachable dead code in the
    # original snippet -- leftover TensorFlow training logic that relies on
    # names defined elsewhere (train_op, cost, y, x_, y_, PRINT_STEP, costs,
    # count, is_increasing, and the feature/score arrays).
    i = 0
    with tf.Session() as sess:
        # tf.initialize_all_variables() is the old (deprecated) name for
        # tf.global_variables_initializer().
        tf.initialize_all_variables().run()
        while True:
            sess.run(train_op, feed_dict={x_: feature_vector, y_: scores})
            if i % PRINT_STEP == 0:
                c = sess.run(cost,
                             feed_dict={
                                 x_: valid_feature_vector,
                                 y_: valid_scores
                             })
                print('training cost:', c)
                costs.append(c)
                # Early stopping: once the last three validation costs have
                # been strictly increasing more than twice, give up.
                if len(costs) > 3 and is_increasing(costs[-3:]) and count > 2:
                    break
                elif len(costs) > 3 and is_increasing(costs[-3:]):
                    count += 1
                    print(count)
            i += 1
        response = sess.run(y, feed_dict={x_: test_feature_vector})
        print(get_average_kappa(response, test_scores))
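
The early-stopping check above calls an is_increasing helper that the snippet never defines; a minimal sketch consistent with how it is used on the last three validation costs:

def is_increasing(values):
    # True when each value is strictly greater than its predecessor.
    return all(a < b for a, b in zip(values, values[1:]))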