def predict_svr(clf, data):
    """Score essays from a tab-separated file with a trained SVR model.

    Each essay is spell-corrected (via the module-level enchant dictionary
    ``d`` -- assumed; defined elsewhere in this file), embedded as the mean
    of the 300-d GloVe vectors of its words (looked up in the local MongoDB
    ``nlprokz.glove`` collection), and scored with ``clf``.

    :param clf: fitted regressor exposing ``predict(X)``.
    :param data: path to a UTF-8 TSV file; column 1 holds the essay-set id
                 used by ``get_score`` to extract the gold score.
    :return: average kappa between gold scores and predictions, as computed
             by the module-level ``get_average_kappa``.
    """
    feature_vector = []
    scores = []
    # Context manager ensures the file is closed even if parsing raises
    # (original leaked the handle on exception).
    with codecs.open(data, encoding='utf-8') as fo:
        lines = fo.readlines()
    client = MongoClient('localhost', 27017)
    db = client['nlprokz']
    glove = db.glove
    for each_line in lines:
        row = each_line.split('\n')[0].split('\t')
        essay_set = int(row[1])
        scores.append(get_score(essay_set, row))
        words = each_line.lower().split()
        # Replace misspelled tokens with enchant's top suggestion; tokens
        # with no suggestion are dropped (original behavior preserved).
        word_list = []
        for word in words:
            if not d.check(word):
                suggest = d.suggest(word)
                if len(suggest) > 0:
                    word_list.append(suggest[0])
            else:
                word_list.append(word)
        essay_vector = np.zeros(300)
        # Single batched Mongo query for all words of this essay.
        for doc in glove.find({"gram": {"$in": word_list}}):
            word_vector = np.array([float(n) for n in doc['glove_vector']])
            if len(word_vector) == 300:
                essay_vector += word_vector
        # max(..., 1) guards against ZeroDivisionError on an empty line.
        feature_vector.append(essay_vector / max(len(words), 1))
    predictions = clf.predict(np.array(feature_vector))
    return get_average_kappa(np.array(scores), np.array(predictions))
def predict_svr(clf, data):
    """Validate a trained SVR model on hand-crafted essay features.

    Reads a tab-separated file (first line is a header), builds an
    ``Essay`` per row, collects its feature attributes in sorted-name
    order so the vector layout matches training, and compares the model's
    predictions to the stored gold scores.

    :param clf: fitted regressor exposing ``predict(X)``.
    :param data: path to a UTF-8 TSV file of labelled essays.
    :return: average kappa between gold scores and predictions.
    """
    feature_vector = []
    scores = []
    with codecs.open(data, encoding='utf-8') as fo:
        lines = fo.readlines()
    # enumerate replaces the original hand-maintained line counter.
    for idx, each_line in enumerate(lines):
        # Ignore the heading line
        if idx < 1:
            continue
        if idx % 50 == 0:
            print('Validation sample: ' + str(idx))
        row = each_line.split('\n')[0].split('\t')
        e = Essay(row, store_score=True)
        f = e.features
        # Sorted attribute names keep the feature order deterministic.
        vector = [f.__dict__[key] for key in sorted(f.__dict__.keys())]
        scores.append(e.score)
        feature_vector.append(vector)
    predictions = clf.predict(np.array(feature_vector))
    return get_average_kappa(np.array(scores), np.array(predictions))
def predict_sklearn_random_forest(model, test_data_dump):
    """Evaluate a fitted sklearn random forest on a pickled test matrix.

    :param model: fitted estimator exposing ``predict(X)``.
    :param test_data_dump: path/key understood by ``util.load_object``;
        the loaded 2-D array has features in all but the last column and
        the gold score in the last column.
    :return: average kappa between gold scores and predictions.
    """
    test_data = util.load_object(test_data_dump)
    # Last column is the target; the rest are features.  (Original
    # initialized targets/predictions to [] and immediately overwrote
    # them -- dead code removed.)
    targets = test_data[:, -1]
    predictions = model.predict(test_data[:, :-1])
    return get_average_kappa(targets, predictions)
def predict_random_forest(model, test_data_dump):
    """Evaluate a random forest on a pickled test set, one sample at a time.

    :param model: estimator exposing ``predict(x)`` for a single feature
        row (per-sample calls preserved from the original; the model here
        apparently is not the sklearn batch API -- see
        ``predict_sklearn_random_forest``).
    :param test_data_dump: path/key understood by ``util.load_object``;
        each sample holds features followed by the gold score.
    :return: average kappa between gold scores and predictions.
    """
    test_data = util.load_object(test_data_dump)
    # Comprehensions replace the append loop; dead [] initializations
    # from the original are removed.
    targets = [sample[-1] for sample in test_data]
    predictions = [model.predict(sample[:-1]) for sample in test_data]
    return get_average_kappa(targets, predictions)
def predict_svr(clf, data):
    """Score essays from a tab-separated file using mean GloVe embeddings.

    Spell-corrects each token with the module-level enchant dictionary
    ``d`` (assumed; defined elsewhere), sums the 300-d GloVe vectors
    fetched from the local MongoDB ``nlprokz.glove`` collection, averages
    over the token count, and evaluates ``clf`` against the gold scores.

    :param clf: fitted regressor exposing ``predict(X)``.
    :param data: path to a UTF-8 TSV file; column 1 is the essay-set id.
    :return: average kappa between gold scores and predictions.
    """
    feature_vector = []
    scores = []
    # with-block closes the file even on exception (original leaked it).
    with codecs.open(data, encoding='utf-8') as fo:
        lines = fo.readlines()
    client = MongoClient('localhost', 27017)
    db = client['nlprokz']
    glove = db.glove
    for each_line in lines:
        row = each_line.split('\n')[0].split('\t')
        essay_set = int(row[1])
        scores.append(get_score(essay_set, row))
        words = each_line.lower().split()
        # Replace misspellings with the top suggestion; words with no
        # suggestion are skipped (original behavior preserved).
        word_list = []
        for word in words:
            if not d.check(word):
                suggest = d.suggest(word)
                if len(suggest) > 0:
                    word_list.append(suggest[0])
            else:
                word_list.append(word)
        essay_vector = np.zeros(300)
        for doc in glove.find({"gram": {"$in": word_list}}):
            word_vector = np.array([float(n) for n in doc['glove_vector']])
            if len(word_vector) == 300:
                essay_vector += word_vector
        # Guard against ZeroDivisionError on an empty line.
        feature_vector.append(essay_vector / max(len(words), 1))
    # (Large commented-out Essay-feature variant removed; it lives as its
    # own function elsewhere in this file.)
    predictions = clf.predict(np.array(feature_vector))
    return get_average_kappa(np.array(scores), np.array(predictions))
def predict_svr(clf, test_data_dump):
    """Return the average kappa of *clf* on a pickled test matrix.

    The object loaded via ``util.load_object`` is a 2-D array whose last
    column holds the gold scores and whose remaining columns are the
    feature matrix fed to ``clf.predict``.
    """
    matrix = util.load_object(test_data_dump)
    features, targets = matrix[:, :-1], matrix[:, -1]
    return get_average_kappa(targets, clf.predict(features))
# --- Top-level TensorFlow training / evaluation script ---
# Relies on names defined elsewhere in this file: train_op, cost, y, x_, y_,
# feature_vector, scores, valid_feature_vector, valid_scores,
# test_feature_vector, test_scores, PRINT_STEP, costs, count, is_increasing,
# and get_average_kappa.  (Python 2 -- note the bare `print` statements.)
i = 0
with tf.Session() as sess:
    # Pre-TF-1.0 initializer API.
    tf.initialize_all_variables().run()
    while (True):
        # One optimization step over the whole training set per iteration.
        sess.run(train_op, feed_dict={x_: feature_vector, y_: scores})
        if i % PRINT_STEP == 0:
            # Periodically measure validation cost for early stopping.
            c = sess.run(cost, feed_dict={
                x_: valid_feature_vector,
                y_: valid_scores
            })
            print('training cost:', c)
            costs.append(c)
            # Stop once the validation cost has risen over the last three
            # checks more than twice; `count` tolerates transient rises.
            # NOTE(review): `count` is never reset (the reset is commented
            # out below), so it accumulates across the whole run -- confirm
            # this is intentional.
            if len(costs) > 3 and is_increasing(costs[-3:]) and count > 2:
                break
            elif len(costs) > 3 and is_increasing(costs[-3:]):
                count += 1
                print count
            # else:
            #     count = 0
        i += 1
    # Evaluate the trained network on the held-out test set.
    response = sess.run(y, feed_dict={x_: test_feature_vector})
    print get_average_kappa(response, test_scores)
    # incorrect = sess.run(error,{data: data, target: target})
    # incorrect = sess.run(error, feed_dict={x_: feature_vector, y_: scores})
    # print feature_vector[feature_vector.shape[0] - 1]
    # print sess.run(prediction,feed_dict={x_: feature_vector})
    #print sess.run(prediction,{data: [[[1],[0],[0],[1],[1],[0],[1],[1],[1],[0],[1],[0],[0],[1],[1],[0],[1],[1],[1],[0]]]})
    # print('Epoch {:2d} error {:3.1f}%'.format(i + 1, 100 * incorrect))
    # Redundant: the `with` block already closes the session on exit.
    sess.close()