import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score


def main():
    model = LinearRegression()
    train_data = get_train_data()
    train_x, train_y = process_data(train_data)
    # Cubic feature expansion: append squared and cubed copies of each column.
    train_x = pd.concat(
        [train_x, np.square(train_x), np.power(train_x, 3)], axis=1)
    model.fit(train_x, train_y)
    pred_y = np.round(model.predict(train_x))
    print('Train mean absolute error:', np.mean(np.abs(pred_y - train_y)))

    eval_data = get_eval_data()
    eval_x, eval_y = process_data(eval_data)
    eval_x = pd.concat(
        [eval_x, np.square(eval_x), np.power(eval_x, 3)], axis=1)
    pred_y = np.round(model.predict(eval_x))
    print('Eval mean absolute error:', np.mean(np.abs(pred_y - eval_y)))

    # The eval predictions come in pairs: the first half are one side's
    # scores, the second half the other's, so a game is won by whichever
    # half scores higher.
    df = pd.DataFrame({
        'Score1': pred_y[:pred_y.size // 2],
        'Score2': pred_y[pred_y.size // 2:]
    })
    df['Winner'] = df['Score1'] < df['Score2']
    print(np.sum(df['Winner'] == eval_data['Winner']) / np.size(df['Winner']))
    print(r2_score(eval_y, pred_y))
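# A small sketch of the same per-column power expansion as a reusable helper.
# Suffixing the column names avoids the duplicate labels that the bare
# pd.concat above produces; it assumes the inputs are numeric DataFrames.
def cubic_features(x):
    return pd.concat(
        [x, np.square(x).add_suffix('^2'), np.power(x, 3).add_suffix('^3')],
        axis=1)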
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression


def main():
    model = LogisticRegression(penalty='l2', solver='liblinear')
    train_data = get_train_data()
    train_x, train_y = process_data(train_data)
    model.fit(train_x, train_y)
    pred_y = model.predict(train_x)
    print('Train accuracy:', np.sum(pred_y == train_y) / np.size(train_y))

    eval_data = get_eval_data()
    eval_x, eval_y = process_data(eval_data)
    pred_y = model.predict(eval_x)
    print('Eval accuracy:', np.sum(pred_y == eval_y) / np.size(eval_y))
    print()

    # Demo: every 2018 game in which team 1242 appears on either side.
    demo_data = pd.concat([
        eval_data.loc[(eval_data['WTeamID'] == 1242)
                      & (eval_data['Season'] == 2018)],
        eval_data.loc[(eval_data['LTeamID'] == 1242)
                      & (eval_data['Season'] == 2018)]
    ], axis=0)
    demo_x, demo_y = process_data(demo_data)
    pred_y = model.predict(demo_x)
    pred_y = pd.DataFrame(data=pred_y, columns=['Predicted'])
    # Reset the filtered index so the prediction column lines up row-by-row.
    demo_data = lookup_teams(demo_data).reset_index(drop=True)
    result = pd.concat([demo_data, pred_y], axis=1)
    print(result[['Season', 'T0TeamName', 'T1TeamName', 'Winner',
                  'Predicted']])
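# A hedged usage sketch: LogisticRegression also exposes predict_proba, so
# the demo table can report a win probability instead of a hard 0/1 label.
# process_data and lookup_teams are the helpers used above; the WinProb
# column name is made up here.
def demo_with_probabilities(model, demo_data):
    demo_x, _ = process_data(demo_data)
    proba = model.predict_proba(demo_x)[:, 1]  # probability of class 1
    out = lookup_teams(demo_data).reset_index(drop=True)
    out['WinProb'] = proba
    return out[['Season', 'T0TeamName', 'T1TeamName', 'Winner', 'WinProb']]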
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split


def main():
    train_data = get_train_data()
    X, y = process_data(train_data)
    train_X, test_X, train_y, test_y = train_test_split(X, y)
    eval_data = get_eval_data()
    eval_X, eval_y = process_data(eval_data)

    forest = RandomForestClassifier(n_estimators=80)
    forest.fit(train_X, train_y)
    # Accuracy on the held-out split, not on the training rows themselves.
    pred_y = forest.predict(test_X)
    print('Test accuracy:', np.sum(pred_y == test_y) / np.size(test_y))
    pred_y = forest.predict(eval_X)
    print('Eval accuracy:', np.sum(pred_y == eval_y) / np.size(eval_y))
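# A small sketch, assuming the same X and y as above: cross_val_score gives a
# less noisy estimate than a single train/test split when picking the forest
# size (the 80 above was presumably hand-tuned). The candidate list is an
# assumption, not from the original.
from sklearn.model_selection import cross_val_score


def pick_n_estimators(X, y, candidates=(20, 40, 80, 160)):
    scores = {n: cross_val_score(RandomForestClassifier(n_estimators=n),
                                 X, y, cv=5).mean()
              for n in candidates}
    return max(scores, key=scores.get)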
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from tqdm import tqdm


def main():
    train_data = get_train_data()
    X, y = process_data(train_data)
    train_X, test_X, train_y, test_y = train_test_split(X, y)
    eval_data = get_eval_data()
    eval_X, eval_y = process_data(eval_data)

    # Sweep the neighbor count, reporting held-out and eval accuracy for each.
    for k in tqdm(range(2, 10)):
        print(k)
        knn = KNeighborsClassifier(n_neighbors=k)
        knn.fit(train_X, train_y)
        pred_y = knn.predict(test_X)
        print('Test accuracy:', np.sum(pred_y == test_y) / np.size(test_y))
        pred_y = knn.predict(eval_X)
        print('Eval accuracy:', np.sum(pred_y == eval_y) / np.size(eval_y))
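# The same sweep as a sketch with GridSearchCV, which cross-validates each k
# instead of relying on one split; assumes the same X and y as above.
from sklearn.model_selection import GridSearchCV


def best_k(X, y):
    search = GridSearchCV(KNeighborsClassifier(),
                          {'n_neighbors': list(range(2, 10))}, cv=5)
    search.fit(X, y)
    return search.best_params_['n_neighbors']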
import tensorflow as tf


def train(args):
    train_inp, train_out = get_train_data()
    print('train data loaded')
    # Integer division: any final partial batch is dropped.
    no_of_batches = len(train_inp) // BATCH_SIZE
    test_inp, test_out = get_test_data()
    print('test data loaded')

    data = tf.placeholder(tf.float32, [None, MAX_SEQ_LEN, WORD_DIM])
    target = tf.placeholder(tf.float32, [None, MAX_SEQ_LEN, NUM_CLASSES])
    dropout = tf.placeholder(tf.float32)
    model = Model(data, target, dropout, NUM_HIDDEN, NUM_LAYERS)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        if args.restore is not None:
            saver.restore(sess, 'model.ckpt')
            print('last model restored')
        for epoch in range(NUM_EPOCH):
            ptr = 0
            for _ in range(no_of_batches):
                batch_inp = train_inp[ptr:ptr + BATCH_SIZE]
                batch_out = train_out[ptr:ptr + BATCH_SIZE]
                ptr += BATCH_SIZE
                sess.run(model.optimize,
                         {data: batch_inp, target: batch_out, dropout: 0.5})
            # Dropout is disabled (keep probability 1) when measuring error.
            error = sess.run(model.error,
                             {data: test_inp, target: test_out, dropout: 1})
            print('Epoch {:2d} error {:3.1f}%'.format(epoch + 1, error * 100))
            if epoch % 10 == 0:
                save_path = saver.save(sess, 'model.ckpt')
                print('Model saved in file: %s' % save_path)
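# The epoch loop above walks the training set in order and drops the final
# partial batch. A hedged sketch of a shuffled batch iterator that keeps the
# remainder, assuming train_inp and train_out are NumPy arrays of equal
# length; this helper is not part of the original code.
import numpy as np


def iterate_batches(inputs, outputs, batch_size):
    order = np.random.permutation(len(inputs))
    for start in range(0, len(inputs), batch_size):
        idx = order[start:start + batch_size]
        yield inputs[idx], outputs[idx]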
import tensorflow as tf


def train(args):
    train_inp, train_out = get_train_data()
    print('train data loaded')
    no_of_batches = len(train_inp) // BATCH_SIZE
    test_inp, test_out = get_test_data()
    print('test data loaded')

    data = tf.placeholder(tf.float32, [None, MAX_SEQ_LEN, WORD_DIM])
    target = tf.placeholder(tf.float32, [None, MAX_SEQ_LEN, NUM_CLASSES])
    dropout = tf.placeholder(tf.float32)
    model = Model(data, target, dropout, NUM_HIDDEN, NUM_LAYERS)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        if args.restore is not None:
            saver.restore(sess, 'model.ckpt')
            print('last model restored')
        for epoch in range(100):
            ptr = 0
            for _ in range(no_of_batches):
                batch_inp = train_inp[ptr:ptr + BATCH_SIZE]
                batch_out = train_out[ptr:ptr + BATCH_SIZE]
                ptr += BATCH_SIZE
                sess.run(model.optimize,
                         {data: batch_inp, target: batch_out, dropout: 0.5})
            if epoch % 10 == 0:
                save_path = saver.save(sess, 'model.ckpt')
                print('Model saved in file: %s' % save_path)
            error = sess.run(model.error,
                             {data: test_inp, target: test_out, dropout: 1})
            print('Epoch {:2d} error {:3.1f}%'.format(epoch + 1, error * 100))
            # getpredf1 returns the predictions together with the true
            # sequence lengths, which f1 needs to mask out the padding.
            pred, length = sess.run(model.getpredf1,
                                    {data: test_inp, target: test_out,
                                     dropout: 1})
            f1(pred, test_out, length)
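# f1() is defined elsewhere in this project. Purely as a hedged sketch, a
# length-masked token-level F1 could look like the following, assuming pred
# is a [batch, MAX_SEQ_LEN] integer label array, target is the one-hot
# [batch, MAX_SEQ_LEN, NUM_CLASSES] array fed above, and length holds the
# true sequence lengths.
import numpy as np


def masked_f1(pred, target, length, positive_class=1):
    labels = np.argmax(target, axis=2)  # one-hot -> integer labels
    tp = fp = fn = 0
    for p, t, n in zip(pred, labels, length):
        p, t = p[:n], t[:n]  # drop the padded positions
        tp += np.sum((p == positive_class) & (t == positive_class))
        fp += np.sum((p == positive_class) & (t != positive_class))
        fn += np.sum((p != positive_class) & (t == positive_class))
    precision = tp / (tp + fp) if tp + fp else 0.0
    recall = tp / (tp + fn) if tp + fn else 0.0
    if precision + recall == 0:
        return 0.0
    return 2 * precision * recall / (precision + recall)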
import numpy as np
from keras.layers import Dense
from keras.models import Sequential
from sklearn.model_selection import train_test_split


def main():
    np.random.seed(2410)
    train_data = get_train_data()
    X, y = process_data(train_data)
    train_X, test_X, train_y, test_y = train_test_split(X, y)
    eval_data = get_eval_data()
    eval_X, eval_y = process_data(eval_data)

    # Two linear hidden layers feeding a sigmoid output for binary win/loss.
    model = Sequential()
    model.add(Dense(20, input_dim=len(train_X.columns)))
    model.add(Dense(10))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer='rmsprop', loss='binary_crossentropy',
                  metrics=['accuracy'])
    model.fit(train_X, train_y, epochs=10, batch_size=32)

    # Round the sigmoid outputs to hard 0/1 predictions.
    pred_y = np.round(model.predict(test_X).flatten())
    print(np.unique(pred_y, return_counts=True))
    print('Test accuracy:', np.sum(pred_y == test_y) / np.size(test_y))
    pred_y = np.round(model.predict(eval_X).flatten())
    print('Eval accuracy:', np.sum(pred_y == eval_y) / np.size(eval_y))
    model.save('model.h5')
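# A short usage sketch: load_model is the standard Keras counterpart to
# model.save above, e.g. for scoring in a separate process. It assumes the
# same get_eval_data/process_data helpers are available.
from keras.models import load_model


def predict_eval(path='model.h5'):
    model = load_model(path)
    eval_X, eval_y = process_data(get_eval_data())
    return np.round(model.predict(eval_X).flatten())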
import tensorflow as tf


def train(args):
    train_inp, train_out = get_train_data()
    print('train data loaded')
    # Round up so the final partial batch is not skipped.
    no_of_batches = (len(train_inp) + BATCH_SIZE - 1) // BATCH_SIZE
    test_inp, test_out = get_test_data()
    print('test data loaded')
    final_inp, final_out = get_final_data()
    print('final data loaded')

    data = tf.placeholder(tf.float32, [None, MAX_SEQ_LEN, WORD_DIM])
    target = tf.placeholder(tf.float32, [None, MAX_SEQ_LEN, NUM_CLASSES])
    dropout = tf.placeholder(tf.float32)
    model = Model(data, target, dropout, NUM_HIDDEN, NUM_LAYERS)
    maximum = 0

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        if args.restore is not None:
            saver.restore(sess, 'model.ckpt')
            print('last model restored')
        for epoch in range(200):
            ptr = 0
            for _ in range(no_of_batches):
                batch_inp = train_inp[ptr:ptr + BATCH_SIZE]
                batch_out = train_out[ptr:ptr + BATCH_SIZE]
                ptr += BATCH_SIZE
                sess.run(model.optimize,
                         {data: batch_inp, target: batch_out, dropout: 0.5})
            if epoch % 10 == 0:
                save_path = saver.save(sess, 'model.ckpt')
                print('Model saved in file: %s' % save_path)
            pred, length = sess.run(model.getpredf1,
                                    {data: test_inp, target: test_out,
                                     dropout: 1})
            print('Epoch:' + str(epoch), 'TestA score,')
            m = f1(pred, test_out, length)
            # Keep a separate checkpoint of the best TestA model and score
            # it once on the held-back TestB set whenever it improves.
            if m > maximum:
                maximum = m
                save_path = saver.save(sess, 'model_max.ckpt')
                print('Max Model saved in file: %s' % save_path)
                pred, length = sess.run(model.getpredf1,
                                        {data: final_inp, target: final_out,
                                         dropout: 1})
                print('TestB score,')
                f1(pred, final_out, length)
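# A short usage sketch: after training, restore the best TestA checkpoint and
# score TestB once with it, rather than relying on whatever the final epoch
# left in memory. Assumes the same session, graph, placeholders, and f1
# helper as above; this function is not part of the original code.
def evaluate_best(sess, saver, model, data, target, dropout,
                  final_inp, final_out):
    saver.restore(sess, 'model_max.ckpt')
    pred, length = sess.run(model.getpredf1,
                            {data: final_inp, target: final_out, dropout: 1})
    return f1(pred, final_out, length)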