Example #1
def main():
    """Fit a linear regression on cubic polynomial features and report
    mean absolute error on the train and eval sets, plus winner-prediction
    accuracy and R^2 on the eval set."""
    regressor = LinearRegression()

    raw_train = get_train_data()
    train_x, train_y = process_data(raw_train)
    # Polynomial feature expansion: append squared and cubed copies of
    # every column alongside the originals.
    train_x = pd.concat(
        [train_x, np.square(train_x), np.power(train_x, 3)],
        axis=1)

    regressor.fit(train_x, train_y)
    train_pred = np.round(regressor.predict(train_x))
    train_mae = np.sum(np.absolute(train_pred - train_y)) / np.size(train_pred)
    print('Train mean absolute error:', train_mae)

    raw_eval = get_eval_data()
    eval_x, eval_y = process_data(raw_eval)
    eval_x = pd.concat(
        [eval_x, np.square(eval_x), np.power(eval_x, 3)],
        axis=1)
    eval_pred = np.round(regressor.predict(eval_x))
    eval_mae = np.sum(np.absolute(eval_pred - eval_y)) / np.size(eval_y)
    print('Eval mean absolute error:', eval_mae)

    # First half of the predictions are Score1, second half Score2;
    # a game is a "win" when Score2 exceeds Score1.
    half = int(eval_pred.size / 2)
    df = pd.DataFrame({
        'Score1': eval_pred[:half],
        'Score2': eval_pred[half:]
    })
    df['Winner'] = df['Score1'] < df['Score2']
    print(np.sum(df['Winner'] == raw_eval['Winner']) / np.size(df['Winner']))
    print(r2_score(eval_y, eval_pred))
Example #2
def main():
    """Train an L2-regularized logistic regression, print train/eval
    accuracy, then show predictions for team 1242's 2018-season games."""
    clf = LogisticRegression(penalty='l2', solver='liblinear')

    train_x, train_y = process_data(get_train_data())
    clf.fit(train_x, train_y)
    print('Train accuracy:',
          np.sum(clf.predict(train_x) == train_y) / np.size(train_y))

    eval_data = get_eval_data()
    eval_x, eval_y = process_data(eval_data)
    print('Eval accuracy:',
          np.sum(clf.predict(eval_x) == eval_y) / np.size(eval_y))
    print('\n')

    # Every 2018-season game in which team 1242 appears, whether it was
    # recorded as the winning (WTeamID) or losing (LTeamID) side.
    in_2018 = eval_data['Season'] == 2018
    as_winner = eval_data.loc[eval_data['WTeamID'] == 1242].loc[in_2018]
    as_loser = eval_data.loc[eval_data['LTeamID'] == 1242].loc[in_2018]
    demo_data = pd.concat([as_winner, as_loser], axis=0)

    demo_x, demo_y = process_data(demo_data)
    predictions = pd.DataFrame(data=clf.predict(demo_x),
                               columns=["Predicted"])

    demo_data = lookup_teams(demo_data)

    result = pd.concat([demo_data, predictions], axis=1)
    print(result[['Season', 'T0TeamName', 'T1TeamName', 'Winner',
                  'Predicted']])
Example #3
def main():
    """Train a random forest and report held-out and eval accuracy.

    The data is split via train_test_split, so the first metric is computed
    on the held-out test split — the original printed it as "Training
    accuracy", which was misleading; the label is corrected here.
    """
    train_data = get_train_data()
    X, y = process_data(train_data)
    train_X, test_X, train_y, test_y = train_test_split(X, y)
    eval_data = get_eval_data()
    eval_X, eval_y = process_data(eval_data)

    # 80 trees; keyword form avoids relying on positional-arg order.
    forest = RandomForestClassifier(n_estimators=80)
    forest.fit(train_X, train_y)

    # Accuracy on the held-out split, not on the data the model was fit to.
    pred_y = forest.predict(test_X)
    print("Test accuracy: ", np.sum(pred_y == test_y) / np.size(test_y))

    pred_y = forest.predict(eval_X)
    print('Eval accuracy:', np.sum(pred_y == eval_y) / np.size(eval_y))
Example #4
def main():
    """Sweep k-nearest-neighbors over k in [2, 10) and report accuracies.

    For each k a fresh classifier is fit on the training split; accuracy is
    printed for the held-out test split and for the eval set. The held-out
    metric was mislabeled "Training accuracy" in the original — corrected.
    """
    train_data = get_train_data()
    X, y = process_data(train_data)
    train_X, test_X, train_y, test_y = train_test_split(X, y)
    eval_data = get_eval_data()
    eval_X, eval_y = process_data(eval_data)

    for k in tqdm(range(2, 10)):
        print(k)
        knn = KNeighborsClassifier(n_neighbors=k)
        knn.fit(train_X, train_y)
        # Held-out split accuracy, not accuracy on the fitted data.
        pred_y = knn.predict(test_X)
        print("Test accuracy: ",
              np.sum(pred_y == test_y) / np.size(test_y))
        pred_y = knn.predict(eval_X)
        print('Eval accuracy:', np.sum(pred_y == eval_y) / np.size(eval_y))
Example #5
def train(args):

    # all_input, all_output = get_data()
    # 75% of data is training
    # train_inp, train_out = all_input[:int(.75*len(all_input))], all_output[:int(.75*len(all_input))]

    train_inp, train_out = get_train_data()
    print "train data loaded"
    no_of_batches = len(train_inp) / BATCH_SIZE

    # 25% is testing data
    # test_inp, test_out = all_input[int(.75*len(all_input)):], all_output[int(.75*len(all_input)):]

    test_inp, test_out = get_test_data()
    print "test data loaded"

    data = tf.placeholder(tf.float32, [None, MAX_SEQ_LEN, WORD_DIM])
    target = tf.placeholder(tf.float32, [None, MAX_SEQ_LEN, NUM_CLASSES])
    dropout = tf.placeholder(tf.float32)
    model = Model(data, target, dropout, NUM_HIDDEN, NUM_LAYERS)

    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())
        saver = tf.train.Saver()
        if args.restore is not None:
            saver.restore(sess, "model.ckpt")
            print "last model restored"

        for epoch in range(NUM_EPOCH):
            ptr = 0
            for _ in range(no_of_batches):
                batch_inp, batch_out = train_inp[ptr : ptr + BATCH_SIZE], train_out[ptr : ptr + BATCH_SIZE]
                ptr += BATCH_SIZE
                sess.run(model.optimize, {data: batch_inp, target: batch_out, dropout: 0.5})
            error = sess.run(model.error, {data: test_inp, target: test_out, dropout: 1})
            print ("Epoch {:2d} error {:3.1f}%".format(epoch + 1, error * 100))
            if epoch % 10 == 0:
                save_path = saver.save(sess, "model.ckpt")
                print ("Model saved in file: %s" % save_path)
Example #6
def train(args):

    train_inp, train_out = get_train_data()
    print "train data loaded"
    no_of_batches = len(train_inp) / BATCH_SIZE

    test_inp, test_out = get_test_data()
    print "test data loaded"


    data = tf.placeholder(tf.float32,[None, MAX_SEQ_LEN, WORD_DIM])
    target = tf.placeholder(tf.float32, [None, MAX_SEQ_LEN, NUM_CLASSES])
    dropout = tf.placeholder(tf.float32)
    model = Model(data,target,dropout,NUM_HIDDEN,NUM_LAYERS)

    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())
        saver = tf.train.Saver()
        if args.restore is not None:
            saver.restore(sess, 'model.ckpt')
            print "last model restored"

        for epoch in range(100):
            ptr=0
            for _ in range(no_of_batches):
                batch_inp, batch_out = train_inp[ptr:ptr+BATCH_SIZE], train_out[ptr:ptr+BATCH_SIZE]
                ptr += BATCH_SIZE
                sess.run(model.optimize,{data: batch_inp, target : batch_out, dropout: 0.5})
            if epoch % 10 == 0:
                save_path = saver.save(sess, "model.ckpt")
                print("Model saved in file: %s" % save_path)
        
            error = sess.run(model.error, { data:test_inp, target: test_out, dropout: 1})
            print('Epoch {:2d} error {:3.1f}%'.format(epoch + 1, error*100))
                
            pred = sess.run(model.prediction, {data: test_inp, target: test_out, dropout: 1})
            pred,length = sess.run(model.getpredf1, {data: test_inp, target: test_out, dropout: 1})
            f1(pred,test_out,length)
Example #7
def main():
    """Train a small dense Keras network, report accuracies, save model.h5.

    Seeds numpy for reproducible splits/initialization. The first metric is
    computed on the held-out split from train_test_split — the original
    printed it as "Training accuracy", which was misleading; corrected here.
    """
    np.random.seed(2410)

    train_data = get_train_data()
    X, y = process_data(train_data)
    train_X, test_X, train_y, test_y = train_test_split(X, y)
    eval_data = get_eval_data()
    eval_X, eval_y = process_data(eval_data)

    # Two hidden layers feeding a single sigmoid unit: binary classification
    # trained with binary cross-entropy.
    model = Sequential()
    model.add(Dense(20, input_dim=len(train_X.columns)))
    model.add(Dense(10))
    model.add(Dense(1, activation="sigmoid"))
    model.compile(optimizer='rmsprop',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    model.fit(train_X, train_y, epochs=10, batch_size=32)

    # Round sigmoid outputs to hard 0/1 labels before comparing.
    pred_y = np.round(model.predict(test_X).flatten())
    print(np.unique(pred_y, return_counts=True))
    print("Test accuracy: ", np.sum(pred_y == test_y) / np.size(test_y))
    pred_y = np.round(model.predict(eval_X).flatten())
    print('Eval accuracy:', np.sum(pred_y == eval_y) / np.size(eval_y))

    model.save('model.h5')
Example #8
def train(args):
    """Train for 200 epochs, tracking the best test-set F1 (Python 2 / TF 1.x).

    Every epoch the model is scored on the test set ("TestA"); whenever the
    F1 improves on the best seen so far, the weights are checkpointed to
    "model_max.ckpt" and the final set ("TestB") is also scored.

    Args:
        args: parsed CLI args; if args.restore is not None, weights are
            restored from 'model.ckpt' before training.
    """

    train_inp, train_out = get_train_data()
    print "train data loaded"
    # Ceiling division (Python 2 integer /): includes the trailing partial batch.
    no_of_batches = (len(train_inp) + BATCH_SIZE - 1) / BATCH_SIZE

    test_inp, test_out = get_test_data()
    print "test data loaded"

    final_inp, final_out = get_final_data()
    print "final data loaded"

    # Graph inputs: padded sequences and their per-token class targets.
    data = tf.placeholder(tf.float32, [None, MAX_SEQ_LEN, WORD_DIM])
    target = tf.placeholder(tf.float32, [None, MAX_SEQ_LEN, NUM_CLASSES])
    # Presumably a keep-probability (0.5 train, 1 eval) — confirm in Model.
    dropout = tf.placeholder(tf.float32)
    model = Model(data, target, dropout, NUM_HIDDEN, NUM_LAYERS)
    maximum = 0  # best test-set F1 observed so far

    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())
        saver = tf.train.Saver()
        if args.restore is not None:
            saver.restore(sess, 'model.ckpt')
            print "last model restored"

        for epoch in range(200):
            ptr = 0
            for _ in range(no_of_batches):
                batch_inp, batch_out = train_inp[
                    ptr:ptr + BATCH_SIZE], train_out[ptr:ptr + BATCH_SIZE]
                ptr += BATCH_SIZE
                sess.run(model.optimize, {
                    data: batch_inp,
                    target: batch_out,
                    dropout: 0.5
                })
            # Periodic checkpoint regardless of score.
            if epoch % 10 == 0:
                save_path = saver.save(sess, "model.ckpt")
                print("Model saved in file: %s" % save_path)
            # NOTE(review): this model.prediction fetch is immediately
            # overwritten by the getpredf1 fetch below — looks redundant;
            # confirm it has no needed side effects before removing.
            pred = sess.run(model.prediction, {
                data: test_inp,
                target: test_out,
                dropout: 1
            })
            pred, length = sess.run(model.getpredf1, {
                data: test_inp,
                target: test_out,
                dropout: 1
            })
            print "Epoch:" + str(epoch), "TestA score,"
            m = f1(pred, test_out, length)
            if m > maximum:
                # New best test F1: checkpoint separately and score TestB.
                maximum = m
                save_path = saver.save(sess, "model_max.ckpt")
                print("Max Model saved in file: %s" % save_path)
                pred = sess.run(model.prediction, {
                    data: final_inp,
                    target: final_out,
                    dropout: 1
                })
                pred, length = sess.run(model.getpredf1, {
                    data: final_inp,
                    target: final_out,
                    dropout: 1
                })
                print "TestB score,"
                f1(pred, final_out, length)