Пример #1
0
def train(team_raw_data, opt):
    """
    Fit an SVM on the match samples and save it to disk.

    Every sample row ``x`` is read as: ``x[0]`` away team key, ``x[1]`` home
    team key, ``x[2:4]`` away state, ``x[4:6]`` home state and ``x[-1]`` the
    label.  The feature vector is
    ``home vector + home state + away vector + away state``.

    :param team_raw_data: mapping from team key to a vector that provides
        ``.tolist()`` (presumably a numpy array -- confirm with the caller).
    :param opt: options object; ``opt.dataset`` is ``"train"`` (training
        split only) or ``"all"`` (training split plus test split).  With any
        other value the model is still fitted on the training split, an
        error is printed and nothing is saved.
    :return: None
    """
    train_x = []
    train_y = []
    train_data = train_data_func()
    if opt.dataset == "all":
        # Bug fix: the original added the function object ``test_data_func``
        # itself instead of the rows it returns, which raises TypeError.
        train_data += test_data_func()

    for x in train_data:
        home_vector = team_raw_data[x[1]]
        away_vector = team_raw_data[x[0]]
        away_state = x[2:4]
        home_state = x[4:6]

        train_x.append(home_vector.tolist() + home_state +
                       away_vector.tolist() + away_state)
        train_y.append(x[-1])

    svm_model = svm.svm_model(train_x, train_y)

    # NOTE(review): the ``%d`` in these file names is never interpolated and
    # the extension is doubled.  They are left untouched because other code
    # may load the model by exactly this name -- confirm and clean up.
    if opt.dataset == "train":
        svm.save_model(svm_model, 'train_svm_%d.pkl.pkl')
    elif opt.dataset == "all":
        svm.save_model(svm_model, 'all_svm_%d.pkl.pkl')
    else:
        print("choose dataset error")
def train(team_raw_data, opt):
    """
    Fit the Bayes model on the match samples, save it and run ``test``.

    Every sample row ``x`` is read as: ``x[0]`` away team key, ``x[1]`` home
    team key, ``x[2:4]`` away state, ``x[4:6]`` home state and ``x[-1]`` the
    label.  The feature vector is
    ``home vector + away vector + home state + away state`` as a numpy array.

    :param team_raw_data: mapping from team key to a vector that provides
        ``.tolist()`` (presumably a numpy array -- confirm with the caller).
    :param opt: options object; when ``opt.dataset == "all"`` the test split
        is appended to the training data.  It is also forwarded to ``test``.
    :return: None
    """
    train_x = []
    train_y = []
    train_data = train_data_func()
    if opt.dataset == "all":
        # Bug fix: the original added the function object ``test_data_func``
        # itself instead of the rows it returns, which raises TypeError.
        train_data += test_data_func()

    bayes_model = bayes.Bayes()
    for x in train_data:
        home_vector = team_raw_data[x[1]]
        away_vector = team_raw_data[x[0]]
        away_state = np.array(x[2:4])
        home_state = np.array(x[4:6])

        input_vector = (home_vector.tolist() + away_vector.tolist() +
                        home_state.tolist() + away_state.tolist())
        train_x.append(np.array(input_vector))
        train_y.append(x[-1])

    bayes_model.train(train_x, train_y)
    bayes_model.save_model()
    # Evaluate right after training.
    test(team_raw_data, opt)
Пример #3
0
def test(team_raw_data):
    """
    Evaluate the saved SVM model on the test split.

    Loads ``svm_model_params.pkl``, predicts every test sample, prints each
    prediction and writes it to ``log/svm.log``, then prints the
    correct/wrong counts and the accuracy.

    Every sample row ``x`` is read as: ``x[0]`` away team key, ``x[1]`` home
    team key, ``x[2:4]`` away state, ``x[4:6]`` home state and ``x[-1]`` the
    label.

    :param team_raw_data: mapping from team key to a vector that provides
        ``.tolist()`` (presumably a numpy array -- confirm with the caller).
    :return: None
    """
    test_data = test_data_func()
    svm_model = svm.load_model('svm_model_params.pkl')
    correct = 0
    wrong = 0
    # ``with`` guarantees the log is closed even when a prediction raises;
    # the original leaked the handle in that case.
    with open('log/svm.log', 'w+') as log_file:
        for x in test_data:
            home_vector = team_raw_data[x[1]]
            away_vector = team_raw_data[x[0]]
            away_state = x[2:4]
            home_state = x[4:6]

            input_vector = (home_vector.tolist() + home_state +
                            away_vector.tolist() + away_state)

            pred = svm.predict(svm_model, input_vector)
            print(pred)
            # Scores below 0.5 map to class 0, everything else to class 1.
            pred_id = 0 if pred < 0.5 else 1
            line = 'SVM: Pred:%s Real: %s Confidence=%s' % (pred_id, x[-1],
                                                            pred)
            if pred_id == x[-1]:
                correct += 1
            else:
                wrong += 1
            print(line)
            log_file.write(line + '\n')

    print("Correct: %s Wrong: %s" % (correct, wrong))
    total = correct + wrong
    if total == 0:
        # An empty test split used to crash with ZeroDivisionError.
        print("No test samples, accuracy is undefined")
        return
    print("Acc=%s" % (float(correct) / float(total)))
def train(team_raw_data, opt):
    """
    Fit the boosting model on the match samples and save it.

    Every sample row is read as: index 0 away team key, index 1 home team
    key, ``[2:4]`` away state, ``[4:6]`` home state and the last element the
    label.  The feature vector is
    ``(home vector - away vector) + home state + away state``.

    :param team_raw_data: mapping from team key to a vector supporting
        subtraction and ``.tolist()`` (presumably a numpy array -- confirm).
    :param opt: options object; when ``opt.dataset == "all"`` the test split
        is appended to the training data.
    :return: None
    """
    samples = train_data_func()
    if opt.dataset == "all":
        samples += test_data_func()
    booster = boost.Boost()

    features = []
    labels = []
    for row in samples:
        home_key, away_key = row[1], row[0]
        home_vec = team_raw_data[home_key]
        away_vec = team_raw_data[away_key]

        # Team strength enters as a difference; the states are kept as-is.
        vector_diff = (home_vec - away_vec).tolist()
        features.append(vector_diff + row[4:6] + row[2:4])
        labels.append(row[-1])

    booster.train(features, labels)
    booster.save_model()
def test(team_raw_data):
    """
    Evaluate the saved boosting model on the test split.

    Builds ``home vector + home state + away vector + away state`` for every
    test row, predicts all rows in one call, reports AUC (printed here) and
    accuracy (via ``acc``), and writes ``label,prediction`` lines to
    ``log/xgboost_test.log``.

    :param team_raw_data: mapping from team key to a vector that provides
        ``.tolist()`` (presumably a numpy array -- confirm with the caller).
    :return: None
    """
    rows = test_data_func()
    model = boost.Boost()
    model.load_model()

    labels = []
    features = []
    for row in rows:
        home_key, away_key = row[1], row[0]
        home_vec = team_raw_data[home_key]
        away_vec = team_raw_data[away_key]
        away_state, home_state = row[2:4], row[4:6]

        feature = (home_vec.tolist() + home_state +
                   away_vec.tolist() + away_state)
        labels.append(row[-1])
        features.append(feature)

    predictions = model.predict(features)
    auc_value = auc(labels, predictions)
    acc(labels, predictions)
    print("AUC:%s" % auc_value)

    with open('log/xgboost_test.log', 'w+') as out:
        for idx, predicted in enumerate(predictions):
            out.write('%s,%s' % (labels[idx], predicted))
            out.write('\n')
Пример #6
0
def test(team_data, opt):
    """
    Evaluate the model named by ``opt.model_name`` on the test split.

    For every test row the model is queried through ``predict``; the class
    with the highest output is compared with the label stored at index 8.
    Prints the AUC computed from the class-1 output and the wrong/correct
    counts.

    Every row is read as: index 0 away team id, index 1 home team id,
    ``[2:4]`` away state, ``[4:6]`` home state, index 8 the label.

    :param team_data: mapping from team id to the team vector.
    :param opt: options object; ``opt.model_name`` selects the model and the
        whole object is forwarded to ``predict``.
    :return: None
    """
    # NOTE(review): the provider is not used below; the calls are kept in
    # case MatchData/roll_data have side effects the evaluation relies on.
    data_provider = MatchData(1000)
    data_provider.roll_data()

    testing_data = test_data_func()

    # The original opened this log and never closed it (all writes were
    # commented out).  Keep the create/truncate side effect but release the
    # handle immediately instead of leaking it.
    with open('testing.log', 'w+'):
        pass

    correct = 0
    wrong = 0
    y_label = []
    y_pred = []
    for sample in testing_data:
        away_id = sample[0]
        home_id = sample[1]
        away_current_state = sample[2:4]
        home_current_state = sample[4:6]
        away_vector = team_data[away_id]
        home_vector = team_data[home_id]
        label = sample[8]

        prob = predict(opt.model_name,
                       home_state=home_current_state,
                       home_vector=home_vector,
                       away_state=away_current_state,
                       away_vector=away_vector,
                       opt=opt)

        # Convert once; the original repeated the conversion per use.
        prob_values = prob.data.cpu().numpy()
        pred_win = np.argmax(prob_values)

        # Labels stay wrapped in one-element lists, as the original passed
        # them to ``auc``.
        y_label.append([label])
        y_pred.append(prob_values[1])

        if pred_win == label:
            correct += 1
        else:
            wrong += 1

    auc_score = auc(y_label, y_pred)
    print("TEST: auc score: %s" % auc_score)

    print("Wrong: %s Correct: %s" % (wrong, correct))
def test(team_raw_data, opt):
    """
    Evaluate the saved Bayes model on the test split.

    Predicts every test sample, prints each prediction and writes it to the
    log file, then prints the correct/wrong counts, accuracy and AUC.

    Every sample row ``x`` is read as: ``x[0]`` away team key, ``x[1]`` home
    team key, ``x[2:4]`` away state, ``x[4:6]`` home state and ``x[-1]`` the
    label.

    :param team_raw_data: mapping from team key to a vector that provides
        ``.tolist()`` (presumably a numpy array -- confirm with the caller).
    :param opt: options object; not used by this function.
    :return: None
    """
    test_data = test_data_func()
    bayes_model = bayes.Bayes()
    bayes_model.load_model()
    correct = 0
    wrong = 0
    positive_scores = []
    y = []
    # NOTE(review): the log path says "svm" although this is the Bayes
    # evaluation; left unchanged in case something reads that path.
    # ``with`` guarantees the log is closed even when a prediction raises.
    with open('log/svm.log', 'w+') as log_file:
        for x in test_data:
            home_vector = team_raw_data[x[1]]
            away_vector = team_raw_data[x[0]]
            away_state = np.array(x[2:4])
            home_state = np.array(x[4:6])
            input_vector = np.array(home_vector.tolist() +
                                    away_vector.tolist() +
                                    home_state.tolist() +
                                    away_state.tolist())

            pred = bayes_model.predict([input_vector])[0]
            pred_id = 0 if pred[0] > pred[1] else 1
            line = 'Bayes: Pred:%s Real: %s Confidence=%s' % (
                pred_id, x[-1], pred[pred_id])
            if pred_id == x[-1]:
                correct += 1
            else:
                wrong += 1
            print(line)
            # Bug fix: AUC needs the score of the positive class for every
            # sample.  The original stored the confidence of whichever class
            # was predicted, which does not rank samples by class-1
            # likelihood.  Assumes ``pred[1]`` is the class-1 score, as the
            # pred_id logic above implies.
            positive_scores.append(pred[1])
            y.append(x[-1])
            log_file.write(line + '\n')

    print("Correct: %s Wrong: %s" % (correct, wrong))
    total = correct + wrong
    if total == 0:
        # An empty test split used to crash with ZeroDivisionError.
        print("No test samples, accuracy and AUC are undefined")
        return
    print("Acc=%s" % (float(correct) / float(total)))
    print("AUC=%s" % (auc(y, positive_scores)))
Пример #8
0
def test(team_data, opt):
    """
    Evaluate the model named by ``opt.model_name`` on the test split.

    For every test row the model is queried through ``predict``; the class
    with the highest output is compared with the label stored at index 8.
    Prints the AUC computed from the class-1 output and the wrong/correct
    counts.

    Every row is read as: index 0 away team id, index 1 home team id,
    ``[2:4]`` away state, ``[4:6]`` home state, index 8 the label.

    :param team_data: mapping from team id to the team vector.
    :param opt: options object; ``opt.model_name`` selects the model and the
        whole object is forwarded to ``predict``.
    :return: None
    """
    testing_data = test_data_func()

    correct = 0
    wrong = 0
    y_label = []
    y_pred = []
    # Iterate the rows directly; the unused ``score`` local was dropped.
    for sample in testing_data:
        away_id = sample[0]
        home_id = sample[1]
        away_current_state = sample[2:4]
        home_current_state = sample[4:6]
        away_vector = team_data[away_id]
        home_vector = team_data[home_id]
        label = sample[8]

        prob = predict(opt.model_name,
                       home_state=home_current_state,
                       home_vector=home_vector,
                       away_state=away_current_state,
                       away_vector=away_vector,
                       opt=opt)

        # Convert once; the original repeated the conversion per use.
        prob_values = prob.data.cpu().numpy()
        pred_win = np.argmax(prob_values)

        # Labels stay wrapped in one-element lists, as the original passed
        # them to ``auc``.
        y_label.append([label])
        y_pred.append(prob_values[1])

        if pred_win == label:
            correct += 1
        else:
            wrong += 1

    auc_score = auc(y_label, y_pred)
    print("TEST: auc score: %s" % auc_score)

    print("Wrong: %s Correct: %s" % (wrong, correct))
Пример #9
0
def train_dnn_batch(epoches, team_data, opt):
    """
    Train the SimDNN classifier with mini-batches.

    Every sample row is read as: index 0 away team id, index 1 home team id,
    ``[2:4]`` away state, ``[4:6]`` home state, index 8 the class label.
    The data is shuffled at the start of each epoch; the learning rate is
    divided by 5 at epoch 15 and by a further 10 at epoch 25.  After each
    epoch the model is saved, and when ``opt.dataset == "train"`` it is also
    evaluated with ``test``.

    Samples left over after the last full batch are skipped in that epoch.

    :param epoches: number of passes over the training data.
    :param team_data: mapping from team id to the team vector.
    :param opt: options object; reads ``batch_size``, ``cuda`` (1 = use GPU)
        and ``dataset`` (``"train"`` or ``"all"``), and sets ``model_name``
        when ``dataset == "train"``.
    :return: None
    """
    batch_size = opt.batch_size
    learning_rate = 0.0005
    # Epoch -> divisor applied to the current learning rate at that epoch.
    lr_decay = {15: 5.0, 25: 10.0}

    dnn = SimDNN(TEAM_VECTOR_SIZE)
    if opt.cuda == 1:
        dnn.cuda()
    dnn_optimizer = optimizer.Adamax(dnn.parameters(), lr=learning_rate)
    prob_criterion = torch.nn.CrossEntropyLoss()
    train_data = train_data_func()
    if opt.dataset == "all":
        test_data = test_data_func()
        train_data += test_data
    print("Starting to train with DNN")
    for epoch in range(epoches):
        random.shuffle(train_data)
        if epoch in lr_decay:
            learning_rate = learning_rate / lr_decay[epoch]
            for param_group in dnn_optimizer.param_groups:
                param_group['lr'] = learning_rate

        # Bug fix: ``+ 1`` keeps the last full batch.  The original bound
        # ``len(train_data) - batch_size`` dropped it whenever the data size
        # was an exact multiple of the batch size.
        for i in range(0, len(train_data) - batch_size + 1, batch_size):
            batch_home_current_state = Variable(
                torch.zeros((batch_size, CURRENT_COMP_VECTOR_SIZE)))
            batch_away_current_state = Variable(
                torch.zeros((batch_size, CURRENT_COMP_VECTOR_SIZE)))

            batch_home_vector = Variable(
                torch.zeros((batch_size, TEAM_VECTOR_SIZE)))
            batch_away_vector = Variable(
                torch.zeros((batch_size, TEAM_VECTOR_SIZE)))

            batch_result = []

            for p in range(batch_size):
                sample = train_data[i + p]
                away_id = sample[0]
                home_id = sample[1]

                batch_home_current_state[p] = Variable(
                    torch.FloatTensor(sample[4:6]))
                batch_away_current_state[p] = Variable(
                    torch.FloatTensor(sample[2:4]))
                batch_away_vector[p] = Variable(
                    torch.FloatTensor(team_data[away_id]))
                batch_home_vector[p] = Variable(
                    torch.FloatTensor(team_data[home_id]))
                batch_result.append(sample[8])
            batch_result = Variable(torch.LongTensor(batch_result))

            if opt.cuda == 1:
                # Use GPU
                batch_home_current_state = batch_home_current_state.cuda()
                # Bug fix: the original assigned the *home* state here, so
                # on GPU the network never saw the away state.
                batch_away_current_state = batch_away_current_state.cuda()
                batch_home_vector = batch_home_vector.cuda()
                batch_away_vector = batch_away_vector.cuda()
                batch_result = batch_result.cuda()

            output_prob = dnn.forward(home_vector=batch_home_vector,
                                      home_state=batch_home_current_state,
                                      away_vector=batch_away_vector,
                                      away_state=batch_away_current_state)
            loss = prob_criterion(output_prob, batch_result)

            dnn_optimizer.zero_grad()
            loss.backward()
            dnn_optimizer.step()

            if i % 100 == 0:
                # NOTE(review): ``loss.data[0]`` is the pre-0.4 PyTorch
                # idiom, consistent with the Variable API used here; newer
                # versions need ``loss.item()``.
                print("Epoches: %s Sample: %s Loss: %s" %
                      (epoch, i + 1, loss.data[0]))

        # Save (and, for the "train" split, evaluate) after every epoch.
        if opt.dataset == "train":
            save_model(dnn, 'train_dnn_%d.pkl' % epoch)
            opt.model_name = 'train_dnn_%d.pkl' % epoch
            test(team_data, opt)
        elif opt.dataset == "all":
            save_model(dnn, 'all_dnn_%d.pkl' % epoch)
        else:
            print("dataset error")
Пример #10
0
def train_dnn(epoches, team_data, opt):
    """
    Train the SimDNN classifier one sample at a time (no mini-batches).

    Every sample row is read as: index 0 away team id, index 1 home team id,
    ``[2:4]`` away state, ``[4:6]`` home state, indices 7 and 6 the score
    pair, index 8 the class label.  The data is shuffled at the start of
    each epoch; the learning rate is divided by 5 at epoch 35 and by a
    further 10 at epoch 100.  After each epoch the model is saved, and when
    ``opt.dataset == "train"`` it is also evaluated with ``test``.

    :param epoches: number of passes over the training data.
    :param team_data: mapping from team id to the team vector.
    :param opt: options object; reads ``cuda`` (1 = use GPU) and ``dataset``
        (``"train"`` or ``"all"``), and sets ``model_name`` when
        ``dataset == "train"``.
    :return: None
    """

    def wrap(tensor):
        # Move the raw tensor to the GPU when requested, then wrap it.
        if opt.cuda == 1:
            tensor = tensor.cuda()
        return Variable(tensor)

    LEARNING_RATE = 0.0001
    # Epoch -> divisor applied to the current learning rate at that epoch.
    decay_schedule = {35: 5.0, 100: 10.0}

    dnn = SimDNN(TEAM_VECTOR_SIZE)
    if opt.cuda == 1:
        dnn.cuda()
    dnn_optimizer = optimizer.RMSprop(dnn.parameters(), lr=LEARNING_RATE)
    prob_criterion = torch.nn.CrossEntropyLoss()
    # Created but not used by the loss below; kept as in the original.
    score_criterion = torch.nn.MSELoss()
    train_data = train_data_func()
    if opt.dataset == "all":
        train_data += test_data_func()

    print("Starting to train with DNN")
    for epoch in range(epoches):
        random.shuffle(train_data)
        if epoch in decay_schedule:
            LEARNING_RATE = LEARNING_RATE / decay_schedule[epoch]
            for group in dnn_optimizer.param_groups:
                group['lr'] = LEARNING_RATE

        for i, sample in enumerate(train_data):
            away_id, home_id = sample[0], sample[1]
            raw_away_state = sample[2:4]
            raw_home_state = sample[4:6]
            raw_score = [sample[7], sample[6]]
            raw_away_vector = team_data[away_id]
            raw_home_vector = team_data[home_id]
            label = [sample[8]]

            home_current_state = wrap(torch.FloatTensor(raw_home_state))
            away_current_state = wrap(torch.FloatTensor(raw_away_state))
            away_vector = wrap(torch.FloatTensor(raw_away_vector))
            home_vector = wrap(torch.FloatTensor(raw_home_vector))
            prob = wrap(torch.LongTensor(label))
            # Built like the other inputs but not fed to the loss.
            score = wrap(torch.FloatTensor(raw_score))

            # Add a leading batch dimension of size one to each input.
            output_prob = dnn.forward(
                home_vector=home_vector.unsqueeze(0),
                home_state=home_current_state.unsqueeze(0),
                away_vector=away_vector.unsqueeze(0),
                away_state=away_current_state.unsqueeze(0))

            loss = prob_criterion(output_prob, prob)

            dnn_optimizer.zero_grad()
            loss.backward()
            dnn_optimizer.step()

            if i % 100 == 0:
                print("Epoches: %s Sample: %s Loss: %s" %
                      (epoch, i + 1, loss.data[0]))

        # Save (and, for the "train" split, evaluate) after every epoch.
        if opt.dataset == "train":
            checkpoint = 'train_dnn_%d.pkl' % epoch
            save_model(dnn, checkpoint)
            opt.model_name = checkpoint
            test(team_data, opt)
        elif opt.dataset == "all":
            save_model(dnn, 'all_dnn_%d.pkl' % epoch)
        else:
            print("dataset error")