def train(team_raw_data, opt):
    """Train an SVM match predictor and save it to disk.

    Feature layout per sample:
    [home team vector, home recent state, away team vector, away recent state].

    Parameters
    ----------
    team_raw_data : mapping from team id to a per-team vector (has .tolist()).
    opt : options object; opt.dataset is "train" (train split only) or
        "all" (train + test splits).
    """
    train_x = []
    train_y = []
    train_data = train_data_func()
    if opt.dataset == "all":
        # BUG FIX: the original did `train_data += test_data_func` — appending
        # the function object itself, which raises TypeError. Extend with the
        # data it returns instead.
        train_data += test_data_func()
    for x in train_data:
        home = x[1]
        away = x[0]
        home_vector = team_raw_data[home]
        away_vector = team_raw_data[away]
        away_state = x[2:4]
        home_state = x[4:6]
        input_vector = home_vector.tolist() + home_state + away_vector.tolist() + away_state
        train_x.append(input_vector)
        # Last column is the match-result label.
        train_y.append(x[-1])
    __svm = svm.svm_model(train_x, train_y)
    if opt.dataset == "train":
        # BUG FIX: original filename 'train_svm_%d.pkl.pkl' carried an
        # unformatted %d placeholder and a doubled extension.
        svm.save_model(__svm, 'train_svm.pkl')
    elif opt.dataset == "all":
        svm.save_model(__svm, 'all_svm.pkl')
    else:
        print("choose dataset error")
def train(team_raw_data, opt):
    """Train the naive-Bayes match predictor, save it, then evaluate it.

    Feature layout per sample:
    [home team vector, away team vector, home recent state, away recent state]
    — NOTE(review): this ordering differs from the SVM variant; verify the
    two model families are not expected to share features.

    Parameters
    ----------
    team_raw_data : mapping from team id to a per-team vector (has .tolist()).
    opt : options object; opt.dataset is "train" or "all".
    """
    train_x = []
    train_y = []
    train_data = train_data_func()
    if opt.dataset == "all":
        # BUG FIX: the original did `train_data += test_data_func` — appending
        # the function object itself (TypeError) while the fetched test_data
        # variable went unused.
        train_data += test_data_func()
    bayes_model = bayes.Bayes()
    for x in train_data:
        home = x[1]
        away = x[0]
        home_vector = team_raw_data[home]
        away_vector = team_raw_data[away]
        away_state = np.array(x[2:4])
        home_state = np.array(x[4:6])
        input_vector = home_vector.tolist() + away_vector.tolist() \
            + home_state.tolist() + away_state.tolist()
        train_x.append(np.array(input_vector))
        # Last column is the match-result label.
        train_y.append(x[-1])
    bayes_model.train(train_x, train_y)
    bayes_model.save_model()
    # Immediately evaluate the freshly saved model.
    test(team_raw_data, opt)
def test(team_raw_data):
    """Evaluate the stored SVM model on the test split and log each prediction.

    Prints per-sample lines and a final correct/wrong/accuracy summary;
    mirrors each per-sample line into log/svm.log.
    """
    __test_data = test_data_func()
    svm_model = svm.load_model('svm_model_params.pkl')
    correct = 0
    wrong = 0
    # IMPROVEMENT: context manager guarantees the log is closed even if
    # prediction raises (original used a bare open()/close() pair).
    with open('log/svm.log', 'w+') as log_file:
        for x in __test_data:
            home = x[1]
            away = x[0]
            home_vector = team_raw_data[home]
            away_vector = team_raw_data[away]
            away_state = x[2:4]
            home_state = x[4:6]
            input_vector = home_vector.tolist() + home_state + away_vector.tolist() + away_state
            pred = svm.predict(svm_model, input_vector)
            print(pred)
            # Threshold the raw score at 0.5 to get a class id.
            pred_id = 1
            if pred < 0.5:
                pred_id = 0
            line = 'SVM: Pred:%s Real: %s Confidence=%s' % (pred_id, x[-1], pred)
            if pred_id == x[-1]:
                correct += 1
            else:
                wrong += 1
            print(line)
            log_file.write(line + '\n')
    print("Correct: %s Wrong: %s" % (correct, wrong))
    print("Acc=%s" % (float(correct) / float(correct + wrong)))
def train(team_raw_data, opt):
    """Fit the boosting model on match data and persist it.

    Each sample is the element-wise difference of the home and away team
    vectors, concatenated with the home and away recent-state slices;
    the label is the final column of the record.
    """
    samples = train_data_func()
    if opt.dataset == "all":
        samples += test_data_func()
    booster = boost.Boost()
    features = []
    labels = []
    for record in samples:
        home_id = record[1]
        away_id = record[0]
        # Difference vector captures relative team strength.
        diff = team_raw_data[home_id] - team_raw_data[away_id]
        features.append(diff.tolist() + record[4:6] + record[2:4])
        labels.append(record[-1])
    booster.train(features, labels)
    booster.save_model()
def test(team_raw_data):
    """Evaluate the stored boosting model on the test split.

    Prints the AUC, runs the accuracy helper, and dumps one
    "label,prediction" pair per line to log/xgboost_test.log.
    """
    records = test_data_func()
    booster = boost.Boost()
    booster.load_model()
    features = []
    labels = []
    for rec in records:
        home_vec = team_raw_data[rec[1]]
        away_vec = team_raw_data[rec[0]]
        # [home vector, home state, away vector, away state]
        features.append(home_vec.tolist() + rec[4:6] + away_vec.tolist() + rec[2:4])
        labels.append(rec[-1])
    predictions = booster.predict(features)
    auc_score = auc(labels, predictions)
    acc(labels, predictions)
    print("AUC:%s" % auc_score)
    with open('log/xgboost_test.log', 'w+') as log:
        for truth, guess in zip(labels, predictions):
            log.write('%s,%s\n' % (truth, guess))
def test(team_data, opt):
    """Evaluate the saved DNN named by opt.model_name on the test split.

    Prints the AUC over the test set plus correct/wrong counts.
    Record layout (per SOURCE indexing): [away_id, home_id, away_state(2),
    home_state(2), home_score, away_score, result].
    """
    data_provider = MatchData(1000)
    data_provider.roll_data()
    testing_data = test_data_func()
    # IMPROVEMENT: dropped the original open('testing.log', 'w+') — it was
    # never written to or closed (leaked the handle and truncated the file).
    correct = 0
    wrong = 0
    y_label = []
    y_pred = []
    for i in range(len(testing_data)):
        away_id = testing_data[i][0]
        home_id = testing_data[i][1]
        away_current_state = testing_data[i][2:4]
        home_current_state = testing_data[i][4:6]
        score = [testing_data[i][7], testing_data[i][6]]
        away_vector = team_data[away_id]
        home_vector = team_data[home_id]
        result = [testing_data[i][8]]
        prob = predict(opt.model_name, home_state=home_current_state,
                       home_vector=home_vector, away_state=away_current_state,
                       away_vector=away_vector, opt=opt)
        pred_win = np.argmax(prob.data.cpu().numpy())
        y_label.append(result)
        # Probability of class 1 (home win, presumably — TODO confirm).
        y_pred.append(prob.data.cpu().numpy()[1])
        # BUG FIX: original compared `pred_win == result` — an int against a
        # one-element list, which is always False, so every sample counted
        # as wrong. Compare against the scalar label.
        if pred_win == result[0]:
            correct += 1
        else:
            wrong += 1
    auc_score = auc(y_label, y_pred)
    print("TEST: auc score: %s" % auc_score)
    print("Wrong: %s Correct: %s" % (wrong, correct))
def test(team_raw_data, opt):
    """Evaluate the stored naive-Bayes model on the test split.

    Prints one line per sample, then correct/wrong counts, accuracy, and AUC.
    Feature layout matches the Bayes train(): [home vector, away vector,
    home state, away state].
    """
    __test_data = test_data_func()
    bayes_model = bayes.Bayes()
    bayes_model.load_model()
    correct = 0
    wrong = 0
    probs = []
    y = []
    # BUG FIX: the original wrote to 'log/svm.log' (copy-paste from the SVM
    # test), clobbering the SVM results; use a dedicated bayes log. Also
    # context-managed so the handle is closed on error.
    with open('log/bayes.log', 'w+') as log_file:
        for x in __test_data:
            home = x[1]
            away = x[0]
            home_vector = team_raw_data[home]
            away_vector = team_raw_data[away]
            away_state = np.array(x[2:4])
            home_state = np.array(x[4:6])
            input_vector = home_vector.tolist() + away_vector.tolist() \
                + home_state.tolist() + away_state.tolist()
            input_vector = np.array(input_vector)
            # predict() returns per-class probabilities for one sample.
            pred = bayes_model.predict([input_vector])[0]
            if pred[0] > pred[1]:
                pred_id = 0
            else:
                pred_id = 1
            line = 'Bayes: Pred:%s Real: %s Confidence=%s' % (pred_id, x[-1], pred[pred_id])
            if pred_id == x[-1]:
                correct += 1
            else:
                wrong += 1
            print(line)
            probs.append(pred[pred_id])
            y.append(x[-1])
            log_file.write(line + '\n')
    print("Correct: %s Wrong: %s" % (correct, wrong))
    print("Acc=%s" % (float(correct) / float(correct + wrong)))
    print("AUC=%s" % (auc(y, probs)))
def test(team_data, opt):
    """Evaluate the saved DNN named by opt.model_name on the test split.

    Prints the AUC over the test set plus correct/wrong counts.
    Record layout (per SOURCE indexing): [away_id, home_id, away_state(2),
    home_state(2), home_score, away_score, result].
    """
    testing_data = test_data_func()
    correct = 0
    wrong = 0
    y_label = []
    y_pred = []
    for i in range(len(testing_data)):
        away_id = testing_data[i][0]
        home_id = testing_data[i][1]
        away_current_state = testing_data[i][2:4]
        home_current_state = testing_data[i][4:6]
        score = [testing_data[i][7], testing_data[i][6]]
        away_vector = team_data[away_id]
        home_vector = team_data[home_id]
        result = [testing_data[i][8]]
        prob = predict(opt.model_name, home_state=home_current_state,
                       home_vector=home_vector, away_state=away_current_state,
                       away_vector=away_vector, opt=opt)
        pred_win = np.argmax(prob.data.cpu().numpy())
        y_label.append(result)
        # Probability of class 1 (home win, presumably — TODO confirm).
        y_pred.append(prob.data.cpu().numpy()[1])
        # BUG FIX: original compared `pred_win == result` — an int against a
        # one-element list, always False, so every sample counted as wrong.
        if pred_win == result[0]:
            correct += 1
        else:
            wrong += 1
    auc_score = auc(y_label, y_pred)
    print("TEST: auc score: %s" % auc_score)
    print("Wrong: %s Correct: %s" % (wrong, correct))
def train_dnn_batch(epoches, team_data, opt):
    """Train the SimDNN with mini-batches.

    Uses Adamax with a stepped learning-rate schedule (÷5 at epoch 15,
    ÷10 at epoch 25). After each epoch the model is checkpointed; when
    training on the train split only, it is also evaluated via test().

    Parameters
    ----------
    epoches : number of passes over the training data.
    team_data : mapping from team id to a team feature vector.
    opt : options (batch_size, cuda flag, dataset selector, model_name).
    """
    batch_size = opt.batch_size
    LEARNING_RATE = 0.0005
    dnn = SimDNN(TEAM_VECTOR_SIZE)
    if opt.cuda == 1:
        dnn.cuda()
    dnn_optimizer = optimizer.Adamax(dnn.parameters(), lr=LEARNING_RATE)
    prob_criterion = torch.nn.CrossEntropyLoss()
    score_criterion = torch.nn.MSELoss()
    train_data = train_data_func()
    if opt.dataset == "all":
        test_data = test_data_func()
        train_data += test_data
    print("Starting to train with DNN")
    for epoch in range(epoches):
        random.shuffle(train_data)
        # Stepped learning-rate decay.
        if epoch == 15:
            LEARNING_RATE = LEARNING_RATE / 5.0
            for param_group in dnn_optimizer.param_groups:
                param_group['lr'] = LEARNING_RATE
        if epoch == 25:
            LEARNING_RATE = LEARNING_RATE / 10.0
            for param_group in dnn_optimizer.param_groups:
                param_group['lr'] = LEARNING_RATE
        # Trailing partial batch is dropped.
        for i in range(0, len(train_data) - batch_size, batch_size):
            batch_home_current_state = Variable(
                torch.zeros((batch_size, CURRENT_COMP_VECTOR_SIZE)))
            batch_away_current_state = Variable(
                torch.zeros((batch_size, CURRENT_COMP_VECTOR_SIZE)))
            batch_home_vector = Variable(
                torch.zeros((batch_size, TEAM_VECTOR_SIZE)))
            batch_away_vector = Variable(
                torch.zeros((batch_size, TEAM_VECTOR_SIZE)))
            batch_score = Variable(torch.zeros((batch_size, 2)))
            batch_result = []
            for p in range(batch_size):
                away_id = train_data[i + p][0]
                home_id = train_data[i + p][1]
                away_current_state = train_data[i + p][2:4]
                home_current_state = train_data[i + p][4:6]
                score = [train_data[i + p][7], train_data[i + p][6]]
                away_vector = team_data[away_id]
                home_vector = team_data[home_id]
                result = train_data[i + p][8]
                batch_home_current_state[p] = Variable(
                    torch.FloatTensor(home_current_state))
                batch_away_current_state[p] = Variable(
                    torch.FloatTensor(away_current_state))
                batch_away_vector[p] = Variable(torch.FloatTensor(away_vector))
                batch_home_vector[p] = Variable(torch.FloatTensor(home_vector))
                batch_score[p] = Variable(torch.FloatTensor(score))
                batch_result.append(result)
            batch_result = Variable(torch.LongTensor(batch_result))
            if opt.cuda == 1:  # Use GPU
                batch_home_current_state = batch_home_current_state.cuda()
                # BUG FIX: the original assigned batch_home_current_state.cuda()
                # here, so on GPU the away state was silently replaced by the
                # home state.
                batch_away_current_state = batch_away_current_state.cuda()
                batch_home_vector = batch_home_vector.cuda()
                batch_away_vector = batch_away_vector.cuda()
                batch_result = batch_result.cuda()
                batch_score = batch_score.cuda()
            output_prob = dnn.forward(home_vector=batch_home_vector,
                                      home_state=batch_home_current_state,
                                      away_vector=batch_away_vector,
                                      away_state=batch_away_current_state)
            loss = prob_criterion(output_prob, batch_result)
            dnn_optimizer.zero_grad()
            loss.backward()
            dnn_optimizer.step()
            if i % 100 == 0:
                print("Epoches: %s Sample: %s Loss: %s"
                      % (epoch, i + 1, loss.data[0]))
        # Checkpoint (and, on the train split, evaluate) after each epoch.
        if opt.dataset == "train":
            save_model(dnn, 'train_dnn_%d.pkl' % epoch)
            opt.model_name = 'train_dnn_%d.pkl' % epoch
            test(team_data, opt)
        elif opt.dataset == "all":
            save_model(dnn, 'all_dnn_%d.pkl' % epoch)
        else:
            print("dataset error")
def train_dnn(epoches, team_data, opt):
    """Train the SimDNN one sample at a time (no mini-batching).

    Uses RMSprop with a stepped learning-rate schedule (÷5 at epoch 35,
    ÷10 at epoch 100). After each epoch the model is checkpointed; when
    training on the train split only, it is also evaluated via test().

    Parameters
    ----------
    epoches : number of passes over the training data.
    team_data : mapping from team id to a team feature vector.
    opt : options (cuda flag, dataset selector, model_name).
    """
    LEARNING_RATE = 0.0001
    dnn = SimDNN(TEAM_VECTOR_SIZE)
    if opt.cuda == 1:
        dnn.cuda()
    dnn_optimizer = optimizer.RMSprop(dnn.parameters(), lr=LEARNING_RATE)
    prob_criterion = torch.nn.CrossEntropyLoss()
    score_criterion = torch.nn.MSELoss()
    train_data = train_data_func()
    if opt.dataset == "all":
        test_data = test_data_func()
        train_data += test_data
    print("Starting to train with DNN")
    for epoch in range(epoches):
        random.shuffle(train_data)
        # Stepped learning-rate decay.
        if epoch == 35:
            LEARNING_RATE = LEARNING_RATE / 5.0
            for param_group in dnn_optimizer.param_groups:
                param_group['lr'] = LEARNING_RATE
        if epoch == 100:
            LEARNING_RATE = LEARNING_RATE / 10.0
            for param_group in dnn_optimizer.param_groups:
                param_group['lr'] = LEARNING_RATE
        for i in range(len(train_data)):
            away_id = train_data[i][0]
            home_id = train_data[i][1]
            away_current_state = train_data[i][2:4]
            home_current_state = train_data[i][4:6]
            score = [train_data[i][7], train_data[i][6]]
            away_vector = team_data[away_id]
            home_vector = team_data[home_id]
            result = [train_data[i][8]]
            # IMPROVEMENT: the original duplicated every tensor construction
            # in separate cuda / non-cuda branches; build once, then move to
            # GPU when requested — identical behavior, half the code.
            home_current_state = Variable(torch.FloatTensor(home_current_state))
            away_current_state = Variable(torch.FloatTensor(away_current_state))
            away_vector = Variable(torch.FloatTensor(away_vector))
            home_vector = Variable(torch.FloatTensor(home_vector))
            prob = Variable(torch.LongTensor(result))
            score = Variable(torch.FloatTensor(score))
            if opt.cuda == 1:
                home_current_state = home_current_state.cuda()
                away_current_state = away_current_state.cuda()
                away_vector = away_vector.cuda()
                home_vector = home_vector.cuda()
                prob = prob.cuda()
                score = score.cuda()
            # Add a leading batch dimension of 1.
            home_current_state = home_current_state.unsqueeze(0)
            away_current_state = away_current_state.unsqueeze(0)
            home_vector = home_vector.unsqueeze(0)
            away_vector = away_vector.unsqueeze(0)
            output_prob = dnn.forward(home_vector=home_vector,
                                      home_state=home_current_state,
                                      away_vector=away_vector,
                                      away_state=away_current_state)
            loss = prob_criterion(output_prob, prob)
            dnn_optimizer.zero_grad()
            loss.backward()
            dnn_optimizer.step()
            if i % 100 == 0:
                print("Epoches: %s Sample: %s Loss: %s"
                      % (epoch, i + 1, loss.data[0]))
        # Checkpoint (and, on the train split, evaluate) after each epoch.
        if opt.dataset == "train":
            save_model(dnn, 'train_dnn_%d.pkl' % epoch)
            opt.model_name = 'train_dnn_%d.pkl' % epoch
            test(team_data, opt)
        elif opt.dataset == "all":
            save_model(dnn, 'all_dnn_%d.pkl' % epoch)
        else:
            print("dataset error")