示例#1
0
def main(name, num_epochs):
    """Train the model on a differentiable F2 loss with early stopping.

    Batches are read from two ``ServerDataStream`` objects: the training
    stream is created without an explicit port, the validation stream on
    port 5558. After every epoch the mean validation F2 (computed on
    predictions thresholded at 0.5) is compared with the best value seen so
    far. On improvement the current parameter values are written to
    ``<name>.npz`` and the per-epoch training losses to ``<name>.pkl``.
    Training stops after ``num_epochs`` epochs or after five consecutive
    epochs without improvement, whichever comes first.

    Args:
        name: Prefix used for the ``.npz`` and ``.pkl`` output files.
        num_epochs: Maximum number of training epochs.
    """
    train_stream = ServerDataStream(('features', 'labels'),
                                    produces_examples=False)

    valid_stream = ServerDataStream(('features', 'labels'),
                                    produces_examples=False,
                                    port=5558)

    X = tensor.ftensor4('images')
    y = tensor.imatrix('targets')

    prediction_train, prediction_test, params = get_model(X)

    # NOTE(review): this cross-entropy loss is replaced by `-f2_diff` a few
    # lines below before it is ever used, so it does not affect training.
    loss = lasagne.objectives.binary_crossentropy(prediction_train, y)
    loss = loss.mean()

    # `f2` is computed on hard 0/1 predictions and is only reported;
    # `f2_diff` is computed on the raw predictions and is what gets optimized.
    prediction_01 = tensor.ge(prediction_train, numpy.float32(.5))
    f2 = f2_score(prediction_01, y)
    f2_diff = f2_score(prediction_train, y)
    loss = -f2_diff

    updates = lasagne.updates.nesterov_momentum(loss,
                                                params,
                                                learning_rate=1e-3,
                                                momentum=0.9)

    train_fn = function([X, y], loss, updates=updates)
    valid_fn = function([X, y], f2)

    # Number of consecutive non-improving epochs tolerated before stopping.
    max_patience = 5

    best_valid_score = 0
    patience = 0
    all_train_loss = []
    iteration = 0
    for epoch in range(num_epochs):
        f2_valid_loss = []
        f2_train_loss = []
        for imgs, targets in train_stream.get_epoch_iterator():
            f2_train_loss.append(train_fn(imgs, targets))
            iteration += 1
        all_train_loss.append(f2_train_loss)
        # The training loss is the negated F2, so flip the sign for display.
        train_score = -numpy.mean(numpy.asarray(f2_train_loss))
        print('Iteration %d' % (iteration, ))
        print('train score : {0}'.format(train_score))
        for imgs, targets in valid_stream.get_epoch_iterator():
            f2_valid_loss.append(valid_fn(imgs, targets))
        valid_score = numpy.mean(numpy.asarray(f2_valid_loss))
        print('valid score : {0}'.format(valid_score))
        if best_valid_score < valid_score:
            best_valid_score = valid_score
            patience = 0
            param_values = [p.get_value() for p in params]
            # NOTE(review): the list is passed as one positional argument, so
            # it is stored under a single key rather than one key per
            # parameter; confirm the loading code expects that layout.
            numpy.savez_compressed('%s.npz' % (name, ), param_values)
            # Use a context manager so the history file is flushed and closed
            # immediately instead of relying on garbage collection.
            with open('%s.pkl' % (name, ), 'wb') as history_file:
                pickle.dump(all_train_loss, history_file)
        else:
            patience += 1
            if patience == max_patience:
                break
        print('patience : {0}'.format(patience))
        print('\n')
示例#2
0
 def value(self, th=0.3, bestf2=False):
     """Return the F2 score of the stored labels and predictions.

     Args:
         th: Threshold applied to every class when ``bestf2`` is false.
         bestf2: When true, ignore ``th``; instead run
             ``utils.f2_opti_score`` over an ascending and a descending
             threshold grid, average the two results, store the average in
             ``self.best_th`` and score with it.

     Returns:
         Whatever ``utils.f2_score`` returns for the chosen thresholds.
     """
     y_true = np.array(self.labels)
     y_pred = np.array(self.preds)
     n_cls = self.num_classes

     if bestf2:
         # Search the grid in both directions and take the midpoint of the
         # two answers.
         ascending = utils.f2_opti_score(y_true,
                                         y_pred,
                                         thresholds=np.arange(0, 1, 0.01),
                                         num_classes=n_cls)
         descending = utils.f2_opti_score(y_true,
                                          y_pred,
                                          thresholds=np.arange(1, 0, -0.01),
                                          num_classes=n_cls)
         self.best_th = (ascending + descending) / 2
         thresholds = self.best_th
     else:
         # Same fixed threshold for every class.
         thresholds = np.zeros(n_cls) + th

     return utils.f2_score(y_true, y_pred, thresholds, num_classes=n_cls)
示例#3
0
def loop(model, criterion, optimizer, data, target, device, loss_log,
         acc_log, end, train=True):
    """Process one batch: forward pass, metric logging, optional update.

    Args:
        model: Callable applied to the batch to produce the output.
        criterion: Callable ``criterion(output, target)`` returning the loss.
        optimizer: Optimizer stepped and then zeroed when ``train`` is true.
        data: Input batch; moved to ``device`` before use.
        target: Target batch; moved to ``device`` before use.
        device: Device passed to ``.to()`` for both batches.
        loss_log: Meter whose ``update(value, n)`` receives the loss value.
        acc_log: Meter whose ``update(value, n)`` receives the F2 score.
        end: Not used by this function.
        train: When true, back-propagate and apply an optimizer step.
    """
    data = data.to(device)
    target = target.to(device)

    predictions = model(data)
    batch_loss = criterion(predictions, target)
    batch_f2 = f2_score(predictions, target)

    # Both meters are weighted by the number of samples in the batch.
    batch_size = data.size(0)
    loss_log.update(batch_loss.item(), batch_size)
    acc_log.update(batch_f2, batch_size)

    if not train:
        return

    batch_loss.backward()
    optimizer.step()
    # Gradients are cleared after the step, ready for the next batch.
    optimizer.zero_grad()
示例#4
0
                                produces_examples=False)

# Validation batches are read from a data server on port 5558.
valid_stream = ServerDataStream(('features', 'labels'),
                                produces_examples=False,
                                port=5558)

# Symbolic inputs for the compiled functions below.
X = tensor.ftensor4('images')
y = tensor.imatrix('targets')

prediction_train, prediction_test, params = get_model(X)

# NOTE(review): this cross-entropy loss is overwritten by `loss = -f2_diff`
# below before it is used, so it has no effect on training.
loss = lasagne.objectives.binary_crossentropy(prediction_train, y)
loss = loss.mean()

# `f2` scores the predictions thresholded at 0.5; `f2_diff` scores the raw
# predictions, and its negation becomes the loss that is minimized.
prediction_01 = tensor.ge(prediction_train, numpy.float32(.5))
f2 = f2_score(prediction_01, y)
f2_diff = f2_score(prediction_train, y)
loss = -f2_diff

updates = lasagne.updates.nesterov_momentum(loss,
                                            params,
                                            learning_rate=1e-4,
                                            momentum=0.9)

# train_fn returns the loss and applies the parameter updates;
# valid_fn returns the thresholded F2 without updating anything.
train_fn = function([X, y], loss, updates=updates)
valid_fn = function([X, y], f2)

# State for the epoch loop that follows (presumably early stopping —
# the loop body is not shown here).
best_valid_score = 0
patience = 0
all_train_loss = []
for epoch in range(num_epochs):
示例#5
0
文件: gbdt.py 项目: yiyang186/planet
    # Fold counter, printed next to the class index i_c as a progress marker.
    k_now = 0
    for i_tr, i_vl in skf.split(X_train, y_train[:, i_c]):
        print(i_c, k_now)
        # Training and evaluation datasets for column i_c of the labels,
        # restricted to this fold's row indices and weighted by `w`.
        lgb_train = lgb.Dataset(X_train[i_tr],
                                y_train[i_tr, i_c],
                                weight=w[i_tr],
                                free_raw_data=False)
        lgb_eval = lgb.Dataset(X_train[i_vl],
                               y_train[i_vl, i_c],
                               reference=lgb_train,
                               weight=w[i_vl],
                               free_raw_data=False)
        bst = lgb.train(params,
                        lgb_train,
                        num_boost_round=2000,
                        valid_sets=lgb_eval,
                        early_stopping_rounds=30)
        # Store predictions for the held-out rows of this fold.
        p_tr[i_vl, i_c] = bst.predict(X_train[i_vl],
                                      num_iteration=bst.best_iteration)
        # NOTE(review): this assigns rather than accumulates, so every fold
        # overwrites the previous one and only the last fold's prediction
        # (divided by n_splits) survives. If an average over folds is
        # intended, this probably needs `+=` with y_ts initialized to
        # zeros — confirm against the code that creates y_ts.
        y_ts[:, i_c] = bst.predict(
            X_test, num_iteration=bst.best_iteration) / float(n_splits)
        k_now += 1

# Run the threshold search over an ascending and a descending grid, average
# the two results, and print the F2 score obtained with the averaged value.
th1 = utils.f2_opti_score(y_train, p_tr, thresholds=np.arange(0, 1, 0.01))
th2 = utils.f2_opti_score(y_train, p_tr, thresholds=np.arange(1, 0, -0.01))
th = (th1 + th2) / 2.0
print(utils.f2_score(y_train, p_tr, th, num_classes=17))

# Save the train and test predictions.
np.save('./pred/modelsiftgdbt_date719_pred_train.npy', p_tr)
np.save('./pred/modelsiftgdbt_date719_pred_test.npy', y_ts)
示例#6
0
    # Test features for class i_c: column i_c of nnts, gbts and wnnts placed
    # side by side.
    x_ts = np.c_[nnts[:, i_c], gbts[:, i_c], wnnts[:, i_c]]
    # NOTE(review): k_now is set here but never read or incremented in the
    # visible code.
    k_now = 0
    for i_tr, i_vl in skf.split(x_tr, y_tr[:, i_c]):
        lr = LogisticRegression(class_weight='balanced')
        lr.fit(x_tr[i_tr, :], y_tr[i_tr, i_c])
        # Keep column 1 of predict_proba for the held-out rows of this fold.
        p_tr[i_vl, i_c] = lr.predict_proba(x_tr[i_vl, :])[:,1]

    # Refit on all training rows and predict the test set for this class.
    lr = LogisticRegression(class_weight='balanced')
    lr.fit(x_tr, y_tr[:, i_c])
    y_ts[:, i_c] = lr.predict_proba(x_ts)[:, 1]

# Find the best thresholds: search an ascending and a descending grid and
# average the two results.
th1 = utils.f2_opti_score(y_tr, p_tr, thresholds = np.arange(0, 1, 0.01))
th2 = utils.f2_opti_score(y_tr, p_tr, thresholds = np.arange(1, 0, -0.01))
th = (th1 + th2) / 2.0
# NOTE(review): the returned score is discarded here (neither printed nor
# stored); it would only be shown in an interactive session.
utils.f2_score(y_tr, p_tr, th, num_classes=17)

# Write out the results.
submit_df = utils.to_submit(y_ts, th, test_set, inv_label_map)
submit_df.to_csv('./submit/3model_stacking_date711_no2.csv', index=False)

# NOTE(review): this writes to the same path as the call above, so the first
# CSV is overwritten — confirm whether a different file name was intended.
submit_df1 = utils.to_submit_new(y_ts, th, test_set, inv_label_map)
submit_df1.to_csv('./submit/3model_stacking_date711_no2.csv', index=False)

# KNN stacking
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV

# Search for the best value of k.
n_neighbors = []
for i_c in range(17):
示例#7
0
        # Early stopping: once past `tolerance` epochs and with more than
        # `tolerance` epochs without improvement, save this fold's outputs
        # and leave the epoch loop.
        if epoch > tolerance and best_model.nobetter > tolerance:
            print('Early Stop in Epoch: {}, Best Val-Loss: {:.6f}, Best F2: {:.6f}'.format(
                epoch+1, best_model.best_loss, best_model.best_f2.value(bestf2=True)))
            pred_tr[i_vl, :] = f2_vl.preds
            best_model.save('./model{}-date{}-kf{}.pth'.format(model_name, date, k_now+1))

            # Predict the test set with the best weights recorded for this fold.
            tst_loader = utils.test_loader(test_file_all, transform_vl, batch_size, pic_size)
            estimator.model.load_state_dict(best_model.best_model)
            pred_ts_temp = estimator.predict(tst_loader)
            # NOTE(review): this assigns rather than accumulates, so pred_ts
            # holds only the current fold's prediction divided by n_splits.
            # If an average over folds is intended this probably needs `+=`
            # with pred_ts initialized to zeros — confirm.
            pred_ts =  pred_ts_temp / float(n_splits)
            np.save('./model{}_date{}_pred_train_kf{}.npy'.format(model_name, date, k_now+1), pred_tr)
            np.save('./model{}_date{}_pred_test_kf{}.npy'.format(model_name, date, k_now+1), pred_ts_temp)
            break

        # Print the training and validation scores for every epoch.
        print('{} [{}/{}] {}s, Loss: {:.4f}, Val-Loss: {:.4f}, Best Val-Loss: {:.4f}, Val-F2: {:.2f}'.format(
            k_now+1, epoch+1, num_epoches, int(time() - time_st), loss_tr.avg, loss_vl.avg, 
            best_model.best_loss, f2_vl.value(0.3)))
        gc.collect()
    k_now += 1

# Serialize the validation and test predictions for later use in stacking.
np.save('./model{}_date{}_pred_train.npy'.format(model_name, date), pred_tr)
np.save('./model{}_date{}_pred_test.npy'.format(model_name, date), pred_ts)

# Search an ascending and a descending threshold grid and average the results.
th1 = utils.f2_opti_score(y_all, pred_tr, thresholds = np.arange(0, 1, 0.01), num_classes=17)
th2 = utils.f2_opti_score(y_all, pred_tr, thresholds = np.arange(1, 0, -0.01), num_classes=17)
th = (th1 + th2) / 2.0
# NOTE(review): num_classes is passed to f2_opti_score above but not to
# f2_score here — confirm that its default matches 17.
print(utils.f2_score(y_all, pred_tr, th))
submit_df = utils.to_submit(pred_ts, th, test_set, inv_label_map)
submit_df.to_csv('./submit/model{}_date{}_no{}.csv'.format(model_name, date, 1), index=False)