Пример #1
0
def submission():
    """
    Generate the submission file from the best saved systole/diastole models.

    Two models are built with ``get_model()`` and loaded with the
    ``weights_*_best.hdf5`` weights.  They predict on the pre-processed
    validation data; the predictions are passed through ``correct_cdf`` and
    grouped by ``accumulate_study_results``.  Every row of
    ``../input/sample_submission_validate.csv`` is then copied to
    ``../submissions/submission_13.csv`` as the row id followed by the values
    stored for that id.

    Row ids must have the form ``<key>_<target>`` with exactly one
    underscore: ``<key>`` is converted to ``int`` and used to look up the
    accumulated results, and ``<target>`` selects the diastole results when it
    equals ``'Diastole'`` and the systole results otherwise.  Ids with no
    accumulated result are logged as a miss and written with the id only.
    """
    logging.info('Loading and compiling models...')
    model_systole = get_model()
    model_diastole = get_model()

    logging.info('Loading models weights...')
    model_systole.load_weights('../models/weights/weights_systole_best.hdf5')
    model_diastole.load_weights('../models/weights/weights_diastole_best.hdf5')

    logging.info('Loading validation data...')
    X, ids = load_validation_data()

    logging.info('Pre-processing images...')
    X = preprocess(X)

    batch_size = 32
    logging.info('Predicting on validation data...')
    pred_systole = model_systole.predict(X, batch_size=batch_size, verbose=1)
    pred_diastole = model_diastole.predict(X, batch_size=batch_size, verbose=1)

    # real predictions to CDF
    cdf_pred_systole = correct_cdf(pred_systole)
    cdf_pred_diastole = correct_cdf(pred_diastole)

    logging.info('Accumulating results...')
    sub_systole = accumulate_study_results(ids, cdf_pred_systole)
    sub_diastole = accumulate_study_results(ids, cdf_pred_diastole)

    # write to submission file
    logging.info('Writing submission to file...')
    # Both files are managed by ``with`` so they are closed (and the output
    # flushed) even if a malformed row raises part-way through.
    with open('../input/sample_submission_validate.csv') as template_file, \
            open('../submissions/submission_13.csv', 'w') as out_file:
        fi = csv.reader(template_file)
        fo = csv.writer(out_file, lineterminator='\n')

        # copy the header row of the template unchanged
        fo.writerow(next(fi))

        for line in fi:
            idx = line[0]
            key, target = idx.split('_')
            key = int(key)

            # Pick the result set for this row first and test membership in
            # that same mapping, so the lookup below cannot hit a missing key.
            # Both mappings are built from the same ``ids``.
            results = sub_diastole if target == 'Diastole' else sub_systole

            out = [idx]
            if key in results:
                out.extend(list(results[key][0]))
            else:
                logging.info('Miss {0}'.format(idx))
            fo.writerow(out)

    logging.info('Done.')
Пример #2
0
def train():
    """
    Train the systole and diastole models.

    Loads and pre-processes the training data, splits it with
    ``split_data(..., split_ratio=0.15)`` and then runs ``nb_iter``
    iterations.  Each iteration:

    1. re-augments the training images (rotation, then shift),
    2. fits each model for ``epochs_per_iter`` epoch(s) against the
       ``real_to_cdf`` targets, validating on the held-out split,
    3. optionally logs the CRPS on the training and held-out data,
    4. saves the current weights, plus a ``*_best`` copy whenever the
       validation loss reaches a new minimum,
    5. rewrites ``./logs/val_loss.txt`` with the best validation losses so far
       (systole on the first line, diastole on the second).
    """
    logging.info('Loading and compiling models...')
    model_systole = get_model()
    model_diastole = get_model()

    logging.info('Loading training data...')
    X, y = load_train_data()

    logging.info('Pre-processing images...')
    X = preprocess(X)

    # split to training and test
    X_train, y_train, X_test, y_test = split_data(X, y, split_ratio=0.15)

    nb_iter = 500
    epochs_per_iter = 1
    batch_size = 32
    calc_crps = 1  # calculate CRPS every n-th iteration (set to 0 if CRPS estimation is not needed)

    # Column 0 of y is the systole value, column 1 the diastole value.  The
    # labels never change inside the loop (only the images are re-augmented),
    # so the target conversions are done once here instead of every iteration.
    # NOTE(review): assumes real_to_cdf is a pure, deterministic conversion.
    target_train_systole = real_to_cdf(y_train[:, 0])
    target_train_diastole = real_to_cdf(y_train[:, 1])
    target_test_systole = real_to_cdf(y_test[:, 0])
    target_test_diastole = real_to_cdf(y_test[:, 1])

    # CDF for train and test data (actually a step function); only needed when
    # CRPS evaluation is enabled.
    cdf_train = None
    cdf_test = None
    if calc_crps > 0:
        cdf_train = real_to_cdf(
            np.concatenate((y_train[:, 0], y_train[:, 1])))
        cdf_test = real_to_cdf(
            np.concatenate((y_test[:, 0], y_test[:, 1])))

    # remember min val. losses (best iterations), used as sigmas for submission
    min_val_loss_systole = sys.float_info.max
    min_val_loss_diastole = sys.float_info.max

    logging.info('-' * 50)
    logging.info('Training...')
    logging.info('-' * 50)

    for i in range(nb_iter):
        logging.info('-' * 50)
        logging.info('Iteration {0}/{1}'.format(i + 1, nb_iter))
        logging.info('-' * 50)

        # A fresh augmentation of the original training images each iteration.
        logging.info('Augmenting images - rotations')
        X_train_aug = rotation_augmentation(X_train, 20)
        logging.info('Augmenting images - shifts')
        X_train_aug = shift_augmentation(X_train_aug, 0.1, 0.1)

        logging.info('Fitting systole model...')
        hist_systole = model_systole.fit(
            X_train_aug,
            target_train_systole,
            shuffle=True,
            nb_epoch=epochs_per_iter,
            batch_size=batch_size,
            validation_data=(X_test, target_test_systole))

        logging.info('Fitting diastole model...')
        hist_diastole = model_diastole.fit(
            X_train_aug,
            target_train_diastole,
            shuffle=True,
            nb_epoch=epochs_per_iter,
            batch_size=batch_size,
            validation_data=(X_test, target_test_diastole))

        # validation loss of the last epoch of this iteration; drives the
        # "best weights" bookkeeping below
        val_loss_systole = hist_systole.history['val_loss'][-1]
        val_loss_diastole = hist_diastole.history['val_loss'][-1]

        if calc_crps > 0 and i % calc_crps == 0:
            logging.info('Evaluating CRPS...')
            # predictions on the un-augmented training images and the
            # held-out images
            pred_systole = model_systole.predict(X_train,
                                                 batch_size=batch_size,
                                                 verbose=1)
            pred_diastole = model_diastole.predict(X_train,
                                                   batch_size=batch_size,
                                                   verbose=1)
            val_pred_systole = model_systole.predict(X_test,
                                                     batch_size=batch_size,
                                                     verbose=1)
            val_pred_diastole = model_diastole.predict(X_test,
                                                       batch_size=batch_size,
                                                       verbose=1)

            # CDF for predicted data
            cdf_pred_systole = correct_cdf(pred_systole)
            cdf_pred_diastole = correct_cdf(pred_diastole)
            cdf_val_pred_systole = correct_cdf(val_pred_systole)
            cdf_val_pred_diastole = correct_cdf(val_pred_diastole)

            # evaluate CRPS on training data (systole rows first, then
            # diastole, matching the order used to build cdf_train)
            crps_train = crps(
                cdf_train, np.concatenate(
                    (cdf_pred_systole, cdf_pred_diastole)))
            logging.info('CRPS(train) = {0}'.format(crps_train))

            # evaluate CRPS on test data
            crps_test = crps(
                cdf_test,
                np.concatenate((cdf_val_pred_systole, cdf_val_pred_diastole)))
            logging.info('CRPS(test) = {0}'.format(crps_test))

        logging.info('Saving weights...')
        # save weights so they can be loaded later
        model_systole.save_weights('../models/weights/weights_systole.hdf5',
                                   overwrite=True)
        model_diastole.save_weights('../models/weights/weights_diastole.hdf5',
                                    overwrite=True)

        # for best (lowest) val losses, save weights
        if val_loss_systole < min_val_loss_systole:
            min_val_loss_systole = val_loss_systole
            model_systole.save_weights(
                '../models/weights/weights_systole_best.hdf5', overwrite=True)

        if val_loss_diastole < min_val_loss_diastole:
            min_val_loss_diastole = val_loss_diastole
            model_diastole.save_weights(
                '../models/weights/weights_diastole_best.hdf5', overwrite=True)

        # save best (lowest) val losses in file (to be later used for generating submission);
        # rewritten every iteration so the file is current if training is interrupted
        with open('./logs/val_loss.txt', mode='w+') as f:
            f.write(str(min_val_loss_systole))
            f.write('\n')
            f.write(str(min_val_loss_diastole))
def train():
    """
    Train the systole and diastole models (``real_to_ans`` target variant).

    Loads and pre-processes the training data, splits it with
    ``split_data(..., split_ratio=0.15)`` and then runs ``nb_iter``
    iterations.  Each iteration:

    1. re-augments the training images (rotation, then shift),
    2. fits each model for ``epochs_per_iter`` epoch(s) against the
       ``real_to_ans`` targets, validating on the held-out split,
    3. optionally logs the CRPS on the training and held-out data, using
       ``real_to_cdf`` of the labels as the reference,
    4. saves the current weights to the ``*_1.hdf5`` files, plus a
       ``*_best_1.hdf5`` copy whenever the validation loss reaches a new
       minimum,
    5. rewrites ``./logs/val_loss.txt_1`` with the best validation losses so
       far (systole on the first line, diastole on the second).
    """
    logging.info('Loading and compiling models...')
    model_systole = get_model()
    model_diastole = get_model()

    logging.info('Loading training data...')
    X, y = load_train_data()

    logging.info('Pre-processing images...')
    X = preprocess(X)

    # split to training and test
    X_train, y_train, X_test, y_test = split_data(X, y, split_ratio=0.15)

    nb_iter = 500
    epochs_per_iter = 1
    batch_size = 32
    calc_crps = 1  # calculate CRPS every n-th iteration (set to 0 if CRPS estimation is not needed)

    # Column 0 of y is the systole value, column 1 the diastole value.  The
    # labels never change inside the loop (only the images are re-augmented),
    # so the target conversions are done once here instead of every iteration.
    # NOTE(review): assumes real_to_ans and real_to_cdf are pure, deterministic
    # conversions - confirm against their definitions.
    target_train_systole = real_to_ans(y_train[:, 0])
    target_train_diastole = real_to_ans(y_train[:, 1])
    target_test_systole = real_to_ans(y_test[:, 0])
    target_test_diastole = real_to_ans(y_test[:, 1])

    # CDF for train and test data (actually a step function); only needed when
    # CRPS evaluation is enabled.
    cdf_train = None
    cdf_test = None
    if calc_crps > 0:
        cdf_train = real_to_cdf(
            np.concatenate((y_train[:, 0], y_train[:, 1])))
        cdf_test = real_to_cdf(
            np.concatenate((y_test[:, 0], y_test[:, 1])))

    # remember min val. losses (best iterations), used as sigmas for submission
    min_val_loss_systole = sys.float_info.max
    min_val_loss_diastole = sys.float_info.max

    logging.info('-'*50)
    logging.info('Training...')
    logging.info('-'*50)

    for i in range(nb_iter):
        logging.info('-'*50)
        logging.info('Iteration {0}/{1}'.format(i + 1, nb_iter))
        logging.info('-'*50)

        # A fresh augmentation of the original training images each iteration.
        logging.info('Augmenting images - rotations')
        X_train_aug = rotation_augmentation(X_train, 20)
        logging.info('Augmenting images - shifts')
        X_train_aug = shift_augmentation(X_train_aug, 0.1, 0.1)

        logging.info('Fitting systole model...')
        hist_systole = model_systole.fit(
            X_train_aug,
            target_train_systole,
            shuffle=True,
            nb_epoch=epochs_per_iter,
            batch_size=batch_size,
            validation_data=(X_test, target_test_systole))

        logging.info('Fitting diastole model...')
        hist_diastole = model_diastole.fit(
            X_train_aug,
            target_train_diastole,
            shuffle=True,
            nb_epoch=epochs_per_iter,
            batch_size=batch_size,
            validation_data=(X_test, target_test_diastole))

        # validation loss of the last epoch of this iteration; drives the
        # "best weights" bookkeeping below
        val_loss_systole = hist_systole.history['val_loss'][-1]
        val_loss_diastole = hist_diastole.history['val_loss'][-1]

        if calc_crps > 0 and i % calc_crps == 0:
            logging.info('Evaluating CRPS...')
            # predictions on the un-augmented training images and the
            # held-out images
            pred_systole = model_systole.predict(
                X_train, batch_size=batch_size, verbose=1)
            pred_diastole = model_diastole.predict(
                X_train, batch_size=batch_size, verbose=1)
            val_pred_systole = model_systole.predict(
                X_test, batch_size=batch_size, verbose=1)
            val_pred_diastole = model_diastole.predict(
                X_test, batch_size=batch_size, verbose=1)

            # CDF for predicted data
            cdf_pred_systole = correct_cdf(pred_systole)
            cdf_pred_diastole = correct_cdf(pred_diastole)
            cdf_val_pred_systole = correct_cdf(val_pred_systole)
            cdf_val_pred_diastole = correct_cdf(val_pred_diastole)

            # evaluate CRPS on training data (systole rows first, then
            # diastole, matching the order used to build cdf_train)
            crps_train = crps(
                cdf_train,
                np.concatenate((cdf_pred_systole, cdf_pred_diastole)))
            logging.info('CRPS(train) = {0}'.format(crps_train))

            # evaluate CRPS on test data
            crps_test = crps(
                cdf_test,
                np.concatenate((cdf_val_pred_systole, cdf_val_pred_diastole)))
            logging.info('CRPS(test) = {0}'.format(crps_test))

        logging.info('Saving weights...')
        # save weights so they can be loaded later
        model_systole.save_weights(
            '../models/weights/weights_systole_1.hdf5', overwrite=True)
        model_diastole.save_weights(
            '../models/weights/weights_diastole_1.hdf5', overwrite=True)

        # for best (lowest) val losses, save weights
        if val_loss_systole < min_val_loss_systole:
            min_val_loss_systole = val_loss_systole
            model_systole.save_weights(
                '../models/weights/weights_systole_best_1.hdf5',
                overwrite=True)

        if val_loss_diastole < min_val_loss_diastole:
            min_val_loss_diastole = val_loss_diastole
            model_diastole.save_weights(
                '../models/weights/weights_diastole_best_1.hdf5',
                overwrite=True)

        # save best (lowest) val losses in file (to be later used for generating submission);
        # rewritten every iteration so the file is current if training is interrupted
        with open('./logs/val_loss.txt_1', mode='w+') as f:
            f.write(str(min_val_loss_systole))
            f.write('\n')
            f.write(str(min_val_loss_diastole))