Example #1
        if args.partition == 'none':
            los_ret = metrics.print_metrics_regression(los_y_true, los_pred)

    # pheno
    if args.pheno_C > 0:
        print("\n =================== phenotype ==================")
        pheno_pred = np.array(pheno_pred)
        pheno_ret = metrics.print_metrics_multilabel(pheno_y_true, pheno_pred)

    print("Saving the predictions in test_predictions/task directories ...")

    # ihm
    ihm_path = os.path.join(args.output_dir, "test_predictions/ihm",
                            os.path.basename(args.load_state)) + ".csv"
    ihm_utils.save_results(ihm_names, ihm_pred, ihm_y_true, ihm_path)

    # decomp
    decomp_path = os.path.join(args.output_dir, "test_predictions/decomp",
                               os.path.basename(args.load_state)) + ".csv"
    decomp_utils.save_results(decomp_names, decomp_ts, decomp_pred,
                              decomp_y_true, decomp_path)

    # los
    los_path = os.path.join(args.output_dir, "test_predictions/los",
                            os.path.basename(args.load_state)) + ".csv"
    los_utils.save_results(los_names, los_ts, los_pred, los_y_true, los_path)

    # pheno
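
Example #1 hands the actual CSV writing to task-specific save_results helpers from the benchmark codebase. As a minimal sketch only (the function name, column names, and layout are assumptions for illustration, not the repository's implementation), an ihm-style writer for (name, prediction, label) triples could look like:

import csv

def save_results_sketch(names, predictions, labels, path):
    # Hypothetical writer: one row per ICU stay, binary label in y_true.
    with open(path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['stay', 'prediction', 'y_true'])
        for name, pred, y in zip(names, predictions, labels):
            writer.writerow([name, pred, y])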
Example #2
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--period',
                        type=str,
                        default='all',
                        help='specifies which period to extract features from',
                        choices=[
                            'first4days', 'first8days', 'last12hours',
                            'first25percent', 'first50percent', 'all'
                        ])
    parser.add_argument('--features',
                        type=str,
                        default='all',
                        help='specifies what features to extract',
                        choices=['all', 'len', 'all_but_len'])
    parser.add_argument('--data',
                        type=str,
                        help='Path to the data of in-hospital mortality task',
                        default=os.path.join(
                            os.path.dirname(__file__),
                            '../../../data/in-hospital-mortality/'))
    parser.add_argument(
        '--output_dir',
        type=str,
        help='Directory relative to which all output files are stored',
        default='.')
    args = parser.parse_args()
    print(args)

    train_reader = InHospitalMortalityReader(
        dataset_dir=os.path.join(args.data, 'train'),
        listfile=os.path.join(args.data, 'train_listfile.csv'),
        period_length=48.0)

    val_reader = InHospitalMortalityReader(
        dataset_dir=os.path.join(args.data, 'train'),
        listfile=os.path.join(args.data, 'val_listfile.csv'),
        period_length=48.0)

    test_reader = InHospitalMortalityReader(
        dataset_dir=os.path.join(args.data, 'test'),
        listfile=os.path.join(args.data, 'test_listfile.csv'),
        period_length=48.0)

    #print("shape->",train_reader.read_example(100)['X'].shape)

    print('Reading data and extracting features ...')
    (train_X, train_y,
     train_names) = read_and_extract_features(train_reader, args.period,
                                              args.features)
    (val_X, val_y,
     val_names) = read_and_extract_features(val_reader, args.period,
                                            args.features)
    (test_X, test_y,
     test_names) = read_and_extract_features(test_reader, args.period,
                                             args.features)
    print('  train data shape = {}'.format(train_X.shape))
    print('  validation data shape = {}'.format(val_X.shape))
    print('  test data shape = {}'.format(test_X.shape))

    #print("feature sample->", train_X[11])

    print('Imputing missing values ...')
    # sklearn.preprocessing.Imputer was removed in scikit-learn 0.22;
    # SimpleImputer (from sklearn.impute) is the replacement and imputes
    # column means, matching the old axis=0 behaviour.
    imputer = SimpleImputer(missing_values=np.nan,
                            strategy='mean',
                            copy=True)
    imputer.fit(train_X)
    train_X = np.array(imputer.transform(train_X), dtype=np.float32)
    val_X = np.array(imputer.transform(val_X), dtype=np.float32)
    test_X = np.array(imputer.transform(test_X), dtype=np.float32)

    print('Normalizing the data to have zero mean and unit variance ...')
    scaler = StandardScaler()
    scaler.fit(train_X)
    train_X = scaler.transform(train_X)
    val_X = scaler.transform(val_X)
    test_X = scaler.transform(test_X)

    file_name = 'xgboost_{}.{}.'.format(args.period, args.features)

    xgreg = xgb.XGBRegressor(colsample_bytree=0.4,
                             gamma=0,
                             learning_rate=0.07,
                             max_depth=3,
                             min_child_weight=1.5,
                             n_estimators=10000,
                             reg_alpha=0.75,
                             reg_lambda=0.45,
                             subsample=0.6,
                             seed=42)
    xgreg.fit(train_X, train_y)

    result_dir = os.path.join(args.output_dir, 'results')
    common_utils.create_directory(result_dir)

    with open(os.path.join(result_dir, 'train_{}.json'.format(file_name)),
              'w') as res_file:
        ret = print_metrics_binary(train_y, xgreg.predict(train_X))
        ret = {k: float(v) for k, v in ret.items()}
        json.dump(ret, res_file)

    with open(os.path.join(result_dir, 'val_{}.json'.format(file_name)),
              'w') as res_file:
        ret = print_metrics_binary(val_y, xgreg.predict(val_X))
        ret = {k: float(v) for k, v in ret.items()}
        json.dump(ret, res_file)

    prediction = xgreg.predict(test_X)

    with open(os.path.join(result_dir, 'test_{}.json'.format(file_name)),
              'w') as res_file:
        ret = print_metrics_binary(test_y, prediction)
        ret = {k: float(v) for k, v in ret.items()}
        json.dump(ret, res_file)

    save_results(
        test_names, prediction, test_y,
        os.path.join(args.output_dir, 'predictions', file_name + '.csv'))
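
With n_estimators=10000 and no stopping criterion, Example #2 always trains the full 10000 rounds even though a validation split is already available. A hedged variant that stops early on that split (constructor placement of early_stopping_rounds follows xgboost >= 1.6; adjust for your version):

# Sketch under the assumption of xgboost >= 1.6, where early stopping is
# configured in the constructor rather than in fit():
xgreg = xgb.XGBRegressor(colsample_bytree=0.4,
                         gamma=0,
                         learning_rate=0.07,
                         max_depth=3,
                         min_child_weight=1.5,
                         n_estimators=10000,
                         reg_alpha=0.75,
                         reg_lambda=0.45,
                         subsample=0.6,
                         random_state=42,
                         early_stopping_rounds=50)
xgreg.fit(train_X, train_y, eval_set=[(val_X, val_y)], verbose=False)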
Example #3
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--C',
                        type=float,
                        default=1.0,
                        help='inverse of L1 / L2 regularization')
    parser.add_argument('--l1', dest='l2', action='store_false')
    parser.add_argument('--l2', dest='l2', action='store_true')
    parser.set_defaults(l2=True)
    parser.add_argument('--period',
                        type=str,
                        default='all',
                        help='specifies which period to extract features from',
                        choices=[
                            'first4days', 'first8days', 'last12hours',
                            'first25percent', 'first50percent', 'all'
                        ])
    parser.add_argument('--features',
                        type=str,
                        default='all',
                        help='specifies what features to extract',
                        choices=['all', 'len', 'all_but_len'])
    parser.add_argument('--data',
                        type=str,
                        help='Path to the data of in-hospital mortality task',
                        default=os.path.join(
                            os.path.dirname(__file__),
                            '../../../data/in-hospital-mortality/'))
    parser.add_argument(
        '--output_dir',
        type=str,
        help='Directory relative to which all output files are stored',
        default='.')
    args = parser.parse_args()
    print(args)

    train_reader = InHospitalMortalityReader(
        dataset_dir=os.path.join(args.data, 'train'),
        listfile=os.path.join(args.data, 'train_listfile.csv'),
        period_length=48.0)

    val_reader = InHospitalMortalityReader(
        dataset_dir=os.path.join(args.data, 'train'),
        listfile=os.path.join(args.data, 'val_listfile.csv'),
        period_length=48.0)

    test_reader = InHospitalMortalityReader(
        dataset_dir=os.path.join(args.data, 'test'),
        listfile=os.path.join(args.data, 'test_listfile.csv'),
        period_length=48.0)

    print('Reading data and extracting features ...')
    (train_X, train_y,
     train_names) = read_and_extract_features(train_reader, args.period,
                                              args.features)
    (val_X, val_y,
     val_names) = read_and_extract_features(val_reader, args.period,
                                            args.features)
    (test_X, test_y,
     test_names) = read_and_extract_features(test_reader, args.period,
                                             args.features)
    print('  train data shape = {}'.format(train_X.shape))
    print('  validation data shape = {}'.format(val_X.shape))
    print('  test data shape = {}'.format(test_X.shape))

    print('Imputing missing values ...')
    # As in Example #2: SimpleImputer (sklearn.impute) replaces the removed
    # sklearn.preprocessing.Imputer; mean imputation is column-wise.
    imputer = SimpleImputer(missing_values=np.nan,
                            strategy='mean',
                            copy=True)
    imputer.fit(train_X)
    train_X = np.array(imputer.transform(train_X), dtype=np.float32)
    val_X = np.array(imputer.transform(val_X), dtype=np.float32)
    test_X = np.array(imputer.transform(test_X), dtype=np.float32)

    print('Normalizing the data to have zero mean and unit variance ...')
    scaler = StandardScaler()
    scaler.fit(train_X)
    train_X = scaler.transform(train_X)
    val_X = scaler.transform(val_X)
    test_X = scaler.transform(test_X)

    penalty = ('l2' if args.l2 else 'l1')
    file_name = '{}.{}.{}.C{}'.format(args.period, args.features, penalty,
                                      args.C)

    # The default lbfgs solver supports only the L2 penalty; liblinear
    # handles both of the penalties selectable via --l1 / --l2.
    logreg = LogisticRegression(penalty=penalty, C=args.C,
                                solver='liblinear', random_state=42)
    logreg.fit(train_X, train_y)

    result_dir = os.path.join(args.output_dir, 'results')
    common_utils.create_directory(result_dir)

    with open(os.path.join(result_dir, 'train_{}.json'.format(file_name)),
              'w') as res_file:
        ret = print_metrics_binary(train_y, logreg.predict_proba(train_X))
        ret = {k: float(v) for k, v in ret.items()}
        json.dump(ret, res_file)

    with open(os.path.join(result_dir, 'val_{}.json'.format(file_name)),
              'w') as res_file:
        ret = print_metrics_binary(val_y, logreg.predict_proba(val_X))
        ret = {k: float(v) for k, v in ret.items()}
        json.dump(ret, res_file)

    time_start = time.time()
    prediction = logreg.predict_proba(test_X)[:, 1]
    time_elapse = time.time() - time_start
    print("Processing time on Test set :", time_elapse, " s")

    with open(os.path.join(result_dir, 'test_{}.json'.format(file_name)),
              'w') as res_file:
        ret = print_metrics_binary(test_y, prediction)
        ret = {k: float(v) for k, v in ret.items()}
        json.dump(ret, res_file)

    save_results(
        test_names, prediction, test_y,
        os.path.join(args.output_dir, 'predictions', file_name + '.csv'))
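
Both scripts are driven entirely by their argparse definitions, so typical invocations (script name and data paths are placeholders, not taken from the source) look like:

# Hypothetical command lines matching the flags defined above:
# python main.py --period first4days --features all_but_len --l1 --C 0.1
# python main.py --l2 --C 1.0    # defaults: --period all --features all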