Example #1
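# Assumed imports for this fragment (not shown in the original snippet):
# import os, time
# import numpy as np
# from pycox.evaluation import EvalSurv
# surv_model is a pycox model (e.g. CoxPH) created in the omitted surrounding code.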
# Fragment from inside a hyperparameter / cross-validation loop:
# fold_X_train_std, fold_y_train, fold_X_val_std, fold_y_val,
# model_filename, batch_size, n_epochs, and tic are defined by the
# surrounding (omitted) code.
time_elapsed_filename = model_filename[:-3] + '_time.txt'
if not os.path.isfile(model_filename):
    # print('*** Fitting with hyperparam:', hyperparam,
    #       '-- cross val index:', cross_val_idx, flush=True)
    surv_model.fit(
        fold_X_train_std,
        (fold_y_train[:, 0], fold_y_train[:, 1]),
        batch_size,
        n_epochs,
        verbose=False)
    surv_model.compute_baseline_hazards()
    elapsed = time.time() - tic
    print('Time elapsed: %f second(s)' % elapsed)
    np.savetxt(time_elapsed_filename,
               np.array(elapsed).reshape(1, -1))
    surv_model.save_net(model_filename)
else:
    # print('*** Loading ***', flush=True)
    surv_model.load_net(model_filename)
    elapsed = float(np.loadtxt(time_elapsed_filename))
    print('Time elapsed (from previous fitting): ' +
          '%f second(s)' % elapsed)

# Evaluate on the validation fold; the censoring distribution is
# estimated with Kaplan-Meier ('km').
surv_df = surv_model.predict_surv_df(fold_X_val_std)
ev = EvalSurv(surv_df,
              fold_y_val[:, 0],
              fold_y_val[:, 1],
              censor_surv='km')

sorted_fold_y_val = np.sort(np.unique(fold_y_val[:, 0]))
time_grid = np.linspace(sorted_fold_y_val[0],
                        sorted_fold_y_val[-1],
                        100)  # end point and grid size assumed; the original snippet is truncated here
Example #2
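# Assumed imports for this snippet (not shown in the original):
# import os
# import pandas as pd
# from pandas import DataFrame as df
# import torchtuples as tt
# from pycox.models import CoxPH
# from pycox.evaluation import EvalSurv
# from sklearn import metrics
# from sklearn_pandas import DataFrameMapper
# from sksurv.metrics import concordance_index_censored
# enc_using_trained_ae, StratifiedSampler, and CoxformY are project-local helpers.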
def pycox_deep(filename, Y_train, Y_test, opt, choice):
    # choice = {'lr_rate': l, 'batch': b, 'decay': 0, 'weighted_decay': wd, 'nodes': n, 'net': net, 'index': index}
    X_train, X_test = enc_using_trained_ae(filename, TARGET=opt, ALPHA=0.01, N_ITER=100, L1R=-9999)
    path = './models/analysis/'
    check = 0
    savename = 'model_check_autoen_m5_test_batch+dropout+wd.csv'
    # r=root, d=directories, f = files
    for r, d, f in os.walk(path):
        for file in f:
            if savename in file:
                check = 1

    # X_train = X_train.drop('UR_SG3', axis=1)
    # X_test = X_test.drop('UR_SG3', axis=1)

    # Note: x_train/x_test alias X_train/X_test (no copy), so the label
    # columns added here also appear in X_train.columns used below.
    x_train = X_train
    x_test = X_test
    x_train['SVDTEPC_G'] = Y_train['SVDTEPC_G']
    x_train['PC_YN'] = Y_train['PC_YN']
    x_test['SVDTEPC_G'] = Y_test['SVDTEPC_G']
    x_test['PC_YN'] = Y_test['PC_YN']

    ## DataFrameMapper ##
    cols_standardize = list(X_train.columns)
    cols_standardize.remove('SVDTEPC_G')
    cols_standardize.remove('PC_YN')

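    # (col, None) applies no transformation; substitute StandardScaler() here
    # to actually standardize the columns, as the variable name suggests.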
    standardize = [(col, None) for col in cols_standardize]

    x_mapper = DataFrameMapper(standardize)

    _ = x_mapper.fit_transform(X_train).astype('float32')
    X_train = x_mapper.transform(X_train).astype('float32')
    X_test = x_mapper.transform(X_test).astype('float32')

    get_target = lambda df: (df['SVDTEPC_G'].values, df['PC_YN'].values)
    y_train = get_target(x_train)
    durations_test, events_test = get_target(x_test)
    in_features = X_train.shape[1]
    print(in_features)
    num_nodes = choice['nodes']
    out_features = 1
    batch_norm = True  # apply batch normalization between layers
    dropout = 0.01
    output_bias = False
    # net = choice['net']
    net = tt.practical.MLPVanilla(in_features, num_nodes, out_features, batch_norm, dropout, output_bias=output_bias)

    print("training")
    model = CoxPH(net, tt.optim.Adam)
    # lrfinder = model.lr_finder(X_train, y_train, batch_size)

    # lr_best = lrfinder.get_best_lr()
    # lr_best = 0.0001  # unused; the learning rate is taken from choice['lr_rate']
    model.optimizer.set_lr(choice['lr_rate'])

    weighted_decay = choice['weighted_decay']
    verbose = True

    batch_size = choice['batch']
    epochs = 100

    if weighted_decay == 0:
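        # patience equal to the total epoch count means early stopping never actually fires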
        callbacks = [tt.callbacks.EarlyStopping(patience=epochs)]
        # model.fit(X_train, y_train, batch_size, epochs, callbacks, verbose=verbose)
    else:
        callbacks = [tt.callbacks.DecoupledWeightDecay(weight_decay=choice['decay'])]
        # model.fit(X_train, y_train, batch_size, epochs, callbacks, verbose)

    # Build the dataloader by hand so a custom StratifiedSampler can be used
    # instead of plain shuffling.
    # dataloader = model.make_dataloader(tt.tuplefy(X_train, y_train), batch_size, True)
    datas = tt.tuplefy(X_train, y_train).to_tensor()
    print(datas)
    make_dataset = tt.data.DatasetTuple
    DataLoader = tt.data.DataLoaderBatch
    dataset = make_dataset(*datas)
    dataloader = DataLoader(dataset, batch_size, False, sampler=StratifiedSampler(datas, batch_size))
    # dataloader = DataLoader(dataset, batch_size, True)
    model.fit_dataloader(dataloader, epochs, callbacks, verbose)
    # model.fit(X_train, y_train, batch_size, epochs, callbacks, verbose)
    # model.partial_log_likelihood(*val).mean()

    print("predicting")
    baseline_hazards = model.compute_baseline_hazards(datas[0], datas[1])
    baseline_hazards = df(baseline_hazards)

    surv = model.predict_surv_df(X_test)
    # EvalSurv expects the survival function S(t) itself, not 1 - S(t).
    ev = EvalSurv(surv, durations_test, events_test, censor_surv='km')
    surv = 1 - surv  # event probability F(t) = 1 - S(t), used below

    print("scoring")
    c_index = ev.concordance_td()
    print("c-index(", opt, "): ", c_index)

    if int(c_index * 10) == 0:
        hazardname = 'pycox_model_hazard_m5_v2_' + opt + '_0'
        netname = 'pycox_model_net_m5_v2_' + opt + '_0'
        weightname = 'pycox_model_weight_m5_v2_' + opt + '_0'
    else:
        hazardname = 'pycox_model_hazard_m5_' + opt + '_'
        netname = 'pycox_model_net_m5_' + opt + '_'
        weightname = 'pycox_model_weight_m5_' + opt + '_'

    # 'index' comes from the choice dict (see the signature comment above);
    # a bare `index` is undefined in this function.
    baseline_hazards.to_csv('./test/' + hazardname + str(int(c_index * 100)) + '_' + str(choice['index']) + '.csv', index=False)
    netname = netname + str(int(c_index * 100)) + '_' + str(choice['index']) + '.sav'
    weightname = weightname + str(int(c_index * 100)) + '_' + str(choice['index']) + '.sav'
    model.save_net('./test/' + netname)
    model.save_model_weights('./test/' + weightname)

    pred = df(surv).transpose()  # rows: test subjects, columns: event times
    surv_final = []  # F(t_i): event probability at each subject's observed time (a risk score)
    pred_final = []  # S(t_i): the complementary survival probability

    for i in range(len(pred)):
        pred_final.append(float(1 - pred[Y_test['SVDTEPC_G'][i]][i]))
        surv_final.append(float(pred[Y_test['SVDTEPC_G'][i]][i]))

    Y_test_cox = CoxformY(Y_test)
    #print(surv_final)
    c_cox, concordant, discordant, _, _ = concordance_index_censored(Y_test_cox['PC_YN'], Y_test_cox['SVDTEPC_G'], surv_final)
    c_cox_pred = concordance_index_censored(Y_test_cox['PC_YN'], Y_test_cox['SVDTEPC_G'], pred_final)[0]
    print("c-index(", opt, ") - sksurv: ", round(c_cox, 4))
    print("cox-concordant(", opt, ") - sksurv: ", concordant)
    print("cox-discordant(", opt, ") - sksurv: ", discordant)
    print("c-index_pred(", opt, ") - sksurv: ", round(c_cox_pred, 4))

    # Note: pred_final holds survival probabilities, so higher values mean
    # lower risk; an AUC below 0.5 here means the score separates in reverse.
    fpr, tpr, _ = metrics.roc_curve(Y_test['PC_YN'], pred_final)
    auc = metrics.auc(fpr, tpr)
    print("auc(", opt, "): ", round(auc, 4))

    if check == 1:
        model_check = pd.read_csv(path + savename)
    else:
        model_check = df(columns=['option', 'gender', 'c-td', 'c-index', 'auc'])
    line_append = {'option': str(choice), 'gender': opt, 'c-td': round(c_index, 4), 'c-index': round(c_cox_pred, 4), 'auc': round(auc, 4)}
    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    model_check = pd.concat([model_check, df([line_append])], ignore_index=True)
    model_check.to_csv(path + savename, index=False)

    del X_train
    del X_test

    return surv_final
Example #3
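# Assumed imports for this snippet (not shown in the original):
# import os
# import numpy as np
# import torchtuples as tt
# from torch.utils.data import DataLoader
# from sklearn.model_selection import train_test_split
# from pycox.models import CoxPH
# from pycox.evaluation import EvalSurv
# setup_parser, read_dataset, label_transfer, VsDatasetBatch, VsTestInput,
# get_vs_data, resnet18, save_args, and save_cindex are project-local helpers.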
def main():
    parser = setup_parser()
    args = parser.parse_args()

    if args.which_gpu != 'none':
        os.environ["CUDA_VISIBLE_DEVICES"] = args.which_gpu

    # save settings
    if not os.path.exists(os.path.join(args.save_path, args.model_name)):
        os.mkdir(os.path.join(args.save_path, args.model_name))

    # data reading settings
    signal_data_path = args.signal_dataset_path
    table_path = args.table_path
    time_col = 'SurvivalDays'
    event_col = 'Mortality'

    # dataset
    data_pathes, times, events = read_dataset(signal_data_path, table_path,
                                              time_col, event_col,
                                              args.sample_ratio)

    data_pathes_train, data_pathes_test, times_train, times_test, events_train, events_test = train_test_split(
        data_pathes, times, events, test_size=0.3, random_state=369)
    data_pathes_train, data_pathes_val, times_train, times_val, events_train, events_val = train_test_split(
        data_pathes_train,
        times_train,
        events_train,
        test_size=0.2,
        random_state=369)

    labels_train = label_transfer(times_train, events_train)
    dataset_train = VsDatasetBatch(data_pathes_train, *labels_train)
    dl_train = tt.data.DataLoaderBatch(dataset_train,
                                       args.train_batch_size,
                                       shuffle=True)

    labels_val = label_transfer(times_val, events_val)
    dataset_val = VsDatasetBatch(data_pathes_val, *labels_val)
    dl_val = tt.data.DataLoaderBatch(dataset_val,
                                     args.train_batch_size,
                                     shuffle=True)

    labels_test = label_transfer(times_test, events_test)
    dataset_test_x = VsTestInput(data_pathes_test)
    dl_test_x = DataLoader(dataset_test_x, args.test_batch_size, shuffle=False)

    net = resnet18(args)
    model = CoxPH(
        net,
        tt.optim.Adam(lr=args.lr,
                      betas=(0.9, 0.999),
                      eps=1e-08,
                      weight_decay=5e-4,
                      amsgrad=False))
    # callbacks = [tt.cb.EarlyStopping(patience=15)]
    callbacks = [
        tt.cb.BestWeights(file_path=os.path.join(
            args.save_path, args.model_name, args.model_name + '_bestWeight'),
                          rm_file=False)
    ]
    verbose = True
    model_log = model.fit_dataloader(dl_train,
                                     args.epochs,
                                     callbacks,
                                     verbose,
                                     val_dataloader=dl_val)

    save_args(os.path.join(args.save_path, args.model_name), args)
    model_log.to_pandas().to_csv(os.path.join(args.save_path, args.model_name,
                                              'loss.csv'),
                                 index=False)

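    # Estimate baseline hazards on the training data; required before
    # predict_surv_df for Cox models.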
    _ = model.compute_baseline_hazards(
        get_vs_data(dataset_train), (dataset_train.time, dataset_train.event))
    model.save_net(
        path=os.path.join(args.save_path, args.model_name, args.model_name +
                          '_final'))
    surv = model.predict_surv_df(dl_test_x)
    surv.to_csv(os.path.join(args.save_path, args.model_name,
                             'test_sur_df.csv'),
                index=False)
    ev = EvalSurv(surv, np.array(labels_test[0]), np.array(labels_test[1]),
                  'km')
    print(ev.concordance_td())
    save_cindex(os.path.join(args.save_path, args.model_name),
                ev.concordance_td())
    print('done')