Example #1
    result1 = np.zeros([num_Event, len(evalTime)])
    result2 = np.zeros([num_Event, len(evalTime)])

    for t, t_time in enumerate(evalTime):
        eval_horizon = int(t_time)
        if eval_horizon >= num_Category:
            print('ERROR: evaluation horizon is out of range')
            result1[:, t] = result2[:, t] = -1
        else:
            risk = pred[:, :, t]  # risk score until evalTime[t]
            risk_br = pred_br[:, :, t]

            for k in range(num_Event):
                # -1 is returned when there is no event (not comparable)
                result1[k, t] = weighted_c_index(
                    tr_time, (tr_label[:, 0] == k + 1).astype(int), risk[:, k],
                    te_time, (te_label[:, 0] == k + 1).astype(int),
                    eval_horizon)
                result2[k, t] = weighted_brier_score(
                    tr_time, (tr_label[:, 0] == k + 1).astype(int),
                    risk_br[:, k], te_time,
                    (te_label[:, 0] == k + 1).astype(int), eval_horizon)

    WEIGHTED_C_INDEX[:, :, out_itr] = result1
    WEIGHTED_BRIER_SCORE[:, :, out_itr] = result2

    ### SAVE RESULTS
    row_header = []
    for t in range(num_Event):
        row_header.append('Event_' + str(t + 1))
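The fragment stops partway through the saving step: row_header collects per-event labels, but the write itself is not shown. Below is a minimal sketch of how the two result matrices might be written out with pandas; col_header and the output filenames are hypothetical, everything else comes from the code above.

import pandas as pd

# Hypothetical column labels, one per evaluation time.
col_header = ['evalTime_' + str(int(t)) for t in evalTime]

# One row per event, one column per evaluation time.
df_cindex = pd.DataFrame(result1, index=row_header, columns=col_header)
df_brier = pd.DataFrame(result2, index=row_header, columns=col_header)
df_cindex.to_csv('weighted_c_index_itr_' + str(out_itr) + '.csv')
df_brier.to_csv('weighted_brier_score_itr_' + str(out_itr) + '.csv')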
Example #2
def get_valid_performance(in_parser,
                          out_itr,
                          evalTime=None,
                          MAX_VALUE=-99,
                          OUT_ITERATION=5):
    """ Trains the Marginal DeepPseudo model and give the validation C-index performance for random search.

    Arguments:
        - in_parser: dictionary of hyperparameters
        - out_itr: indicator of set of 5-fold cross validation datasets
        - evalTime: None or a list(e.g. [12, 60]). Evaluation times at which the validation performance is measured
        - MAX_VALUE: maximum validation value
        - OUT_ITERATION: Total number of the set of cross-validation data

    Returns:
        - the validation performance of the trained network
        - save the trained network in the folder directed by "in_parser['out_path'] + '/itr_' + str(out_itr)"
    """

    ## Define a list of continuous columns from the covariates
    continuous_columns = [
        'feature1', 'feature2', 'feature3', 'feature4', 'feature5', 'feature6',
        'feature7', 'feature8', 'feature9', 'feature10', 'feature11',
        'feature12'
    ]
    ## If there are categorical variables in the covariates, define a list of the categorical variables

    ## Import the attributes
    tr_data, tr_time, tr_label, y_train, va_data, va_time, va_label, y_val, te_data, te_time, te_label, y_test, num_Category, num_Event, num_evalTime, x_dim = import_data(
        out_itr,
        evalTime,
        categorical_columns=None,
        continuous_columns=continuous_columns)
    y_train1 = y_train[:, 0, :]  #pseudo values for CIF for cause 1
    y_train2 = y_train[:, 1, :]  #pseudo values for CIF for cause 2

    ## Hyper-parameters
    ACTIVATION_FN = {
        'selu': tf.nn.selu,
        'elu': tf.nn.elu,
        'tanh': tf.nn.tanh,
        'relu': tf.nn.relu
    }
    mb_size = in_parser['mb_size']
    iteration = in_parser['iteration']
    keep_prob = in_parser['keep_prob']
    lr_train = in_parser['lr_train']
    initial_W = tf.contrib.layers.xavier_initializer()

    ## Make Dictionaries
    # Input Dimensions
    input_dims = {
        'x_dim': x_dim,
        'num_Event': num_Event,
        'num_Category': num_Category,
        'num_evalTime': len(evalTime)
    }

    # NETWORK HYPER-PARAMETERS
    network_settings = {
        'num_units_shared': in_parser['num_units_shared'],
        'num_layers_shared': in_parser['num_layers_shared'],
        'num_units_CS': in_parser['num_units_CS'],
        'num_layers_CS': in_parser['num_layers_CS'],
        'activation_fn': ACTIVATION_FN[in_parser['activation_fn']],
        'initial_W': initial_W
    }

    file_path_final = in_parser['out_path'] + '/itr_' + str(out_itr)

    #change parameters...
    if not os.path.exists(file_path_final + '/models/'):
        os.makedirs(file_path_final + '/models/')

    ## Use GPU
    tf.reset_default_graph()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    ## Call the Marginal DeepPseudo Model
    model = CS_Marginal_DeepPseudo_Model(sess, "CS_Marginal_DeepPseudo",
                                         input_dims, network_settings)
    saver = tf.train.Saver()
    sess.run(tf.global_variables_initializer())

    max_valid = -99
    stop_flag = 0

    ### Training - Main
    print("MAIN TRAINING ...")
    print("EVALUATION TIMES: " + str(evalTime))

    avg_loss = 0
    for itr in range(iteration):
        if stop_flag > 10:  #for faster early stopping
            break
        else:
            x_mb, y1_mb, y2_mb = f_get_minibatch(
                mb_size, tr_data, y_train1, y_train2)  #get the minibatches
            DATA = (x_mb, y1_mb, y2_mb)
            _, loss_curr = model.train(DATA, keep_prob,
                                       lr_train)  #train the model
            avg_loss += loss_curr / 1000

            if (itr + 1) % 1000 == 0:
                print('|| ITR: ' + str('%04d' % (itr + 1)) + ' | Loss: ' +
                      colored(str('%.4f' % avg_loss), 'yellow', attrs=['bold']))
                avg_loss = 0

            ### Validation based on the average C-index
            if (itr + 1) % 1000 == 0:

                ### Prediction for validation data
                pred = model.predict(va_data)

                ### Evaluation on validation data
                val_result = np.zeros([num_Event, len(evalTime)])

                for t, t_time in enumerate(evalTime):
                    eval_horizon = int(t_time)
                    if eval_horizon >= num_Category:
                        print('ERROR: evaluation horizon is out of range')
                        val_result[:, t] = -1
                    else:
                        risk = pred[:, :, t]  # risk score until evalTime[t]
                        for k in range(num_Event):
                            val_result[k, t] = weighted_c_index(
                                tr_time, (tr_label[:, 0] == k + 1).astype(int),
                                risk[:, k], va_time,
                                (va_label[:, 0] == k + 1).astype(int),
                                eval_horizon
                            )  #weighted c-index calculation for validation data

                tmp_valid = np.mean(val_result)  #average weighted C-index

                if tmp_valid > max_valid:
                    stop_flag = 0
                    max_valid = tmp_valid
                    print('updated.... average c-index = ' +
                          str('%.4f' % tmp_valid))

                    if max_valid > MAX_VALUE:
                        saver.save(sess, file_path_final +
                                   '/models/model_itr_' + str(out_itr))
                else:
                    stop_flag += 1

    return max_valid
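For context, a minimal usage sketch of the function above. The hyperparameter values are illustrative placeholders, not taken from the source; the keys mirror exactly what get_valid_performance reads from in_parser.

in_parser = {
    'mb_size': 32,
    'iteration': 10000,
    'keep_prob': 0.6,
    'lr_train': 1e-4,
    'num_units_shared': 50,
    'num_layers_shared': 2,
    'num_units_CS': 50,
    'num_layers_CS': 2,
    'activation_fn': 'relu',  # one of 'selu', 'elu', 'tanh', 'relu'
    'out_path': './results'
}

# Best validation C-index for fold 0, evaluated at 12 and 60 time units.
best_valid = get_valid_performance(in_parser, out_itr=0, evalTime=[12, 60])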
Example #3
def get_valid_performance(DATA,
                          MASK,
                          in_parser,
                          out_itr,
                          eval_time=None,
                          MAX_VALUE=-99,
                          OUT_ITERATION=5,
                          seed=1234):
    """Trains a DeepHit network and returns its best average validation
    C-index; saves the trained model whenever it improves on MAX_VALUE."""
    ##### DATA & MASK
    (data, time, label) = DATA
    (mask1, mask2) = MASK

    x_dim = np.shape(data)[1]
    _, num_Event, num_Category = np.shape(
        mask1)  # dim of mask1: [subj, Num_Event, Num_Category]

    ACTIVATION_FN = {'relu': tf.nn.relu, 'elu': tf.nn.elu, 'tanh': tf.nn.tanh}

    ##### HYPER-PARAMETERS
    mb_size = in_parser['mb_size']

    iteration = in_parser['iteration']

    keep_prob = in_parser['keep_prob']
    lr_train = in_parser['lr_train']

    alpha = in_parser['alpha']  #for log-likelihood loss
    beta = in_parser['beta']  #for ranking loss
    gamma = in_parser['gamma']  #for RNN-prediction loss
    parameter_name = ('a' + str('%02.0f' % (10 * alpha)) + 'b' +
                      str('%02.0f' % (10 * beta)) + 'c' +
                      str('%02.0f' % (10 * gamma)))

    initial_W = tf.contrib.layers.xavier_initializer()

    ##### MAKE DICTIONARIES
    # INPUT DIMENSIONS
    input_dims = {
        'x_dim': x_dim,
        'num_Event': num_Event,
        'num_Category': num_Category
    }

    # NETWORK HYPER-PARAMETERS
    network_settings = {
        'h_dim_shared': in_parser['h_dim_shared'],
        'num_layers_shared': in_parser['num_layers_shared'],
        'h_dim_CS': in_parser['h_dim_CS'],
        'num_layers_CS': in_parser['num_layers_CS'],
        'active_fn': ACTIVATION_FN[in_parser['active_fn']],
        'initial_W': initial_W
    }

    file_path_final = in_parser['out_path'] + '/itr_' + str(out_itr)

    #change parameters...
    if not os.path.exists(file_path_final + '/models/'):
        os.makedirs(file_path_final + '/models/')

    print(file_path_final + ' (a:' + str(alpha) + ' b:' + str(beta) + ' c:' +
          str(gamma) + ')')

    ##### CREATE DEEPHIT NETWORK
    tf.reset_default_graph()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    model = Model_DeepHit(sess, "DeepHit", input_dims, network_settings)
    saver = tf.train.Saver()

    sess.run(tf.global_variables_initializer())

    ### TRAINING-TESTING SPLIT
    (tr_data, te_data, tr_time, te_time, tr_label, te_label, tr_mask1,
     te_mask1, tr_mask2, te_mask2) = train_test_split(data,
                                                      time,
                                                      label,
                                                      mask1,
                                                      mask2,
                                                      test_size=0.20,
                                                      random_state=seed)

    (tr_data, va_data, tr_time, va_time, tr_label, va_label, tr_mask1,
     va_mask1, tr_mask2, va_mask2) = train_test_split(tr_data,
                                                      tr_time,
                                                      tr_label,
                                                      tr_mask1,
                                                      tr_mask2,
                                                      test_size=0.20,
                                                      random_state=seed)

    max_valid = -99
    stop_flag = 0

    if eval_time is None:
        eval_time = [
            int(np.percentile(tr_time, 25)),
            int(np.percentile(tr_time, 50)),
            int(np.percentile(tr_time, 75))
        ]

    ### TRAINING - MAIN
    print("MAIN TRAINING ...")
    print("EVALUATION TIMES: " + str(eval_time))

    avg_loss = 0
    for itr in range(iteration):
        if stop_flag > 5:  #for faster early stopping
            break
        else:
            x_mb, k_mb, t_mb, m1_mb, m2_mb = f_get_minibatch(
                mb_size, tr_data, tr_label, tr_time, tr_mask1, tr_mask2)
            DATA = (x_mb, k_mb, t_mb)
            MASK = (m1_mb, m2_mb)
            PARAMETERS = (alpha, beta, gamma)
            _, loss_curr = model.train(DATA, MASK, PARAMETERS, keep_prob,
                                       lr_train)
            avg_loss += loss_curr / 1000

            if (itr + 1) % 1000 == 0:
                print('|| ITR: ' + str('%04d' % (itr + 1)) + ' | Loss: ' +
                      colored(str('%.4f' % avg_loss), 'yellow', attrs=['bold']))
                avg_loss = 0

            ### VALIDATION  (based on average C-index of our interest)
            if (itr + 1) % 1000 == 0:
                ### PREDICTION
                pred = model.predict(va_data)

                ### EVALUATION
                va_result1 = np.zeros([num_Event, len(eval_time)])

                for t, t_time in enumerate(eval_time):
                    eval_horizon = int(t_time)

                    if eval_horizon >= num_Category:
                        print('ERROR: evaluation horizon is out of range')
                        va_result1[:, t] = -1
                    else:
                        risk = np.sum(pred[:, :, :(eval_horizon + 1)],
                                      axis=2)  #risk score until eval_time
                        for k in range(num_Event):
                            # va_result1[k, t] = c_index(risk[:,k], va_time, (va_label[:,0] == k+1).astype(int), eval_horizon) #-1 for no event (not comparable)
                            va_result1[k, t] = weighted_c_index(
                                tr_time, (tr_label[:, 0] == k + 1).astype(int),
                                risk[:, k], va_time,
                                (va_label[:, 0] == k + 1).astype(int),
                                eval_horizon)
                tmp_valid = np.mean(va_result1)

                if tmp_valid > max_valid:
                    stop_flag = 0
                    max_valid = tmp_valid
                    print('updated.... average c-index = ' +
                          str('%.4f' % tmp_valid))

                    if max_valid > MAX_VALUE:
                        saver.save(sess, file_path_final +
                                   '/models/model_itr_' + str(out_itr))
                else:
                    stop_flag += 1

    return max_valid
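As above, a minimal usage sketch; the input arrays and hyperparameter values are placeholders, with keys matching what the function reads from in_parser.

# data: [subj, x_dim] covariates; time/label: event times and causes.
# mask1 dim: [subj, num_Event, num_Category] (per the code above);
# mask2 is the second loss mask expected by Model_DeepHit.
DATA = (data, time, label)
MASK = (mask1, mask2)

in_parser = {
    'mb_size': 32,
    'iteration': 10000,
    'keep_prob': 0.6,
    'lr_train': 1e-4,
    'alpha': 1.0,  # log-likelihood loss weight
    'beta': 1.0,  # ranking loss weight
    'gamma': 1.0,  # RNN-prediction loss weight
    'h_dim_shared': 50,
    'num_layers_shared': 2,
    'h_dim_CS': 50,
    'num_layers_CS': 2,
    'active_fn': 'relu',  # one of 'relu', 'elu', 'tanh'
    'out_path': './results'
}

# eval_time defaults to the 25th/50th/75th percentiles of training times.
best_valid = get_valid_performance(DATA, MASK, in_parser, out_itr=0)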