Example #1
0
def train(outdir):
    """Train a CFR (counterfactual regression) net and save its outputs.

    Args:
        outdir: Output directory prefix. Paths are built by plain string
            concatenation, so it should end with a path separator.

    Side effects: writes ``log.txt`` and ``config.txt``, optional CSV
    predictions/losses, an npz results file and (optionally) learned
    representations under ``outdir``.  Relies on module-level ``FLAGS``,
    ``cfr``, ``log``, ``save_config``, ``load_data`` and
    ``NUM_ITERATIONS_PER_DECAY``.
    """
    HAVE_TRUTH = False

    ''' Set up paths and start log '''
    npzfile = outdir + 'result'
    repfile = outdir + 'reps'
    outform = outdir + 'y_pred'
    lossform = outdir + 'loss'
    logfile = outdir + 'log.txt'
    # Truncate any previous log; log() appends to this file afterwards.
    open(logfile, 'w').close()

    ''' Set random seeds '''
    random.seed(FLAGS.seed)
    tf.set_random_seed(FLAGS.seed)
    np.random.seed(FLAGS.seed)

    ''' Save parameters '''
    save_config(outdir + 'config.txt')
    log(logfile, 'Training with hyperparameters: alpha=%.2e, lambda=%.2e' % (FLAGS.p_alpha, FLAGS.p_lambda))

    ''' Load data '''
    log(logfile, 'Loading data for dimensions... ' + FLAGS.datapath)
    x_all, t_all, y_f_all, y_cf_all = load_data(FLAGS.datapath)
    # Counterfactual outcomes available => we can report counterfactual error.
    if y_cf_all is not None:
        HAVE_TRUTH = True
    dim = x_all.shape[1]
    n = x_all.shape[0]

    log(logfile, 'Loaded data with shape [%d,%d]' % (n, dim))

    ''' Start Session '''
    log(logfile, 'Starting session...')
    sess = tf.Session()

    ''' Initialize input placeholders '''
    x_ = tf.placeholder("float", shape=[None, dim], name='x_')  # Features
    t_ = tf.placeholder("float", shape=[None, 1], name='t_')    # Treatment
    y_ = tf.placeholder("float", shape=[None, 1], name='y_')    # Outcome

    ''' Parameter placeholders '''
    alpha_ = tf.placeholder("float", name='alpha_')
    lambda_ = tf.placeholder("float", name='lambda_')
    do_in = tf.placeholder("float", name='dropout_in')
    do_out = tf.placeholder("float", name='dropout_out')
    p = tf.placeholder("float", name='p_treated')

    ''' Define model graph '''
    log(logfile, 'Defining graph...')
    dims = [dim, FLAGS.dim_in, FLAGS.dim_out]
    CFR = cfr.cfr_net(x_, t_, y_, p, FLAGS, alpha_, lambda_, do_in, do_out, dims)

    if FLAGS.varsel:
        # Assign op used to project the first input-layer weights (variable
        # selection) back onto the simplex after each gradient step.
        w_proj = tf.placeholder("float", shape=[dim], name='w_proj')
        projection = CFR.weights_in[0].assign(w_proj)

    ''' Set up optimizer '''
    log(logfile, 'Training...')
    global_step = tf.Variable(0, trainable=False)
    lr = tf.train.exponential_decay(FLAGS.lrate, global_step,
        NUM_ITERATIONS_PER_DECAY, FLAGS.lrate_decay, staircase=True)
    train_step = tf.train.RMSPropOptimizer(lr, FLAGS.decay).minimize(CFR.tot_loss, global_step=global_step)

    ''' Compute treatment probability '''
    t_cf_all = 1 - t_all  # flipped assignment, used for counterfactual predictions
    if FLAGS.use_p_correction:
        p_treated = np.mean(t_all)
    else:
        p_treated = 0.5

    ''' Set up loss feed_dicts '''
    dict_factual = {x_: x_all, t_: t_all, y_: y_f_all,
        do_in: 1.0, do_out: 1.0, alpha_: FLAGS.p_alpha,
        lambda_: FLAGS.p_lambda, p: p_treated}

    if HAVE_TRUTH:
        dict_cfactual = {x_: x_all, t_: t_cf_all, y_: y_cf_all,
            do_in: 1.0, do_out: 1.0}

    ''' Initialize tensorflow variables '''
    # tf.initialize_all_variables() is deprecated; use the modern initializer
    # (consistent with the rest of this codebase).
    sess.run(tf.global_variables_initializer())

    ''' Compute losses before training '''
    losses = []
    obj_loss, f_error, imb_err = sess.run(
        [CFR.tot_loss, CFR.pred_loss, CFR.imb_loss], feed_dict=dict_factual)

    cf_error = np.nan  # NaN when no counterfactual ground truth is available
    if HAVE_TRUTH:
        cf_error = sess.run(CFR.pred_loss, feed_dict=dict_cfactual)

    losses.append([obj_loss, f_error, cf_error, imb_err])

    log(logfile, 'Objective Factual CFactual Imbalance')
    log(logfile, str(losses[0]))

    ''' Train for m iterations '''
    for i in range(FLAGS.iterations):

        ''' Fetch sample '''
        # BUG FIX: range(1, n) silently excluded row 0 from every mini-batch;
        # sample over the full index range instead.
        I = random.sample(range(n), FLAGS.batch_size)
        x_batch = x_all[I, :]
        t_batch = t_all[I]
        y_batch = y_f_all[I]

        ''' Do one step of gradient descent '''
        sess.run(train_step, feed_dict={x_: x_batch, t_: t_batch,
            y_: y_batch, do_in: FLAGS.dropout_in, do_out: FLAGS.dropout_out,
            alpha_: FLAGS.p_alpha, lambda_: FLAGS.p_lambda, p: p_treated})

        ''' Project variable selection weights '''
        if FLAGS.varsel:
            wip = cfr.simplex_project(sess.run(CFR.weights_in[0]), 1)
            sess.run(projection, feed_dict={w_proj: wip})

        ''' Compute loss every N iterations '''
        if i % FLAGS.output_delay == 0:
            obj_loss, f_error, imb_err = sess.run(
                [CFR.tot_loss, CFR.pred_loss, CFR.imb_loss],
                feed_dict=dict_factual)

            y_pred = sess.run(CFR.output, feed_dict={x_: x_batch, t_: t_batch,
                y_: y_batch, do_in: FLAGS.dropout_in, do_out: FLAGS.dropout_out,
                alpha_: FLAGS.p_alpha, lambda_: FLAGS.p_lambda, p: p_treated})

            cf_error = np.nan
            if HAVE_TRUTH:
                cf_error = sess.run(CFR.pred_loss, feed_dict=dict_cfactual)

            losses.append([obj_loss, f_error, cf_error, imb_err])
            loss_str = str(i) + '\tObj: %.4g,\tF: %.4g,\tCf: %.4g,\tImb: %.4g' % (obj_loss, f_error, cf_error, imb_err)

            if FLAGS.loss == 'log':
                # Binary outcome: threshold predictions and report accuracy.
                y_pred = 1.0 * (y_pred > 0.5)
                acc = 100 * (1 - np.mean(np.abs(y_batch - y_pred)))
                loss_str += ',\tAcc: %.2f%%' % acc

            log(logfile, loss_str)

    log(logfile, 'Ending learning rate: %.2g' % sess.run(lr))

    ''' Predict response and store '''
    ypred_f = sess.run(CFR.output, feed_dict={x_: x_all, t_: t_all,
        do_in: 1.0, do_out: 1.0, alpha_: FLAGS.p_alpha, lambda_: FLAGS.p_lambda})
    ypred_c = sess.run(CFR.output, feed_dict={x_: x_all, t_: t_cf_all,
        do_in: 1.0, do_out: 1.0, alpha_: FLAGS.p_alpha, lambda_: FLAGS.p_lambda})

    # Column 0: factual prediction, column 1: counterfactual prediction.
    ypred = np.concatenate((ypred_f, ypred_c), axis=1)

    log(logfile, 'Saving to %s...' % outform)
    if FLAGS.output_csv:
        np.savetxt('%s.csv' % (outform), ypred, delimiter=',')
        np.savetxt('%s.csv' % (lossform), losses, delimiter=',')

    ''' Compute weights '''
    if FLAGS.varsel:
        # NOTE(review): all_weights/all_beta are never initialized in this
        # function, so this branch raises NameError as written.  Upstream CFR
        # code accumulates them across repeated experiments; confirm the
        # intended initialization before relying on varsel output.
        all_weights = np.dstack((all_weights, sess.run(CFR.weights_in[0])))
        all_beta = np.dstack((all_beta, sess.run(CFR.weights_pred)))

    ''' Save results and predictions '''
    if FLAGS.varsel:
        np.savez(npzfile, pred=ypred, loss=losses, w=all_weights, beta=all_beta)
    else:
        np.savez(npzfile, pred=ypred, loss=losses)

    ''' Save representations '''
    if FLAGS.save_rep:
        # Dropout disabled / pass-through for a deterministic representation.
        reps = sess.run([CFR.h_rep], feed_dict={x_: x_all, do_in: 1.0, do_out: 0.0})
        np.savez(repfile, rep=reps)
Example #2
0
def train(outdir):
    """Train a CFR net, save its outputs, and compute IHDP PEHE/RMSE metrics.

    Args:
        outdir: Output directory prefix. Paths are built by plain string
            concatenation, so it should end with a path separator.

    Side effects: writes ``log.txt``, ``config.txt``, ``metrics.txt``,
    optional CSV predictions/losses, an npz results file and (optionally)
    learned representations under ``outdir``.  Also reads
    ``data/ihdp_sample.csv`` for the final metric computation.  Relies on
    module-level ``FLAGS``, ``cfr``, ``log``, ``save_config``, ``load_data``,
    ``pd``, ``mean_squared_error`` and ``NUM_ITERATIONS_PER_DECAY``.
    """
    HAVE_TRUTH = False

    ''' Set up paths and start log '''
    npzfile = outdir + 'result'
    repfile = outdir + 'reps'
    outform = outdir + 'y_pred'
    lossform = outdir + 'loss'
    logfile = outdir + 'log.txt'
    metric_file = outdir + 'metrics.txt'
    # Truncate any previous log; log() appends to this file afterwards.
    open(logfile, 'w').close()

    ''' Set random seeds '''
    random.seed(FLAGS.seed)
    tf.set_random_seed(FLAGS.seed)
    np.random.seed(FLAGS.seed)

    ''' Save parameters '''
    save_config(outdir + 'config.txt')
    log(
        logfile, 'Training with hyperparameters: alpha=%.2e, lambda=%.2e' %
        (FLAGS.p_alpha, FLAGS.p_lambda))

    ''' Load data '''
    log(logfile, 'Loading data for dimensions... ' + FLAGS.datapath)
    x_all, t_all, y_f_all, y_cf_all = load_data(FLAGS.datapath)
    # Counterfactual outcomes available => we can report counterfactual error.
    if y_cf_all is not None:
        HAVE_TRUTH = True
    dim = x_all.shape[1]
    n = x_all.shape[0]

    log(logfile, 'Loaded data with shape [%d,%d]' % (n, dim))

    ''' Start Session '''
    log(logfile, 'Starting session...')
    sess = tf.Session()

    ''' Initialize input placeholders '''
    x_ = tf.placeholder("float", shape=[None, dim], name='x_')  # Features
    t_ = tf.placeholder("float", shape=[None, 1], name='t_')    # Treatment
    y_ = tf.placeholder("float", shape=[None, 1], name='y_')    # Outcome

    ''' Parameter placeholders '''
    alpha_ = tf.placeholder("float", name='alpha_')
    lambda_ = tf.placeholder("float", name='lambda_')
    do_in = tf.placeholder("float", name='dropout_in')
    do_out = tf.placeholder("float", name='dropout_out')
    p = tf.placeholder("float", name='p_treated')

    ''' Define model graph '''
    log(logfile, 'Defining graph...')
    dims = [dim, FLAGS.dim_in, FLAGS.dim_out]
    CFR = cfr.cfr_net(x_, t_, y_, p, FLAGS, alpha_, lambda_, do_in, do_out,
                      dims)

    if FLAGS.varsel:
        # Assign op used to project the first input-layer weights (variable
        # selection) back onto the simplex after each gradient step.
        w_proj = tf.placeholder("float", shape=[dim], name='w_proj')
        projection = CFR.weights_in[0].assign(w_proj)

    ''' Set up optimizer '''
    log(logfile, 'Training...')
    global_step = tf.Variable(0, trainable=False)
    lr = tf.train.exponential_decay(FLAGS.lrate, global_step,
        NUM_ITERATIONS_PER_DECAY, FLAGS.lrate_decay, staircase=True)
    train_step = tf.train.RMSPropOptimizer(lr, FLAGS.decay).minimize(
        CFR.tot_loss, global_step=global_step)

    ''' Compute treatment probability '''
    t_cf_all = 1 - t_all  # flipped assignment, used for counterfactual predictions
    if FLAGS.use_p_correction:
        p_treated = np.mean(t_all)
    else:
        p_treated = 0.5

    ''' Set up loss feed_dicts '''
    dict_factual = {x_: x_all, t_: t_all, y_: y_f_all,
        do_in: 1.0, do_out: 1.0, alpha_: FLAGS.p_alpha,
        lambda_: FLAGS.p_lambda, p: p_treated}

    if HAVE_TRUTH:
        dict_cfactual = {x_: x_all, t_: t_cf_all, y_: y_cf_all,
            do_in: 1.0, do_out: 1.0}

    ''' Initialize tensorflow variables '''
    sess.run(tf.global_variables_initializer())

    ''' Compute losses before training '''
    losses = []
    obj_loss, f_error, imb_err = sess.run(
        [CFR.tot_loss, CFR.pred_loss, CFR.imb_loss], feed_dict=dict_factual)

    cf_error = np.nan  # NaN when no counterfactual ground truth is available
    if HAVE_TRUTH:
        cf_error = sess.run(CFR.pred_loss, feed_dict=dict_cfactual)

    losses.append([obj_loss, f_error, cf_error, imb_err])

    log(logfile, 'Objective Factual CFactual Imbalance')
    log(logfile, str(losses[0]))

    ''' Train for m iterations '''
    for i in range(FLAGS.iterations):
        ''' Fetch sample '''
        I = random.sample(range(n), FLAGS.batch_size)
        x_batch = x_all[I, :]
        t_batch = t_all[I]
        y_batch = y_f_all[I]

        ''' Do one step of gradient descent '''
        sess.run(train_step, feed_dict={x_: x_batch, t_: t_batch,
            y_: y_batch, do_in: FLAGS.dropout_in, do_out: FLAGS.dropout_out,
            alpha_: FLAGS.p_alpha, lambda_: FLAGS.p_lambda, p: p_treated})

        ''' Project variable selection weights '''
        if FLAGS.varsel:
            wip = cfr.simplex_project(sess.run(CFR.weights_in[0]), 1)
            sess.run(projection, feed_dict={w_proj: wip})

        ''' Compute loss every N iterations '''
        if i % FLAGS.output_delay == 0:
            obj_loss, f_error, imb_err = sess.run(
                [CFR.tot_loss, CFR.pred_loss, CFR.imb_loss],
                feed_dict=dict_factual)

            y_pred = sess.run(CFR.output, feed_dict={x_: x_batch, t_: t_batch,
                y_: y_batch, do_in: FLAGS.dropout_in, do_out: FLAGS.dropout_out,
                alpha_: FLAGS.p_alpha, lambda_: FLAGS.p_lambda, p: p_treated})

            cf_error = np.nan
            if HAVE_TRUTH:
                cf_error = sess.run(CFR.pred_loss, feed_dict=dict_cfactual)

            losses.append([obj_loss, f_error, cf_error, imb_err])
            loss_str = str(
                i) + '\tObj: %.4g,\tF: %.4g,\tCf: %.4g,\tImb: %.4g' % (
                    obj_loss, f_error, cf_error, imb_err)

            if FLAGS.loss == 'log':
                # Binary outcome: threshold predictions and report accuracy.
                y_pred = 1.0 * (y_pred > 0.5)
                acc = 100 * (1 - np.mean(np.abs(y_batch - y_pred)))
                loss_str += ',\tAcc: %.2f%%' % acc

            log(logfile, loss_str)

    log(logfile, 'Ending learning rate: %.2g' % sess.run(lr))

    ''' Predict response and store '''
    ypred_f = sess.run(CFR.output, feed_dict={x_: x_all, t_: t_all,
        do_in: 1.0, do_out: 1.0, alpha_: FLAGS.p_alpha, lambda_: FLAGS.p_lambda})
    ypred_c = sess.run(CFR.output, feed_dict={x_: x_all, t_: t_cf_all,
        do_in: 1.0, do_out: 1.0, alpha_: FLAGS.p_alpha, lambda_: FLAGS.p_lambda})

    # Column 0: factual prediction, column 1: counterfactual prediction.
    ypred = np.concatenate((ypred_f, ypred_c), axis=1)

    log(logfile, 'Saving to %s...' % outform)
    if FLAGS.output_csv:
        np.savetxt('%s.csv' % (outform), ypred, delimiter=',')
        np.savetxt('%s.csv' % (lossform), losses, delimiter=',')

    ''' Compute weights '''
    if FLAGS.varsel:
        # NOTE(review): all_weights/all_beta are never initialized in this
        # function, so this branch raises NameError as written.  Upstream CFR
        # code accumulates them across repeated experiments; confirm the
        # intended initialization before relying on varsel output.
        all_weights = np.dstack((all_weights, sess.run(CFR.weights_in[0])))
        all_beta = np.dstack((all_beta, sess.run(CFR.weights_pred)))

    ''' Save results and predictions '''
    if FLAGS.varsel:
        np.savez(npzfile,
                 pred=ypred,
                 loss=losses,
                 w=all_weights,
                 beta=all_beta)
    else:
        np.savez(npzfile, pred=ypred, loss=losses)

    ''' Save representations '''
    if FLAGS.save_rep:
        # Dropout disabled / pass-through for a deterministic representation.
        reps = sess.run([CFR.h_rep],
                        feed_dict={
                            x_: x_all,
                            do_in: 1.0,
                            do_out: 0.0
                        })
        np.savez(repfile, rep=reps)

    # Calculate RMSE and PEHE against the IHDP sample.
    # NOTE(review): the following assumes column 0 of data/ihdp_sample.csv is
    # the treatment indicator and columns 1/2 are the factual/counterfactual
    # outcomes, and that this step requires FLAGS.output_csv (it reads the CSV
    # written above) — confirm against the data file and caller.
    df = pd.read_csv('data/ihdp_sample.csv', header=None)
    # True individual treatment effect, oriented as treated-minus-control.
    df['treatment_effect'] = np.where(df[0] == 1, df[1] - df[2], df[2] - df[1])
    pred_df = pd.read_csv('{}.csv'.format(outform), header=None)
    pred_df['condition'] = df[0]
    # Predicted effect from the factual (col 0) and counterfactual (col 1) predictions.
    pred_df['treatment_effect'] = np.where(pred_df['condition'] == 1,
                                           pred_df[0] - pred_df[1],
                                           pred_df[1] - pred_df[0])
    pehe = np.sqrt(
        mean_squared_error(pred_df['treatment_effect'].values,
                           df['treatment_effect'].values))
    # NOTE(review): this mixes a ground-truth outcome (df[1]) with a predicted
    # one (pred_df[1]); verify this is the intended RMSE definition.
    pred_df['ite_for_rmse'] = np.where(pred_df['condition'] == 1,
                                       df[1] - pred_df[1], pred_df[1] - df[1])
    rmse = np.sqrt(
        mean_squared_error(pred_df['ite_for_rmse'].values,
                           df['treatment_effect'].values))
    print('\nPEHE = {}\nRMSE = {}\n'.format(pehe, rmse))
    with open(metric_file, 'w') as f:
        f.write('PEHE = {}\n'.format(pehe))
        f.write('rmse = {}'.format(rmse))
Example #3
0
File: CFR.py  Project: kiminh/BV-NICE
# NOTE(review): this fragment is truncated — the loop body continues past the
# end of the excerpt, and X, T, Y, n, sess, CFR, dict_factual, dict_cfactual,
# HAVE_TRUTH, p_treated etc. are defined elsewhere.
results = np.zeros([epoch, 3, 5])  # per-epoch result store; axis meanings not shown in this excerpt
epoch_id = 0
''' Train for m iterations '''
for i in range(FLAGS.iterations):
    ''' Fetch sample '''
    # NOTE(review): range(1, n) never samples index 0, so the first row is
    # excluded from every mini-batch — likely an off-by-one; confirm upstream.
    I = random.sample(range(1, n), FLAGS.batch_size)
    x_batch = X[I, :]
    t_batch = T[I]
    y_batch = Y[I]
    ''' Do one step of gradient descent '''
    sess.run(train_step, feed_dict={x_: x_batch, t_: t_batch, \
        y_: y_batch, do_in:FLAGS.dropout_in, do_out:FLAGS.dropout_out, \
        alpha_:FLAGS.p_alpha, lambda_:FLAGS.p_lambda, p:p_treated})
    ''' Project variable selection weights '''
    if FLAGS.varsel:
        # Project first input-layer weights back onto the simplex (variable selection).
        wip = cfr.simplex_project(sess.run(CFR.weights_in[0]), 1)
        sess.run(projection, feed_dict={w_proj: wip})
    ''' Compute loss every N iterations '''
    if i % FLAGS.output_delay == 0:
        # Full-sample factual losses: objective, prediction error, imbalance.
        obj_loss, f_error, imb_err = sess.run(
            [CFR.tot_loss, CFR.pred_loss, CFR.imb_loss],
            feed_dict=dict_factual)

        # Predictions on the current mini-batch (training-time dropout rates).
        y_pred = sess.run(CFR.output, feed_dict={x_: x_batch, t_: t_batch, \
            y_: y_batch, do_in:FLAGS.dropout_in, do_out:FLAGS.dropout_out, \
            alpha_:FLAGS.p_alpha, lambda_:FLAGS.p_lambda, p:p_treated})

        cf_error = np.nan  # NaN unless counterfactual ground truth exists
        if HAVE_TRUTH:
            cf_error = sess.run(CFR.pred_loss, feed_dict=dict_cfactual)