import logging
import os

import numpy as np
import pandas as pd
import tensorflow as tf  # TF 1.x API (tf.ConfigProto)

# NOTE: `core` (data processing), `train`, `test`, `load_optimal_parameters`
# and `specifications` are project-local names, assumed to be imported from
# the surrounding package.


def get_predictions(config):
    # NOTE: relies on `activation_map`, `training_data`, `validation_data`,
    # `b_use_predicted_confounders`, `b_use_oracle_confounders`, `b_remove_x1`,
    # `tf_config`, `MODEL_ROOT` and `expt_name` from the enclosing scope.
    net_name = config[0]
    hidden_activation, output_activation = activation_map[net_name]

    # Pull datasets
    b_predict_actions = "treatment_rnn" in net_name
    b_use_actions_only = "rnn_action_inputs_only" in net_name

    # Extract only relevant trajectories and shift data
    training_processed = core.get_processed_data(
        training_data, b_predict_actions, b_use_actions_only,
        b_use_predicted_confounders, b_use_oracle_confounders, b_remove_x1)
    validation_processed = core.get_processed_data(
        validation_data, b_predict_actions, b_use_actions_only,
        b_use_predicted_confounders, b_use_oracle_confounders, b_remove_x1)

    num_features = training_processed['scaled_inputs'].shape[-1]  # 4 if not b_use_actions_only else 3
    num_outputs = training_processed['scaled_outputs'].shape[-1]  # 1 if not b_predict_actions else 3  # 5

    # Unpack remaining variables
    dropout_rate = config[1]
    memory_multiplier = config[2] / num_features
    num_epochs = config[3]
    minibatch_size = config[4]
    learning_rate = config[5]
    max_norm = config[6]

    model_folder = os.path.join(MODEL_ROOT, net_name)

    means, outputs, _, _ = test(training_processed, validation_processed,
                                training_processed, tf_config, net_name,
                                expt_name, dropout_rate, num_features,
                                num_outputs, memory_multiplier, num_epochs,
                                minibatch_size, learning_rate, max_norm,
                                hidden_activation, output_activation,
                                model_folder)

    return means, outputs
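
# Illustrative only: the layout of the `config` tuple consumed by
# get_predictions, mirroring its positional unpacking config[0]..config[6].
# The numeric values below are hypothetical placeholders, not tuned
# hyperparameters.
EXAMPLE_PREDICTION_CONFIG = (
    "rnn_propensity_weighted",  # config[0]: net_name (key into activation_map)
    0.1,                        # config[1]: dropout_rate
    4,                          # config[2]: memory multiplier (later divided by num_features)
    100,                        # config[3]: num_epochs
    128,                        # config[4]: minibatch_size
    0.01,                       # config[5]: learning_rate
    2.0,                        # config[6]: max_norm
)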

def rnn_fit(dataset_map, networks_to_train, MODEL_ROOT,
            b_use_predicted_confounders, b_use_oracle_confounders=False,
            b_remove_x1=False):
    logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)

    # Get the correct networks to train
    if networks_to_train == "propensity_networks":
        logging.info("Training propensity networks")
        net_names = ['treatment_rnn_action_inputs_only', 'treatment_rnn']
    elif networks_to_train == "encoder":
        logging.info("Training R-MSN encoder")
        net_names = ["rnn_propensity_weighted"]
    elif networks_to_train == "user_defined":
        logging.info("Training user-defined network")
        raise NotImplementedError("Specify network to use!")
    else:
        raise ValueError("Unrecognised network type")

    logging.info("Running hyperparameter optimisation")

    # Experiment name
    expt_name = "treatment_effects"

    # Possible networks to use along with their activation functions
    activation_map = {
        'rnn_propensity_weighted': ("elu", 'linear'),
        'rnn_propensity_weighted_logistic': ("elu", 'linear'),
        'rnn_model': ("elu", 'linear'),
        'treatment_rnn': ("tanh", 'sigmoid'),
        'treatment_rnn_action_inputs_only': ("tanh", 'sigmoid')
    }

    # Setup tensorflow
    tf_device = 'gpu'
    if tf_device == "cpu":
        config = tf.ConfigProto(log_device_placement=False,
                                device_count={'GPU': 0})
    else:
        config = tf.ConfigProto(log_device_placement=False,
                                device_count={'GPU': 1})
        config.gpu_options.allow_growth = True

    training_data = dataset_map['training_data']
    validation_data = dataset_map['validation_data']
    test_data = dataset_map['test_data']

    # Start running hyperparameter optimisation
    opt_params = {}
    for net_name in net_names:

        # Re-run hyperparameter optimisation if parameters are not specified;
        # otherwise train once with the defined params. `specifications` is a
        # module-level map of pre-tuned hyperparameters keyed by net name.
        max_hyperparam_runs = 3 if net_name not in specifications else 1

        # Pull datasets
        b_predict_actions = "treatment_rnn" in net_name
        use_truncated_bptt = net_name != "rnn_model_bptt"  # whether to train with truncated backpropagation through time
        b_propensity_weight = "rnn_propensity_weighted" in net_name
        b_use_actions_only = "rnn_action_inputs_only" in net_name

        # Extract only relevant trajectories and shift data
        training_processed = core.get_processed_data(
            training_data, b_predict_actions, b_use_actions_only,
            b_use_predicted_confounders, b_use_oracle_confounders, b_remove_x1)
        validation_processed = core.get_processed_data(
            validation_data, b_predict_actions, b_use_actions_only,
            b_use_predicted_confounders, b_use_oracle_confounders, b_remove_x1)
        test_processed = core.get_processed_data(
            test_data, b_predict_actions, b_use_actions_only,
            b_use_predicted_confounders, b_use_oracle_confounders, b_remove_x1)

        num_features = training_processed['scaled_inputs'].shape[-1]
        num_outputs = training_processed['scaled_outputs'].shape[-1]

        # Load propensity weights if they exist
        if b_propensity_weight:
            if net_name == 'rnn_propensity_weighted_den_only':
                # Use un-stabilised IPTWs generated by propensity networks
                propensity_weights = np.load(
                    os.path.join(MODEL_ROOT, "propensity_scores_den_only.npy"))
            elif net_name == "rnn_propensity_weighted_logistic":
                # Use logistic regression weights
                propensity_weights = np.load(
                    os.path.join(MODEL_ROOT, "propensity_scores.npy"))
                tmp = np.load(
                    os.path.join(MODEL_ROOT, "propensity_scores_logistic.npy"))
                propensity_weights = tmp[:propensity_weights.shape[0], :, :]
            else:
                # Use stabilised IPTWs generated by propensity networks
                propensity_weights = np.load(
                    os.path.join(MODEL_ROOT, "propensity_scores.npy"))

            logging.info("Net name = {}. Mean-adjusting!".format(net_name))
            propensity_weights /= propensity_weights.mean()

            training_processed['propensity_weights'] = propensity_weights

        # Start hyperparameter optimisation
        hyperparam_count = 0
        while True:
            if net_name not in specifications:
                dropout_rate = np.random.choice([0.1, 0.2, 0.3, 0.4, 0.5])
                memory_multiplier = np.random.choice([0.5, 1, 2, 3, 4])
                num_epochs = 100
                minibatch_size = np.random.choice([64, 128, 256])
                learning_rate = np.random.choice([0.01, 0.005, 0.001])  # ([0.01, 0.001, 0.0001])
                max_norm = np.random.choice([0.5, 1.0, 2.0, 4.0])
                hidden_activation, output_activation = activation_map[net_name]
            else:
                spec = specifications[net_name]
                logging.info("Using specifications for {}: {}".format(
                    net_name, spec))
                dropout_rate = spec[0]
                memory_multiplier = spec[1]
                num_epochs = spec[2]
                minibatch_size = spec[3]
                learning_rate = spec[4]
                max_norm = spec[5]
                hidden_activation, output_activation = activation_map[net_name]

            model_folder = os.path.join(MODEL_ROOT, net_name)

            hyperparam_opt = train(
                net_name, expt_name, training_processed, validation_processed,
                test_processed, dropout_rate, memory_multiplier, num_epochs,
                minibatch_size, learning_rate, max_norm, use_truncated_bptt,
                num_features, num_outputs, model_folder, hidden_activation,
                output_activation, config,
                "hyperparam opt: {} of {}".format(hyperparam_count,
                                                  max_hyperparam_runs))

            hyperparam_count = len(hyperparam_opt.columns)
            if hyperparam_count >= max_hyperparam_runs:
                opt_params[net_name] = hyperparam_opt.T
                break

        logging.info("Done")
        logging.info(hyperparam_opt.T)

    # Flag optimal params
    logging.info(opt_params)

def rnn_test(dataset_map, MODEL_ROOT, b_use_predicted_confounders,
             b_use_oracle_confounders=False, b_remove_x1=False):
    logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)

    # Setup tensorflow
    tf_device = 'gpu'
    if tf_device == "cpu":
        tf_config = tf.ConfigProto(log_device_placement=False,
                                   device_count={'GPU': 0})
    else:
        tf_config = tf.ConfigProto(log_device_placement=False,
                                   device_count={'GPU': 1})
        tf_config.gpu_options.allow_growth = True

    # Experiment name (matches the one used in rnn_fit)
    expt_name = "treatment_effects"

    configs = [
        load_optimal_parameters('rnn_propensity_weighted', expt_name,
                                MODEL_ROOT, add_net_name=True)
    ]

    # Config
    activation_map = {
        'rnn_propensity_weighted': ("elu", 'linear'),
        'rnn_propensity_weighted_binary': ("elu", 'linear'),
        'rnn_propensity_weighted_logistic': ("elu", 'linear'),
        'rnn_model': ("elu", 'linear'),
        'treatment_rnn': ("tanh", 'sigmoid'),
        'treatment_rnn_actions_only': ("tanh", 'sigmoid')
    }

    projection_map = {}
    mse_by_followup = {}
    for config in configs:

        net_name = config[0]
        projection_map[net_name] = {}

        training_data = dataset_map['training_data']
        validation_data = dataset_map['validation_data']
        test_data = dataset_map['test_data']
        # scaling_data = pickle_map['scaling_data']  # use scaling data from above

        # Setup some params
        b_predict_actions = "treatment_rnn" in net_name
        b_propensity_weight = "rnn_propensity_weighted" in net_name
        b_use_actions_only = "treatment_rnn_action_inputs_only" in net_name

        # Compute base MSEs:
        # extract only relevant trajectories and shift data
        training_processed = core.get_processed_data(
            training_data, b_predict_actions, b_use_actions_only,
            b_use_predicted_confounders, b_use_oracle_confounders, b_remove_x1)
        validation_processed = core.get_processed_data(
            validation_data, b_predict_actions, b_use_actions_only,
            b_use_predicted_confounders, b_use_oracle_confounders, b_remove_x1)
        test_processed = core.get_processed_data(
            test_data, b_predict_actions, b_use_actions_only,
            b_use_predicted_confounders, b_use_oracle_confounders, b_remove_x1)

        num_features = training_processed['scaled_inputs'].shape[-1]  # 4 if not b_use_actions_only else 3
        num_outputs = training_processed['scaled_outputs'].shape[-1]  # 1 if not b_predict_actions else 3  # 5

        # Pull remaining params
        dropout_rate = config[1]
        memory_multiplier = config[2] / num_features
        num_epochs = config[3]
        minibatch_size = config[4]
        learning_rate = config[5]
        max_norm = config[6]
        backprop_length = 60  # we've fixed this

        hidden_activation = activation_map[net_name][0]
        output_activation = activation_map[net_name][1]

        # Run tests
        model_folder = os.path.join(MODEL_ROOT, net_name)

        means, output, mse, test_states = test(
            training_processed, validation_processed, test_processed,
            tf_config, net_name, expt_name, dropout_rate, num_features,
            num_outputs, memory_multiplier, num_epochs, minibatch_size,
            learning_rate, max_norm, hidden_activation, output_activation,
            model_folder, b_use_state_initialisation=False,
            b_dump_all_states=True)

        active_entries = test_processed['active_entries']

        def get_mse_at_follow_up_time(mean, output, active_entries):
            # Average squared error over subjects at each follow-up step,
            # masking padded timesteps with active_entries
            mses = np.sum(np.sum((mean - output) ** 2 * active_entries,
                                 axis=-1), axis=0) \
                / active_entries.sum(axis=0).sum(axis=-1)
            return pd.Series(mses, index=[idx for idx in range(len(mses))],
                             name=net_name)

        mse = get_mse_at_follow_up_time(means, output, active_entries)
        projection_map[net_name] = mse

    mse.to_csv(os.path.join(MODEL_ROOT, "results_mse.csv"))

    return mse
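
# Usage sketch (illustrative only): evaluating the trained encoder. Assumes
# rnn_fit has already populated `model_root` (a hypothetical path) with a
# trained 'rnn_propensity_weighted' model and its optimal hyperparameters,
# which load_optimal_parameters reads back inside rnn_test.
def _example_test_workflow(dataset_map, model_root="models/"):
    # Returns the per-follow-up-time MSE series; rnn_test also writes it to
    # results_mse.csv under model_root.
    mse = rnn_test(dataset_map, model_root, b_use_predicted_confounders=True)
    print(mse)
    return mse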