def main():
    """Sample synthetic data from each trained model and write it per sex.

    Command-line arguments:
        sys.argv[1]: epsilon, parsed as ``float``; used to pick the model file
            and (rounded to 2 decimals) in the output file names.
        sys.argv[2]: seed, parsed as ``int``; used to pick the model file and
            in the output file names.

    The model file is the first entry (sorted by name) of ``./train_models/``
    whose name contains both ``str(eps)`` and ``str(seed)`` as literal
    substrings. For every model in the loaded collection, data is sampled with
    ``fast_sample``, split on the ``"is.female"`` column, decoded with
    ``decode_data`` and written to ``./syn_data/`` as CSV.

    Raises:
        FileNotFoundError: if no file in ``./train_models/`` matches.
    """
    eps = float(sys.argv[1])
    seed = int(sys.argv[2])

    # Look the model file up in-process. The previous shell pipeline
    # (`ls | grep | grep >> model_fnames.txt`) appended to a scratch file, so
    # leftovers from an earlier or concurrent run could select the wrong
    # model, and grep treated the '.' in the epsilon as a regex wildcard.
    model_dir = './train_models/'
    eps_tag = str(eps)
    seed_tag = str(seed)
    candidates = sorted(
        name for name in os.listdir(model_dir)
        # `ls` does not list hidden files, so skip them here as well.
        if not name.startswith('.') and eps_tag in name and seed_tag in name
    )
    if not candidates:
        raise FileNotFoundError(
            "no model file in {} matching eps={} and seed={}".format(
                model_dir, eps_tag, seed_tag))
    model_fname = candidates[0]

    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    train_models = pd.read_pickle('./train_models/{}'.format(model_fname))

    eps_label = np.round(eps, 2)
    for i_rep, train_model in enumerate(train_models):
        # Restrict the variable-type table to the variables of this model.
        train_variable_types = {
            key: train_variable_types_base[key]
            for key in train_model.param_dims.keys()
        }
        print(i_rep)

        syn_data = fast_sample(train_model, train_variable_types, N)
        female_syn_data = syn_data[syn_data["is.female"] == 1]
        male_syn_data = syn_data[syn_data["is.female"] == 0]

        female_syn_decoded = decode_data(female_syn_data, maps,
                                         for_poisson=False)
        male_syn_decoded = decode_data(male_syn_data, maps,
                                       for_poisson=False)

        female_syn_decoded.to_csv(
            './syn_data/female_data_{}_{}_{}.csv'.format(
                seed, eps_label, i_rep),
            index=False)
        male_syn_decoded.to_csv(
            './syn_data/male_data_{}_{}_{}.csv'.format(
                seed, eps_label, i_rep),
            index=False)
def main():
    """Sample synthetic male data from each stored male model and save it.

    Command-line arguments:
        sys.argv[1]: epsilon, parsed as ``float``; used to pick the model file
            and (rounded to 2 decimals) in the output file names.
        sys.argv[2]: seed, parsed as ``int``; used to pick the model file and
            in the output file names.

    The model file is the first entry (sorted by name) of ``./male_models/``
    whose name contains both ``str(eps)`` and ``str(seed)`` as literal
    substrings. For every model in the loaded collection, ``N_male`` is passed
    to ``fast_sample`` as the sample size, the result is decoded with
    ``decode_data`` and written to ``./syn_data/`` as CSV.

    Raises:
        FileNotFoundError: if no file in ``./male_models/`` matches.
    """
    eps = float(sys.argv[1])
    seed = int(sys.argv[2])

    # Look the model file up in-process. The previous shell pipeline
    # (`ls | grep | grep >> model_fnames.txt`) appended to a scratch file, so
    # leftovers from an earlier or concurrent run could select the wrong
    # model, and grep treated the '.' in the epsilon as a regex wildcard.
    model_dir = './male_models/'
    eps_tag = str(eps)
    seed_tag = str(seed)
    candidates = sorted(
        name for name in os.listdir(model_dir)
        # `ls` does not list hidden files, so skip them here as well.
        if not name.startswith('.') and eps_tag in name and seed_tag in name
    )
    if not candidates:
        raise FileNotFoundError(
            "no model file in {} matching eps={} and seed={}".format(
                model_dir, eps_tag, seed_tag))
    model_fname = candidates[0]
    print(model_fname)

    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    male_models = pd.read_pickle('./male_models/{}'.format(model_fname))

    eps_label = np.round(eps, 2)
    for i_rep, male_model in enumerate(male_models):
        # Restrict the variable-type table to the variables of this model.
        male_variable_types = {
            key: male_variable_types_[key]
            for key in male_model.param_dims.keys()
        }
        print(i_rep)

        male_syn_data = fast_sample(male_model, male_variable_types, N_male)
        male_syn_decoded = decode_data(male_syn_data, maps,
                                       for_poisson=False)
        male_syn_decoded.to_csv(
            './syn_data/male_data_{}_{}_{}.csv'.format(
                seed, eps_label, i_rep),
            index=False)
def main():
    """Sample synthetic female data from paired alive/dead models and save it.

    Command-line arguments:
        sys.argv[1]: epsilon, parsed as ``float``; used to pick the model
            files and (rounded to 2 decimals) in the output file names.
        sys.argv[2]: seed, parsed as ``int``; used to pick the model files and
            in the output file names.

    Files in ``./female_models/`` whose names contain both ``str(eps)`` and
    ``str(seed)`` as literal substrings are candidates; the first one (sorted
    by name) containing ``'alive'`` and the first one containing ``'dead'``
    are loaded. The two model collections are iterated in lockstep. For each
    pair, the split between alive and dead samples is derived from
    ``N_female_dead`` perturbed with Laplace noise of scale ``1/0.01``; both
    parts are sampled with ``fast_sample``, concatenated, decoded with
    ``decode_data`` and written to ``./syn_data/`` as CSV.

    Raises:
        FileNotFoundError: if an alive or a dead model file cannot be found.
    """
    eps = float(sys.argv[1])
    seed = int(sys.argv[2])

    # Look the model files up in-process. The previous shell pipeline
    # (`ls | grep | grep >> model_fnames.txt`) appended to a scratch file, so
    # leftovers from an earlier or concurrent run could select the wrong
    # model, and grep treated the '.' in the epsilon as a regex wildcard.
    model_dir = './female_models/'
    eps_tag = str(eps)
    seed_tag = str(seed)
    candidates = sorted(
        name for name in os.listdir(model_dir)
        # `ls` does not list hidden files, so skip them here as well.
        if not name.startswith('.') and eps_tag in name and seed_tag in name
    )
    alive_candidates = [name for name in candidates if 'alive' in name]
    dead_candidates = [name for name in candidates if 'dead' in name]
    if not alive_candidates or not dead_candidates:
        raise FileNotFoundError(
            "missing alive/dead model file in {} for eps={} and seed={}"
            .format(model_dir, eps_tag, seed_tag))
    alive_model_fname = alive_candidates[0]
    dead_model_fname = dead_candidates[0]

    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    alive_female_models = pd.read_pickle(
        './female_models/{}'.format(alive_model_fname))
    dead_female_models = pd.read_pickle(
        './female_models/{}'.format(dead_model_fname))

    eps_label = np.round(eps, 2)
    model_pairs = zip(alive_female_models, dead_female_models)
    for i_rep, (alive_female_model, dead_female_model) in enumerate(
            model_pairs):
        # Restrict the variable-type table to the variables of each model.
        alive_female_variable_types = {
            key: female_variable_types[key]
            for key in alive_female_model.param_dims.keys()
        }
        dead_female_variable_types = {
            key: female_variable_types[key]
            for key in dead_female_model.param_dims.keys()
        }
        print(i_rep)

        # Perturb the dead count with Laplace noise (scale 1/0.01) before
        # turning it into a proportion.
        noisy_dead_proportion = (
            N_female_dead + np.random.laplace(scale=(1. / 0.01))) / N_female
        # Clamp to [0, 1] so the noise can never produce a negative or
        # oversized sample count; this only post-processes the noisy value.
        noisy_dead_proportion = min(max(noisy_dead_proportion, 0.), 1.)
        N_syn_female_alive = int((1. - noisy_dead_proportion) * N_female)
        N_syn_female_dead = int(noisy_dead_proportion * N_female)

        alive_female_syn_data = fast_sample(
            alive_female_model, alive_female_variable_types,
            N_syn_female_alive)
        dead_female_syn_data = fast_sample(
            dead_female_model, dead_female_variable_types,
            N_syn_female_dead)

        # The alive samples get fixed values for these two columns.
        alive_female_syn_data['ep'] = 0
        alive_female_syn_data['lex.dur'] = 1.0

        female_syn_data = pd.concat(
            [alive_female_syn_data, dead_female_syn_data])
        female_syn_decoded = decode_data(female_syn_data, maps,
                                         for_poisson=False)
        female_syn_decoded.to_csv(
            './syn_data/female_data_{}_{}_{}.csv'.format(
                seed, eps_label, i_rep),
            index=False)
def main():
    """Join the female models for one epsilon, then sample and save data.

    Command-line arguments:
        sys.argv[1]: epsilon, parsed as ``float``.

    Runs ``join_models.py`` with the epsilon, loads the alive and dead female
    model pickles named after the epsilon rounded to 2 decimals, and iterates
    both collections in lockstep. For each pair, alive and dead samples are
    drawn with ``fast_sample`` in fixed sizes (80% / 20% of 208148), the alive
    samples get ``ep = 0`` and ``lex.dur = 1.0``, and the concatenated,
    decoded result is written to ``./syn_data/`` as CSV.
    """
    eps = float(sys.argv[1])

    # NOTE(review): the exit status of the join step is not checked, so a
    # failed join is followed by reading whatever pickles are on disk.
    os.system("python3 join_models.py {}".format(eps))

    eps_label = np.round(eps, 2)
    alive_path = './female_models/alive_female_models_{}.p'.format(eps_label)
    dead_path = './female_models/dead_female_models_{}.p'.format(eps_label)
    alive_models = pd.read_pickle(alive_path)
    dead_models = pd.read_pickle(dead_path)

    # Fixed sample sizes; 208148 is hard-coded in this script
    # (presumably the female record count -- TODO confirm).
    n_alive = int(208148 * 0.8)
    n_dead = int(208148 * 0.2)

    for i_rep, model_pair in enumerate(zip(alive_models, dead_models)):
        alive_model, dead_model = model_pair

        # Restrict the variable-type table to the variables of each model.
        alive_types = {}
        for key in alive_model.param_dims.keys():
            alive_types[key] = female_variable_types[key]
        dead_types = {}
        for key in dead_model.param_dims.keys():
            dead_types[key] = female_variable_types[key]
        print(i_rep)

        alive_syn = fast_sample(alive_model, alive_types, n_alive)
        dead_syn = fast_sample(dead_model, dead_types, n_dead)

        # The alive samples get fixed values for these two columns.
        alive_syn['ep'] = 0
        alive_syn['lex.dur'] = 1.0

        combined = pd.concat([alive_syn, dead_syn])
        decoded = decode_data(combined, maps, for_poisson=False)

        out_path = './syn_data/female_data_{}_{}.csv'.format(eps_label, i_rep)
        decoded.to_csv(out_path, index=False)