Пример #1
0
def main():
    """Sample synthetic data from every trained model and write per-sex CSVs.

    Command-line arguments:
        sys.argv[1]: epsilon, parsed as ``float``; used to pick the model file
            and (rounded to 2 decimals) in the output file names.
        sys.argv[2]: seed, parsed as ``int``; used to pick the model file and
            in the output file names.

    The first file (in sorted order) in ``./train_models/`` whose name
    contains both ``str(eps)`` and ``str(seed)`` is loaded with
    ``pd.read_pickle`` and iterated. For each model, ``N`` rows are drawn with
    ``fast_sample``, split on the ``"is.female"`` column, decoded with
    ``decode_data`` and written to ``./syn_data/``.

    Raises:
        FileNotFoundError: if ``./train_models/`` is missing or holds no file
            whose name contains both the epsilon and the seed.
    """
    eps = float(sys.argv[1])
    seed = int(sys.argv[2])

    # Pick the model file in-process rather than via `ls | grep >> tmpfile`:
    # the shell version appended to a temp file (so stale content from an
    # earlier run could win) and treated "." in eps as a regex wildcard.
    # Hidden files are skipped because `ls` did not list them either.
    eps_tag, seed_tag = str(eps), str(seed)
    candidates = [
        fname for fname in sorted(os.listdir("./train_models/"))
        if not fname.startswith(".")
        and eps_tag in fname and seed_tag in fname
    ]
    if not candidates:
        raise FileNotFoundError(
            "no model file in ./train_models/ matches eps={} seed={}".format(
                eps, seed))
    model_fname = candidates[0]

    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    train_models = pd.read_pickle('./train_models/{}'.format(model_fname))

    for i_rep, train_model in enumerate(train_models):
        # Restrict the variable-type table to the variables of this model.
        train_variable_types = {
            key: train_variable_types_base[key]
            for key in train_model.param_dims.keys()
        }
        print(i_rep)
        syn_data = fast_sample(train_model, train_variable_types, N)

        # Split the joint sample by the "is.female" indicator and decode
        # each part separately.
        female_syn_data = syn_data[syn_data["is.female"] == 1]
        male_syn_data = syn_data[syn_data["is.female"] == 0]
        female_syn_decoded = decode_data(female_syn_data,
                                         maps,
                                         for_poisson=False)
        male_syn_decoded = decode_data(male_syn_data, maps, for_poisson=False)

        female_syn_decoded.to_csv(
            './syn_data/female_data_{}_{}_{}.csv'.format(
                seed, np.round(eps, 2), i_rep),
            index=False)
        male_syn_decoded.to_csv(
            './syn_data/male_data_{}_{}_{}.csv'.format(
                seed, np.round(eps, 2), i_rep),
            index=False)
Пример #2
0
def main():
    """Sample synthetic male data from every stored male model and save CSVs.

    Command-line arguments:
        sys.argv[1]: epsilon, parsed as ``float``; used to pick the model file
            and (rounded to 2 decimals) in the output file names.
        sys.argv[2]: seed, parsed as ``int``; used to pick the model file and
            in the output file names.

    The first file (in sorted order) in ``./male_models/`` whose name contains
    both ``str(eps)`` and ``str(seed)`` is loaded with ``pd.read_pickle`` and
    iterated. For each model, ``N_male`` rows are drawn with ``fast_sample``,
    decoded with ``decode_data`` and written to ``./syn_data/``.

    Raises:
        FileNotFoundError: if ``./male_models/`` is missing or holds no file
            whose name contains both the epsilon and the seed.
    """
    eps = float(sys.argv[1])
    seed = int(sys.argv[2])

    # Pick the model file in-process rather than via `ls | grep >> tmpfile`:
    # the shell version appended to a temp file (so stale content from an
    # earlier run could win) and treated "." in eps as a regex wildcard.
    # Hidden files are skipped because `ls` did not list them either.
    eps_tag, seed_tag = str(eps), str(seed)
    candidates = [
        fname for fname in sorted(os.listdir("./male_models/"))
        if not fname.startswith(".")
        and eps_tag in fname and seed_tag in fname
    ]
    if not candidates:
        raise FileNotFoundError(
            "no model file in ./male_models/ matches eps={} seed={}".format(
                eps, seed))
    model_fname = candidates[0]
    print(model_fname)

    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    male_models = pd.read_pickle('./male_models/{}'.format(model_fname))

    for i_rep, male_model in enumerate(male_models):
        # Restrict the variable-type table to the variables of this model.
        male_variable_types = {
            key: male_variable_types_[key]
            for key in male_model.param_dims.keys()
        }

        print(i_rep)
        male_syn_data = fast_sample(male_model, male_variable_types, N_male)
        male_syn_decoded = decode_data(male_syn_data, maps, for_poisson=False)
        male_syn_decoded.to_csv(
            './syn_data/male_data_{}_{}_{}.csv'.format(
                seed, np.round(eps, 2), i_rep),
            index=False)
def main():
    """Sample synthetic female data from paired alive/dead models and save CSVs.

    Command-line arguments:
        sys.argv[1]: epsilon, parsed as ``float``; used to pick the model files
            and (rounded to 2 decimals) in the output file names.
        sys.argv[2]: seed, parsed as ``int``; used to pick the model files and
            in the output file names.

    Among the files in ``./female_models/`` whose names contain both
    ``str(eps)`` and ``str(seed)``, the first one containing ``"alive"`` and
    the first one containing ``"dead"`` are loaded with ``pd.read_pickle`` and
    iterated in lockstep. For each pair, the alive/dead sample sizes are
    derived from a Laplace-noised dead proportion, both models are sampled
    with ``fast_sample``, the alive sample gets ``ep = 0`` and
    ``lex.dur = 1.0``, and the concatenated, decoded data is written to
    ``./syn_data/``.

    Raises:
        FileNotFoundError: if ``./female_models/`` is missing, or no matching
            "alive" or "dead" model file is found.
    """
    eps = float(sys.argv[1])
    seed = int(sys.argv[2])

    # Pick the model files in-process rather than via `ls | grep >> tmpfile`:
    # the shell version appended to a temp file (so stale content from an
    # earlier run could win) and treated "." in eps as a regex wildcard.
    # Hidden files are skipped because `ls` did not list them either.
    eps_tag, seed_tag = str(eps), str(seed)
    candidates = [
        fname for fname in sorted(os.listdir("./female_models/"))
        if not fname.startswith(".")
        and eps_tag in fname and seed_tag in fname
    ]
    alive_candidates = [fname for fname in candidates if 'alive' in fname]
    dead_candidates = [fname for fname in candidates if 'dead' in fname]
    if not alive_candidates:
        raise FileNotFoundError(
            "no 'alive' model file in ./female_models/ matches "
            "eps={} seed={}".format(eps, seed))
    if not dead_candidates:
        raise FileNotFoundError(
            "no 'dead' model file in ./female_models/ matches "
            "eps={} seed={}".format(eps, seed))
    alive_model_fname = alive_candidates[0]
    dead_model_fname = dead_candidates[0]

    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    alive_female_models = pd.read_pickle(
        './female_models/{}'.format(alive_model_fname))
    dead_female_models = pd.read_pickle(
        './female_models/{}'.format(dead_model_fname))

    for i_rep, (alive_female_model, dead_female_model) in enumerate(
            zip(alive_female_models, dead_female_models)):
        # Restrict the variable-type table to the variables of each model.
        alive_female_variable_types = {
            key: female_variable_types[key]
            for key in alive_female_model.param_dims.keys()
        }
        dead_female_variable_types = {
            key: female_variable_types[key]
            for key in dead_female_model.param_dims.keys()
        }

        print(i_rep)
        # Perturb the dead count with Laplace noise of scale 1/0.01
        # (presumably a privacy budget of 0.01 for this count -- confirm).
        noisy_dead_proportion = (
            N_female_dead + np.random.laplace(scale=(1. / 0.01))) / N_female
        # The noise is unbounded, so keep the proportion inside [0, 1];
        # otherwise one of the sample sizes below could become negative.
        noisy_dead_proportion = np.clip(noisy_dead_proportion, 0.0, 1.0)
        N_syn_female_alive = int((1. - noisy_dead_proportion) * N_female)
        N_syn_female_dead = int(noisy_dead_proportion * N_female)

        alive_female_syn_data = fast_sample(alive_female_model,
                                            alive_female_variable_types,
                                            N_syn_female_alive)
        dead_female_syn_data = fast_sample(dead_female_model,
                                           dead_female_variable_types,
                                           N_syn_female_dead)
        # Rows sampled from the alive model get fixed values for these two
        # columns before being merged with the dead sample.
        alive_female_syn_data['ep'] = 0
        alive_female_syn_data['lex.dur'] = 1.0

        female_syn_data = pd.concat(
            [alive_female_syn_data, dead_female_syn_data])
        female_syn_decoded = decode_data(female_syn_data,
                                         maps,
                                         for_poisson=False)
        female_syn_decoded.to_csv(
            './syn_data/female_data_{}_{}_{}.csv'.format(
                seed, np.round(eps, 2), i_rep),
            index=False)
def main():
    """Join the female models for one epsilon, then sample and save CSVs.

    Command-line arguments:
        sys.argv[1]: epsilon, parsed as ``float``; passed to
            ``join_models.py`` and (rounded to 2 decimals) used in the model
            and output file names.

    Runs ``python3 join_models.py <eps>``, then loads
    ``./female_models/alive_female_models_<eps>.p`` and
    ``./female_models/dead_female_models_<eps>.p`` with ``pd.read_pickle`` and
    iterates them in lockstep. For each pair, the alive and dead models are
    sampled with ``fast_sample`` at 80% and 20% of 208148 rows, the alive
    sample gets ``ep = 0`` and ``lex.dur = 1.0``, and the concatenated,
    decoded data is written to ``./syn_data/``.

    Raises:
        subprocess.CalledProcessError: if ``join_models.py`` exits with a
            non-zero status.
    """
    # Local import: this block is the only user of subprocess.
    import subprocess

    eps = float(sys.argv[1])

    # Fail loudly if the join step fails instead of silently continuing with
    # missing or stale pickle files; an argument list avoids the shell.
    subprocess.run(["python3", "join_models.py", str(eps)], check=True)

    eps_label = np.round(eps, 2)
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    alive_female_models = pd.read_pickle(
        './female_models/alive_female_models_{}.p'.format(eps_label))
    dead_female_models = pd.read_pickle(
        './female_models/dead_female_models_{}.p'.format(eps_label))

    # Fixed total sample size and alive/dead split used for every repetition.
    n_total = 208148
    n_alive = int(n_total * 0.8)
    n_dead = int(n_total * 0.2)

    for i_rep, (alive_female_model, dead_female_model) in enumerate(
            zip(alive_female_models, dead_female_models)):
        # Restrict the variable-type table to the variables of each model.
        alive_female_variable_types = {
            key: female_variable_types[key]
            for key in alive_female_model.param_dims.keys()
        }
        dead_female_variable_types = {
            key: female_variable_types[key]
            for key in dead_female_model.param_dims.keys()
        }

        print(i_rep)
        alive_female_syn_data = fast_sample(alive_female_model,
                                            alive_female_variable_types,
                                            n_alive)
        dead_female_syn_data = fast_sample(dead_female_model,
                                           dead_female_variable_types,
                                           n_dead)
        # Rows sampled from the alive model get fixed values for these two
        # columns before being merged with the dead sample.
        alive_female_syn_data['ep'] = 0
        alive_female_syn_data['lex.dur'] = 1.0

        female_syn_data = pd.concat(
            [alive_female_syn_data, dead_female_syn_data])
        female_syn_decoded = decode_data(female_syn_data,
                                         maps,
                                         for_poisson=False)
        female_syn_decoded.to_csv(
            './syn_data/female_data_{}_{}.csv'.format(eps_label, i_rep),
            index=False)