parser.add_argument("--gpu", type=str, required=True) args = parser.parse_args() experiment_type = args.experiment_type gpu = args.gpu print('STARTING EXPERIMENTS <{}> WITH VAE'.format(experiment_type)) device = 'cuda:{}'.format(gpu) if torch.cuda.is_available() else 'cpu' data_path = DOWN_DATA_PATH if experiment_type == 'down_sample' else DATA_PATH demo_path = DOWN_DEMO_PATH if experiment_type == 'down_sample' else DEMO_PATH for fold_n in trange(5, desc='folds'): log_val_str = LOG_VAL_STR.format('vae', experiment_type, now, fold_n) log_te_str = LOG_TE_STR.format('vae', experiment_type, now, fold_n) ds = DataSplitter(data_path, demo_path, out_dir=OUT_DIR) pandas_dir_path, scipy_dir_path, uids_dic_path, tids_path = ds.get_paths( fold_n=fold_n) if experiment_type == 'up_sample': ds.up_sample_train_data_path(pandas_dir_path, scipy_dir_path, 'gender') # Setting seed for reproducibility reproducible(EXP_SEED) # --- Data --- # tr_loader = DataLoader(LFM2bDataset( scipy_dir_path,
type=str, required=True, choices=['standard', 'up_sample', 'down_sample']) args = parser.parse_args() experiment_type = args.experiment_type print('STARTING EXPERIMENTS <{}> WITH ITEMKNN'.format(experiment_type)) data_path = DOWN_DATA_PATH if experiment_type == 'down_sample' else DATA_PATH demo_path = DOWN_DEMO_PATH if experiment_type == 'down_sample' else DEMO_PATH for fold_n in trange(5, desc='folds'): log_val_str = LOG_VAL_STR.format('itemknn', experiment_type, now, fold_n) log_te_str = LOG_TE_STR.format('itemknn', experiment_type, now, fold_n) ds = DataSplitter(data_path, demo_path, out_dir=OUT_DIR) pandas_dir_path, scipy_dir_path, uids_dic_path, tids_path = ds.get_paths( fold_n=fold_n) if experiment_type == 'up_sample': up_tr_data_path, up_sp_tr_data_path = ds.up_sample_train_data_path( pandas_dir_path, scipy_dir_path, 'gender') # --- Data --- # sp_tr_data = sp.load_npz( up_sp_tr_data_path if experiment_type == 'up_sample' else os.path.join(scipy_dir_path, 'sp_tr_data.npz')) sp_vd_tr_data = sp.load_npz( os.path.join(scipy_dir_path, 'sp_vd_tr_data.npz'))
from utils.eval import eval_proced print('STARTING CONTROLLED EXPERIMENTS WITH ESAE') print('SEEDS ARE: {}'.format(SEEDS)) grid = {'lam': [1, 1e1, 1e2, 5e2, 1e3, 1e4, 1e5, 1e6, 1e7]} pg = ParameterGrid(grid) now = datetime.now() for trait in tqdm(TRAITS, desc='traits'): print('WORKING ON TRAIT: ' + trait) for seed in tqdm(SEEDS, desc='seeds'): log_val_str = LOG_VAL_STR.format('ease', now, trait, seed) log_te_str = LOG_TE_STR.format('ease', now, trait, seed) ds = DataSplitter(DATA_PATH, PERS_PATH, out_dir=OUT_DIR) pandas_dir_path, scipy_dir_path, uids_dic_path, tids_path = ds.get_paths( seed, trait) # Load data sp_tr_data = sp.load_npz(os.path.join(scipy_dir_path, 'sp_tr_data.npz')) sp_vd_tr_data = sp.load_npz( os.path.join(scipy_dir_path, 'sp_vd_tr_data.npz')) sp_vd_te_data = sp.load_npz( os.path.join(scipy_dir_path, 'sp_vd_te_data.npz')) sp_te_tr_data = sp.load_npz( os.path.join(scipy_dir_path, 'sp_te_tr_data.npz')) sp_te_te_data = sp.load_npz(
experiment_type = args.experiment_type gpu = args.gpu print('STARTING EVALUATION <{}> WITH VAE'.format(experiment_type)) device = 'cuda:{}'.format(gpu) if torch.cuda.is_available() else 'cpu' data_path = DOWN_DATA_PATH if experiment_type == 'down_sample' else DATA_PATH demo_path = DOWN_DEMO_PATH if experiment_type == 'down_sample' else DEMO_PATH experiment_datetime = best_configs[experiment_type] if not experiment_datetime: raise ValueError('Configuration <{}> for <{}> not found!'.format(experiment_datetime, experiment_type)) for fold_n in trange(5, desc='folds'): log_val_str = LOG_VAL_STR.format('vae', experiment_type, experiment_datetime, fold_n) log_te_str = LOG_TE_STR.format('vae', experiment_type, experiment_datetime, fold_n) ds = DataSplitter(data_path, demo_path, out_dir=OUT_DIR) pandas_dir_path, scipy_dir_path, uids_dic_path, tids_path = ds.get_paths(fold_n=fold_n) if experiment_type == 'up_sample': ds.up_sample_train_data_path(pandas_dir_path, scipy_dir_path, 'gender') # Setting seed for reproducibility reproducible(EXP_SEED) # --- Data --- # tr_loader = DataLoader(LFM2bDataset(scipy_dir_path, which='train', up_sample=experiment_type == 'up_sample'), batch_size=64, shuffle=True, num_workers=10) te_loader = DataLoader(LFM2bDataset(scipy_dir_path, pandas_dir_path, uids_dic_path, which='test'), batch_size=128,
grid = { "alpha": [5e-1, 1e-1, 1e-2, 1e-3], "l1_ratio": [1, 1e-1, 1e-2], "max_iter": [500] } pg = ParameterGrid(grid) now = datetime.now() for trait in tqdm(TRAITS, desc='traits'): print('WORKING ON TRAIT: ' + trait) for seed in tqdm(SEEDS, desc='seeds'): log_val_str = LOG_VAL_STR.format('slim', now, trait, seed) log_te_str = LOG_TE_STR.format('slim', now, trait, seed) ds = DataSplitter(DATA_PATH, PERS_PATH, out_dir=OUT_DIR) pandas_dir_path, scipy_dir_path, uids_dic_path, tids_path = ds.get_paths( seed, trait) # Load data sp_tr_data = sp.load_npz(os.path.join(scipy_dir_path, 'sp_tr_data.npz')) sp_vd_tr_data = sp.load_npz( os.path.join(scipy_dir_path, 'sp_vd_tr_data.npz')) sp_vd_te_data = sp.load_npz( os.path.join(scipy_dir_path, 'sp_vd_te_data.npz')) sp_te_tr_data = sp.load_npz( os.path.join(scipy_dir_path, 'sp_te_tr_data.npz')) sp_te_te_data = sp.load_npz(