def main():
    """Script entry point: parse CLI options, build train/test loaders, and
    launch SDAE pretraining.

    Reads hyperparameters from the module-level ``cfg`` and the module-level
    ``parser``; writes TensorBoard logs under ``<outputdir>/runs/pretraining``.
    Stores the parsed namespace in the module-global ``args``.
    """
    global args
    cuda_on = torch.cuda.is_available()
    # Seed RNGs / configure backends before anything stochastic happens.
    initialize_environment(random_seed=cfg.RNG_SEED, use_cuda=cuda_on)
    args = parser.parse_args()

    datadir = args.db_dir
    outputdir = get_output_dir(args.db_dir)

    # Pull training hyperparameters out of CLI args and the static config.
    nepoch = args.nepoch
    step = args.step_epoch
    dropout = args.dropout
    n_layers = cfg.N_LAYERS
    input_dim = cfg.INPUT_DIM
    hidden_dims = cfg.HIDDEN_DIMS

    # TensorBoard logging directory, one sub-run per --id.
    loggin_dir = os.path.join(outputdir, 'runs', 'pretraining')
    if not os.path.exists(loggin_dir):
        os.makedirs(loggin_dir)
    tensorboard_logger.configure(os.path.join(loggin_dir, '%s' % (args.id)))

    trainset = EncodedTextDataset(root=datadir, train=True)
    testset = EncodedTextDataset(root=datadir, train=False)

    # pin_memory/workers only make sense when transferring to a CUDA device.
    loader_kwargs = {'num_workers': 0, 'pin_memory': True} if cuda_on else {}
    trainloader = torch.utils.data.DataLoader(
        trainset, batch_size=args.batchsize, shuffle=True, **loader_kwargs)
    testloader = torch.utils.data.DataLoader(
        testset, batch_size=100, shuffle=True, **loader_kwargs)

    pretrain(
        outputdir,
        {
            'nlayers': n_layers,
            'dropout': dropout,
            'reluslope': 0.0,
            'nepoch': nepoch,
            'lrate': [args.lr],
            'wdecay': [0.0],
            'step': step,
            'input_dim': input_dim,
            'hidden_dims': hidden_dims
        },
        cuda_on, trainloader, testloader)
def check_prelim():
    """Initial setup code. Eventually this will set options.

    Delegates to ``utils.initialize_environment`` with the module-level
    ``config`` and returns the directory structure it produces.
    """
    return utils.initialize_environment(config)
return args args = get_args() # n_clusters = 4 # data_dir = 'data/ag_news/' data_dir = args.data_dir n_clusters = args.n_clusters use_cuda = torch.cuda.is_available() random_seed = args.seed recons_lam = args.recons_lam cluster_lam = args.cluster_lam batch_size = args.batch_size tol = args.tol lr = args.lr initialize_environment(random_seed=random_seed, use_cuda=use_cuda) feat_path = os.path.join(data_dir, cfg.TRAIN_TEXT_FEAT_FILE_NAME) feat, labels, ids = load_feat(feat_path) outputdir = get_output_dir(data_dir) net_filename = os.path.join(outputdir, cfg.PRETRAINED_FAE_FILENAME) checkpoint = torch.load(net_filename) net = extract_sdae_model(input_dim=cfg.INPUT_DIM, hidden_dims=cfg.HIDDEN_DIMS) net.load_state_dict(checkpoint['state_dict']) if use_cuda: net.cuda() dcn = DCN(n_clusters, net, cfg.HIDDEN_DIMS[-1],
default=100, help='the number of seed for each class') parser.add_argument('--verbose', help='whether to print log', action='store_true') args = parser.parse_args() return args args = get_args() data_dir = args.data_dir random_seed = args.seed seed_num = args.seed_num verbose = args.verbose initialize_environment(random_seed=random_seed) _, labels, ids = load_csv_corpus( os.path.join(data_dir, cfg.TRAIN_DATA_NAME + '.csv')) dic = defaultdict(list) for tmp_id, tmp_label in zip(ids, labels): dic[tmp_label].append(tmp_id) results = [] for l, tmp_ids in dic.items(): random.shuffle(tmp_ids) tmp_ids = tmp_ids[:seed_num] results.extend([(tmp_id, l) for tmp_id in tmp_ids]) results.sort()
else: with bz2.open(memory_path, 'rb') as zipped_pickle_file: return pickle.load(zipped_pickle_file) def save_memory(memory, memory_path, disable_bzip): if disable_bzip: with open(memory_path, 'wb') as pickle_file: pickle.dump(memory, pickle_file) else: with bz2.open(memory_path, 'wb') as zipped_pickle_file: pickle.dump(memory, zipped_pickle_file) # Environment env, test_env = initialize_environment(args) n_actions = env.action_space.n # Agent dqn = Agent(args, env) # If a model is provided, and evaluate is fale, presumably we want to resume, so try to load memory if args.model is not None and not args.evaluate: if not args.memory: raise ValueError('Cannot resume training without memory save path. Aborting...') elif not os.path.exists(args.memory): raise ValueError( 'Could not find memory file at {path}. Aborting...'.format(path=args.memory)) mem = load_memory(args.memory, args.disable_bzip_memory)