def test():
    """Generate sparse (torch) pickles for each (alpha, parts) irrep and log memory growth."""
    alphas = [(2, 3, 3)]
    parts = [((2, ), (1, 1, 1), (1, 1, 1))]
    # pset records part-tuples already converted so the swapped variant
    # (p[0], p[2], p[1]) is not redone.
    pset = set()
    pkls = []  # NOTE(review): never appended to or read — dead local?
    mem_usg = [0]  # running memory snapshots; [-1] is the most recent reading
    for alpha in alphas:
        print('Computing sparse pickles for: {}'.format(alpha))
        #parts = partition_parts(alpha)
        for idx, p in enumerate(parts):
            # skip p if its swapped counterpart was already converted
            other = (p[0], p[2], p[1])
            if other in pset:
                continue
            np_pkl = gen_pickle_name('pickles', alpha, p)
            th_pkl = gen_pickle_name('pickles_sparse', alpha, p)
            gen_th_pkl(np_pkl, th_pkl)
            print('Done with {} | {}'.format(alpha, p))
            curr = check_memory()
            # report memory delta attributable to this irrep conversion
            print('{:2} irreps | {:30}: {:9.2f} | '.format(
                idx + 1, str(p), curr - mem_usg[-1]), end=' | ')
            mem_usg.append(curr)
            pset.add(p)
    print('Done')
    check_memory()
def proc_baseline_df(idx, df, mat):
    """Worker: score how often the irrep heuristic picks a truly-closest neighbor.

    idx: worker index (used only in log lines)
    df:  chunk of the module-level `all_df` (indexed by cube-state key)
    mat: per-state heuristic values, indexed via all_df's 'index' column
    Returns (correct, chosen_cubes) numpy arrays aligned with df's row order:
    correct[i] is 1.0 iff the heuristic's argmin neighbor is among the
    neighbors with the true minimum distance; chosen_cubes[i] is that
    neighbor's 'index' value.
    """
    global all_df
    i = 0
    start = time.time()
    correct = np.zeros(len(df))
    chosen_cubes = np.zeros(len(df), dtype=int)
    print('Proc {:2} started | '.format(idx), end='')
    check_memory()
    for c in (df.index):
        nbrs = neighbors_fixed_core_small(c)
        nbr_df = all_df.loc[nbrs]
        nbr_idx = nbr_df['index'] # need this for indexing into mat
        min_dist = nbr_df.distance.min()
        # all neighbors that achieve the true minimum distance
        min_cubes = nbr_df[nbr_df.distance == min_dist]
        vals = mat[nbr_idx]
        n_idx = np.argmin(vals)
        min_irrep_cube = nbrs[n_idx] # this gives the index
        # 1.0 iff the heuristic's pick is among the truly-closest neighbors
        correct[i] = (min_irrep_cube in min_cubes.index)
        chosen_cubes[i] = all_df.loc[min_irrep_cube]['index']
        i += 1
    end = time.time()
    print('Proc {:2} done: {:.2f}mins'.format(idx, (end - start) / 60.))
    return correct, chosen_cubes
def test_th_pkl(np_pkl, th_pkl):
    """Sanity-check that the sparse (torch) pickle matches the dense numpy one."""
    print('Testing equivalence')
    dense_dict = load_pkl(np_pkl)
    sparse_dict = load_sparse_pkl(th_pkl)
    # compare raises / reports on any mismatch between the two dicts
    compare(dense_dict, sparse_dict)
    print('All equal between numpy and torch versions!!')
    check_memory()
def par_ft(partition, fname, savedir, ncpu=16):
    """Compute the Fourier transform matrix for `partition` in parallel.

    The distance csv at `fname` is split into `ncpu` chunks, each chunk is
    transformed by `fts` in a worker process, and the per-chunk matrices
    are summed and saved to <savedir>/<partition>.npy.

    Returns the summed fourier matrix, or None if the output already exists.
    """
    if not os.path.exists(savedir):
        try:
            print('Directory {} doesnt exist. creating it now'.format(savedir))
            os.makedirs(savedir)
        except OSError:
            # another process may have created it between the check and makedirs
            print('Directory {} didnt exist. Tried to make it. Already made. Continuing...'.format(savedir))
    ferrers = FerrersDiagram(partition)
    print('Ferrers:')
    print(ferrers)
    df = pd.read_csv(fname, header=None, dtype={0: str, 1: int})
    check_memory()
    df_chunk = np.array_split(df, ncpu)
    arg_tups = [(chunk, ferrers) for chunk in df_chunk]
    savename = os.path.join(savedir, str(partition))
    print('Saving in: {}'.format(savename))
    if os.path.exists(savename):
        print('{} exists. Not running'.format(savename))
        # Bug fix: previously fell through and recomputed anyway; return
        # early, matching par_inv_ft's behavior.
        return
    with Pool(ncpu) as p:
        map_res = p.starmap(fts, arg_tups)
        # sum of these matrices is what we want
        fourier_mat = sum(map_res)
        np.save(savename, fourier_mat)
    return fourier_mat
def par_inv_ft(partition, fname, savedir, ncpu=16):
    """Inverse Fourier transform, parallelized over row chunks of the csv.

    Splits the csv at `fname` into `ncpu` chunks, runs `inv_transform` on
    each chunk in a worker process, writes the reconstructed values back
    into column 1, and saves the frame to <savedir>/<partition>.csv.
    Returns the dataframe, or None if the output file already exists.
    """
    if not os.path.exists(savedir):
        try:
            print('Directory {} doesnt exist. creating it now'.format(savedir))
            os.makedirs(savedir)
        except:
            # raced with another process creating the directory
            print(
                'Directory {} didnt exist. Tried to make it. Already made. Continuing...'
                .format(savedir))
    ferrers = FerrersDiagram(partition)
    df = pd.read_csv(fname, header=None, dtype={0: str, 1: int})
    check_memory()
    df_chunk = np.array_split(df, ncpu)
    arg_tups = [(chunk, ferrers) for chunk in df_chunk]
    savename = os.path.join(savedir, str(partition)) + '.csv'
    print('Saving in: {}'.format(savename))
    if os.path.exists(savename):
        print('{} exists. Not running'.format(savename))
        return
    with Pool(ncpu) as p:
        results = p.starmap(inv_transform, arg_tups)
        # each worker returns a list; flatten them in chunk order
        concat_results = sum(results, [])
        df[1] = concat_results
        df.to_csv(savename, header=None, index=False)
    return df
def test():
    """Time the loading of the cube dataframe and one irrep dict; report memory."""
    t0 = time.time()
    alpha = (2, 3, 3)
    parts = ((2,), (3,), (3,))
    df = load_df('/scratch/hopan/cube/')
    irrep_dict = load_irrep('/scratch/hopan/cube/', alpha, parts)
    t1 = time.time()
    check_memory()
def par_cube_ift(rank, size, alpha, parts):
    """MPI worker: evaluate the inverse Fourier transform of fhat on this
    rank's contiguous chunk of cube states.

    rank/size: this process' rank and the world size
    alpha/parts: weak partition and its parts, naming the irrep
    Returns a complex vector of length len(df) // size with the function
    evaluations for rows [rank*chunk, (rank+1)*chunk) of the dataframe.
    """
    start = time.time()
    try:
        df = load_df('/scratch/hopan/cube/')
        irrep_dict = load_irrep('/scratch/hopan/cube/', alpha, parts)
        fhat = np.load('/scratch/hopan/cube/fourier/{}/{}.npy'.format(
            alpha, parts))
    except Exception as e:
        # NOTE(review): execution continues after this, so df/irrep_dict/fhat
        # may be unbound on failure — a NameError will follow.
        print('rank {} | memory usg: {} | exception {}'.format(
            rank, check_memory(verbose=False), e))
    print(
        'Rank {:3d} / {} | load irrep: {:.2f}s | mem: {:.2f}mb | {} {}'.format(
            rank, size, time.time() - start, check_memory(verbose=False),
            alpha, parts))
    cos_reps = coset_reps(sn(8), young_subgroup_perm(alpha))
    save_dict = {}  # NOTE(review): never used in this function
    cyc_irrep_func = cyclic_irreps(alpha)
    chunk_size = len(df) // size
    start_idx = chunk_size * rank
    mat = np.zeros(chunk_size, dtype=fhat.dtype)
    # hoisted out of the loop: reused for every row's inner product
    fhat_t_ravel = fhat.T.ravel()
    #print('Rank {} | {:7d}-{:7d}'.format(rank, start_idx, start_idx + chunk_size))
    if rank == 0:
        print(
            'Rank {} | elapsed: {:.2f}s | {:.2f}mb | mat shape: {} | done load | {} {}'
            .format(rank, time.time() - start, check_memory(verbose=False),
                    fhat.shape, alpha, parts))
    for idx in range(start_idx, start_idx + chunk_size):
        row = df.loc[idx]
        otup = tuple(int(i) for i in row[0])
        perm_tup = tuple(int(i) for i in row[1])
        #dist = int(row[2])
        # actually want the inverse
        wmat = wreath_rep(otup, perm_tup, irrep_dict, cos_reps, cyc_irrep_func)
        wmat_inv = wmat.conj().T
        # trace(rho(ginv) fhat) = trace(fhat rho(ginv)) = vec(fhat.T).dot(vec(rho(ginv)))
        #feval = np.dot(fhat.T.ravel(), wmat_inv.ravel())
        feval = np.dot(fhat_t_ravel, wmat_inv.ravel())
        mat[idx - start_idx] = fhat.shape[0] * feval
    if rank == 0:
        print('Rank {} | elapsed: {:.2f}s | {:.2f}mb | done add'.format(
            rank, time.time() - start, check_memory(verbose=False)))
    # free the big irrep dict before returning to keep peak memory down
    del irrep_dict
    if rank == 0:
        print('Rank {} | elapsed: {:.2f}s | {:.2f}mb | done matrix conversion'.
              format(rank, time.time() - start, check_memory(verbose=False)))
    return mat
def main(alpha, parts, savedir):
    """Load the dense irrep dict, convert it to sparse form, and pickle it."""
    load_start = time.time()
    irrep_dict = load_irrep(savedir, alpha, parts)
    load_end = time.time()
    check_memory()
    print('Load time {:.2f}s | {} {}'.format(load_end - load_start, alpha, parts))
    sp_dict = convert(alpha, parts, irrep_dict=irrep_dict)
    print('Convert time {:.2f}s'.format(time.time() - load_end))
    check_memory()
    save_sp_pkl(sp_dict, savedir, alpha, parts)
def mpi_main(alpha, parts):
    """MPI driver: each rank computes its slice of the inverse FT via
    par_cube_ift; rank 0 gathers all slices, concatenates them, and saves
    the full evaluation vector to fourier_sym_eval/<alpha>/<parts>.npy.
    """
    savename = '/scratch/hopan/cube/fourier_sym_eval/{}/{}.npy'.format(
        alpha, parts)
    if os.path.exists(savename):
        print('File {} exists! Skipping'.format(savename))
        exit()
        #print('File {} exists! Running anyway!'.format(savename))
    comm = MPI.COMM_WORLD
    size = MPI.COMM_WORLD.Get_size()
    rank = MPI.COMM_WORLD.Get_rank()
    name = MPI.Get_processor_name()
    if rank == 0:
        print('starting {} | {}'.format(alpha, parts))
    _start = time.time()
    start = time.time()
    # mat = par_cube_ft(alpha, parts, irrep_dict, lst)
    mat = par_cube_ift(rank, size, alpha, parts)
    #all_mats = comm.gather(mat, root=0)
    if rank == 0:
        print('post par cube ft: {:.2f}s | mem: {:.2f}mb'.format(
            time.time() - start, check_memory(verbose=False)))
    sendmat = mat
    recvmat = None
    if rank == 0:
        # receive buffer: one row per rank, each of sendmat's shape
        recvmat = np.empty([size, *sendmat.shape], dtype=sendmat.dtype)
        print('Before gather: {:.2f}s | mem {:.2f}mb'.format(
            time.time() - start, check_memory(verbose=False)))
    comm.Gather(sendmat, recvmat, root=0)
    if rank == 0:
        print('Elapsed for gather: {:.2f}s | mem {:.2f}mb'.format(
            time.time() - start, check_memory(verbose=False)))
        #res_mat = np.sum(recvmat, axis=0)
        # ranks own disjoint slices, so flattening (not summing) rebuilds
        # the full evaluation vector in order
        res_mat = recvmat.reshape(-1)
        print('All done | {:.2f}s | shape {} | mem {:.2f}mb'.format(
            time.time() - _start, res_mat.shape, check_memory(verbose=False)))
        # save dir
        if not os.path.exists(
                '/scratch/hopan/cube/fourier_sym_eval/{}'.format(alpha)):
            os.makedirs(
                '/scratch/hopan/cube/fourier_sym_eval/{}'.format(alpha))
        savename = '/scratch/hopan/cube/fourier_sym_eval/{}/{}'.format(
            alpha, parts)
        np.save(savename, res_mat)
        print('Done saving in {}! | Total time: {:.2f}s'.format(
            savename, time.time() - _start))
def par_main(par_f, ncpu):
    """Split the full cube dataframe into `ncpu` chunks and run the worker
    `par_f(idx, chunk)` over them in a process pool.

    Assigns the module-level `all_df` so forked workers inherit it.
    Returns the list of per-chunk worker results.
    """
    global all_df
    all_df = load_cube_df_indexed()
    start = time.time()
    df_chunk = np.array_split(all_df, ncpu)
    # NOTE(review): these four lookups are loaded but never used below —
    # presumably kept for a side effect or leftover code; verify.
    idx_to_nbrs, idx_to_cube, idx_to_dist, cube_to_idx = load_pkls()
    arg_tups = [(idx, _d) for idx, _d in enumerate(df_chunk)]
    print('Starting par proc with {} processes...'.format(ncpu))
    check_memory()
    with Pool(ncpu) as p:
        map_res = p.starmap(par_f, arg_tups)
    print('Elapsed proc time: {:.2f}min'.format(
        (time.time() - start) / 60.
    ))
    return map_res
def full_transform(args, alpha, parts, split_chunks):
    """Compute and save the full Fourier transform matrix for one irrep.

    args: namespace providing savedir, alpha (string form), pkldir, par
    split_chunks: list of lists of split filenames, one inner list per worker
    Exits the process early if the output .npy already exists.
    """
    print('Computing full transform for alpha: {} | parts: {}'.format(
        alpha, parts))
    savedir_alpha = os.path.join(args.savedir, args.alpha)
    savename = os.path.join(savedir_alpha, '{}'.format(parts))
    print('Savename: {}'.format(savename))
    if os.path.exists(savename + '.npy'):
        print('Skipping. Already computed fourier matrix for: {} | {}'.format(
            alpha, parts))
        exit()
    manager = Manager()
    irrep_dict = load_pkl(
        os.path.join(args.pkldir, args.alpha, '{}.pkl'.format(parts)))
    mem_usg = check_memory(verbose=False)
    if not os.path.exists(savedir_alpha):
        print('Making: {}'.format(savedir_alpha))
        os.makedirs(savedir_alpha)
    if args.par > 1:
        print('Par process with {} processes...'.format(len(split_chunks)))
        # shared dict so each worker can report its peak memory usage by pid
        mem_dict = manager.dict()
        with Pool(len(split_chunks)) as p:
            arg_tups = [(_fn, irrep_dict, alpha, parts, mem_dict)
                        for _fn in split_chunks]
            matrices = p.starmap(text_split_transform, arg_tups)
            np.save(savename, sum(matrices))
    else:
        print('Single thread...')
        matrices = []
        block_size = wreath_dim(parts)
        n_cosets = coset_size(alpha)
        shape = (block_size * n_cosets, block_size * n_cosets)
        # NOTE(review): `result` accumulates the same sum that is recomputed
        # via sum(matrices) below — one of the two is redundant
        result = np.zeros(shape, dtype=np.complex128)
        mem_dict = {}
        for _fn in split_chunks:
            res = text_split_transform(_fn, irrep_dict, alpha, parts)
            matrices.append(res)
            result += res
        np.save(savename, sum(matrices))
    print('Post loading pickle mem usg: {:.4}mb | Final mem usg: {:.4f}mb'.
          format(mem_usg, check_memory(False)))
    print('Processes')
    for pid, usg in mem_dict.items():
        print('{} | {:.4f}mb'.format(pid, usg))
    print('Done!')
def text_split_transform(fsplit_lst, irrep_dict, alpha, parts, mem_dict=None):
    '''
    Accumulate the Fourier transform contribution of a set of text split files.

    fsplit_pkl: list of split file names of the distance values
        for a chunk of the total distance values
    irrep_dict: irrep dict
    alpha: weak partition
    parts: list/iterable of partitions of the parts of alpha
    mem_dict: optional shared dict; this worker records its peak memory
        usage (mb) under its pid
    Returns the accumulated square matrix of side wreath_dim(parts) *
    coset_size(alpha).
    '''
    print(' Computing transform on splits: {}'.format(fsplit_lst))
    cos_reps = coset_reps(sn(8), young_subgroup_perm(alpha))
    save_dict = {}
    cyc_irrep_func = cyclic_irreps(alpha)
    pid = os.getpid()
    for split_f in fsplit_lst:
        with open(split_f, 'r') as f:
            for line in tqdm(f):
                otup, perm_tup, dist = clean_line(line)
                perm_rep = irrep_dict[
                    perm_tup]  # perm_rep is a dict of (i, j) -> matrix
                block_cyclic_rep = block_cyclic_irreps(otup, cos_reps,
                                                       cyc_irrep_func)
                # accumulate dist-weighted blocks into save_dict in place
                mult_yor_block(perm_rep, dist, block_cyclic_rep, save_dict)
        if mem_dict is not None:
            # track this worker's peak memory across splits
            mem_dict[pid] = max(check_memory(verbose=False),
                                mem_dict.get(pid, 0))
    block_size = wreath_dim(parts)
    n_cosets = coset_size(alpha)
    mat = convert_yor_matrix(save_dict, block_size, n_cosets)
    return mat
def split_transform(fsplit_lst, irrep_dict, alpha, parts, mem_dict=None):
    '''
    Accumulate the Fourier transform contribution of a set of pickled splits.

    fsplit_pkl: list of pkl file names of the distance values
        for a chunk of the total distance values
    irrep_dict: irrep dict
    alpha: weak partition
    parts: list/iterable of partitions of the parts of alpha
    mem_dict: optional shared dict; this worker records its peak memory
        usage (mb) under its pid
    Returns the accumulated square matrix of side wreath_dim(parts) *
    coset_size(alpha).
    '''
    print(' Computing transform on splits: {}'.format(fsplit_lst))
    cos_reps = coset_reps(sn(8), young_subgroup_perm(alpha))
    save_dict = {}
    cyc_irrep_func = cyclic_irreps(alpha)
    pid = os.getpid()
    for fsplit_pkl in fsplit_lst:
        # Bug fix: the original wrapped this in `with open(fsplit_pkl, 'r')`
        # but never used the handle — load_pkl opens the file itself, so the
        # extra open only leaked a redundant descriptor for the duration.
        pkl_dict = load_pkl(fsplit_pkl)  # dict of function values
        for perm_tup, tup_dict in pkl_dict.items():
            for tup, dists in tup_dict.items():
                dist_tot = sum(dists)
                perm_rep = irrep_dict[
                    perm_tup]  # perm_rep is a dict of (i, j) -> matrix
                block_cyclic_rep = block_cyclic_irreps(
                    tup, cos_reps, cyc_irrep_func)
                # accumulate dist_tot-weighted blocks into save_dict in place
                mult_yor_block(perm_rep, dist_tot, block_cyclic_rep,
                               save_dict)
        if mem_dict is not None:
            mem_dict[pid] = max(check_memory(verbose=False),
                                mem_dict.get(pid, 0))
        # free the chunk before loading the next one to cap peak memory
        del pkl_dict
    block_size = wreath_dim(parts)
    n_cosets = coset_size(alpha)
    mat = convert_yor_matrix(save_dict, block_size, n_cosets)
    return mat
def par_irrep_main(par_f, alpha, parts, ncpu, nrows=20000):
    """Run worker `par_f(idx, chunk, real_mat)` in parallel over chunks of
    the first `nrows` cube states.

    par_f: worker taking (chunk index, dataframe chunk, real irrep matrix)
        and returning (correct, chosen) arrays
    nrows: number of leading rows of the cube dataframe to evaluate;
        the default preserves the previously hard-coded 20000 cap
    Returns (correct, chosen) arrays concatenated across chunks.
    """
    global all_df
    all_df = load_cube_df_indexed()
    # generalized: the row cap used to be the literal 20000
    df_chunk = np.array_split(all_df[:nrows], ncpu)
    real_mat = irrep_feval(alpha, parts).real
    arg_tups = [(idx, _d, real_mat) for idx, _d in enumerate(df_chunk)]
    print('Before pool | ', end='')
    check_memory()
    with Pool(ncpu) as p:
        map_res = p.starmap(par_f, arg_tups)
    par_correct, par_chosen_cubes = zip(*map_res)
    cat_correct = np.concatenate(par_correct)
    cat_chosen = np.concatenate(par_chosen_cubes)
    return cat_correct, cat_chosen
def gen_th_pkl(np_pkl, th_pkl):
    """Convert a dense numpy irrep pickle into a block-sparse dict keyed by
    permutation tuple (each value holds 'idx' and 'real'), plus a top-level
    'size' entry with the dense matrix size.

    NOTE(review): the actual pickle.dump is commented out, so this currently
    builds and then discards the sparse dict — the 'Created:' message and
    the skip check at the top (whose `return` is also commented out) are
    both misleading as written.
    """
    if os.path.exists(th_pkl):
        print('Skipping pkl: {}'.format(th_pkl))
        #return
    else:
        print('Not skipping pkl: {}'.format(th_pkl))
    if not os.path.exists(np_pkl):
        print(np_pkl, 'doesnt exist! Exiting!')
        exit()
    else:
        dirname = os.path.dirname(th_pkl)
        try:
            os.makedirs(dirname)  # rp
        except:
            # directory already exists (or could not be created)
            print('makedirs: Director already exists {}? {}'.format(
                dirname, os.path.exists(dirname)))
    print('trying to open: {}'.format(np_pkl))
    with open(np_pkl, 'rb') as f:
        ydict = pickle.load(f)
    check_memory()
    print('after loading {}'.format(np_pkl))
    sparse_tdict = {}
    for perm_tup, rep_dict in tqdm(ydict.items()):
        # idx/vreal: sparse block coordinates and real values; size: the
        # (shared) dense matrix size returned for every rep_dict
        idx, vreal, size = to_block_sparse(rep_dict)
        sparse_tdict[perm_tup] = {
            'idx': idx,
            'real': vreal,
        }
    check_memory()
    print('making the sparse dict loading {}'.format(th_pkl))
    # free the dense dict before (notionally) dumping the sparse one
    del ydict
    # hacky way to assign this
    sparse_tdict['size'] = size
    #with open(th_pkl, 'wb') as f:
    #    pickle.dump(sparse_tdict, f, protocol=pickle.HIGHEST_PROTOCOL)
    print('Created:', th_pkl)
    del sparse_tdict
def bfs(fname):
    """Breadth-first search over the pyraminx-tip state graph, writing one
    '<state>,<distance>' line to `fname` per reachable state."""
    root = init_pyraminx_tip()
    frontier = deque([(root, 0)])
    seen = set()
    with open(fname, 'w') as out:
        while frontier:
            state, depth = frontier.popleft()
            # a state can be enqueued more than once before it is visited
            if state in seen:
                continue
            out.write('{},{}\n'.format(pyraminx_tip_str(state), depth))
            seen.add(state)
            for nxt in pyraminx_tip_nbrs(state):
                if nxt not in seen:
                    frontier.append((nxt, depth + 1))
    check_memory()
def bfs(root, fname):
    """BFS from `root` over grid states.

    Writes '<state>,<distance>' lines to `fname` and returns a dict mapping
    each reachable state tuple to its BFS distance from root.
    """
    print('Writing to: {}'.format(fname))
    with open(fname, 'w') as f:
        to_visit = deque([(root, 0)])
        dist_dict = {np_to_tup(root): 0}
        f.write('{},0\n'.format(np_to_str(root)))
        while to_visit:
            curr, dist = to_visit.popleft()
            # (removed dead `ctup = np_to_tup(curr)` — its result was never
            # used, so it was pure wasted work per dequeued node)
            for nbr in neighbors(curr).keys():
                ntup = np_to_tup(nbr)
                if ntup not in dist_dict:
                    dist_dict[ntup] = dist + 1
                    f.write('{},{}\n'.format(tup_to_str(ntup), dist + 1))
                    # append the grid not the nbr
                    to_visit.append((nbr, dist + 1))
    check_memory()
    return dist_dict
def test(seed):
    """Compare A* node expansions with the plain manhattan heuristic vs the
    irrep-based manhattan heuristic on `cnt` random 3x3 tile puzzles, and
    save the per-puzzle counts to ./results/results_<seed>.csv.
    """
    cnt = 10
    random.seed(seed)
    print('A star with seed: {} | cnt: {}'.format(seed, cnt))
    size = 3
    puzzle = TileEnv(size)
    puzzles = []
    man_nodes = []
    irrep_nodes = []
    for idx in range(cnt):
        puzzle.reset()
        puzzles.append(puzzle.tup_state())
        resm = a_star(puzzle.grid, manhattan_grid)
        man_nodes.append(resm['nodes_explored'])
        print('{:3} | {}'.format(idx, resm))
    # hoisted out of the loop: the heuristic is identical for every puzzle,
    # so build it once instead of once per iteration
    parts = [(9,), (8, 1)]
    irrep_manh = irrep_gen_func(parts, 'manhattan_eval')
    for idx, perm in enumerate(puzzles):
        puzzle._assign_perm(perm)
        resi = a_star(puzzle.grid, irrep_manh)
        # Bug fix: this appended resm['nodes_explored'] (the last manhattan
        # result) so the irrep column never contained irrep data.
        irrep_nodes.append(resi['nodes_explored'])
        print('{:3} | {}'.format(idx, resi))
    puzzle_strs = [tup_to_str(t) for t in puzzles]
    df = pd.DataFrame({'perms': puzzle_strs, 'manhattan': man_nodes, 'manhattan_irrep': irrep_nodes})
    df.to_csv('./results/results_{}.csv'.format(seed), header=True)
    check_memory()
def test(ntrials=100):
    """Benchmark Cube2IrrepEnv: report setup cost and the average per-action
    cost of stepping `ntrials` random moves."""
    t_init = time.time()
    alpha = (2, 3, 3)
    parts = ((2,), (1, 1, 1), (1, 1, 1))
    env = Cube2IrrepEnv(alpha, parts)
    setup_time = time.time() - t_init
    print('Done loading: {:.2f}s'.format(setup_time))
    res = env.reset()
    stuff = []
    for _ in range(ntrials):
        move = random.choice(range(1, 7))
        res, _, _, _ = env.step(move)
        stuff.append(res)
    check_memory()
    # simulation time excludes the one-off environment construction
    sim_time = (time.time() - t_init) - setup_time
    per_action_time = sim_time / ntrials
    print('Setup time: {:.4f}s'.format(setup_time))
    print('Total time: {:.4f}s'.format(sim_time))
    print('Per action: {:.4f}s'.format(per_action_time))
def par_cube_ft(rank, size, alpha, parts):
    """MPI worker: accumulate this rank's chunk of the cube Fourier
    transform for irrep (alpha, parts).

    Each rank processes a contiguous len(df) // size slice of the distance
    dataframe and returns its partial square matrix (side wreath_dim(parts)
    * coset_size(alpha)); the caller is responsible for summing partials.
    """
    start = time.time()
    try:
        df = load_df('/scratch/hopan/cube/')
        irrep_dict = load_irrep('/scratch/hopan/cube/', alpha, parts)
    except Exception as e:
        # NOTE(review): execution continues, so df/irrep_dict may be unbound
        # after a load failure — a NameError will follow.
        print('rank {} | memory usg: {} | exception {}'.format(rank, check_memory(verbose=False), e))
    print('Rank {:3d} / {} | load irrep: {:.2f}s | mem: {}mb'.format(rank, size, time.time() - start, check_memory(verbose=False)))
    cos_reps = coset_reps(sn(8), young_subgroup_perm(alpha))
    save_dict = {}
    cyc_irrep_func = cyclic_irreps(alpha)
    chunk_size = len(df) // size
    start_idx = chunk_size * rank
    #print('Rank {} | {:7d}-{:7d}'.format(rank, start_idx, start_idx + chunk_size))
    if rank == 0:
        print('Rank {} | elapsed: {:.2f}s | {:.2f}mb | done load'.format(rank, time.time() - start, check_memory(verbose=False)))
    for idx in range(start_idx, start_idx + chunk_size):
        row = df.loc[idx]
        otup = tuple(int(i) for i in row[0])
        perm_tup = tuple(int(i) for i in row[1])
        dist = int(row[2])
        perm_rep = irrep_dict[perm_tup]  # perm_rep is a dict of (i, j) -> matrix
        block_cyclic_rep = block_cyclic_irreps(otup, cos_reps, cyc_irrep_func)
        # accumulate dist-weighted blocks into save_dict in place
        mult_yor_block(perm_rep, dist, block_cyclic_rep, save_dict)
    if rank == 0:
        print('Rank {} | elapsed: {:.2f}s | {:.2f}mb | done add'.format(rank, time.time() - start, check_memory(verbose=False)))
    # free the big irrep dict before assembling the dense matrix
    del irrep_dict
    block_size = wreath_dim(parts)
    n_cosets = coset_size(alpha)
    mat = convert_yor_matrix(save_dict, block_size, n_cosets)
    if rank == 0:
        print('Rank {} | elapsed: {:.2f}s | {:.2f}mb | done matrix conversion'.format(rank, time.time() - start, check_memory(verbose=False)))
    return mat
def main(hparams):
    """Train an irrep-based value/Q network (IrrepDVN / IrrepDQN /
    IrrepOnehotDVN per hparams['model_type']) on TileIrrepEnv using
    shuffle-trajectory rollouts, epsilon-greedy exploration, a replay
    memory, and periodic target-network syncs and evaluations.
    """
    # NOTE(review): eval() on a CLI-provided string — fine for trusted
    # experiment configs, unsafe on untrusted input.
    partitions = eval(hparams['partitions'])
    env = TileIrrepEnv(hparams['tile_size'], partitions, hparams['reward'])
    if hparams['model_type'] == 'IrrepDVN':
        log.info('Making IrrepDVN')
        pol_net = IrrepDVN(partitions)
        targ_net = IrrepDVN(partitions)
    elif hparams['model_type'] == 'IrrepDQN':
        log.info('Making IrrepDQN')
        pol_net = IrrepDQN(partitions, nactions=4)
        targ_net = IrrepDQN(partitions, nactions=4)
    elif hparams['model_type'] == 'IrrepOnehotDVN':
        log.info('Making IrrepOnehotDVN')
        pol_net = IrrepOnehotDVN(env.onehot_shape, env.irrep_shape,
                                 hparams['n_hid'], partitions)
        targ_net = IrrepOnehotDVN(env.onehot_shape, env.irrep_shape,
                                  hparams['n_hid'], partitions)
    opt = torch.optim.Adam(pol_net.parameters(), hparams['lr'])
    memory = SimpleMemory(hparams['capacity'], pol_net.mem_dict(env),
                          pol_net.dtype_dict())
    torch.manual_seed(hparams['seed'])
    np.random.seed(hparams['seed'])
    random.seed(hparams['seed'])
    n_updates = 0
    iters = 0
    losses = []
    dones = []
    rews = set()  # distinct reward values observed (sanity tracking)
    for e in range(hparams['epochs'] + 1):
        # walk a random-length scramble trajectory and learn from each state
        shuffle_len = random.randint(hparams['shuffle_min'],
                                     hparams['shuffle_max'])
        states = env.shuffle(shuffle_len)
        #grid_state = env.reset(output='grid') # is this a grid state?
        #for i in range(hparams['max_iters']):
        for dist, (grid_state, _x, _y) in enumerate(states):
            #_x, _y = env.x, env.y # C
            nbrs, onehot_nbrs = env.all_nbrs(grid_state, _x, _y)
            # epsilon-greedy action selection
            if random.random() < exp_rate(hparams['max_exp_epochs'], e,
                                          hparams['min_exp_rate']):
                action = random.choice(env.valid_moves(_x, _y))
            else:
                if hparams['model_type'] == 'IrrepDVN':
                    action = pol_net.get_action(env, grid_state, e,
                                                all_nbrs=nbrs, x=_x, y=_y)
                elif hparams['model_type'] == 'IrrepDQN':
                    action = pol_net.get_action_grid(env, grid_state,
                                                     x=_x, y=_y)
            # peek: evaluate the action from this trajectory state without
            # mutating the env's own state
            new_irrep_state, reward, done, info = env.peek(
                grid_state, _x, _y, action)
            rews.add(reward)
            #new_irrep_state, reward, done, info = env.step(action) # c
            if hparams['model_type'] == 'IrrepDVN':
                memory.push({
                    'grid_state': grid_state,
                    'irrep_state': env.cat_irreps(grid_state),
                    'irrep_nbrs': nbrs,
                    'action': action,
                    'reward': reward,
                    'done': done,
                    'next_irrep_state': new_irrep_state,
                    'dist': iters
                })
            elif hparams['model_type'] == 'IrrepDQN':
                memory.push({
                    'grid_state': grid_state,
                    'irrep_state': env.cat_irreps(grid_state),
                    'irrep_nbrs': nbrs,
                    'action': action,
                    'reward': reward,
                    'done': done,
                    'next_irrep_state': new_irrep_state,
                    'dist': iters
                })
            elif hparams['model_type'] == 'IrrepOnehotDVN':
                memory.push({
                    #'grid_state': grid_state,
                    'onehot_state': grid_to_onehot(grid_state),
                    'irrep_state': env.cat_irreps(grid_state),
                    'irrep_nbrs': nbrs,
                    'onehot_nbrs': onehot_nbrs,
                    'action': action,
                    'reward': reward,
                    'done': done,
                    'next_irrep_state': new_irrep_state,
                    'dist': iters
                })
            #grid_state = info['grid'] # c
            iters += 1
            if iters % hparams['update_int'] == 0 and iters > 0:
                if hparams['model_type'] == 'IrrepDVN':
                    batch = memory.sample(hparams['batch_size'])
                    loss = pol_net.update(targ_net, env, batch, opt,
                                          hparams['discount'], e)
                    n_updates += 1
                    losses.append(loss)
                elif hparams['model_type'] == 'IrrepDQN':
                    batch = memory.sample(hparams['batch_size'])
                    loss = pol_net.update(targ_net, env, batch, opt,
                                          hparams['discount'], e)
                    n_updates += 1
                    losses.append(loss)
                elif hparams['model_type'] == 'IrrepOnehotDVN':
                    batch = memory.sample(hparams['batch_size'])
                    loss = pol_net.update(targ_net, env, batch, opt,
                                          hparams['discount'], e)
                    n_updates += 1
                    losses.append(loss)
            if done:
                break
        # NOTE(review): target sync gated on update_int here even though a
        # separate target_int hparam exists elsewhere — confirm intended.
        if iters % hparams['update_int'] == 0 and e > 0:
            targ_net.load_state_dict(pol_net.state_dict())
        dones.append(done)
        if e % hparams['log_int'] == 0 and e > 0:
            log.info(
                'Ep: {:4} | Last {} avg loss: {:.3f} | Exp rate: {:.4} | Updates: {}'
                .format(
                    e, hparams['log_int'],
                    np.mean(losses[-hparams['log_int']:]),
                    exp_rate(hparams['max_exp_epochs'], e,
                             hparams['min_exp_rate']), n_updates))
        if e % hparams['val_int'] == 0 and e > 0:
            if hparams['tile_size'] == 2:
                eval_model(pol_net, env, 200, 8)
            else:
                eval_model(pol_net, env, 200, 40)
            print('-------------------------')
        try:
            if hparams['savename']:
                torch.save(pol_net,
                           './irrep_models/{}.pt'.format(hparams['savename']))
        except:
            # best-effort checkpointing; training continues on failure
            log.info('Cant save')
    if hparams['tile_size'] == 2:
        show_vals(pol_net, env)
    check_memory()
    log.info('Rewards seed: {}'.format(rews))
    eval_model(pol_net, env, 200, 8)
def log_mem(log):
    """Log the current process memory usage (mb) to the given logger."""
    usage_mb = check_memory(False)
    log.info('Memory usage: {:.2f}mb'.format(usage_mb))
def main(hparams): partitions = eval(hparams['partitions']) #env = TileIrrepEnv(hparams['tile_size'], partitions, hparams['reward']) env = TileEnv(hparams['tile_size'], one_hot=True, reward=hparams['reward']) pol_net = TileBaselineQ(env.observation_space.shape[0], hparams['nhid'], env.actions) targ_net = TileBaselineQ(env.observation_space.shape[0], hparams['nhid'], env.actions) opt = torch.optim.Adam(pol_net.parameters(), hparams['lr']) # this is probably something each individual model should own mem_dict = { 'onehot_state': (env.observation_space.shape[0], ), 'next_onehot_state': (env.observation_space.shape[0], ), 'action': (1, ), 'reward': (1, ), 'done': (1, ), 'dist': (1, ), 'scramble_dist': (1, ), } dtype_dict = { 'action': int, 'scramble_dist': int, } memory = SimpleMemory(hparams['capacity'], mem_dict, dtype_dict) torch.manual_seed(hparams['seed']) np.random.seed(hparams['seed']) random.seed(hparams['seed']) print('Before training') #eval_model(pol_net, env, 100, 100) iters = 0 losses = [] dones = [] tot_dists = [] for e in range(hparams['epochs'] + 1): onehot_state = env.reset() #states = env.shuffle(hparams['shuffle_len']) for i in range(hparams['max_iters']): # states are onehot vectors #for dist, (grid_state, _x, _y) in enumerate(states): #onehot_state = grid_to_onehot(grid_state) _x, _y = env.x, env.y if random.random() < exp_rate(hparams['max_exp_epochs'], e, hparams['min_exp_rate']): action = random.choice(env.valid_moves(_x, _y)) else: action = pol_net.get_action(onehot_state) # need option to do peek instead of step if we want to use a shuffle trajectory! 
new_state, reward, done, _ = env.step(action) #new_grid, reward, done, info = env.peek(grid_state, _x, _y, action) #new_state = grid_to_onehot(new_grid) memory.push({ 'onehot_state': onehot_state, 'action': action, 'reward': reward, 'done': done, 'next_onehot_state': new_state, 'dist': 0 }) state = new_state onehot_state = new_state iters += 1 if iters % hparams['update_int'] == 0 and iters > 0: batch = memory.sample(hparams['batch_size']) #loss = pol_net.update(targ_net, env, batch, opt, hparams['discount'], e) loss = pol_net.update_simple(targ_net, env, batch, opt, hparams['discount'], e) losses.append(loss) if iters % hparams['update_int'] == 0 and e > 0: targ_net.load_state_dict(pol_net.state_dict()) #tot_dists.append(dist) if e % hparams['log_int'] == 0 and e > 0: _k = 100 log.info( 'Ep: {:4} | Last {} avg loss: {:.3f} | Exp rate: {:.4}'.format( e, hparams['log_int'], np.mean(losses[-hparams['log_int']:]), exp_rate(hparams['max_exp_epochs'], e, hparams['min_exp_rate']))) try: if not (hparams['savename'] is None): log.info('Saving model to: {}'.format(hparams['savename'])) torch.save(pol_net, './models/{}.pt'.format(hparams['savename'])) except: pdb.set_trace() eval_model(pol_net, env, 100, 100) check_memory()
# NOTE(review): this chunk begins mid-way through a parser.add_argument call
# (presumably the '--partitions' flag) whose opening line is outside this view.
                    type=str, default='[(4,), (3,1), (2, 1, 1), (2, 2), (1, 1, 1, 1)]')
parser.add_argument('--lr', type=float, default=0.01)
parser.add_argument('--tile_size', type=int, default=2)
parser.add_argument('--capacity', type=int, default=10000)
parser.add_argument('--epochs', type=int, default=2000)
parser.add_argument('--max_iters', type=int, default=30)
parser.add_argument('--max_exp_epochs', type=int, default=500)
parser.add_argument('--min_exp_rate', type=float, default=0.05)
parser.add_argument('--batch_size', type=int, default=64)
parser.add_argument('--discount', type=float, default=0.9)
parser.add_argument('--reward', type=str, default='penalty')
parser.add_argument('--nhid', type=int, default=16)
parser.add_argument('--seed', type=int, default=0)
parser.add_argument('--shuffle_len', type=int, default=50)
parser.add_argument('--log_int', type=int, default=100)
parser.add_argument('--update_int', type=int, default=20)
parser.add_argument('--target_int', type=int, default=20)
parser.add_argument('--update_type', type=int, default=1)
parser.add_argument('--savename', type=str, default='model')
args = parser.parse_args()
hparams = vars(args)
print(args)
try:
    main(hparams)
except KeyboardInterrupt:
    # allow a clean ctrl-c exit, but still report final memory usage
    print('Keyboard escape!')
    check_memory()
def test_main(alpha, parts):
    '''
    Computes the ft via the sparse wreath rep and the non-sparse wreath rep
    to double check that the sparse wreath rep is actually correct.

    Also benchmarks: dense vs sparse rep construction, dense vs sparse
    fhat multiplication, COO conversion, torch sparse-tensor construction,
    and cyclic irrep evaluation. Only the first ~200 rows are processed.
    '''
    _start = time.time()
    st = time.time()
    sp_irrep_dict = load_pkl(
        '/scratch/hopan/cube/pickles_sparse/{}/{}.pkl'.format(alpha, parts))
    end = time.time()
    print('Loading sparse irrep dict: {:.2f}s'.format(time.time() - st))
    check_memory()
    st = time.time()
    irrep_dict = load_irrep('/scratch/hopan/cube/', alpha, parts)
    print('Loading irrep dict: {:.2f}s'.format(time.time() - st))
    check_memory()
    # generate a random group element?
    st = time.time()
    df = load_df('/scratch/hopan/cube/')
    fhat = np.load('/scratch/hopan/cube/fourier/{}/{}.npy'.format(
        alpha, parts))
    print('Loading df: {:.2f}s'.format(time.time() - st))
    check_memory()
    cyc_irrep_func = cyclic_irreps(alpha)
    cos_reps = coset_reps(sn(8), young_subgroup_perm(alpha))
    st = time.time()
    cyc_irrs = all_cyc_irreps(cos_reps, cyc_irrep_func)
    print('Time to compute all cyc irreps: {:.5f}s'.format(time.time() - st))
    # per-variant timing accumulators and result buffers
    sp_times = []
    sp_mult_times = []
    sp_results = np.zeros(len(df), dtype=np.complex128)
    coo_times = []
    th_sp_times = []
    times = []
    mult_times = []
    z3_irreps = []
    results = np.zeros(len(df), dtype=np.complex128)
    # hoisted: reused for every row's dense inner product
    fhat_t_ravel = fhat.T.ravel()
    loop_start = time.time()
    for idx in range(len(df)):
        row = df.loc[idx]
        otup = tuple(int(i) for i in row[0])
        perm_tup = tuple(int(i) for i in row[1])
        # compute wreath rep
        st = time.time()
        wmat = wreath_rep(otup, perm_tup, irrep_dict, cos_reps,
                          cyc_irrep_func)
        reg_time = time.time() - st
        # compute wreath rep multiply
        st = time.time()
        wmat_inv = wmat.conj().T
        feval = np.dot(fhat_t_ravel, wmat_inv.ravel())
        reg_mult_time = time.time() - st
        results[idx] = feval
        # compute sparse wreath rep
        st = time.time()
        wmat_sp = wreath_rep_sp(otup, perm_tup, sp_irrep_dict, cos_reps,
                                cyc_irrep_func, cyc_irrs)
        sp_time = time.time() - st
        # correctness check: sparse rep must match the dense one exactly
        if not np.allclose(wmat, wmat_sp.todense()):
            print('unequal! | idx = {}'.format(idx))
            pdb.set_trace()
        # compute sparse wreath rep multiply
        st = time.time()
        wmat_inv_sp = wmat_sp.conj().T
        feval_sp = (wmat_inv_sp.multiply(fhat.T)).sum()
        sp_mult_time = time.time() - st
        sp_results[idx] = feval_sp
        times.append(reg_time)
        sp_times.append(sp_time)
        mult_times.append(reg_mult_time)
        sp_mult_times.append(sp_mult_time)
        st = time.time()
        coo = wmat_sp.tocoo()
        end = time.time()
        coo_times.append(end - st)
        # time building torch sparse tensors for the real and imag parts
        st = time.time()
        ix = torch.LongTensor([coo.row, coo.col])
        th_sp_re = torch.sparse.FloatTensor(ix,
                                            torch.FloatTensor(coo.data.real),
                                            torch.Size(coo.shape))
        th_sp_cplx = torch.sparse.FloatTensor(ix,
                                              torch.FloatTensor(coo.data.imag),
                                              torch.Size(coo.shape))
        end = time.time()
        th_sp_times.append(end - st)
        st = time.time()
        block_scalars = block_cyclic_irreps(otup, cos_reps, cyc_irrep_func)
        end = time.time()
        z3_irreps.append(end - st)
        # benchmark a prefix only; the full frame would take far too long
        if idx > 200:
            break
    print('Normal time: {:.6f}s | Sparse time: {:.6f}s'.format(
        np.mean(times), np.mean(sp_times)))
    print('Mult time: {:.6f}s | Spmult time: {:.6f}s'.format(
        np.mean(mult_times), np.mean(sp_mult_times)))
    print('To coo time: {:.6f}s | Torchsptime: {:.6f}s'.format(
        np.mean(coo_times), np.mean(th_sp_times)))
    print('irrep time: {:.6f}s'.format(np.mean(z3_irreps)))
    print('Loop time: {:.2f}s'.format(time.time() - loop_start))
    print('Total time: {:.2f}s'.format(time.time() - _start))
def main(hparams):
    """Train IrrepLinreg policy/target nets on Cube2IrrepEnv with DQN-style
    updates: weights optionally initialized from a precomputed Fourier
    matrix (plus optional gaussian noise), epsilon-greedy exploration,
    optional curriculum over scramble distance, periodic validation, and
    tensorboard logging to a fresh run directory.
    """
    # NOTE(review): logfname duplicates savedir below and is never used.
    logfname = get_logdir(hparams['logdir'], hparams['savename'])
    if not os.path.exists(hparams['logdir']):
        os.makedirs(hparams['logdir'])
    savedir = get_logdir(hparams['logdir'], hparams['savename'])
    os.makedirs(savedir)
    sumdir = os.path.join(savedir, 'logs')
    os.makedirs(sumdir)
    logfile = os.path.join(savedir, 'log.txt')
    logger = SummaryWriter(sumdir)
    # snapshot the hyperparameters next to the run artifacts
    with open(os.path.join(savedir, 'args.json'), 'w') as f:
        json.dump(hparams, f, indent=4)
    log = get_logger(logfile)
    log.debug('Saving in {}'.format(savedir))
    log.debug('hparams: {}'.format(hparams))
    torch.manual_seed(hparams['seed'])
    random.seed(hparams['seed'])
    # NOTE(review): eval() on CLI strings — fine for trusted configs only.
    alpha = eval(hparams['alpha'])
    parts = eval(hparams['parts'])
    log.info('alpha: {} | parts: {}'.format(alpha, parts))
    size = IRREP_SIZE[(alpha, parts)]
    pol_net = IrrepLinreg(size * size)
    targ_net = IrrepLinreg(size * size)
    if not hparams['init']:
        # default: initialize weights from the precomputed fourier matrix
        log.info('Loading fourier')
        pol_net.loadnp(NP_IRREP_FMT.format(str(alpha), str(parts)))
        targ_net.loadnp(NP_IRREP_FMT.format(str(alpha), str(parts)))
    else:
        pol_net.init(hparams['init'])
        targ_net.init(hparams['init'])
        log.info('Init model using mode: {}'.format(hparams['init']))
    if hparams['noise']:
        # perturb real/imag weights of both nets with independent gaussians
        log.info('Adding noise: {}'.format(hparams['noise']))
        mu = torch.zeros(pol_net.wr.size())
        std = torch.zeros(pol_net.wr.size()) + hparams['noise']
        wr_noise = torch.normal(mu, std)
        wi_noise = torch.normal(mu, std)
        pol_net.wr.data.add_(wr_noise)
        pol_net.wi.data.add_(wi_noise)
        wr_noise = torch.normal(mu, std)
        wi_noise = torch.normal(mu, std)
        targ_net.wr.data.add_(wr_noise)
        targ_net.wi.data.add_(wi_noise)
    env = Cube2IrrepEnv(alpha, parts, solve_rew=hparams['solve_rew'])
    log.info('env solve reward: {}'.format(env.solve_rew))
    if hparams['opt'] == 'sgd':
        log.info('Using sgd')
        optimizer = torch.optim.SGD(pol_net.parameters(), lr=hparams['lr'],
                                    momentum=hparams['momentum'])
    elif hparams['opt'] == 'rms':
        log.info('Using rmsprop')
        optimizer = torch.optim.RMSprop(pol_net.parameters(),
                                        lr=hparams['lr'],
                                        momentum=hparams['momentum'])
    memory = ReplayMemory(hparams['capacity'])
    if hparams['meminit']:
        init_memory(memory, env)
    niter = 0
    nupdates = 0
    totsolved = 0
    solved_lens = []
    # circular buffer of the last logint epoch returns
    rewards = np.zeros(hparams['logint'])
    log.info('Before any training:')
    val_avg, val_prop, val_time, solve_lens = val_model(pol_net, env, hparams)
    log.info(
        'Validation | avg solve length: {:.4f} | solve prop: {:.4f} | time: {:.2f}s'
        .format(val_avg, val_prop, val_time))
    log.info(
        'Validation | LQ: {:.3f} | MQ: {:.3f} | UQ: {:.3f} | Max: {}'.format(
            np.percentile(solve_lens, 25), np.percentile(solve_lens, 50),
            np.percentile(solve_lens, 75), max(solve_lens)))
    scramble_lens = []
    for e in range(hparams['epochs']):
        # curriculum: gradually increase scramble distance over training
        if hparams['curric']:
            dist = curriculum_dist(hparams['max_dist'], e, hparams['epochs'])
        else:
            dist = hparams['max_dist']
        state = env.reset_fixed(max_dist=dist)
        epoch_rews = 0
        scramble_lens.append(dist)
        for i in range(hparams['maxsteps']):
            if hparams['norandom']:
                action = get_action(env, pol_net, state)
            elif random.random() < explore_rate(
                    e, hparams['epochs'] * hparams['explore_proportion'],
                    hparams['eps_min']):
                action = random.randint(0, env.action_space.n - 1)
            else:
                action = get_action(env, pol_net, state)
            ns, rew, done, _ = env.step(action, irrep=False)
            memory.push(state, action, ns, rew, done)
            epoch_rews += rew
            state = ns
            niter += 1
            if (not hparams['noupdate']
                    ) and niter > 0 and niter % hparams['update_int'] == 0:
                sample = memory.sample(hparams['batch_size'])
                _loss = update(env, pol_net, targ_net, sample, optimizer,
                               hparams, logger, nupdates)
                logger.add_scalar('loss', _loss, nupdates)
                nupdates += 1
            if done:
                solved_lens.append(i + 1)
                totsolved += 1
                break
        rewards[e % len(rewards)] = epoch_rews
        logger.add_scalar('reward', epoch_rews, e)
        if e % hparams['logint'] == 0 and e > 0:
            val_avg, val_prop, val_time, _ = val_model(pol_net, env, hparams)
            logger.add_scalar('last_{}_solved'.format(hparams['logint']),
                              len(solved_lens) / hparams['logint'], e)
            if len(solved_lens) > 0:
                logger.add_scalar(
                    'last_{}_solved_len'.format(hparams['logint']),
                    np.mean(solved_lens), e)
            logger.add_scalar('val_solve_avg', val_avg, e)
            logger.add_scalar('val_prop', val_prop, e)
            log.info(
                '{:7} | dist: {:4.1f} | avg rew: {:5.2f} | solve prop: {:5.3f}, len: {:5.2f} | exp: {:.2f} | ups {:7} | val avg {:.3f} prop {:.3f}'
                .format(
                    e,
                    np.mean(scramble_lens),
                    np.mean(rewards),
                    len(solved_lens) / hparams['logint'],
                    0 if len(solved_lens) == 0 else np.mean(solved_lens),
                    explore_rate(
                        e, hparams['epochs'] * hparams['explore_proportion'],
                        hparams['eps_min']),
                    nupdates,
                    val_avg,
                    val_prop,
                ))
            # reset the per-window stats
            solved_lens = []
            scramble_lens = []
        if e % hparams['updatetarget'] == 0 and e > 0:
            targ_net.load_state_dict(pol_net.state_dict())
    log.info('Total updates: {}'.format(nupdates))
    log.info('Total solved: {:8} | Prop solved: {:.4f}'.format(
        totsolved, totsolved / hparams['epochs']))
    logger.export_scalars_to_json(os.path.join(savedir, 'summary.json'))
    logger.close()
    torch.save(pol_net, os.path.join(savedir, 'model.pt'))
    check_memory()
    # final, larger validation run
    hparams['val_size'] = 10 * hparams['val_size']
    val_avg, val_prop, val_time, _ = val_model(pol_net, env, hparams)
    log.info(
        'Validation avg solve length: {:.4f} | solve prop: {:.4f} | time: {:.2f}s'
        .format(val_avg, val_prop, val_time))