Example #1
def test():
    alphas = [(2, 3, 3)]
    parts = [((2, ), (1, 1, 1), (1, 1, 1))]
    pset = set()
    pkls = []
    mem_usg = [0]
    for alpha in alphas:
        print('Computing sparse pickles for: {}'.format(alpha))
        #parts = partition_parts(alpha)
        for idx, p in enumerate(parts):
            other = (p[0], p[2], p[1])
            if other in pset:
                continue

            np_pkl = gen_pickle_name('pickles', alpha, p)
            th_pkl = gen_pickle_name('pickles_sparse', alpha, p)
            gen_th_pkl(np_pkl, th_pkl)

            print('Done with {} | {}'.format(alpha, p))
            curr = check_memory()
            print('{:2} irreps | {:30}: {:9.2f}'.format(
                idx + 1, str(p), curr - mem_usg[-1]),
                  end=' | ')
            mem_usg.append(curr)
            pset.add(p)
    print('Done')
    check_memory()
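
Every example here calls a helper named check_memory, which is not shown. From its call sites it returns the current usage in MB and takes a verbose flag; a minimal sketch along those lines, assuming psutil is available (the repo's actual helper may differ):

import os

import psutil  # assumed dependency; any RSS probe would do


def check_memory(verbose=True):
    # resident set size of the current process, in megabytes
    mem_mb = psutil.Process(os.getpid()).memory_info().rss / (1024 ** 2)
    if verbose:
        print('Memory usage: {:.2f}mb'.format(mem_mb))
    return mem_mb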
Example #2
def proc_baseline_df(idx, df, mat):
    global all_df
    start = time.time()
    correct = np.zeros(len(df))
    chosen_cubes = np.zeros(len(df), dtype=int)

    print('Proc {:2} started | '.format(idx), end='')
    check_memory()
 
    for i, c in enumerate(df.index):
        nbrs = neighbors_fixed_core_small(c)
        nbr_df = all_df.loc[nbrs]
        nbr_idx = nbr_df['index'] # need this for indexing into mat
        min_dist = nbr_df.distance.min()
        min_cubes = nbr_df[nbr_df.distance == min_dist]

        vals = mat[nbr_idx]
        n_idx = np.argmin(vals)
        min_irrep_cube = nbrs[n_idx] # this gives the index
        correct[i] = (min_irrep_cube in min_cubes.index)
        chosen_cubes[i] = all_df.loc[min_irrep_cube]['index']

    end = time.time()
    print('Proc {:2} done: {:.2f}mins'.format(idx, (end - start) / 60.))
    return correct, chosen_cubes
Example #3
def test_th_pkl(np_pkl, th_pkl):
    print('Testing equivalence')
    np_dict = load_pkl(np_pkl)
    th_dict = load_sparse_pkl(th_pkl)
    compare(np_dict, th_dict)
    print('All equal between numpy and torch versions!!')
    check_memory()
Example #4
def par_ft(partition, fname, savedir, ncpu=16):
    if not os.path.exists(savedir):
        try:
            print('Directory {} does not exist. Creating it now'.format(savedir))
            os.makedirs(savedir)
        except FileExistsError:
            print('Directory {} was created concurrently. Continuing...'.format(savedir))

    ferrers = FerrersDiagram(partition)
    print('Ferrers:')
    print(ferrers)
    df = pd.read_csv(fname, header=None, dtype={0: str, 1:int})
    check_memory()

    df_chunk = np.array_split(df, ncpu)
    arg_tups = [(chunk, ferrers) for chunk in df_chunk]
    savename = os.path.join(savedir, str(partition))
    print('Saving in: {}'.format(savename))
    if os.path.exists(savename):
        print('{} exists. Not running'.format(savename))
        return

    with Pool(ncpu) as p:
        map_res = p.starmap(fts, arg_tups)
        # sum of these matrices is what we want
        fourier_mat = sum(map_res)
        np.save(savename, fourier_mat)
        return fourier_mat
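
The structure of par_ft (split the DataFrame with np.array_split, starmap a worker over the chunks, sum the partial matrices) is a generic map-reduce pattern. A self-contained toy version, with a stand-in worker in place of fts:

from multiprocessing import Pool

import numpy as np
import pandas as pd


def partial_mat(chunk, dim):
    # stand-in for fts: reduce one chunk to a fixed-shape partial matrix
    return chunk[1].sum() * np.eye(dim)


if __name__ == '__main__':
    df = pd.DataFrame({0: list('abcdefgh'), 1: range(8)})
    chunks = np.array_split(df, 4)
    with Pool(4) as p:
        partials = p.starmap(partial_mat, [(c, 3) for c in chunks])
    total = sum(partials)  # same reduction as fourier_mat = sum(map_res)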
Example #5
def par_inv_ft(partition, fname, savedir, ncpu=16):
    if not os.path.exists(savedir):
        try:
            print('Directory {} does not exist. Creating it now'.format(savedir))
            os.makedirs(savedir)
        except FileExistsError:
            print('Directory {} was created concurrently. Continuing...'.format(savedir))

    ferrers = FerrersDiagram(partition)
    df = pd.read_csv(fname, header=None, dtype={0: str, 1: int})
    check_memory()

    df_chunk = np.array_split(df, ncpu)
    arg_tups = [(chunk, ferrers) for chunk in df_chunk]
    savename = os.path.join(savedir, str(partition)) + '.csv'
    print('Saving in: {}'.format(savename))
    if os.path.exists(savename):
        print('{} exists. Not running'.format(savename))
        return

    with Pool(ncpu) as p:
        results = p.starmap(inv_transform, arg_tups)
        concat_results = sum(results, [])
        df[1] = concat_results
        df.to_csv(savename, header=False, index=False)

    return df
Example #6
def test():
    start = time.time()
    alpha = (2, 3, 3)
    parts = ((2,), (3,), (3,))
    df = load_df('/scratch/hopan/cube/')
    irrep_dict = load_irrep('/scratch/hopan/cube/', alpha, parts)
    end = time.time()
    print('Load time: {:.2f}s | {} {}'.format(end - start, alpha, parts))
    check_memory()
Example #7
def par_cube_ift(rank, size, alpha, parts):
    start = time.time()
    try:
        df = load_df('/scratch/hopan/cube/')
        irrep_dict = load_irrep('/scratch/hopan/cube/', alpha, parts)
        fhat = np.load('/scratch/hopan/cube/fourier/{}/{}.npy'.format(
            alpha, parts))
    except Exception as e:
        print('rank {} | memory usg: {} | exception {}'.format(
            rank, check_memory(verbose=False), e))
        raise

    print(
        'Rank {:3d} / {} | load irrep: {:.2f}s | mem: {:.2f}mb | {} {}'.format(
            rank, size,
            time.time() - start, check_memory(verbose=False), alpha, parts))

    cos_reps = coset_reps(sn(8), young_subgroup_perm(alpha))
    save_dict = {}
    cyc_irrep_func = cyclic_irreps(alpha)

    chunk_size = len(df) // size
    start_idx = chunk_size * rank
    mat = np.zeros(chunk_size, dtype=fhat.dtype)
    fhat_t_ravel = fhat.T.ravel()
    #print('Rank {} | {:7d}-{:7d}'.format(rank, start_idx, start_idx + chunk_size))
    if rank == 0:
        print(
            'Rank {} | elapsed: {:.2f}s | {:.2f}mb | mat shape: {} | done load | {} {}'
            .format(rank,
                    time.time() - start, check_memory(verbose=False),
                    fhat.shape, alpha, parts))

    for idx in range(start_idx, start_idx + chunk_size):
        row = df.loc[idx]
        otup = tuple(int(i) for i in row[0])
        perm_tup = tuple(int(i) for i in row[1])
        #dist = int(row[2])
        # actually want the inverse
        wmat = wreath_rep(otup, perm_tup, irrep_dict, cos_reps, cyc_irrep_func)
        wmat_inv = wmat.conj().T
        # trace(rho(ginv) fhat) = trace(fhat rho(ginv)) = vec(fhat.T).dot(vec(rho(ginv)))
        #feval = np.dot(fhat.T.ravel(), wmat_inv.ravel())
        feval = np.dot(fhat_t_ravel, wmat_inv.ravel())
        mat[idx - start_idx] = fhat.shape[0] * feval

    if rank == 0:
        print('Rank {} | elapsed: {:.2f}s | {:.2f}mb | done add'.format(
            rank,
            time.time() - start, check_memory(verbose=False)))

    del irrep_dict
    if rank == 0:
        print('Rank {} | elapsed: {:.2f}s | {:.2f}mb | done matrix conversion'.
              format(rank,
                     time.time() - start, check_memory(verbose=False)))

    return mat
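
The hoisted fhat_t_ravel relies on the identity in the loop comment: trace(fhat @ rho) = vec(fhat.T) . vec(rho), so each evaluation becomes a single dot product instead of a matrix product plus trace. A quick numpy check of that identity:

import numpy as np

rng = np.random.default_rng(0)
fhat = rng.standard_normal((5, 5)) + 1j * rng.standard_normal((5, 5))
rho = rng.standard_normal((5, 5)) + 1j * rng.standard_normal((5, 5))

# trace(fhat @ rho) == vec(fhat.T) . vec(rho)
assert np.allclose(np.trace(fhat @ rho), np.dot(fhat.T.ravel(), rho.ravel()))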
Example #8
def main(alpha, parts, savedir):
    st = time.time()
    irrep_dict = load_irrep(savedir, alpha, parts)
    end = time.time()
    check_memory()

    print('Load time {:.2f}s | {} {}'.format(end - st, alpha, parts))
    sp_dict = convert(alpha, parts, irrep_dict=irrep_dict)
    print('Convert time {:.2f}s'.format(time.time() - end))
    check_memory()

    save_sp_pkl(sp_dict, savedir, alpha, parts)
Example #9
def mpi_main(alpha, parts):
    savename = '/scratch/hopan/cube/fourier_sym_eval/{}/{}.npy'.format(
        alpha, parts)
    if os.path.exists(savename):
        print('File {} exists! Skipping'.format(savename))
        exit()
        #print('File {} exists! Running anyway!'.format(savename))

    comm = MPI.COMM_WORLD
    size = comm.Get_size()
    rank = comm.Get_rank()
    name = MPI.Get_processor_name()
    if rank == 0:
        print('starting {} | {}'.format(alpha, parts))

    _start = time.time()
    start = time.time()
    # mat = par_cube_ft(alpha, parts, irrep_dict, lst)
    mat = par_cube_ift(rank, size, alpha, parts)
    #all_mats = comm.gather(mat, root=0)
    if rank == 0:
        print('post par cube ft: {:.2f}s | mem: {:.2f}mb'.format(
            time.time() - start, check_memory(verbose=False)))

    sendmat = mat
    recvmat = None
    if rank == 0:
        recvmat = np.empty([size, *sendmat.shape], dtype=sendmat.dtype)
        print('Before gather: {:.2f}s | mem {:.2f}mb'.format(
            time.time() - start, check_memory(verbose=False)))
    comm.Gather(sendmat, recvmat, root=0)

    if rank == 0:
        print('Elapsed for gather: {:.2f}s | mem {:.2f}mb'.format(
            time.time() - start, check_memory(verbose=False)))
        #res_mat = np.sum(recvmat, axis=0)
        res_mat = recvmat.reshape(-1)
        print('All done | {:.2f}s | shape {} | mem {:.2f}mb'.format(
            time.time() - _start, res_mat.shape, check_memory(verbose=False)))

        # save dir
        if not os.path.exists(
                '/scratch/hopan/cube/fourier_sym_eval/{}'.format(alpha)):
            os.makedirs(
                '/scratch/hopan/cube/fourier_sym_eval/{}'.format(alpha))
        savename = '/scratch/hopan/cube/fourier_sym_eval/{}/{}'.format(
            alpha, parts)
        np.save(savename, res_mat)
        print('Done saving in {}! | Total time: {:.2f}s'.format(
            savename,
            time.time() - _start))
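
The uppercase comm.Gather used here is the buffer-based mpi4py call: it works on numpy arrays, and the root must preallocate the receive buffer with one row per rank, exactly as mpi_main does with recvmat. A minimal standalone version (run with e.g. mpiexec -n 4 python gather_demo.py):

import numpy as np
from mpi4py import MPI

comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()

sendbuf = np.full(3, rank, dtype='d')  # this rank's chunk
recvbuf = None
if rank == 0:
    recvbuf = np.empty([size, 3], dtype=sendbuf.dtype)  # root preallocates
comm.Gather(sendbuf, recvbuf, root=0)

if rank == 0:
    print(recvbuf.reshape(-1))  # same flattening as res_mat above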
Example #10
def par_main(par_f, ncpu):
    global all_df
    all_df = load_cube_df_indexed()
    start = time.time()
    df_chunk = np.array_split(all_df, ncpu)
    idx_to_nbrs, idx_to_cube, idx_to_dist, cube_to_idx = load_pkls()
    arg_tups = [(idx, _d) for idx, _d in enumerate(df_chunk)]

    print('Starting par proc with {} processes...'.format(ncpu))
    check_memory()
    with Pool(ncpu) as p:
        map_res = p.starmap(par_f, arg_tups)

    print('Elapsed proc time: {:.2f}min'.format((time.time() - start) / 60.))
    return map_res
Example #11
def full_transform(args, alpha, parts, split_chunks):
    print('Computing full transform for alpha: {} | parts: {}'.format(
        alpha, parts))
    savedir_alpha = os.path.join(args.savedir, args.alpha)
    savename = os.path.join(savedir_alpha, '{}'.format(parts))
    print('Savename: {}'.format(savename))
    if os.path.exists(savename + '.npy'):
        print('Skipping. Already computed fourier matrix for: {} | {}'.format(
            alpha, parts))
        exit()

    manager = Manager()
    irrep_dict = load_pkl(
        os.path.join(args.pkldir, args.alpha, '{}.pkl'.format(parts)))
    mem_usg = check_memory(verbose=False)

    if not os.path.exists(savedir_alpha):
        print('Making: {}'.format(savedir_alpha))
        os.makedirs(savedir_alpha)

    if args.par > 1:
        print('Par process with {} processes...'.format(len(split_chunks)))
        mem_dict = manager.dict()
        with Pool(len(split_chunks)) as p:
            arg_tups = [(_fn, irrep_dict, alpha, parts, mem_dict)
                        for _fn in split_chunks]
            matrices = p.starmap(text_split_transform, arg_tups)
            np.save(savename, sum(matrices))
    else:
        print('Single thread...')
        matrices = []
        mem_dict = {}
        for _fn in split_chunks:
            res = text_split_transform(_fn, irrep_dict, alpha, parts)
            matrices.append(res)
        np.save(savename, sum(matrices))

    print('Post loading pickle mem usg: {:.4}mb | Final mem usg: {:.4f}mb'.
          format(mem_usg, check_memory(False)))
    print('Per-process peak memory:')
    for pid, usg in mem_dict.items():
        print('{} | {:.4f}mb'.format(pid, usg))
    print('Done!')
Example #12
def text_split_transform(fsplit_lst, irrep_dict, alpha, parts, mem_dict=None):
    '''
    fsplit_lst: list of split file names of the distance values for a chunk of the total distance values
    irrep_dict: irrep dict
    alpha: weak partition
    parts: list/iterable of partitions of the parts of alpha
    '''
    print('     Computing transform on splits: {}'.format(fsplit_lst))
    cos_reps = coset_reps(sn(8), young_subgroup_perm(alpha))
    save_dict = {}
    cyc_irrep_func = cyclic_irreps(alpha)
    pid = os.getpid()

    for split_f in fsplit_lst:
        with open(split_f, 'r') as f:
            for line in tqdm(f):
                otup, perm_tup, dist = clean_line(line)
                perm_rep = irrep_dict[
                    perm_tup]  # perm_rep is a dict of (i, j) -> matrix
                block_cyclic_rep = block_cyclic_irreps(otup, cos_reps,
                                                       cyc_irrep_func)
                mult_yor_block(perm_rep, dist, block_cyclic_rep, save_dict)

        if mem_dict is not None:
            mem_dict[pid] = max(check_memory(verbose=False),
                                mem_dict.get(pid, 0))

    block_size = wreath_dim(parts)
    n_cosets = coset_size(alpha)
    mat = convert_yor_matrix(save_dict, block_size, n_cosets)
    return mat
Example #13
def split_transform(fsplit_lst, irrep_dict, alpha, parts, mem_dict=None):
    '''
    fsplit_lst: list of pkl file names of the distance values for a chunk of the total distance values
    irrep_dict: irrep dict
    alpha: weak partition
    parts: list/iterable of partitions of the parts of alpha
    '''
    print('     Computing transform on splits: {}'.format(fsplit_lst))
    cos_reps = coset_reps(sn(8), young_subgroup_perm(alpha))
    save_dict = {}
    cyc_irrep_func = cyclic_irreps(alpha)
    pid = os.getpid()

    for fsplit_pkl in fsplit_lst:
        # dict of function values
        pkl_dict = load_pkl(fsplit_pkl)
        for perm_tup, tup_dict in pkl_dict.items():
            for tup, dists in tup_dict.items():
                dist_tot = sum(dists)
                perm_rep = irrep_dict[
                    perm_tup]  # perm_rep is a dict of (i, j) -> matrix
                block_cyclic_rep = block_cyclic_irreps(
                    tup, cos_reps, cyc_irrep_func)
                mult_yor_block(perm_rep, dist_tot, block_cyclic_rep,
                               save_dict)
        if mem_dict is not None:
            mem_dict[pid] = max(check_memory(verbose=False),
                                mem_dict.get(pid, 0))
        del pkl_dict

    block_size = wreath_dim(parts)
    n_cosets = coset_size(alpha)
    mat = convert_yor_matrix(save_dict, block_size, n_cosets)
    return mat
Example #14
def par_irrep_main(par_f, alpha, parts, ncpu):
    global all_df
    all_df = load_cube_df_indexed()
    df_chunk = np.array_split(all_df[:20000], ncpu)
    real_mat = irrep_feval(alpha, parts).real
    arg_tups = [(idx, _d, real_mat) for idx, _d in enumerate(df_chunk)]

    print('Before pool | ', end='')
    check_memory()

    with Pool(ncpu) as p:
        map_res = p.starmap(par_f, arg_tups)
        par_correct, par_chosen_cubes = zip(*map_res) 

        cat_correct = np.concatenate(par_correct)
        cat_chosen = np.concatenate(par_chosen_cubes)

    return cat_correct, cat_chosen
Example #15
def gen_th_pkl(np_pkl, th_pkl):
    if os.path.exists(th_pkl):
        print('Skipping pkl: {}'.format(th_pkl))
        #return
    else:
        print('Not skipping pkl: {}'.format(th_pkl))

    if not os.path.exists(np_pkl):
        print(np_pkl, 'doesnt exist! Exiting!')
        exit()
    else:
        dirname = os.path.dirname(th_pkl)
        try:
            os.makedirs(dirname)
        except FileExistsError:
            print('makedirs: Directory already exists {}? {}'.format(
                dirname, os.path.exists(dirname)))
    print('trying to open: {}'.format(np_pkl))
    with open(np_pkl, 'rb') as f:
        ydict = pickle.load(f)

    check_memory()
    print('after loading {}'.format(np_pkl))

    sparse_tdict = {}
    for perm_tup, rep_dict in tqdm(ydict.items()):
        idx, vreal, size = to_block_sparse(rep_dict)
        sparse_tdict[perm_tup] = {
            'idx': idx,
            'real': vreal,
        }

    check_memory()
    print('done building the sparse dict for {}'.format(th_pkl))
    del ydict

    # hacky way to assign this
    sparse_tdict['size'] = size

    #with open(th_pkl, 'wb') as f:
    #    pickle.dump(sparse_tdict, f, protocol=pickle.HIGHEST_PROTOCOL)

    print('Created:', th_pkl)
    del sparse_tdict
Example #16
def bfs(fname):
    #start = init_pyraminx()
    start = init_pyraminx_tip()
    to_visit = deque([(start, 0)])
    visited = set()

    with open(fname, 'w') as f:
        while to_visit:
            curr, dist = to_visit.popleft()
            if curr in visited:
                continue

            #f.write('{},{}\n'.format(pyraminx_str(curr), dist))
            f.write('{},{}\n'.format(pyraminx_tip_str(curr), dist))
            visited.add(curr)
            for nbr in pyraminx_tip_nbrs(curr):
                if nbr not in visited:
                    to_visit.append((nbr, dist + 1))
    check_memory()
Example #17
def bfs(root, fname):
    print('Writing to: {}'.format(fname))
    with open(fname, 'w') as f:
        to_visit = deque([(root, 0)])
        dist_dict = {np_to_tup(root): 0}
        f.write('{},0\n'.format(np_to_str(root)))

        while to_visit:
            curr, dist = to_visit.popleft()
            ctup = np_to_tup(curr)

            for nbr in neighbors(curr).keys():
                ntup = np_to_tup(nbr)
                if ntup not in dist_dict:
                    dist_dict[ntup] = dist + 1
                    f.write('{},{}\n'.format(tup_to_str(ntup), dist + 1))
                    # append the grid not the nbr
                    to_visit.append((nbr, dist + 1))
    check_memory()
    return dist_dict
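
Both bfs variants write one 'state,dist' line per visited state; these are the same files that par_ft and par_inv_ft later load with pd.read_csv(fname, header=None, dtype={0: str, 1: int}). A quick round trip on a toy file (path hypothetical):

import pandas as pd

with open('/tmp/dists.csv', 'w') as f:
    f.write('01234567,0\n')
    f.write('10234567,1\n')

df = pd.read_csv('/tmp/dists.csv', header=None, dtype={0: str, 1: int})
print(df[1].max())  # deepest distance in the table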
Example #18
def test(seed):
    cnt = 10
    random.seed(seed)
    print('A star with seed: {} | cnt: {}'.format(seed, cnt))

    size = 3
    puzzle = TileEnv(size)
    puzzles = []
    man_nodes = []
    irrep_nodes = []
    for idx in range(cnt):
        puzzle.reset()
        puzzles.append(puzzle.tup_state())
        str_state = tup_to_str(puzzle.tup_state())

        #resh = a_star(puzzle.grid, hamming_grid)
        #print('Hamming | ', end='')
        #print(resh)
        resm = a_star(puzzle.grid, manhattan_grid)
        man_nodes.append(resm['nodes_explored'])
        print('{:3} | {}'.format(idx, resm))

    for idx, perm in enumerate(puzzles):
        puzzle._assign_perm(perm)
        parts = [(9,), (8, 1)]
        irrep_manh = irrep_gen_func(parts, 'manhattan_eval')
        resi = a_star(puzzle.grid, irrep_manh)
        irrep_nodes.append(resi['nodes_explored'])
        print('{:3} | {}'.format(idx, resi))

        #parts = [(9,), (8, 1)]
        #print('Hamming heuristic using parts: {}'.format(parts), end='')
        #irrep_hamm = irrep_gen_func(parts, 'hamming_eval')
        #resi = a_star(puzzle.grid, irrep_hamm)
        #print(resi)
        #print('=' * 80)

    puzzle_strs = [tup_to_str(t) for t in puzzles]
    df = pd.DataFrame({'perms': puzzle_strs, 'manhattan': man_nodes, 'manhattan_irrep': irrep_nodes})
    df.to_csv('./results/results_{}.csv'.format(seed), header=True)
    check_memory()
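
a_star itself is not shown in these snippets; from its call sites it takes a start state and a heuristic and returns a dict containing at least 'nodes_explored'. A generic sketch with that return shape, assuming nothing about the repo's implementation (the goal and nbrs arguments are hypothetical):

import heapq


def a_star(start, goal, nbrs, heuristic):
    open_heap = [(heuristic(start), 0, start)]
    g = {start: 0}
    explored = 0
    while open_heap:
        _, dist, node = heapq.heappop(open_heap)
        if dist > g.get(node, float('inf')):
            continue  # stale heap entry
        explored += 1
        if node == goal:
            return {'nodes_explored': explored, 'path_length': dist}
        for nbr in nbrs(node):
            nd = dist + 1
            if nd < g.get(nbr, float('inf')):
                g[nbr] = nd
                heapq.heappush(open_heap, (nd + heuristic(nbr), nd, nbr))
    return {'nodes_explored': explored, 'path_length': None}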
Example #19
def test(ntrials=100):
    start = time.time()
    alpha = (2,3,3)
    parts = ((2,), (1, 1, 1), (1, 1, 1))

    env = Cube2IrrepEnv(alpha, parts)
    setup_time = time.time() - start
    print('Done loading: {:.2f}s'.format(setup_time))

    res = env.reset()
    stuff = []
    for _ in range(ntrials):
        action = random.choice(range(1, 7))
        res, _, _, _ = env.step(action)
        stuff.append(res)

    check_memory()
    end = time.time()
    sim_time = (end - start) - setup_time
    per_action_time = sim_time / ntrials
    print('Setup time: {:.4f}s'.format(setup_time))
    print('Total time: {:.4f}s'.format(sim_time))
    print('Per action: {:.4f}s'.format(per_action_time))
Example #20
def par_cube_ft(rank, size, alpha, parts):
    start = time.time()
    try:
        df = load_df('/scratch/hopan/cube/')
        irrep_dict = load_irrep('/scratch/hopan/cube/', alpha, parts)
    except Exception as e:
        print('rank {} | memory usg: {} | exception {}'.format(rank, check_memory(verbose=False), e))
        raise

    print('Rank {:3d} / {} | load irrep: {:.2f}s | mem: {:.2f}mb'.format(rank, size, time.time() - start, check_memory(verbose=False)))

    cos_reps = coset_reps(sn(8), young_subgroup_perm(alpha))
    save_dict = {}
    cyc_irrep_func = cyclic_irreps(alpha)

    chunk_size = len(df) // size
    start_idx  = chunk_size * rank
    #print('Rank {} | {:7d}-{:7d}'.format(rank, start_idx, start_idx + chunk_size))
    if rank == 0:
        print('Rank {} | elapsed: {:.2f}s | {:.2f}mb | done load'.format(rank, time.time() - start, check_memory(verbose=False)))

    for idx in range(start_idx, start_idx + chunk_size):
        row = df.loc[idx]
        otup = tuple(int(i) for i in row[0])
        perm_tup = tuple(int(i) for i in row[1])
        dist = int(row[2])
 
        perm_rep = irrep_dict[perm_tup]  # perm_rep is a dict of (i, j) -> matrix
        block_cyclic_rep = block_cyclic_irreps(otup, cos_reps, cyc_irrep_func)
        mult_yor_block(perm_rep, dist, block_cyclic_rep, save_dict)

    if rank == 0:
        print('Rank {} | elapsed: {:.2f}s | {:.2f}mb | done add'.format(rank, time.time() - start, check_memory(verbose=False)))

    del irrep_dict
    block_size = wreath_dim(parts)
    n_cosets = coset_size(alpha)
    mat = convert_yor_matrix(save_dict, block_size, n_cosets)
    if rank == 0:
        print('Rank {} | elapsed: {:.2f}s | {:.2f}mb | done matrix conversion'.format(rank, time.time() - start, check_memory(verbose=False)))

    return mat 
Example #21
def main(hparams):
    partitions = eval(hparams['partitions'])
    env = TileIrrepEnv(hparams['tile_size'], partitions, hparams['reward'])

    if hparams['model_type'] == 'IrrepDVN':
        log.info('Making IrrepDVN')
        pol_net = IrrepDVN(partitions)
        targ_net = IrrepDVN(partitions)
    elif hparams['model_type'] == 'IrrepDQN':
        log.info('Making IrrepDQN')
        pol_net = IrrepDQN(partitions, nactions=4)
        targ_net = IrrepDQN(partitions, nactions=4)
    elif hparams['model_type'] == 'IrrepOnehotDVN':
        log.info('Making IrrepOnehotDVN')
        pol_net = IrrepOnehotDVN(env.onehot_shape, env.irrep_shape,
                                 hparams['n_hid'], partitions)
        targ_net = IrrepOnehotDVN(env.onehot_shape, env.irrep_shape,
                                  hparams['n_hid'], partitions)

    opt = torch.optim.Adam(pol_net.parameters(), hparams['lr'])
    memory = SimpleMemory(hparams['capacity'], pol_net.mem_dict(env),
                          pol_net.dtype_dict())

    torch.manual_seed(hparams['seed'])
    np.random.seed(hparams['seed'])
    random.seed(hparams['seed'])

    n_updates = 0
    iters = 0
    losses = []
    dones = []
    rews = set()
    for e in range(hparams['epochs'] + 1):
        shuffle_len = random.randint(hparams['shuffle_min'],
                                     hparams['shuffle_max'])
        states = env.shuffle(shuffle_len)
        #grid_state = env.reset(output='grid') # is this a grid state?
        #for i in range(hparams['max_iters']):
        for dist, (grid_state, _x, _y) in enumerate(states):
            #_x, _y = env.x, env.y # C
            nbrs, onehot_nbrs = env.all_nbrs(grid_state, _x, _y)
            if random.random() < exp_rate(hparams['max_exp_epochs'], e,
                                          hparams['min_exp_rate']):
                action = random.choice(env.valid_moves(_x, _y))
            else:
                if hparams['model_type'] == 'IrrepDVN':
                    action = pol_net.get_action(env,
                                                grid_state,
                                                e,
                                                all_nbrs=nbrs,
                                                x=_x,
                                                y=_y)
                elif hparams['model_type'] == 'IrrepDQN':
                    action = pol_net.get_action_grid(env,
                                                     grid_state,
                                                     x=_x,
                                                     y=_y)

            new_irrep_state, reward, done, info = env.peek(
                grid_state, _x, _y, action)
            rews.add(reward)
            #new_irrep_state, reward, done, info = env.step(action) # c
            if hparams['model_type'] in ('IrrepDVN', 'IrrepDQN'):
                memory.push({
                    'grid_state': grid_state,
                    'irrep_state': env.cat_irreps(grid_state),
                    'irrep_nbrs': nbrs,
                    'action': action,
                    'reward': reward,
                    'done': done,
                    'next_irrep_state': new_irrep_state,
                    'dist': iters
                })
            elif hparams['model_type'] == 'IrrepOnehotDVN':
                memory.push({
                    'onehot_state': grid_to_onehot(grid_state),
                    'irrep_state': env.cat_irreps(grid_state),
                    'irrep_nbrs': nbrs,
                    'onehot_nbrs': onehot_nbrs,
                    'action': action,
                    'reward': reward,
                    'done': done,
                    'next_irrep_state': new_irrep_state,
                    'dist': iters
                })

            #grid_state = info['grid'] # c
            iters += 1
            if iters % hparams['update_int'] == 0 and iters > 0:
                # identical update rule for all three model types
                batch = memory.sample(hparams['batch_size'])
                loss = pol_net.update(targ_net, env, batch, opt,
                                      hparams['discount'], e)
                n_updates += 1
                losses.append(loss)
            if done:
                break

            if iters % hparams['update_int'] == 0 and e > 0:
                targ_net.load_state_dict(pol_net.state_dict())

        dones.append(done)
        if e % hparams['log_int'] == 0 and e > 0:
            log.info(
                'Ep: {:4} | Last {} avg loss: {:.3f} | Exp rate: {:.4} | Updates: {}'
                .format(
                    e, hparams['log_int'],
                    np.mean(losses[-hparams['log_int']:]),
                    exp_rate(hparams['max_exp_epochs'], e,
                             hparams['min_exp_rate']), n_updates))

        if e % hparams['val_int'] == 0 and e > 0:
            if hparams['tile_size'] == 2:
                eval_model(pol_net, env, 200, 8)
            else:
                eval_model(pol_net, env, 200, 40)

    print('-------------------------')
    try:
        if hparams['savename']:
            torch.save(pol_net,
                       './irrep_models/{}.pt'.format(hparams['savename']))
    except Exception:
        log.info("Can't save")

    if hparams['tile_size'] == 2:
        show_vals(pol_net, env)
    check_memory()
    log.info('Rewards seen: {}'.format(rews))
    eval_model(pol_net, env, 200, 8)
Example #22
def log_mem(log):
    mem = check_memory(False)
    log.info('Memory usage: {:.2f}mb'.format(mem))
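
Usage is just (assuming the standard logging module):

import logging

logging.basicConfig(level=logging.INFO)
log_mem(logging.getLogger(__name__))  # e.g. 'Memory usage: 123.45mb'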
Example #23
def main(hparams):
    partitions = eval(hparams['partitions'])
    #env = TileIrrepEnv(hparams['tile_size'], partitions, hparams['reward'])
    env = TileEnv(hparams['tile_size'], one_hot=True, reward=hparams['reward'])
    pol_net = TileBaselineQ(env.observation_space.shape[0], hparams['nhid'],
                            env.actions)
    targ_net = TileBaselineQ(env.observation_space.shape[0], hparams['nhid'],
                             env.actions)

    opt = torch.optim.Adam(pol_net.parameters(), hparams['lr'])

    # this is probably something each individual model should own
    mem_dict = {
        'onehot_state': (env.observation_space.shape[0], ),
        'next_onehot_state': (env.observation_space.shape[0], ),
        'action': (1, ),
        'reward': (1, ),
        'done': (1, ),
        'dist': (1, ),
        'scramble_dist': (1, ),
    }
    dtype_dict = {
        'action': int,
        'scramble_dist': int,
    }
    memory = SimpleMemory(hparams['capacity'], mem_dict, dtype_dict)
    torch.manual_seed(hparams['seed'])
    np.random.seed(hparams['seed'])
    random.seed(hparams['seed'])

    print('Before training')
    #eval_model(pol_net, env, 100, 100)

    iters = 0
    losses = []
    dones = []
    tot_dists = []
    for e in range(hparams['epochs'] + 1):
        onehot_state = env.reset()
        #states = env.shuffle(hparams['shuffle_len'])
        for i in range(hparams['max_iters']):
            # states are onehot vectors
            #for dist, (grid_state, _x, _y) in enumerate(states):
            #onehot_state = grid_to_onehot(grid_state)
            _x, _y = env.x, env.y
            if random.random() < exp_rate(hparams['max_exp_epochs'], e,
                                          hparams['min_exp_rate']):
                action = random.choice(env.valid_moves(_x, _y))
            else:
                action = pol_net.get_action(onehot_state)

            # need option to do peek instead of step if we want to use a shuffle trajectory!
            new_state, reward, done, _ = env.step(action)
            #new_grid, reward, done, info = env.peek(grid_state, _x, _y, action)
            #new_state = grid_to_onehot(new_grid)
            memory.push({
                'onehot_state': onehot_state,
                'action': action,
                'reward': reward,
                'done': done,
                'next_onehot_state': new_state,
                'dist': 0
            })
            onehot_state = new_state
            iters += 1

            if iters % hparams['update_int'] == 0 and iters > 0:
                batch = memory.sample(hparams['batch_size'])
                #loss = pol_net.update(targ_net, env, batch, opt, hparams['discount'], e)
                loss = pol_net.update_simple(targ_net, env, batch, opt,
                                             hparams['discount'], e)
                losses.append(loss)

            if iters % hparams['update_int'] == 0 and e > 0:
                targ_net.load_state_dict(pol_net.state_dict())

        #tot_dists.append(dist)

        if e % hparams['log_int'] == 0 and e > 0:
            log.info(
                'Ep: {:4} | Last {} avg loss: {:.3f} | Exp rate: {:.4}'.format(
                    e, hparams['log_int'],
                    np.mean(losses[-hparams['log_int']:]),
                    exp_rate(hparams['max_exp_epochs'], e,
                             hparams['min_exp_rate'])))

    try:
        if not (hparams['savename'] is None):
            log.info('Saving model to: {}'.format(hparams['savename']))
            torch.save(pol_net, './models/{}.pt'.format(hparams['savename']))
    except Exception:
        pdb.set_trace()
    eval_model(pol_net, env, 100, 100)
    check_memory()
Example #24
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--partitions',
        type=str,
        default='[(4,), (3,1), (2, 1, 1), (2, 2), (1, 1, 1, 1)]')
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--tile_size', type=int, default=2)
    parser.add_argument('--capacity', type=int, default=10000)
    parser.add_argument('--epochs', type=int, default=2000)
    parser.add_argument('--max_iters', type=int, default=30)
    parser.add_argument('--max_exp_epochs', type=int, default=500)
    parser.add_argument('--min_exp_rate', type=float, default=0.05)
    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--discount', type=float, default=0.9)
    parser.add_argument('--reward', type=str, default='penalty')
    parser.add_argument('--nhid', type=int, default=16)
    parser.add_argument('--seed', type=int, default=0)

    parser.add_argument('--shuffle_len', type=int, default=50)
    parser.add_argument('--log_int', type=int, default=100)
    parser.add_argument('--update_int', type=int, default=20)
    parser.add_argument('--target_int', type=int, default=20)
    parser.add_argument('--update_type', type=int, default=1)
    parser.add_argument('--savename', type=str, default='model')

    args = parser.parse_args()
    hparams = vars(args)
    print(args)
    try:
        main(hparams)
    except KeyboardInterrupt:
        print('Keyboard escape!')
        check_memory()
Example #25
def test_main(alpha, parts):
    '''
    Computes the ft via the sparse wreath rep and the non-sparse wreath rep
    to double check that the sparse wreath rep is actually correct.
    '''
    _start = time.time()
    st = time.time()
    sp_irrep_dict = load_pkl(
        '/scratch/hopan/cube/pickles_sparse/{}/{}.pkl'.format(alpha, parts))
    print('Loading sparse irrep dict: {:.2f}s'.format(time.time() - st))
    check_memory()

    st = time.time()
    irrep_dict = load_irrep('/scratch/hopan/cube/', alpha, parts)
    print('Loading irrep dict: {:.2f}s'.format(time.time() - st))
    check_memory()

    # generate a random group element?
    st = time.time()
    df = load_df('/scratch/hopan/cube/')
    fhat = np.load('/scratch/hopan/cube/fourier/{}/{}.npy'.format(
        alpha, parts))
    print('Loading df: {:.2f}s'.format(time.time() - st))
    check_memory()

    cyc_irrep_func = cyclic_irreps(alpha)
    cos_reps = coset_reps(sn(8), young_subgroup_perm(alpha))
    st = time.time()
    cyc_irrs = all_cyc_irreps(cos_reps, cyc_irrep_func)
    print('Time to compute all cyc irreps: {:.5f}s'.format(time.time() - st))

    sp_times = []
    sp_mult_times = []
    sp_results = np.zeros(len(df), dtype=np.complex128)

    coo_times = []
    th_sp_times = []
    times = []
    mult_times = []
    z3_irreps = []
    results = np.zeros(len(df), dtype=np.complex128)
    fhat_t_ravel = fhat.T.ravel()
    loop_start = time.time()
    for idx in range(len(df)):
        row = df.loc[idx]
        otup = tuple(int(i) for i in row[0])
        perm_tup = tuple(int(i) for i in row[1])

        # compute wreath rep
        st = time.time()
        wmat = wreath_rep(otup, perm_tup, irrep_dict, cos_reps, cyc_irrep_func)
        reg_time = time.time() - st

        # compute wreath rep multiply
        st = time.time()
        wmat_inv = wmat.conj().T
        feval = np.dot(fhat_t_ravel, wmat_inv.ravel())
        reg_mult_time = time.time() - st
        results[idx] = feval

        # compute sparse wreath rep
        st = time.time()
        wmat_sp = wreath_rep_sp(otup, perm_tup, sp_irrep_dict, cos_reps,
                                cyc_irrep_func, cyc_irrs)
        sp_time = time.time() - st

        if not np.allclose(wmat, wmat_sp.todense()):
            print('unequal! | idx = {}'.format(idx))
            pdb.set_trace()

        # compute sparse wreath rep multiply
        st = time.time()
        wmat_inv_sp = wmat_sp.conj().T
        feval_sp = (wmat_inv_sp.multiply(fhat.T)).sum()
        sp_mult_time = time.time() - st
        sp_results[idx] = feval_sp

        times.append(reg_time)
        sp_times.append(sp_time)
        mult_times.append(reg_mult_time)
        sp_mult_times.append(sp_mult_time)

        st = time.time()
        coo = wmat_sp.tocoo()
        end = time.time()
        coo_times.append(end - st)

        st = time.time()
        ix = torch.LongTensor([coo.row, coo.col])
        th_sp_re = torch.sparse.FloatTensor(ix,
                                            torch.FloatTensor(coo.data.real),
                                            torch.Size(coo.shape))
        th_sp_im = torch.sparse.FloatTensor(ix,
                                            torch.FloatTensor(coo.data.imag),
                                            torch.Size(coo.shape))
        end = time.time()
        th_sp_times.append(end - st)

        st = time.time()
        block_scalars = block_cyclic_irreps(otup, cos_reps, cyc_irrep_func)
        end = time.time()
        z3_irreps.append(end - st)
        if idx > 200:
            break

    print('Normal time: {:.6f}s | Sparse time: {:.6f}s'.format(
        np.mean(times), np.mean(sp_times)))
    print('Mult time:   {:.6f}s | Spmult time: {:.6f}s'.format(
        np.mean(mult_times), np.mean(sp_mult_times)))
    print('To coo time: {:.6f}s | Torchsptime: {:.6f}s'.format(
        np.mean(coo_times), np.mean(th_sp_times)))
    print('irrep time:  {:.6f}s'.format(np.mean(z3_irreps)))
    print('Loop time: {:.2f}s'.format(time.time() - loop_start))
    print('Total time: {:.2f}s'.format(time.time() - _start))
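
The coo-to-torch conversion timed above has to split a complex sparse matrix into two real sparse tensors. A standalone version of that step, using the current torch.sparse_coo_tensor constructor instead of the legacy torch.sparse.FloatTensor:

import numpy as np
import torch
from scipy import sparse

data = np.array([1 + 2j, 3 - 1j])
coo = sparse.coo_matrix((data, ([0, 2], [1, 3])), shape=(4, 4))

ix = torch.from_numpy(np.vstack([coo.row, coo.col])).long()
# one real-valued sparse tensor per component, as in the timing loop
th_re = torch.sparse_coo_tensor(ix, torch.tensor(coo.data.real), coo.shape)
th_im = torch.sparse_coo_tensor(ix, torch.tensor(coo.data.imag), coo.shape)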
Example #26
def main(hparams):
    if not os.path.exists(hparams['logdir']):
        os.makedirs(hparams['logdir'])
    savedir = get_logdir(hparams['logdir'], hparams['savename'])
    os.makedirs(savedir)
    sumdir = os.path.join(savedir, 'logs')
    os.makedirs(sumdir)
    logfile = os.path.join(savedir, 'log.txt')
    logger = SummaryWriter(sumdir)

    with open(os.path.join(savedir, 'args.json'), 'w') as f:
        json.dump(hparams, f, indent=4)

    log = get_logger(logfile)
    log.debug('Saving in {}'.format(savedir))
    log.debug('hparams: {}'.format(hparams))

    torch.manual_seed(hparams['seed'])
    random.seed(hparams['seed'])

    alpha = eval(hparams['alpha'])
    parts = eval(hparams['parts'])
    log.info('alpha: {} | parts: {}'.format(alpha, parts))
    size = IRREP_SIZE[(alpha, parts)]
    pol_net = IrrepLinreg(size * size)
    targ_net = IrrepLinreg(size * size)

    if not hparams['init']:
        log.info('Loading fourier')
        pol_net.loadnp(NP_IRREP_FMT.format(str(alpha), str(parts)))
        targ_net.loadnp(NP_IRREP_FMT.format(str(alpha), str(parts)))
    else:
        pol_net.init(hparams['init'])
        targ_net.init(hparams['init'])
        log.info('Init model using mode: {}'.format(hparams['init']))

    if hparams['noise']:
        log.info('Adding noise: {}'.format(hparams['noise']))
        mu = torch.zeros(pol_net.wr.size())
        std = torch.zeros(pol_net.wr.size()) + hparams['noise']
        wr_noise = torch.normal(mu, std)
        wi_noise = torch.normal(mu, std)
        pol_net.wr.data.add_(wr_noise)
        pol_net.wi.data.add_(wi_noise)

        wr_noise = torch.normal(mu, std)
        wi_noise = torch.normal(mu, std)
        targ_net.wr.data.add_(wr_noise)
        targ_net.wi.data.add_(wi_noise)

    env = Cube2IrrepEnv(alpha, parts, solve_rew=hparams['solve_rew'])
    log.info('env solve reward: {}'.format(env.solve_rew))
    if hparams['opt'] == 'sgd':
        log.info('Using sgd')
        optimizer = torch.optim.SGD(pol_net.parameters(),
                                    lr=hparams['lr'],
                                    momentum=hparams['momentum'])
    elif hparams['opt'] == 'rms':
        log.info('Using rmsprop')
        optimizer = torch.optim.RMSprop(pol_net.parameters(),
                                        lr=hparams['lr'],
                                        momentum=hparams['momentum'])
    memory = ReplayMemory(hparams['capacity'])
    if hparams['meminit']:
        init_memory(memory, env)
    niter = 0
    nupdates = 0
    totsolved = 0
    solved_lens = []
    rewards = np.zeros(hparams['logint'])

    log.info('Before any training:')
    val_avg, val_prop, val_time, solve_lens = val_model(pol_net, env, hparams)
    log.info(
        'Validation | avg solve length: {:.4f} | solve prop: {:.4f} | time: {:.2f}s'
        .format(val_avg, val_prop, val_time))
    log.info(
        'Validation | LQ: {:.3f} | MQ: {:.3f} | UQ: {:.3f} | Max: {}'.format(
            np.percentile(solve_lens, 25), np.percentile(solve_lens, 50),
            np.percentile(solve_lens, 75), max(solve_lens)))
    scramble_lens = []
    for e in range(hparams['epochs']):
        if hparams['curric']:
            dist = curriculum_dist(hparams['max_dist'], e, hparams['epochs'])
        else:
            dist = hparams['max_dist']
        state = env.reset_fixed(max_dist=dist)
        epoch_rews = 0
        scramble_lens.append(dist)

        for i in range(hparams['maxsteps']):
            if hparams['norandom']:
                action = get_action(env, pol_net, state)
            elif random.random() < explore_rate(
                    e, hparams['epochs'] * hparams['explore_proportion'],
                    hparams['eps_min']):
                action = random.randint(0, env.action_space.n - 1)
            else:
                action = get_action(env, pol_net, state)

            ns, rew, done, _ = env.step(action, irrep=False)
            memory.push(state, action, ns, rew, done)
            epoch_rews += rew
            state = ns
            niter += 1

            if (not hparams['noupdate']
                ) and niter > 0 and niter % hparams['update_int'] == 0:
                sample = memory.sample(hparams['batch_size'])
                _loss = update(env, pol_net, targ_net, sample, optimizer,
                               hparams, logger, nupdates)
                logger.add_scalar('loss', _loss, nupdates)
                nupdates += 1

            if done:
                solved_lens.append(i + 1)
                totsolved += 1
                break

        rewards[e % len(rewards)] = epoch_rews
        logger.add_scalar('reward', epoch_rews, e)

        if e % hparams['logint'] == 0 and e > 0:
            val_avg, val_prop, val_time, _ = val_model(pol_net, env, hparams)
            logger.add_scalar('last_{}_solved'.format(hparams['logint']),
                              len(solved_lens) / hparams['logint'], e)
            if len(solved_lens) > 0:
                logger.add_scalar(
                    'last_{}_solved_len'.format(hparams['logint']),
                    np.mean(solved_lens), e)
            logger.add_scalar('val_solve_avg', val_avg, e)
            logger.add_scalar('val_prop', val_prop, e)
            log.info(
                '{:7} | dist: {:4.1f} | avg rew: {:5.2f} | solve prop: {:5.3f}, len: {:5.2f} | exp: {:.2f} | ups {:7} | val avg {:.3f} prop {:.3f}'
                .format(
                    e,
                    np.mean(scramble_lens),
                    np.mean(rewards),
                    len(solved_lens) / hparams['logint'],
                    0 if len(solved_lens) == 0 else np.mean(solved_lens),
                    explore_rate(
                        e, hparams['epochs'] * hparams['explore_proportion'],
                        hparams['eps_min']),
                    nupdates,
                    val_avg,
                    val_prop,
                ))
            solved_lens = []
            scramble_lens = []

        if e % hparams['updatetarget'] == 0 and e > 0:
            targ_net.load_state_dict(pol_net.state_dict())

    log.info('Total updates: {}'.format(nupdates))
    log.info('Total solved: {:8} | Prop solved: {:.4f}'.format(
        totsolved, totsolved / hparams['epochs']))
    logger.export_scalars_to_json(os.path.join(savedir, 'summary.json'))
    logger.close()
    torch.save(pol_net, os.path.join(savedir, 'model.pt'))
    check_memory()

    hparams['val_size'] = 10 * hparams['val_size']
    val_avg, val_prop, val_time, _ = val_model(pol_net, env, hparams)
    log.info(
        'Validation avg solve length: {:.4f} | solve prop: {:.4f} | time: {:.2f}s'
        .format(val_avg, val_prop, val_time))