Example #1
def add_part_indicatormap(data_dir, save_dir, mdim, rate, filter_size, stride):
    """
    Generate the part indicator maps for old data.
    data_dir is the directory that contains all the data batches.
    """
    allfile = iu.getfilelist(data_dir, r'data_batch_\d+')
    meta_path = iu.fullfile(data_dir, 'batches.meta')
    iu.ensure_dir(save_dir)
    if iu.exists(meta_path, 'file'):
        d_meta = myio.unpickle(meta_path)
        if 'savedata_info' not in d_meta:
            d_meta['savedata_info'] = dict()
            d_meta['savedata_info']['indmap_para'] = dict()
        d_meta['savedata_info']['indmap_para']['filter_size'] = filter_size
        d_meta['savedata_info']['indmap_para']['stride'] = stride
        d_meta['savedata_info']['indmap_para']['rate'] = rate
        myio.pickle(iu.fullfile(save_dir, 'batches.meta'), d_meta)
    for fn in allfile:
        print 'Processing %s' % fn
        d = myio.unpickle(iu.fullfile(data_dir, fn))
        ndata = d['data'].shape[-1]
        nparts = 7
        d['indmap'] = np.zeros((nparts, mdim[0], mdim[1], ndata),
                               dtype=bool)
        for i in range(ndata):
            jts = d['joints8'][..., i]
            d['indmap'][..., i] = HMLPE.create_part_indicatormap(
                jts, part_idx, mdim, rate, filter_size, stride)
        myio.pickle(iu.fullfile(save_dir, fn), d)
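A minimal usage sketch, assuming iu, myio, np and HMLPE are imported at module level as in the original file, and that part_idx (referenced in the loop above) is the module-level list of part definitions. The paths and parameter values below are placeholders rather than values from the original project.

# Hypothetical invocation (placeholder paths and parameters): regenerate
# 8x8 part indicator maps for an existing folder of data batches.
add_part_indicatormap('/path/to/old_batches', '/path/to/batches_with_indmap',
                      mdim=(8, 8), rate=0.3, filter_size=30, stride=12)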
Example #2
def merge_batch_data(data_dir_list,
                     save_dir,
                     is_symbolic=True,
                     batch_start_num=1):
    """
    Merge all the data batches from the directories in data_dir_list into one
    folder and rename them accordingly. The meta data is updated as well.
    """
    import os
    import shutil
    iu.ensure_dir(save_dir)
    meta = None
    for ddir in data_dir_list:
        cur_meta = myio.unpickle(iu.fullfile(ddir, 'batches.meta'))
        meta = HMLPE.merge_meta(meta, cur_meta)

    myio.pickle(iu.fullfile(save_dir, 'batches.meta'), meta)
    cur_id = batch_start_num
    for ddir in data_dir_list:
        all_file = iu.getfilelist(ddir, r'data_batch_\d+')
        print 'Found %d batches in %s' % (len(all_file), ddir)
        if is_symbolic:
            for fn in all_file:
                sn = iu.fullfile(save_dir, 'data_batch_%d' % cur_id)
                if iu.exists(sn, 'file'):
                    os.remove(sn)
                os.symlink(iu.fullfile(ddir, fn), sn)
                cur_id = cur_id + 1
        else:
            for fn in all_file:
                shutil.copyfile(
                    iu.fullfile(ddir, fn),
                    iu.fullfile(save_dir, 'data_batch_%d' % cur_id))
                cur_id = cur_id + 1
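A minimal usage sketch; the folder names are placeholders. With is_symbolic=True the merged folder only holds symlinks into the source folders, so those folders must stay in place after the merge.

# Hypothetical invocation (placeholder paths): merge two batch folders
# into one, linking rather than copying the batch files.
merge_batch_data(['/path/to/batches_a', '/path/to/batches_b'],
                 '/path/to/batches_merged',
                 is_symbolic=True,
                 batch_start_num=1)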
Example #3
def test_addmeta(metadir):
    metapath = iu.fullfile(metadir, 'batches.meta')
    d_meta = myio.unpickle(metapath)
    d_meta['ind_dim'] = dict()
    d_meta['ind_dim']['part_indmap'] = (8, 8)
    d_meta['ind_dim']['joint_indmap'] = (8, 8)
    myio.pickle(metapath, d_meta)
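A minimal usage sketch; the path is a placeholder. The helper rewrites batches.meta in place, adding the 8x8 dimensions of the part and joint indicator maps.

# Hypothetical invocation (placeholder path).
test_addmeta('/path/to/batches')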
Example #4
def shuffle_data(source_dir, target_dir, max_per_file=4000):
    """
    Shuffle all the data in source_dir and save the result to target_dir.
    """
    if source_dir == target_dir:
        raise HMLPEError('source dir cannot be the same as target dir')
    import shutil
    import sys
    iu.ensure_dir(target_dir)
    shutil.copy(iu.fullfile(source_dir, 'batches.meta'), \
                iu.fullfile(target_dir, 'batches.meta'))
    meta = myio.unpickle(iu.fullfile(source_dir, 'batches.meta'))
    ndata = meta['ndata']
    nbatch = (ndata - 1) / max_per_file + 1
    nparts = meta['nparts']
    njoints = meta['njoints']
    newdim = meta['savedata_info']['newdim']
    filter_size = meta['savedata_info']['indmap_para']['filter_size']
    stride = meta['savedata_info']['indmap_para']['stride']
    joint_filter_size = meta['savedata_info']['indmap_para'][
        'joint_filter_size']
    joint_stride = meta['savedata_info']['indmap_para']['joint_stride']
    mdim = HMLPE.get_indmapdim(newdim, filter_size, stride)
    jtmdim = HMLPE.get_indmapdim(newdim, joint_filter_size, joint_stride)
    print('There are %d samples in total; %d batches are needed to hold them' %
          (ndata, nbatch))
    print 'Begin creating empty files'
    rest = ndata
    d = HMLPE.prepare_savebuffer({'data':newdim, 'part_indmap':mdim, \
                                       'joint_indmap': jtmdim}, max_per_file, nparts, njoints)
    HMLPE.adjust_savebuffer_shape(d)
    for b in range(nbatch):
        cur_n = min(max_per_file, rest)
        if b != nbatch - 1:
            saved = d
        else:
            saved = HMLPE.prepare_savebuffer({'data':newdim, 'part_indmap':mdim, \
                                       'joint_indmap': jtmdim}, cur_n, nparts, njoints)
            HMLPE.adjust_savebuffer_shape(saved)
        myio.pickle(iu.fullfile(target_dir, 'data_batch_%d' % (b + 1)), saved)
        rest = rest - cur_n
    print 'End creating'
    allbatchfn = iu.getfilelist(source_dir, r'data_batch_\d+')
    np.random.seed(7)
    perm = range(ndata)
    np.random.shuffle(perm)
    buf_cap = 12  # hold at most 12 target batches in memory at a time
    nround = (nbatch - 1) / buf_cap + 1
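    # Each round fills at most buf_cap target batches in memory, scanning
    # every source batch once, then writes those buffered batches to disk.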
    for rd in range(nround):
        print('Round %d of %d' % (rd, nround))
        buf = dict()
        offset = 0
        for fn in allbatchfn:
            print('Processing %s' % fn)
            d = myio.unpickle(iu.fullfile(source_dir, fn))
            cur_n = d['data'].shape[-1]
            for b in range(rd * buf_cap, min(nbatch, (rd + 1) * buf_cap)):
                sys.stdout.write('\rpadding %d of %d' % (b + 1, nbatch))
                sys.stdout.flush()
                sidx = b * max_per_file
                eidx = min(ndata, sidx + max_per_file)
                cur_idx_list = [
                    i for i in range(cur_n)
                    if perm[offset + i] >= sidx and perm[offset + i] < eidx
                ]
                if len(cur_idx_list) == 0:
                    continue
                if b not in buf:
                    dsave = myio.unpickle(
                        iu.fullfile(target_dir, 'data_batch_%d' % (b + 1)))
                    buf[b] = dsave
                else:
                    dsave = buf[b]
                save_idx_list = [perm[x + offset] - sidx for x in cur_idx_list]
                HMLPE.selective_copydic(d, dsave, cur_idx_list, save_idx_list)
                # myio.pickle(iu.fullfile(target_dir, 'data_batch_%d' % (b+1)), dsave)
            print 'Finished %s' % fn
            offset = offset + cur_n
        for b in range(rd * buf_cap, min(nbatch, (rd + 1) * buf_cap)):
            myio.pickle(iu.fullfile(target_dir, 'data_batch_%d' % (b + 1)),
                        buf[b])
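A minimal usage sketch; the paths are placeholders. The source folder is expected to contain a batches.meta with the fields read above (ndata, nparts, njoints, savedata_info), and the target folder must differ from the source folder.

# Hypothetical invocation (placeholder paths): shuffle every sample from
# the source batches into the target folder, with at most 4000 samples
# per output batch.
shuffle_data('/path/to/batches_merged', '/path/to/batches_shuffled',
             max_per_file=4000)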