def test_writes(self):
    """Round-trip random ``DistData`` through every writable dist format.

    For each combination of row count, column count and "with/without
    position data", the in-memory ``DistData`` is written with each format's
    writer, read back (once in full and once through a ``[::2, ::3]`` slice)
    and compared with the expected values, row ids, column ids and
    row/column properties.  Formats that cannot represent a given case
    (see the ``cant_do_*`` tables) are skipped for that case.

    Raises:
        AssertionError: if a writer returns ``None`` or any read-back value
            differs from what was written.
    """
    from pysnptools.distreader import DistData, DistHdf5, DistNpz, DistMemMap, Bgen
    from pysnptools.kernelreader.test import _fortesting_JustCheckExists

    # (reader class, file suffix, optional reader factory, optional writer).
    # A ``None`` factory/writer means "use the class itself / its ``write``".
    the_class_and_suffix_list = [
        (DistNpz, "npz", None, None),
        (Bgen, "bgen", None,
         lambda filename, distdata: Bgen.write(filename, distdata, bits=32)),
        (DistHdf5, "hdf5", None, None),
        (DistMemMap, "memmap", None, None),
    ]

    # Per-format capability tables, keyed by file suffix.
    cant_do_col_prop_none_set = {'bgen'}
    cant_do_col_len_0_set = {'bgen'}
    cant_do_row_count_zero_set = {'bgen'}
    can_swap_0_2_set = set()
    can_change_col_names_set = set()
    ignore_fam_id_set = set()
    ignore_pos1_set = {'bgen'}
    ignore_pos_set = set()
    erase_any_write_dir = set()

    #===================================
    #    Starting main function
    #===================================
    logging.info("starting 'test_writes'")
    np.random.seed(0)
    output_template = "tempdir/distreader/writes.{0}.{1}"
    create_directory_if_necessary(output_template.format(0, "npz"))

    all_rows = [('0', '0'), ('1', '1'), ('2', '2'), ('3', '3'), ('4', '4')]
    all_cols = ['s0', 's1', 's2', 's3', 's4']
    subsetters = [None, np.s_[::2, ::3]]

    i = 0  # running counter: names each DistData and makes file names unique
    for row_count in [0, 5, 2, 1]:
        for col_count in [4, 2, 1, 0]:
            val = np.random.random(size=[row_count, col_count, 3])
            val /= val.sum(axis=2, keepdims=True)  # make probabilities sum to 1
            # Normalized probabilities are at most 1, so this mask selects
            # nothing.  NOTE(review): presumably inherited from a test whose
            # values could equal 3 -- confirm whether missing values were
            # meant to be injected here.
            val[val == 3] = np.nan
            row = all_rows[:row_count]
            col = all_cols[:col_count]
            for is_none in (True, False):
                col_prop = None if is_none else [(x, x, x) for x in range(5)][:col_count]
                distdata = DistData(iid=row, sid=col, val=val, pos=col_prop, name=str(i))
                for the_class, suffix, constructor, writer in the_class_and_suffix_list:
                    # Fall back to the class and its ``write`` directly; this
                    # avoids lambdas that close over the loop variable.
                    make_reader = constructor or the_class
                    write = writer or the_class.write

                    if col_count == 0 and suffix in cant_do_col_len_0_set:
                        continue
                    if col_prop is None and suffix in cant_do_col_prop_none_set:
                        continue
                    if row_count == 0 and suffix in cant_do_row_count_zero_set:
                        continue

                    filename = output_template.format(i, suffix)
                    logging.info(filename)
                    i += 1
                    if suffix in erase_any_write_dir and os.path.exists(filename):
                        shutil.rmtree(filename)
                    ret = write(filename, distdata)
                    assert ret is not None

                    for subsetter in subsetters:
                        reader = make_reader(filename)
                        _fortesting_JustCheckExists().input(reader)
                        if subsetter is None:
                            subreader = reader
                            expected = distdata
                        else:
                            subreader = reader[subsetter[0], subsetter[1]]
                            expected = distdata[subsetter[0], subsetter[1]].read()
                        readdata = subreader.read(order='C')

                        # Values
                        if suffix not in can_swap_0_2_set:
                            assert np.allclose(readdata.val, expected.val, equal_nan=True)
                        else:
                            for col_index in range(readdata.col_count):
                                assert (
                                    np.allclose(readdata.val[:, col_index],
                                                expected.val[:, col_index],
                                                equal_nan=True)
                                    or np.allclose(readdata.val[:, col_index] * -1 + 2,
                                                   expected.val[:, col_index],
                                                   equal_nan=True)
                                )

                        # Row ids
                        if suffix not in ignore_fam_id_set:
                            assert np.array_equal(readdata.row, expected.row)
                        else:
                            assert np.array_equal(readdata.row[:, 1], expected.row[:, 1])

                        # Column ids
                        if suffix not in can_change_col_names_set:
                            assert np.array_equal(readdata.col, expected.col)
                        else:
                            assert readdata.col_count == expected.col_count

                        # Row properties: equal, or both have zero property columns.
                        assert (
                            np.array_equal(readdata.row_property, expected.row_property)
                            or (readdata.row_property.shape[1] == 0
                                and expected.row_property.shape[1] == 0)
                        )

                        # Column properties
                        if suffix in ignore_pos1_set:
                            # Compare only property columns 0 and 2.
                            assert (
                                np.allclose(readdata.col_property[:, [0, 2]],
                                            expected.col_property[:, [0, 2]],
                                            equal_nan=True)
                                or (readdata.col_property.shape[1] == 0
                                    and expected.col_property.shape[1] == 0)
                            )
                        elif suffix not in ignore_pos_set:
                            assert (
                                np.allclose(readdata.col_property,
                                            expected.col_property,
                                            equal_nan=True)
                                or (readdata.col_property.shape[1] == 0
                                    and expected.col_property.shape[1] == 0)
                            )
                        else:
                            assert len(readdata.col_property) == len(expected.col_property)

                    # Best-effort cleanup: the file may be missing or still
                    # held open; neither should fail the test.
                    try:
                        os.remove(filename)
                    except OSError:
                        pass
    logging.info("done with 'test_writes'")
tracemalloc.stop() if False: logging.info("test info") from pysnptools.distreader import Bgen, DistGen for iid_count, sid_count in [(50, 5765294)]: print("iid_count=,sid_count=", iid_count, sid_count) dist_gen = DistGen(seed=332, iid_count=iid_count, sid_count=sid_count) filename = r"m:\deldir\fakeuk{0}x{1}.bgen".format( iid_count, sid_count) Bgen.write(filename, dist_gen, bits=8, compression="zlib", cleanup_temp_files=False) # print(os.path.getsize(filename)) # if False: #!!!c,l # from pysnptools.distreader import Bgen # bgen = Bgen(r'D:\OneDrive\programs\hide\bgen-reader-py\bgen_reader\_example\complex.23bits.no.samples.bgen',allow_complex=True) # print(bgen.sid_count) if False: from pysnptools.distreader import Bgen bgen = Bgen(r"M:\deldir\2500x100.bgen") bgen.read() print(bgen.shape)
#bits=8 ##iid_count = 1 ##sid_count = 1*1000*1000 #iid_count = 2500 #sid_count = 100 #iid_count = 2500 #sid_count = 500*1000 #bits=16 iid_count = 25 sid_count = 1000 bits = 16 from pysnptools.distreader import DistGen from pysnptools.distreader import Bgen distgen = DistGen(seed=332, iid_count=iid_count, sid_count=sid_count) Bgen.write('M:\deldir\{0}x{1}.bgen'.format(iid_count, sid_count), distgen, bits) if False: from pysnptools.distreader import Bgen bgen = Bgen(r'M:\deldir\500000x100.bgen') #1x1000000.bgen') print(bgen.iid) distdata = bgen.read(dtype='float32') if False: logging.basicConfig(level=logging.INFO) bgen = Bgen(r'M:\deldir\2500x500000.bgen', sid_function='id') # Bgen(r'M:\deldir\10x5000000.bgen') sid_index = int(.5 * bgen.sid_count) distdata = bgen[:, sid_index].read() print(distdata.val) if False: from pysnptools.distreader import DistHdf5, Bgen import pysnptools.util as pstutil