def test1(self):
    """Exercise creating, flushing and re-opening a small :class:`PstMemMap`.

    The working directory is switched to this file's directory so that the
    relative paths used below ("tempdir/..." and "../examples/...") resolve.
    It is restored in a ``finally`` clause so that a failing assertion does
    not leave the process in a different directory for later tests.
    """
    logging.info("in TestPstMemMap test1")
    old_dir = os.getcwd()
    os.chdir(os.path.dirname(os.path.realpath(__file__)))
    try:
        # Create a new memory-mapped file and fill it in place.
        filename2 = "tempdir/tiny.pst.memmap"
        pstutil.create_directory_if_necessary(filename2)
        pstreader2 = PstMemMap.empty(row=['a', 'b', 'c'], col=['y', 'z'], filename=filename2,
                                     row_property=['A', 'B', 'C'], order="F", dtype=np.float64)
        assert isinstance(pstreader2.val, np.memmap)
        pstreader2.val[:, :] = [[1, 2], [3, 4], [np.nan, 6]]
        assert np.array_equal(pstreader2[[0], [0]].read(view_ok=True).val, np.array([[1.]]))

        # Flushing must neither change the values nor the type of ``val``.
        pstreader2.flush()
        assert isinstance(pstreader2.val, np.memmap)
        assert np.array_equal(pstreader2[[0], [0]].read(view_ok=True).val, np.array([[1.]]))
        pstreader2.flush()

        # Re-open the file that was just written.
        pstreader3 = PstMemMap(filename2)
        assert np.array_equal(pstreader3[[0], [0]].read(view_ok=True).val, np.array([[1.]]))
        assert isinstance(pstreader3.val, np.memmap)

        # Open the example file shipped with the package.
        pstreader = PstMemMap('../examples/tiny.pst.memmap')
        assert pstreader.row_count == 3
        assert pstreader.col_count == 2
        assert isinstance(pstreader.val, np.memmap)

        # Reading with view_ok=True should hand back a memmap-backed value.
        pstdata = pstreader.read(view_ok=True)
        assert isinstance(pstdata.val, np.memmap)
    finally:
        os.chdir(old_dir)
def write(filename, pstdata):
    """Store a :class:`PstData` on disk in :class:`PstMemMap` format and return a :class:`.PstMemMap` for it.

    The values are first written to ``filename + '.temp'``. Only after that file
    has been flushed is any existing *filename* removed and the temporary file
    moved into its place.

    :param filename: the name of the file to create
    :type filename: string

    :param pstdata: The in-memory data that should be written to disk.
    :type pstdata: :class:`PstData`

    :rtype: :class:`.PstMemMap`

    >>> import pysnptools.util as pstutil
    >>> from pysnptools.pstreader import PstData, PstMemMap
    >>> data1 = PstData(row=['a','b','c'],col=['y','z'],val=[[1,2],[3,4],[np.nan,6]],row_property=['A','B','C'])
    >>> pstutil.create_directory_if_necessary("tempdir/tiny.pst.memmap")
    >>> PstMemMap.write("tempdir/tiny.pst.memmap",data1) # Write data1 in PstMemMap format
    PstMemMap('tempdir/tiny.pst.memmap')
    """
    memmap = PstMemMap.empty(
        pstdata.row,
        pstdata.col,
        filename + '.temp',
        row_property=pstdata.row_property,
        col_property=pstdata.col_property,
        order=PstMemMap._order(pstdata),
        dtype=pstdata.val.dtype,
        val_shape=pstdata.val_shape,
    )

    # A non-None val_shape means val carries a third dimension.
    if pstdata.val_shape is not None:
        memmap.val[:, :, :] = pstdata.val
    else:
        memmap.val[:, :] = pstdata.val
    memmap.flush()

    # Swap the finished temporary file into place.
    if os.path.exists(filename):
        os.remove(filename)
    shutil.move(filename + '.temp', filename)

    logging.debug("Done writing " + filename)
    return PstMemMap(filename)
def test_respect_read_inputs(self):
    """Check that ``read`` honors ``order`` and ``dtype`` for several kinds of readers.

    Every reader is read with each combination of ``order``, ``dtype``,
    ``force_python_only`` and ``view_ok``. The result must have the requested
    dtype and memory layout, and when ``view_ok`` is False a reader that holds
    its values in ``val`` must not hand back that same array.

    The working directory is switched to this file's directory so the relative
    example paths resolve; it is restored in a ``finally`` clause so a failing
    assertion cannot leak the changed directory into later tests.
    """
    import itertools
    from pysnptools.pstreader import _MergeRows, _MergeCols

    previous_wd = os.getcwd()
    os.chdir(os.path.dirname(os.path.realpath(__file__)))
    try:
        snp_hdf5 = '../examples/toydata.snpmajor.snp.hdf5'
        pstreaders = [
            PstNpz('../examples/toydata10.snp.npz'),
            _MergeRows([
                PstHdf5(snp_hdf5)[:5, :].read(),
                PstHdf5(snp_hdf5)[5:, :].read(),
            ]),
            _MergeCols([
                PstHdf5(snp_hdf5)[:, :5].read(),
                PstHdf5(snp_hdf5)[:, 5:].read(),
            ]),
            PstHdf5(snp_hdf5)[::2, ::2],
            PstHdf5('../examples/toydata.snpmajor.dist.hdf5').read(),
            PstHdf5('../examples/toydata.kernel.hdf5'),
            PstMemMap('../examples/tiny.pst.memmap'),
        ]
        # product() varies the right-most factor fastest, which is the same
        # iteration order as the equivalent four nested loops.
        combinations = list(itertools.product(
            ['F', 'C', 'A'],
            [np.float32, np.float64],
            [True, False],
            [True, False],
        ))

        for pstreader in pstreaders:
            logging.info(str(pstreader))
            for order, dtype, force_python_only, view_ok in combinations:
                val = pstreader.read(
                    order=order,
                    dtype=dtype,
                    force_python_only=force_python_only,
                    view_ok=view_ok).val
                has_right_order = order == "A" or (
                    order == "C" and val.flags["C_CONTIGUOUS"]
                ) or (order == "F" and val.flags["F_CONTIGUOUS"])

                has_val = hasattr(pstreader, 'val')
                if has_val and not view_ok:
                    # A copy was demanded, so the reader's own array must not come back.
                    assert pstreader.val is not val
                if (has_val and view_ok and pstreader.val is not val
                        and (order == 'A'
                             or (order == 'F' and pstreader.val.flags['F_CONTIGUOUS'])
                             or (order == 'C' and pstreader.val.flags['C_CONTIGUOUS']))
                        and (dtype is None or pstreader.val.dtype == dtype)):
                    # Not an error, only a missed optimization worth noting.
                    logging.info(
                        "{0} could have read a view, but didn't".format(pstreader))

                assert val.dtype == dtype
                assert has_right_order
    finally:
        os.chdir(previous_wd)
def empty(row, col, filename, row_property=None, col_property=None, order="F", dtype=np.float64, val_shape=None):
    '''Build a new, empty :class:`.PstMemMap` backed by a file on disk.

    :param row: The :attr:`PstReader.row` information
    :type row: an array of anything

    :param col: The :attr:`PstReader.col` information
    :type col: an array of anything

    :param filename: name of memory-mapped file to create
    :type filename: string

    :param row_property: optional -- The additional :attr:`PstReader.row_property` information
        associated with each row. Default: None
    :type row_property: an array of anything

    :param col_property: optional -- The additional :attr:`PstReader.col_property` information
        associated with each col. Default: None
    :type col_property: an array of anything

    :param order: {'F' (default), 'C'}, optional -- Specify the order of the ndarray.
    :type order: string or None

    :param dtype: {numpy.float64 (default), numpy.float32}, optional -- The data-type for
        the :attr:`PstMemMap.val` ndarray. Anything accepted by ``numpy.dtype`` is normalized
        to a dtype object before use.
    :type dtype: data-type

    :param val_shape: (Default: None), optional -- The shape of the last dimension of
        :attr:`PstMemMap.val`. *None* means each value is a scalar.
    :type val_shape: None or a number

    :rtype: :class:`.PstMemMap`

    >>> import pysnptools.util as pstutil
    >>> from pysnptools.pstreader import PstMemMap
    >>> filename = "tempdir/tiny.pst.memmap"
    >>> pstutil.create_directory_if_necessary(filename)
    >>> pst_mem_map = PstMemMap.empty(row=['a','b','c'],col=['y','z'],filename=filename,row_property=['A','B','C'],order="F",dtype=np.float64)
    >>> pst_mem_map.val[:,:] = [[1,2],[3,4],[np.nan,6]]
    >>> pst_mem_map.flush()

    '''
    # Normalize first so an invalid dtype fails before any reader object exists.
    normalized_dtype = np.dtype(dtype)

    pst_mem_map = PstMemMap(filename)
    pst_mem_map._empty_inner(
        row,
        col,
        filename,
        row_property,
        col_property,
        order,
        normalized_dtype,
        val_shape,
    )
    return pst_mem_map
def write(filename, snpdata):
    """Store a :class:`SnpData` on disk in :class:`SnpMemMap` format and return a reader for it.

    The values are first written to ``filename + '.temp'``. Only after that file
    has been flushed is any existing *filename* removed and the temporary file
    moved into its place.

    :param filename: the name of the file to create
    :type filename: string

    :param snpdata: The in-memory data that should be written to disk.
    :type snpdata: :class:`SnpData`

    :rtype: :class:`.SnpMemMap`

    >>> import pysnptools.util as pstutil
    >>> from pysnptools.snpreader import SnpData, SnpMemMap
    >>> data1 = SnpData(iid=[['fam0','iid0'],['fam0','iid1']], sid=['snp334','snp349','snp921'],val= [[0.,2.,0.],[0.,1.,2.]])
    >>> pstutil.create_directory_if_necessary("tempdir/tiny.snp.memmap") #LATER should we just promise to create directories?
    >>> SnpMemMap.write("tempdir/tiny.snp.memmap",data1) # Write data1 in SnpMemMap format
    SnpMemMap('tempdir/tiny.snp.memmap')
    """
    # iid and sid are stored as ascii so files stay compatible between the
    # Python 2 and Python 3 formats.
    # TODO: avoid these two copies when they are not needed.
    iid_bytes = np.array(snpdata.row, dtype='S')
    sid_bytes = np.array(snpdata.col, dtype='S')

    memmap = PstMemMap.empty(
        iid_bytes,
        sid_bytes,
        filename + '.temp',
        row_property=snpdata.row_property,
        col_property=snpdata.col_property,
        order=PstMemMap._order(snpdata),
        dtype=snpdata.val.dtype,
    )
    memmap.val[:, :] = snpdata.val
    memmap.flush()

    # Swap the finished temporary file into place.
    if os.path.exists(filename):
        os.remove(filename)
    shutil.move(filename + '.temp', filename)

    logging.debug("Done writing " + filename)
    return SnpMemMap(filename)
def write(filename, snpreader, standardizer=Identity(), order='A', dtype=None, block_size=None, num_threads=None):
    """Writes a :class:`SnpReader` to :class:`SnpMemMap` format.

    The values are first written to ``filename + '.temp'``. Only after that file
    has been flushed is any existing *filename* removed and the temporary file
    moved into its place.

    :param filename: the name of the file to create
    :type filename: string

    :param snpreader: The data that should be written to disk.
    :type snpreader: :class:`SnpReader`

    :param standardizer: optional -- Its ``standardize`` method is called on the data before the
        values are stored. Default: ``Identity()``.
        NOTE: when *snpreader* already has a ``val`` attribute, ``standardize`` is called on
        *snpreader* itself and its return value is ignored, so the input appears to be
        standardized in place -- confirm against the standardizer in use.
    :type standardizer: an object with a ``standardize(data, num_threads=...)`` method

    :param order: {'A' (default), 'F', 'C'}, optional -- Specify the order of the ndarray.
        With 'A', the order of the input is used when the input has a ``val``; otherwise 'F'.
    :type order: string

    :param dtype: {None (default), numpy.float64, numpy.float32}, optional -- The data-type for
        the :attr:`SnpMemMap.val` ndarray. With None, the dtype of the input's ``val`` is used
        when the input has one; otherwise numpy.float64.
    :type dtype: data-type

    :param block_size: optional -- The number of SNPs to read in a batch from *snpreader* when it
        has no ``val``. None (or 0) selects a size such that *block_size* times the row count
        is about 100,000, with a minimum of 1.
    :type block_size: number

    :param num_threads: optional -- Passed through to ``read`` and to ``standardizer.standardize``.
    :type num_threads: None or int

    :rtype: :class:`.SnpMemMap`

    >>> import pysnptools.util as pstutil
    >>> from pysnptools.util import example_file # Download and return local file name
    >>> from pysnptools.snpreader import Bed, SnpMemMap
    >>> bed_file = example_file("pysnptools/examples/toydata.5chrom.*","*.bed")
    >>> bed = Bed(bed_file)
    >>> pstutil.create_directory_if_necessary("tempdir/toydata.5chrom.snp.memmap") #LATER should we just promise to create directories?
    >>> SnpMemMap.write("tempdir/toydata.5chrom.snp.memmap",bed) # Write bed in SnpMemMap format
    SnpMemMap('tempdir/toydata.5chrom.snp.memmap')
    """
    # `or` is deliberate here: a block_size of 0 would be an invalid range() step below.
    block_size = block_size or max((100_000) // max(1, snpreader.row_count), 1)

    has_val = hasattr(snpreader, 'val')
    if order == 'A':
        order = PstMemMap._order(snpreader) if has_val else 'F'
    # Test against None explicitly rather than relying on the truthiness of a
    # dtype object, so an explicitly requested dtype is never discarded.
    if dtype is None:
        dtype = snpreader.val.dtype if has_val else np.float64
    dtype = np.dtype(dtype)

    snpmemmap = SnpMemMap.empty(iid=snpreader.iid, sid=snpreader.sid,
                                filename=filename + '.temp',
                                pos=snpreader.col_property, order=order, dtype=dtype)

    if has_val:
        # Everything is already in memory: standardize and copy in one step.
        standardizer.standardize(snpreader, num_threads=num_threads)
        snpmemmap.val[:, :] = snpreader.val
    else:
        # Stream from the reader in column blocks.
        with log_in_place("SnpMemMap write sid_index ", logging.INFO) as updater:
            for start in range(0, snpreader.sid_count, block_size):
                updater('{0} of {1}'.format(start, snpreader.sid_count))
                snpdata = snpreader[:, start:start + block_size].read(
                    order=order, dtype=dtype, num_threads=num_threads)
                standardizer.standardize(snpdata, num_threads=num_threads)
                snpmemmap.val[:, start:start + snpdata.sid_count] = snpdata.val

    snpmemmap.flush()

    # Swap the finished temporary file into place.
    if os.path.exists(filename):
        os.remove(filename)
    shutil.move(filename + '.temp', filename)
    logging.debug("Done writing " + filename)
    return SnpMemMap(filename)
def write(filename, distreader, order='A', dtype=None, block_size=None):
    r"""Writes a :class:`DistReader` to :class:`DistMemMap` format.

    The values are first written to ``filename + '.temp'``. Only after that file
    has been flushed is any existing *filename* removed and the temporary file
    moved into its place.

    :param filename: the name of the file to create
    :type filename: string

    :param distreader: The data that should be written to disk. It can also be any distreader,
        for example, :class:`.DistNpz`, :class:`.DistData`, or another :class:`.Bgen`.
    :type distreader: :class:`DistReader`

    :param order: {'A' (default), 'F', 'C'}, optional -- Specify the order of the ndarray.
        By default, will match the order of the input if knowable; otherwise, 'F'
    :type order: string or None

    :param dtype: {None (default), numpy.float64, numpy.float32}, optional -- The data-type for
        the :attr:`DistMemMap.val` ndarray. By default, will match the dtype of the input if
        knowable; otherwise np.float64.
    :type dtype: data-type

    :param block_size: The number of SNPs to read in a batch from *distreader*. Defaults to a
        *block_size* such that *block_size* \* *iid_count* is about 100,000.
    :type block_size: number

    :rtype: :class:`.DistMemMap`

    >>> import pysnptools.util as pstutil
    >>> from pysnptools.distreader import Bgen, DistMemMap
    >>> from pysnptools.util import example_file # Download and return local file name
    >>> bgen_file = example_file("pysnptools/examples/2500x100.bgen")
    >>> distreader = Bgen(bgen_file)[:,:10] #Create a reader for the first 10 SNPs
    >>> pstutil.create_directory_if_necessary("tempdir/tiny.dist.memmap")
    >>> DistMemMap.write("tempdir/tiny.dist.memmap",distreader) # Write distreader in DistMemMap format
    DistMemMap('tempdir/tiny.dist.memmap')
    """
    # We write iid and sid in ascii for compatibility between Python 2 and Python 3 formats.
    row_ascii = np.array(distreader.row, dtype='S')  # TODO: avoid this copy when not needed
    col_ascii = np.array(distreader.col, dtype='S')  # TODO: avoid this copy when not needed

    # `or` is deliberate here: a block_size of 0 would never advance the loop below.
    block_size = block_size or max((100 * 1000) // max(1, distreader.row_count), 1)

    has_val = hasattr(distreader, 'val')
    if order == 'A':
        order = PstMemMap._order(distreader) if has_val else 'F'
    # Test against None explicitly rather than relying on the truthiness of a
    # dtype object, so an explicitly requested dtype is never discarded.
    if dtype is None:
        dtype = distreader.val.dtype if has_val else np.float64
    dtype = np.dtype(dtype)

    # val_shape=3: each (row, col) entry holds three values.
    self = PstMemMap.empty(row_ascii, col_ascii, filename + '.temp',
                           row_property=distreader.row_property,
                           col_property=distreader.col_property,
                           order=order, dtype=dtype, val_shape=3)

    if has_val:
        # Everything is already in memory: copy in one step.
        self.val[:, :, :] = distreader.val
    else:
        # Stream from the reader in column blocks, advancing by the number of
        # columns actually returned.
        start = 0
        with log_in_place("sid_index ", logging.INFO) as updater:
            while start < distreader.sid_count:
                updater('{0} of {1}'.format(start, distreader.sid_count))
                distdata = distreader[:, start:start + block_size].read(
                    order=order, dtype=dtype)
                self.val[:, start:start + distdata.sid_count, :] = distdata.val
                start += distdata.sid_count

    self.flush()

    # Swap the finished temporary file into place.
    if os.path.exists(filename):
        os.remove(filename)
    shutil.move(filename + '.temp', filename)
    logging.debug("Done writing " + filename)
    return DistMemMap(filename)
test_suite = unittest.TestSuite([]) test_suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestPstMemMap)) return test_suite if __name__ == "__main__": logging.basicConfig(level=logging.INFO) if True: from pysnptools.pstreader import PstMemMap fn = '../examples/tiny.pst.memmap' os.getcwd() print((os.path.exists(fn))) pst_mem_map = PstMemMap(fn) print((pst_mem_map.val[0,1])) if False: a=np.ndarray([2,3]) pointer, read_only_flag = a.__array_interface__['data'] print(pointer) a*=2 pointer, read_only_flag = a.__array_interface__['data'] print(pointer) a = PstMemMap.empty(row=['a','b','c'],col=['y','z'],filename=r'c:\deldir\a.memmap',row_property=['A','B','C'],order="F",dtype=np.float64) b = PstData(row=['a','b','c'],col=['y','z'],val=[[1,2],[3,4],[np.nan,6]],row_property=['A','B','C']) pointer, read_only_flag = a.val.__array_interface__['data'] print(pointer) a.val+=1