def _read_kernel(train, standardizer, block_size=None, order='A', dtype=np.float64, force_python_only=False, view_ok=False, return_trained=False):
    '''
    Create the kernel (``train.val`` times its own transpose) for SNP data
    whose values are already in memory.

    Two cases are handled:

    * Fast path: ``standardizer`` is an ``Identity`` and ``train.val`` already
      has the requested ``dtype``. No standardization is needed, so the kernel
      is a single in-memory matrix product.
    * Otherwise the work is delegated to the general
      ``SnpReader._read_kernel`` with all arguments passed through unchanged.

    :param train: object whose ``val`` attribute is the in-memory array to multiply.
    :param standardizer: standardizer to apply; an ``Identity`` enables the fast path.
    :param block_size: only forwarded to ``SnpReader._read_kernel``.
    :param order: requested memory order of the result ('F', 'C' or 'A').
    :param dtype: requested dtype of the result.
    :param force_python_only: only forwarded to ``SnpReader._read_kernel``.
    :param view_ok: only forwarded to ``SnpReader._read_kernel``.
    :param return_trained: if true, return ``(K, standardizer)`` instead of just ``K``.

    :rtype: the kernel array ``K``, or the tuple ``(K, standardizer)`` when
        ``return_trained`` is true (fast path); otherwise whatever
        ``SnpReader._read_kernel`` returns.
    '''
    from pysnptools.pstreader import PstReader

    # Just do a 'python' dot if no standardization is needed and everything is the right type.
    if isinstance(standardizer, Identity) and train.val.dtype == dtype:
        K = train.val.dot(train.val.T)
        if order == 'F':
            # numpy's 'dot' always returns 'C' order. The product of a matrix
            # with its own transpose is mathematically symmetric, so taking
            # the transpose yields the same kernel laid out in 'F' order
            # without copying.
            K = K.T
        assert PstReader._array_properties_are_ok(K, order, dtype), "internal error: K is not of the expected order or dtype"
        if return_trained:
            # Nothing was trained on this path; hand back the Identity standardizer as given.
            return K, standardizer
        return K

    # Do things the more general SnpReader way.
    return SnpReader._read_kernel(
        train,
        standardizer,
        block_size=block_size,
        order=order,
        dtype=dtype,
        force_python_only=force_python_only,
        view_ok=view_ok,
        return_trained=return_trained,
    )
def test_respect_inputs(self):
    """Check that ``read_kernel`` honors the requested ``order`` and ``dtype``.

    For every combination of starting dtype, starting memory order, SNP
    count and standardizer, a reference kernel is computed by standardizing
    in pure Python and multiplying the values with their own transpose.
    ``read_kernel`` is then called for every goal dtype/order and its result
    must (a) have the requested array properties and (b) match the reference
    to the precision allowed by the less precise of the two dtypes.
    """
    np.random.seed(0)
    # (dtype, number of decimals that dtype can be trusted to)
    dtype_decimal_pairs = [(np.float32, 5), (np.float64, 10)]
    orders = ['F', 'C', 'A']

    for dtype_start, decimal_start in dtype_decimal_pairs:
        for order_start in orders:
            for sid_count in [20, 2]:
                snpdataX = SnpData(
                    iid=[["0", "0"], ["1", "1"], ["2", "2"]],
                    sid=[str(i) for i in range(sid_count)],
                    val=np.array(
                        np.random.randint(3, size=[3, sid_count]),
                        dtype=dtype_start,
                        order=order_start))
                for stdx in [stdizer.Beta(1, 25), stdizer.Identity(), stdizer.Unit()]:
                    # Exercise both the full data and a view that drops the first SNP.
                    for snpreader0 in [snpdataX, snpdataX[:, 1:]]:
                        snpreader1 = snpreader0[1:, :]

                        refdata0, trained_standardizer = snpreader0.read().standardize(
                            stdx, return_trained=True, force_python_only=True)
                        refval0 = refdata0.val.dot(refdata0.val.T)

                        # LATER: refdata1/refval1 are not compared against
                        # anything yet. They are still computed so that
                        # applying the trained standardizer to a row subset
                        # keeps being exercised by this test.
                        refdata1 = snpreader1.read().standardize(
                            trained_standardizer, force_python_only=True)
                        refval1 = refdata0.val.dot(refdata1.val.T)

                        for dtype_goal, decimal_goal in dtype_decimal_pairs:
                            for order_goal in orders:
                                k = snpreader0.read_kernel(
                                    standardizer=stdx,
                                    block_size=1,
                                    order=order_goal,
                                    dtype=dtype_goal)
                                # The check's result must be asserted;
                                # calling it and discarding the result
                                # verifies nothing.
                                assert PstReader._array_properties_are_ok(
                                    k.val, order_goal, dtype_goal), \
                                    "read_kernel did not respect the requested order or dtype"
                                np.testing.assert_array_almost_equal(
                                    refval0,
                                    k.val,
                                    decimal=min(decimal_start, decimal_goal))
def test_respect_inputs(self):
    """Verify that ``read_kernel`` returns the requested ``order`` and ``dtype``.

    Random 3-individual SNP data is built for each starting dtype, memory
    order and SNP count. For each standardizer, a reference kernel is made
    by standardizing with the pure-Python code path and taking the product
    of the values with their transpose. Every goal dtype/order is then
    requested from ``read_kernel``; the result must have the requested array
    properties and agree with the reference to the number of decimals
    supported by the less precise dtype involved.
    """
    np.random.seed(0)
    # Each entry pairs a dtype with the decimals of agreement expected from it.
    precision_cases = [(np.float32, 5), (np.float64, 10)]
    layout_cases = ['F', 'C', 'A']

    for dtype_start, decimal_start in precision_cases:
        for order_start in layout_cases:
            for snp_count in [20, 2]:
                start_val = np.array(
                    np.random.randint(3, size=[3, snp_count]),
                    dtype=dtype_start,
                    order=order_start)
                snpdataX = SnpData(
                    iid=[["0", "0"], ["1", "1"], ["2", "2"]],
                    sid=[str(i) for i in range(snp_count)],
                    val=start_val)
                for stdx in [stdizer.Beta(1, 25), stdizer.Identity(), stdizer.Unit()]:
                    # Test the data as-is and with its first SNP sliced away.
                    for snpreader0 in [snpdataX, snpdataX[:, 1:]]:
                        snpreader1 = snpreader0[1:, :]

                        refdata0, trained_standardizer = snpreader0.read().standardize(
                            stdx, return_trained=True, force_python_only=True)
                        refval0 = refdata0.val.dot(refdata0.val.T)

                        # NOTE: refdata1/refval1 are never compared to a
                        # result. They are kept so the trained standardizer
                        # is still applied to a row subset as a smoke check.
                        refdata1 = snpreader1.read().standardize(
                            trained_standardizer, force_python_only=True)
                        refval1 = refdata0.val.dot(refdata1.val.T)

                        for dtype_goal, decimal_goal in precision_cases:
                            for order_goal in layout_cases:
                                k = snpreader0.read_kernel(
                                    standardizer=stdx,
                                    block_size=1,
                                    order=order_goal,
                                    dtype=dtype_goal)
                                # Assert the property check; without the
                                # assert its result would be thrown away.
                                assert PstReader._array_properties_are_ok(
                                    k.val, order_goal, dtype_goal), \
                                    "read_kernel did not respect the requested order or dtype"
                                np.testing.assert_array_almost_equal(
                                    refval0,
                                    k.val,
                                    decimal=min(decimal_start, decimal_goal))