def fit(self, X, y):
    """Fit the wrapped model and derive Wald statistics for its coefficients.

    After ``self.model.fit(X, y)`` the Fisher information matrix is built
    from the model's decision function, inverted, and its diagonal is used
    as the variance of each coefficient.

    Parameters
    ----------
    X : array_like
        2-D design matrix; ``X.shape[1]`` is taken as the number of features.
    y : array_like
        Targets, forwarded unchanged to ``self.model.fit``.

    Returns
    -------
    self
        The fitted estimator, so that calls can be chained
        (``est.fit(X, y).p_values``).

    Notes
    -----
    Sets the attributes ``z_scores``, ``p_values`` (a list with one
    two-tailed p-value per coefficient), ``sigma_estimates`` and ``F_ij``.
    """
    self.model.fit(X, y)

    # 2 * (1 + cosh(t)) is the reciprocal of s(t) * (1 - s(t)) for the
    # logistic sigmoid s, i.e. the inverse per-sample weight (presumably
    # self.model is a logistic regression -- confirm against the class).
    denom = 2.0 * (1.0 + np.cosh(self.model.decision_function(X)))
    # Repeat the per-sample value across features so it divides X row-wise.
    denom = cp.tile(denom, (X.shape[1], 1)).T
    F_ij = cp.dot((X / denom).T, X)  # Fisher information matrix
    Cramer_Rao = cp.linalg.inv(F_ij)  # inverse information matrix
    sigma_estimates = cp.sqrt(cp.diagonal(Cramer_Rao))

    # coef_ is flattened (rather than indexed with [0]) to match sklearn.
    z_scores = self.model.coef_.flatten() / sigma_estimates

    # scipy runs on the host.  tolist() copies all z-scores in one transfer
    # instead of one device synchronisation per coefficient.
    p_values = [stat.norm.sf(abs(z)) * 2 for z in z_scores.tolist()]

    # Confidence intervals, if ever needed, are
    # coef_ -/+ norm.ppf(1 - alpha / 2) * sigma_estimates; see
    # https://gist.github.com/rspeare/77061e6e317896be29c6de9a85db301d#gistcomment-2267786

    self.z_scores = z_scores
    self.p_values = p_values
    self.sigma_estimates = sigma_estimates
    self.F_ij = F_ij
    return self
def make_diagonal(D, offset=0, axis1=0, axis2=1):
    """Build an array whose trailing square holds ``D`` on its diagonal.

    This is the complement of ``diagonal``: the result has shape
    ``D.shape + (D.shape[-1],)`` and is zero everywhere except on the
    diagonal spanned by its last two axes, which is filled with ``D``.

    Only ``offset=0, axis1=-1, axis2=-2`` is implemented.  Note that the
    signature defaults (``axis1=0, axis2=1``) do not satisfy this check, so
    callers have to pass the axes explicitly.

    Raises
    ------
    NotImplementedError
        For any other combination of ``offset``, ``axis1`` and ``axis2``.
    """
    if offset != 0 or axis1 != -1 or axis2 != -2:
        raise NotImplementedError(
            "Currently make_diagonal only supports offset=0, axis1=-1, axis2=-2"  # noqa
        )

    # The diagonal is returned as a view of the freshly allocated array, so
    # writing through it fills the result in place (for NumPy this needs
    # version >= 1.10).
    side = D.shape[-1]
    result = _cp.zeros(D.shape + (side, ))
    diag_view = _cp.diagonal(result, offset=0, axis1=-1, axis2=-2)
    diag_view.flags.writeable = True
    diag_view[:] = D
    return result
def extract(noisyimg_gpu, imgweights_gpu, A_gpu):
    """Solve the weighted linear extraction on the GPU and decorrelate it.

    Equation numbers in the comments refer to "B&S", as in the original code.

    Parameters
    ----------
    noisyimg_gpu : device array
        Image values; flattened with ``ravel()`` before use.
    imgweights_gpu : device array
        Per-pixel weights; placed on the diagonal of an ``npix`` x ``npix``
        sparse matrix.
    A_gpu : sparse device matrix
        Projection matrix (its products must support ``todense()``).

    Returns
    -------
    fx_gpu : device array
        Decorrelated solution, same shape as the raw solution.
    varfx_gpu : device array
        Diagonal of the decorrelated covariance.
    R_gpu : device array
        Resolution matrix.

    Notes
    -----
    Relies on the module-level name ``npix`` for the size of the weight
    matrix.
    """
    #- Set up the equation to solve (B&S eq 4)
    W_gpu = cpx.scipy.sparse.spdiags(
        data=imgweights_gpu.ravel(), diags=[0], m=npix, n=npix)
    iCov_gpu = A_gpu.T.dot(W_gpu.dot(A_gpu))
    y_gpu = A_gpu.T.dot(W_gpu.dot(noisyimg_gpu.ravel()))

    # solve() and eigh() both need a dense array, not a sparse object;
    # densify once and share the result between them.
    iCov_dense_gpu = iCov_gpu.todense()

    #- Solve f (B&S eq 4)
    f_gpu = cp.linalg.solve(iCov_dense_gpu, y_gpu)

    #- Eigen-decompose iCov to assist in upcoming steps
    u_gpu, v_gpu = cp.linalg.eigh(iCov_dense_gpu)
    n_eig = len(u_gpu)

    #- Calculate C^-1 = QQ (B&S eq 10)
    d_gpu = cpx.scipy.sparse.spdiags(cp.sqrt(u_gpu), 0, n_eig, n_eig)
    Q_gpu = v_gpu.dot(d_gpu.dot(v_gpu.T))

    #- normalization vector (B&S eq 11)
    norm_vector_gpu = cp.sum(Q_gpu, axis=1)

    #- Resolution matrix (B&S eq 12): every row of Q scaled by 1 / row sum
    R_gpu = cp.outer(norm_vector_gpu**(-1),
                     cp.ones(norm_vector_gpu.size)) * Q_gpu

    #- Decorrelated covariance matrix (B&S eq 13-15)
    udiags_gpu = cpx.scipy.sparse.spdiags(1 / u_gpu, 0, n_eig, n_eig)
    Cov_gpu = v_gpu.dot(udiags_gpu.dot(v_gpu.T))
    Cx_gpu = R_gpu.dot(Cov_gpu.dot(R_gpu.T))

    #- Decorrelated flux (B&S eq 16)
    fx_gpu = R_gpu.dot(f_gpu.ravel()).reshape(f_gpu.shape)

    #- Variance on f (B&S eq 13)
    varfx_gpu = cp.diagonal(Cx_gpu)

    return fx_gpu, varfx_gpu, R_gpu
def process(self, inputs):
    """Convert flattened square matrices into normalised diagonal weights.

    The input frame carries one flattened ``assets x assets`` matrix per row
    in the integer-named columns ``0 .. assets*assets - 1`` plus the columns
    ``month``, ``year`` and ``sample_id``.  For every matrix the weights are
    ``1 / sqrt(diagonal)`` (the reciprocal standard deviation when the
    diagonal holds variances), rescaled so that each row sums to one.

    Parameters
    ----------
    inputs : dict
        Maps port names to frames; ``inputs[self.INPUT_PORT_NAME]`` is read.

    Returns
    -------
    dict
        ``{self.OUTPUT_PORT_NAME: weight_df}`` where ``weight_df`` has one
        column per asset followed by ``month``, ``year`` and ``sample_id``.
    """
    df = inputs[self.INPUT_PORT_NAME]
    total_samples = len(df['sample_id'].unique())

    # Three of the columns (month, year, sample_id) are keys; the remainder
    # is the flattened square matrix.  Prefer the declared port metadata and
    # fall back to the frame itself so ``assets`` is always defined.
    input_meta = self.get_input_meta()
    if self.INPUT_PORT_NAME in input_meta:
        num_columns = len(input_meta[self.INPUT_PORT_NAME])
    else:
        num_columns = len(df.columns)
    assets = int(math.sqrt(num_columns - 3))

    data_ma = df[list(range(assets * assets))].values
    data_ma = data_ma.reshape(total_samples, -1, assets, assets)

    weights = cupy.diagonal(data_ma, 0, 2, 3)
    weights = cupy.sqrt(1.0 / weights)  # 1 / sqrt(diagonal)
    weights = weights / weights.sum(axis=2, keepdims=True)
    weights = weights.reshape(-1, assets)

    weight_df = cudf.DataFrame(weights)
    weight_df['month'] = df['month']
    weight_df['year'] = df['year']
    weight_df['sample_id'] = df['sample_id']
    return {self.OUTPUT_PORT_NAME: weight_df}
def mi_model_1d_gpu_gd(x, y, biascorrect=False, demeaned=False):
    """Mutual information between a Gaussian and a discrete variable in bits.

    This method is based on ANOVA style model comparison: the entropy of a
    single Gaussian fitted to all trials is compared with the class-weighted
    entropies of Gaussians fitted to each value of y.

    Parameters
    ----------
    x : array_like
        Gaussian array of shape (n_epochs,) or (n_dimensions, n_epochs)
    y : array_like
        Discrete variable of shape (n_epochs,). Must be an array of integers
    biascorrect : bool | False
        Specifies whether bias correction should be applied to the estimated
        MI
    demeaned : bool | False
        Specifies whether the input data already has zero mean (true if it
        has been copula-normalized)

    Returns
    -------
    i : scalar
        Information shared by x and y (in bits), returned as a
        zero-dimensional array

    Raises
    ------
    ValueError
        If x has more than 2 dimensions, y is not univariate, y is not of
        integer dtype, or x and y disagree on the number of trials.
    """
    x, y = cp.atleast_2d(x), cp.squeeze(y)
    if x.ndim > 2:
        raise ValueError("x must be at most 2d")
    if y.ndim > 1:
        raise ValueError("only univariate discrete variables supported")
    if not cp.issubdtype(y.dtype, cp.integer):
        raise ValueError("y should be an integer array")

    nvarx, ntrl = x.shape
    ym = cp.unique(y)

    if y.size != ntrl:
        raise ValueError("number of trials do not match")

    if not demeaned:
        x = x - x.mean(axis=1)[:, cp.newaxis]

    # class-conditional entropies
    n_classes = len(ym)
    ntrl_y = cp.zeros(n_classes)
    hcond = cp.zeros(n_classes)
    for n_yi, yi in enumerate(ym):
        xm = x[:, y == yi]
        # The trial count is known on the host from the shape; no need to
        # read it back from the device array.
        n_class = xm.shape[1]
        ntrl_y[n_yi] = n_class
        xm = xm - xm.mean(axis=1)[:, cp.newaxis]
        cm = cp.dot(xm, xm.T) / float(n_class - 1)
        chcm = cp.linalg.cholesky(cm)
        # sum(log(diag(chol(C)))) == 0.5 * log(det(C))
        hcond[n_yi] = cp.sum(cp.log(cp.diagonal(chcm)))

    # class weights
    w = ntrl_y / float(ntrl)

    # unconditional entropy from unconditional Gaussian fit
    cx = cp.dot(x, x.T) / float(ntrl - 1)
    chc = cp.linalg.cholesky(cx)
    hunc = cp.sum(cp.log(cp.diagonal(chc)))  # + c*nvarx

    ln2 = cp.log(2)
    if biascorrect:
        # float64 is spelled out: the ``float`` attribute of the array
        # module is only a deprecated alias of the builtin.
        orders = cp.arange(1, nvarx + 1)
        psiterms = psi((ntrl - orders).astype(cp.float64) / 2.) / 2.
        dterm = (ln2 - cp.log(float(ntrl - 1))) / 2.
        hunc = hunc - nvarx * dterm - psiterms.sum()

        dterm = (ln2 - cp.log((ntrl_y - 1).astype(cp.float64))) / 2.0
        psiterms = cp.zeros(n_classes)
        for order in range(1, nvarx + 1):
            psiterms = psiterms + psi(
                (ntrl_y - order).astype(cp.float64) / 2.)
        hcond = hcond - nvarx * dterm - (psiterms / 2.)

    # MI in bits
    i = (hunc - cp.sum(w * hcond)) / ln2
    return i
def cmi_1d_gpu_ggg(x, y, z, biascorrect=True, demeaned=False):
    """Conditional MI between two Gaussian variables conditioned on a third.

    Returns the CMI between two (possibly multidimensional) Gaussian
    variables, x and y, conditioned on a third, z, computed as
    H(x,z) + H(y,z) - H(x,y,z) - H(z) from the joint covariance.

    Parameters
    ----------
    x, y, z : array_like
        Gaussians arrays of shape (n_epochs,) or (n_dimensions, n_epochs).
    biascorrect : bool | True
        Specifies whether bias correction should be applied to the estimated
        MI
    demeaned : bool | False
        Specifies whether the input data already has zero mean (true if it
        has been copula-normalized)

    Returns
    -------
    i : scalar
        Information shared by x and y conditioned by z (in bits), returned
        as a zero-dimensional array

    Raises
    ------
    ValueError
        If any input has more than 2 dimensions or the inputs disagree on
        the number of trials.
    """
    x, y, z = cp.atleast_2d(x), cp.atleast_2d(y), cp.atleast_2d(z)
    if x.ndim > 2 or y.ndim > 2 or z.ndim > 2:
        raise ValueError("x, y and z must be at most 2d")
    ntrl = x.shape[1]
    nvarx = x.shape[0]
    nvary = y.shape[0]
    nvarz = z.shape[0]
    nvaryz = nvary + nvarz
    nvarxy = nvarx + nvary
    nvarxz = nvarx + nvarz
    nvarxyz = nvarx + nvaryz

    if y.shape[1] != ntrl or z.shape[1] != ntrl:
        raise ValueError("number of trials do not match")

    # joint variable
    xyz = cp.vstack((x, y, z))
    if not demeaned:
        xyz = xyz - xyz.mean(axis=1)[:, cp.newaxis]
    cxyz = cp.dot(xyz, xyz.T) / float(ntrl - 1)

    # submatrices of joint covariance
    cz = cxyz[nvarxy:, nvarxy:]
    cyz = cxyz[nvarx:, nvarx:]
    # (x, z) is not a contiguous block of the joint covariance, so it is
    # assembled from its four corners.
    cxz = cp.zeros((nvarxz, nvarxz))
    cxz[:nvarx, :nvarx] = cxyz[:nvarx, :nvarx]
    cxz[:nvarx, nvarx:] = cxyz[:nvarx, nvarxy:]
    cxz[nvarx:, :nvarx] = cxyz[nvarxy:, :nvarx]
    cxz[nvarx:, nvarx:] = cxyz[nvarxy:, nvarxy:]

    chcz = cp.linalg.cholesky(cz)
    chcxz = cp.linalg.cholesky(cxz)
    chcyz = cp.linalg.cholesky(cyz)
    chcxyz = cp.linalg.cholesky(cxyz)

    # entropies in nats
    # normalizations cancel for cmi
    hz = cp.sum(cp.log(cp.diagonal(chcz)))
    hxz = cp.sum(cp.log(cp.diagonal(chcxz)))
    hyz = cp.sum(cp.log(cp.diagonal(chcyz)))
    hxyz = cp.sum(cp.log(cp.diagonal(chcxyz)))

    ln2 = cp.log(2)
    if biascorrect:
        # float64 is spelled out: the ``float`` attribute of the array
        # module is only a deprecated alias of the builtin.
        psiterms = psi(
            (ntrl - cp.arange(1, nvarxyz + 1)).astype(cp.float64) / 2.) / 2.
        dterm = (ln2 - cp.log(ntrl - 1.)) / 2.
        hz = hz - nvarz * dterm - psiterms[:nvarz].sum()
        hxz = hxz - nvarxz * dterm - psiterms[:nvarxz].sum()
        hyz = hyz - nvaryz * dterm - psiterms[:nvaryz].sum()
        hxyz = hxyz - nvarxyz * dterm - psiterms[:nvarxyz].sum()

    # MI in bits
    i = (hxz + hyz - hxyz - hz) / ln2
    return i
def mi_1d_gpu_gg(x, y, biascorrect=True, demeaned=False):
    """Mutual information (MI) between two Gaussian variables in bits.

    GPU (CuPy) implementation.  Returns the MI between two (possibly
    multidimensional) Gaussian variables, x and y, computed as
    H(x) + H(y) - H(x,y) from the joint covariance.

    Parameters
    ----------
    x, y : array_like
        Gaussian arrays of shape (n_epochs,) or (n_dimensions, n_epochs)
    biascorrect : bool | True
        Specifies whether bias correction should be applied to the estimated
        MI
    demeaned : bool | False
        Specifies whether the input data already has zero mean (true if it
        has been copula-normalized)

    Returns
    -------
    i : scalar
        Information shared by x and y (in bits), returned as a
        zero-dimensional array

    Raises
    ------
    ValueError
        If an input has more than 2 dimensions or x and y disagree on the
        number of trials.
    """
    x, y = cp.atleast_2d(x), cp.atleast_2d(y)
    if (x.ndim > 2) or (y.ndim > 2):
        raise ValueError("x and y must be at most 2d")
    nvarx, ntrl = x.shape
    nvary = y.shape[0]
    nvarxy = nvarx + nvary

    if y.shape[1] != ntrl:
        raise ValueError("number of trials do not match")

    # joint variable
    xy = cp.vstack((x, y))
    if not demeaned:
        xy = xy - xy.mean(axis=1)[:, cp.newaxis]
    cxy = cp.dot(xy, xy.T) / float(ntrl - 1)

    # submatrices of joint covariance
    cx = cxy[:nvarx, :nvarx]
    cy = cxy[nvarx:, nvarx:]

    chcxy = cp.linalg.cholesky(cxy)
    chcx = cp.linalg.cholesky(cx)
    chcy = cp.linalg.cholesky(cy)

    # entropies in nats
    # normalizations cancel for mutual information
    hx = cp.sum(cp.log(cp.diagonal(chcx)))
    hy = cp.sum(cp.log(cp.diagonal(chcy)))
    hxy = cp.sum(cp.log(cp.diagonal(chcxy)))

    ln2 = cp.log(2)
    if biascorrect:
        # float64 is spelled out: the ``float`` attribute of the array
        # module is only a deprecated alias of the builtin.
        psiterms = psi(
            (ntrl - cp.arange(1, nvarxy + 1)).astype(cp.float64) / 2.) / 2.
        dterm = (ln2 - cp.log(ntrl - 1.)) / 2.
        hx = hx - nvarx * dterm - psiterms[:nvarx].sum()
        hy = hy - nvary * dterm - psiterms[:nvary].sum()
        hxy = hxy - nvarxy * dterm - psiterms[:nvarxy].sum()

    # MI in bits
    i = (hx + hy - hxy) / ln2
    return i
# np.linalg.det. if x.dtype not in _floating_dtypes: raise TypeError('Only floating-point dtypes are allowed in det') return Array._new(np.linalg.det(x._array)) # Note: diagonal is the numpy top-level namespace, not np.linalg def diagonal(x: Array, /, *, offset: int = 0) -> Array: """ Array API compatible wrapper for :py:func:`np.diagonal <numpy.diagonal>`. See its docstring for more information. """ # Note: diagonal always operates on the last two axes, whereas np.diagonal # operates on the first two axes by default return Array._new(np.diagonal(x._array, offset=offset, axis1=-2, axis2=-1)) def eigh(x: Array, /) -> EighResult: """ Array API compatible wrapper for :py:func:`np.linalg.eigh <numpy.linalg.eigh>`. See its docstring for more information. """ # Note: the restriction to floating-point dtypes only is different from # np.linalg.eigh. if x.dtype not in _floating_dtypes: raise TypeError('Only floating-point dtypes are allowed in eigh') # Note: the return type here is a namedtuple, which is different from # np.eigh, which only returns a tuple.
def slogdet(a):
    """Returns sign and logarithm of the determinant of an array.

    The determinant is obtained from an LU factorisation: the logarithm is
    the sum of ``log(abs(.))`` over the LU diagonal and the sign is derived
    from the pivot permutation and the diagonal entries.

    Args:
        a (cupy.ndarray): The input matrix with dimension ``(..., N, N)``.

    Returns:
        tuple of :class:`~cupy.ndarray`:
            It returns a tuple ``(sign, logdet)``, both of shape
            ``a.shape[:-2]``. For real inputs ``sign`` is ``0``, ``1`` or
            ``-1``; for complex inputs it is the permutation parity times
            the product of the LU diagonal entries each divided by its
            modulus. ``logdet`` is the natural logarithm of the absolute
            value of the determinant. If the determinant is zero, ``sign``
            is ``0`` and ``logdet`` is ``-inf``.

    .. warning::
        This function calls one or more cuSOLVER routine(s) which may yield
        invalid results if input conditions are not met.
        To detect these invalid results, you can set the `linalg`
        configuration to a value that is not `ignore` in
        :func:`cupyx.errstate` or :func:`cupyx.seterr`.

    .. warning::
        To produce the same results as :func:`numpy.linalg.slogdet` for
        singular inputs, set the `linalg` configuration to `raise`.

    .. seealso:: :func:`numpy.linalg.slogdet`
    """
    if a.ndim < 2:
        raise linalg.LinAlgError(
            '%d-dimensional array given. '
            'Array must be at least two-dimensional' % a.ndim)
    _util._assert_nd_squareness(a)

    # Promote to at least single precision; the matching real dtype is the
    # lower-case type character of the promoted dtype.
    dtype = numpy.promote_types(a.dtype.char, 'f')
    real_dtype = numpy.dtype(dtype.char.lower())

    supported = (numpy.float32, numpy.float64,
                 numpy.complex64, numpy.complex128)
    if dtype not in supported:
        raise ValueError(
            'dtype must be float32, float64, complex64, or complex128'
            ' (actual: {})'.format(a.dtype))

    batch_shape = a.shape[:-2]
    n = a.shape[-2]

    if a.size == 0:
        # empty batch (result is empty, too) or empty matrices det([[]]) == 1
        return cupy.ones(batch_shape, dtype), cupy.zeros(batch_shape,
                                                         real_dtype)

    lu, ipiv, dev_info = _decomposition._lu_factor(a, dtype)

    # dev_info < 0 means illegal value (in dimensions, strides, and etc.) that
    # should never happen even if the matrix contains nan or inf.
    # TODO(kataoka): assert dev_info >= 0 if synchronization is allowed for
    # debugging purposes.

    lu_diag = cupy.diagonal(lu, axis1=-2, axis2=-1)
    logdet = cupy.log(cupy.abs(lu_diag)).sum(axis=-1)

    # ipiv is 1-origin: every entry that differs from its own position
    # counts as one row exchange.
    flips = cupy.count_nonzero(ipiv != cupy.arange(1, n + 1), axis=-1)
    if dtype.kind == "f":
        # each negative diagonal entry flips the sign once more
        flips += cupy.count_nonzero(lu_diag < 0, axis=-1)

    # Note: sign == -1 ** (flips % 2)
    sign = (flips % 2) * -2 + 1
    if dtype.kind == "c":
        sign = sign * cupy.prod(lu_diag / cupy.abs(lu_diag), axis=-1)

    singular = dev_info > 0
    sign_out = cupy.where(singular, dtype.type(0), sign.astype(dtype))
    logdet_out = cupy.where(singular, real_dtype.type('-inf'), logdet)
    return sign_out.reshape(batch_shape), logdet_out.reshape(batch_shape)
def _require_close(label, cpu_value, gpu_value_on_host):
    """Raise AssertionError unless a GPU result matches its CPU reference."""
    if not np.allclose(cpu_value, gpu_value_on_host):
        raise AssertionError(
            "GPU and CPU results differ for {}".format(label))


def extract(noisyimg_gpu, imgweights_gpu, A_gpu):
    """Run the extraction on GPU and CPU side by side and cross-check them.

    Every intermediate GPU quantity is copied back to the host and compared
    (``np.allclose``) with the same quantity computed with NumPy / SciPy.
    Equation numbers in the comments refer to "B&S", as in the original code.

    Parameters
    ----------
    noisyimg_gpu : device array
        Image values; flattened with ``ravel()`` before use.
    imgweights_gpu : device array
        Per-pixel weights; placed on the diagonal of an ``npix`` x ``npix``
        sparse matrix.
    A_gpu : sparse device matrix
        Projection matrix (its products must support ``todense()``).

    Returns
    -------
    tuple
        ``(fx_gpu, fx_cpu, varfx_gpu, varfx_cpu, R_gpu, R_cpu)``: the
        decorrelated solution, its variance and the resolution matrix, each
        as a GPU / CPU pair.

    Raises
    ------
    AssertionError
        If any GPU intermediate differs from its CPU counterpart. The check
        is an explicit raise, so it also runs under ``python -O``.

    Notes
    -----
    Relies on the module-level name ``npix`` for the size of the weight
    matrix.
    """
    #- Host copies of the inputs for the reference computation
    noisyimg_cpu = noisyimg_gpu.get()
    imgweights_cpu = imgweights_gpu.get()
    A_cpu = A_gpu.get()

    #- Set up the equation to solve (B&S eq 4)
    W_cpu = scipy.sparse.spdiags(
        data=imgweights_cpu.ravel(), diags=[0], m=npix, n=npix)
    W_gpu = cpx.scipy.sparse.spdiags(
        data=imgweights_gpu.ravel(), diags=[0], m=npix, n=npix)
    # sparse objects are densified before comparison
    _require_close("W", W_cpu.todense(), W_gpu.get().todense())

    iCov_gpu = A_gpu.T.dot(W_gpu.dot(A_gpu))
    iCov_cpu = A_cpu.T.dot(W_cpu.dot(A_cpu))
    # solve() and eigh() both need dense input; densify once per side.
    iCov_dense_gpu = iCov_gpu.todense()
    iCov_dense_cpu = iCov_cpu.todense()
    _require_close("iCov", iCov_dense_cpu, iCov_gpu.get().todense())

    y_gpu = A_gpu.T.dot(W_gpu.dot(noisyimg_gpu.ravel()))
    y_cpu = A_cpu.T.dot(W_cpu.dot(noisyimg_cpu.ravel()))
    _require_close("y", y_cpu, y_gpu.get())

    #- Solve f (B&S eq 4)
    # A sparse lsqr solve was tried here first, but its CPU and GPU results
    # did not agree; the dense solve below does.
    f_gpu = cp.linalg.solve(iCov_dense_gpu, y_gpu)
    f_cpu = np.linalg.solve(iCov_dense_cpu, y_cpu)
    _require_close("f", f_cpu, f_gpu.get())

    #- Eigen-decompose iCov to assist in upcoming steps
    u_gpu, v_gpu = cp.linalg.eigh(iCov_dense_gpu)
    u, v = np.linalg.eigh(iCov_dense_cpu)
    u_cpu = np.asarray(u)
    v_cpu = np.asarray(v)
    _require_close("eigenvalues u", u_cpu, u_gpu.get())
    # NOTE(review): eigenvectors are only defined up to sign, so this
    # element-wise comparison depends on both backends picking the same one.
    _require_close("eigenvectors v", v_cpu, v_gpu.get())

    #- Calculate C^-1 = QQ (B&S eq 10)
    d_gpu = cpx.scipy.sparse.spdiags(
        cp.sqrt(u_gpu), 0, len(u_gpu), len(u_gpu))
    d_cpu = scipy.sparse.spdiags(np.sqrt(u_cpu), 0, len(u_cpu), len(u_cpu))
    _require_close("d", d_cpu.todense(), d_gpu.get().todense())

    Q_gpu = v_gpu.dot(d_gpu.dot(v_gpu.T))
    Q_cpu = v_cpu.dot(d_cpu.dot(v_cpu.T))
    _require_close("Q", Q_cpu, Q_gpu.get())

    #- normalization vector (B&S eq 11)
    norm_vector_gpu = cp.sum(Q_gpu, axis=1)
    norm_vector_cpu = np.sum(Q_cpu, axis=1)
    _require_close("norm_vector", norm_vector_cpu, norm_vector_gpu.get())

    #- Resolution matrix (B&S eq 12)
    R_gpu = cp.outer(norm_vector_gpu**(-1),
                     cp.ones(norm_vector_gpu.size)) * Q_gpu
    R_cpu = np.outer(norm_vector_cpu**(-1),
                     np.ones(norm_vector_cpu.size)) * Q_cpu
    _require_close("R", R_cpu, R_gpu.get())

    #- Decorrelated covariance matrix (B&S eq 13-15)
    udiags_gpu = cpx.scipy.sparse.spdiags(
        1 / u_gpu, 0, len(u_gpu), len(u_gpu))
    udiags_cpu = scipy.sparse.spdiags(1 / u_cpu, 0, len(u_cpu), len(u_cpu))
    _require_close(
        "udiags", udiags_cpu.todense(), udiags_gpu.get().todense())

    Cov_gpu = v_gpu.dot(udiags_gpu.dot(v_gpu.T))
    Cov_cpu = v_cpu.dot(udiags_cpu.dot(v_cpu.T))
    _require_close("Cov", Cov_cpu, Cov_gpu.get())

    Cx_gpu = R_gpu.dot(Cov_gpu.dot(R_gpu.T))
    Cx_cpu = R_cpu.dot(Cov_cpu.dot(R_cpu.T))
    _require_close("Cx", Cx_cpu, Cx_gpu.get())

    #- Decorrelated flux (B&S eq 16)
    fx_gpu = R_gpu.dot(f_gpu.ravel()).reshape(f_gpu.shape)
    fx_cpu = R_cpu.dot(f_cpu.ravel()).reshape(f_cpu.shape)
    _require_close("fx", fx_cpu, fx_gpu.get())

    #- Variance on f (B&S eq 13)
    varfx_gpu = cp.diagonal(Cx_gpu)
    varfx_cpu = np.diagonal(Cx_cpu)
    _require_close("varfx", varfx_cpu, varfx_gpu.get())

    return fx_gpu, fx_cpu, varfx_gpu, varfx_cpu, R_gpu, R_cpu
def process(self, inputs):
    """Build the mean / std / cov / corr / distance frames that are wired up.

    ``compute_cov_distance`` is run once on the input frame with the
    configured ``window``; each connected output port then receives a frame
    made of the corresponding statistic, reshaped to one row per
    (sample, month), followed by ``year``, ``month`` and ``sample_id``
    columns.

    Parameters
    ----------
    inputs : dict
        Maps port names to frames; ``inputs[self.INPUT_PORT_NAME]`` is read.

    Returns
    -------
    dict
        Maps every connected output port (``MEAN_DF``, ``STD_DF``,
        ``COV_DF``, ``CORR_DF``, ``DISTANCE_DF``) to its frame.
    """
    df = inputs[self.INPUT_PORT_NAME]
    all_sample_ids = df['sample_id'].unique()
    window = self.conf['window']
    means, cov, distance, all_dates = compute_cov_distance(
        len(all_sample_ids), df, window=window)
    # From here on the sample count is the one reported by the covariance
    # array itself.
    total_samples, num_months, _, assets = cov.shape
    num_rows = total_samples * num_months

    # Running month index relative to the earliest date, shifted by the
    # calendar month of the first date and with the first ``window`` entries
    # dropped.
    months_id = all_dates.dt.year * 12 + (all_dates.dt.month - 1)
    months_id = months_id - months_id.min()
    first_month = (all_dates.dt.month - 1)[0]
    mid = (cupy.arange(months_id.max() + 1) + first_month)[window:]
    minyear = all_dates.dt.year.min()
    if len(mid) == 0:
        mid = cupy.array([0])
    months = mid % 12
    years = mid // 12 + minyear

    def with_keys(values):
        # Wrap a 2-D block of values in a frame and append the key columns.
        frame = cudf.DataFrame(values)
        frame['year'] = cupy.concatenate([years] * total_samples).astype(
            cupy.int16)
        frame['month'] = cupy.concatenate([months] * total_samples).astype(
            cupy.int16)
        frame['sample_id'] = cupy.repeat(
            cupy.arange(total_samples) + all_sample_ids.min(), len(mid))
        return frame

    def std_values():
        # diagonal of each covariance matrix = variances; sqrt -> std
        stacked = cov.reshape(num_rows, assets, assets)
        return cupy.sqrt(cupy.diagonal(stacked, 0, 1, 2))

    # (port, lazily evaluated values) in the order the outputs are emitted
    builders = (
        (self.MEAN_DF, lambda: means.reshape(num_rows, -1)),
        (self.STD_DF, std_values),
        (self.COV_DF, lambda: cov.reshape(num_rows, -1)),
        (self.CORR_DF, lambda: 1 - 2.0 * distance.reshape(num_rows, -1)),
        (self.DISTANCE_DF, lambda: distance.reshape(num_rows, -1)),
    )

    output = {}
    for port, build in builders:
        if self.outport_connected(port):
            output[port] = with_keys(build())
    return output