def find_direction(self, grad_diffs, steps, grad, hessian_diag, idxs):
    grad = grad.copy()  # We will change this.
    n_current_factors = len(idxs)

    # TODO: find a good name for this variable.
    rho = scipy.empty(n_current_factors)

    # TODO: vectorize this function
    for i in idxs:
        rho[i] = 1 / scipy.inner(grad_diffs[i], steps[i])

    # TODO: find a good name for this variable as well.
    alpha = scipy.empty(n_current_factors)

    for i in idxs[::-1]:
        alpha[i] = rho[i] * scipy.inner(steps[i], grad)
        grad -= alpha[i] * grad_diffs[i]

    z = hessian_diag * grad

    # TODO: find a good name for this variable (surprise!)
    beta = scipy.empty(n_current_factors)

    for i in idxs:
        beta[i] = rho[i] * scipy.inner(grad_diffs[i], z)
        z += steps[i] * (alpha[i] - beta[i])

    return z, {}
def _create_block(self, block_size, order, dtype):
    matches_order = self.is_col_major == (order == "F")
    opposite_order = "C" if order == "F" else "F"
    if matches_order:
        return np.empty([len(self._row), block_size], dtype=dtype, order=order), order
    else:
        return np.empty([len(self._row), block_size], dtype=dtype, order=opposite_order), opposite_order
def _init_arrays(self):
    super(EvoMPS_TDVP_Generic, self)._init_arrays()

    #Make indices correspond to the thesis
    self.K = sp.empty((self.N + 1), dtype=sp.ndarray) #Elements 1..N
    self.C = sp.empty((self.N), dtype=sp.ndarray) #Elements 1..N-1

    for n in xrange(1, self.N + 1):
        self.K[n] = sp.zeros((self.D[n - 1], self.D[n - 1]), dtype=self.typ, order=self.odr)
        if n <= self.N - self.ham_sites + 1:
            ham_shape = []
            for i in xrange(self.ham_sites):
                ham_shape.append(self.q[n + i])
            C_shape = tuple(ham_shape + [self.D[n - 1], self.D[n - 1 + self.ham_sites]])
            self.C[n] = sp.empty(C_shape, dtype=self.typ, order=self.odr)

    self.eta = sp.zeros((self.N + 1), dtype=self.typ)
    """The per-site contributions to the norm of the TDVP tangent vector
       (projection of the exact time evolution onto the MPS tangent plane).
       Only available after calling take_step()."""
    self.eta.fill(sp.NaN)

    self.h_expect = sp.zeros((self.N + 1), dtype=self.typ)
    """The local energy expectation values (of each Hamiltonian term),
       available after calling update() or calc_K()."""
    self.h_expect.fill(sp.NaN)

    self.H_expect = sp.NaN
    """The energy expectation value, available after calling update()."""
def deref_array(data, file):
    """Take an array of references and dereference them"""
    ret = sp.empty(shape=data.shape, dtype='object')
    if len(data.shape) > 1:
        for i in xrange(data.shape[0]):
            for j in xrange(data.shape[1]):
                ref = data[i, j]
                dref = h5py.h5r.dereference(ref, file._id)
                if isinstance(dref, h5py.h5g.GroupID):
                    ret[i, j] = get_data(dref)
                else:
                    ret[i, j] = sp.empty(dref.shape, dtype=dref.dtype)
                    dref.read(h5py.h5s.ALL, h5py.h5s.ALL, ret[i, j])
                    ret[i, j] = ret[i, j].T
                if isinstance(ret[i, j], sp.ndarray):
                    shp = ret[i, j].shape
                    if len(shp) == 2 and isinstance(ret[i, j][0, 0], h5py.h5r.Reference):
                        ret[i, j] = deref_array(ret[i, j], file)
                    elif len(shp) == 1 and isinstance(ret[i, j][0], h5py.h5r.Reference):
                        ret[i, j] = deref_array(ret[i, j], file)
    else:
        for i in xrange(data.shape[0]):
            ref = data[i]
            dref = h5py.h5r.dereference(ref, file._id)
            ret[i] = sp.empty(dref.shape, dtype=dref.dtype)
            dref.read(h5py.h5s.ALL, h5py.h5s.ALL, ret[i])
            ret[i] = ret[i].T
            if isinstance(ret[i], sp.ndarray):
                shp = ret[i].shape
                if len(shp) == 2 and isinstance(ret[i][0, 0], h5py.h5r.Reference):
                    ret[i] = deref_array(ret[i], file)
                elif len(shp) == 1 and isinstance(ret[i][0], h5py.h5r.Reference):
                    ret[i] = deref_array(ret[i], file)
    return ret
def learn_gmm(self, x, y, tau=None):
    '''
    Function that learns the GMM from training samples
    It is possible to add a regularizer term Sigma = Sigma + tau*I

    Input:
        x : the training samples
        y : the labels
        tau : the value of the regularizer, if tau = None (default) no regularization

    Output:
        the mean, covariance and proportion of each class
    '''
    ## Get information from the data
    C = int(y.max(0))  # Number of classes
    n = x.shape[0]     # Number of samples
    d = x.shape[1]     # Number of variables

    ## Initialization
    self.ni = sp.empty((C, 1))      # Vector of number of samples for each class
    self.prop = sp.empty((C, 1))    # Vector of proportion
    self.mean = sp.empty((C, d))    # Vector of means
    self.cov = sp.empty((C, d, d))  # Matrix of covariance

    ## Learn the parameter of the model for each class
    for i in range(C):
        j = sp.where(y == (i + 1))[0]
        self.ni[i] = float(j.size)
        self.prop[i] = self.ni[i] / n
        self.mean[i, :] = sp.mean(x[j, :], axis=0)
        self.cov[i, :, :] = sp.cov(x[j, :], bias=1, rowvar=0)  # Normalize by ni to be consistent with the update formulae

    if tau is not None:
        self.tau = tau * sp.eye(d)
def crossValidate(y, X, K=None, folds=3, model=None, returnModel=False):
    errors = SP.empty(folds)
    n = y.shape[0]
    indexes = crossValidationScheme(folds, n)
    predictions = SP.empty(y.shape)
    alpha = []
    alphas = []
    msePath = []
    for cvRun in SP.arange(len(indexes)):
        testIndexes = indexes[cvRun]
        yTrain = y[~testIndexes]
        XTrain = X[~testIndexes]
        if K is None:
            model.fit(XTrain, yTrain)
            prediction = SP.reshape(model.predict(X[testIndexes]), (-1, 1))
        else:
            # models having population structure
            KTrain = K[~testIndexes]
            KTrain = KTrain[:, ~testIndexes]
            KTest = K[testIndexes]
            KTest = KTest[:, ~testIndexes]
            model.reset()
            model.kernel = KTrain  # TODO: make nice integration
            model.fit(XTrain, yTrain)
            prediction = SP.reshape(model.predict(X[testIndexes], k=KTest), (-1, 1))
        predictions[testIndexes] = prediction
        errors[cvRun] = predictionError(y[testIndexes], prediction)
        print(('prediction error right now is', errors[cvRun]))
        if returnModel:
            alpha.append(model.alpha)
            alphas.append(model.alphas)
            msePath.append(model.mse_path)
    if returnModel:
        return indexes, predictions, errors, alpha, alphas, msePath
    else:
        return indexes, predictions, errors
def setUp(self):
    # Make a positive definite noise matrix, clean map, and dirty_map.
    self.nra = 10
    self.ndec = 5
    self.nf = 20
    self.shape = (self.nf, self.nra, self.ndec)
    self.size = self.nra * self.ndec * self.nf
    # Clean map.
    clean_map = sp.empty(self.shape, dtype=float)
    clean_map = al.make_vect(clean_map, axis_names=('freq', 'ra', 'dec'))
    clean_map[...] = sp.sin(sp.arange(self.nf))[:, None, None]
    clean_map *= sp.cos(sp.arange(self.nra))[:, None]
    clean_map *= sp.cos(sp.arange(self.ndec))
    # Noise inverse matrix.
    noise_inv = sp.empty(self.shape * 2, dtype=float)
    noise_inv = al.make_mat(noise_inv, axis_names=('freq', 'ra', 'dec') * 2,
                            row_axes=(0, 1, 2), col_axes=(3, 4, 5))
    rand_mat = rand.randn(*((self.size,) * 2))
    information_factor = 1.e6  # K**-2
    rand_mat = sp.dot(rand_mat, rand_mat.transpose()) * information_factor
    noise_inv.flat[...] = rand_mat.flat
    # Dirty map.
    dirty_map = al.partial_dot(noise_inv, clean_map)
    # Store in self.
    self.clean_map = clean_map
    self.noise_inv = noise_inv
    self.dirty_map = dirty_map
def globs(globs):
    # setup mock urllib2 module to avoid downloading from mldata.org
    mock_datasets = {
        'mnist-original': {
            'data': sp.empty((70000, 784)),
            'label': sp.repeat(sp.arange(10, dtype='d'), 7000),
        },
        'iris': {
            'data': sp.empty((150, 4)),
        },
        'datasets-uci-iris': {
            'double0': sp.empty((150, 4)),
            'class': sp.empty((150,)),
        },
    }

    global custom_data_home
    custom_data_home = tempfile.mkdtemp()
    makedirs(join(custom_data_home, 'mldata'))
    globs['custom_data_home'] = custom_data_home

    global _urllib2_ref
    _urllib2_ref = datasets.mldata.urllib2
    globs['_urllib2_ref'] = _urllib2_ref
    globs['mock_urllib2'] = mock_urllib2(mock_datasets)
    return globs
def estimateBeta(X, Y, K, C=None, addBiasTerm=False, numintervals0=100, ldeltamin0=-5.0, ldeltamax0=5.0):
    """ compute all pvalues
    If numintervalsAlt==0 use EMMA-X trick (keep delta fixed over alternative models)
    """
    n, s = X.shape
    n_pheno = Y.shape[1]
    S, U = LA.eigh(K)
    UY = SP.dot(U.T, Y)
    UX = SP.dot(U.T, X)
    if C is None:
        Ucovariate = SP.dot(U.T, SP.ones([n, 1]))
    else:
        if addBiasTerm:
            C_ = SP.concatenate((C, SP.ones([n, 1])), axis=1)
            Ucovariate = SP.dot(U.T, C_)
        else:
            Ucovariate = SP.dot(U.T, C)
    n_covar = Ucovariate.shape[1]
    beta = SP.empty((n_pheno, s, n_covar + 1))
    LL = SP.ones((n_pheno, s)) * (-SP.inf)
    ldelta = SP.empty((n_pheno, s))
    sigg2 = SP.empty((n_pheno, s))
    pval = SP.ones((n_pheno, s)) * (-SP.inf)
    for phen in SP.arange(n_pheno):
        UY_ = UY[:, phen]
        ldelta[phen] = optdelta(UY_, Ucovariate, S, ldeltanull=None,
                                numintervals=numintervals0,
                                ldeltamin=ldeltamin0, ldeltamax=ldeltamax0)
        for snp in SP.arange(s):
            UX_ = SP.hstack((UX[:, snp:snp + 1], Ucovariate))
            nLL_, beta_, sigg2_ = nLLeval(ldelta[phen, snp], UY_, UX_, S, MLparams=True)
            beta[phen, snp, :] = beta_
            sigg2[phen, snp] = sigg2_
            LL[phen, snp] = -nLL_
    return beta, ldelta
def plot_optimal_tau_for_mean_uncertainty_reduction(
        results_for_exp, results_for_exp_inftau):
    """ Plot the optimal tau for the mean of uncertainty reduction.

    :param results_for_exp: The results of one experiment as 4-D array of the
        shape (metrics, z-values, tau-values, experimental repetitions).
    :type results_for_exp: 4-D array
    :param results_for_exp_inftau: The results of one experiment for
        `tau = inf` as 3-D array of the shape (metrics, z-values,
        experimental repetitions).
    :type results_for_exp_inftau: 3-D array.
    """
    values = sp.empty((results_for_exp.shape[0], results_for_exp.shape[1]))
    err = sp.empty((results_for_exp.shape[0], results_for_exp.shape[1], 2, 1))
    mark = sp.empty((results_for_exp.shape[0], results_for_exp.shape[1]))
    for m, metric in enumerate(cfg['metrics']):
        for z in xrange(len(cfg['zs'])):
            r = sp.mean(results_for_exp[m, z], axis=1)
            mark[m, z] = r.max()
            values[m, z] = sp.mean(cfg['time_scales'][r == r.max()]).magnitude
            r = cfg['time_scales'][r > 0.8 * r.max()]
            err[m, z, 0] = values[m, z] - min(r).magnitude
            err[m, z, 1] = max(r).magnitude + values[m, z]
    plot_param_per_metric_and_z(values, err)
    plot_bool_indicator_per_metric_and_z(
        sp.mean(results_for_exp_inftau, axis=2) >= mark)
def v1like_filter(hin, conv_mode, filterbank, use_cache=False):
    """ V1LIKE linear filtering
    Perform separable convolutions on an image with a set of filters

    Inputs:
      hin -- input image (a 2-dimensional array)
      filterbank -- TODO list of tuples with 1d filters (row, col)
                    used to perform separable convolution
      use_cache -- Boolean, use internal fft_cache (works _well_ if the input
                   shapes don't vary much, otherwise you'll blow away the memory)

    Outputs:
      hout -- a 3-dimensional array with outputs of the filters
              (width X height X n_filters)
    """
    nfilters = len(filterbank)

    filt0 = filterbank[0]
    fft_shape = N.array(hin.shape) + N.array(filt0.shape) - 1
    hin_fft = scipy.signal.fftn(hin, fft_shape)

    if conv_mode == "valid":
        hout_shape = list(N.array(hin.shape[:2]) - N.array(filt0.shape[:2]) + 1) + [nfilters]
        hout_new = N.empty(hout_shape, 'f')
        begy = filt0.shape[0]
        endy = begy + hout_shape[0]
        begx = filt0.shape[1]
        endx = begx + hout_shape[1]
    elif conv_mode == "same":
        hout_shape = hin.shape[:2] + (nfilters,)
        hout_new = N.empty(hout_shape, 'f')
        begy = filt0.shape[0] / 2
        endy = begy + hout_shape[0]
        begx = filt0.shape[1] / 2
        endx = begx + hout_shape[1]
    else:
        raise NotImplementedError

    for i in xrange(nfilters):
        filt = filterbank[i]

        if use_cache:
            key = (filt.tostring(), tuple(fft_shape))
            if key in fft_cache:
                filt_fft = fft_cache[key]
            else:
                filt_fft = scipy.signal.fftn(filt, fft_shape)
                fft_cache[key] = filt_fft
        else:
            filt_fft = scipy.signal.fftn(filt, fft_shape)

        res_fft = scipy.signal.ifftn(hin_fft * filt_fft)
        res_fft = res_fft[begy:endy, begx:endx]
        hout_new[:, :, i] = N.real(res_fft)

    hout = hout_new

    return hout
def max_filter_bord(im, size=3):
    """The function performs a local max filter on a flat image.
    Border's pixels are processed.

    Args:
        im: the image to process
        size: the size in pixels of the local square window. Default value is 3.

    Returns:
        out: the filtered image
    """

    ## Get the size of the image
    [nl, nc, d] = im.shape

    ## Get the size of the moving window
    s = (size - 1) / 2

    ## Initialization of the output
    out = sp.empty((nl, nc, d), dtype=im.dtype.name)
    temp = sp.empty((nl + 2 * s, nc + 2 * s, d), dtype=im.dtype.name)  # A temporary array is created
    temp[0:s, :, :] = sp.NaN
    temp[:, 0:s, :] = sp.NaN
    temp[-s:, :, :] = sp.NaN
    temp[:, -s:, :] = sp.NaN
    temp[s:s + nl, s:s + nc, :] = im  # the column slice must span s:s+nc to match im

    ## Apply the max filter
    for i in range(s, nl + s):  # Shift the origin to remove border effect
        for j in range(s, nc + s):
            for k in range(d):
                out[i - s, j - s, k] = sp.nanmax(temp[i - s:i + 1 + s, j - s:j + s + 1, k])

    return out.astype(im.dtype.name)
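# A minimal usage sketch for max_filter_bord, assuming scipy is imported as sp
# as in the function body; the random 3-band image and the 5x5 window are
# purely illustrative.
import scipy as sp

img = sp.random.rand(20, 20, 3)
filtered = max_filter_bord(img, size=5)
print filtered.shape  # (20, 20, 3): same shape as the input, borders included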
def block_structure5(T):
    """
    computes the block structure of the upper quasi-triangular matrix T
    m is the number of diagonal blocks
    bb is the array containing the begin of each block
    eb is the array containing the end of each block + 1
    s is an array containing the sizes of the diagonal blocks
    """
    n = len(T)
    tol = 1e-15
    i, j = 0, 0
    bb = sp.empty(n, dtype="int")
    eb = sp.empty(n, dtype="int")
    s = sp.empty(n, dtype="int")
    while i < n - 1:
        bb[j] = i
        if abs(T[i + 1, i]) < tol:
            i += 1
            s[j] = 1
            eb[j] = i
        else:
            i += 2
            s[j] = 2
            eb[j] = i
        j += 1
    if i == n - 1:
        bb[j], eb[j] = i, i + 1
        s[j] = 1
        j += 1
    bb = bb[0:j]
    eb = eb[0:j]
    s = s[0:j]
    return j, bb, eb, s
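# A minimal usage sketch for block_structure5, assuming scipy is imported as
# sp. The real Schur factor of a random matrix is upper quasi-triangular, so
# its 1x1 and 2x2 diagonal blocks can be located directly.
import scipy as sp
import scipy.linalg as sl

A = sp.random.rand(6, 6)
T, Z = sl.schur(A, output='real')     # T is upper quasi-triangular
m, bb, eb, s = block_structure5(T)
print m, bb, eb, s                    # number of blocks, their starts, ends and sizes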
def rebin(Data, n_bins_combined):
    """The function that actually does the rebinning on a Data Block."""
    nt = Data.data.shape[0]
    new_nt = nt // n_bins_combined
    new_shape = (new_nt,) + Data.data.shape[1:]
    unmask = sp.logical_not(ma.getmaskarray(Data.data))
    data = Data.data.filled(0)
    # Allocate memory for the rebinned data.
    new_data = ma.zeros(new_shape, dtype=data.dtype)
    counts = sp.zeros(new_shape, dtype=int)
    # Add up the bins to be combined.
    for ii in range(n_bins_combined):
        new_data += data[ii:new_nt * n_bins_combined:n_bins_combined, ...]
        counts += unmask[ii:new_nt * n_bins_combined:n_bins_combined, ...]
    new_data[counts == 0] = ma.masked
    counts[counts == 0] = 1
    new_data /= counts
    Data.set_data(new_data)
    # Now deal with all the other records that aren't the main data.
    for field_name in Data.field.iterkeys():
        # DATE-OBS is a string field so we have to write special code for it.
        if field_name == "DATE-OBS":
            time_field = Data.field[field_name]
            new_field = sp.empty(new_nt, dtype=Data.field[field_name].dtype)
            # Convert to float, average, then convert back to a string.
            time_float = utils.time2float(time_field)
            for ii in range(new_nt):
                tmp_time = sp.mean(time_float[n_bins_combined * ii
                                              :n_bins_combined * (ii + 1)])
                new_field[ii] = utils.float2time(tmp_time)
            Data.set_field(field_name, new_field,
                           axis_names=Data.field_axes[field_name],
                           format=Data.field_formats[field_name])
            continue
        # Only change fields that have a 'time' axis.
        try:
            time_axis = list(Data.field_axes[field_name]).index('time')
        except ValueError:
            continue
        # For now, the time axis has to be the first axis.
        if time_axis != 0:
            msg = "Expected time to be the first axis for all fields."
            raise NotImplementedError(msg)
        field_data = Data.field[field_name]
        if not field_data.dtype.name == "float64":
            msg = "Field data type is not float. Handle explicitly."
            raise NotImplementedError(msg)
        new_field = sp.empty(field_data.shape[:time_axis] + (new_nt,)
                             + field_data.shape[time_axis + 1:],
                             dtype=field_data.dtype)
        for ii in range(new_nt):
            tmp_data = sp.sum(field_data[n_bins_combined * ii
                                         :n_bins_combined * (ii + 1), ...], 0)
            tmp_data /= n_bins_combined
            new_field[ii, ...] = tmp_data
        Data.set_field(field_name, new_field,
                       axis_names=Data.field_axes[field_name],
                       format=Data.field_formats[field_name])
def create_block(self, blocksize, dtype, order):
    N_original = len(self.original_iids)                  # similar code elsewhere -- make a method
    matches_order = self.is_snp_major == (order == "F")   # similar code elsewhere -- make a method
    opposite_order = "C" if order == "F" else "F"         # similar code elsewhere -- make a method
    if matches_order:
        return sp.empty([N_original, blocksize], dtype=dtype, order=order)
    else:
        return sp.empty([N_original, blocksize], dtype=dtype, order=opposite_order)
def calc_BHB_prereq(self, tdvp, tdvp2):
    """Calculates prerequisites for the application of the effective Hamiltonian in terms of tangent vectors.

    This is called (indirectly) by the self.excite.. functions.

    Parameters
    ----------
    tdvp2: EvoMPS_TDVP_Uniform
        Second state (may be the same, or another ground state).

    Returns
    -------
    A lot of stuff.
    """
    l = tdvp.l[0]
    r_ = tdvp2.r[0]
    r__sqrt = tdvp2.r_sqrt[0]
    r__sqrt_i = tdvp2.r_sqrt_i[0]
    A = tdvp.A[0]
    A_ = tdvp2.A[0]

    #Note: V has ~ D**2 * q**2 elements. We avoid making any copies of it except this one.
    #      This one is only needed because low-level routines force V_[s] to be contiguous.
    #      TODO: Store V instead of Vsh in tdvp_uniform too...
    V_ = sp.transpose(tdvp2.Vsh[0], axes=(0, 2, 1)).conj().copy(order='C')

    if self.ham_sites == 2:
        #eyeham = m.eyemat(self.q, dtype=sp.complex128)
        eyeham = sp.eye(self.q, dtype=sp.complex128)
        #diham = m.simple_diag_matrix(sp.repeat([-tdvp.h_expect.real], self.q))
        diham = -tdvp.h_expect.real * sp.eye(self.q, dtype=sp.complex128)
        _ham_tp = self.ham_tp + [[diham, eyeham]]  #subtract norm dof

        Ao1 = get_Aop(A, _ham_tp, 2, conj=False)

        AhlAo1 = [tm.eps_l_op_1s(l, A, A, o1.conj().T) for o1, o2 in _ham_tp]

        A_o2c = get_Aop(A_, _ham_tp, 1, conj=True)

        Ao1c = get_Aop(A, _ham_tp, 0, conj=True)

        A_Vr_ho2 = [tm.eps_r_op_1s(r__sqrt, A_, V_, o2) for o1, o2 in _ham_tp]

        A_A_o12c = get_A_ops(A_, A_, _ham_tp, conj=True)

        A_o1 = get_Aop(A_, _ham_tp, 2, conj=False)
        tmp = sp.empty((A_.shape[1], V_.shape[1]), dtype=A.dtype, order='C')
        tmp2 = sp.empty((A_.shape[1], A_o2c[0].shape[1]), dtype=A.dtype, order='C')
        rhs10 = 0
        for al in range(len(A_o1)):
            tmp2 = tm.eps_r_noop_inplace(r_, A_, A_o2c[al], tmp2)
            tmp3 = m.mmul(tmp2, r__sqrt_i)
            rhs10 += tm.eps_r_noop_inplace(tmp3, A_o1[al], V_, tmp)

        return V_, AhlAo1, A_o2c, Ao1, Ao1c, A_Vr_ho2, A_A_o12c, rhs10, _ham_tp
    elif self.ham_sites == 3:
        return
def kalman_upd(beta, V, y, X, s, S, switch=0, D=None, d=None, G=None, a=None, b=None):
    r"""
    This is the update step of kalman filter.

    .. math::
        :nowrap:

        \begin{eqnarray*}
        e_t &=& y_t - X_t \beta_{t|t-1} \\
        K_t &=& V_{t|t-1} X_t^T (\sigma + X_t V_{t|t-1} X_t )^{-1}\\
        \beta_{t|t} &=& \beta_{t|t-1} + K_t e_t\\
        V_{t|t} &=& (I - K_t X_t^T) V_{t|t-1}\\
        \end{eqnarray*}
    """
    e = y - X * beta
    K = V * X.T * (s + X * V * X.T).I
    beta = beta + K * e
    if switch == 1:
        D = scipy.matrix(D)
        d = scipy.matrix(d)
        if DEBUG:
            print "beta: ", beta
        beta = beta - S * D.T * (D * S * D.T).I * (D * beta - d)
        if DEBUG:
            print "beta: ", beta
    elif switch == 2:
        G = scipy.matrix(G)
        a = scipy.matrix(a)
        b = scipy.matrix(b)
        n = len(beta)
        P = 2 * V.I
        q = -2 * V.I.T * beta
        bigG = scipy.empty((2 * n, n))
        h = scipy.empty((2 * n, 1))
        bigG[:n, :] = -G
        bigG[n:, :] = G
        h[:n, :] = -a
        h[n:, :] = b
        paraset = map(cvxopt.matrix, (P, q, bigG, h, D, d))
        beta = qp(*paraset)['x']
    temp = K * X
    V = (scipy.identity(temp.shape[0]) - temp) * V
    return (beta, V, e, K)
def _init_arrays(self):
    self.D = sp.repeat(self.u_gnd_l.D, self.N + 2)
    self.q = sp.repeat(self.u_gnd_l.q, self.N + 2)

    #Make indices correspond to the thesis
    #Deliberately add a None to the end to catch [-1] indexing!
    self.K = sp.empty((self.N + 3), dtype=sp.ndarray) #Elements 1..N
    self.C = sp.empty((self.N + 2), dtype=sp.ndarray) #Elements 1..N-1
    self.A = sp.empty((self.N + 3), dtype=sp.ndarray) #Elements 1..N
    self.r = sp.empty((self.N + 3), dtype=sp.ndarray) #Elements 0..N
    self.l = sp.empty((self.N + 3), dtype=sp.ndarray)

    self.eta = sp.zeros((self.N + 1), dtype=self.typ)

    if (self.D.ndim != 1) or (self.q.ndim != 1):
        raise NameError('D and q must be 1-dimensional!')

    #Don't do anything pointless
    self.D[0] = self.u_gnd_l.D
    self.D[self.N + 1] = self.u_gnd_l.D

    self.l[0] = sp.zeros((self.D[0], self.D[0]), dtype=self.typ, order=self.odr)
    self.r[0] = sp.zeros((self.D[0], self.D[0]), dtype=self.typ, order=self.odr)
    self.K[0] = sp.zeros((self.D[0], self.D[0]), dtype=self.typ, order=self.odr)
    self.C[0] = sp.empty((self.q[0], self.q[1], self.D[0], self.D[1]), dtype=self.typ, order=self.odr)
    self.A[0] = sp.empty((self.q[0], self.D[0], self.D[0]), dtype=self.typ, order=self.odr)
    for n in xrange(1, self.N + 2):
        self.K[n] = sp.zeros((self.D[n - 1], self.D[n - 1]), dtype=self.typ, order=self.odr)
        self.r[n] = sp.zeros((self.D[n], self.D[n]), dtype=self.typ, order=self.odr)
        self.l[n] = sp.zeros((self.D[n], self.D[n]), dtype=self.typ, order=self.odr)
        self.A[n] = sp.empty((self.q[n], self.D[n - 1], self.D[n]), dtype=self.typ, order=self.odr)
        if n < self.N + 1:
            self.C[n] = sp.empty((self.q[n], self.q[n + 1], self.D[n - 1], self.D[n + 1]), dtype=self.typ, order=self.odr)
def predict_gmm(self, testSamples, featIdx=None, tau=0):
    """ Function that predicts the label for testSamples using the learned model
        Inputs:
            testSamples: the samples to be classified
            featIdx: indices of features to use for classification
            tau: regularization parameter
        Outputs:
            predLabels: the class
            scores: the decision value for each class
    """
    # Get information from the data
    nbTestSpl = testSamples.shape[0]  # Number of testing samples

    # Initialization
    scores = sp.empty((nbTestSpl, self.C))

    # If not specified, predict with all features
    if featIdx is None:
        idx = range(testSamples.shape[1])
    else:
        idx = list(featIdx)

    # Allocate storage for decomposition in eigenvalues
    if self.idxDecomp != idx:
        self.vp = sp.empty((self.C, len(idx)))           # array of eigenvalues
        self.Q = sp.empty((self.C, len(idx), len(idx)))  # array of eigenvectors
        flagDecomp = True
    else:
        flagDecomp = False

    # Start the prediction for each class
    for c in xrange(self.C):
        testSamples_c = testSamples[:, idx] - self.mean[c, idx]

        if flagDecomp:
            self.vp[c, :], self.Q[c, :, :], _ = self.decomposition(self.cov[c, idx, :][:, idx])

        regvp = self.vp[c, :] + tau

        logdet = sp.sum(sp.log(regvp))
        cst = logdet - 2 * sp.log(self.prop[c])  # Pre compute the constant term

        # compute ||lambda^{-0.5}q^T(x-mu)||^2 + cst for all samples
        scores[:, c] = sp.sum(sp.square(sp.dot((self.Q[c, :, :][:, :] / sp.sqrt(regvp)).T, testSamples_c.T)), axis=0) + cst

        del testSamples_c
    self.idxDecomp = idx

    # Assign the label to the minimum value of scores
    predLabels = sp.argmin(scores, 1) + 1

    return predLabels, scores
def searchMLEhyp(X, Y, S, D, lb, ub, ki, mx=5000, fg=-1e9):
    libGP.SetHypSearchPara(cint(mx), ct.c_double(fg))
    ns = X.shape[0]
    dim = X.shape[1]
    Dx = [0 if sp.isnan(x[0]) else int(sum([8**i for i in x])) for x in D]
    hy = sp.empty(libGP.numhyp(cint(ki), cint(dim)))
    lk = sp.empty(1)
    r = libGP.HypSearchMLE(cint(dim), cint(len(Dx)), X.ctypes.data_as(ctpd),
                           Y.ctypes.data_as(ctpd), S.ctypes.data_as(ctpd),
                           (cint * len(Dx))(*Dx), lb.ctypes.data_as(ctpd),
                           ub.ctypes.data_as(ctpd), cint(ki),
                           hy.ctypes.data_as(ctpd), lk.ctypes.data_as(ctpd))
    return hy
def searchMAPhyp(X, Y, S, D, m, s, ki, MAPmargin=1.8, mx=5000, fg=-1e9):
    libGP.SetHypSearchPara(cint(mx), ct.c_double(fg))
    ns = X.shape[0]
    dim = X.shape[1]
    Dx = [0 if sp.isnan(x[0]) else int(sum([8**i for i in x])) for x in D]
    hy = sp.empty(libGP.numhyp(cint(ki), cint(dim)))
    lk = sp.empty(1)
    print "datasetsize = " + str(ns)
    r = libGP.HypSearchMAP(cint(dim), cint(len(Dx)), X.ctypes.data_as(ctpd),
                           Y.ctypes.data_as(ctpd), S.ctypes.data_as(ctpd),
                           (cint * len(Dx))(*Dx), m.ctypes.data_as(ctpd),
                           s.ctypes.data_as(ctpd), ct.c_double(MAPmargin),
                           cint(ki), hy.ctypes.data_as(ctpd),
                           lk.ctypes.data_as(ctpd))
    #print "yyy"
    return hy
def __init__(self, size=None, d=None):
    if size is None:
        self.ni = []
        self.prop = []
        self.mean = []
        self.cov = []
        self.tau = None
        self.ids = None
    else:
        self.ni = sp.empty((size, 1))      # Vector of number of samples for each class
        self.prop = sp.empty((size, 1))    # Vector of proportion
        self.mean = sp.empty((size, d))    # Vector of means
        self.cov = sp.empty((size, d, d))  # Matrix of covariance
def __init__(self, forest, subsample):
    '''
    Constructor
    '''
    self.forest = forest
    if self.forest is not None:
        self.verbose = self.forest.verbose
    else:
        self.verbose = 0
    self.max_depth = 0
    # Estimate the potential number of nodes
    self.subsample = subsample
    self.subsample_bin = SP.zeros(self.forest.n, dtype='bool')
    self.subsample_bin[self.subsample] = True
    self.oob = SP.arange(self.forest.n)[~self.subsample_bin]
    nr_nodes = 4 * subsample.size
    self.nodes = SP.zeros(nr_nodes, dtype='int')
    self.best_predictor = SP.empty(nr_nodes, dtype='int')
    self.start_index = SP.empty(nr_nodes, dtype='int')
    self.end_index = SP.empty(nr_nodes, dtype='int')
    self.left_child = SP.zeros(nr_nodes, dtype='int')
    self.right_child = SP.zeros(nr_nodes, dtype='int')
    self.parent = SP.empty(nr_nodes, dtype='int')
    self.mean = SP.zeros(nr_nodes)
    # Initialize root node
    self.node_ind = 0
    self.nodes[self.node_ind] = 0
    self.start_index[self.node_ind] = 0
    self.end_index[self.node_ind] = subsample.size
    self.num_nodes = 1
    self.num_leafs = 0
    self.s = SP.ones_like(self.nodes) * float('inf')
    kernel = self.get_kernel()
    if not self.forest.optimize_memory_use:
        self.X = self.forest.X[subsample]
    if self.verbose > 1:
        print('compute tree wise singular value decomposition')
    self.S, self.U = LA.eigh(kernel + SP.eye(subsample.size) * 1e-8)
    self.Uy = SP.dot(self.U.T, self.forest.y[subsample])
    if self.verbose > 1:
        print('compute tree wise bias')
    self.mean[0] = SC.estimate_bias(self.Uy, self.U, self.S, SP.log(self.forest.delta))
    self.sample = SP.arange(subsample.size)
    ck = self.get_cross_kernel(self.oob, self.subsample)
    self.cross_core = SP.dot(ck, LA.inv(kernel + SP.eye(self.subsample.size) * self.forest.delta))
    if self.verbose > 1:
        print('done initializing tree')
def compute_metric_gmm(direction, criterion, variables, model_cv, samples, labels, idx):
    """ Function that computes the accuracy of model_cv using the variables: idx +/- one of variables
        Inputs:
            direction: 'backward' or 'forward' or 'SFFS'
            criterion: criterion function used to discriminate variables
            variables: the variable to add or delete to idx
            model_cv: the model built with all the variables
            samples,labels: the samples/label for testing
            idx: the pool of retained variables
        Output:
            metric: the estimated metric

        Used in GMM.forward_selection(), GMM.backward_selection()
    """
    # Initialization
    metric = sp.zeros(variables.size)
    confMatrix = ConfusionMatrix()

    # Compute inv of covariance matrix
    if len(idx) == 0:
        logdet = None
        Qs = None
        scores_t = None
    else:
        logdet = sp.empty((model_cv.C))
        Qs = sp.empty((model_cv.C, len(idx), len(idx)))
        scores_t = sp.empty((model_cv.C, samples.shape[0]))

        for c in xrange(model_cv.C):
            # Here -> store Qs and scores_t
            vp, Q, rcond = model_cv.decomposition(model_cv.cov[c, idx, :][:, idx])
            Qs[c, :, :] = Q / sp.sqrt(vp)
            logdet[c] = sp.sum(sp.log(vp))

            # Pre compute score
            testSamples_c = samples[:, idx] - model_cv.mean[c, idx]
            temp = dgemm(1., Qs[c, :, :].T, testSamples_c, trans_b=True)
            scores_t[c, :] = sp.sum(temp**2, axis=0)

    for i, var in enumerate(variables):
        predLabels = model_cv.predict_gmm_update(direction, samples, logdet, (i, var), Qs, scores_t, featIdx=idx)[0]

        confMatrix.compute_confusion_matrix(predLabels, labels)
        if criterion == 'accuracy':
            metric[i] = confMatrix.get_OA()
        elif criterion == 'F1Mean':
            metric[i] = confMatrix.get_F1Mean()
        elif criterion == 'kappa':
            metric[i] = confMatrix.get_kappa()

    return metric
def simulate_genotypes_w_ld(n_sample=100, m=50000, conseq_r2=0.9, m_ld_chunk_size=100, diploid=False, verbose=False):
    """
    Simulates genotype regions, according to the consecutive simulation scheme,
    and estimates the D matrix.
    """
    if verbose:
        print 'Simulating genotypes for %d individuals and %d markers' % (n_sample, m)
    if diploid:
        print 'Simulating diploid dosages {0,1,2}'
        snps = sp.zeros((m, 2 * n_sample), dtype='single')
        assert m % m_ld_chunk_size == 0, 'WTF?'
        num_chunks = m / m_ld_chunk_size
        for chunk_i in range(num_chunks):
            # Generating correlated training genotypes
            X = sp.empty((m_ld_chunk_size, 2 * n_sample))
            X[0] = stats.norm.rvs(size=2 * n_sample)
            for j in range(1, m_ld_chunk_size):
                X[j] = sp.sqrt(conseq_r2) * X[j - 1] + sp.sqrt(1 - conseq_r2) * stats.norm.rvs(size=2 * n_sample)
            start_i = chunk_i * m_ld_chunk_size
            stop_i = start_i + m_ld_chunk_size
            snps[start_i:stop_i] = X
        snps_means = sp.median(snps, axis=1)
        snps_means.shape = (m, 1)
        bin_snps = sp.array(snps > snps_means, dtype='int8')
        snps = sp.array(bin_snps[:, :n_sample] + bin_snps[:, n_sample:], dtype='int8')
    else:
        snps = sp.zeros((m, n_sample), dtype='single')
        assert m % m_ld_chunk_size == 0, 'WTF?'
        num_chunks = m / m_ld_chunk_size
        for chunk_i in range(num_chunks):
            # Generating correlated training genotypes
            X = sp.empty((m_ld_chunk_size, n_sample))
            X[0] = stats.norm.rvs(size=n_sample)
            for j in range(1, m_ld_chunk_size):
                X[j] = sp.sqrt(conseq_r2) * X[j - 1] + sp.sqrt(1 - conseq_r2) * stats.norm.rvs(size=n_sample)
            start_i = chunk_i * m_ld_chunk_size
            stop_i = start_i + m_ld_chunk_size
            snps[start_i:stop_i] = X

        # Normalize SNPs
        snps_means = sp.mean(snps, axis=1)
        snps_stds = sp.std(snps, axis=1)
        snps_means.shape = (m, 1)
        snps_stds.shape = (m, 1)
        snps = (snps - snps_means) / snps_stds
    return snps
def expectation_prop_inner(m0, V0, Y, Z, F, z, needed):
    #expectation propagation on multivariate gaussian for soft inequality constraint
    #m0,V0 are mean vector, covariance before EP
    #Y is inequality value, Z is sign, 1 for geq, -1 for leq, F is softness variance
    #z is number of ep rounds to run
    #returns mt, Vt the value and variance for observations created by ep
    m0 = sp.array(m0).flatten()
    V0 = sp.array(V0)
    n = V0.shape[0]
    print "expectation propagation running on " + str(n) + " dimensions for " + str(z) + " loops:"

    mt = sp.zeros(n)
    Vt = sp.eye(n) * float(1e10)
    m = sp.empty(n)
    V = sp.empty([n, n])
    conv = sp.empty(z)
    for i in xrange(z):
        #compute the m V given ep obs
        m, V = gaussian_fusion(m0, mt, V0, Vt)
        mtprev = mt.copy()
        Vtprev = Vt.copy()
        for j in [k for k in xrange(n) if needed[k]]:
            print [i, j]
            #the cavity dist at index j
            tmp = 1. / (Vt[j, j] - V[j, j])
            v_ = (V[j, j] * Vt[j, j]) * tmp
            m_ = tmp * (m[j] * Vt[j, j] - mt[j] * V[j, j])
            alpha = sp.sign(Z[j]) * (m_ - Y[j]) / (sp.sqrt(v_ + F[j]))
            pr = PhiR(alpha)
            if sp.isnan(pr):
                pr = -alpha
            beta = pr * (pr + alpha) / (v_ + F[j])
            kappa = sp.sign(Z[j]) * (pr + alpha) / (sp.sqrt(v_ + F[j]))
            #print [alpha,beta,kappa,pr]
            mt[j] = m_ + 1. / kappa
            #mt[j] = min(abs(mt[j]),1e5)*sp.sign(mt[j])
            Vt[j, j] = min(1e10, 1. / beta - v_)
        #print sp.amax(mtprev-mt)
        #print sp.amax(sp.diagonal(Vtprev)-sp.diagonal(Vt))
        #TODO make this a ratio instead of absolute
        delta = max(sp.amax(mtprev - mt), sp.amax(sp.diagonal(Vtprev) - sp.diagonal(Vt)))
        conv[i] = delta
    print "EP finished with final max deltas " + str(conv[-3:])
    V = V0.dot(spl.solve(V0 + Vt, Vt))
    m = V.dot((spl.solve(V0, m0) + spl.solve(Vt, mt)).T)
    return mt, Vt
def _init_arrays(self):
    #Deliberately add a None to the end to catch [-1] indexing!
    self.A = sp.empty((self.N + 3), dtype=sp.ndarray) #Elements 1..N
    self.r = sp.empty((self.N + 3), dtype=sp.ndarray) #Elements 0..N
    self.l = sp.empty((self.N + 3), dtype=sp.ndarray)

    self.l[0] = sp.zeros((self.D[0], self.D[0]), dtype=self.typ, order=self.odr)
    self.r[0] = sp.zeros((self.D[0], self.D[0]), dtype=self.typ, order=self.odr)
    self.A[0] = sp.empty((self.q[0], self.D[0], self.D[0]), dtype=self.typ, order=self.odr)
    for n in xrange(1, self.N + 2):
        self.r[n] = sp.zeros((self.D[n], self.D[n]), dtype=self.typ, order=self.odr)
        self.l[n] = sp.zeros((self.D[n], self.D[n]), dtype=self.typ, order=self.odr)
        self.A[n] = sp.empty((self.q[n], self.D[n - 1], self.D[n]), dtype=self.typ, order=self.odr)
def icregression(X, y, W, D, d, G, a, b, n):
    r"""
    This returns the estimated weights for the following regression problem

    .. math::
        y = X \beta

    constrained to

    .. math::
        :nowrap:

        \begin{eqnarray*}
        D \beta &=& d\\
        a \leq G &\beta & \leq b
        \end{eqnarray*}

    This problem translates nicely to a quadratic programming problem.
    The CVXOPT package is used as the quadratic solver engine.

    :param X: Independent variable
    :type X: scipy.matrix<float>
    :param y: Dependent variable
    :type y: scipy.matrix<float>
    :param W: Weight matrix
    :type W: scipy.matrix<float>
    :param D: Equality constraint matrix
    :type D: scipy.matrix<float>
    :param d: Equality constraint vector
    :type d: scipy.matrix<float>
    :param G: Inequality constraint matrix
    :type G: scipy.matrix<float>
    :param a b: Lower and upper bound of the inequality constraints
    :type a b: scipy.matrix<float>
    :return: :math:`\hat{\beta}`
    :rtype: scipy.matrix<float>
    """
    P = 2 * X.T * W * X
    q = -2 * X.T * W * y
    bigG = scipy.empty((2 * n, n))
    h = scipy.empty((2 * n, 1))
    bigG[:n, :] = -G
    bigG[n:, :] = G
    h[:n, :] = -a
    h[n:, :] = b
    paraset = map(cvxopt.matrix, (P, q, bigG, h, D, d))
    return qp(*paraset)['x']
def _init_arrays(self):
    self.A = sp.empty((self.N + 1), dtype=sp.ndarray) #Elements 1..N
    self.r = sp.empty((self.N + 1), dtype=sp.ndarray) #Elements 0..N
    self.l = sp.empty((self.N + 1), dtype=sp.ndarray)

    self.r[0] = sp.zeros((self.D[0], self.D[0]), dtype=self.typ, order=self.odr)
    self.l[0] = m.eyemat(self.D[0], dtype=self.typ)

    for n in xrange(1, self.N + 1):
        self.r[n] = sp.zeros((self.D[n], self.D[n]), dtype=self.typ, order=self.odr)
        self.l[n] = sp.zeros((self.D[n], self.D[n]), dtype=self.typ, order=self.odr)
        self.A[n] = sp.zeros((self.q[n], self.D[n - 1], self.D[n]), dtype=self.typ, order=self.odr)

    sp.fill_diagonal(self.r[self.N], 1.)
def _init_arrays(self):
    self.A = sp.empty((self.N + 1), dtype=sp.ndarray) #Elements 1..N
    self.r = sp.empty((self.N + 1), dtype=sp.ndarray) #Elements 0..N
    self.l = sp.empty((self.N + 1), dtype=sp.ndarray)

    self.r[0] = sp.zeros((self.D[0], self.D[0]), dtype=self.typ, order=self.odr)
    #Already set the 0th element (not a dummy)
    self.l[0] = sp.eye(self.D[0], self.D[0], dtype=self.typ).copy(order=self.odr)

    for n in xrange(1, self.N + 1):
        self.r[n] = sp.zeros((self.D[n], self.D[n]), dtype=self.typ, order=self.odr)
        self.l[n] = sp.zeros((self.D[n], self.D[n]), dtype=self.typ, order=self.odr)
        self.A[n] = sp.empty((self.q[n], self.D[n - 1], self.D[n]), dtype=self.typ, order=self.odr)

    sp.fill_diagonal(self.r[self.N], 1.)
def train(self):
    date = self.respond.rheader
    it = enumerate(zip(headRollingWindows(date, self.window, self.window - 1),
                       headRollingWindows(self.respond.data, self.window, self.window - 1),
                       headRollingWindows(self.regressors.data, self.window, self.window - 1)))
    beta = scipy.empty((self.t, self.n))
    for i, (d, y, X) in it:
        b = regression(X, y, self.W)
        beta[i, :] = b.T
    self.est = TimeSeriesFrame(beta, self.regressors.rheader, self.regressors.cheader)
    return self
def _add_single_block(self, Block):
    """Adds all the data in a DataBlock Object to the Writer such that it can
    be written to a fits file eventually."""
    Block.verify()
    # Merge the histories
    if self.first_block_added:
        self.history = db.History(Block.history)
    else:
        self.history = db.merge_histories(self.history, Block)
    # Some dimensioning and such
    dims = tuple(Block.dims)
    n_records = dims[0] * dims[1] * dims[2]
    block_shape = dims[0:-1]
    # For now automatically determine the format for the data field.
    data_format = str(dims[-1]) + 'E'
    if self.first_block_added:
        self.data_format = data_format
    elif self.data_format != data_format:
        raise ce.DataError('Data shape mismatch: freq axis must be same'
                           ' length for all DataBlocks added to Writer.')
    # Copy the reshaped data from the DataBlock
    data = sp.array(ma.filled(Block.data, float('nan')))
    if self.first_block_added:
        self.data = data.reshape((n_records, dims[3]))
    else:
        self.data = sp.concatenate((self.data,
                                    data.reshape((n_records, dims[3]))), axis=0)
    # Now get all stored fields for writing out.
    for field, axes in Block.field_axes.iteritems():
        # Need to expand the field data to the full ntimes x npol x ncal
        # length (with lots of repetition).  We will use np broadcasting.
        broadcast_shape = [1, 1, 1]
        for axis in axes:
            axis_ind = list(Block.axes).index(axis)
            broadcast_shape[axis_ind] = dims[axis_ind]
        # Allocate memory for the new full field.
        data_type = Block.field[field].dtype
        field_data = sp.empty(block_shape, dtype=data_type)
        # Copy data with the entries, expanding dummy axes.
        field_data[:, :, :] = sp.reshape(Block.field[field], broadcast_shape)
        if self.first_block_added:
            self.field[field] = field_data.reshape(n_records)
            self.formats[field] = Block.field_formats[field]
        else:
            self.field[field] = sp.concatenate((self.field[field],
                                                field_data.reshape(n_records)), axis=0)
            if self.formats[field] != Block.field_formats[field]:
                raise ce.DataError('Format mismatch in added data blocks'
                                   ' and field: ' + field)
    self.first_block_added = False
def infer_EI(self, X, D, fixI=False, I=0.):
    m, v = self.infer_diag(X, D)
    if not fixI:
        I = np.infty
        for i in range(len(self.Y)):
            if sum(X[i, self.d:]) == 0:
                I = min(I, self.Y[i, 0])
    E = sp.empty([self.size, X.shape[0]])
    for i in range(self.size):
        E[i, :] = EI(m[i, :], sp.sqrt(v[i, :]), I)
    return E
def draw(m_, V_, z):
    raise NotImplementedError
    ns = V_.shape[0]
    m = sp.array([[i for i in (m_)]])
    V = copy.copy(V_)
    R = sp.empty([ns, z])
    libGP.drawk(V.ctypes.data_as(ctpd), cint(ns), R.ctypes.data_as(ctpd), cint(z))
    R += sp.hstack([m.T] * z)
    #R=sp.random.multivariate_normal(m.flatten(),V,z)
    return copy.copy(R).T
def hist2d(s1, s2, res=(100, 100)):
    h2d = sp.histogram2d
    if len(s1.shape) == 1:
        h, xe, ye = h2d(s1, s2, normed=True, bins=res)
        xm, ym = 0.5 * (xe[:-1] + xe[1:]), 0.5 * (ye[:-1] + ye[1:])
        int_wd = sp.diff(xe)
    else:
        N = len(s1)
        h = sp.empty((res[0], res[1], N))
        xm, ym = sp.empty((N, res[0])), sp.empty((N, res[1]))
        int_wd = sp.empty((N, res[0]))
        print 'generating 2-D histogram...'
        pbar = ProgressBar(maxval=N).start()
        for i in range(N):
            h_, xe, ye = h2d(s1[i], s2[i], normed=True, bins=res)
            h[:, :, i], xm[i, :], ym[i, :] = h_, mid_pt(xe), mid_pt(ye)
            int_wd[i, :] = sp.diff(xe)
            pbar.update(i + 1)
        pbar.finish()
    return h, xm, ym, int_wd
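# A minimal usage sketch for hist2d, assuming scipy is imported as sp with the
# old numpy-aliasing top-level namespace used throughout these snippets. The
# 1-D branch needs only scipy, not the ProgressBar/mid_pt helpers.
import scipy as sp

x = sp.random.randn(10000)
y = 0.5 * x + sp.random.randn(10000)
h, xm, ym, wd = hist2d(x, y, res=(50, 50))
print h.shape, xm.shape  # (50, 50) density estimate and the 50 bin centres per axis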
def evalfREML(logDelta, MCtrials, hf, Y, beta_rand, e_rand_unscaled, chunk_size=3000):
    (N, M) = hf['X'].shape
    delta = sp.exp(logDelta, dtype="single")
    y_rand = sp.empty((N, MCtrials), dtype="single")
    H_inv_y_rand = sp.empty((N, MCtrials), dtype="single")
    beta_hat_rand = sp.zeros((M, MCtrials), dtype="single")
    e_hat_rand = sp.empty((N, MCtrials), dtype="single")
    ## Defining the initial vector x0
    x0 = sp.zeros(N, dtype="single")
    for t in range(0, MCtrials):
        Xbeta = sp.empty(N, dtype="single")
        ## build random phenotypes using pre-generated components
        for chunk in range(0, N, chunk_size):
            X_chunk = sp.array(hf['X'][chunk:chunk + chunk_size], dtype="single")
            Xbeta[chunk:chunk + X_chunk.shape[0]] = sp.dot(X_chunk, beta_rand[:, t])
            print("First chunk")
        #############################################################
        y_rand[:, t] = Xbeta + sp.sqrt(delta) * e_rand_unscaled[:, t]
        ## compute H^(-1)%*%y.rand[,t] by the aid of conjugate gradient iteration
        H_inv_y_rand[:, t] = conjugateGradientSolveChunks(hf=hf, x0=x0, b=y_rand[:, t],
                                                          c2=delta, chunk_size=chunk_size)
        ## compute BLUP estimated SNP effect sizes and residuals
        for chunk in range(0, N, chunk_size):
            X_chunk = sp.array(hf['X'][chunk:chunk + chunk_size], dtype="single")
            beta_hat_rand[:, t] += sp.dot(X_chunk.T, H_inv_y_rand[chunk:chunk + chunk_size, t])
        e_hat_rand[:, t] = H_inv_y_rand[:, t]
        print("In evalfREML: Iteration %d has been completed..." % t)
    ## compute BLUP estimated SNP effect sizes and residuals for real phenotypes
    e_hat_data = conjugateGradientSolveChunks(hf=hf, x0=x0, b=Y, c2=delta, chunk_size=chunk_size)
    beta_hat_data = sp.zeros(M, dtype="single")
    for chunk in range(0, N, chunk_size):
        X_chunk = sp.array(hf['X'][chunk:chunk + chunk_size], dtype="single")
        beta_hat_data += sp.dot(X_chunk.T, e_hat_data[chunk:chunk + chunk_size])
    ## evaluate f_REML
    f = sp.log((sp.sum(beta_hat_data**2) / sp.sum(e_hat_data**2))
               / (sp.sum(beta_hat_rand**2) / sp.sum(e_hat_rand**2)))
    return f
def redside(data):
    """
    Subtracts bias from data and returns the overscan region-subtracted image.
    CCD geometry is currently hardwired, so this won't work for windowed or
    binned setups.
    """
    if data.shape[1] == 2148:
        return oneamp(data)
    bias = scipy.empty((2, data.shape[0]))
    bias[0] = (data[:, 1:20].mean(axis=1) * 19 + data[:, 2088:2168].mean(axis=1) * 80) / 99.
    bias[1] = (data[:, 20:38].mean(axis=1) * 18 + data[:, 2168:2248].mean(axis=1) * 80) / 98.

    out_data = scipy.empty((2046, 2048))

    """
    Mask out the bad columns. Note this might not be appropriate for older
    data (or if the CCDs change).
    """
    mask = (data[0:1490, 995] + data[0:1490, 997]) / 2.
    data[0:1490, 996] = mask.copy()
    mask = (data[0:1493, 999] + data[0:1493, 1001]) / 2.
    data[0:1493, 1000] = mask.copy()

    data = data.transpose()
    out_data[0:1023, :] = data[41:1064, :] - bias[0]
    out_data[1023:2046, :] = data[1064:2087, :] - bias[1]

    """
    Fix difference in amplifier gains. This does *not* convert from DN to
    electrons.
    """
    out_data[1023:2046, :] *= 1.0765  # Note this differs from the LRIS website
                                      # that would suggest 1.0960
    out_data = out_data.transpose()

    return out_data
def f(r, t):
    arr = empty(2 * N, float)
    for i in range(2 * N):
        if i < N:
            arr[i] = r[i + N]
        elif i == N:
            arr[i] = k / m * (r[1] - r[0]) + 1 / m * cos(omega * t)
        elif i == 2 * N - 1:
            arr[2 * N - 1] = k / m * (r[N - 2] - r[N - 1])
        else:
            arr[i] = k / m * (r[i + 1 - N] - 2 * r[i - N] + r[i - 1 - N])
    return arr
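# A minimal usage sketch for the coupled-oscillator right-hand side f(r, t).
# The values of N, k, m, omega and the odeint call below are illustrative
# assumptions; f itself expects these names as module-level globals.
from numpy import empty, cos, arange, zeros
from scipy.integrate import odeint

N, k, m, omega = 5, 1.0, 1.0, 1.0     # assumed parameters
r0 = zeros(2 * N)                     # N positions followed by N velocities, at rest
t = arange(0.0, 10.0, 0.01)
trajectory = odeint(f, r0, t)         # shape (len(t), 2*N)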
def geodesic_rhs_ode(t, xv, lam, dtd, func, jacobian, Avv, args, j, Acc):
    ## note t, xv need to be switched to use odeint/ode
    M, N = j.shape
    ans = scipy.empty(2 * N)
    ans[:N] = xv[N:]
    j[:, :] = jacobian(xv[:N], *args)
    g = (scipy.dot(j.T, j) + lam * dtd)
    if Avv is not None:
        Acc[:] = Avv(xv[:N], xv[N:], *args)
    else:
        Acc[:] = AvvFD(xv[:N], xv[N:], func, args)
    ans[N:] = -scipy.linalg.solve(g, scipy.dot(j.T, Acc))
    return ans
def summed_dist_matrix(self, vectors, presorted=False):
    D = sp.empty((len(vectors), len(vectors)))
    if len(vectors) > 0:
        might_have_units = self(vectors[0])
        if hasattr(might_have_units, 'units'):
            D = D * might_have_units.units

    for i in xrange(len(vectors)):
        for j in xrange(i, len(vectors)):
            D[i, j] = D[j, i] = sp.sum(
                self((vectors[i] - sp.atleast_2d(vectors[j]).T).flatten()))
    return D
def find_direction(self, grad_diffs, steps, grad, hessian_diag, idxs):
    grad = grad.copy()  # We will change this.
    n_current_factors = len(idxs)

    # TODO: find a good name for this variable.
    rho = scipy.empty(n_current_factors)
    alpha = scipy.empty(n_current_factors)

    for i in idxs[::-1]:
        rho[i] = 1 / scipy.inner(grad_diffs[i], steps[i])
        alpha[i] = rho[i] * scipy.inner(steps[i], grad)
        grad -= alpha[i] * grad_diffs[i]

    z = hessian_diag * grad

    # TODO: find a good name for this variable (surprise!)
    for i in idxs:
        beta = rho[i] * scipy.inner(grad_diffs[i], z)
        z += steps[i] * (alpha[i] - beta)

    return z, {}
def initialization4LCKSVD(self, training_feats, H_train, dictsize, iterations, sparsitythres, tol=1e-4):
    """
    Initialization for Label consistent KSVD algorithm

    Inputs
          training_feats  -training features
          H_train         -label matrix for training feature
          dictsize        -number of dictionary items
          iterations      -iterations
          sparsitythres   -sparsity threshold
          tol             -tolerance when performing the approximate KSVD

    Outputs
          Dinit           -initialized dictionary
          Tinit           -initialized linear transform matrix
          Winit           -initialized classifier parameters
          Q               -optimal code matrix for training features
    """
    numClass = H_train.shape[0]  # number of objects
    numPerClass = round(dictsize / float(numClass))  # initial points from each class
    Dinit = sp.empty((training_feats.shape[0], numClass * numPerClass))  # for LC-Ksvd1 and LC-Ksvd2
    dictLabel = sp.zeros((numClass, numPerClass))
    runKsvd = ApproximateKSVD(numPerClass, max_iter=iterations, tol=tol,
                              transform_n_nonzero_coefs=sparsitythres)
    for classid in range(numClass):
        col_ids = sp.logical_and(H_train[classid, :] == 1,
                                 sp.sum(training_feats**2, axis=1) > 1e-6)
        # Initialization for LC-KSVD (perform KSVD in each class)
        Dpart = training_feats[:, col_ids][:, sp.random.choice(col_ids.sum(), numPerClass, replace=False)]
        Dpart = Dpart / splin.norm(Dpart, axis=0)
        para_data = training_feats[:, col_ids]

        # ksvd process
        runKsvd.fit(training_feats[:, col_ids])
        Dinit[:, numPerClass * classid:numPerClass * (classid + 1)] = runKsvd.components_
        dictLabel[classid, numPerClass * classid:numPerClass * (classid + 1)] = 1.

    T = sp.eye(dictsize)  # scale factor
    Q = sp.zeros((dictsize, training_feats.shape[1]))  # energy matrix
    for frameid in range(training_feats.shape[1]):
        for itemid in range(Dinit.shape[1]):
            Q[sp.ix_(dictLabel == itemid, H_train == frameid)] = 1.

    # ksvd process
    runKsvd.fit(training_feats, Dinit=Dinit)
    Xtemp = runKsvd.gamma_

    # learning linear classifier parameters
    Winit = splin.pinv(Xtemp.dot(Xtemp.T) + sp.eye(Xtemp.shape[0])).dot(Xtemp).dot(H_train.T)
    Tinit = splin.pinv(Xtemp.dot(Xtemp.T) + sp.eye(Xtemp.shape[0])).dot(Xtemp).dot(Q.T)

    return Dinit, Tinit.T, Winit.T, Q
def twinsurr(X, m=1, t=1, e=0.1, nSurr=100, RP=None):
    """ Returns Twin Surrogates (TS) based on RP with FAN metric.
        X := time series
        m := dimension of embedding (default = 1)
        t := time delay of embedding (default = 1)
        e := recurrence threshold (default = 0.1)
        nSurr := number of Surrogates (default = 100)
        RP := recurrence Plot of X
        Output:
        S := matrix where each column is a TS
        nTwins := number of twins found
        twinMat := matrix of twin pairs twinMat[i, j] = 1 => twins
    """
    if RP is None:
        RP = rpfan(X, m, t, e)
    nX = len(RP)
    print 'Searching for twins...'
    twinMat = sparse.lil_matrix((nX, nX), dtype=sp.int8)
    pbar = ProgressBar(maxval=nX).start()
    for j in range(nX):
        i = sp.tile(RP[:, j], (nX, 1)).T
        i = sp.all(RP == i, axis=0) * any(RP[:, j])
        twinMat[i, j] = 1
        pbar.update(j + 1)
    pbar.finish()
    nTwins = sum(sp.any((twinMat - sparse.eye(nX, nX)).toarray(), axis=0))
    if nTwins == 0:
        print 'Warning: No twins detected!'
        print 'Surrogates are same as original time series!'
    S = sp.empty((nX, nSurr))
    print 'Creating surrogates...'
    pbar = ProgressBar(maxval=nSurr).start()
    for i in range(nSurr):
        k, l = 0, sp.ceil(nX * sp.rand()) - 1
        k, l = int(k), int(l)
        S[k, i] = X[l]
        while k < nX - 1:
            twins = sp.where(twinMat[:, l].toarray().squeeze())[0]
            if len(twins) > 1:
                idx = int(sp.ceil(len(twins) * sp.rand()) - 1)
                l = twins[idx]
            l += 1
            if l > nX - 1:
                while l > nX - 1:
                    l = sp.ceil(nX * sp.rand()) - 1
                l = int(l)
            S[k + 1, i] = X[l]
            k += 1
        pbar.update(i + 1)
    pbar.finish()
    return S, nTwins, twinMat.toarray()
def __calPostParticleDisFunc(self, momentsCollision):
    """ Transforming the post-collision moments m' back to the post-collision
    distribution function f' """
    tempParticleDisFunc = sp.empty([9, self.ny, self.nx])
    inverseTransformation = slin.inv(self.transformationMatrix)
    for i in sp.arange(self.ny):
        for j in sp.arange(self.nx):
            tempParticleDisFunc[:, i, j] = np.dot(inverseTransformation,
                                                  momentsCollision[:, i, j])
    return tempParticleDisFunc
def spec_interp2(coef_spec, factor=1):
    """Interpolate a function"""
    assert isinstance(factor, int) and factor > 0, \
        "factor must be a positive integer"
    npts = len(coef_spec)
    padded_fft = S.empty(npts * factor, 'D')
    for i in range(factor):
        padded_fft[i * npts:(i + 1) * npts] = coef_spec
    out = S.ifft(padded_fft)
    return out.real
def buildKsym_d(kf, x, d):
    #x should be column vector
    (l, _) = x.shape
    K = sp.matrix(sp.empty([l, l]))
    for i in range(l):
        K[i, i] = kf(x[i, :], x[i, :], d1=d[i], d2=d[i]) + 10**-10
        for j in range(i + 1, l):
            K[i, j] = kf(x[i, :], x[j, :], d1=d[i], d2=d[j])
            K[j, i] = K[i, j]
    return K
def compute_surface_points(self, Xn):
    Xe = scipy.empty((self.NPoints, 3))
    np = 0
    for ne, nwmap in enumerate(self.NWMap):
        nodes = self.Nodes[nwmap[1]:nwmap[2]]
        NN = nodes.shape[0]
        Phi = self.Weights[nwmap[3]:nwmap[4]]
        Phi = Phi.reshape((Phi.size / NN, NN))
        NPhi = Phi.shape[0]
        Xe[np:np + NPhi, :] = scipy.dot(Phi, Xn[nodes, :])
        np += NPhi
    return Xe
def mmul_diag(Adiag, B, act_right=True, out=None):
    if act_right:
        assert B.shape[0] == Adiag.shape[0]
    else:
        assert B.shape[1] == Adiag.shape[0]
    assert Adiag.ndim == 1
    assert B.ndim == 2

    if act_right:
        if out is None:
            out = sp.empty((Adiag.shape[0], B.shape[1]),
                           dtype=sp.promote_types(Adiag.dtype, B.dtype))
        out = out.T
        sp.multiply(Adiag, B.T, out)
        out = out.T
    else:
        if out is None:
            out = sp.empty((B.shape[0], Adiag.shape[0]),
                           dtype=sp.promote_types(Adiag.dtype, B.dtype))
        sp.multiply(Adiag, B, out)

    return out
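# A minimal usage sketch for mmul_diag, assuming scipy is imported as sp. It
# multiplies diag(Adiag) with B from the left without ever forming the dense
# diagonal matrix, which the comparison below checks against sp.diag.
import scipy as sp

Adiag = sp.array([1., 2., 3.])
B = sp.ones((3, 4))
print (mmul_diag(Adiag, B) == sp.dot(sp.diag(Adiag), B)).all()  # True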
def array_coords(shape):
    """
    Faster version of scipy.indices()
    """
    y = shape[0]
    x = shape[1]
    out = scipy.empty((2, y, x))
    t = scipy.arange(y, dtype='f8')
    out[0] = scipy.tile(t, (x, 1)).T
    t = scipy.arange(x, dtype='f8')
    out[1] = scipy.tile(t, (y, 1))
    return out
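# A minimal usage sketch for array_coords, assuming the same scipy module used
# in the function body (old numpy-aliasing top-level namespace).
import scipy

coords = array_coords((3, 4))
print coords.shape                              # (2, 3, 4): row indices in coords[0], column indices in coords[1]
print (coords == scipy.indices((3, 4))).all()   # agrees with scipy.indices, but as float64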
def csr_to_problem(x, prob):
    # Extra space for termination node and (possibly) bias term
    x_space = prob.x_space = scipy.empty((x.nnz + x.shape[0] * 2), dtype=feature_node)
    prob.rowptr = x.indptr.copy()
    prob.rowptr[1:] += 2 * scipy.arange(1, x.shape[0] + 1)
    prob_ind = x_space["index"]
    prob_val = x_space["value"]
    prob_ind[:] = -1
    if jit_enabled:
        csr_to_problem_jit(x.shape[0], x.data, x.indices, x.indptr, prob_val, prob_ind, prob.rowptr)
    else:
        csr_to_problem_nojit(x.shape[0], x.data, x.indices, x.indptr, prob_val, prob_ind, prob.rowptr)
def rgb_convert(arr):
    #assert(arr.min()>=0 and arr.max()<=1)

    # force 3 dims
    if arr.ndim == 2 or arr.shape[2] == 1:
        arr_new = sp.empty(arr.shape[:2] + (3,), dtype="float32")
        arr_new[:, :, 0] = arr.copy()
        arr_new[:, :, 1] = arr.copy()
        arr_new[:, :, 2] = arr.copy()
        arr = arr_new

    return arr
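# A minimal usage sketch for rgb_convert, assuming scipy is imported as sp and
# a 2-D grayscale array with values in [0, 1], as the commented-out assert
# above suggests.
import scipy as sp

gray = sp.random.rand(4, 5).astype("float32")
rgb = rgb_convert(gray)
print rgb.shape   # (4, 5, 3): the single channel replicated three times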
def learn_gmm(self, samples, labels):
    """ Method that learns the GMM from training samples and stores the mean,
        covariance and proportion of each class in class members.
        Input:
            samples: training samples
            labels: training labels (must be exactly C labels between 1 and C)
    """
    # Get information from the data
    self.C = int(labels.max(0))  # Number of classes
    self.d = samples.shape[1]    # Number of variables

    # Initialization
    self.nbSpl = sp.empty((self.C))                 # Vector of number of samples for each class
    self.prop = sp.empty((self.C))                  # Vector of proportion
    self.mean = sp.empty((self.C, self.d))          # Vector of means
    self.cov = sp.empty((self.C, self.d, self.d))   # Matrix of covariance
    self.vp = sp.empty((self.C, samples.shape[1]))  # array of eigenvalues
    self.Q = sp.empty((self.C, samples.shape[1], samples.shape[1]))  # array of eigenvectors

    # Learn the parameter of the model for each class
    for c in xrange(self.C):
        # Get index of class c+1 samples
        j = sp.where(labels == (c + 1))[0]

        # Update GMM
        xj = samples[j, :]
        self.nbSpl[c] = float(j.size)
        self.mean[c, :] = sp.mean(xj, axis=0)
        # self.cov[c,:,:] = sp.cov(samples[j,:],rowvar=0)  # implicit: with no bias
        self.cov[c, :, :] = compute_cov(xj, self.mean[c, :], self.nbSpl[c])
        self.vp[c, :], self.Q[c, :, :], _ = self.decomposition(self.cov[c, :, :])

    self.prop = self.nbSpl / samples.shape[0]
def plot_contour(self, filename, norms=False, lag_inds=(0,), cross_power=False,
                 title=None, coloraxis=[]):
    # lag_inds defaults to the one-element tuple (0,) so that list(lag_inds) works
    lag_inds = list(lag_inds)
    n_bins = 20
    factor = 1.5
    #start = 2.1e6
    freq_diffs = sp.empty(n_bins)
    freq_diffs[0] = 0.0001
    freq_diffs[1] = 2.5 * 200.0 / 256
    freq_diffs[2] = 4.5 * 200.0 / 256
    freq_diffs[3] = 6.5 * 200.0 / 256
    for ii in range(4, n_bins):
        freq_diffs[ii] = factor * freq_diffs[ii - 1]
    freq_diffs *= 1e6

    pdat = self.bin_correlation_nu(lag_inds, freq_diffs, norms=norms,
                                   cross_power=cross_power)

    a = plt.figure()
    #a.set_figwidth(a.get_figwidth() / 3.0)
    if len(coloraxis) > 0:
        f = plt.contourf(self.lags[lag_inds], (freq_diffs) / 1e6, pdat, coloraxis)
    else:
        f = plt.contourf(self.lags[lag_inds], (freq_diffs) / 1e6, pdat)
    f.ax.set_xscale('log')
    f.ax.set_yscale('log')
    #im = plt.pcolormesh(self.lags[lag_inds], (freq_diffs) / 1e6, pdat,
    #                    shading='gouraud')
    #im.axes.set_xscale('log')
    #im.axes.set_yscale('log')
    plt.axis('scaled')
    plt.xlim((0.05, 0.9))
    plt.ylim((0.8, 100))
    plt.xlabel("angular lag, $\sigma$ (degrees, 34$\cdotp$Mpc/h)")
    plt.ylabel("frequency lag, $\pi$ (MHz, 4.5$\cdotp$Mpc/h)")
    plt.title(title)
    #c = plt.colorbar(f, ticks=coloraxis)
    c = plt.colorbar(f)
    c.ax.set_ylabel("correlation (mK)")
    plt.savefig(filename)
def open_data(filename):
    '''
    The function opens and loads the image given its name.
    The type of the data is checked from the file and the scipy array is
    initialized accordingly.
        Input:
            filename: the name of the file
        Output:
            im: the data cube
            GeoTransform: the geotransform information
            Projection: the projection information
    '''
    data = gdal.Open(filename, gdal.GA_ReadOnly)
    if data is None:
        print 'Impossible to open ' + filename
        exit()

    nc = data.RasterXSize
    nl = data.RasterYSize
    d = data.RasterCount

    # Get the type of the data
    gdal_dt = data.GetRasterBand(1).DataType
    if gdal_dt == gdal.GDT_Byte:
        dt = 'uint8'
    elif gdal_dt == gdal.GDT_Int16:
        dt = 'int16'
    elif gdal_dt == gdal.GDT_UInt16:
        dt = 'uint16'
    elif gdal_dt == gdal.GDT_Int32:
        dt = 'int32'
    elif gdal_dt == gdal.GDT_UInt32:
        dt = 'uint32'
    elif gdal_dt == gdal.GDT_Float32:
        dt = 'float32'
    elif gdal_dt == gdal.GDT_Float64:
        dt = 'float64'
    elif gdal_dt == gdal.GDT_CInt16 or gdal_dt == gdal.GDT_CInt32 or gdal_dt == gdal.GDT_CFloat32 or gdal_dt == gdal.GDT_CFloat64:
        dt = 'complex64'
    else:
        print 'Data type unknown'
        exit()

    # Initialize the array
    im = sp.empty((nl, nc, d), dtype=dt)
    for i in range(d):
        im[:, :, i] = data.GetRasterBand(i + 1).ReadAsArray()

    GeoTransform = data.GetGeoTransform()
    Projection = data.GetProjection()
    data = None
    return im, GeoTransform, Projection
def adjust_dispersion_chunk(counts, dmatrix1, disp_raw, disp_fitted, varPrior, sf, options, idx, log=False):
    disp_adj = sp.empty((counts.shape[0], 1))
    disp_adj.fill(sp.nan)
    disp_adj_conv = sp.zeros_like(disp_adj, dtype='bool')
    error_cnt = 0

    for i in range(idx.shape[0]):
        if log:
            log_progress(i, idx.shape[0])

        if not sp.isnan(disp_raw[i]):
            ### init dispersion and response
            disp = 0.1
            resp = counts[i, :].astype('int')

            ### run for max 10 iterations
            for j in range(10):
                modNB = sm.GLM(resp, dmatrix1,
                               family=sm.families.NegativeBinomial(alpha=disp),
                               offset=sp.log(sf))
                result = modNB.fit()

                dispBef = disp
                yhat = result.mu
                sign = -1.0
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    try:
                        res = minimize_scalar(adj_loglikelihood_shrink_scalar_onedisper,
                                              args=(dmatrix1, resp, yhat, disp_fitted[i], varPrior, sign),
                                              method='Bounded', bounds=(0, 10.0), tol=1e-5)
                    except TypeError:
                        disp_adj[i] = disp
                        disp_adj_conv[i] = False
                        error_cnt += 1
                        break
                disp = res.x

                if abs(sp.log(disp) - sp.log(dispBef)) < 1e-4:
                    disp_adj[i] = disp
                    disp_adj_conv[i] = True
                    break
                else:
                    disp_adj[i] = disp
                    disp_adj_conv[i] = False

    if log:
        log_progress(idx.shape[0], idx.shape[0])
        print('')

    #if error_cnt > 0:
    #    print 'Warning: %i events did not fit due to a TypeError' % error_cnt

    return (disp_adj, disp_adj_conv, idx)
def build_cov_tensor_from_xcorrs(tf, chan_set, xcorrs, dtype=None, both=False):
    """builds a covariance tensor from a set of channel xcorrs

    The tensor will hold the forward (positive lags) covariances for all
    auto-/cross-correlations in the chan_set

    :type tf: int
    :param tf: desired lag in samples
    :type chan_set: list
    :param chan_set: list of channel ids to build the channel set from. the
        covariance tensor will be built so that the chan_set is indexed
        natively.
    :type xcorrs: XcorrStore
    :param xcorrs: XcorrStore object holding the xcorrs for various channel
        combinations
    :type dtype: dtype derivable
    :param dtype: will be passed to the constructor for the matrix returned.
        Default=None
    """
    # init and checks
    assert tf <= xcorrs._tf
    chan_set = sorted(chan_set)
    nc = len(chan_set)
    assert all(sp.diff(chan_set) >= 1)
    assert max(chan_set) < xcorrs._nc
    assert all([key in xcorrs for key in build_idx_set(chan_set)]), 'no data for requested channels'
    xc_len = tf + both * (tf - 1)
    rval = sp.empty((nc, nc, xc_len), dtype=dtype)

    # write single xcorrs
    for i in xrange(nc):
        m = chan_set[i]
        for j in xrange(i, nc):
            n = chan_set[j]
            xc = xcorrs[m, n]
            sample0 = xc.size / 2
            bakw = xc[:sample0 + 1][:tf - 1:-1]
            comb = None
            if both is True:
                rval[i, j, :] = xc[sample0 - tf + 1:sample0 + tf]
            else:
                rval[i, j, :] = xc[sample0:][:tf]
            if i != j:
                if both is True:
                    rval[j, i, :] = xc[::-1][sample0 - tf + 1:sample0 + tf]
                else:
                    rval[j, i, :] = xc[::-1][:tf]

    # return
    return rval
def __init__(self, columns={}, fields=None, BoxSize=1., BoxCenter=0., Position='Position', Velocity='Velocity', **attrs):
    super(SurveyCatalogue, self).__init__(columns=columns, fields=fields,
                                          Position=Position, Velocity=Velocity, **attrs)
    self.BoxSize = scipy.empty((3), dtype=scipy.float64)
    self.BoxSize[:] = BoxSize
    self._boxcenter = scipy.empty((3), dtype=scipy.float64)
    self._boxcenter[:] = BoxCenter
    self._rotation = scipy.eye(3, dtype=scipy.float64)
    self._translation = self._boxcenter.copy()
    self._compute_position = True
    self._compute_velocity = True
def __init__(self, ellsin, ellsout, w):
    self.logger.info('Setting multipoles to multipoles transforms.')
    self.conversion = scipy.empty((len(ellsout), len(ellsin)) + w(0, 0).shape, dtype=w(0, 0).dtype)
    for illout, ellout in enumerate(ellsout):
        for illin, ellin in enumerate(ellsin):
            ells, coeffs = coefficients(ellout, ellin[0])  # case ellin = (ell, n)
            self.conversion[illout][illin] = scipy.sum([coeff * w(ell, ellin[-1])
                                                        for ell, coeff in zip(ells, coeffs)], axis=0)
def _create_matrix_from_indexed_function(shape, func, symmetric_2d=False, **func_params):
    mat = sp.empty(shape)
    if symmetric_2d:
        for i in xrange(shape[0]):
            for j in xrange(i, shape[1]):
                mat[i, j] = mat[j, i] = func(i, j, **func_params)
    else:
        for idx in sp.ndindex(*shape):
            mat[idx] = func(*idx, **func_params)
    return mat
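# A minimal usage sketch for _create_matrix_from_indexed_function, assuming
# scipy is imported as sp. With symmetric_2d=True the function is evaluated
# once per (i, j) pair with j >= i and mirrored into the lower triangle.
import scipy as sp

dist = _create_matrix_from_indexed_function((4, 4), lambda i, j: abs(i - j), symmetric_2d=True)
print dist   # symmetric matrix of pairwise index distances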
def addType(self, dof_type):
    """ Input: dof_type = string of dof name """
    if isinstance(dof_type, str):
        if dof_type not in self.types:
            self.types.append(dof_type)
    else:
        raise TypeError(self.__type_str__)

    # Check if dofspace has enough columns for all dof types
    c_reqd = len(self.types) - np.size(self.dofspace, 1)
    if c_reqd > 0:
        c_new = np.empty((self.nrow, c_reqd))
        c_new[:] = np.nan
        self.dofspace = np.hstack((self.dofspace, c_new))