def run(self, phase=None, throats=None):
    logger.warning('This algorithm can take some time...')
    conduit_lengths = sp.sum(misc.conduit_lengths(network=self._net,
                                                  mode='centroid'), axis=1)
    graph = self._net.create_adjacency_matrix(data=conduit_lengths,
                                              sprsfmt='csr')
    if phase is not None:
        self._phase = phase
        if 'throat.occupancy' in self._phase.props():
            temp = conduit_lengths*(self._phase['throat.occupancy'] == 1)
            graph = self._net.create_adjacency_matrix(data=temp,
                                                      sprsfmt='csr',
                                                      prop='temp')
    path = spgr.shortest_path(csgraph=graph, method='D', directed=False)
    Px = sp.array(self._net['pore.coords'][:, 0], ndmin=2)
    Py = sp.array(self._net['pore.coords'][:, 1], ndmin=2)
    Pz = sp.array(self._net['pore.coords'][:, 2], ndmin=2)
    Cx = sp.square(Px.T - Px)
    Cy = sp.square(Py.T - Py)
    Cz = sp.square(Pz.T - Pz)
    Ds = sp.sqrt(Cx + Cy + Cz)
    temp = path / Ds
    temp[sp.isnan(temp)] = 0
    temp[sp.isinf(temp)] = 0
    return temp
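# A minimal sketch (added, not from the original source) of the broadcasting
# trick used above to build the dense matrix Ds of pairwise straight-line
# distances: for a row vector P of shape (1, N), P.T - P broadcasts to an
# (N, N) matrix of coordinate differences.  numpy is assumed here; sp.square
# and sp.sqrt are the same functions under the old scipy aliases.
import numpy as np

coords = np.array([[0., 0., 0.],
                   [3., 4., 0.],
                   [0., 0., 5.]])
Px, Py, Pz = (np.array(coords[:, i], ndmin=2) for i in range(3))
Ds = np.sqrt(np.square(Px.T - Px) + np.square(Py.T - Py) + np.square(Pz.T - Pz))
# Ds[0, 1] == 5.0 and Ds[0, 2] == 5.0, the pore-to-pore distances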
def relative_bin_deviation(h1, h2):  # 79 us @array, 104 us @list \w 100 bins
    r"""
    Calculate the bin-wise deviation between two histograms.

    The relative bin deviation between two histograms :math:`H` and :math:`H'` of size
    :math:`m` is defined as:

    .. math::

        d_{rbd}(H, H') = \sum_{m=1}^M
            \frac{
                \sqrt{(H_m - H'_m)^2}
            }{
                \frac{1}{2}
                \left(
                    \sqrt{H_m^2} +
                    \sqrt{{H'}_m^2}
                \right)
            }

    *Attributes:*

    - semimetric (triangle equation satisfied?)

    *Attributes for normalized histograms:*

    - :math:`d(H, H')\in[0, \infty)`
    - :math:`d(H, H) = 0`
    - :math:`d(H, H') = d(H', H)`

    *Attributes for not-normalized histograms:*

    - :math:`d(H, H')\in[0, \infty)`
    - :math:`d(H, H) = 0`
    - :math:`d(H, H') = d(H', H)`

    *Attributes for not-equal histograms:*

    - not applicable

    Parameters
    ----------
    h1 : sequence
        The first histogram.
    h2 : sequence
        The second histogram, same bins as ``h1``.

    Returns
    -------
    relative_bin_deviation : float
        Relative bin deviation between the two histograms.
    """
    h1, h2 = __prepare_histogram(h1, h2)
    numerator = scipy.sqrt(scipy.square(h1 - h2))
    denominator = (scipy.sqrt(scipy.square(h1)) + scipy.sqrt(scipy.square(h2))) / 2.
    # division by zero only occurs when a bin is zero in both histograms,
    # in which case the division is 0/0 and leads to (and should lead to) 0
    old_err_state = scipy.seterr(invalid='ignore')
    result = numerator / denominator
    scipy.seterr(**old_err_state)
    result[scipy.isnan(result)] = 0  # faster than scipy.nan_to_num, which checks for +inf and -inf also
    return scipy.sum(result)
def csr3(complex_n):
    # sp.angle uses arctan2, which resolves the quadrant; a plain
    # sp.arctan(a.imag/a.real) loses the sign of the real part, which is why
    # it does not work for arguments outside the right half-plane.
    ang = sp.angle(complex_n)
    r = sp.sqrt(sp.square(complex_n.real) + sp.square(complex_n.imag))
    if sp.sin(ang/2) >= 0:  # pick the root whose imaginary part is >= 0
        return sp.sqrt(r)*(complex(sp.cos(ang/2), sp.sin(ang/2)))
    else:
        return sp.sqrt(r)*(complex(sp.cos((ang/2)+sp.pi), sp.sin((ang/2)+sp.pi)))
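# Quick sanity check (added, not from the source): the returned root squares
# back to the input; assumes scipy is imported as sp, as csr3 above requires.
z = complex(3, 4)
w = csr3(z)
assert abs(w * w - z) < 1e-12  # here w == 2+1j, the principal square root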
def run(self, phase=None):
    r'''
    '''
    logger.warning('This algorithm can take some time...')
    graph = self._net.create_adjacency_matrix(data=self._net['throat.length'],
                                              sprsfmt='csr')
    if phase is not None:
        self._phase = phase
        if 'throat.occupancy' in self._phase.props():
            temp = self._net['throat.length'] * \
                (self._phase['throat.occupancy'] == 1)
            graph = self._net.create_adjacency_matrix(data=temp,
                                                      sprsfmt='csr',
                                                      prop='temp')
    # self._net.tic()
    path = spgr.shortest_path(csgraph=graph, method='D', directed=False)
    # self._net.toc()
    Px = sp.array(self._net['pore.coords'][:, 0], ndmin=2)
    Py = sp.array(self._net['pore.coords'][:, 1], ndmin=2)
    Pz = sp.array(self._net['pore.coords'][:, 2], ndmin=2)
    Cx = sp.square(Px.T - Px)
    Cy = sp.square(Py.T - Py)
    Cz = sp.square(Pz.T - Pz)
    Ds = sp.sqrt(Cx + Cy + Cz)
    temp = path/Ds
    # temp = path
    temp[sp.isnan(temp)] = 0
    temp[sp.isinf(temp)] = 0
    return temp
def relative_deviation(h1, h2):  # 18 us @array, 42 us @list \w 100 bins
    r"""
    Calculate the deviation between two histograms.

    The relative deviation between two histograms :math:`H` and :math:`H'` of size
    :math:`m` is defined as:

    .. math::

        d_{rd}(H, H') =
            \frac{
                \sqrt{\sum_{m=1}^M(H_m - H'_m)^2}
            }{
                \frac{1}{2}
                \left(
                    \sqrt{\sum_{m=1}^M H_m^2} +
                    \sqrt{\sum_{m=1}^M {H'}_m^2}
                \right)
            }

    *Attributes:*

    - semimetric (triangle equation satisfied?)

    *Attributes for normalized histograms:*

    - :math:`d(H, H')\in[0, \sqrt{2}]`
    - :math:`d(H, H) = 0`
    - :math:`d(H, H') = d(H', H)`

    *Attributes for not-normalized histograms:*

    - :math:`d(H, H')\in[0, 2]`
    - :math:`d(H, H) = 0`
    - :math:`d(H, H') = d(H', H)`

    *Attributes for not-equal histograms:*

    - not applicable

    Parameters
    ----------
    h1 : sequence
        The first histogram.
    h2 : sequence
        The second histogram, same bins as ``h1``.

    Returns
    -------
    relative_deviation : float
        Relative deviation between the two histograms.
    """
    h1, h2 = __prepare_histogram(h1, h2)
    numerator = math.sqrt(scipy.sum(scipy.square(h1 - h2)))
    denominator = (math.sqrt(scipy.sum(scipy.square(h1))) + math.sqrt(scipy.sum(scipy.square(h2)))) / 2.
    return numerator / denominator
def tsc(parameters, positions, values):
    # Triangular-shaped-cloud (TSC) assignment of particle values onto an
    # Ng**3 grid; `periodic_boundaries` is assumed to be a module-level flag.
    values_tsc = sp.zeros((parameters.Ng, parameters.Ng, parameters.Ng))
    cellsize = parameters.boxsize/parameters.Ng
    for position, pvalue in zip(positions, values):
        position = sp.array(position)
        position_cellunits = position/cellsize
        cell_indices = sp.floor(position_cellunits).astype(int)
        leftcell_indices = cell_indices - 1
        rightcell_indices = cell_indices + 1
        cell_position = cell_indices + 0.5
        leftcell_position = leftcell_indices + 0.5
        rightcell_position = rightcell_indices + 0.5
        particle_cell_distances = sp.absolute(position_cellunits - cell_position)
        particle_leftcell_distances = sp.absolute(position_cellunits - leftcell_position)
        particle_rightcell_distances = sp.absolute(position_cellunits - rightcell_position)
        weights_cell = 0.75 - sp.square(particle_cell_distances)
        weights_leftcell = 0.5*sp.square(1.5 - particle_leftcell_distances)
        weights_rightcell = 0.5*sp.square(1.5 - particle_rightcell_distances)
        if periodic_boundaries:
            cell_indices = sp.mod(cell_indices, parameters.Ng)
            leftcell_indices = sp.mod(leftcell_indices, parameters.Ng)
            rightcell_indices = sp.mod(rightcell_indices, parameters.Ng)
        indices_x, weights_x = [cell_indices[0], leftcell_indices[0], rightcell_indices[0]], \
            [weights_cell[0], weights_leftcell[0], weights_rightcell[0]]
        indices_y, weights_y = [cell_indices[1], leftcell_indices[1], rightcell_indices[1]], \
            [weights_cell[1], weights_leftcell[1], weights_rightcell[1]]
        indices_z, weights_z = [cell_indices[2], leftcell_indices[2], rightcell_indices[2]], \
            [weights_cell[2], weights_leftcell[2], weights_rightcell[2]]
        for index_x, weight_x in zip(indices_x, weights_x):
            for index_y, weight_y in zip(indices_y, weights_y):
                for index_z, weight_z in zip(indices_z, weights_z):
                    values_tsc[index_x][index_y][index_z] += \
                        pvalue*weight_x*weight_y*weight_z/cellsize**3
    return values_tsc
def cosine(h1, h2):  # 17 us @array, 42 us @list \w 100 bins
    r"""
    Cosine similarity.

    Compute the angle between the two histograms in vector space irrespective of their
    length. The cosine similarity between two histograms :math:`H` and :math:`H'` of size
    :math:`m` is defined as:

    .. math::

        d_{\cos}(H, H') = \cos\alpha = \frac{H * H'}{\|H\| \|H'\|} =
            \frac{\sum_{m=1}^M H_m*H'_m}{\sqrt{\sum_{m=1}^M H_m^2} * \sqrt{\sum_{m=1}^M {H'}_m^2}}

    *Attributes:*

    - not a metric, a similarity

    *Attributes for normalized histograms:*

    - :math:`d(H, H')\in[0, 1]`
    - :math:`d(H, H) = 1`
    - :math:`d(H, H') = d(H', H)`

    *Attributes for not-normalized histograms:*

    - :math:`d(H, H')\in[-1, 1]`
    - :math:`d(H, H) = 1`
    - :math:`d(H, H') = d(H', H)`

    *Attributes for not-equal histograms:*

    - not applicable

    Parameters
    ----------
    h1 : sequence
        The first histogram.
    h2 : sequence
        The second histogram, same bins as ``h1``.

    Returns
    -------
    cosine : float
        Cosine similarity.

    Notes
    -----
    The resulting similarity ranges from -1 meaning exactly opposite, to 1 meaning
    exactly the same, with 0 usually indicating independence, and in-between values
    indicating intermediate similarity or dissimilarity.
    """
    h1, h2 = __prepare_histogram(h1, h2)
    return scipy.sum(h1 * h2) / math.sqrt(scipy.sum(scipy.square(h1)) * scipy.sum(scipy.square(h2)))
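# Hedged usage sketch (added): with numpy standing in for the old scipy
# aliases, identical histograms give similarity 1 and orthogonal ones give 0,
# matching cosine() above when __prepare_histogram merely casts its inputs to
# equal-length float arrays.
import math
import numpy as np

h1 = np.array([1., 0., 1., 0.])
h2 = np.array([0., 1., 0., 1.])
sim_same = np.sum(h1 * h1) / math.sqrt(np.sum(np.square(h1)) * np.sum(np.square(h1)))  # 1.0
sim_orth = np.sum(h1 * h2) / math.sqrt(np.sum(np.square(h1)) * np.sum(np.square(h2)))  # 0.0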
def relative_bin_deviation(h1, h2):  # 79 us @array, 104 us @list \w 100 bins
    r"""
    Calculate the bin-wise deviation between two histograms.
    The relative bin deviation between two histograms \f$H\f$ and \f$H'\f$ of size
    \f$m\f$ is defined as
    \f[
        d_{rbd}(H, H') = \sum_{m=1}^M
            \frac{
                \sqrt{(H_m - H'_m)^2}
            }{
                \frac{1}{2}
                \left(
                    \sqrt{H_m^2} +
                    \sqrt{{H'}_m^2}
                \right)
            }
    \f]

    Attributes:
    - semimetric (triangle equation satisfied?)

    Attributes for normalized histograms:
    - \f$d(H, H')\in[0, \infty)\f$
    - \f$d(H, H) = 0\f$
    - \f$d(H, H') = d(H', H)\f$

    Attributes for not-normalized histograms:
    - \f$d(H, H')\in[0, \infty)\f$
    - \f$d(H, H) = 0\f$
    - \f$d(H, H') = d(H', H)\f$

    Attributes for not-equal histograms:
    - not applicable

    @param h1 the first histogram
    @type h1 array-like sequence
    @param h2 the second histogram, same bins as h1
    @type h2 array-like sequence

    @return relative bin deviation
    @rtype float
    """
    h1, h2 = __prepare_histogram(h1, h2)
    numerator = scipy.sqrt(scipy.square(h1 - h2))
    denominator = (scipy.sqrt(scipy.square(h1)) + scipy.sqrt(scipy.square(h2))) / 2.0
    # division by zero only occurs when a bin is zero in both histograms,
    # in which case the division is 0/0 and leads to (and should lead to) 0
    old_err_state = scipy.seterr(invalid="ignore")
    result = numerator / denominator
    scipy.seterr(**old_err_state)
    result[scipy.isnan(result)] = 0  # faster than scipy.nan_to_num, which checks for +inf and -inf also
    return scipy.sum(result)
def __fit(index1, index2):
    from scipy import stats, sqrt, square
    # do the fit
    (cijFitted, intercept, r, tt, stderr) = stats.linregress(strain[:, index2-1], stress[:, index1-1])
    if S.__version__ < '0.7.0':
        # correct for scipy weirdness - see http://www.scipy.org/scipy/scipy/ticket/8
        # This was fixed before 0.7.0 release. Maybe in some versions of 0.6.x too -
        # will report huge errors if the check is wrong
        stderr = S.sqrt((numsteps * stderr**2)/(numsteps-2))
        error = stderr/sqrt(sum(square(strain[:, index2-1])))
    else:
        # Work out the error ourselves as I cannot get it from
        # stderr and this has been checked with gnuplot's fitter
        fit_str = (strain[:, index2-1] * cijFitted) + intercept
        error = sqrt((sum(square(stress[:, index1-1] - fit_str)) /
                      (numsteps-2))/(sum(square(strain[:, index2-1]))))
    # print info about the fit
    print('\n')
    print('Cij (gradient)          : ', cijFitted)
    print('Error in Cij            : ', error)
    print('Intercept               : ', intercept)
    if abs(r) > 0.9:
        print('Correlation coefficient : ', r)
    else:
        print('Correlation coefficient : ', r, ' <----- WARNING')
    # if using graphics, add a subplot
    if options.graphics:
        # position this plot in a 6x6 grid
        sp = P.subplot(6, 6, 6*(index1-1)+index2)
        sp.set_axis_on()
        # change the labels on the axes
        xlabels = sp.get_xticklabels()
        P.setp(xlabels, 'rotation', 90, fontsize=7)
        ylabels = sp.get_yticklabels()
        P.setp(ylabels, fontsize=7)
        # colour the plot depending on the strain pattern
        sp.set_axis_bgcolor(colourDict[patt])
        # plot the data
        P.plot([strain[0, index2-1], strain[numsteps-1, index2-1]],
               [cijFitted*strain[0, index2-1]+intercept,
                cijFitted*strain[numsteps-1, index2-1]+intercept])
        P.plot(strain[:, index2-1], stress[:, index1-1], 'ro')
    return cijFitted, error
def conduit_lengths(network, throats=None, mode='pore'):
    r"""
    Return the respective lengths of the conduit components defined by the
    throat conns P1 - T - P2

    Notes
    -----
    - ``mode = 'pore'`` uses pore coordinates
    - ``mode = 'centroid'`` uses pore and throat centroids
    """
    if throats is None:
        throats = network.throats()
    Ps = network['throat.conns']
    pdia = network['pore.diameter']

    if mode == 'centroid':
        try:
            pcentroids = network['pore.centroid']
            tcentroids = network['throat.centroid']
            if _sp.sum(_sp.isnan(pcentroids)) + _sp.sum(_sp.isnan(tcentroids)) > 0:
                mode = 'pore'
            else:
                plen1 = _sp.sqrt(_sp.sum(_sp.square(pcentroids[Ps[:, 0]] -
                                 tcentroids), 1)) - network['throat.length']/2
                plen2 = _sp.sqrt(_sp.sum(_sp.square(pcentroids[Ps[:, 1]] -
                                 tcentroids), 1)) - network['throat.length']/2
        except KeyError:
            mode = 'pore'
    if mode == 'pore':
        # Find half-lengths of each pore
        pcoords = network['pore.coords']
        # Find the pore-to-pore distance, minus the throat length
        lengths = _sp.sqrt(_sp.sum(_sp.square(pcoords[Ps[:, 0]] -
                           pcoords[Ps[:, 1]]), 1)) - network['throat.length']
        lengths[lengths < 0.0] = 2e-9
        # Calculate the fraction of that distance from the first pore
        try:
            fractions = pdia[Ps[:, 0]]/(pdia[Ps[:, 0]] + pdia[Ps[:, 1]])
            # Don't allow zero lengths
            # fractions[fractions == 0.0] = 0.5
            # fractions[fractions == 1.0] = 0.5
        except:
            fractions = 0.5
        plen1 = lengths*fractions
        plen2 = lengths*(1 - fractions)
    return _sp.vstack((plen1, network['throat.length'], plen2)).T[throats]
def euclidean(h1, h2):  # 9 us @array, 33 us @list \w 100 bins
    """
    Equal to the Minkowski distance with p=2.
    @see minowski()
    """
    h1, h2 = __prepare_histogram(h1, h2)
    return math.sqrt(scipy.sum(scipy.square(scipy.absolute(h1 - h2))))
def get_chisq(self, pops, writeback=False):
    """Calculate the chi-square goodness-of-fit between the experimental
    population abundances and the fitted ones.

    Arguments
    ---------
    pops : ndarray
        The normalized abundances of each lattice+ligand stoichiometry at
        each of the provided component concentrations.
    writeback : boolean
        Update the experiment's simulated heat attribute (dQ_fit) with the
        provided Qs?

    Returns
    -------
    float
        The goodness of the fit, as a reduced chi-square.

    Notes
    -----
    This method takes advantage of the fact that ITCSim doesn't inspect the
    data that is returned by the model, and instead lets the associated
    experiment handle the goodness-of-fit.  The only caveat of course is that
    the model must return the same number of lattice+ligand stoichiometries
    as are present in the experimental results.  Variances (sigma**2) must
    have been precomputed as self.PopSigmas (perhaps should be
    self.PopVariances?).
    """
    assert self.PopIntens.shape == pops.shape
    self.chisq = scipy.sum(scipy.square(self.PopIntens - pops) / self.PopSigmas) / self.PopIntens.size
    if writeback:
        self.PopFits = pops
    return self.chisq
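# Worked standalone example (added, hypothetical numbers) of the reduced
# chi-square used above: chisq = sum((obs - fit)^2 / sigma^2) / N.
import numpy as np

obs = np.array([1.0, 2.0, 3.0])
fit = np.array([1.1, 1.9, 3.2])
var = np.array([0.04, 0.04, 0.04])  # sigma**2 per point
chisq = np.sum(np.square(obs - fit) / var) / obs.size
# (0.01/0.04 + 0.01/0.04 + 0.04/0.04) / 3 = 0.5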
def learn(self, X, t, tol=0.01, amax=1e10):
    """Train the model."""
    N = X.shape[0]
    a = sp.ones(N+1)  # hyperparameter
    b = 1.0
    phi = sp.ones((N, N+1))  # design matrix
    phi[:, 1:] = [[self._kernel(xi, xj) for xj in X] for xi in X]
    diff = 1
    while diff >= tol:
        sigma = spla.inv(sp.diag(a) + b * sp.dot(phi.T, phi))
        m = b * sp.dot(sigma, sp.dot(phi.T, t))
        gamma = sp.ones(N+1) - a * sigma.diagonal()
        anew = gamma / (m * m)
        bnew = (N - gamma.sum()) / sp.square(spla.norm(t - sp.dot(phi, m)))
        anew[anew >= amax] = amax
        adiff, bdiff = anew - a, bnew - b
        diff = (adiff * adiff).sum() + bdiff * bdiff
        a, b = anew, bnew
        print(".", end="")
    self._a = a
    self._b = b
    self._X = X
    self._m = m
    self._sigma = sigma
    self._amax = amax
def errorApproximation(self, ratio, dim=20):
    self.buildMatrix()
    sumNonzeros = (self.vxm != 0).sum()
    numTest = int(ratio*sumNonzeros)
    elementList = []
    nonZeroTuple = sp.nonzero(self.vxm)
    # randomly pick numTest non-zero entries to hold out as a test set
    for x in range(int(numTest)):
        rInt = sp.random.randint(0, nonZeroTuple[0].size)
        randrow = nonZeroTuple[0][rInt]
        randcolumn = nonZeroTuple[1][rInt]
        valElementIndex = [randrow, randcolumn]
        elementList.append(valElementIndex)
    self.modvxm = sp.copy(self.vxm)
    for x in elementList:
        self.modvxm[x[0], x[1]] = 0
    self.modvxm = self.fillAverages(vxm=self.modvxm)
    self.newmodvxm = self.predict(dim, vxm=self.modvxm)
    # root-mean-square error over the held-out entries
    sqDiff = 0
    for x in elementList:
        sqDiff += sp.square(self.newmodvxm[x[0], x[1]] - self.vxm[x[0], x[1]])
    self.rmse = sp.sqrt(sqDiff/len(elementList))
def predict_gmm(self, testSamples, tau=0):
    """ Function that predicts the label for testSamples using the learned model
        Inputs:
            testSamples: the samples to be classified
            tau: regularization parameter
        Outputs:
            predLabels: the class
            scores: the decision value for each class
    """
    # Get information from the data
    nbTestSpl = testSamples.shape[0]  # Number of testing samples

    # Initialization
    scores = sp.empty((nbTestSpl, self.C))

    # Start the prediction for each class
    for c in range(self.C):
        testSamples_c = testSamples - self.mean[c, :]
        regvp = self.vp[c, :] + tau
        logdet = sp.sum(sp.log(regvp))
        cst = logdet - 2*sp.log(self.prop[c])  # Pre compute the constant term
        # compute ||lambda^{-0.5}q^T(x-mu)||^2 + cst for all samples
        scores[:, c] = sp.sum(sp.square(sp.dot((self.Q[c, :, :]/sp.sqrt(regvp)).T, testSamples_c.T)), axis=0) + cst
        del testSamples_c

    # Assign the label to the minimum value of scores
    predLabels = sp.argmin(scores, 1) + 1
    return predLabels, scores
def __fit(index1, index2):
    from scipy import stats, sqrt, square
    print(strain)
    print(stress)
    (cijFitted, intercept, r, tt, stderr) = stats.linregress(strain[:, index2-1], stress[:, index1-1])
    if S.__version__ < '0.7.0':
        # correct for a scipy stderr bug fixed before the 0.7.0 release
        stderr = S.sqrt((numsteps * stderr**2)/(numsteps-2))
        error = stderr/sqrt(sum(square(strain[:, index2-1])))
    else:
        fit_str = (strain[:, index2-1] * cijFitted) + intercept
        error = sqrt((sum(square(stress[:, index1-1] - fit_str)) /
                      (numsteps-2))/(sum(square(strain[:, index2-1]))))
    print('Cij       ', cijFitted)
    print('Error     ', error)
    print('intercept ', intercept)
    return cijFitted, error
def propup(self, X, eps=1e-8):
    # Forward pass: linear features, a smooth absolute value, then two rounds
    # of row-wise L2 normalization.
    #~ F = self.W.dot(X.T)
    F = X.dot(self.W.T).T
    Fs = sqrt(square(F) + eps)   # soft absolute value of the features
    NFs, L2Fs = l2row(Fs)        # normalize rows of the feature matrix
    Fhat, L2Fn = l2row(NFs.T)    # normalize rows of the transpose
    return F, Fs, NFs, L2Fs, Fhat, L2Fn
def objF(x):
    global INDEX
    modelname = 'Model-'+str(INDEX)
    INDEX += 1
    K, e0, n, rth, rz = x
    parts = [7.85e-9, 210000.0, .3, K, e0, n, rth, rz]
    paramFile = open('result.txt', 'a+')
    paramFile.write('%s %10.6E %10.6E %10.6E %10.6E %10.6E ' % (str(INDEX), K, e0, n, rth, rz))
    paramFile.flush()
    material = {'part': parts}
    shapes = {'outDimater': 72.5, 'thick': 3.65, 'length': 145}
    mesh = {'tube': 100}
    timelist = [.20, .21, .22, .24]
    amps = [.5547, .576, .5824, .5982]
    midpairs = [(.05, .0329), (.1, .2246), (.15, .4013), (.18, .4957)]
    args = {'timelist': timelist, 'amp': amps, 'midpairs': midpairs}
    load = args
    inits = {}
    BCs = {}
    positions = {}
    meshSize = {'pressDie': shapes['thick']*3/0.6, 'tube': shapes['thick']/0.6 * 2}
    t = TBFEA(modelname)
    t.setParameter(shapes, material, positions, inits,
                   len(timelist), BCs, load, meshSize, args)
    t.setModels()
    coords = t.getResults()
    npFEA = scipy.array(coords)
    npExp = scipy.array([37.00, 37.55, 38.53, 40.58])
    npres = npFEA - npExp
    # sum of squared residuals between FEA prediction and experiment
    mse = scipy.sum(scipy.square(npres))
    paramFile.write('%10.6E\n' % (mse))
    paramFile.close()
    return mse
def relative_deviation(h1, h2):  # 18 us @array, 42 us @list \w 100 bins
    r"""
    Calculate the deviation between two histograms.
    The relative deviation between two histograms \f$H\f$ and \f$H'\f$ of size \f$m\f$
    is defined as
    \f[
        d_{rd}(H, H') =
            \frac{
                \sqrt{\sum_{m=1}^M(H_m - H'_m)^2}
            }{
                \frac{1}{2}
                \left(
                    \sqrt{\sum_{m=1}^M H_m^2} +
                    \sqrt{\sum_{m=1}^M {H'}_m^2}
                \right)
            }
    \f]

    Attributes:
    - semimetric (triangle equation satisfied?)

    Attributes for normalized histograms:
    - \f$d(H, H')\in[0, \sqrt{2}]\f$
    - \f$d(H, H) = 0\f$
    - \f$d(H, H') = d(H', H)\f$

    Attributes for not-normalized histograms:
    - \f$d(H, H')\in[0, 2]\f$
    - \f$d(H, H) = 0\f$
    - \f$d(H, H') = d(H', H)\f$

    Attributes for not-equal histograms:
    - not applicable

    @param h1 the first histogram
    @type h1 array-like sequence
    @param h2 the second histogram, same bins as h1
    @type h2 array-like sequence

    @return relative deviation
    @rtype float
    """
    h1, h2 = __prepare_histogram(h1, h2)
    numerator = math.sqrt(scipy.sum(scipy.square(h1 - h2)))
    denominator = (math.sqrt(scipy.sum(scipy.square(h1))) + math.sqrt(scipy.sum(scipy.square(h2)))) / 2.0
    return numerator / denominator
def correlate(h1, h2):  # 31 us @array, 55 us @list \w 100 bins
    r"""
    Compute the correlation between two histograms.
    The histogram correlation between two histograms \f$H\f$ and \f$H'\f$ of size
    \f$m\f$ is defined as
    \f[
        d_{corr}(H, H') =
            \frac{
                \sum_{m=1}^M (H_m-\bar{H}) \cdot (H'_m-\bar{H'})
            }{
                \sqrt{\sum_{m=1}^M (H_m-\bar{H})^2 \cdot \sum_{m=1}^M (H'_m-\bar{H'})^2}
            }
    \f]
    with \f$\bar{H}\f$ and \f$\bar{H'}\f$ being the mean values of \f$H\f$ resp. \f$H'\f$

    Attributes:
    - not a metric, a similarity

    Attributes for normalized histograms:
    - \f$d(H, H')\in[-1, 1]\f$
    - \f$d(H, H) = 1\f$
    - \f$d(H, H') = d(H', H)\f$

    Attributes for not-normalized histograms:
    - \f$d(H, H')\in[-1, 1]\f$
    - \f$d(H, H) = 1\f$
    - \f$d(H, H') = d(H', H)\f$

    Attributes for not-equal histograms:
    - not applicable

    @note returns 0 if one of h1 or h2 contains only zeros.

    @param h1 the first histogram
    @type h1 array-like sequence
    @param h2 the second histogram, same bins as h1
    @type h2 array-like sequence
    """
    h1, h2 = __prepare_histogram(h1, h2)
    h1m = h1 - scipy.sum(h1) / float(h1.size)
    h2m = h2 - scipy.sum(h2) / float(h2.size)
    a = scipy.sum(scipy.multiply(h1m, h2m))
    b = math.sqrt(scipy.sum(scipy.square(h1m)) * scipy.sum(scipy.square(h2m)))
    return 0 if 0 == b else a / b
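# Hedged usage sketch (added): perfectly anti-correlated histograms give -1,
# and the all-zero guard above returns 0 instead of dividing by zero.  numpy
# stands in for the old scipy aliases.
import math
import numpy as np

h1 = np.array([1., 2., 3.])
h2 = np.array([3., 2., 1.])
h1m = h1 - h1.sum() / h1.size
h2m = h2 - h2.sum() / h2.size
a = np.sum(h1m * h2m)
b = math.sqrt(np.sum(np.square(h1m)) * np.sum(np.square(h2m)))
corr_val = 0 if b == 0 else a / b  # -1.0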
def jrangesearch(X, Y, epsilon):
    # Pairwise range search: M marks the pairs closer than epsilon and MD2
    # holds their squared distances.  X and Y are assumed to have the same
    # number of rows, since both sparse matrices are built with shape (I, I).
    D = distance.cdist(X, Y)
    MD2full = square(D)
    r, c, v = find(D < epsilon)
    dM = np.ones(r.shape)
    dMD2 = MD2full[r, c]
    I = X.shape[0]
    M = sp.csr_matrix((dM, (r, c)), shape=(I, I))
    MD2 = sp.csr_matrix((dMD2, (r, c)), shape=(I, I))
    return (M, MD2)
def variogram_data(div=15):
    # zvals, xvals, yvals and num are assumed to be module-level data.
    # Square difference between z-values
    print(num * (num - 1) / 2)  # check number of pairs
    z_pairs = scipy.array(list(itertools.combinations(zvals, r=2)))
    z_sqdif = scipy.square(z_pairs[:, 0] - z_pairs[:, 1])
    vg = pandas.DataFrame()
    vg['z_sqdif'] = z_sqdif
    # Square distance along x-axis
    x_pairs = scipy.array(list(itertools.combinations(xvals, r=2)))
    x_sqdif = scipy.square(x_pairs[:, 0] - x_pairs[:, 1])
    # Square distance along y-axis
    y_pairs = scipy.array(list(itertools.combinations(yvals, r=2)))
    y_sqdif = scipy.square(y_pairs[:, 0] - y_pairs[:, 1])
    # Linear distance between points
    xy_dist = scipy.sqrt(x_sqdif + y_sqdif)
    vg['xy_dist'] = xy_dist
    vg.to_csv('hw5_variogram_data.csv', index=False)
def euclidean(h1, h2):  # 9 us @array, 33 us @list \w 100 bins
    r"""
    Equal to the Minkowski distance with :math:`p=2`.

    See also
    --------
    minowski
    """
    h1, h2 = __prepare_histogram(h1, h2)
    return math.sqrt(scipy.sum(scipy.square(scipy.absolute(h1 - h2))))
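# Usage sketch (added): with p=2 the Minkowski distance reduces to the
# familiar Euclidean norm of the bin-wise differences (numpy used here).
import numpy as np
d = np.sqrt(np.sum(np.square(np.absolute(np.array([0., 0.]) - np.array([3., 4.])))))  # 5.0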
def conduit_lengths(network, throats=None, mode='pore'):
    r"""
    Return the respective lengths of the conduit components defined by the
    throat conns P1 - T - P2

    - ``mode = 'pore'`` uses pore coordinates
    - ``mode = 'centroid'`` uses pore and throat centroids
    """
    if throats is None:
        throats = network.throats()
    Ps = network['throat.conns']
    pdia = network['pore.diameter']

    if mode == 'centroid':
        try:
            pcentroids = network['pore.centroid']
            tcentroids = network['throat.centroid']
            if _sp.sum(_sp.isnan(pcentroids)) + _sp.sum(_sp.isnan(tcentroids)) > 0:
                mode = 'pore'
            else:
                plen1 = _sp.sqrt(_sp.sum(_sp.square(pcentroids[Ps[:, 0]] -
                                 tcentroids), 1)) - network['throat.length']/2
                plen2 = _sp.sqrt(_sp.sum(_sp.square(pcentroids[Ps[:, 1]] -
                                 tcentroids), 1)) - network['throat.length']/2
        except KeyError:
            mode = 'pore'
    if mode == 'pore':
        # Find half-lengths of each pore
        pcoords = network['pore.coords']
        # Find the pore-to-pore distance, minus the throat length
        lengths = _sp.sqrt(_sp.sum(_sp.square(pcoords[Ps[:, 0]] -
                           pcoords[Ps[:, 1]]), 1)) - network['throat.length']
        # Calculate the fraction of that distance from the first pore
        try:
            fractions = pdia[Ps[:, 0]]/(pdia[Ps[:, 0]] + pdia[Ps[:, 1]])
        except:
            fractions = 0.5
        plen1 = lengths*fractions
        plen2 = lengths*(1 - fractions)
    return _sp.vstack((plen1, network['throat.length'], plen2)).T[throats]
def calculateELBO(self):
    Z = self.markov_blanket["Z"].getExpectation()
    Wtmp = self.markov_blanket["SW"].getExpectations()
    Ztmp = self.markov_blanket["Z"].getExpectations()
    zeta = self.params["zeta"]
    SW, SWW = Wtmp["E"], Wtmp["ESWW"]
    Z, ZZ = Ztmp["E"], Ztmp["E2"]
    mask = self.getMask()

    # Compute Lower Bound using the Bernoulli likelihood and the observed data
    # BOTH ARE WRONG AS THEY EXCHANGE LOG AND EXPECTATIONS
    # lb = self.obs.data*tmp - s.log(1.+s.exp(tmp))
    # lb = s.log(1.+s.exp(-(2.*self.obs-1)*tmp))  # DAMIEN'S suggestion
    # lb[mask] = 0.

    # Compute Lower Bound using the gaussian likelihood with pseudo data
    # MISSING CONSTANT TERM
    # term1 = 0.5*s.log(self.params["zeta"])
    # term2 = 0.5*self.params["zeta"]*(self.E-tmp)**2
    # lb = term1 - term2
    # lb[mask] = 0.

    # Compute Evidence Lower Bound using the lower bound to the likelihood

    # calculate E(Z)E(W)
    ZW = Z.dot(SW.T)
    ZW[mask] = 0.

    # Calculate E[(ZW_nd)^2]
    # this is equal to E[\sum_{k != k'} z_k w_k z_k' w_k'] + E[\sum_{k} z_k^2 w_k^2]
    tmp1 = s.square(ZW) - s.dot(s.square(Z), s.square(SW).T)  # this is for terms in k != k'
    tmp2 = ZZ.dot(SWW.T)  # this is for terms in k = k'
    EZZWW = tmp1 + tmp2

    # calculate elbo terms
    term1 = 0.5 * ((2.*self.obs.data - 1.)*ZW - zeta)
    term2 = - s.log(1 + s.exp(-zeta))
    term3 = - 1/(4 * zeta) * s.tanh(zeta/2.) * (EZZWW - zeta**2)

    lb = term1 + term2 + term3
    lb[mask] = 0.

    return lb.sum()
def _has_converged(self, prev_ranks, ranks):
    ranks = np.array(ranks)
    if self.error_type == "msqrt":
        return (scipy.square(ranks - prev_ranks).sum()/ranks.size)**0.5 < self.tol
    elif self.error_type == "mabs":
        return scipy.absolute(ranks - prev_ranks).sum()/ranks.size < self.tol
    elif self.error_type == "small_value":
        return scipy.absolute(ranks).sum()/ranks.size < self.tol
    else:
        raise Exception("Supported error types: msqrt, mabs, small_value")
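# Illustration (added, hypothetical numbers): the "msqrt" criterion above is
# the root-mean-square change in the rank vector between iterations.
import numpy as np
prev = np.array([0.5, 0.5])
cur = np.array([0.5001, 0.4999])
rms = (np.square(cur - prev).sum() / cur.size)**0.5  # 1e-4, converged if tol > 1e-4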
def calcglobalcost_WDTW(self, Wmax, g):
    """Calculate the weighted DTW (WDTW) global cost matrix.

    Takes the maximum weight Wmax and the restriction constant g (the
    smaller g is, the more constrained the DTW becomes)."""
    # math.square/math.minimum/math.absolute do not exist in the stdlib math
    # module; plain arithmetic, min() and abs() are used instead.
    self.Globalcost_[0, 0] = (self.FirstVector_[0] - self.SecondVector_[0])**2
    temp = 0
    for i in range(1, self.FirstVector_.shape[0]):
        weight = Wmax/(1 + math.exp(-g))
        self.Globalcost_[0, i] = (weight*(self.FirstVector_[i] - self.SecondVector_[0]))**2 \
            + self.Globalcost_[0, i-1]
    for i in range(1, self.SecondVector_.shape[0]):
        weight = Wmax/(1 + math.exp(-g))
        self.Globalcost_[i, 0] = (weight*(self.SecondVector_[i] - self.FirstVector_[0]))**2 \
            + self.Globalcost_[i-1, 0]
    for i in range(1, self.SecondVector_.shape[0]):
        for j in range(1, self.FirstVector_.shape[0]):
            weight = Wmax/(1 + math.exp(-g*abs(i - j)))
            self.Globalcost_[i, j] = (weight*(self.FirstVector_[j] - self.SecondVector_[i]))**2
            temp = min(self.Globalcost_[i-1, j], self.Globalcost_[i, j-1])
            temp = min(self.Globalcost_[i-1, j-1], temp)
            self.Globalcost_[i, j] += temp
def __fit(index1, index2):
    from scipy import stats, sqrt, square
    # do the fit
    (cijFitted, intercept, r, tt, stderr) = stats.linregress(strain[:, index2-1], stress[:, index1-1])
    (vmajor, vminor, vmicro) = re.split(r'\.', S.__version__)
    if int(vmajor) > 0 or int(vminor) >= 7:
        error = stderr
    else:
        # correct for scipy weirdness - see http://www.scipy.org/scipy/scipy/ticket/8
        # This was fixed before 0.7.0 release. Maybe in some versions of 0.6.x too -
        # will report huge errors if the check is wrong
        stderr = S.sqrt((numsteps * stderr**2) / (numsteps-2))
        error = stderr / sqrt(sum(square(strain[:, index2-1])))
    # print info about the fit
    print('\n')
    print('Cij (gradient)          : ', cijFitted)
    print('Error in Cij            : ', error)
    print('Intercept               : ', intercept)
    if abs(r) > 0.9:
        print('Correlation coefficient : ', r)
    else:
        print('Correlation coefficient : ', r, ' <----- WARNING')
    # if using graphics, add a subplot
    if options.graphics:
        # position this plot in a 6x6 grid
        sp = P.subplot(6, 6, 6*(index1-1)+index2)
        sp.set_axis_on()
        # change the labels on the axes
        xlabels = sp.get_xticklabels()
        P.setp(xlabels, 'rotation', 90, fontsize=7)
        ylabels = sp.get_yticklabels()
        P.setp(ylabels, fontsize=7)
        # colour the plot depending on the strain pattern
        sp.set_axis_bgcolor(colourDict[patt])
        # plot the data
        P.plot([strain[0, index2-1], strain[numsteps-1, index2-1]],
               [cijFitted*strain[0, index2-1]+intercept,
                cijFitted*strain[numsteps-1, index2-1]+intercept])
        P.plot(strain[:, index2-1], stress[:, index1-1], 'ro')
    return cijFitted, error
def cylinders(shape: List[int], radius: int, nfibers: int,
              phi_max: float = 0, theta_max: float = 90):
    r"""
    Generates a binary image of overlapping cylinders.  This is a good
    approximation of a fibrous mat.

    Parameters
    ----------
    shape : list of int
        The size of the image to generate, in voxels.
    radius : scalar
        The radius of the cylinders, in voxels.
    nfibers : scalar
        The number of fibers to add to the domain.
    phi_max : scalar
        A value between 0 and 90 that controls the amount that the fibers
        lie out of the XY plane, with 0 meaning all fibers lie in the XY
        plane, and 90 meaning that fibers are randomly oriented out of the
        plane by as much as +/- 90 degrees.
    theta_max : scalar
        A value between 0 and 90 that controls the amount of rotation in the
        XY plane, with 0 meaning all fibers point in the X-direction, and 90
        meaning they are randomly rotated about the Z axis by as much as
        +/- 90 degrees.

    Returns
    -------
    A boolean array with True values denoting the pore space
    """
    shape = sp.array(shape)
    if sp.size(shape) == 1:
        shape = sp.full((3, ), int(shape))
    elif sp.size(shape) == 2:
        raise Exception("2D fibers don't make sense")
    im = sp.zeros(shape)
    R = sp.sqrt(sp.sum(sp.square(shape)))  # length scale: the domain diagonal
    n = 0
    while n < nfibers:
        x = sp.rand(3) * shape
        phi = sp.deg2rad(90 + 90 * (0.5 - sp.rand()) * phi_max / 90)
        theta = sp.deg2rad(180 - 90 * (0.5 - sp.rand()) * 2 * theta_max / 90)
        X0 = R * sp.array([sp.sin(theta) * sp.cos(phi),
                           sp.sin(theta) * sp.sin(phi),
                           sp.cos(theta)])
        [X0, X1] = [X0 + x, -X0 + x]
        crds = line_segment(X0, X1)
        lower = ~sp.any(sp.vstack(crds).T < [0, 0, 0], axis=1)
        upper = ~sp.any(sp.vstack(crds).T >= shape, axis=1)
        valid = upper * lower
        if sp.any(valid):
            im[crds[0][valid], crds[1][valid], crds[2][valid]] = 1
            n += 1
    im = sp.array(im, dtype=bool)
    dt = spim.distance_transform_edt(~im) < radius
    return ~dt
def chi_square(h1, h2):  # 23 us @array, 49 us @list \w 100
    r"""
    Chi-square distance.

    Measure how unlikely it is that one distribution (histogram) was drawn from the
    other. The Chi-square distance between two histograms :math:`H` and :math:`H'` of
    size :math:`m` is defined as:

    .. math::

        d_{\chi^2}(H, H') = \sum_{m=1}^M
            \frac{
                (H_m - H'_m)^2
            }{
                H_m + H'_m
            }

    *Attributes:*

    - semimetric

    *Attributes for normalized histograms:*

    - :math:`d(H, H')\in[0, 2]`
    - :math:`d(H, H) = 0`
    - :math:`d(H, H') = d(H', H)`

    *Attributes for not-normalized histograms:*

    - :math:`d(H, H')\in[0, \infty)`
    - :math:`d(H, H) = 0`
    - :math:`d(H, H') = d(H', H)`

    *Attributes for not-equal histograms:*

    - not applicable

    Parameters
    ----------
    h1 : sequence
        The first histogram.
    h2 : sequence
        The second histogram.

    Returns
    -------
    chi_square : float
        Chi-square distance.
    """
    h1, h2 = __prepare_histogram(h1, h2)
    # division by zero only occurs when a bin is zero in both histograms,
    # in which case the division is 0/0 and leads to (and should lead to) 0
    old_err_state = scipy.seterr(invalid='ignore')
    result = scipy.square(h1 - h2) / (h1 + h2)
    scipy.seterr(**old_err_state)
    result[scipy.isnan(result)] = 0  # faster than scipy.nan_to_num, which checks for +inf and -inf also
    return scipy.sum(result)
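# Hedged usage sketch (added): bins that are zero in both histograms
# contribute 0 rather than raising a division error; numpy's errstate plays
# the role of the scipy.seterr dance above.
import numpy as np
h1 = np.array([1., 0., 1.])
h2 = np.array([0., 0., 1.])
with np.errstate(invalid='ignore'):
    result = np.square(h1 - h2) / (h1 + h2)
result[np.isnan(result)] = 0
d = np.sum(result)  # 1.0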
def ASM(glcm):
    '''Calculates Angular Second Moment from a GLCM'''
    # Vectorized equivalent of the explicit loop:
    # value = 0
    # for r in range(glcm.shape[0]):
    #     for c in range(glcm.shape[1]):
    #         if glcm[r, c] != 0:
    #             value += glcm[r, c]**2
    value = sp.square(glcm).sum()
    return value
def loss_to_pair(self, pair, atg_a, atg_b, pl_exp=4, gamma=1e2):
    # Air-to-ground path loss: 3D distance and elevation angle to the pair,
    # then a sigmoid line-of-sight probability weighting the LOS/NLOS terms.
    dist = sp.sqrt(sp.square(pair.tx_x) + sp.square(pair.tx_y) + sp.square(self.h))
    phi = (180/sp.pi)*sp.arcsin(self.h/dist)  # elevation angle in degrees
    pr_LOS = 1/(1 + atg_a*sp.exp(-atg_b*(phi - atg_a)))
    pr_NLOS = 1 - pr_LOS
    total_loss = pr_LOS*sp.power(dist, -pl_exp) + pr_NLOS*gamma*sp.power(dist, -pl_exp)
    return total_loss
def calcglobalcost_UDTW(self):
    # math.square/math.minimum do not exist in the stdlib math module;
    # plain arithmetic and min() are used instead.
    self.Globalcost_[0, 0] = (self.FirstVector_[0] - self.SecondVector_[0])**2
    temp = 0
    for i in range(1, self.FirstVector_.shape[0]):
        self.Globalcost_[0, i] = (self.FirstVector_[i] - self.SecondVector_[0])**2 \
            + self.Globalcost_[0, i-1]
    for i in range(1, self.SecondVector_.shape[0]):
        self.Globalcost_[i, 0] = (self.SecondVector_[i] - self.FirstVector_[0])**2 \
            + self.Globalcost_[i-1, 0]
    for i in range(1, self.SecondVector_.shape[0]):
        for j in range(1, self.FirstVector_.shape[0]):
            self.Globalcost_[i, j] = (self.FirstVector_[j] - self.SecondVector_[i])**2
            temp = min(self.Globalcost_[i-1, j], self.Globalcost_[i, j-1])
            temp = min(self.Globalcost_[i-1, j-1], temp)
            self.Globalcost_[i, j] += temp
def cost(self, v):
    if len(np.shape(v)) == 1:
        v.shape = (1, len(v))
    use_fw = self.trainfn == 'fpcd'
    use_persist = use_fw or self.trainfn == 'pcd'
    num_points = v.shape[0]

    # positive phase
    pos_h_samples = self.propup(v)

    # negative phase
    nh0 = self.p[:num_points] if use_persist else pos_h_samples[-1][0]
    for i in range(self.n):
        neg_v_samples, neg_h_samples = self.gibbs_hvh(nh0, fw=use_fw)
        nh0 = neg_h_samples[-1][0]

    # compute gradients
    grads = self.grad(v, pos_h_samples, neg_v_samples, neg_h_samples)
    self.p[:num_points] = nh0

    # compute reconstruction error
    if self.trainfn == 'cdn':
        cost = np.sum(np.square(v - neg_v_samples[0][1])) / self.batch_size
    else:
        cost = np.sum(np.square(v - self.gibbs_vhv(v)[0][0][1])) / self.batch_size
    return cost, grads
def predict_gmm(self, testSamples, featIdx=None, tau=0):
    """ Function that predicts the label for testSamples using the learned model
        Inputs:
            testSamples: the samples to be classified
            featIdx: indices of features to use for classification
            tau: regularization parameter
        Outputs:
            predLabels: the class
            scores: the decision value for each class
    """
    # Get information from the data
    nbTestSpl = testSamples.shape[0]  # Number of testing samples

    # Initialization
    scores = sp.empty((nbTestSpl, self.C))

    # If not specified, predict with all features
    if featIdx is None:
        idx = range(testSamples.shape[1])
    else:
        idx = list(featIdx)

    # Allocate storage for decomposition in eigenvalues
    if self.idxDecomp != idx:
        self.vp = sp.empty((self.C, len(idx)))           # array of eigenvalues
        self.Q = sp.empty((self.C, len(idx), len(idx)))  # array of eigenvectors
        flagDecomp = True
    else:
        flagDecomp = False

    # Start the prediction for each class
    for c in range(self.C):
        testSamples_c = testSamples[:, idx] - self.mean[c, idx]
        if flagDecomp:
            self.vp[c, :], self.Q[c, :, :], _ = self.decomposition(self.cov[c, idx, :][:, idx])
        regvp = self.vp[c, :] + tau
        logdet = sp.sum(sp.log(regvp))
        cst = logdet - 2*sp.log(self.prop[c])  # Pre compute the constant term
        # compute ||lambda^{-0.5}q^T(x-mu)||^2 + cst for all samples
        scores[:, c] = sp.sum(sp.square(sp.dot((self.Q[c, :, :]/sp.sqrt(regvp)).T, testSamples_c.T)), axis=0) + cst
        del testSamples_c
    self.idxDecomp = idx

    # Assign the label to the minimum value of scores
    predLabels = sp.argmin(scores, 1) + 1
    return predLabels, scores
def updateParameters(self, ix=None, ro=1.):
    """
    Public method to update the node's parameters.
    Optional arguments for stochastic updates are:
        - ix: list of indices of the minibatch
        - ro: step size of the natural gradient ascent
    """
    # Get expectations from other nodes
    W = self.markov_blanket["W"].getExpectations()
    Y = self.markov_blanket["Y"].get_mini_batch()
    tau = self.markov_blanket["Tau"].get_mini_batch()
    mask = [self.markov_blanket["Y"].nodes[m].getMask() for m in range(len(Y))]
    if "MuZ" in self.markov_blanket:
        Mu = self.markov_blanket['MuZ'].get_mini_batch()
    else:
        Mu = self.P.getParameters()["mean"]
        if ix is not None:
            Mu = Mu[ix]
    if "AlphaZ" in self.markov_blanket:
        Alpha = self.markov_blanket['AlphaZ'].get_mini_batch()
    else:
        Alpha = 1. / self.P.params['var']
        if ix is not None:
            Alpha = Alpha[ix, :]

    # Get parameters of current node
    Q = self.Q.getParameters()
    Qmean, Qvar = Q['mean'], Q['var']
    if ix is not None:
        self.mini_batch = {}
        Qmean = Qmean[ix, :]
        Qvar = Qvar[ix, :]

    # Compute updates
    par_up = self._updateParameters(Y, W, tau, Mu, Alpha, Qmean, Qvar, mask)

    # Update parameters
    if ix is None:
        Q['mean'] = par_up['Qmean']
        Q['var'] = par_up['Qvar']
    else:
        self.mini_batch['E'] = par_up['Qmean']
        self.mini_batch['E2'] = s.square(par_up['Qmean']) + par_up['Qvar']
        Q['mean'][ix, :] = par_up['Qmean']
        Q['var'][ix, :] = par_up['Qvar']

    self.Q.setParameters(mean=Q['mean'], var=Q['var'])  # NOTE should not be necessary but safer to keep for now
def cosine(h1, h2):  # 17 us @array, 42 us @list \w 100 bins
    r"""
    Compute the angle between the two histograms in vector space irrespective of their
    length. The cosine similarity between two histograms \f$H\f$ and \f$H'\f$ of size
    \f$m\f$ is defined as
    \f[
        d_{\cos}(H, H') = \cos\alpha = \frac{H * H'}{\|H\| \|H'\|} =
            \frac{\sum_{m=1}^M H_m*H'_m}{\sqrt{\sum_{m=1}^M H_m^2} * \sqrt{\sum_{m=1}^M {H'}_m^2}}
    \f]

    Attributes:
    - not a metric, a similarity

    Attributes for normalized histograms:
    - \f$d(H, H')\in[0, 1]\f$
    - \f$d(H, H) = 1\f$
    - \f$d(H, H') = d(H', H)\f$

    Attributes for not-normalized histograms:
    - \f$d(H, H')\in[-1, 1]\f$
    - \f$d(H, H) = 1\f$
    - \f$d(H, H') = d(H', H)\f$

    @note The resulting similarity ranges from -1 meaning exactly opposite, to 1
    meaning exactly the same, with 0 usually indicating independence, and in-between
    values indicating intermediate similarity or dissimilarity.

    Attributes for not-equal histograms:
    - not applicable

    @param h1 the first histogram
    @type h1 array-like sequence
    @param h2 the second histogram, same bins as h1
    @type h2 array-like sequence

    @return cosine similarity
    @rtype float
    """
    h1, h2 = __prepare_histogram(h1, h2)
    return scipy.sum(h1 * h2) / math.sqrt(scipy.sum(scipy.square(h1)) * scipy.sum(scipy.square(h2)))
def quadratic_utility(x, y):
    r"""Quadratic utility function

    Notes
    ----------
    .. math:: u(x, y) = - \sum_{i=1}^n (x_i - y_i)^2
    """
    u = -sp.square(x - y).sum()
    return u
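# Usage sketch (added): utility is 0 at a perfect match and increasingly
# negative as x and y diverge (numpy arrays assumed as inputs).
import numpy as np
u_match = -np.square(np.array([1., 2.]) - np.array([1., 2.])).sum()  # -0.0
u_far = -np.square(np.array([1., 2.]) - np.array([2., 4.])).sum()    # -5.0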
def pair_dist(rand_pair, sub_files, reg_var, len_time=235):
    """ Pair distance """
    sub1_data = spio.loadmat(sub_files[rand_pair[0]])['dtseries'].T
    sub2_data = spio.loadmat(sub_files[rand_pair[1]])['dtseries'].T
    sub1_data, _, _ = normalizeData(sub1_data[:len_time, :])
    sub2_data, _, _ = normalizeData(sub2_data[:len_time, :])
    sub2_data, _ = brainSync(X=sub1_data, Y=sub2_data)
    fmri_diff = sp.sum((sub2_data - sub1_data)**2, axis=0)
    regvar_diff = sp.square(reg_var[rand_pair[0]] - reg_var[rand_pair[1]])
    return fmri_diff, regvar_diff
def calcglobalcost_DDTW(self):
    # Derivative DTW: replace the raw samples with a local derivative
    # estimate d_i = ((q_{i+1} - q_{i-1})/2 + (q_i - q_{i-1}))/2 before
    # computing the costs.
    derv1 = npy.array(self.FirstVector_)
    derv2 = npy.array(self.SecondVector_)
    for indx in range(1, derv1.shape[0]-1):
        derv1[indx] = ((self.FirstVector_[indx+1] - self.FirstVector_[indx-1])/2
                       + (self.FirstVector_[indx] - self.FirstVector_[indx-1]))/2
    for indx in range(1, derv2.shape[0]-1):
        derv2[indx] = ((self.SecondVector_[indx+1] - self.SecondVector_[indx-1])/2
                       + (self.SecondVector_[indx] - self.SecondVector_[indx-1]))/2
    derv1[0] = derv1[1]
    derv1[derv1.shape[0]-1] = derv1[derv1.shape[0]-2]
    derv2[0] = derv2[1]
    derv2[derv2.shape[0]-1] = derv2[derv2.shape[0]-2]
    # math.square/math.minimum do not exist; use plain arithmetic and min()
    self.Globalcost_[0, 0] = (derv1[0] - derv2[0])**2
    temp = 0
    for i in range(1, self.FirstVector_.shape[0]):
        self.Globalcost_[0, i] = (derv1[i] - derv2[0])**2 + self.Globalcost_[0, i-1]
    for i in range(1, self.SecondVector_.shape[0]):
        self.Globalcost_[i, 0] = (derv2[i] - derv1[0])**2 + self.Globalcost_[i-1, 0]
    for i in range(1, self.SecondVector_.shape[0]):
        for j in range(1, self.FirstVector_.shape[0]):
            self.Globalcost_[i, j] = (derv1[j] - derv2[i])**2
            temp = min(self.Globalcost_[i-1, j], self.Globalcost_[i, j-1])
            temp = min(self.Globalcost_[i-1, j-1], temp)
            self.Globalcost_[i, j] += temp
def get_AIC(self):
    """this method returns the corrected Akaike information criterion
    (Glatting 07). It is only available after a successful fit"""
    if self.rc > 0:
        n = self.fit.size
        k = self.nooffreeparameters
        aic = (n * sp.log(sp.sum(sp.square(self.residuals)) / n)
               + 2 * (k + 1)
               + 2 * (k + 1) * (k + 2) / (n - k - 2))
        return aic
    else:
        return False
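# Worked check of the corrected AIC above (added; hypothetical numbers):
# with n = 10 points, residual sum of squares 2.5, and k = 2 free parameters,
# AICc = 10*ln(0.25) + 2*3 + 2*3*4/6 ≈ -13.86 + 6 + 4 ≈ -3.86.
import math
aicc = 10 * math.log(2.5 / 10) + 2 * (2 + 1) + 2 * (2 + 1) * (2 + 2) / (10 - 2 - 2)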
def rbf_exe(net, x):
    # Evaluate an RBF network: V1 accumulates the input-weighted squared
    # distances to the hidden-unit centers, Y1 applies the Gaussian
    # activation, and Y2 is the linear output layer.
    Nin = np.size(x, 0)
    Nmes = np.size(x, 1)
    Nhid = np.size(net.centers, 0)
    V1 = np.zeros((Nhid, Nmes))
    for i in range(Nin):
        temp1 = net.IW[:, i, None].dot(x[None, i, :])
        temp2 = np.multiply(net.IW[:, i, None], net.centers[:, i, None])
        # temp3 = np.square(temp1 - (temp2)*np.ones((1, Nmes)))
        temp3 = np.square(temp1 - temp2)
        V1 = V1 + temp3
    Y1 = np.exp(-V1)
    Y2 = net.OW.dot(Y1)
    return Y2, Y1, V1
def run(self, phase=None):
    r'''
    '''
    logger.warning('This algorithm can take some time...')
    graph = self._net.create_adjacency_matrix(data=self._net['throat.length'],
                                              sprsfmt='csr')
    if phase is not None:
        self._phase = phase
        if 'throat.occupancy' in self._phase.props():
            temp = self._net['throat.length'] * \
                (self._phase['throat.occupancy'] == 1)
            graph = self._net.create_adjacency_matrix(data=temp,
                                                      sprsfmt='csr',
                                                      prop='temp')
    # self._net.tic()
    path = spgr.shortest_path(csgraph=graph, method='D', directed=False)
    # self._net.toc()
    Px = sp.array(self._net['pore.coords'][:, 0], ndmin=2)
    Py = sp.array(self._net['pore.coords'][:, 1], ndmin=2)
    Pz = sp.array(self._net['pore.coords'][:, 2], ndmin=2)
    Cx = sp.square(Px.T - Px)
    Cy = sp.square(Py.T - Py)
    Cz = sp.square(Pz.T - Pz)
    Ds = sp.sqrt(Cx + Cy + Cz)
    temp = path / Ds
    # temp = path
    temp[sp.isnan(temp)] = 0
    temp[sp.isinf(temp)] = 0
    return temp
def calculateELBO(self):
    """ Compute Lower Bound """
    Wtmp = self.markov_blanket["SW"].getExpectations()
    Ztmp = self.markov_blanket["Z"].getExpectations()
    W, WW = Wtmp["E"], Wtmp["ESWW"]
    Z, ZZ = Ztmp["E"], Ztmp["E2"]
    zeta = self.params["zeta"]
    tau = self.markov_blanket["Tau"].getValue()  # to-do: not expand
    mask = self.getMask()

    # Precompute terms
    ZW = Z.dot(W.T)
    ZZWW = s.square(ZW) - s.dot(s.square(Z), s.square(W).T) + ZZ.dot(WW.T)

    # term1 = 0.5*tau*(ZW - zeta)**2
    term1 = 0.5*tau*(ZZWW - 2*ZW*zeta + s.square(zeta))
    term2 = (ZW - zeta)*(sigmoid(zeta)*(1 - self.obs/self.ratefn(zeta)))
    term3 = self.ratefn(zeta) - self.obs*s.log(self.ratefn(zeta))

    elbo = -(term1 + term2 + term3)
    elbo[mask] = 0.

    return elbo.sum()
def get_AIC(self):
    """this method returns the corrected Akaike information criterion
    (Glatting 07). It is only available after a successful fit"""
    if self._fitted:
        n = self.fit.size
        npar = len(self._parameters)
        aic = (n * sp.log(sp.sum(sp.square(self.residuals)) / n)
               + 2 * (npar + 1)
               + 2 * (npar + 1) * (npar + 2) / (n - npar - 2))
        return aic
    else:
        return False
def locgpd(mesh1, mesh2, R_0=None, M_0=None, max_iter=1000, mirror=False):
    # print out which meshes to work with
    # print(mesh1.name)
    # print(mesh2.name)

    # number of vertices
    N = len(mesh1.vertices)
    # V1 and V2 are of size 3 * N
    V1 = mesh1.vertices.T
    V2 = mesh2.vertices.T
    if M_0 is None:
        M_0 = np.ones([N, N])
    # compute distance
    D = distance.cdist(V1.T, np.dot(R_0, V2).T)
    MD2 = square(D)
    # compute initial mappings and correspondences
    P_0, trash = Correspondence.linassign(M_0, MD2)

    # iterate: alternate Kabsch alignment and assignment
    P = P_0
    R = R_0
    d = np.linalg.norm(V1 - np.dot(R, V2 @ P))
    P_prev = sp.eye(N)
    gamma = 0
    i = 0
    while np.linalg.norm((abs(P - P_prev).sum(axis=0)).sum(axis=1)) > 0:  # norm(P - P_prev)
        i += 1
        P_prev = P
        R_prev = R
        d_prev = d

        # Do Kabsch
        newV2 = V2 @ P_prev
        R = Correspondence.Kabsch(V1.T, newV2.T)
        d = np.linalg.norm(V1 - np.dot(R, newV2))

        # Do Hungary
        gamma = 1.5 * Correspondence.ltwoinf(V1 - np.dot(R, newV2))
        M, MD2 = Correspondence.jrangesearch(V1.T, np.dot(R, V2).T, gamma)
        P, trash = Correspondence.linassign(M, MD2)
        # d = np.linalg.norm(V1 - np.dot(R, V2 @ P))

        if i > max_iter or abs(d_prev - d) < (0.00001 * d_prev):
            break
        elif i % 100 == 0:
            print("Current error is: ", d)

    return {'d': d, 'r': R, 'p': P, 'g': gamma}
def __call__(self, Xi, Xj, ni, nj, hyper_deriv=None, symmetric=False):
    """Evaluate the covariance between points `Xi` and `Xj` with derivative order `ni`, `nj`.

    Parameters
    ----------
    Xi : :py:class:`Matrix` or other Array-like, (`M`, `D`)
        `M` inputs with dimension `D`.
    Xj : :py:class:`Matrix` or other Array-like, (`M`, `D`)
        `M` inputs with dimension `D`.
    ni : :py:class:`Matrix` or other Array-like, (`M`, `D`)
        `M` derivative orders for set `i`.
    nj : :py:class:`Matrix` or other Array-like, (`M`, `D`)
        `M` derivative orders for set `j`.
    hyper_deriv : Non-negative int or None, optional
        The index of the hyperparameter to compute the first derivative
        with respect to. If None, no derivatives are taken. Hyperparameter
        derivatives are not supported at this point. Default is None.
    symmetric : bool
        Whether or not the input `Xi`, `Xj` are from a symmetric matrix.
        Default is False.

    Returns
    -------
    Kij : :py:class:`Array`, (`M`,)
        Covariances for each of the `M` `Xi`, `Xj` pairs.

    Raises
    ------
    NotImplementedError
        If the `hyper_deriv` keyword is not None.
    """
    if hyper_deriv is not None:
        raise NotImplementedError("Hyperparameter derivatives have not been implemented!")
    if scipy.any(scipy.sum(ni, axis=1) > 1) or scipy.any(scipy.sum(nj, axis=1) > 1):
        raise ValueError("Matern52Kernel only supports 0th and 1st order derivatives")
    Xi = scipy.asarray(Xi, dtype=scipy.float64)
    Xj = scipy.asarray(Xj, dtype=scipy.float64)
    ni = scipy.array(ni, dtype=scipy.int32)
    nj = scipy.array(nj, dtype=scipy.int32)
    var = scipy.square(self.params[-self.num_dim:])
    value = _matern52(Xi, Xj, ni, nj, var)
    return self.params[0]**2 * value
def print_verbose_message(self, i):
    """Method to print training statistics if Verbose is TRUE"""

    # Memory usage (does not work in Windows)
    # print('Peak memory usage: %.2f MB' % (resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / infer_platform()))

    # Variance explained
    r2 = s.asarray(self.calculate_variance_explained(total=True)).mean(axis=0)
    r2[r2 < 0] = 0.
    print("- Variance explained: " + " ".join(
        ["View %s: %.2f%%" % (m, 100*r2[m]) for m in range(self.dim["M"])]))

    # Sparsity levels of the weights
    W = self.nodes["W"].getExpectation()
    foo = [s.mean(s.absolute(W[m]) < 1e-3) for m in range(self.dim["M"])]
    print("- Fraction of zero weights: " + " ".join(
        ["View %s: %.0f%%" % (m, 100*foo[m]) for m in range(self.dim["M"])]))

    # Correlation between factors
    Z = self.nodes["Z"].getExpectation()
    Z += s.random.normal(s.zeros(Z.shape), 1e-10)
    r = s.absolute(corr(Z.T, Z.T))
    s.fill_diagonal(r, 0)
    print("- Maximum correlation between factors: %.2f" % s.nanmax(r))

    # Factor norm
    bar = s.mean(s.square(Z), axis=0)
    print("- Factor norms: " + " ".join(["%.2f" % bar[k] for k in range(Z.shape[1])]))

    # Tau
    tau = self.nodes["Tau"].getExpectation()
    print("- Tau per view (average): " + " ".join(
        ["View %s: %.2f" % (m, tau[m].mean()) for m in range(self.dim["M"])]))

    # Sigma
    if 'Sigma' in self.nodes.keys():
        sigma = self.nodes["Sigma"]
        if i >= sigma.start_opt and i % sigma.opt_freq == 0:
            print('Sigma node has been optimised:\n- Lengthscales = %s \n- Scale = %s' %
                  (np.array2string(sigma.get_ls(), precision=2, separator=", "),
                   np.array2string(1 - sigma.get_zeta(), precision=2, separator=", ")))

    print("\n")
def Windowed_UDTW(self):
    self.WarpingPath_.append((0, 0))
    i = 0
    j = 0
    while (i < self.FirstVector_.shape[0]) or (j < self.SecondVector_.shape[0]):
        if i != self.FirstVector_.shape[0]-1 and j != self.SecondVector_.shape[0]-1:
            # step in whichever direction reduces the squared difference most
            # (math.square does not exist; plain **2 is used instead)
            if (self.FirstVector_[i] - self.SecondVector_[j+1])**2 >= \
                    (self.FirstVector_[i+1] - self.SecondVector_[j+1])**2:
                if (self.FirstVector_[i+1] - self.SecondVector_[j])**2 >= \
                        (self.FirstVector_[i+1] - self.SecondVector_[j+1])**2:
                    j += 1
                    i += 1
                    self.WarpingPath_.append((i, j))
                else:
                    i += 1
                    self.WarpingPath_.append((i, j))
            else:
                if (self.FirstVector_[i] - self.SecondVector_[j+1])**2 >= \
                        (self.FirstVector_[i+1] - self.SecondVector_[j])**2:
                    i += 1
                    self.WarpingPath_.append((i, j))
                else:
                    j += 1
                    self.WarpingPath_.append((i, j))
        else:
            if i == self.FirstVector_.shape[0]-1 and j != self.SecondVector_.shape[0]-1:
                while j < self.SecondVector_.shape[0]-1:
                    j += 1
                    self.WarpingPath_.append((i, j))
            elif i != self.FirstVector_.shape[0]-1 and j == self.SecondVector_.shape[0]-1:
                while i < self.FirstVector_.shape[0]-1:
                    i += 1
                    self.WarpingPath_.append((i, j))
            else:
                i += 1
                j += 1
    self.Warpindexoffv_, self.Warpindexofsv_ = map(list, zip(*self.WarpingPath_))
    self.Warpindexoffv_ = npy.array(self.Warpindexoffv_)
    self.Warpindexofsv_ = npy.array(self.Warpindexofsv_)
    for i in self.Warpindexoffv_:
        self.Warpedfv = npy.append(self.Warpedfv, [self.FirstVector_[i]])
    for i in self.Warpindexofsv_:
        self.Warpedsv = npy.append(self.Warpedsv, [self.SecondVector_[i]])
def randpairsdist_reg(bfp_path, sub_files, reg_var, num_pairs=1000, len_time=235):
    """ Perform regression stats based on square distance between random pairs """
    print('dist2atlas_reg, assume that the data is normalized')
    print('This function is deprecated!!!!!!!!!!')

    # Get the number of vertices from a file
    num_vert = spio.loadmat(sub_files[0])['dtseries'].shape[0]

    # Generate random pairs
    rand_pairs = sp.random.choice(len(sub_files), (num_pairs, 2), replace=True)

    fmri_diff = sp.zeros((num_vert, num_pairs))
    regvar_diff = sp.zeros(num_pairs)

    print('Reading subjects')
    # Compute distance to atlas for each pair
    for ind in tqdm(range(num_pairs)):
        sub1_data = spio.loadmat(sub_files[rand_pairs[ind, 0]])['dtseries'].T
        sub2_data = spio.loadmat(sub_files[rand_pairs[ind, 1]])['dtseries'].T
        sub1_data, _, _ = normalizeData(sub1_data[:len_time, :])
        sub2_data, _, _ = normalizeData(sub2_data[:len_time, :])
        sub2_data, _ = brainSync(X=sub1_data, Y=sub2_data)
        fmri_diff[:, ind] = sp.sum((sub2_data - sub1_data)**2, axis=0)
        regvar_diff[ind] = sp.square(reg_var[rand_pairs[ind, 0]] - reg_var[rand_pairs[ind, 1]])

    corr_pval = sp.zeros(num_vert)
    for ind in tqdm(range(num_vert)):
        _, corr_pval[ind] = sp.stats.pearsonr(fmri_diff[ind, :], regvar_diff)

    corr_pval[sp.isnan(corr_pval)] = .5

    labs = spio.loadmat(bfp_path + '/supp_data/USCBrain_grayord_labels.mat')['labels'].squeeze()
    corr_pval_fdr = sp.zeros(num_vert)
    _, corr_pval_fdr[labs > 0] = fdrcorrection(corr_pval[labs > 0])

    return corr_pval, corr_pval_fdr
def chi_square(h1, h2):  # 23 us @array, 49 us @list \w 100
    r"""
    Measure how unlikely it is that one distribution (histogram) was drawn from the
    other. The Chi-square distance between two histograms \f$H\f$ and \f$H'\f$ of size
    \f$m\f$ is defined as
    \f[
        d_{\chi^2}(H, H') = \sum_{m=1}^M
            \frac{
                (H_m - H'_m)^2
            }{
                H_m + H'_m
            }
    \f]

    Attributes:
    - semimetric

    Attributes for normalized histograms:
    - \f$d(H, H')\in[0, 2]\f$
    - \f$d(H, H) = 0\f$
    - \f$d(H, H') = d(H', H)\f$

    Attributes for not-normalized histograms:
    - \f$d(H, H')\in[0, \infty)\f$
    - \f$d(H, H) = 0\f$
    - \f$d(H, H') = d(H', H)\f$

    Attributes for not-equal histograms:
    - not applicable

    @param h1 the first histogram
    @type h1 array-like sequence
    @param h2 the second histogram
    @type h2 array-like sequence

    @return chi-square distance
    @rtype float
    """
    h1, h2 = __prepare_histogram(h1, h2)
    # division by zero only occurs when a bin is zero in both histograms,
    # in which case the division is 0/0 and leads to (and should lead to) 0
    old_err_state = scipy.seterr(invalid="ignore")
    result = scipy.square(h1 - h2) / (h1 + h2)
    scipy.seterr(**old_err_state)
    result[scipy.isnan(result)] = 0  # faster than scipy.nan_to_num, which checks for +inf and -inf also
    return scipy.sum(result)
def pair_dist_simulation(rand_pair, sub_files, sub_data=[], reg_var=[], len_time=235, roi=[]):
    """ Pair distance, with simulated regressor-scaled noise added in an ROI """
    # normalize the clinical variable
    reg_var_norm, _, _ = normalizeData(reg_var)
    roi_ind, _ = np.where(roi)
    noise_data = (reg_var_norm - np.min(reg_var_norm)) * np.random.normal(
        size=(len(roi_ind), len_time, len(reg_var)))

    sub_data = np.array(sub_data)
    if sub_data.size > 0:
        sub1_data = sub_data[:, :, rand_pair[0]]
        sub2_data = sub_data[:, :, rand_pair[1]]
        sub1_data, _, _ = normalizeData(sub1_data[:len_time, :])
        sub2_data, _, _ = normalizeData(sub2_data[:len_time, :])
        sub1_data += noise_data[:, :, rand_pair[0]]
        sub2_data += noise_data[:, :, rand_pair[1]]
        sub1_data, _, _ = normalizeData(sub1_data[:len_time, :])
        sub2_data, _, _ = normalizeData(sub2_data[:len_time, :])
    else:
        sub1_data = spio.loadmat(sub_files[rand_pair[0]])['dtseries'].T
        sub2_data = spio.loadmat(sub_files[rand_pair[1]])['dtseries'].T
        sub1_data, _, _ = normalizeData(sub1_data[:len_time, :])
        sub2_data, _, _ = normalizeData(sub2_data[:len_time, :])
        sub1_data[:len_time, roi_ind] += noise_data[:, :, rand_pair[0]].T
        sub2_data[:len_time, roi_ind] += noise_data[:, :, rand_pair[1]].T
        sub1_data, _, _ = normalizeData(sub1_data[:len_time, :])
        sub2_data, _, _ = normalizeData(sub2_data[:len_time, :])

    sub2_data, _ = brainSync(X=sub1_data, Y=sub2_data)
    fmri_diff = sp.sum((sub2_data - sub1_data)**2, axis=0)  # Returns SQUARE of the distance

    if len(reg_var) > 0:
        regvar_diff = sp.square(reg_var[rand_pair[0]] - reg_var[rand_pair[1]])
        return fmri_diff, regvar_diff
    else:
        return fmri_diff
def calculate_variance_explained(self, total=False):
    # Collect relevant expectations
    Z = self.nodes['Z'].getExpectation()
    W = self.nodes["W"].getExpectation()
    Y = self.nodes["Y"].getExpectation()

    # Get groups
    groups = self.nodes["AlphaZ"].groups if "AlphaZ" in self.nodes else s.array([0] * self.dim['N'])

    # to maintain correct ordering of groups in R2
    unique_groups, idx = np.unique(groups, return_index=True)
    unique_groups = unique_groups[np.argsort(idx)]

    if total:
        r2 = [s.zeros(self.dim['M']) for g in range(self.dim['G'])]
    else:
        r2 = [s.zeros([self.dim['M'], self.dim['K']]) for g in range(self.dim['G'])]

    for m in range(self.dim['M']):
        mask = self.nodes["Y"].getNodes()[m].getMask(full=True)
        for g in range(self.dim['G']):
            gidx = unique_groups[g]
            gg = groups == gidx
            SS = s.square(Y[m][gg, :]).sum()

            # Total variance explained (using all factors)
            if total:
                Ypred = s.dot(Z[gg, :], W[m].T)
                Ypred[mask[gg, :]] = 0.
                Res = s.sum((Y[m][gg, :] - Ypred)**2.)
                r2[g][m] = 1. - Res/SS
            # Variance explained per factor
            else:
                for k in range(self.dim['K']):
                    Ypred = s.outer(Z[gg, k], W[m][:, k])
                    Ypred[mask[gg, :]] = 0.
                    Res = s.sum((Y[m][gg, :] - Ypred)**2.)
                    r2[g][m, k] = 1. - Res/SS
    return r2
def funcEval(self, x, data1):
    '''Assumes x is a 10x784 matrix with float entries.  data1 is a tuple of
    pixel vectors and their labels, i.e. which digit (0-9) each vector
    represents.'''
    self.fn = sp.ones(784)  # next two steps are required for calling stack fn
    self.x_temp = x
    # map() is lazy in Python 3, so force evaluation of the stacking calls
    list(map(self.stack, data1[1], ['fn' for i in range(len(data1[1]))]))
    self.fn = sp.delete(self.fn, (0), axis=0)  # deleting the first row (of ones created above)
    fval_large = sp.square(self.fn - data1[0])  # DSx784, where DS is dataset size
    fval_large = fval_large.sum(axis=1, keepdims=True)  # DSx1
    fval = sn.measurements.sum(fval_large, data1[1],
                               index=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    fval = sp.reshape(fval, (10, 1))
    return fval
def __call__(self, Xi, Xj, ni, nj, hyper_deriv=None, symmetric=False):
    """Evaluate the covariance between points `Xi` and `Xj` with derivative order `ni`, `nj`.

    Parameters
    ----------
    Xi : :py:class:`Matrix` or other Array-like, (`M`, `N`)
        `M` inputs with dimension `N`.
    Xj : :py:class:`Matrix` or other Array-like, (`M`, `N`)
        `M` inputs with dimension `N`.
    ni : :py:class:`Matrix` or other Array-like, (`M`, `N`)
        `M` derivative orders for set `i`.
    nj : :py:class:`Matrix` or other Array-like, (`M`, `N`)
        `M` derivative orders for set `j`.
    hyper_deriv : Non-negative int or None, optional
        The index of the hyperparameter to compute the first derivative
        with respect to. If None, no derivatives are taken. Hyperparameter
        derivatives are not supported at this point. Default is None.
    symmetric : bool
        Whether or not the input `Xi`, `Xj` are from a symmetric matrix.
        Default is False.

    Returns
    -------
    Kij : :py:class:`Array`, (`M`,)
        Covariances for each of the `M` `Xi`, `Xj` pairs.

    Raises
    ------
    NotImplementedError
        If the `hyper_deriv` keyword is not None.
    """
    if hyper_deriv is not None:
        raise NotImplementedError("Hyperparameter derivatives have not been implemented!")
    if scipy.any(scipy.sum(ni, axis=1) > 1) or scipy.any(scipy.sum(nj, axis=1) > 1):
        raise ValueError("Matern52Kernel only supports 0th and 1st order derivatives")
    Xi = scipy.asarray(Xi, dtype=scipy.float64)
    Xj = scipy.asarray(Xj, dtype=scipy.float64)
    ni = scipy.array(ni, dtype=scipy.int32)
    nj = scipy.array(nj, dtype=scipy.int32)
    var = scipy.square(self.params[-self.num_dim:])
    value = _matern52(Xi, Xj, ni, nj, var)
    return self.params[0]**2 * value
def european_option_vega(self):
    # Black-Scholes vega with a continuous dividend yield:
    # vega = S * exp(-q*T) * N'(d1) * sqrt(T)
    numerator = sp.log(self.spot_price/self.strike_price) + \
        (self.interest_rate - self.dividend_yield +
         0.5*sp.power(self.sigma, 2))*self.time_to_maturity
    d1 = numerator/(self.sigma*sp.sqrt(self.time_to_maturity))
    val = self.spot_price * \
        sp.exp(-self.dividend_yield*self.time_to_maturity) * \
        sp.exp(-sp.square(d1)*0.5)
    val = val*sp.sqrt(self.time_to_maturity)
    vega = (1/sqrt(2*pi))*val
    return vega
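# Cross-check sketch (added, standalone with hypothetical numbers): for an
# at-the-money option with S = K = 100, r = q = 0, sigma = 0.2, T = 1, the
# formula above gives d1 = 0.1 and vega = 100 * phi(0.1) ≈ 39.7, where phi is
# the standard normal pdf.
import math
S, K, r, q, sigma, T = 100.0, 100.0, 0.0, 0.0, 0.2, 1.0
d1 = (math.log(S / K) + (r - q + 0.5 * sigma**2) * T) / (sigma * math.sqrt(T))
vega = S * math.exp(-q * T) * math.exp(-0.5 * d1**2) * math.sqrt(T) / math.sqrt(2 * math.pi)
# vega ≈ 39.695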
def __fit(index1, index2):
    from scipy import stats, sqrt, square
    # do the fit
    (cijFitted, intercept, r, tt, stderr) = stats.linregress(strain[:, index2-1], stress[:, index1-1])
    # correct for scipy weirdness - see http://www.scipy.org/scipy/scipy/ticket/8
    stderr = S.sqrt((numsteps * stderr**2)/(numsteps-2))
    error = stderr/sqrt(sum(square(strain[:, index2-1])))
    # print info about the fit
    print('\n')
    print('Cij (gradient)          : ', cijFitted)
    print('Error in Cij            : ', error)
    if abs(r) > 0.9:
        print('Correlation coefficient : ', r)
    else:
        print('Correlation coefficient : ', r, ' <----- WARNING')
    # if using graphics, add a subplot
    if options.graphics:
        # position this plot in a 6x6 grid
        sp = P.subplot(6, 6, 6*(index1-1)+index2)
        sp.set_axis_on()
        # change the labels on the axes
        xlabels = sp.get_xticklabels()
        P.setp(xlabels, 'rotation', 90, fontsize=7)
        ylabels = sp.get_yticklabels()
        P.setp(ylabels, fontsize=7)
        # colour the plot depending on the strain pattern
        sp.set_axis_bgcolor(colourDict[patt])
        # plot the data
        P.plot([strain[0, index2-1], strain[numsteps-1, index2-1]],
               [cijFitted*strain[0, index2-1]+intercept,
                cijFitted*strain[numsteps-1, index2-1]+intercept])
        P.plot(strain[:, index2-1], stress[:, index1-1], 'ro')
    return cijFitted