def generateNodesAdaptive(self):
    innerDomainSize = self.innerDomainSize
    innerMeshSize = self.innerMeshSize
    numberElementsInnerDomain = int(innerDomainSize / innerMeshSize)
    assert numberElementsInnerDomain < self.numberElements
    domainCenter = (self.domainStart + self.domainEnd) / 2
    nodes0 = np.linspace(domainCenter, innerDomainSize / 2.0,
                         numberElementsInnerDomain // 2 + 1)
    nodes0 = np.delete(nodes0, -1)
    numberOuterIntervalsFromDomainCenter = (self.numberElements - numberElementsInnerDomain) // 2
    # Outer nodes are spaced geometrically: exponents of sqrt(2) interpolated
    # from the inner-domain edge out to the domain end.
    const = np.log2(innerDomainSize / 2.0) / 0.5
    exp = np.linspace(const, np.log2(self.domainEnd * self.domainEnd),
                      numberOuterIntervalsFromDomainCenter + 1)
    nodes1 = np.power(np.sqrt(2), exp)
    nodesp = np.concatenate((nodes0, nodes1))
    nodesn = -nodesp[::-1]
    nodesn = np.delete(nodesn, -1)
    linNodalCoordinates = np.concatenate((nodesn, nodesp))
    nodalCoordinates = 0

    # Introduce higher-order nodes
    if self.elementType == "quadratic" or self.elementType == "cubic":
        if self.elementType == "quadratic":
            numberNodesPerElement = 3
        elif self.elementType == "cubic":
            numberNodesPerElement = 4

        for i in range(0, len(linNodalCoordinates) - 1):
            newnodes = np.linspace(linNodalCoordinates[i],
                                   linNodalCoordinates[i + 1],
                                   numberNodesPerElement)
            # Drop the last entry (the dummy 0 on the first pass, the shared
            # endpoint on later passes) before appending the element's nodes.
            nodalCoordinates = np.delete(nodalCoordinates, -1)
            nodalCoordinates = np.concatenate((nodalCoordinates, newnodes))
    else:
        nodalCoordinates = linNodalCoordinates

    return nodalCoordinates
def diadicPartitions(N):
    sf = 1
    cf = 1
    width = 1
    pcount = 0
    nparts = int(round(numpy.log2(N))) * 2
    partitions = numpy.zeros(nparts, dtype=int)
    pOff = nparts - 1
    while sf < N / 2:
        ep = cf + width // 2 - 1
        sn = N - cf - width // 2 + 1
        en = N - cf + width // 2 + 1
        if ep > N:
            ep = N
        if sn < 0:
            sn = 0
        if width // 2 == 0:
            ep += 1
            sn -= 1
        partitions[pcount] = ep
        partitions[pOff - pcount] = en
        pcount += 1
        sf = sf + width
        if sf > 2:
            width *= 2
        cf = sf + width // 2
    return partitions
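# A minimal sanity sketch for diadicPartitions() (my own check, not from the
# original source): for N = 16 the returned boundaries shrink dyadically
# toward both ends of the range.
import numpy

print(diadicPartitions(16))  # expected: [ 1  2  4  8 12 14 15 16]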
def score(self):
    self.uniq_docids()
    fst_term = self.query_terms[0]
    term_docs_freq = self.im.get_term_info(fst_term).get_pos_map()
    for docid in self.rank_list:
        tf = 0
        if docid in term_docs_freq:
            tf = len(term_docs_freq[docid])
        score_this_term = sp.log2(self.parent.score_once(tf, docid))
        self.rank_list[docid] += score_this_term

    previous = 0
    for term_current in self.query_terms[1:]:
        term_previous = self.query_terms[previous]
        previous += 1
        if not (term_previous in self.im.token_id_map
                and term_current in self.im.token_id_map):
            continue
        term_previous_docs_freq = self.im.get_term_info(term_previous).get_pos_map()
        term_current_docs_freq = self.im.get_term_info(term_current).get_pos_map()
        for docid in self.rank_list:
            tf_w1 = 0
            distance_score = sp.inf
            if docid in term_previous_docs_freq:
                tf_w1 = len(term_previous_docs_freq[docid])
                distance_score = self.im.get_doc_len_by_id(docid)
                if docid in term_current_docs_freq:
                    distance_score = shortest_dis(term_previous_docs_freq[docid],
                                                  term_current_docs_freq[docid])
            score_this_term = sp.log2(self.score_once(tf_w1, distance_score))
            self.rank_list[docid] += score_this_term
def calculateLevel(self, s, t):
    '''
    Calculate the appropriate mipmap level for texture filtering over a
    quadrilateral given by the texture-space vertices
    [s[0], t[0]], ..., [s[3], t[3]].

    There are many ways to do this; the instance variable levelCalcMethod
    selects the desired one.  The most correct way is to choose the
    minSideLen method, as long as the quadrilateral is vaguely
    rectangular-shaped.  This only works if you're happy to use lots of
    samples however, otherwise you get aliasing.
    '''
    s = s.copy() * self.levels[0].image.shape[0]
    t = t.copy() * self.levels[0].image.shape[1]
    if self.levelCalcMethod == 'minSideLen':
        # Get mipmap level with minimum feature size equal to the shortest
        # quadrilateral side
        s1 = pylab.concatenate((s, s[0:1]))
        t1 = pylab.concatenate((t, t[0:1]))
        minSideLen2 = (numpy.diff(s1)**2 + numpy.diff(t1)**2).min()
        level = log2(minSideLen2) / 2
    elif self.levelCalcMethod == 'minQuadWidth':
        # Get mipmap level with minimum feature size equal to the width of
        # the quadrilateral.  This one is kinda tricky.
        # v1, v2 = vectors along edges
        v1 = array([0.5 * (s[1] - s[0] + s[2] - s[3]),
                    0.5 * (t[1] - t[0] + t[2] - t[3])])
        v2 = array([0.5 * (s[3] - s[0] + s[2] - s[1]),
                    0.5 * (t[3] - t[0] + t[2] - t[1])])
        v1Sq = dot(v1, v1)
        v2Sq = dot(v2, v2)
        level = 0.5 * log2(min(v1Sq, v2Sq)
                           * (1 - dot(v1, v2)**2 / (v1Sq * v2Sq)))
    elif self.levelCalcMethod == 'minDiag':
        # Get mipmap level with minimum feature size equal to the minimum
        # distance between the centre of the quad and the vertices.  Sort
        # of a "quad radius"
        #
        # This is more-or-less the algorithm used in Pixie...
        minDiag2 = ((s - s.mean())**2 + (t - t.mean())**2).min()
        level = log2(minDiag2) / 2
    #elif self.levelCalcMethod == 'sqrtArea':
        # Get mipmap level with minimum feature size estimated as the
        # square root of the area of the box.
    elif self.levelCalcMethod == 'trilinear':
        # Get mipmap level which will result in no aliasing when plain
        # trilinear filtering is used (no integration)
        maxDiag2 = ((s - s.mean())**2 + (t - t.mean())**2).max()
        level = log2(maxDiag2) / 2
    elif self.levelCalcMethod == 'level0':
        # Else just use level 0.  Correct texture filtering will take care
        # of any aliasing...
        level = 0
    else:
        raise ValueError("Invalid mipmap level calculation type: %s"
                         % self.levelCalcMethod)
    return max(level, 0)
def test_shannon(guys):
    tot = float(len(guys))
    counts = count_digs(guys)
    # Shannon entropy in bits; the 1e-10 inside the log avoids log2(0).
    entropy = sum(-count / tot * sp.log2(count / tot + 10**(-10))
                  for count in counts)
    # Normalise by log2(10) to express the entropy in decimal digits.
    return max(1e-10, entropy / sp.log2(10))
def score(self):
    self.uniq_docids()
    fst_term = self.query_terms[0]
    term_docs_freq = term_freq[fst_term]
    parent = LMLaplace('<<empty query>>')
    for docid in self.rank_list:
        tf = 0
        if docid in term_docs_freq:
            tf = term_docs_freq[docid]
        score_this_term = sp.log2(parent.score_once(tf, docid))
        self.rank_list[docid] += score_this_term

    previous = 0
    for term_current in self.query_terms[1:]:
        term_previous = self.query_terms[previous]
        previous += 1
        if not (term_previous in term_freq and term_current in term_freq):
            continue
        term_previous_docs_freq = term_freq[term_previous]
        term_current_docs_freq = term_freq[term_current]
        for docid in self.rank_list:
            tf_w1 = 0
            distance_score = sp.inf
            if docid in term_previous_docs_freq:
                tf_w1 = term_previous_docs_freq[docid]
                distance_score = doc_length[docid]
                if docid in term_current_docs_freq:
                    distance_score = bigram_distance(docid, term_previous,
                                                     term_current)
            score_this_term = sp.log2(self.score_once(tf_w1, distance_score))
            self.rank_list[docid] += score_this_term
def set_comparison_plot():
    #pl.xlim(xmin = max(0, pl.xlim()[1] -16 ))
    pyplot.xticks(
        symbols - 2**scipy.arange(scipy.log2(symbols))[::-1],
        2**scipy.arange(scipy.log2(symbols), dtype=int)[::-1])
    pyplot.grid('on')
    plotter.set_slave_info(slavename)
    pyplot.xlabel("Rank Deficiency")
    pyplot.ylabel("Extra Packets")
def balaced_ordinal_gen(self, kdim, depth, seed, threads=-1):
    assert int(2**sp.log2(kdim)) == kdim
    sp.random.seed(seed)
    random_matrix = sp.randn(self.feat_mat.shape[1], depth)
    X = PyMatrix(self.feat_mat.dot(random_matrix))
    codes = sp.zeros(X.rows, dtype=sp.uint32)
    new_depth = depth * int(sp.log2(kdim))
    clib.get_codes(X, new_depth, Indexer.KDTREE_CYCLIC, seed, codes,
                   threads=threads)
    return codes
def bigreal2qobj(arr):
    """Convert big real vector into corresponding qutip object."""
    if arr.ndim == 1 or arr.shape[0] != arr.shape[1]:
        arr = bigreal2complex(arr)
        num_qubits = scipy.log2(arr.shape[0]).astype(int)
        return qutip.Qobj(arr, dims=[[2] * num_qubits, [1] * num_qubits])
    elif arr.shape[0] == arr.shape[1]:
        arr = bigreal2complex(arr)
        num_qubits = scipy.log2(arr.shape[0]).astype(int)
        return qutip.Qobj(arr, dims=[[2] * num_qubits] * 2)
    else:
        raise ValueError('Not sure what to do with this here.')
def kl(p, q):
    """Compute the KL divergence between two discrete probability distributions.

    The calculation is done directly using the Kullback-Leibler divergence,

    KL( p || q ) = sum_{x} p(x) log_2( p(x) / q(x) )

    Base 2 logarithm is used, so that the returned value is measured in bits.
    """
    if (p == 0.).sum() + (q == 0.).sum() > 0:
        raise Exception("Zero bins found")
    return (p * (log2(p) - log2(q))).sum()
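# A minimal usage sketch for kl() above (an assumption of mine: log2 here is
# numpy's, e.g. from numpy import log2).
import numpy as np
from numpy import log2

p = np.array([0.5, 0.25, 0.25])
q = np.array([0.25, 0.5, 0.25])
# KL(p||q) = 0.5*log2(2) + 0.25*log2(0.5) + 0.25*log2(1) = 0.25 bits
assert abs(kl(p, q) - 0.25) < 1e-12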
def plot_cwt(t):
    s1 = plt.subplot(221)
    t.plot()
    s2 = plt.subplot(222)
    spec = time_avg(t)
    plt.plot(spec, sp.log2(t.period))
    plt.ylim(sp.log2(t.period).max(), sp.log2(t.period).min())
    nscales = len(t.scales)
    yt = sp.arange(nscales, step=int(1 / t.dscale))
    plt.yticks(yt, t.scales[yt])
    plt.ylim(nscales - 1, 0)
    s1.set_position((0.1, 0.1, 0.65, 0.8))
    s2.set_position((0.8, 0.1, 0.15, 0.8))
def mutual_information(Q_xy):
    assert all(Q_xy.ravel() >= 0)
    assert len(Q_xy.shape) == 2
    Gx = Q_xy.shape[0]
    Gy = Q_xy.shape[1]
    # Normalise the joint distribution and compute the marginals.
    Q_xy /= Q_xy.sum()
    Q = Q_xy.ravel()
    Q_x = Q_xy.sum(1)
    Q_y = Q_xy.sum(0)
    # I(X;Y) = H(X) + H(Y) - H(X,Y); TINY_FLOAT64 guards against log2(0).
    H = -sp.sum(Q * sp.log2(Q + utils.TINY_FLOAT64))
    H_x = -sp.sum(Q_x * sp.log2(Q_x + utils.TINY_FLOAT64))
    H_y = -sp.sum(Q_y * sp.log2(Q_y + utils.TINY_FLOAT64))
    I = H_x + H_y - H
    return I
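# Hedged sketch of mutual_information(); utils.TINY_FLOAT64 is assumed to be
# a tiny positive constant guarding log2(0), so a stand-in is used here to
# make the sketch self-contained.
import types
import scipy as sp  # older SciPy re-exports the NumPy namespace used above

utils = types.SimpleNamespace(TINY_FLOAT64=1e-300)
Q_xy = sp.array([[0.5, 0.0], [0.0, 0.5]])  # perfectly correlated joint pmf
print(mutual_information(Q_xy))  # ~1.0 bit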
def get_independent(MyList, sorting=False, sigma=0.387):
    """
    Calculate the tuple of (L,k) or (L,k,W) using a List written in short
    form, as follows:

    [
        [L, [k1, k2, k3, ...], [W1, W2]]      Case 1
        [L, [k1, k2, k3, ...], (Wmin, Wmax)]  Case 2
        ...
    ]

    The Windows W of Case 2 are calculated as powers of 2,
    from Wmin to Wmax included.

    Output:
    independentNames (as "L,k", or "L,k,W")
    independentValues
    """
    out = {}
    numIndependentNames = len(MyList[0])
    independentNames = "L, k"
    if numIndependentNames == 3:
        independentNames = independentNames + ", W"

    for line in MyList:
        if numIndependentNames == 2:
            L, ks = line
        elif numIndependentNames == 3:
            L, ks, Ws = line
            if isinstance(Ws, int):
                Ws = [Ws]
            elif isinstance(Ws, tuple):
                # Powers of two from Wmin to Wmax inclusive
                lower_e = int(scipy.log2(Ws[0]))
                upper_e = int(scipy.log2(Ws[1]))
                e2 = scipy.array(range(lower_e, upper_e + 1))
                Ws = 2**e2
        if not isinstance(ks, list):
            ks = [ks]
        for k in ks:
            if numIndependentNames == 2:
                wincorr = 1.0 * k / L
                out[wincorr] = L, k
            elif numIndependentNames == 3:
                for W in Ws:
                    wincorr = 1.0 * W * (1.0 * k / L)**sigma
                    out[wincorr] = L, k, W

    if sorting:
        return independentNames, [out[key] for key in sorted(out)]
    else:
        return independentNames, list(out.values())
def _get_freq_stuff(x, params, timeDim=2, verbose=None):
    '''
    internal function, not really meant to be called/viewed by the end user
    (unless end user is curious). computes nfft based on x.shape.
    '''
    badNfft = False
    if 'nfft' in params:
        if params['nfft'] < x.shape[timeDim]:
            badNfft = True
            logger.warn(
                'nfft should be >= the number of time points. Reverting '
                'to default setting of nfft = 2**ceil(log2(nTimePts))\n')

    if 'nfft' not in params or badNfft:
        nfft = int(2.0**ceil(sci.log2(x.shape[timeDim])))
    else:
        nfft = int(params['nfft'])

    f = (np.arange(0.0, nfft, 1.0) * params['Fs'] / nfft)
    fInd = (f >= params['fpass'][0]) & (f <= params['fpass'][1])
    f = f[fInd]
    return (nfft, f, fInd)
def Calculate_entropy(prediction):
    # Shannon entropy (in bits) of a dict mapping outcomes to probabilities.
    entropy = 0
    for key in prediction:
        p = prediction[key]
        entropy -= p * scipy.log2(p)
    return entropy
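# Minimal usage sketch (assumes scipy is imported; on newer SciPy,
# numpy.log2 is the drop-in replacement for scipy.log2).
# A fair coin carries one bit of entropy:
print(Calculate_entropy({'heads': 0.5, 'tails': 0.5}))  # 1.0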
def negativity(rho, subsys, method='tracenorm', logarithmic=False):
    """
    Compute the negativity for a multipartite quantum system described by the
    density matrix rho. The subsys argument is an index that indicates which
    system to compute the negativity for.

    .. note::

        Experimental.
    """
    mask = [idx == subsys for idx, n in enumerate(rho.dims[0])]
    rho_pt = partial_transpose(rho, mask)

    if method == 'tracenorm':
        N = ((rho_pt.dag() * rho_pt).sqrtm().tr().real - 1) / 2.0
    elif method == 'eigenvalues':
        l = rho_pt.eigenenergies()
        N = ((abs(l) - l) / 2).sum()
    else:
        raise ValueError("Unknown method %s" % method)

    if logarithmic:
        return log2(2 * N + 1)
    else:
        return N
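# Hedged usage sketch for negativity(), assuming partial_transpose and log2
# come from qutip/numpy as the function body implies. A maximally entangled
# two-qubit Bell state has negativity 0.5 (log-negativity 1).
import qutip
from qutip import partial_transpose
from numpy import log2

rho_bell = qutip.ket2dm(qutip.bell_state('00'))
print(negativity(rho_bell, 0))                    # 0.5
print(negativity(rho_bell, 0, logarithmic=True))  # 1.0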
def coeficientes(self, g=0):
    u"""
    Generates the list of deltas to use; by default it generates powers of 2.

    :param g: (optional) either an explicit list of deltas to apply to the
        object, or a granularity level (0 by default), which determines
        exponentially how many odd numbers are used, multiplied by
        powers of 2

        g = 0 -> [1] * 2^n     -> [1,2,4,8,16..]
        g = 1 -> [1,3,5] * 2^n -> [1,2,3,4,5,6,8,10..]
        g = 2 -> [1,3,5,7,9,11,13] -> [1,2,3,..,11,12,13,14,16,18,20..]

    .. note:: there is no need to call this function directly
    """
    if type(g) == list:
        self.coefs = self.g = g
        return g
    factors = [2 * i + 1 for i in range(2**g + 1)]
    # All multiples factor * 2**i that stay below self.min
    pots = lambda e: [e * (2**i) for i in range(int(log2(self.min / e)))]
    sides = reduce(lambda x, y: x + pots(y), factors, [])
    sides.sort()
    self.coefs = sides
    self.g = g
    self.stage = Box.coef
    return sides
def ppmi(vectors):
    """Compute PPMI vectors from count vectors.
    """
    # Do not modify
    rowsum = scipy.sum(vectors, axis=0)  # sum each column across rows (count of context c)
    # remove all-zero columns
    nonzerocols = rowsum > 0
    rowsum = rowsum[nonzerocols]
    vectors = vectors[:, nonzerocols]
    colsum = scipy.sum(vectors, axis=1)  # sum each row across columns (count of word w)
    allsum = scipy.sum(rowsum)  # sum all values in matrix
    # get p(x, y)/(p(x)*p(y))
    vectors /= colsum[:, scipy.newaxis]  # count_ij/count_i*
    vectors /= rowsum                    # count_ij/(count_i* * count_j*)
    vectors *= allsum                    # prob_ij/(prob_i* * prob_j*)
    # get log, floored at 0
    vectors = scipy.log2(vectors)  # will give runtime warning for log(0); ignore
    vectors[vectors < 0] = 0  # get indices where value<0 and set them to 0
    return vectors
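# Hedged usage sketch for ppmi(); note the in-place divisions require a float
# count matrix. (scipy.sum/scipy.log2 are the NumPy functions re-exported by
# older SciPy.)
import scipy

counts = scipy.array([[2.0, 1.0],
                      [1.0, 2.0]])
print(ppmi(counts))
# Diagonal entries get log2(4/3) ~ 0.415; off-diagonal PMIs are negative
# and are floored to 0.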
def entropy(self, n, ret_schmidt_sq=False):
    """Returns the von Neumann entropy of part of the system under a left-right split.

    The chain can be split into two parts between any two sites.
    This function returns the corresponding von Neumann entropy, which
    is a measure of the entanglement between the two parts.

    The parameter n specifies that the splitting should be done between
    sites n and n + 1.

    Parameters
    ----------
    n : int
        Site offset for split.
    ret_schmidt_sq : bool
        Whether to also return the squared Schmidt coefficients.

    Returns
    -------
    S : float
        The half-chain entropy.
    lam : sequence of float (if ret_schmidt_sq==True)
        The squared Schmidt coefficients.
    """
    lam = self.schmidt_sq(n)
    S = -sp.sum(lam * sp.log2(lam)).real

    if ret_schmidt_sq:
        return S, lam
    else:
        return S
def find_cluster(lost_node: Tree, intra_distances=None):
    """
    Recursively calculates whether a node in a tree is the root of a cluster,
    where a cluster is defined as a sub-tree (clade) whose members satisfy the
    condition: the distance to the parent of the current subtree's root
    multiplied by the logarithm base 2 of the number of cousins is greater
    than the mean(intra-cluster root-to-tip distances).
    Adding a large number of members to the clade is penalized, as well as
    large distances from the existing subtree to a new parent.

    :param lost_node: A node within a tree, for which we want to orient
    :param intra_distances: A list with the current set of leaf-tip distances
    :return: Tree node, a list of float distances
    """
    # A mutable default argument would persist between calls, so use None
    # as the sentinel instead.
    if intra_distances is None:
        intra_distances = []
    parent = lost_node.up
    if lost_node.is_root() or parent.is_root():
        return lost_node, intra_distances

    if not intra_distances:
        # If this is the initial attempt at finding lost_node's cluster,
        # find the intra-cluster leaf distances
        intra_distances = get_tip_distances(lost_node)

    # Penalty for increasing the size of the clade is log-base 2
    cousins = lost_node.get_sisters()[0].get_leaf_names()
    parent_dist = parent.get_distance(lost_node)
    cost = parent_dist * log2(len(cousins) + 1)

    if mean(intra_distances) > cost:
        return lost_node, intra_distances

    # Add the distance to the new parent to each of the intra-cluster distances
    intra_distances = [dist + parent_dist for dist in intra_distances]
    for cousin in cousins:
        intra_distances.append(parent.get_distance(cousin))

    return find_cluster(parent, intra_distances)
def integrator_solve(df):
    cum_vec = np.array(np.cumsum(df['ct']))
    binheaders = utils.get_column_headers(df)
    n_bins = 1000
    n_batches = len(binheaders)
    f_binned = sp.zeros((n_batches, n_bins))
    # n_bins integration intervals need n_bins + 1 edges.
    bins = np.linspace(cum_vec[-1] / 1000 - 1, cum_vec[-1] - 1,
                       n_bins + 1, dtype=int)
    for i in range(n_bins):
        for j in range(n_batches):
            batch_name = binheaders[j]
            f_binned[j, i] = scipy.integrate.quad(integrand_1,
                                                  bins[i], bins[i + 1])[0]
    f_reg = scipy.ndimage.gaussian_filter1d(f_binned, 0.04 * n_bins, axis=0)
    f_reg = f_reg / f_reg.sum()

    # compute marginal probabilities
    p_b = sp.sum(f_reg, axis=1)
    p_s = sp.sum(f_reg, axis=0)

    # finally sum to compute the MI; f_reg is indexed (batch, bin)
    MI = 0
    for j in range(n_batches):
        for i in range(n_bins):
            if f_reg[j, i] != 0:
                MI = MI + f_reg[j, i] * sp.log2(f_reg[j, i] / (p_b[j] * p_s[i]))
    return MI
def plotHeatmap(fwrap, aclass, algoparams, trials, maxsteps):
    """ Visualizing performance across trials and across time
    (iterations in powers of 2) """
    psteps = int(log2(maxsteps)) + 1
    storesteps = [0] + [2**x for x in range(psteps)]
    ls = lossTraces(fwrap, aclass, dim=trials, maxsteps=maxsteps,
                    storesteps=storesteps, algoparams=algoparams,
                    minLoss=1e-10)

    initv = mean(ls[0])
    maxgain = exp(fwrap.stochfun.maxLogGain(maxsteps) + 1)
    maxneggain = (sqrt(maxgain))

    M = zeros((psteps, trials))
    for sid in range(psteps):
        # skip the initial values
        winfactors = clip(initv / ls[sid + 1], 1. / maxneggain, maxgain)
        winfactors[isnan(winfactors)] = 1. / maxneggain
        M[sid, :] = log10(sorted(winfactors))

    pylab.imshow(M.T, interpolation='nearest', cmap=cm.RdBu,  #@UndefinedVariable
                 aspect=psteps / float(trials) / 1,
                 vmin=-log10(maxgain), vmax=log10(maxgain),
                 )
    pylab.xticks([])
    pylab.yticks([])
    return ls
def MarkovMutualInfo(transitionMatrix):
    p0 = MarkovSteadyState(transitionMatrix)
    #M = scipy.transpose(transitionMatrix)
    M = transitionMatrix  #*** testing
    sum, dot = scipy.sum, scipy.dot
    log2 = lambda x: scipy.nan_to_num(scipy.log2(x))  # treats 0*log2(0) as 0
    return scipy.real_if_close(sum(dot(p0, M * log2(M))) - sum(p0 * log2(p0)))
def spectral_entropy(x, framesize=1024, hopsize=512, fs=44100):
    """
    Calculate spectral entropy

    Parameters:
      x: ndarray
        input signal
      framesize: int
        framesize
      hopsize: int
        hopsize
      fs: int
        samplingrate

    Returns:
      result: ndarray
        spectral entropy [frame * 1]
    """
    S, F, T = stft(x, framesize, hopsize, fs, 'hann')
    S = sp.absolute(S)
    # Normalise each frame's magnitude spectrum into a probability mass
    # function, then take the Shannon entropy per frame.
    pmf = S / S.sum(0)
    entropy = -(pmf * sp.log2(pmf)).sum(0)
    return entropy
def OrderGrupShank(kurva, P=None, coba=None):
    '''
    Algorithm 3.... (Shanks' algorithm)
    Input  : elliptic curve E
    Output : order of the group E(Z_p)
    '''
    p = kurva.p
    if coba is None:
        coba = int(log2(float(p))) + 5
    if P is None:
        P = GeneratorTitik(kurva)
    M = BabyStepGiantStep(kurva, P)
    M = OrderTitik(M, P)
    List_M = []
    N = int(ceil(((p + 1 - 2 * sqrt(p)) / M))) * M
    while N <= p + 1 + 2 * sqrt(p):
        List_M.append(N)
        N = N + M
    counter = 0
    while len(List_M) != 1 and counter < coba:
        Q = GeneratorTitik(kurva)
        # Iterate over a copy so candidates can be removed while looping.
        for Mi in list(List_M):
            MiQ = Mi * Q
            if MiQ != Infty(kurva):
                List_M.remove(Mi)
        counter += 1
    if counter == coba:
        print('Error: order not found')
        return 0
    return List_M[0]
def spectral(Y, X, dtype=sp.float32):
    from sklearn.cluster import SpectralCoclustering

    def scale_normalize(X):
        "from https://github.com/scikit-learn/scikit-learn/blob/b194674c4/sklearn/cluster/_bicluster.py#L108"
        row_diag = sp.asarray(sp.sqrt(X.sum(axis=1))).squeeze()
        col_diag = sp.asarray(sp.sqrt(X.sum(axis=0))).squeeze()
        row_diag[row_diag == 0] = 1.0
        col_diag[col_diag == 0] = 1.0
        row_diag = 1.0 / row_diag
        col_diag = 1.0 / col_diag
        if smat.issparse(X):
            n_rows, n_cols = X.shape
            r = smat.dia_matrix((row_diag, [0]), shape=(n_rows, n_rows))
            c = smat.dia_matrix((col_diag, [0]), shape=(n_cols, n_cols))
            an = r * X * c
        else:
            an = row_diag[:, sp.newaxis] * X * col_diag
        return an, row_diag, col_diag

    coclustering = SpectralCoclustering(n_clusters=16384, random_state=1)
    normalized_data, row_diag, col_diag = scale_normalize(Y.T)
    n_sv = 1 + int(sp.ceil(sp.log2(coclustering.n_clusters)))
    u, v = coclustering._svd(normalized_data, n_sv, n_discard=1)
    label_embedding = smat.csr_matrix(u, dtype=dtype)
    return label_embedding
def gen(self, kdim, depth, algo, seed, max_iter=10, threads=1):
    assert algo in [Indexer.KMEANS, Indexer.KDTREE, Indexer.ORDINAL,
                    Indexer.UNIFORM, Indexer.BALANCED_ORDINAL,
                    Indexer.KDTREE_CYCLIC, Indexer.SKMEANS]
    if algo in [Indexer.KMEANS, Indexer.KDTREE, Indexer.KDTREE_CYCLIC,
                Indexer.SKMEANS]:
        feat_mat = self.py_feat_mat
        codes = sp.zeros(feat_mat.rows, dtype=sp.uint32)
        clib.get_codes(feat_mat, depth, algo, seed, codes,
                       max_iter=max_iter, threads=threads)
    elif algo in [Indexer.ORDINAL, Indexer.UNIFORM]:
        rp_clf = RandomProject(self.feat_mat, kdim, depth, algo, seed)
        codes = rp_clf.get_codes()
    elif algo in [Indexer.BALANCED_ORDINAL]:
        assert int(2**sp.log2(kdim)) == kdim
        codes = self.balaced_ordinal_gen(kdim, depth, seed, threads=threads)
    else:
        raise NotImplementedError('unknown algo {}'.format(algo))
    return SeC(kdim, depth, algo, seed, codes)
def KLdivergence(pList, qList, skipQzeros=False):
    """
    In bits.

    skipQzeros (False) : If qList has zeros where pList doesn't,
                         the KL divergence isn't defined (at least
                         according to Wikipedia).  Set this flag
                         True to instead skip these values in
                         the calculation.
    """
    eps = 1e-5
    if (abs(sum(pList) - 1.) > eps) or (abs(sum(qList) - 1.) > eps):
        print("KLdivergence: WARNING: Check normalization of distributions.")
        print("KLdivergence: sum(pList) =", sum(pList))
        print("KLdivergence: sum(qList) =", sum(qList))
    if len(pList) != len(qList):
        raise Exception("pList and qList have unequal length.")
    div = 0.
    for p, q in zip(pList, qList):
        if p == 0.:
            div += 0.
        elif (q == 0.) and skipQzeros:
            div += 0.
        elif (q == 0.) and not skipQzeros:
            return scipy.nan
        else:
            div += p * scipy.log2(p / q)
    return div
def hz2midi(hz):
    """
    midi = hz2midi(hz)

    Converts frequency in Hertz to midi notation.
    """
    return 12 * scipy.log2(hz / 440.0) + 69
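# Quick sanity checks (my own, not from the source): A4 = 440 Hz is MIDI
# note 69, and an octave doubles the frequency (+12 semitones).
import scipy  # older SciPy re-exports numpy's log2

assert hz2midi(440.0) == 69.0
assert hz2midi(880.0) == 81.0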
def main_loop(init_param, X, K, iter=1000, tol=1e-6):
    """
    Gaussian Mixture Model

    Arguments:
    - `X`: Input data (2D array, [[x11, x12, ..., x1D], ..., [xN1, ... xND]]).
    - `K`: Number of clusters.
    - `iter`: Number of iterations to run.
    - `tol`: Tolerance.
    """
    X = sp.asarray(X)
    N, D = X.shape
    pi = sp.asarray(init_param["coff"])
    mu = sp.asarray(init_param["mean"])
    sigma = sp.asarray(init_param["cov"])
    L = sp.inf

    for i in range(iter):
        # E-step: responsibilities gamma[n, k] ~ pi_k N(x_n | mu_k, sigma_k)
        gamma = sp.apply_along_axis(
            lambda x: sp.fromiter(
                (pi[k] * gauss_mixture_calculate(x, mu[k], sigma[k])
                 for k in range(K)),
                dtype=float),
            1, X)
        gamma /= sp.sum(gamma, 1)[:, sp.newaxis]

        # M-step
        Nk = sp.sum(gamma, 0)
        mu = sp.sum(X * gamma.T[..., sp.newaxis], 1) / Nk[..., sp.newaxis]
        xmu = X[:, sp.newaxis, :] - mu
        sigma = (sp.sum(gamma[..., sp.newaxis, sp.newaxis]
                        * xmu[:, :, sp.newaxis, :]
                        * xmu[:, :, :, sp.newaxis], 0)
                 / Nk[..., sp.newaxis, sp.newaxis])
        pi = Nk / N

        # Likelihood
        Lnew = sp.sum(sp.log2(sp.sum(
            sp.apply_along_axis(
                lambda x: sp.fromiter(
                    (pi[k] * gauss_mixture_calculate(x, mu[k], sigma[k])
                     for k in range(K)),
                    dtype=float),
                1, X),
            1)))

        if abs(L - Lnew) < tol:
            break
        L = Lnew

    print("log likelihood=%s" % L)
    return dict(pi=pi, mu=mu, sigma=sigma, gamma=gamma)
def pad(series):
    '''
    Returns a time series padded with zeros to the next-highest power of two.
    '''
    N = len(series)
    next_N = int(2**sp.ceil(sp.log2(N)))
    return sp.hstack((series, sp.zeros(next_N - N)))
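# Hedged usage sketch (assumes scipy imported as sp, with the NumPy
# re-exports used by the function available):
import scipy as sp

padded = pad(sp.ones(5))
print(len(padded))  # 8: the length is rounded up to the next power of two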
def calculate(self, instr):
    vals = None
    if isinstance(instr, _Seq):
        vals = self.pssm.calculate(instr)
    else:
        vals = self.pssm.calculate(_Seq(instr, _unamb_dna))
    energy = self.pssm.calculate(self.consensus) - vals
    # biopython uses log base 2, but GEMSTAT uses log base e
    # TODO: Make it automatically determine what the base of the biopython
    # log is by creating a special pwm.
    return energy / _S.log2(_S.e)
def nextpow2(x):
    """
    Get the next power of 2 (returns the exponent, not the power itself)
    """
    result = math.ceil(sp.log2(np.abs(x)))
    result = int(result)
    return result
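# Usage sketch (mine): nextpow2 returns the exponent, not the power itself.
import math
import numpy as np
import scipy as sp

assert nextpow2(1000) == 10          # 2**10 = 1024 >= 1000
assert 2**nextpow2(44100) == 65536   # next power of two above 44100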
def pad(x):
    T = len(x)
    target = int(2**sp.ceil(sp.log2(T)))
    left = (target - T) // 2
    right = target - T - left
    # Zero-pad symmetrically and keep a boolean mask marking the original samples.
    padded = sp.r_[[0] * left, x, [0] * right]
    mask = sp.r_[[0] * left, [1] * len(x), [0] * right].astype(bool)
    return padded, mask
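# Hedged usage sketch for the masked variant of pad() above (assumes scipy
# imported as sp, older SciPy re-exporting numpy):
import scipy as sp

padded, mask = pad(sp.arange(1, 6))  # 5 samples -> centred in 8
assert len(padded) == 8
assert (padded[mask] == sp.arange(1, 6)).all()  # the mask recovers the input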
def create_rgb_LUT(n_classes):
    """ Create a rgb color look up table (LUT) for all classes. """
    # Define rgb colors for the different classes
    # with (somewhat) max differences in hue between nearby classes

    # Number of iterations over the grouping of 2x 3 colors
    n_classes = max(n_classes, 1)  # input check > 0
    n = ((n_classes - 1) // 6) + 1  # > 0

    # Create a list of offsets for the grouping of 2x 3 colors
    # that (somewhat) max differences in hue between nearby classes
    offset_list = [0]  # creates pure R G B - Y C M colors
    d = 128
    n_offset_levels = int(scipy.log2(n - 1) + 1) if n > 1 else 1  # log(0) not defined
    n_offset_levels = min(n_offset_levels, 4)  # limit number of colors to 96
    for i in range(n_offset_levels):
        # Create in between R G B Y C M colors
        # in a divide by 2 pattern per level
        # i=0: + 128,
        # i=1: + 64, 192,
        # i=2: + 32, 160, 96, 224,
        # i=3: + 16, 144, 80, 208, 48, 176, 112, 240
        # abs max i=7 with + 1 ...
        offset_list += [int(offset + d) for offset in offset_list]
        d /= 2

    # If there are more classes than colors
    # then the offset_list is duplicated,
    # which assigns the same colors to different classes
    # but at least to the most distant classes
    length = len(offset_list)
    if n > length:
        offset_list = int(1 + scipy.ceil((n - length) / length)) * offset_list

    rgb_LUT = []
    for i in range(n):
        # Calculate grouping of 2x 3 rgb colors R G B - Y C M
        # that (somewhat) max differences in hue between nearby classes
        # and makes it easy to define other in between colors
        # using a simple linear offset

        # Based on HSI to RGB calculation with I = 1 and S = 1
        offset = offset_list[i]
        rgb_LUT.append((255, offset, 0))        # 0 <= h < 60 RED ...
        rgb_LUT.append((0, 255, offset))        # 120 <= h < 180 GREEN ...
        rgb_LUT.append((offset, 0, 255))        # 240 <= h < 300 BLUE ...
        rgb_LUT.append((255 - offset, 255, 0))  # 60 <= h < 120 YELLOW ...
        rgb_LUT.append((0, 255 - offset, 255))  # 180 <= h < 240 CYAN ...
        rgb_LUT.append((255, 0, 255 - offset))  # 300 <= h < 360 MAGENTA ...

    return rgb_LUT
def entropy2(values):
    """Calculate the entropy of vector values.

    values will be flattened to a 1d ndarray."""
    values = sp.asarray(values).flatten()
    # Run boundaries in the sorted array give the count of each distinct
    # value; the +1 offset points at the first element of each new run.
    changes = sp.diff(sp.sort(values)).nonzero()[0] + 1
    p = sp.diff(sp.r_[0, changes, values.size]) / float(values.size)
    H = (p * sp.log2(p)).sum()
    return -H
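# Sanity sketch (mine): four equally frequent values give log2(4) = 2 bits.
import scipy as sp

assert abs(entropy2([1, 2, 3, 4, 1, 2, 3, 4]) - 2.0) < 1e-12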
def _bandwidth(data, N=None, MIN=None, MAX=None):
    '''
    An implementation of the kde bandwidth selection method outlined in:

    Z. I. Botev, J. F. Grotowski, and D. P. Kroese. Kernel density
    estimation via diffusion. The Annals of Statistics, 38(5):2916-2957, 2010.

    Based on the implementation in Matlab by Zdravko Botev.

    Daniel B. Smith, PhD
    https://github.com/Daniel-B-Smith/KDE-for-SciPy/blob/master/kde.py
    Updated 1-23-2013
    '''
    # Parameters to set up the mesh on which to calculate
    N = 2**14 if N is None else int(2**sp.ceil(sp.log2(N)))
    if MIN is None or MAX is None:
        minimum = min(data)
        maximum = max(data)
        Range = maximum - minimum
        MIN = minimum - Range / 10 if MIN is None else MIN
        MAX = maximum + Range / 10 if MAX is None else MAX

    # Range of the data
    R = MAX - MIN

    # Histogram the data to get a crude first approximation of the density
    M = len(data)
    DataHist, bins = sp.histogram(data, bins=N, range=(MIN, MAX))
    DataHist = DataHist / M
    DCTData = scipy.fftpack.dct(DataHist, norm=None)

    I = [iN * iN for iN in range(1, N)]
    SqDCTData = (DCTData[1:] / 2)**2

    # The fixed point calculation finds the bandwidth = t_star
    guess = 0.1
    try:
        t_star = scipy.optimize.brentq(__fixed_point, 0, guess,
                                       args=(M, I, SqDCTData))
    except ValueError:
        print('Oops!')
        return None

    # Smooth the DCTransformed data using t_star
    SmDCTData = DCTData * sp.exp(-sp.arange(N)**2 * sp.pi**2 * t_star / 2)
    # Inverse DCT to get density
    density = scipy.fftpack.idct(SmDCTData, norm=None) * N / R
    mesh = [(bins[i] + bins[i + 1]) / 2 for i in range(N)]
    bandwidth = sp.sqrt(t_star) * R

    density = density / sp.trapz(density, mesh)
    # return bandwidth, mesh, density
    return bandwidth
def entropy2(values):
    """Calculate the entropy of vector values.

    values will be flattened to a 1d ndarray."""
    values = values.flatten()
    n = len(values)
    # Boundaries between runs of equal values in the sorted array give the
    # count of each distinct value (note the +1 offset on the change indices).
    changes = sp.diff(sp.sort(values)).nonzero()[0] + 1
    p = sp.diff(sp.r_[0, changes, n]) / float(n)
    H = -((p * sp.log2(p)).sum())
    return H
def zero_padding(signal, squared=True):
    """Creates a new ndarray that pads signal with zeros so each dimension
    is a power of two, keeping the signal centred."""
    check_dim(signal, 2)
    rows, cols = signal.shape
    pow_rows = int(np.ceil(np.log2(rows)))
    pow_cols = int(np.ceil(np.log2(cols)))

    if squared:
        if pow_cols > pow_rows:
            pow_rows = pow_cols
        else:
            pow_cols = pow_rows

    padded_signal = np.zeros((2**pow_rows, 2**pow_cols), dtype=signal.dtype)
    # Slice indices must be ints (np.trunc returns floats).
    y_0 = int((2**pow_rows - rows) / 2)
    y_t = y_0 + signal.shape[0]
    x_0 = int((2**pow_cols - cols) / 2)
    x_t = x_0 + signal.shape[1]
    padded_signal[y_0:y_t, x_0:x_t] = signal

    return padded_signal
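# Hedged usage sketch; check_dim is an external helper assumed to validate
# the array's dimensionality, so a stand-in is defined to make this runnable.
import numpy as np

def check_dim(arr, ndim):
    assert arr.ndim == ndim

sig = np.ones((3, 5))
print(zero_padding(sig).shape)  # (8, 8): both axes padded to the same power of two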
def cq_fft(sig, fs, q_rate=q_rate_def, fmin=fmin_default, fmax=fmax_default,
           fratio=fratio_default, win=hamming, spThresh=0.0054):
    # 100 frames per 1 second
    nhop = int(round(0.01 * fs))

    # Calculate Constant-Q properties
    nfreq = get_num_freq(fmin, fmax, fratio)    # number of freq bins
    freqs = get_freqs(fmin, nfreq, fratio)      # freqs [Hz]
    Q = int((1. / ((2**fratio) - 1)) * q_rate)  # Q value

    # Preparation
    L = len(sig)
    nframe = L // nhop  # number of time frames

    # N > max(N_k)
    fftLen = int(2**(ceil(log2(int(float(fs * Q) / freqs[0])))))
    h_fftLen = fftLen // 2

    # Compute the kernel matrix
    sparseKernel = zeros([nfreq, fftLen], dtype=complex128)
    for k in range(nfreq):
        tmpKernel = zeros(fftLen, dtype=complex128)
        freq = freqs[k]
        # N_k
        N_k = int(float(fs * Q) / freq)
        # Align the centre of the FFT window with the analysis segment
        startWin = (fftLen - N_k) // 2
        tmpKernel[startWin: startWin + N_k] = (
            (hamming(N_k) / N_k)
            * exp(two_pi_j * Q * arange(N_k, dtype=float64) / N_k))
        # FFT (kernel matrix)
        sparseKernel[k] = fft(tmpKernel)

    # Zero out sufficiently small values
    sparseKernel[abs(sparseKernel) <= spThresh] = 0

    # Convert to a sparse matrix
    sparseKernel = csr_matrix(sparseKernel)

    # Take the complex conjugate
    sparseKernel = sparseKernel.conjugate() / fftLen

    # New signal (zero-padded for the computation)
    new_sig = zeros(len(sig) + fftLen, dtype=float64)
    new_sig[h_fftLen: -h_fftLen] = sig

    ret = zeros([nframe, nfreq], dtype=complex128)
    for iiter in range(nframe):
        #print(iiter + 1, "/", nframe)
        istart = iiter * nhop
        iend = istart + fftLen
        # FFT (input signal)
        sig_fft = fft(new_sig[istart: iend])
        # Multiply with the spectral kernel matrix
        ret[iiter] = sig_fft * sparseKernel.T

    return ret, freqs
def entropy(p):
    """Compute the negative entropy of a discrete probability distribution.

    The calculation is done directly using the entropy definition,

    -H(p) = sum_{x} p(x) log_2( p(x) )

    Base 2 logarithm is used, so that the returned value is measured in bits.
    """
    if (p == 0.).sum() > 0:
        raise Exception("Zero bins found")
    return (p * log2(p)).sum()
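# Sanity sketch (mine): a fair coin has entropy 1 bit, so the negative
# entropy returned here is -1.
import numpy as np
from numpy import log2

assert entropy(np.array([0.5, 0.5])) == -1.0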
def calculate_ic(pssm, background=None):
    r'''Given a normalized PSSM, calculates the information content, given by

    IC(w) = log2(J) + \sum_{j=1}^{J} [p_{wj} * log2(p_{wj})]

    If given a background, computes

    IC(w) = \sum_{j=1}^{J} [p_{wj} * log2(p_{wj} / b_{wj})]
    '''
    ans = (pssm * calc_log_likelihood(pssm, background)).sum()
    if background is None:
        ans += log2(len(pssm))
    return ans
def cwt(series, wavelet, octaves=None, dscale=0.25, minscale=None, dt=1.0):
    '''
    Perform a continuous wavelet transform on a series.

    Parameters
    ----------
    series : ndarray
    octaves : int
        Number of powers-of-two over which to perform the transform.
    dscale : float
        Fraction of a power of two separating the scales. Defaults to 0.25.
    minscale : float
        Minimum scale. If none supplied, defaults to 2.0 * dt.
    dt : float
        Time step between observations in the series.

    Returns
    -------
    WaveletTransform
        WaveletTransform object with the results of the CWT.

    See Also
    --------
    ccwt : Cross continuous wavelet transform, for the wavelet coherence
        between two series

    Notes
    -----
    This function uses a fast Fourier Transform (FFT) to convolve the
    wavelet with the series at each scale. For details, see:

    Torrence, C. and G. P. Compo, 1998: A Practical Guide to Wavelet
    Analysis. Bull. Amer. Meteor. Soc., 79, 61-78.
    '''
    # Generate the array of scales
    if not minscale:
        minscale = 2.0 * dt
    if not octaves:
        octaves = int(sp.log2(len(series) * dt / minscale) / dscale) * dscale
    scales = minscale * 2**sp.arange(octaves + dscale, step=dscale)

    # Demean and pad time series with zeroes to next highest power of 2
    N = len(series)
    series = pad(series - series.mean())
    N_padded = len(series)

    # Convolve the wavelet with the series at each scale via the FFT
    wave = sp.zeros((len(scales), N_padded)) + complex(0, 0)
    series_ft = sp.fft(series)
    for i, s in enumerate(scales):
        wave[i, :] = sp.ifft(series_ft * wavelet.daughter(s, N_padded, dt))

    wave = wave[:, :N]
    series = series[:N]
    return WaveletTransform(series, wave, scales, dscale, wavelet, dt)
def score(self):
    self.uniq_docids()
    for term in self.query_terms:
        if term not in self.im.token_id_map:
            log.info('term %s is not found in term_freq, skip ...', term)
            continue
        term_docs_freq = self.im.get_term_info(term).get_pos_map()
        ttf = self.im.get_term_info(term).get_cf()
        for docid in self.rank_list:
            tf = 0
            if docid in term_docs_freq:
                tf = len(term_docs_freq[docid])
            score_this_term = sp.log2(self.score_once(tf, docid)
                                      + self.score_twice(ttf))
            self.rank_list[docid] += score_this_term
def restore_RCF_l(self):
    G_nm1 = None
    l_nm1 = self.l[0]
    for n in range(self.N + 1):
        if n == 0:
            x = l_nm1
        else:
            x = mm.mmul(mm.H(G_nm1), l_nm1, G_nm1)
        M = self.eps_l(n, x)
        ev, EV = la.eigh(M)

        self.l[n] = mm.simple_diag_matrix(ev, dtype=self.typ)
        G_n_i = EV

        if n == 0:
            G_nm1 = mm.H(EV)   # for left uniform case
            l_nm1 = self.l[n]  # for sanity check
            self.u_gnd_l.r = mm.mmul(G_nm1, self.u_gnd_l.r, G_n_i)  # since r is not eye

        for s in range(self.q[n]):
            self.A[n][s] = mm.mmul(G_nm1, self.A[n][s], G_n_i)

        if self.sanity_checks:
            l = self.eps_l(n, l_nm1)
            if not sp.allclose(l, self.l[n], atol=1E-12, rtol=1E-12):
                print("Sanity Fail in restore_RCF_l!: l_%u is bad" % n)
                print(la.norm(l - self.l[n]))

        G_nm1 = mm.H(EV)
        l_nm1 = self.l[n]

        if self.sanity_checks:
            if not sp.allclose(sp.dot(G_nm1, G_n_i), sp.eye(G_n_i.shape[0]),
                               atol=1E-12, rtol=1E-12):
                print("Sanity Fail in restore_RCF_l!: Bad GT for l_%u" % n)

    # Now G_nm1 = G_N
    G_nm1_i = mm.H(G_nm1)
    for s in range(self.q[self.N + 1]):
        self.A[self.N + 1][s] = mm.mmul(G_nm1, self.A[self.N + 1][s], G_nm1_i)

    ## This should not be necessary if G_N is really unitary
    #self.r[self.N] = mm.mmul(G_nm1, self.r[self.N], mm.H(G_nm1))
    #self.r[self.N + 1] = self.r[self.N]
    self.u_gnd_r.l[:] = mm.mmul(mm.H(G_nm1_i), self.u_gnd_r.l, G_nm1_i)

    # Half-chain von Neumann entropies from the diagonal l matrices
    self.S_hc = sp.zeros((self.N), dtype=sp.complex128)
    for n in range(1, self.N + 1):
        self.S_hc[n - 1] = -sp.sum(self.l[n].diag * sp.log2(self.l[n].diag))
def _restore_CF_diag(self):
    nc = self.N_centre
    self.S_hc = sp.zeros((self.N + 1), dtype=sp.complex128)

    # Want: r[0 <= n < nc] diagonal
    Ui = sp.eye(self.D[nc], dtype=self.typ)
    for n in range(nc, 0, -1):
        self.r[n - 1], Um1, Um1_i = tm.restore_LCF_r(self.A[n], self.r[n], Ui,
                                                     sanity_checks=self.sanity_checks)
        self.S_hc[n - 1] = -sp.sum(self.r[n - 1].diag * sp.log2(self.r[n - 1].diag))
        Ui = Um1_i

    # Now U is U_0
    U = Um1
    for s in range(self.q[0]):
        self.A[0][s] = U.dot(self.A[0][s]).dot(Ui)
    self.uni_l.r = U.dot(self.uni_l.r.dot(U.conj().T))

    # And now: l[nc <= n <= N] diagonal
    Um1 = mm.eyemat(self.D[nc - 1], dtype=self.typ)
    for n in range(nc, self.N + 1):
        self.l[n], U, Ui = tm.restore_RCF_l(self.A[n], self.l[n - 1], Um1,
                                            sanity_checks=self.sanity_checks)
        self.S_hc[n] = -sp.sum(self.l[n].diag * sp.log2(self.l[n].diag))
        Um1 = U

    # Now, Um1 = U_N
    Um1_i = Ui
    for s in range(self.q[0]):
        self.A[self.N + 1][s] = Um1.dot(self.A[self.N + 1][s]).dot(Um1_i)
    self.uni_r.l = Um1_i.conj().T.dot(self.uni_r.l.dot(Um1_i))
def _add_coi(self, color, data_present=None, fill=False):
    n = len(self.series)
    # Cone-of-influence curve, rising from each end of the record
    coi_whole = self.coi * self.dt * sp.hstack((sp.arange((n + 1) // 2),
                                                sp.flipud(sp.arange(n // 2))))
    coi_list = [coi_whole]
    baseline = sp.ones(n) * self.period[-1]
    if data_present is not None:
        for i in range(2, len(data_present) - 1):
            if data_present[i - 1] and (not data_present[i]):
                coi_list.append(circ_shift(coi_whole, i))
                baseline[i] = 0
            elif not data_present[i]:
                baseline[i] = 0
            elif (not data_present[i - 1]) and data_present[i]:
                coi_list.append(circ_shift(coi_whole, i))
    coi_list.append(baseline)
    coi_line = sp.array(coi_list).min(axis=0)
    coi_line[coi_line == 0] = 1e-4
    x = sp.hstack((self.time, sp.flipud(self.time)))
    y = sp.log2(sp.hstack((coi_line, sp.ones(n) * self.period[-1])))
    if fill:
        plt.fill(x, y, color='black', alpha=0.3)
    else:
        plt.plot(self.time, sp.log2(coi_line), color=color, linestyle=':')
def restore_SCF(self):
    X = la.cholesky(self.r, lower=True)
    Y = la.cholesky(self.l, lower=False)

    U, sv, Vh = la.svd(Y.dot(X))

    # sv contains the Schmidt coefficients,
    lam = sv**2
    self.S_hc = -np.sum(lam * sp.log2(lam))

    S = m.simple_diag_matrix(sv, dtype=self.typ)
    Srt = S.sqrt()

    g = m.mmul(Srt, Vh, m.invtr(X, lower=True))
    g_i = m.mmul(m.invtr(Y, lower=False), U, Srt)

    for s in range(self.q):
        self.A[s] = m.mmul(g, self.A[s], g_i)

    if self.sanity_checks:
        Sfull = np.asarray(S)

        if not np.allclose(g.dot(g_i), np.eye(self.D)):
            print("Sanity check failed! restore_SCF, bad GT!")

        l = m.mmul(m.H(g_i), self.l, g_i)
        r = m.mmul(g, self.r, m.H(g))

        if not np.allclose(Sfull, l):
            print("Sanity check failed: restore_SCF, left failed!")

        if not np.allclose(Sfull, r):
            print("Sanity check failed: restore_SCF, right failed!")

        l = self.eps_l(Sfull)
        r = self.eps_r(Sfull)

        if not np.allclose(Sfull, l, rtol=self.itr_rtol * self.check_fac,
                           atol=self.itr_atol * self.check_fac):
            print("Sanity check failed: restore_SCF, left bad!")

        if not np.allclose(Sfull, r, rtol=self.itr_rtol * self.check_fac,
                           atol=self.itr_atol * self.check_fac):
            print("Sanity check failed: restore_SCF, right bad!")

    self.l = S
    self.r = S
def compute_MI_origemcee(seq_matQ, seq_matR, batches, ematQ, ematR, gamma, R_0):
    # preliminaries
    n_seqs = len(batches)
    n_batches = int(batches.max()) + 1  # assumes zero indexed batches
    n_bins = 1000

    #energies = sp.zeros(n_seqs)
    f = sp.zeros((n_batches, n_seqs))

    # compute energies
    # for i in range(n_seqs):
    #     energies[i] = sp.sum(seqs[:,:,i]*emat)
    # alternate way
    energies = np.zeros(n_seqs)
    for i in range(n_seqs):
        RNAP = (seq_matQ[:, :, i] * ematQ).sum()
        TF = (seq_matR[:, :, i] * ematR).sum() + R_0
        energies[i] = -RNAP + mp.log(1 + mp.exp(-TF - gamma)) - mp.log(1 + mp.exp(-TF))

    # sort energies
    inds = sp.argsort(energies)
    for i, ind in enumerate(inds):
        f[batches[ind], i] = 1.0 / n_seqs  # batches aren't zero indexed

    # bin and convolve with Gaussian
    f_binned = sp.zeros((n_batches, n_bins))
    for i in range(n_batches):
        f_binned[i, :] = sp.histogram(f[i, :].nonzero()[0], bins=n_bins,
                                      range=(0, n_seqs))[0]
    #f_binned = f_binned/f_binned.sum()
    f_reg = sp.ndimage.gaussian_filter1d(f_binned, 0.04 * n_bins, axis=1)
    f_reg = f_reg / f_reg.sum()

    # compute marginal probabilities
    p_b = sp.sum(f_reg, axis=1)
    p_s = sp.sum(f_reg, axis=0)

    # finally sum to compute the MI
    MI = 0
    for i in range(n_batches):
        for j in range(n_bins):
            if f_reg[i, j] != 0:
                MI = MI + f_reg[i, j] * sp.log2(f_reg[i, j] / (p_b[i] * p_s[j]))
    print(MI)
    return MI, f_reg