def condition_distribution(year, bridge_db, pmatrix):
    cs_dist = []
    cs = np.arange(8, 0, -1)
    pmatrix = pmatrix.item()
    if int(year) % 2 != 0:
        print('illegal year of interest, must be even number')
    else:
        indx = int(year) // 2
        for (name, lat, long, length, width, deck_cs0, super_cs0, sub_cs0,
             detour, onlink) in bridge_db:
            # create condition state distribution of deck
            deck_cs0_array = (cs == deck_cs0).astype('float')
            deck_pk = np.dot(np.linalg.matrix_power(pmatrix['deck'].T, indx), deck_cs0_array)
            deck_cs_dist = stats.rv_discrete(name='deck_cs_dist', values=(cs, deck_pk))
            # create superstructure distribution
            super_cs0_array = (cs == super_cs0).astype('float')
            super_pk = np.dot(np.linalg.matrix_power(pmatrix['super'].T, indx), super_cs0_array)
            super_cs_dist = stats.rv_discrete(name='super_cs_dist', values=(cs, super_pk))
            # create substructure distribution
            sub_cs0_array = (cs == sub_cs0).astype('float')
            sub_pk = np.dot(np.linalg.matrix_power(pmatrix['sub'].T, indx), sub_cs0_array)
            sub_cs_dist = stats.rv_discrete(name='sub_cs_dist', values=(cs, sub_pk))
            cs_dist.append((name, deck_cs_dist, super_cs_dist, sub_cs_dist))
    return cs_dist
def processDomain(self, X, Y):
    for i in range(len(X)):
        self.processLine(X[i], Y[i] == 1)
    self.sentWordPercentageInPos /= self.totalPosInstances
    self.sentWordPercentageInNeg /= self.totalNegInstances
    # build word distribution for positive words
    for word in self.totalPosFreq:
        self.totalPosFreq[word] /= self.totalPosWordsInDomain
    self.posTokenizer = dict(enumerate(self.totalPosFreq.keys()))
    self.posDist = stats.rv_discrete(
        name='positiveDist',
        values=(list(range(len(self.totalPosFreq))), list(self.totalPosFreq.values())))
    # build word distribution for negative words
    for word in self.totalNegFreq:
        self.totalNegFreq[word] /= self.totalNegWordsInDomain
    self.negTokenizer = dict(enumerate(self.totalNegFreq.keys()))
    self.negDist = stats.rv_discrete(
        name='negativeDist',
        values=(list(range(len(self.totalNegFreq))), list(self.totalNegFreq.values())))
    # build word distribution for objective words
    for word in self.totalObjFreq:
        self.totalObjFreq[word] /= self.totalObjWordsInDomain
    self.objTokenizer = dict(enumerate(self.totalObjFreq.keys()))
    self.objDist = stats.rv_discrete(
        name='objectiveDist',
        values=(list(range(len(self.totalObjFreq))), list(self.totalObjFreq.values())))
    # build positive line length distribution
    for length in self.posLineLengthDict:
        self.posLineLengthDict[length] /= self.totalPosInstances
    self.posLineLengthDist = stats.rv_discrete(
        name='posLineLengthDist',
        values=(list(self.posLineLengthDict.keys()), list(self.posLineLengthDict.values())))
    # build negative line length distribution
    for length in self.negLineLengthDict:
        self.negLineLengthDict[length] /= self.totalNegInstances
    self.negLineLengthDist = stats.rv_discrete(
        name='negLineLengthDist',
        values=(list(self.negLineLengthDict.keys()), list(self.negLineLengthDict.values())))
def MakeData(number_of_students, actref_dist, senint_dist, visver_dist, seqglo_dist):
    # Range of possible values:
    # -1 = active/sensing/visual/global
    # +1 = reflective/intuitive/verbal/sequential
    xk = (-1, 1)
    # Create random number generators that draw values
    # according to their pre-set distributions.
    actref_custm = stats.rv_discrete(name='actref_custm', values=(xk, actref_dist))
    senint_custm = stats.rv_discrete(name='senint_custm', values=(xk, senint_dist))
    visver_custm = stats.rv_discrete(name='visver_custm', values=(xk, visver_dist))
    seqglo_custm = stats.rv_discrete(name='seqglo_custm', values=(xk, seqglo_dist))
    for i in range(number_of_students):
        # generate a fake name
        name = ''.join(random.choice(string.ascii_letters) for h in range(14))
        # generate a learning styles profile
        act_ref = actref_custm.rvs()
        sen_int = senint_custm.rvs()
        vis_ver = visver_custm.rvs()
        seq_glo = seqglo_custm.rvs()
        # Uncomment for debug output:
        # print(name, act_ref, sen_int, vis_ver, seq_glo)
        # Write to file
        w.writerow([name, act_ref, sen_int, vis_ver, seq_glo])
def __init__(self, state, record):
    self.consequences = [['Test: CT scan'],
                         ['Plan: Chemo', 'Plan: Surgery']]
    self.t_distributions = [[st.norm(loc=7, scale=2)],
                            [st.rv_discrete(values=([0], [1])),
                             st.rv_discrete(values=([0], [1]))]]
    self.c_distribution = [st.rv_discrete(values=([0], [1])),
                           st.rv_discrete(values=([0, 1], [0.5, 0.5]))]
def RandomizedFictitiousPlay(A, Epsilon):
    n = len(A[0])
    m = len(A)
    X = numpy.matrix(numpy.zeros((m, 1), dtype=int))
    Y = numpy.matrix(numpy.zeros((n, 1), dtype=int))
    X[0] = 1
    Y[0] = 1
    numpy.random.shuffle(X)
    numpy.random.shuffle(Y)
    t = int(round(6 * math.log(2 * n * m) / pow(Epsilon, 2)))
    for i in range(t):
        Ax = numpy.array(numpy.transpose(A) * X).tolist()
        Ay = numpy.array(A * Y).tolist()
        # row player: sample an action from the Gibbs weights exp(eps * Ay / 2)
        values = [item[0] for item in Ay]
        probabilities = [pow(math.e, Epsilon * v / 2) for v in values]
        while True:
            try:
                temp = sum(probabilities)
                theprobabilities = [x / temp for x in probabilities]
                distrib = stats.rv_discrete(values=(values, theprobabilities))
                xchoice = values.index(distrib.rvs(size=1)[0])
                break
            except Exception:
                # retry if rounding errors make the weights unusable
                pass
        # column player: sample an action from the weights exp(-eps * Ax / 2)
        values = [item[0] for item in Ax]
        probabilities = [pow(math.e, -Epsilon * v / 2) for v in values]
        while True:
            try:
                temp = sum(probabilities)
                theprobabilities = [x / temp for x in probabilities]
                distrib = stats.rv_discrete(values=(values, theprobabilities))
                ychoice = values.index(distrib.rvs(size=1)[0])
                break
            except Exception:
                pass
        X[xchoice] += 1
        Y[ychoice] += 1
    return X / float(t + 1), Y / float(t + 1)
def prop(q):
    """
    Returns a dictionary with keys tau_1 and tau_2 and values discrete RVs
    with probabilities determined by $q$ as in the matrix above.
    """
    proposal = {}
    proposal[tau.xk[0]] = rv_discrete(values=[(tau.xk[0], tau.xk[1]), (q, 1 - q)])
    proposal[tau.xk[1]] = rv_discrete(values=[(tau.xk[0], tau.xk[1]), (1 - q, q)])
    return proposal
def test_entropy(self):
    # Basic tests of entropy.
    pvals = np.array([0.25, 0.45, 0.3])
    p = stats.rv_discrete(values=([0, 1, 2], pvals))
    expected_h = -sum(xlogy(pvals, pvals))
    h = p.entropy()
    assert_allclose(h, expected_h)

    p = stats.rv_discrete(values=([0, 1, 2], [1.0, 0, 0]))
    h = p.entropy()
    assert_equal(h, 0.0)
def perform(self, action):
    # get distribution over outcomes
    probabilities = self.belief[action] / np.sum(self.belief[action])
    distrib = rv_discrete(values=(range(len(probabilities)), probabilities))
    # draw sample
    sample = distrib.rvs()
    # update belief accordingly
    belief = copy(self.belief)
    belief[action][sample] += 1
    # manual found
    if (self.pos == self.world.manual).all():
        print("m", end="")
        belief = {ToyWorldAction(np.array([0, 1])): [50, 1, 1, 1],
                  ToyWorldAction(np.array([0, -1])): [1, 50, 1, 1],
                  ToyWorldAction(np.array([1, 0])): [1, 1, 50, 1],
                  ToyWorldAction(np.array([-1, 0])): [1, 1, 1, 50]}
    # build next state
    pos = self._correct_position(self.pos + self.actions[sample].action)
    return ToyWorldState(pos, self.world, belief)
def discrete_sample(values, probabilities, ns=1):
    distrib = rv_discrete(values=(range(len(values)), probabilities))
    indices = distrib.rvs(size=ns)
    if ns == 1:
        return values[indices[0]]
    else:
        return [values[i] for i in indices]
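# Usage sketch (illustrative, not from the original source): draw labeled
# samples with discrete_sample above; the values and weights are made up, and
# scipy.stats.rv_discrete is assumed to be imported as in the function.
print(discrete_sample(['a', 'b', 'c'], [0.2, 0.3, 0.5]))        # one value
print(discrete_sample(['a', 'b', 'c'], [0.2, 0.3, 0.5], ns=5))  # list of 5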
def SequenceDynSelf(protocell, mu, L, N):
    q = (1 - mu) ** L
    total = np.sum(protocell)
    global test
    while total != 2 * N:
        # pick the sequence type
        sec_freq = protocell / total
        values = np.arange(len(protocell))
        custm = sps.rv_discrete(name='custm', values=(values, sec_freq))
        sample = int(custm.rvs(size=1)[0])
        # replicate, with probability q of an error-free copy
        test = nprandom.binomial(1, q)
        if sample == 0:
            protocell[0] += 1
        elif test == 1:
            protocell[sample] += 1
        else:
            protocell[0] += 1
        total = np.sum(protocell)
    return protocell
def __init__(self, len_dist, dataset, max_width=None, max_height=None,
             rotate=False, resize=False, crazy=False, max_dist=None):
    """
    len_dist: a dict containing the distribution of length.
    max_dist: maximum distance between digits, to make them closer to each other.
    """
    lens = list(len_dist.keys())
    self.max_len = max(lens)
    probs = [len_dist[k] for k in lens]
    self.do_rotate = rotate
    self.do_resize = resize
    self.crazy = crazy
    self.max_dist = max_dist
    self.len_rvg = stats.rv_discrete(values=(lens, probs))

    # merge train/valid/test
    self.dataset = dataset
    shape = self.dataset[0][0][0].shape
    self.orig_image_shape = int(np.sqrt(shape[0]))
    assert self.orig_image_shape ** 2 == int(shape[0])
    if max_width is None:
        max_width = self.orig_image_shape * self.max_len
    if max_height is None:
        max_height = self.orig_image_shape
    self.img_size = (max_height, max_width)
    print("Original dataset size: {0}, {1}, {2}".format(
        len(dataset[0][0]), len(dataset[1][0]), len(dataset[2][0])))
    print("Image size: {0}".format(self.img_size))
def plot_boxplot(karr, xdistr, ydistr, name, color, ax=None, dx=.1):
    if ax is None:
        ax = plt.gca()
    minarr = []; maxarr = []; meanarr = []
    leftarr = []; q1arr = []; medarr = []; q3arr = []; rightarr = []
    for k in karr:
        ik = k - 1
        distr = stats.rv_discrete(name=name, values=(xdistr, ydistr[ik, :]))
        minarr.append(distr.ppf(0))
        leftarr.append(distr.ppf(.05))
        q1arr.append(distr.ppf(.25))
        meanarr.append(distr.mean())
        medarr.append(distr.median())
        q3arr.append(distr.ppf(.75))
        rightarr.append(distr.ppf(.95))
        maxarr.append(distr.ppf(1))
    minarr = np.array(minarr)
    leftarr = np.array(leftarr)
    q1arr = np.array(q1arr)
    meanarr = np.array(meanarr)
    medarr = np.array(medarr)
    q3arr = np.array(q3arr)
    rightarr = np.array(rightarr)
    maxarr = np.array(maxarr)
    ax.plot(karr, leftarr, linestyle='--', color=color)
    ax.plot(karr + dx, meanarr, marker='s', linestyle='None', color=color)
    ax.errorbar(karr, medarr, yerr=[medarr - q1arr, q3arr - medarr],
                marker='o', ls='-', lw=1.8, label=name, color=color)
    ax.plot(karr, rightarr, linestyle='--', color=color)
    return ax
def profileRandomlyGeneratedKmer(k, sequence, profile):
    bases = {'A': 0, 'C': 1, 'G': 2, 'T': 3}
    kmerProbabilities = []
    # loop through all kmers in sequence (len(sequence) - k + 1 start positions)
    for index in range(len(sequence) - k + 1):
        kmerP = 1
        # compute probability of kmer starting at sequence[index]
        for kmerIndex in range(k):
            baseIndex = bases[sequence[index + kmerIndex]]
            kmerP *= profile[baseIndex][kmerIndex]
        kmerProbabilities.append(kmerP)
    sumP = sum(kmerProbabilities)
    # normalize to total probability of 1
    for i in range(len(kmerProbabilities)):
        kmerProbabilities[i] /= sumP
    # generate probability distribution over kmer start positions
    xk = np.arange(len(kmerProbabilities))
    custm = stats.rv_discrete(name='custm', values=(xk, kmerProbabilities))
    randKmerIndex = custm.rvs()
    return sequence[randKmerIndex:randKmerIndex + k]
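# Usage sketch (illustrative, not from the original source): draw a 3-mer from
# a toy profile matrix. Rows correspond to A, C, G, T and columns to kmer
# positions; all numbers here are assumptions for the example.
import numpy as np
from scipy import stats

example_profile = [[0.80, 0.10, 0.10],
                   [0.10, 0.10, 0.10],
                   [0.05, 0.70, 0.10],
                   [0.05, 0.10, 0.70]]
print(profileRandomlyGeneratedKmer(3, 'ACGTACGT', example_profile))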
def add_vertex(self):
    # Step 1. Choose cluster
    clusters = sorted(self.clusters.keys())
    new_cluster = len(clusters) + 1
    if self.cluster_attachment == "preferential":
        # probability of new cluster is 1 / (n_vertices + 1)
        norm = float(sum(self.clusters.values()) + 1)
        p_new = 1.0 / norm
        probas = [self.clusters[cl] / norm for cl in clusters]
    elif self.cluster_attachment == "uniform":
        # probability of new cluster is 1 / (n_clusters + 1)
        norm = float(len(self.clusters.keys()) + 1)
        p_new = 1.0 / norm
        probas = [1.0 / norm for cl in clusters]
    # determine if a new cluster appears
    if np.random.random() < p_new:
        self.clusters[new_cluster] = 1
        # generate new vertex
        # coord = self._gen_1d_vertex(self.bbox[0], self.bbox[1])
        coord = np.random.choice(self.bbox)  # select one boundary
        new_vertex = ((coord,), new_cluster)
    else:
        distr = rv_discrete(values=(clusters, probas))
        cl = distr.rvs(size=1)[0]
        self.clusters[cl] += 1
        # generate vertex from an existing cluster
        cluster_vertices = np.array([v[0][0] for v in self.graph.nodes() if v[1] == cl])
        pivot = np.random.choice(cluster_vertices)
        coord = self._gen_1d_vertex(pivot - 0.5, pivot + 0.5)
        new_vertex = ((coord,), cl)
    old_vertices = self.graph.nodes()
    self.graph.add_node(new_vertex)
    for v in old_vertices:
        if abs(v[0][0] - new_vertex[0][0]) < 0.5:
            self.graph.add_edge(v, new_vertex)
def run_episode(policy, num_episodes, max_time, transitions, rewards,
                time_based=False, repeating=True):
    """Runs NUM_EPISODES episodes using the given policy and returns the results."""
    random.seed()
    deviation_prob = 0.05
    lives = np.zeros([num_episodes, max_time, 4])
    # lives = np.zeros(max_time, 4 * num_episodes)
    num_actions = rewards.shape[0]
    num_states = rewards.shape[1]
    for n in range(num_episodes):
        # or start at a random state: random.randrange(num_states)
        state = 0
        life = np.zeros([max_time, 4])
        for t in range(max_time):
            action = policy[t, state]
            deviated = random.random() < deviation_prob
            if deviated:
                # perform any other action
                action = random.choice([x for x in range(num_actions) if x != action])
            life[t, 0] = rewards[action, state]
            life[t, 1] = state
            life[t, 2] = int(deviated)
            life[t, 3] = action
            # rv for next state with distribution from transition probabilities
            if time_based:
                rv_vals = (range(num_states), transitions[t, action, state, :])
            else:
                rv_vals = (range(num_states), transitions[action, state, :])
            next_rv = stats.rv_discrete(name="next_state", values=rv_vals)
            state = next_rv.rvs()
        lives[n] = life
        # lives[:, (n*4):((n+1)*4)] = life
    return lives
def softmax_rv(masses, values=None, z=0.05):
    """
    Returns a discrete random variable based on the softmax function.
    """
    nums = np.exp(np.array(masses) / z)
    dist = nums / np.sum(nums)
    if values is None:
        values = np.arange(len(masses))
    return stats.rv_discrete(name='softmax', values=(values, dist))
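# Usage sketch (illustrative, not from the original source): a low temperature
# z concentrates the mass on the largest element; the masses are made up.
import numpy as np
from scipy import stats

rv = softmax_rv([0.1, 0.5, 2.0], z=0.5)
print(rv.pk)            # softmax probabilities
print(rv.rvs(size=10))  # mostly index 2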
def create_concordance_probability(d):
    word_list = list(d.keys())
    freq_list = list(d.values())
    word_order_list = [x for x in range(len(word_list))]
    probs = stats.rv_discrete(a=0, values=(word_order_list, freq_list))
    return CacheResult(word_list, probs)
def __init__(self, state, record):
    stage = np.random.choice([1, 2, 3, 4], p=[1/100, 6/100, 14/100, 79/100])
    self.consequences = [['Test: Endoscopy']]
    self.t_distributions = [[st.norm(loc=14, scale=3)]]
    self.c_distribution = [st.rv_discrete(values=([0], [1]))]
    state.add(('C16', stage))
def sample_child(self, model):
    values = [i for i in range(len(self.children))]
    probabilities = [model.get_probability(self, self.children[i])
                     for i in range(len(self.children))]
    distrib = rv_discrete(values=(range(len(values)), probabilities))
    d = distrib.rvs(size=1)[0]
    return self.children[d]
def get_empirical_pmf(values, probabilities, name="Empirical PMF"):
    """Return a custom scipy.stats discrete PMF.

    *Arguments*
        ``values`` [Integer]
            The random variable represented by the empirical pmf can assume
            the values in this Python list of integers.
        ``probabilities`` [float]
            This Python list of positive floats should be the same length as
            ``values``. The floats should be between zero and one. Each float
            is the probability that the corresponding (i.e., same-index) item
            in the ``values`` list will occur.
        ``name`` String, optional
            Assign ``name`` to the empirical pmf. Default value is
            'Empirical PMF'.
    """
    return stats.rv_discrete(values=(values, probabilities), name=name)
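# Usage sketch (illustrative, not from the original source): a fair die as an
# empirical pmf; the values and probabilities are assumptions for the example.
die = get_empirical_pmf([1, 2, 3, 4, 5, 6], [1.0 / 6] * 6, name='fair die')
print(die.mean())       # 3.5
print(die.rvs(size=5))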
def getCDF(self, points=None):
    # Approximation of the PDF integral (not in the original version of the
    # Custom class): given a set of points, what is the CDF, starting from
    # the PDF of the data?
    #
    # version 1 (trapezoidal integration of the PDF):
    # -------------------------------------------------------------------
    # x = sorted(points)        # points can differ from self.data
    # y = self.getPDF(x)        # pdf associated with self.data and points
    # c = [0.]                  # future CDF array
    # for i in range(1, len(points)):
    #     c.append((y[i-1] + y[i]) * .5 * (x[i] - x[i-1]) + c[i-1])
    # for i in range(1, len(points)):
    #     c[i] = c[i] / c[len(points) - 1]
    # return c
    # -------------------------------------------------------------------
    # version 2 (discretize the PDF and use rv_discrete's cdf):
    points = np.matrix(points)
    y = self.getPDF(self.data)
    summ = np.sum(y)
    p = np.array(y / summ)
    custom = stats.rv_discrete(name='custom', values=(self.data, p))
    return custom.cdf(points)
def getiCDF(self, xx):
    """
    A custom inverse cumulative distribution function.

    :param Custom self: An instance of the Custom class.
    :param array xx: An array of points at which the inverse cumulative
        density function needs to be evaluated.
    :return: Inverse cumulative density function values of the Custom
        distribution.
    """
    # previous version (piecewise-linear inversion of the trapezoidal CDF):
    # x = self.data
    # y = self.getPDF(x)
    # c = [0.0]
    # yy = []
    # for i in range(1, len(x)):
    #     c.append(c[i-1] + (x[i] - x[i-1]) * (y[i] + y[i-1]) * .5)
    # for i in range(1, len(x)):
    #     c[i] = c[i] / c[len(x) - 1]
    # for k in range(0, len(x)):
    #     for i in range(0, len(x)):
    #         if (xx[k] >= c[i]) and (xx[k] <= c[i+1]):
    #             yy.append(float((xx[k]-c[i]) / (c[i+1]-c[i]) * (x[i+1]-x[i]) + x[i]))
    #             break
    # return yy
    xx = np.matrix(xx)
    y = self.getPDF(self.data)
    summ = np.sum(y)
    p = np.array(y / summ)
    custom = stats.rv_discrete(name='custom', values=(self.data, p))
    return custom.ppf(xx)
def get_next_chord_conditional(self, chord, note):
    """
    Given a chord, draws a random next chord according to its probability
    distribution conditioned on the preceding note. ``note`` here is a note.
    """
    chords = self.key.c_chord_given_c_note(note[:-1])
    idxs = []
    for j in range(len(chords)):
        for i in range(len(self.key.chords)):
            if chords[j] == self.key.chords[i]:
                idxs.append(i)
    idxs = list(set(idxs))
    chord = self.specified_chord_to_chord(chord)
    cond_dist_dict = self.chord_chord_map[chord]
    # get the list of candidate chords and their conditional probabilities
    next_chords = sorted(cond_dist_dict.keys())
    next_probs = [cond_dist_dict[next_chord] for next_chord in next_chords]
    true_next_chords = [next_chords[int(i)] for i in idxs]
    true_next_probs = [next_probs[int(i)] for i in idxs]
    # renormalize the restricted distribution
    allsum = sum(true_next_probs)
    for i in range(len(true_next_probs)):
        true_next_probs[i] /= allsum
    cond_dist = stats.rv_discrete(name='cond_dist',
                                  values=(true_next_chords, true_next_probs))
    return self.key.chords[cond_dist.rvs(size=1) - 1]
def simplify3(nk):
    result = []
    nk = np.array(nk)
    xk = nk / float(np.sum(nk))
    # X_plot = np.linspace(0, len(nk), 1000)[:, np.newaxis]
    sdiv = 1000
    X_plot = np.linspace(0, len(xk), sdiv)[:, np.newaxis]
    custm = stats.rv_discrete(name='custm', a=0, b=7,
                              values=(range(len(xk)), xk))
    yk = custm.rvs(size=100000)
    # fig, ax = plt.subplots(1, 1)
    # ax.hist(yk, normed=True, histtype='stepfilled', alpha=0.2)

    # gaussian KDE
    X = yk.reshape(-1, 1)
    kde = KernelDensity(kernel='gaussian', bandwidth=0.6).fit(X)
    log_dens = kde.score_samples(X_plot)
    mi, ma = argrelextrema(log_dens, np.less)[0], argrelextrema(log_dens, np.greater)[0]
    mi = np.rint(mi * float(len(xk)) / float(sdiv))
    ma = np.rint(ma * float(len(xk)) / float(sdiv))
    # average nk within each segment between density minima
    start = 0
    for i in mi:
        i = int(i)
        if start != i:
            val = np.average(nk[start:i])
            for j in range(start, i):
                result.append(val)
            start = i
    val = np.average(nk[start:])
    for j in range(start, len(nk)):
        result.append(val)
    return np.array(result)
def main():
    fig = plt.figure(figsize=(3.2, 1.8))
    ax = fig.add_axes([0, 0, 1, 1])
    signal = create_gammapy_skymap().data
    background = np.ones(signal.shape)
    background /= background.sum()
    data = (1 * signal + background) / 2.

    # set up the counts generator
    pdf = data.copy().flatten()
    x = np.arange(pdf.size)
    counts_generator = rv_discrete(name='counts', values=(x, pdf))

    counts = np.zeros_like(data)
    image = ax.imshow(counts, cmap='afmhot', origin='lower', vmin=0, vmax=9,
                      interpolation='None')
    bins = np.arange(counts.size + 1) - 0.5
    anim = FuncAnimation(fig, animate,
                         fargs=[image, counts, bins, counts_generator],
                         frames=200, interval=50)
    filename = 'gammapy_logo.gif'
    anim.save(filename, writer='imagemagick')
def getRandomGenerator(freqs):
    s = sum(freqs)
    l = len(freqs)
    probs = [float(freqs[i]) / float(s) for i in range(l)]
    quals = np.arange(l)
    dist = stats.rv_discrete(name='custm', values=(quals, probs))
    return dist
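# Usage sketch (illustrative, not from the original source): build a sampler
# of quality values from raw counts; the frequencies below are made up.
qual_dist = getRandomGenerator([10, 30, 40, 20])
print(qual_dist.rvs(size=8))  # values drawn from {0, 1, 2, 3}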
def simulate(self, N, start=None, stop=None, dt=1):
    """
    Generates a realization of the Hidden Markov Model.

    :param N: int - trajectory length in steps of the lag time
    :param start: int (default=None) - starting hidden state. If not given,
        will sample from the stationary distribution of the hidden
        transition matrix.
    :param stop: int or int-array-like (default=None) - stopping hidden set.
        If given, the trajectory will be stopped before N steps once a
        hidden state of the stop set is reached.
    :param dt: int - trajectory will be saved every dt time steps.
        Internally, the dt'th power of P is taken to ensure a more
        efficient simulation.
    :return: ndarray, ndarray - tuple of (hidden state trajectory with
        length N/dt, observable state discrete trajectory with length N/dt)
    """
    from scipy import stats
    import msmtools.generation as msmgen
    # generate output distributions
    output_distributions = [
        stats.rv_discrete(values=(np.arange(self.pobs.shape[1]), pobs_i))
        for pobs_i in self.pobs]
    # sample hidden trajectory
    htraj = msmgen.generate_traj(self.transition_matrix, N, start=start,
                                 stop=stop, dt=dt)
    otraj = np.zeros(htraj.size, dtype=int)
    # for each time step, sample an observable microstate
    for t, h in enumerate(htraj):
        otraj[t] = output_distributions[h].rvs()  # current cluster
    return htraj, otraj
def non_uniform_approx(A, B, S, R):
    """Creates non-uniformly approximated matrices of A and B: S and R."""
    # Pick rows from A and corresponding columns from B non-uniformly at random
    n = A.shape[1]
    s = S.shape[1]
    rows = np.arange(0, n)   # the list of rows
    probs = np.zeros(n)      # the probability of each row/column pair
    # Calculate the probability of selecting each row/column pair based on the
    # amount of information, using the method proposed by Drineas and Kannan:
    # the probability is the product of the row and column Euclidean norms,
    # divided by the sum of those products over all rows and columns.
    D = 0.0
    for i in range(0, n):
        prod = np.sqrt((A[i, :] * A[i, :]).sum()) * np.sqrt((B[:, i] * B[:, i]).sum())
        D += prod
        probs[i] = prod
    probs /= D  # normalize by the total, not the running sum
    # Use the probabilities to pick the rows and columns non-uniformly
    distrib = rv_discrete(values=(rows, probs))
    for t, i_t in enumerate(distrib.rvs(size=s)):
        S[:, t] = A[i_t, :]
        R[t, :] = B[:, i_t]
        # Apply scaling
        scaling = np.sqrt(s * probs[i_t])
        S[:, t] /= scaling
        R[t, :] /= scaling
def __call__(self, *args, **kwargs):
    frozen = self.underlying(*args, **kwargs)
    return stats.rv_discrete(
        a=0,
        b=self.max_value,
        values=(np.arange(self.max_value + 1),
                truncated_pmf(frozen, self.max_value + 1)))
def getadm(psrcatdm, iseed, nbins, n):
    """Randomly select a dm value from the known pulsar dms in the catalogue.

    Creates a distribution of the dm values given their probabilities, and
    randomly selects a dm value to use for scattering.

    Args:
    -----
    psrcatdm : a file containing psrcat dm values in 1 column
               (nan values replaced with zeros).
    iseed    : seed for the random number generator [int].
    nbins    : number of bins.
    n        : size of the sample to draw.

    Returns:
    --------
    rand_dm : randomly selected dm values (pc cm^-3).
    """
    dm_file_name = str(psrcatdm)
    dm_file = np.loadtxt(dm_file_name)  # load the txt file containing the DMs
    # exclude the zero dms used to replace null values from psrcat
    dm_dat = dm_file[np.where(dm_file > 0)]
    hist, bin_edges = np.histogram(dm_dat, bins=nbins)  # histogram distribution
    probs = hist / float(len(dm_dat))  # compute probabilities
    dm_range = np.linspace(np.min(dm_dat), np.max(dm_dat), endpoint=True,
                           num=len(probs))
    # build an arbitrary discrete distribution over the dm range
    normdiscrete = stats.rv_discrete(values=(dm_range, probs), seed=iseed)
    rand_dm = normdiscrete.rvs(size=n)  # draw a sample of size n
    return rand_dm
def measure_cheat(self):
    # Measure but ignore the 0 state; for debugging Shor's algorithm.
    data = self.array.toarray()[0]
    pos = np.arange(len(data))
    probs = np.abs(np.square(data))
    probs[0] = 0
    # If probs is not normalised (usually due to rounding errors), re-normalise.
    probs = probs / np.sum(probs)
    dist = stats.rv_discrete(values=(pos, probs))
    self.array = np.zeros(data.shape)
    self.array[dist.rvs()] = 1
    self.array = sp.bsr_matrix(self.array)
def generator(self):
    # construct a generator using the PDF
    if self.my_generator is None:
        N = len(self.dz)
        xk = np.arange(N)
        pk = self.dz
        pk = pk / pk.sum()
        self.my_generator = stats.rv_discrete(name='zdist', values=(xk, pk))
    return self.my_generator
def _make_gp_list(self, mean, samples):
    """
    Generate a list of numbers of pixels discharged, based on the total mean
    discharges, using the generalized Poisson function.
    """
    width = np.sqrt(mean)
    # clip the support at zero (the original min/max combination always gave 0)
    k_min = max(mean - 3 * width, 0)
    k_max = mean + 3 * width + 10
    k_arr = np.arange(k_min, k_max)
    gp_prob = _general_poisson(k_arr, mean, self.lamb)
    gp_prob = gp_prob / np.sum(gp_prob)  # additional normalization
    dist = stats.rv_discrete('GeneralizedPoisson', values=(k_arr, gp_prob))
    return dist.rvs(size=samples)
def get_chord_single_distr(start_prob, end_prob, time, composition_length):
    # linearly interpolate the chord probability over the composition
    tc = time / composition_length
    chord_prob = filt(start_prob + (end_prob - start_prob) * tc)
    single_prob = 1. - chord_prob
    distrib = stats.rv_discrete(name='csd',
                                values=([0, 1], [chord_prob, single_prob]))
    return distrib
def generate_padding(plain_samples, blocksize):
    padding_length = blocksize * blocksize - len(plain_samples)
    bincount = -1 * np.amin(plain_samples)
    bincount += np.amax(plain_samples) + 1  # add the zero bin
    hist, bins = np.histogram(plain_samples, bincount, density=True)
    pk = hist / np.sum(hist)
    xk = np.arange(np.amin(plain_samples), np.amax(plain_samples) + 1)
    sampdist = stats.rv_discrete(name='Sample distribution', values=(xk, pk))
    randsamples = sampdist.rvs(size=padding_length)
    return randsamples
def total_correlation(Xs):
    pmf = []
    Xs = np.asarray(Xs)
    for stock in range(len(Xs)):
        s = pd.Series(Xs[stock])
        b = (s.groupby(s).transform('count') / len(s)).values
        pmf.append(b)
    pmf = np.asarray(pmf)
    pmf /= pmf.sum()
    custm = stats.rv_discrete(name='custm', values=(Xs, pmf))
    d = Distribution.from_ndarray(pmf)
    t = T(d)
    return t
def getWarmingLevelMixDistributions(warmingLev, startYear=2000, endYear=2150):
    pdfR8 = getWarmingLevelMixDistributionByScen('rcp85', warmingLev,
                                                 startYear=startYear, endYear=endYear)
    pdfR4 = getWarmingLevelMixDistributionByScen('rcp45', warmingLev,
                                                 startYear=startYear, endYear=endYear)
    yrs = pdfR8.xk
    mixDist = (pdfR8.pk + pdfR4.pk) / 2.
    pdf = st.rv_discrete(values=(yrs, mixDist))
    return pdf, pdfR8, pdfR4
def __init__(self, name, min_included, max_included, null_default_value, **kwargs):
    scipy_dist_obj = rv_discrete(
        name=name,
        a=min_included,
        b=max_included,
        **kwargs
    )
    scipy_dist_obj._pmf = self._pmf
    ScipyDiscreteDistributionWrapper.__init__(
        self,
        scipy_distribution=scipy_dist_obj,
        null_default_value=null_default_value
    )
def erdos_renyi_ternary(num_genes: int, prob_conn: float) -> np.ndarray:
    """Generate a ternary-valued ER graph.

    Args:
        num_genes: Number of genes/nodes.
        prob_conn: Probability of connection.

    Returns:
        Adjacency matrix.
    """
    signed_edge_dist = rv_discrete(
        values=([-1, 0, 1], [prob_conn / 2, 1 - prob_conn, prob_conn / 2]))
    return signed_edge_dist.rvs(size=(num_genes, num_genes))
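# Usage sketch (illustrative, not from the original source): a 5-gene ternary
# ER adjacency matrix with connection probability 0.3. Note that the function
# does not zero the diagonal.
adj = erdos_renyi_ternary(5, 0.3)
print(adj)  # entries in {-1, 0, 1}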
def var(self, only_missense=False):
    if only_missense:
        return self.missense_scores.var()
    else:
        mean_of_type_vars = self.f_missense * self.missense_scores.var()
        type_means_dist = rv_discrete(
            values=([1, self.missense_scores.mean(), 0], self.get_type_freqs()))
        var_of_type_means = type_means_dist.var()
        # According to the law of total variance:
        return mean_of_type_vars + var_of_type_means
def measure(self):
    """Measure the qubit register, collapsing it to 1 definite state.

    Simulates a real measurement, as intermediate values of qubit registers
    during computation remain unknown.
    """
    self.normalise()
    pos = np.arange(len(self.array))
    # probabilities are |amplitude|^2; take the real part so rv_discrete
    # receives real weights
    probs = np.real(self.array * np.conjugate(self.array))
    # If probs is not normalised (usually due to rounding errors), re-normalise:
    # probs = probs / np.sum(probs)
    dist = stats.rv_discrete(values=(pos, probs))
    self.array = np.zeros(self.array.shape)
    self.array[dist.rvs()] = 1
    return self.array
def get_for(self, var: str, constr: str):
    """
    Returns the randomness associated with the given variable and constraint
    pair. Returns a ``scipy.stats`` distribution (possibly discrete).
    """
    logger.debug(f"Retrieving randomness for ({var}, {constr}).")
    if self.is_finite():
        return rv_discrete(values=zip(*self._discrete[var, constr]),
                           name="discrete")
    else:
        return self._randomness[var, constr]
def statistics_back(self):
    counter = [0] * (len(self.class_back) + 1)
    for i in self.back:
        counter[i[0]] += 1
        counter[i[1]] += 1
    total = 0.0
    for i in counter:
        total += i
    counter = [i / total for i in counter]
    candinate = list(range(len(self.class_back) + 1))
    custm = stats.rv_discrete(name='custm', values=(candinate, counter))
    num = custm.rvs(size=2)
    return num
def generateSamples(w, mu, cov, s):
    dim = len(mu[0])
    d = rv_discrete(values=(range(len(w)), w))
    components = d.rvs(size=s)
    # draw one sample from the Gaussian of each chosen component
    if dim > 1:
        return components, np.array([
            np.random.multivariate_normal(mu[i], cov[i], 1)[0]
            for i in components
        ])
    else:
        return components, np.asmatrix(
            [np.random.normal(mu[i], cov[i], 1)[0] for i in components]).T
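# Usage sketch (illustrative, not from the original source): 10 draws from a
# two-component 2-D Gaussian mixture; weights, means and covariances are
# made-up numbers.
import numpy as np

w = [0.3, 0.7]
mu = [[0.0, 0.0], [5.0, 5.0]]
cov = [np.eye(2), 0.5 * np.eye(2)]
components, samples = generateSamples(w, mu, cov, 10)
print(components)     # chosen component per sample
print(samples.shape)  # (10, 2)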
def test_rvs(self):
    states = [-1, 0, 1, 2, 3, 4]
    probability = [0.0, 0.3, 0.4, 0.0, 0.3, 0.0]
    samples = 1000
    r = stats.rv_discrete(name='sample', values=(states, probability))
    x = r.rvs(size=samples)
    assert_(isinstance(x, numpy.ndarray))
    for s, p in zip(states, probability):
        assert_(abs(sum(x == s) / float(samples) - p) < 0.05)

    x = r.rvs()
    assert_(isinstance(x, int))
def fit_variational_inference(self, data, est_cluster, a, b, alpha, max_iter=50):
    self.lam_history = np.zeros((max_iter, est_cluster))
    self.pi_history = np.zeros((max_iter, est_cluster))
    a_hat = a.copy()
    b_hat = b.copy()
    alpha_hat = alpha.copy()
    print("start fitting...")
    for iteration in tqdm(range(max_iter)):
        _loglam = scipy.special.digamma(a_hat) - np.log(b_hat)
        _lam = a_hat / b_hat
        _logpie = scipy.special.digamma(alpha_hat) - scipy.special.digamma(
            np.sum(alpha_hat))
        # data shape = (data_len, 1)
        # param shape = (1, category_len)
        # nu shape = (data_len, category_len)
        nu = np.exp(data * _loglam - _lam + _logpie)
        nu = nu / np.sum(nu, axis=1, keepdims=True)
        a_hat = np.sum(data * nu, axis=0) + a
        b_hat = np.sum(nu, axis=0) + b
        alpha_hat = np.sum(nu, axis=0) + alpha
        self.pi_history[iteration, :] = np.random.dirichlet(alpha_hat[0], 1)
        self.lam_history[iteration, :] = stats.gamma.rvs(a_hat[0],
                                                         scale=1 / b_hat[0],
                                                         size=est_cluster,
                                                         random_state=0)
        for cls in range(est_cluster):
            print("estimation of cluster {}: lambda = {:.2f}, real={}".format(
                cls, self.lam_history[iteration, cls], self.real_lam[cls]))
    # draw a hard cluster assignment for each data point from its
    # responsibility distribution nu
    _s = np.zeros((data.shape[0], est_cluster))
    for _n in range(data.shape[0]):
        cat = stats.rv_discrete(name='custm',
                                values=(range(est_cluster), nu[_n, :]))
        _class = cat.rvs(size=1)
        _s[_n, _class] = 1
    self.estimate_cluster = _s
    self.cluster_proba = nu
def bootstrap_counts(dtrajs, lagtime, corrlength=None):
    """
    Generates a randomly resampled count matrix given the input coordinates.

    See the API function for full documentation.
    """
    from scipy.stats import rv_discrete
    # if we have just one trajectory, put it into a one-element list:
    if not isinstance(dtrajs, list):
        dtrajs = [dtrajs]
    ntraj = len(dtrajs)
    # can we do the estimate?
    lengths = determine_lengths(dtrajs)
    Lmax = np.max(lengths)
    Ltot = np.sum(lengths)
    if lagtime >= Lmax:
        raise ValueError('Cannot estimate count matrix: lag time ' + str(lagtime)
                         + ' is longer than the longest trajectory length '
                         + str(Lmax))
    # how many counts can we sample?
    if corrlength is None:
        corrlength = lagtime
    nsample = int(Ltot / corrlength)
    # determine number of states n
    from deeptime.markov import number_of_states
    n = number_of_states(dtrajs)
    # assign trajectory sampling weights
    w_trajs = np.maximum(0.0, lengths - lagtime)
    w_trajs /= np.sum(w_trajs)  # normalize to sum 1.0
    distrib_trajs = rv_discrete(values=(list(range(ntraj)), w_trajs))
    # sample number of counts from each trajectory
    n_from_traj = np.bincount(distrib_trajs.rvs(size=nsample), minlength=ntraj)
    # for each trajectory, sample counts and stack them
    rows = np.zeros((nsample,))
    cols = np.zeros((nsample,))
    ones = np.ones((nsample,))
    ncur = 0
    for i in range(len(n_from_traj)):
        if n_from_traj[i] > 0:
            (r, c) = bootstrap_counts_singletraj(dtrajs[i], lagtime, n_from_traj[i])
            rows[ncur:ncur + n_from_traj[i]] = r
            cols[ncur:ncur + n_from_traj[i]] = c
            ncur += n_from_traj[i]
    # sum over counts
    Csparse = scipy.sparse.coo_matrix((ones, (rows, cols)), shape=(n, n))
    return Csparse.tocsr()
def batch_size_dist(min_num: int, max_num: int):
    """Function for sampling powers of 2.

    :param min_num: minimum number (a power of 2)
    :param max_num: maximum number (a power of 2)
    """
    assert math.log(min_num, 2).is_integer() and math.log(max_num, 2).is_integer(), \
        'Supplied minimum and maximum have to be powers of 2'
    min_pow = int(math.log(min_num, 2))
    max_pow = int(math.log(max_num, 2))
    no = max_pow - min_pow + 1
    return stats.rv_discrete(
        values=([2 ** p for p in np.arange(min_pow, max_pow + 1)],
                [1 / no for _ in np.arange(min_pow, max_pow + 1)]))
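# Usage sketch (illustrative, not from the original source): batch sizes
# between 16 and 256, sampled uniformly over the powers of 2 in that range.
bs_rv = batch_size_dist(16, 256)
print(bs_rv.rvs(size=5))  # values from {16, 32, 64, 128, 256}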
def generiere_termine(self):
    arten = []
    wahrscheinlichkeiten = []
    for k, v in self.terminarten.items():
        arten.append(k)
        wahrscheinlichkeiten.append(v[0])
    verteilung = stats.rv_discrete(name='verteilung',
                                   values=(arten, wahrscheinlichkeiten))
    self.grundtermine = verteilung.rvs(size=self.anzahlTermine)
    for t in self.grundtermine:
        reale_dauer = random.gauss(t, self.terminarten[t][1])
        self.termine.append(model.Termin(random.choice(self.kunden), t, reale_dauer))
def generate_age_group_distribution(name, age_groups, probabilities):
    '''
    Generates a discrete distribution from age demography data.
    '''
    probabilities = np.array(probabilities) / np.sum(probabilities)
    xk = np.arange(age_groups[-1][-1] + 1)  # 0 .. max age
    pk = []
    for group, prob in zip(age_groups, probabilities):
        a, b = group
        n = b - a + 1
        pk += n * [prob / n]  # spread the group's mass uniformly over its ages
    return stats.rv_discrete(name=name, values=(xk, pk))
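# Usage sketch (illustrative, not from the original source): three age groups
# with made-up weights; each group's mass is spread uniformly over its ages.
age_rv = generate_age_group_distribution(
    'ages', [(0, 17), (18, 64), (65, 90)], [0.2, 0.6, 0.2])
print(age_rv.rvs(size=10))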
def get_area_distribution(tracks, fit=False):
    area = np.sum(tracks > 0, axis=(1, 2))
    if not fit:
        count = np.bincount(area)
        probability = count / float(np.sum(count))
        # support runs from 0 to the largest observed area
        return stats.rv_discrete(a=0, b=count.shape[0] - 1,
                                 name='signal distribution',
                                 values=(np.arange(count.shape[0]), probability))
    else:
        exp_params = stats.expon.fit(area)
        return stats.expon(*exp_params)
def _hist_fill(data, bins, entries=0, freeze=None):
    if isinstance(bins, int):
        if bins == 0:
            bins = np.arange(data.size + 1, dtype=float)
        else:
            bins = np.linspace(data.min(), data.max(), bins)
    binc = (bins[1:] + bins[:-1]) / 2
    if freeze:
        return lambda: data
    else:
        rv = rv_discrete(values=(binc, data / data.sum()))
        return lambda: np.histogram(
            rv.rvs(size=data.sum() if entries == 0 else entries),
            bins)[0].astype(float)
def mm():
    # m&m problem: draw one candy from each of two bags and get one yellow and
    # one green; find the probability that the yellow one came from the 1994 bag.
    color_1994 = ('brown', 'yellow', 'red', 'green', 'orange', 'yellow_brown')
    color_1996 = ('blue', 'green', 'orange', 'yellow', 'red', 'brown')
    p_1994 = (.3, .2, .2, .1, .1, .1)
    p_1996 = (.24, .2, .16, .14, .13, .13)
    # map every color to a number and keep the mapping in a dict
    color = set(color_1996).union(set(color_1994))
    color_dict = {i: c for c, i in enumerate(color)}
    color_1996 = [color_dict[c] for c in color_1996]
    color_1994 = [color_dict[c] for c in color_1994]
    mm_1994 = st.rv_discrete(values=(color_1994, p_1994))
    mm_1996 = st.rv_discrete(values=(color_1996, p_1996))
    # let s be the event of drawing one yellow and one green; then
    # p(yellow=1994|s) = p(s|yellow=1994) * p(yellow=1994) / p(s)
    #                  = p(s, yellow=1994) / p(s)
    p1 = mm_1994.pmf(color_dict['yellow']) * mm_1996.pmf(color_dict['green'])
    p2 = mm_1996.pmf(color_dict['yellow']) * mm_1994.pmf(color_dict['green'])
    p = p1 / (p1 + p2)
    print(p)
def generate_data(self, model_params, my_N):
    """
    Generate data according to the model. Internally uses
    generate_data_from_hidden.

    This method does _not_ obey gamma: the generated data may have more than
    gamma active causes for a given datapoint.
    """
    D = self.D
    H = self.H
    s = stats.rv_discrete(values=(np.arange(H), model_params['pies']),
                          name='compProbDistr').rvs(size=my_N)
    return self.generate_from_hidden(model_params, {'s': s})
def choice_faces(verts, faces):
    num = 4000
    u1, u2, u3 = np.split(verts[faces[:, 0]] - verts[faces[:, 1]], 3, axis=1)
    v1, v2, v3 = np.split(verts[faces[:, 1]] - verts[faces[:, 2]], 3, axis=1)
    # triangle areas via the cross-product norm
    a = (u2 * v3 - u3 * v2) ** 2
    b = (u3 * v1 - u1 * v3) ** 2
    c = (u1 * v2 - u2 * v1) ** 2
    Areas = np.sqrt(a + b + c) / 2
    Areas = Areas / np.sum(Areas)
    # sample faces proportionally to their area
    choices = np.expand_dims(np.arange(Areas.shape[0]), 1)
    dist = stats.rv_discrete(name='custm', values=(choices, Areas))
    choices = dist.rvs(size=num)
    select_faces = faces[choices]
    return select_faces
def get_rolling_boundary(closing_values, bound, window, direction):
    coll = [np.nan] * window
    for i in range(window, closing_values.shape[0]):
        values = closing_values[i - window:i]
        # create an empirical distribution over the window
        perc = np.ones(values.shape[0]) / values.shape[0]
        dist = rv_discrete(values=(values, perc))
        # get the bounding quantile of the distribution
        if direction == 'upper' or direction == 'long':
            coll.append(dist.ppf(1 - bound))
        else:
            coll.append(dist.ppf(bound))
    return np.array(coll)
def _assign_sub_sector(self, person):
    """
    Assign a sub-sector job as defined in the config.
    """
    MC_random = np.random.uniform()
    ratio = self.sub_sector_ratio[person.sector][person.sex]
    distr = self.sub_sector_distr[person.sector][person.sex]
    if MC_random < ratio:
        sub_sector_idx = stats.rv_discrete(
            values=(np.arange(len(distr)), distr)
        ).rvs()
        person.sub_sector = self.sub_sector_distr[person.sector]["label"][
            sub_sector_idx
        ]
def get_power_law(a, m):
    """
    Defines a discrete power-law probability mass function.

    a : exponent of the distribution (controls how heavy the tail is)
    m : number of discrete values in the range of the r.v. following the
        power law
    """
    values = np.arange(1, m + 1, dtype='float')
    pmf = 1 / values ** a
    pmf /= pmf.sum()
    return stats.rv_discrete(values=(range(1, m + 1), pmf)), pmf
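# Usage sketch (illustrative, not from the original source): a Zipf-like law
# over 10 values with exponent 2; most of the mass sits on the smallest values.
pl_rv, pmf = get_power_law(2.0, 10)
print(pmf[:3])          # approximately [0.645, 0.161, 0.072]
print(pl_rv.rvs(size=10))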
def get_initial_probs(filename):
    positions = []
    initial_probs = []
    reader = csv.DictReader(open(filename))
    for row in reader:
        positions.append(row["Position"])
        initial_probs.append(float(row["p"]))
    return (positions,
            stats.rv_discrete(name="initial_probs",
                              values=(range(0, len(positions)), initial_probs)))
def __init__(self, data_sketch_root, net, plotter, n=1000, transform=None,
             visdom=True, model_train="", image_size=224, triplet=True):
    self.corr_files = []
    self.visdom = visdom
    self.data_sketch_root = data_sketch_root
    self.transform = transform
    self.n = n
    self.n_all = []
    self.df = []
    self.views = []
    self.image_size = image_size
    self.triplet = (triplet * 4) + 1
    for i in range(3):
        for j in range(3):
            self.views.append([dic_indices.get(i), dic_indices.get(j)])
            self.corr_files.append("{}/dataset_{}_train_{}_{}.csv".format(
                data_sketch_root, model_train, i, j))
            self.df.append(pd.read_csv(self.corr_files[-1], index_col=0))
            self.n_all.append(self.df[-1].shape[0])
    self.prob = stats.rv_discrete(name='custm', values=([0, 1], [0.95, 0.05]))
    self.plotter = plotter
    self.n_part_len = np.array(self.n_all) // self.n
    self.part = np.zeros(9)
    self.prob_rotations = stats.rv_discrete(
        name='custm', values=([0, 1, 2, 3], [0.25, 0.25, 0.25, 0.25]))
    self.rotations = [
        lambda x: x,
        np.rot90,
        lambda x: np.rot90(np.rot90(x)),
        lambda x: np.rot90(np.rot90(np.rot90(x)))
    ]
    self.net = net
    self.prob_show_image = stats.rv_discrete(name='custm',
                                             values=([0, 1], [0.95, 0.05]))
    self.prob_pixear_image = stats.rv_discrete(name='custm',
                                               values=([0, 1, 2], [0.60, 0.20, 0.20]))
    self.prob_view = stats.rv_discrete(name='custm',
                                       values=(np.arange(9), np.ones(9) * 1 / 9))
    self.prob_view_negative = stats.rv_discrete(
        name='custm', values=(np.arange(9), np.ones(9) * 1 / 9))
    self.pixear = [
        lambda x: x,
        lambda x: cv2.resize(x, (64, 64), interpolation=cv2.INTER_CUBIC),
        lambda x: cv2.resize(x, (128, 128), interpolation=cv2.INTER_CUBIC)
    ]
    self.init_csv()