Example #1
def condition_distribution(year, bridge_db, pmatrix):
    cs_dist = []
    cs = np.arange(8,0,-1)
    pmatrix = pmatrix.item()
    if int(year) % 2 != 0:
        raise ValueError('illegal year of interest, must be an even number')
    indx = int(year) // 2
    for (name, lat, long, length, width, deck_cs0, super_cs0, sub_cs0, detour, onlink) in bridge_db:
        # create reliability index distribution of deck
        deck_cs0_array = (cs==deck_cs0).astype('float')
        deck_pk = np.dot(np.linalg.matrix_power(pmatrix['deck'].T,indx),deck_cs0_array)
        deck_cs_dist = stats.rv_discrete(name='deck_cs_dist', values=(cs,deck_pk))
        # create super
        super_cs0_array = (cs==super_cs0).astype('float')
        super_pk = np.dot(np.linalg.matrix_power(pmatrix['super'].T,indx),super_cs0_array)
        super_cs_dist = stats.rv_discrete(name='super_cs_dist', values=(cs,super_pk))
        # create sub
        sub_cs0_array = (cs==sub_cs0).astype('float')
        sub_pk = np.dot(np.linalg.matrix_power(pmatrix['sub'].T,indx),sub_cs0_array)
        sub_cs_dist = stats.rv_discrete(name='sub_cs_dist', values=(cs,sub_pk))

        cs_dist.append( (name, deck_cs_dist, super_cs_dist, sub_cs_dist) )

    return cs_dist
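A minimal sketch of a call; the transition-matrix file (a saved 0-d object array holding a dict with keys 'deck', 'super' and 'sub') and the bridge record below are hypothetical:

import numpy as np
from scipy import stats

pmatrix = np.load('transition_matrices.npy', allow_pickle=True)
bridge_db = [('BR-001', 40.0, -80.0, 120.0, 12.0, 8, 7, 8, 5.0, 1)]
cs_dist = condition_distribution(10, bridge_db, pmatrix)  # 10 years = 5 two-year steps
name, deck_dist, super_dist, sub_dist = cs_dist[0]
print(deck_dist.mean())  # expected deck condition state after 10 years
Example #2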
 def processDomain(self, X, Y):
     for i in range(len(X)):
         self.processLine(X[i],Y[i]==1)
         
     self.sentWordPercentageInPos = self.sentWordPercentageInPos / self.totalPosInstances
     self.sentWordPercentageInNeg = self.sentWordPercentageInNeg / self.totalNegInstances
     
     #build word distribution for positive words
     for word in self.totalPosFreq :
         self.totalPosFreq[word] = self.totalPosFreq[word] / self.totalPosWordsInDomain  
     self.posTokenizer = dict(zip(list(range(len(self.totalPosFreq.keys()))), list(self.totalPosFreq.keys())))            
     self.posDist = stats.rv_discrete(name='positiveDist', values=(list(range(len(self.totalPosFreq.keys()))), list(self.totalPosFreq.values())))
     
     #build word distribution for negative words
     for word in self.totalNegFreq :
         self.totalNegFreq[word] = self.totalNegFreq[word] / self.totalNegWordsInDomain
     self.negTokenizer = dict(zip(list(range(len(self.totalNegFreq.keys()))), list(self.totalNegFreq.keys())))            
     self.negDist = stats.rv_discrete(name='negativeDist', values=(list(range(len(self.totalNegFreq.keys()))), list(self.totalNegFreq.values())))
     
     #build word distribution for objective words
     for word in self.totalObjFreq :
         self.totalObjFreq[word] = self.totalObjFreq[word] / self.totalObjWordsInDomain
     self.objTokenizer = dict(zip(list(range(len(self.totalObjFreq.keys()))), list(self.totalObjFreq.keys())))            
     self.objDist = stats.rv_discrete(name='objectiveDist', values=(list(range(len(self.totalObjFreq.keys()))), list(self.totalObjFreq.values())))
     
     #build positive line length distribution
     for length in self.posLineLengthDict:
         self.posLineLengthDict[length] = self.posLineLengthDict[length] / self.totalPosInstances
     self.posLineLengthDist = stats.rv_discrete(name='posLineLengthDist', values=(list(self.posLineLengthDict.keys()), list(self.posLineLengthDict.values())))
     
     #build negative line length distribution
     for length in self.negLineLengthDict:
         self.negLineLengthDict[length] = self.negLineLengthDict[length] / self.totalNegInstances
     self.negLineLengthDist = stats.rv_discrete(name='negLineLengthDist', values=(list(self.negLineLengthDict.keys()), list(self.negLineLengthDict.values())))
Example #3
def MakeData(number_of_students, actref_dist,senint_dist,visver_dist,seqglo_dist):

	# Range of possible values.
	# -1 = active/sensing/visual/global
	# +1 = reflective/intuitive/verbal/sequential
	xk = (-1,1)
	
	# Create random number generators that spit out numbers
	# according to their pre-set distributions.
	actref_custm = stats.rv_discrete(name = 'actref_custm', values=(xk,actref_dist))
	senint_custm = stats.rv_discrete(name = 'senint_custm', values=(xk,senint_dist))
	visver_custm = stats.rv_discrete(name = 'visver_custm', values=(xk,visver_dist))
	seqglo_custm = stats.rv_discrete(name = 'seqglo_custm', values=(xk,seqglo_dist))


	for i in range(number_of_students):
		#generate a fake name
		name = ''.join(random.choice(string.ascii_letters) for h in range(14))
		#generate a learning styles profile
		act_ref = actref_custm.rvs()
		sen_int = senint_custm.rvs()
		vis_ver = visver_custm.rvs()
		seq_glo = seqglo_custm.rvs()

		# Debug output
		print(name, act_ref, sen_int, vis_ver, seq_glo)

		# Write to file
		w.writerow([name, act_ref, sen_int, vis_ver, seq_glo])
Example #4
 def __init__(self, state, record):
     self.consequences = [['Test: CT scan'],
                          ['Plan: Chemo', 'Plan: Surgery']]
     self.t_distributions = [[st.norm(loc=7, scale=2)],
                             [st.rv_discrete(values=([0], [1])),
                              st.rv_discrete(values=([0], [1]))]]
     self.c_distribution = [st.rv_discrete(values=([0], [1])),
                            st.rv_discrete(values=([0, 1], [0.5, 0.5]))]
Example #5
def RandomizedFictitiousPlay(A, Epsilon):
    n = len(A[0])
    m = len(A)

    X = numpy.matrix(numpy.zeros((m, 1), dtype=int))
    Y = numpy.matrix(numpy.zeros((n, 1), dtype=int))
    X[0] = 1
    Y[0] = 1

    numpy.random.shuffle(X)
    numpy.random.shuffle(Y)

    t = int(round(6*math.log(2*n*m)/pow(Epsilon, 2)))

    for i in range(t):

        Ax = numpy.array(numpy.transpose(A) * X).tolist()
        #print Ax
        Ay = numpy.array(A * Y).tolist()
        #print Ay
        # Row player: draw index i with probability proportional to
        # exp(Epsilon * (A*Y)_i / 2); sampling the index directly avoids
        # the ambiguity of looking up repeated payoff values.
        weights = [math.exp(Epsilon*item[0]/2) for item in Ay]
        temp = sum(weights)
        theprobabilities = [w / temp for w in weights]
        distrib = stats.rv_discrete(values=(range(len(Ay)), theprobabilities))
        xchoice = distrib.rvs(size=1)[0]

        # Column player: draw index j with probability proportional to
        # exp(-Epsilon * (A^T*X)_j / 2)
        weights = [math.exp(-Epsilon*item[0]/2) for item in Ax]
        temp = sum(weights)
        theprobabilities = [w / temp for w in weights]
        distrib = stats.rv_discrete(values=(range(len(Ax)), theprobabilities))
        ychoice = distrib.rvs(size=1)[0]

        #print xchoice
        X[xchoice] += 1
        #print X
        #print ychoice
        Y[ychoice] += 1
        #print Y
    return X/float(t+1), Y/float(t+1)
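A hedged sketch on a tiny 2x2 identity game (the Epsilon value is arbitrary):

A = numpy.matrix([[1.0, 0.0], [0.0, 1.0]])
x, y = RandomizedFictitiousPlay(A, 0.5)
print(x.T, y.T)  # empirical mixed strategies; each sums to 1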
Example #6
def prop(q):
	"""
	Returns a dictionary with keys tau_1 and tau_2 and values discrete RV
	with probabilities determined by $q$ as in the matrix above.
	"""
	proposal = {}
	proposal[tau.xk[0]] = rv_discrete(values = [(tau.xk[0], tau.xk[1]), 
	                      (q, 1-q)])
	proposal[tau.xk[1]] = rv_discrete(values = [(tau.xk[0], tau.xk[1]), 
						  (1-q, q)])
	return proposal 
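Example #7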
    def test_entropy(self):
        # Basic tests of entropy.
        pvals = np.array([0.25, 0.45, 0.3])
        p = stats.rv_discrete(values=([0, 1, 2], pvals))
        expected_h = -sum(xlogy(pvals, pvals))
        h = p.entropy()
        assert_allclose(h, expected_h)

        p = stats.rv_discrete(values=([0, 1, 2], [1.0, 0, 0]))
        h = p.entropy()
        assert_equal(h, 0.0)
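As a quick sanity check outside the test, the first case evaluates to about 1.0671 nats:

import numpy as np
from scipy.special import xlogy

pvals = np.array([0.25, 0.45, 0.3])
print(-np.sum(xlogy(pvals, pvals)))  # ~1.0671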
Example #8
    def perform(self, action):
        # get distribution about outcomes
        probabilities = self.belief[action] / np.sum(self.belief[action])
        distrib = rv_discrete(values=(range(len(probabilities)),
                                      probabilities))

        # draw sample
        sample = distrib.rvs()

        # update belief accordingly
        belief = copy(self.belief)
        belief[action][sample] += 1

        # manual found
        if (self.pos == self.world.manual).all():
            print("m", end="")
            belief = {ToyWorldAction(np.array([0, 1])): [50, 1, 1, 1],
                      ToyWorldAction(np.array([0, -1])): [1, 50, 1, 1],
                      ToyWorldAction(np.array([1, 0])): [1, 1, 50, 1],
                      ToyWorldAction(np.array([-1, 0])): [1, 1, 1, 50]}

        # build next state
        pos = self._correct_position(self.pos + self.actions[sample].action)

        return ToyWorldState(pos, self.world, belief)
Example #9
def discrete_sample(values, probabilities, ns=1):
    distrib = rv_discrete(values=(range(len(values)), probabilities))
    indices = distrib.rvs(size=ns)
    if ns == 1:
        return values[indices[0]]
    else:
        return [values[i] for i in indices]
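A hedged usage sketch (the values and weights are made up):

print(discrete_sample(['a', 'b', 'c'], [0.2, 0.5, 0.3]))        # a single value, e.g. 'b'
print(discrete_sample(['a', 'b', 'c'], [0.2, 0.5, 0.3], ns=5))  # a list of five values
Example #10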
def SequenceDynSelf(protocell,mu,L,N):

    q = (1-mu)**L
    total=np.sum(protocell)
    global test

    while total != 2*N:

        # Pick the sequence type
        sec_freq = protocell/total
        values=np.arange(len(protocell))
        custm = sps.rv_discrete(name='custm', values=(values, sec_freq))
        R = custm.rvs(size=1)
        R=R.tolist()
        R=int(R[0])
        sample=R

        test = nprandom.binomial(1,q)

        if sample == 0:
            protocell[0]=protocell[0]+1
        elif test == 1:
            protocell[sample]=protocell[sample]+1
        else:
            protocell[0]=protocell[0]+1
        total=np.sum(protocell)

    return protocell
Example #11
    def __init__(self, len_dist, dataset, max_width=None, max_height=None,
                rotate=False, resize=False, crazy=False, max_dist=None):
        """ len_dist: a dict containing the distribution of length.
            max_dist: maximum distance between digits, to make them closer to each other
        """
        lens = list(len_dist.keys())
        self.max_len = max(lens)
        probs = [len_dist[k] for k in lens]

        self.do_rotate = rotate
        self.do_resize = resize
        self.crazy = crazy
        self.max_dist = max_dist

        self.len_rvg = stats.rv_discrete(values=(lens, probs))
        # merge train/valid/test
        self.dataset = dataset
        shape = self.dataset[0][0][0].shape

        self.orig_image_shape = int(np.sqrt(shape[0]))
        assert self.orig_image_shape ** 2 == int(shape[0])
        if max_width is None:
            max_width = self.orig_image_shape * self.max_len
        if max_height is None:
            max_height = self.orig_image_shape
        self.img_size = (max_height, max_width)
        print "Original dataset size: {0}, {1}, {2}".format(len(dataset[0][0]),
                                                         len(dataset[1][0]),
                                                         len(dataset[2][0]))
        print "Image size: {0}".format(self.img_size)
Example #12
def plot_boxplot(karr,xdistr,ydistr,name,color,ax=None,dx=.1):
    if ax is None: ax = plt.gca()
    minarr = []; maxarr = []; meanarr = []
    leftarr = []; q1arr = []; medarr = []; q3arr = []; rightarr = []
    for k in karr:
        ik = k-1
        distr = stats.rv_discrete(name=name,values=(xdistr,ydistr[ik,:]))
        minarr.append(distr.ppf(0))
        leftarr.append(distr.ppf(.05))
        q1arr.append(distr.ppf(.25))
        meanarr.append(distr.mean())
        medarr.append(distr.median())
        q3arr.append(distr.ppf(.75))
        rightarr.append(distr.ppf(.95))
        maxarr.append(distr.ppf(1))
    minarr  = np.array(minarr)
    leftarr = np.array(leftarr)
    q1arr   = np.array(q1arr)
    meanarr = np.array(meanarr)
    medarr  = np.array(medarr)
    q3arr   = np.array(q3arr)
    rightarr= np.array(rightarr)
    maxarr  = np.array(maxarr)
    
    ax.plot(karr,leftarr,linestyle='--',color=color)
    ax.plot(karr+dx,meanarr,marker='s',linestyle='None',color=color)
    ax.errorbar(karr,medarr,yerr=[medarr-q1arr,q3arr-medarr],
                marker='o',ls='-',lw=1.8,label=name,color=color)
    ax.plot(karr,rightarr,linestyle='--',color=color)
    return ax
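Example #13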
def profileRandomlyGeneratedKmer(k, sequence, profile):
    bases = {'A': 0, 'C': 1, 'G': 2, 'T': 3}
    kmerProbabilities = []

    # loop through all kmers in sequence
    # there are len(sequence) - k + 1 k-mers in the sequence
    for index in range(len(sequence) - k + 1):
        kmerP = 1
        # compute probability of kmer starting at sequence[index]
        for kmerIndex in range(k):
            baseIndex = bases[sequence[index + kmerIndex]]
            kmerP *= profile[baseIndex][kmerIndex]

        kmerProbabilities.append(kmerP)

    sumP = sum(kmerProbabilities)
    # normalize to total probability of 1
    for i in range(len(kmerProbabilities)):
        kmerProbabilities[i] /= sumP

    # generate probability distribution based on kmer
    xk = np.arange(len(kmerProbabilities))
    custm = stats.rv_discrete(name='custm', values=(xk, kmerProbabilities))
    randKmerIndex = custm.rvs()

    return sequence[randKmerIndex:randKmerIndex + k]
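A hedged usage sketch; the sequence and the 4 x k profile matrix (rows A, C, G, T, columns summing to 1) are made up:

sequence = 'ACGTACGTGG'
profile = [[0.4, 0.2, 0.1],   # A
           [0.2, 0.3, 0.1],   # C
           [0.2, 0.3, 0.4],   # G
           [0.2, 0.2, 0.4]]   # T
print(profileRandomlyGeneratedKmer(3, sequence, profile))  # e.g. 'GTA'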
Example #14
 def add_vertex(self):
     # Step 1. Choose cluster
     clusters = sorted(self.clusters.keys())
     new_cluster = len(clusters) + 1
     if self.cluster_attachment == "preferential":
         # probability of new cluster is 1 / (n_vertices + 1)
         norm = float(sum(self.clusters.values()) + 1)
         p_new = 1.0 / norm
         probas = [self.clusters[cl] / norm for cl in clusters]
     elif self.cluster_attachment == "uniform":
         # probability of new cluster is 1 / (n_clusters + 1)
         norm = float(len(self.clusters.keys()) + 1)
         p_new = 1.0 / norm
         probas = [1.0 / norm for cl in clusters]
     # determine if new cluster appears
     if np.random.random() < p_new:
         self.clusters[new_cluster] = 1
         # generate new vertex
         # coord = self._gen_1d_vertex(self.bbox[0], self.bbox[1])
         coord = np.random.choice(self.bbox)  # select one boundary
         new_vertex = ((coord,), new_cluster)
     else:
         distr = rv_discrete(values=(clusters, probas))
         cl = distr.rvs(size=1)[0]
         self.clusters[cl] += 1
         # generate vertex from existing cluster
         cluster_vertices = np.array([v[0][0] for v in self.graph.nodes() if v[1] == cl])
         pivot = np.random.choice(cluster_vertices)
         coord = self._gen_1d_vertex(pivot - 0.5, pivot + 0.5)
         new_vertex = ((coord,), cl)
     old_vertices = list(self.graph.nodes())  # freeze the node list before mutating the graph
     self.graph.add_node(new_vertex)
     for v in old_vertices:
         if abs(v[0][0] - new_vertex[0][0]) < 0.5:
             self.graph.add_edge(v, new_vertex)
Example #15
def run_episode(policy, num_episodes, max_time,
                transitions, rewards, time_based=False, repeating=True):
    """Runs NUM_EPISODES episodes using the policy given and returns the results."""
    random.seed()
    deviation_prob = 0.05
    lives = np.zeros([num_episodes, max_time, 4])
    # lives = np.zeros(max_time, 4 * num_episodes)
    num_actions = rewards.shape[0]
    num_states = rewards.shape[1]
    for n in range(num_episodes):
        # or start at random: random.randrange(num_states)
        state = 0
        life = np.zeros([max_time, 4])
        for t in range(max_time):
            action = policy[t, state]
            deviated = random.random() < deviation_prob
            if deviated:
                # perform any other action
                action = random.choice([x for x in range(num_actions) if x != action])
            life[t, 0] = rewards[action, state]
            life[t, 1] = state
            life[t, 2] = int(deviated)
            life[t, 3] = action
            # rv for next state with distribution from transition probabilities
            if time_based:
                rv_vals = (range(num_states), transitions[t, action, state, :])
            else:
                rv_vals = (range(num_states), transitions[action, state, :])
            next_rv = stats.rv_discrete(name="next_state", values=rv_vals)
            state = next_rv.rvs()
        lives[n] = life
        # lives[:, (n*4):((n+1)*4)] = life
    return lives
Example #16
def softmax_rv(masses, values=None, z=0.05):
    """ Returns a discrete random variable based on the softmax function. """
    nums = np.exp(np.array(masses) / z)
    dist = nums / np.sum(nums)
    if values is None:
        values = np.arange(len(masses))
    return stats.rv_discrete(name='softmax', values=(values, dist))
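A quick usage sketch with made-up masses; a smaller z sharpens the distribution, so most of the probability lands on the largest mass:

rv = softmax_rv([1.0, 2.0, 3.0], z=0.5)
print(rv.pmf(2))        # ~0.87: the arg-max index dominates
print(rv.rvs(size=5))   # e.g. [2 2 2 1 2]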
Example #17
def create_concordance_probability(d):
    word_list = list(d.keys())
    freq_list = list(d.values())
    word_order_list = [x for x in range(len(word_list))]
    probs = stats.rv_discrete(a=0, values=(word_order_list, freq_list))

    return CacheResult(word_list, probs)
Example #18
 def __init__(self, state, record):
     stage = np.random.choice([1, 2, 3, 4],
                              p=[1/100, 6/100, 14/100, 79/100])
     self.consequences = [['Test: Endoscopy']]
     self.t_distributions = [[st.norm(loc=14, scale=3)]]
     self.c_distribution = [st.rv_discrete(values=([0], [1]))]
     state.add(('C16', stage))
Example #19
 def sample_child(self, model):        
     values = [i for i in range(len(self.children))]
     probabilities = [model.get_probability(self, self.children[i]) for i in range(len(self.children))]
     distrib = rv_discrete(values=(range(len(values)), probabilities))
     d = distrib.rvs(size=1)[0]
                    
     return self.children[d]       
Example #20
def get_empirical_pmf(values, probabilities, name="Empirical PMF"):
    """Return custom Scipy.stats discrete PMF.
    
    *Arguments*
        ``values`` [Integer]
            The random variable represented by the empirical pmf can
            assume the values in this Python list of integers.
        ``probabilities`` [float]
            This Python list of positive floats should be the same
            length as ``values``. The floats should be between zero and
            one. Each float is the probability that the corresponding
            (i.e., has same index) item in the ``values`` list will
            occur.
        ``name`` String, optional
            Assign ``name`` to the empirical pmf. Default value is
            'Empirical PMF'.
            
    """
    return stats.rv_discrete(values=(values, probabilities), name=name)
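A minimal sketch of the API the docstring describes, with made-up weights:

rv = get_empirical_pmf([1, 2, 3, 4], [0.1, 0.2, 0.3, 0.4], name='loaded die')
print(rv.mean())        # 3.0
print(rv.rvs(size=10))  # ten simulated draws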
Example #21
    def getCDF(self, points=None):
        # Approximation of the PDF integral (not in the original version of the Custom class)
        #    given a set of points: what is the CDF, starting from the PDF of the data?
        #------------------------------------
        # version 1:
        #---------------------------------------------------------------------
        #x = sorted(points) # points can be different from self.data
        # 
        #y = self.getPDF(x) # pdf function associated with self.data and points
        # 
        #c     = [] # list for future CDF array
        #c.append( 0.) # initialization
        #      
        #for i in range(1,len(points)):
        #    c.append((y[i-1]+y[i] )*.5*(x[i]-x[i-1])+c[i-1] )
        #for i in range(1,len(points)):
        #    c[i] = c[i]/c[len(points)-1]     
        #return c
        #--------------------------------------------------------------------
        # version 2
        points = np.asarray(points)  # np.matrix is deprecated

        y = self.getPDF(self.data) 
        summ = np.sum(y) 
        p = np.array(y/summ)
        custom = stats.rv_discrete(name='custom', values=(self.data, p)) 

        return custom.cdf(points)
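Example #22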
 def getiCDF(self, xx):
     """ 
     A custom inverse cumulative distribution function.
     
     :param Custom self:
         An instance of Custom class.
     :param array xx:
         An array of points in which the inverse cumulative density function needs to be evaluated.
     :return:
         Inverse cumulative density function values of the Custom distribution.
     """
     #x  = self.data 
     #y  = self.getPDF(x)
     #c  = []
     #yy = []
     #c.append(0.0)
     #for i in range(1, len(x)):
     #    c.append(c[i-1]+(x[i]-x[i-1])*(y[i]+y[i-1])*.5)
     #for i in range(1, len(x)):
     #    c[i]=c[i]/c[len(x)-1]
     #for k in range(0, len(x)):
     #    for i in range(0, len(x)):
     #        if ((xx[k]>=c[i]) and (xx[k]<=c[i+1])):
     #            value = float((xx[k]-c[i])/(c[i+1]-c[i])*(x[i+1]-x[i])+x[i])
     #            yy.append(value)
     #            break
     #return yy
     xx = np.asarray(xx)  # np.matrix is deprecated
     y = self.getPDF(self.data)
     summ = np.sum(y)
     p = np.array(y/summ)
     custom = stats.rv_discrete(name='custom', values=(self.data, p))
     return custom.ppf(xx)
Example #23
    def get_next_chord_conditional(self, chord, note):
        """
        Given a chord, draws a random next chord according to its probability distribution conditioned
        on the preceding note.
        note here is a note
        """
        chords = self.key.c_chord_given_c_note(note[:-1])
        idxs = []
        for j in range(len(chords)):
            for i in range(len(self.key.chords)):
                if chords[j] == self.key.chords[i]:
                    idxs.append(i)

        idxs = list(set(idxs))

        chord = self.specified_chord_to_chord(chord)
        cond_dist_dict = self.chord_chord_map[chord] # Gets list of candidate chords
        next_chords = sorted(cond_dist_dict.keys())
        next_probs = [cond_dist_dict[next_chord] for next_chord in next_chords]

        true_next_chords = [next_chords[int(i)] for i in idxs]
        true_next_probs = [next_probs[int(i)] for i in idxs]

        allsum = sum(true_next_probs)

        for i in range(len(true_next_probs)):
            true_next_probs[i] = true_next_probs[i] / allsum


        cond_dist = stats.rv_discrete(name='cond_dist', values=(true_next_chords, true_next_probs))
        return self.key.chords[int(cond_dist.rvs()) - 1]
Example #24
def simplify3(nk):
	result=[]
	nk=np.array(nk)
	xk = nk/float(np.sum(nk))
	#print nk
	
	#X_plot = np.linspace(0, len(nk), 1000)[:, np.newaxis]
	sdiv=1000
	X_plot = np.linspace(0, len(xk), sdiv)[:, np.newaxis]
	custm = stats.rv_discrete(name='custm',a=0,b=7, values=(range(len(xk)), xk))
	yk= custm.rvs(size=100000)
	#yk.flatten()
	#fig, ax = plt.subplots(1, 1)
	#ax.hist(yk, normed=True, histtype='stepfilled', alpha=0.2)
	# gaussian KDE
	X=yk.reshape(-1, 1)
	kde = KernelDensity(kernel='gaussian', bandwidth=0.6).fit(X)
	log_dens = kde.score_samples(X_plot)
	mi, ma = argrelextrema(log_dens, np.less)[0], argrelextrema(log_dens, np.greater)[0]
	mi=np.rint(mi*float(len(xk))/float(sdiv))
	ma=np.rint(ma*float(len(xk))/float(sdiv))
	start=0	
	#print mi
	for i in mi:
		i=int(i)
		if start!=i:
			val=np.average(nk[start:i])
			for j in range(start,i):
				result.append(val)
		start=i	
	val=np.average(nk[start:])
	for j in range(start,len(nk)):
			result.append(val)
	return np.array(result)
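Example #25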
def main():
	fig = plt.figure(figsize=(3.2, 1.8))
	ax = fig.add_axes([0, 0, 1, 1])

	signal = create_gammapy_skymap().data
	background = np.ones(signal.shape)
	background /= background.sum()

	data = (1 * signal + background) / 2.

	# setup counts generator
	pdf = data.copy().flatten()
	x = np.arange(pdf.size)
	counts_generator = rv_discrete(name='counts', values=(x, pdf))

	counts = np.zeros_like(data)

	image = ax.imshow(counts, cmap='afmhot', origin='lower', vmin=0, vmax=9,
					  interpolation='None')
	bins = np.arange(counts.size + 1) - 0.5

	anim = FuncAnimation(fig, animate, fargs=[image, counts, bins, counts_generator],
	                     frames=200, interval=50)
	
	filename = 'gammapy_logo.gif'
	anim.save(filename, writer='imagemagick')
Example #26
def getRandomGenerator(freqs):
    s = sum(freqs)
    l = len(freqs)
    probs = [float(freqs[i])/float(s) for i in range(l)]
    quals = np.arange(l)
    dist = stats.rv_discrete(name='custm', values=(quals, probs))
    return dist
Example #27
    def simulate(self, N, start=None, stop=None, dt=1):
        """
        generates a realization of the Hidden Markov Model

        :param N: int  trajectory length in steps of the lag time
        :param start: int (default=None) - starting hidden state. If not given, will sample from the stationary
            distribution of the hidden transition matrix
        :param stop: int or int-array-like (default=None) - stopping hidden set. If given, the trajectory will be stopped before
            N steps once a hidden state of the stop set is reached
        :param dt: int - trajectory will be saved every dt time steps. Internally, the dt'th power of P is taken to ensure a more efficient simulation
        :return: ndarray, ndarray -  tuple of (hidden state trajectory with length N/dt, observable state discrete trajectory with length N/dt)
        """


        from scipy import stats
        import msmtools.generation as msmgen
        # generate output distributions
        output_distributions = [stats.rv_discrete(values=(np.arange(self.pobs.shape[1]), pobs_i)) for pobs_i in self.pobs]
        # sample hidden trajectory
        htraj = msmgen.generate_traj(self.transition_matrix, N, start=start, stop=stop, dt=dt)
        otraj = np.zeros(htraj.size, dtype=int)
        # for each time step, sample microstate
        for t, h in enumerate(htraj):
            otraj[t] = output_distributions[h].rvs()  # current cluster
        return htraj, otraj
Example #28
def non_uniform_approx(A, B, S, R):
    """Creates non-uniformly approximate matrices of A and B, C and R."""
    # Pick rows from A and corresponding column from B uniformly random
    n = A.shape[1]
    s = S.shape[1]
    rows = np.arange(0, n)  # The list of rows
    probs = np.zeros(n)  # The probability of each column

    # Calculate the probability of selecting each column based on the amount of
    # information using the method proposed by Drineas and Kannan. The probability
    # is based on the product of the row and column euclidean norms divided by
    # the cumulative sum of the product of euclidean norms for all rows and columns
    D = 0.0
    for i in range(0, n):
        prod = np.sqrt((A[i, :]*A[i, :]).sum()) * np.sqrt((B[:, i]*B[:, i]).sum())
        probs[i] = prod
        D += prod
    probs /= D  # normalize only after the full sum has been accumulated

    # Use the probabilities to pick the rows and columns non-uniformly
    distrib = rv_discrete(values=(rows, probs))
    for t, i_t in enumerate(distrib.rvs(size=s)):
        S[:, t] = A[i_t, :]
        R[t, :] = B[:, i_t]
        # Apply scaling
        scaling = np.sqrt(s * probs[i_t])
        S[:, t] /= scaling
        R[t, :] /= scaling
Example #29
  def __call__(self, *args, **kwargs):
    frozen = self.underlying(*args, **kwargs)

    return stats.rv_discrete(
      a = 0, b = self.max_value,
      values = (np.arange(self.max_value + 1), truncated_pmf(frozen, self.max_value + 1))
    )
Example #30
def getadm(psrcatdm, iseed, nbins, n):
    """Function to randomly select a dm value from the known pulsar dms in the catalogue.

       Creates a distribution of the dm values given their probabilities, and randomly select
       a dm value to use for scattering.
       
       Args:
       -----
       psrcatdm     : a file containing psrcat dm values in 1 column (nan values replaced with zeros)
       iseed        : seed for the random number generator [int].
       nbins        : number of bins.
       n            : size of the samples to draw
       
       Returns:
       --------
       rand_dm      : randomly selected dm value (pc cm^-3). 
    """
    dm_file_name = str(psrcatdm)
    dm_file = np.loadtxt(dm_file_name)                  # Load the txt file containing the DM
    dm_dat = dm_file[np.where(dm_file > 0)]             # Exclude the zero dms used to replace null values from psrcat
    hist, bin_edges = np.histogram(dm_dat, bins=nbins)  # creates a histogram distribution
    probs = hist/float(len(dm_dat))                     # Compute probabilities
    dm_range = np.linspace(np.min(dm_dat), np.max(dm_dat), endpoint=True, num=len(probs))
    normdiscrete = stats.rv_discrete(values=(dm_range, probs), seed=iseed) # Find an arbitrary distribution
    rand_dm = normdiscrete.rvs(size=n)                         # draw a sample of size n

    return rand_dm
Example #31
 def measure_cheat(self):
     #Measure but ignore the 0 state, for debugging Shor's algorithm
     data = self.array.toarray()[0]
     pos = np.arange(len(data))
     probs = np.abs(np.square(data))
     probs[0] = 0
     #If probs is not normalised (usually due to rounding errors), re-normalise
     probs = probs / np.sum(probs)
     #print(probs)
     dist = stats.rv_discrete(values=(pos, probs))
     self.array = np.zeros(data.shape)
     self.array[dist.rvs()] = 1
     self.array = sp.bsr_matrix(self.array)
Example #32
    def generator(self):
        # construct a generator using the PDF
        if self.my_generator is None:

            N = len(self.dz)

            xk = np.arange(N)
            pk = self.dz
            pk = pk / pk.sum()

            self.my_generator = stats.rv_discrete(name='zdist', values=(xk, pk))

        return self.my_generator
Example #33
 def _make_gp_list(self, mean, samples):
     """
     Generating a list of numbers of pixels discharged based on the total mean
     discharges using the generalized Poisson function.
     """
     width = np.sqrt(mean)
     k_min = max([min([0, mean - 3 * width]), 0])
     k_max = mean + 3 * width + 10
     k_arr = np.arange(k_min, k_max)
     gp_prob = _general_poisson(k_arr, mean, self.lamb)
     gp_prob = gp_prob / np.sum(gp_prob)  ## Additional normalization
     dist = stats.rv_discrete('GeneralizedPoisson', values=(k_arr, gp_prob))
     return dist.rvs(size=samples)
Example #34
def get_chord_single_distr(start_prob, end_prob, time, composition_length):
    cs_probs = []

    unnormed_prob = 0.

    tc = time / composition_length

    chord_prob = filt(start_prob + (end_prob - start_prob) * tc)
    single_prob = 1. - chord_prob

    distrib = stats.rv_discrete(name='csd',
                                values=([0, 1], [chord_prob, single_prob]))
    return distrib
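Example #35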
def generate_padding(plain_samples, blocksize):
    padding_length = blocksize * blocksize - len(plain_samples)

    bincount = (-1 * (np.amin(plain_samples)))
    bincount += (np.amax(plain_samples) + 1)  # add the zero bin
    hist, bins = np.histogram(plain_samples, bincount, density=True)
    pk = hist / np.sum(hist)

    xk = np.arange(np.amin(plain_samples), np.amax(plain_samples) + 1)
    sampdist = stats.rv_discrete(name='Sample distribution', values=(xk, pk))
    randsamples = sampdist.rvs(size=padding_length)

    return randsamples
Example #36
def total_correlation(Xs):
    pmf = []
    Xs = np.asarray(Xs)
    for stock in range(len(Xs)):
        s = pd.Series(Xs[stock])
        b = (s.groupby(s).transform('count') / len(s)).values
        pmf.append(b)
    pmf = np.asarray(pmf)
    pmf /= pmf.sum()
    d = Distribution.from_ndarray(pmf)
    t = T(d)
    return t
Example #37
def getWarmingLevelMixDistributions(warmingLev, startYear=2000, endYear=2150):
    pdfR8 = getWarmingLevelMixDistributionByScen('rcp85',
                                                 warmingLev,
                                                 startYear=startYear,
                                                 endYear=endYear)
    pdfR4 = getWarmingLevelMixDistributionByScen('rcp45',
                                                 warmingLev,
                                                 startYear=startYear,
                                                 endYear=endYear)
    yrs = pdfR8.xk
    mixDist = (pdfR8.pk + pdfR4.pk) / 2.
    pdf = st.rv_discrete(values=(yrs, mixDist))
    return pdf, pdfR8, pdfR4
Example #38
 def __init__(self, name, min_included, max_included, null_default_value, **kwargs):
     scipy_dist_obj = rv_discrete(
         name=name,
         a=min_included,
         b=max_included,
         **kwargs
     )
     scipy_dist_obj._pmf = self._pmf
     ScipyDiscreteDistributionWrapper.__init__(
         self,
         scipy_distribution=scipy_dist_obj,
         null_default_value=null_default_value
     )
Example #39
def erdos_renyi_ternary(num_genes: int, prob_conn: float) -> np.ndarray:
    """Generate ternary valued ER graph.

    Args:
        num_genes: Number of genes/nodes.
        prob_conn: Probability of connection.

    Returns:
        Adjacency matrix.
    """
    signed_edge_dist = rv_discrete(
        values=([-1, 0, 1], [prob_conn / 2, 1 - prob_conn, prob_conn / 2]))
    return signed_edge_dist.rvs(size=(num_genes, num_genes))
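A quick sketch: a hypothetical 5-gene network with connection probability 0.3, giving entries in {-1, 0, 1} with roughly 30% nonzero:

adj = erdos_renyi_ternary(5, 0.3)
print(adj)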
Example #40
    def var(self, only_missense=False):
        if only_missense:
            return self.missense_scores.var()
        else:

            mean_of_type_vars = self.f_missense * self.missense_scores.var()
            type_means_dist = rv_discrete(
                values=([1, self.missense_scores.mean(), 0],
                        self.get_type_freqs()))
            var_of_type_means = type_means_dist.var()

            # According to the law of total variance.
            return mean_of_type_vars + var_of_type_means
Example #41
 def measure(self):
     """Measure qubit register. Collapses to 1 definite state. Simulates real measurement, as 
     intermediate values of qubit registers during computation remain unknown.
     """
     self.normalise()
     pos = np.arange(len(self.array))
     probs = np.real(self.array * np.conjugate(self.array))
     #If probs is not normalised (usually due to rounding errors), re-normalise
     #probs = probs/np.sum(probs)
     dist = stats.rv_discrete(values=(pos, probs))
     self.array = np.zeros(self.array.shape)
     self.array[dist.rvs()] = 1
     return self.array
Example #42
    def get_for(self, var: str, constr: str):
        """
        Returns the randomness associated with the given variable and
        constraint pair. Returns a ``scipy.stats`` distribution (possibly
        discrete).
        """
        logger.debug(f"Retrieving randomness for ({var}, {constr}).")

        if self.is_finite():
            return rv_discrete(values=zip(*self._discrete[var, constr]),
                               name="discrete")
        else:
            return self._randomness[var, constr]
Example #43
 def statistics_back(self):
     counter = [0] * (len(self.class_back)+1)
     for i in self.back:
         counter[i[0]] += 1
         counter[i[1]] += 1
     total = 0.0
     for i in counter:
         total += i
     counter = [i/total for i in counter]
     candidates = list(range((len(self.class_back)+1)))
     custm = stats.rv_discrete(name='custm', values=(candidates, counter))
     num = custm.rvs(size=2)
     return num
Example #44
def generateSamples(w, mu, cov, s):
    dim = len(mu[0])
    d = rv_discrete(values=(range(len(w)), w))
    components = d.rvs(size=s)
    # generate samples of size of each component, then shuffle
    if dim > 1:
        return components, np.array([
            np.random.multivariate_normal(mu[i], cov[i], 1)[0]
            for i in components
        ])
    else:
        return components, np.asmatrix(
            [np.random.normal(mu[i], cov[i], 1)[0] for i in components]).T
Example #45
    def test_rvs(self):
        states = [-1, 0, 1, 2, 3, 4]
        probability = [0.0, 0.3, 0.4, 0.0, 0.3, 0.0]
        samples = 1000
        r = stats.rv_discrete(name='sample', values=(states, probability))
        x = r.rvs(size=samples)
        assert_(isinstance(x, numpy.ndarray))

        for s, p in zip(states, probability):
            assert_(abs(sum(x == s) / float(samples) - p) < 0.05)

        x = r.rvs()
        assert_(isinstance(x, int))
Example #46
    def fit_variational_inference(self,
                                  data,
                                  est_cluster,
                                  a,
                                  b,
                                  alpha,
                                  max_iter=50):
        self.lam_history = np.zeros((max_iter, est_cluster))
        self.pi_history = np.zeros((max_iter, est_cluster))

        a_hat = a.copy()
        b_hat = b.copy()
        alpha_hat = alpha.copy()

        print("start fitting...")
        for iteration in tqdm(range(max_iter)):
            _loglam = scipy.special.digamma(a_hat) - np.log(b_hat)
            _lam = a_hat / b_hat
            _logpie = scipy.special.digamma(alpha_hat) - scipy.special.digamma(
                np.sum(alpha_hat))
            # data shape = (data_len, 1)
            # param shape = (1, category_len)
            # nu shape = (data_len, category_len)

            nu = np.exp(data * _loglam - _lam + _logpie)
            nu = nu / np.sum(nu, axis=1, keepdims=True)

            a_hat = np.sum(data * nu, axis=0) + a
            b_hat = np.sum(nu, axis=0) + b

            alpha_hat = np.sum(nu, axis=0) + alpha

            self.pi_history[iteration, :] = np.random.dirichlet(
                alpha_hat[0], 1)
            self.lam_history[iteration, :] = stats.gamma.rvs(a_hat[0],
                                                             scale=1 /
                                                             b_hat[0],
                                                             size=est_cluster,
                                                             random_state=0)

        for cls in range(est_cluster):
            print("estimation of cluster {}: lambda = {:.2f}, real={}".format(
                cls, self.lam_history[iteration, cls], self.real_lam[cls]))
        _s = np.zeros((data.shape[0], est_cluster))
        for _n in range(data.shape[0]):
            cat = stats.rv_discrete(name='custm',
                                    values=(range(est_cluster), nu[_n, :]))
            _class = cat.rvs(size=1)
            _s[_n, _class] = 1
        self.estimate_cluster = _s
        self.cluster_proba = nu
Example #47
def bootstrap_counts(dtrajs, lagtime, corrlength=None):
    """
    Generates a randomly resampled count matrix given the input coordinates.

    See API function for full documentation.
    """
    from scipy.stats import rv_discrete
    # if we have just one trajectory, put it into a one-element list:
    if not isinstance(dtrajs, list):
        dtrajs = [dtrajs]
    ntraj = len(dtrajs)

    # can we do the estimate?
    lengths = determine_lengths(dtrajs)
    Lmax = np.max(lengths)
    Ltot = np.sum(lengths)
    if lagtime >= Lmax:
        raise ValueError('Cannot estimate count matrix: lag time '
                         + str(lagtime) + ' is longer than the longest trajectory length ' + str(Lmax))

    # how many counts can we sample?
    if corrlength is None:
        corrlength = lagtime
    nsample = int(Ltot / corrlength)

    # determine number of states n
    from deeptime.markov import number_of_states
    n = number_of_states(dtrajs)

    # assigning trajectory sampling weights
    w_trajs = np.maximum(0.0, lengths - lagtime)
    w_trajs /= np.sum(w_trajs)  # normalize to sum 1.0
    distrib_trajs = rv_discrete(values=(list(range(ntraj)), w_trajs))
    # sample number of counts from each trajectory
    n_from_traj = np.bincount(distrib_trajs.rvs(size=nsample), minlength=ntraj)

    # for each trajectory, sample counts and stack them
    rows = np.zeros((nsample,))
    cols = np.zeros((nsample,))
    ones = np.ones((nsample,))
    ncur = 0
    for i in range(len(n_from_traj)):
        if n_from_traj[i] > 0:
            (r, c) = bootstrap_counts_singletraj(dtrajs[i], lagtime, n_from_traj[i])
            rows[ncur:ncur + n_from_traj[i]] = r
            cols[ncur:ncur + n_from_traj[i]] = c
            ncur += n_from_traj[i]
    # sum over counts
    Csparse = scipy.sparse.coo_matrix((ones, (rows, cols)), shape=(n, n))

    return Csparse.tocsr()
Example #48
def batch_size_dist(min_num: int, max_num: int):
    """Function for sampling powers of 2.

    :param min_num: minimum number (a power of 2)
    :param max_num: maximum number (a power of 2)
    """
    assert math.log(min_num, 2).is_integer() and math.log(max_num, 2).is_integer(),\
        'Supplied minimum and maximum have to be powers of 2'
    min_pow = int(math.log(min_num, 2))
    max_pow = int(math.log(max_num, 2))
    no = max_pow - min_pow + 1
    return stats.rv_discrete(
        values=([2**p for p in np.arange(min_pow, max_pow + 1)],
                [1 / no for _ in np.arange(min_pow, max_pow + 1)]))
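A hedged usage sketch: batch sizes from 16 to 256, sampled uniformly over the powers of two in that range:

rv = batch_size_dist(16, 256)
print(rv.rvs(size=5))  # e.g. [ 64  16 256  32  64]
Example #49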
    def generiere_termine(self):
        arten = []
        wahrscheinlichkeiten = []
        for k, v in self.terminarten.items():
            arten.append(k)
            wahrscheinlichkeiten.append(v[0])
        verteilung = stats.rv_discrete(name='verteilung',
                                       values=(arten, wahrscheinlichkeiten))
        self.grundtermine = verteilung.rvs(size=self.anzahlTermine)

        for t in self.grundtermine:
            reale_dauer = random.gauss(t, self.terminarten[t][1])
            self.termine.append(
                model.Termin(random.choice(self.kunden), t, reale_dauer))
Example #50
def generate_age_group_distribution(name, age_groups, probabilities):
    '''
    Generates discrete distribution from age demography data.
    '''
    probabilities = np.array(probabilities) / np.sum(probabilities)

    xk = np.arange(age_groups[-1][-1] + 1)  # max age
    pk = []
    for group, prob in zip(age_groups, probabilities):
        a, b = group
        n = b - a + 1
        pk += n * [prob / n]

    return stats.rv_discrete(name=name, values=(xk, pk))
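A hedged usage sketch with made-up demography (three age groups and their weights):

age_groups = [(0, 19), (20, 64), (65, 89)]
probabilities = [25, 55, 20]  # normalized inside the function
age_rv = generate_age_group_distribution('ages', age_groups, probabilities)
print(age_rv.rvs(size=5))  # e.g. [34 71 18 42 55]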
Example #51
def get_area_distribution(tracks, fit=False):
    area = np.sum(tracks > 0, axis=(1, 2))

    if not fit:
        count = np.bincount(area)
        probability = count / float(np.sum(count))
        return stats.rv_discrete(a=0,
                                 b=count.shape[0] - 1,
                                 name='signal distribution',
                                 values=(np.arange(count.shape[0]),
                                         probability))
    else:
        exp_params = stats.expon.fit(area)
        return stats.expon(*exp_params)
Example #52
def _hist_fill(data, bins, entries=0, freeze=None):
    if type(bins) == int:
        if bins == 0:
            bins = np.arange(data.size + 1, dtype=float)
        else:
            bins = np.linspace(data.min(), data.max(), bins)
    binc = (bins[1:] + bins[:-1]) / 2
    if freeze:
        return lambda: data
    else:
        rv = rv_discrete(values=(binc, data / data.sum()))
        return lambda: np.histogram(
            rv.rvs(size=data.sum()
                   if entries == 0 else entries), bins)[0].astype(float)
Example #53
def mm():
    # m&m problem: draw one candy from each bag, getting one yellow and one
    # green; find the probability that the yellow one came from the 1994 bag.

    color_1994 = ('brown', 'yellow', 'red', 'green', 'orange', 'yellow_brown')
    color_1996 = ('blue', 'green', 'orange', 'yellow', 'red', 'brown')
    p_1994 = (.3, .2, .2, .1, .1, .1)
    p_1996 = (.24, .2, .16, .14, .13, .13)

    # Convert every color to a corresponding number and store the mapping in a dict
    color = set(color_1996).union(set(color_1994))
    color_dict = {i: c for c, i in enumerate(color)}
    color_1996 = [color_dict[c] for c in color_1996]
    color_1994 = [color_dict[c] for c in color_1994]
    mm_1994 = st.rv_discrete(values=(color_1994, p_1994))
    mm_1996 = st.rv_discrete(values=(color_1996, p_1996))

    # s is the event of drawing one yellow and one green
    # p(yellow=1994|s) = p(s|yellow=1994) * p(yellow=1994) / p(s) = p(s, yellow=1994) / p(s)
    p1 = mm_1994.pmf(color_dict['yellow']) * mm_1996.pmf(color_dict['green'])
    p2 = mm_1996.pmf(color_dict['yellow']) * mm_1994.pmf(color_dict['green'])
    p = p1 / (p1 + p2)
    print(p)
Example #54
    def generate_data(self, model_params, my_N):
        """ 
        Generate data according to the model. Internally uses generate_data_from_hidden.

        This method does _not_ obey gamma: The generated data may have more
        than gamma active causes for a given datapoint.
        """

        D = self.D
        H = self.H
        
        s = stats.rv_discrete(values = (np.arange(H),model_params['pies']), name = 'compProbDistr').rvs(size=my_N)

        return self.generate_from_hidden(model_params, {'s': s})
Example #55
def choice_faces(verts, faces):
    num = 4000
    u1, u2, u3 = np.split(verts[faces[:, 0]] - verts[faces[:, 1]], 3, axis=1)
    v1, v2, v3 = np.split(verts[faces[:, 1]] - verts[faces[:, 2]], 3, axis=1)
    a = (u2 * v3 - u3 * v2)**2
    b = (u3 * v1 - u1 * v3)**2
    c = (u1 * v2 - u2 * v1)**2
    Areas = np.sqrt(a + b + c) / 2
    Areas = Areas / np.sum(Areas)
    choices = np.expand_dims(np.arange(Areas.shape[0]), 1)
    dist = stats.rv_discrete(name='custm', values=(choices, Areas))
    choices = dist.rvs(size=num)
    select_faces = faces[choices]
    return select_faces
Example #56
def get_rolling_boundary(closing_values, bound, window, direction):
    coll = [np.nan] * window
    for i in range(window, closing_values.shape[0]):
        values = closing_values[i - window:i]
        # Create distribution
        perc = np.ones(values.shape[0]) / values.shape[0]
        dist = rv_discrete(values=(values, perc))

        # Get bounding of distribution
        if direction == 'upper' or direction == 'long':
            coll.append(dist.ppf(1 - bound))
        else:
            coll.append(dist.ppf(bound))
    return np.array(coll)
Example #57
 def _assign_sub_sector(self, person):
     """
     Assign sub-sector job as defined in config
     """
     MC_random = np.random.uniform()
     ratio = self.sub_sector_ratio[person.sector][person.sex]
     distr = self.sub_sector_distr[person.sector][person.sex]
     if MC_random < ratio:
         sub_sector_idx = stats.rv_discrete(
             values=(np.arange(len(distr)), distr)
         ).rvs()
         person.sub_sector = self.sub_sector_distr[person.sector]["label"][
             sub_sector_idx
         ]
Example #58
def get_power_law(a, m):
    """
    Defining a discrete power law probability density function of length

        a : a parameter of the distribution that controls 
        m : number of discrete values in the range of the r.v 
            following the power law
    """

    values = np.arange(1, m + 1, dtype='float')
    pmf = 1 / values**a
    pmf /= pmf.sum()

    return stats.rv_discrete(values=(range(1, m + 1), pmf)), pmf
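A hedged usage sketch: a = 2 (an inverse-square law) over m = 10 values:

rv, pmf = get_power_law(2.0, 10)
print(pmf[:3])         # decaying probabilities, largest at 1
print(rv.rvs(size=5))  # e.g. [1 1 3 1 2]
Example #59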
def get_initial_probs(filename):
    positions = []
    initial_probs = []

    reader = csv.DictReader(open(filename))

    for row in reader:
        positions.append(row["Position"])
        initial_probs.append(float(row["p"]))

    return (positions,
            stats.rv_discrete(name="initial_probs",
                              values=(range(0,
                                            len(positions)), initial_probs)))
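Example #60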
    def __init__(self,
                 data_sketch_root,
                 net,
                 plotter,
                 n=1000,
                 transform=None,
                 visdom=True,
                 model_train="",
                 image_size=224,
                 triplet=True):

        self.corr_files = []
        self.visdom = visdom
        self.data_sketch_root = data_sketch_root
        self.transform = transform
        self.n = n
        self.n_all = []
        self.df = []
        self.views = []
        self.image_size = image_size
        self.triplet = (triplet * 4) + 1
        for i in range(3):
            for j in range(3):
                self.views.append([dic_indices.get(i), dic_indices.get(j)])
                self.corr_files.append("{}/dataset_{}_train_{}_{}.csv".format(
                    data_sketch_root, model_train, i, j))
                self.df.append(pd.read_csv(self.corr_files[-1], index_col=0))
                self.n_all.append(self.df[-1].shape[0])

        self.prob = stats.rv_discrete(name='custm',
                                      values=([0, 1], [0.95, 0.05]))
        self.plotter = plotter
        self.n_part_len = np.array(self.n_all) // self.n
        self.part = np.zeros(9)
        self.prob_rotations = stats.rv_discrete(
            name='custm', values=([0, 1, 2, 3], [0.25, 0.25, 0.25, 0.25]))
        self.rotations = [
            lambda x: x, np.rot90, lambda x: np.rot90(np.rot90(x)),
            lambda x: np.rot90(np.rot90(np.rot90(x)))
        ]
        self.net = net
        self.prob_show_image = stats.rv_discrete(name='custm',
                                                 values=([0, 1], [0.95, 0.05]))
        self.prob_pixear_image = stats.rv_discrete(name='custm',
                                                   values=([0, 1, 2],
                                                           [0.60, 0.20, 0.20]))
        self.prob_view = stats.rv_discrete(name='custm',
                                           values=(np.arange(9),
                                                   np.ones(9) * 1 / 9))
        self.prob_view_negative = stats.rv_discrete(
            name='custm', values=(np.arange(9), np.ones(9) * 1 / 9))
        self.pixear = [
            lambda x: x,
            lambda x: cv2.resize(x, (64, 64), interpolation=cv2.INTER_CUBIC),
            lambda x: cv2.resize(x, (128, 128), interpolation=cv2.INTER_CUBIC)
        ]
        self.init_csv()