Пример #1
0
def BLD_3_Generator(p,q,samplesize,lmbd,C,C2):
    """Draw ``samplesize`` values from a three-component mixture.

    Fixed species tree with given probabilities of introgression: each
    sample falls into exactly one of three branches.

    * with probability ``p``: ``expon.rvs() * lmbd``
    * with probability ``q``: the same exponential term plus
      ``(C - truncexpon.rvs(C)) * lmbd``
    * otherwise: the same exponential term plus
      ``(C2 - truncexpon.rvs(C2)) * lmbd``

    Args:
        p: probability of the first branch.
        q: probability of the second branch.
        samplesize: number of values to generate.
        lmbd: multiplicative factor applied to every random term.
        C: shape parameter / offset used by the second branch.
        C2: shape parameter / offset used by the third branch.

    Returns:
        A one-element list ``[[('testA','testB','testC'), samples, [], []]]``
        where ``samples`` is the list of generated values.
    """
    outputList = []
    for _ in range(samplesize):
        # Use ONE uniform draw per sample and compare it against the
        # cumulative thresholds p and p+q.  Drawing a fresh number in the
        # ``elif`` (as the previous version did) made the second branch occur
        # with probability (1-p)*(p+q) instead of q.
        u = numpy.random.random_sample()
        if u < p:
            value = expon.rvs() * lmbd
        elif u < (p + q):
            value = expon.rvs() * lmbd + (C - truncexpon.rvs(C)) * lmbd
        else:
            value = expon.rvs() * lmbd + (C2 - truncexpon.rvs(C2)) * lmbd
        outputList.append(value)
    return [[('testA','testB','testC'),outputList,[],[]]]
Пример #2
0
 def truncexpon_draws(self, lbd_scale, rndsd=0, thresh=1, datasize=1000):
     """Simulate one p-value per hypothesis from truncated-exponential data.

     For every hypothesis index ``i`` in ``range(self.numhyp)`` a sample of
     ``datasize`` truncated-exponential variates (shape ``b=thresh``) is
     drawn.  Where ``self.alt_vec[i] == 0`` the default scale is used,
     otherwise ``scale=lbd_scale``.  The sample sum is compared against a
     normal approximation of the sum under the default-scale distribution
     and the upper-tail probability is stored in ``self.pvec[i]``.

     Args:
         lbd_scale: scale used for hypotheses whose ``alt_vec`` entry is non-zero.
         rndsd: seed passed to ``np.random.seed`` (global NumPy RNG).
         thresh: truncation point ``b`` of the distribution.
         datasize: number of variates per hypothesis.

     Side effects:
         Reseeds the global NumPy RNG, overwrites ``self.pvec`` and hands the
         result to ``saveres`` (defined elsewhere; presumably writes it below
         ``./expsettings`` -- confirm against its implementation).
     """
     np.random.seed(rndsd)

     # Mean and variance of a single default-scale draw.  Taken from scipy so
     # they stay correct for any ``thresh`` (the previous closed form was
     # only valid for thresh == 1).
     unit_mean, unit_var = truncexpon.stats(b=thresh, moments='mv')
     sum_mean = datasize * float(unit_mean)
     # ``scale`` of the normal is a standard deviation: the sum of n i.i.d.
     # draws has std sqrt(n * var), not n * var as previously passed.
     sum_std = np.sqrt(datasize * float(unit_var))

     for i in range(self.numhyp):
         if self.alt_vec[i] == 0:
             database = truncexpon.rvs(b=thresh, size=datasize)
         else:
             database = truncexpon.rvs(b=thresh, scale=lbd_scale, size=datasize)
         z = database.sum()
         # Survival function == 1 - cdf, but numerically safer in the tail.
         self.pvec[i] = norm.sf(z, loc=sum_mean, scale=sum_std)

     dirname = './expsettings'
     filename = "P_NH%d_PM%.2f_lbd%.2f_SEED%d" % (self.numhyp, self.pi, lbd_scale,rndsd)
     saveres(dirname, filename, self.pvec)
Пример #3
0
 def sample_times(node, num_times):
     """Draw ``num_times`` sampling times for ``node`` inside its infected windows.

     The transmission list ``GC.transmissions`` is scanned for events that
     involve ``node``: an event with ``u == v == node`` closes the current
     window, and a later event with ``v == node`` opens a new one.  Each
     returned time is ``start + x * (end - start)`` for a randomly chosen
     window ``(start, end)`` and a ``truncexpon`` variate ``x`` with shape 1
     (so ``x`` lies in [0, 1]).

     Args:
         node: object providing ``get_first_infection_time()``.
         num_times: number of times to draw.

     Returns:
         A list of ``num_times`` times, or ``[]`` when the node has no first
         infection time.

     Side effects:
         On first use seeds NumPy's global RNG from ``GC.random_number_seed``,
         increments that seed and sets ``GC.NUMPY_SEEDED``.
     """
     if not hasattr(GC, "NUMPY_SEEDED"):
         from numpy.random import seed as numpy_seed
         numpy_seed(seed=GC.random_number_seed)
         GC.random_number_seed += 1
         GC.NUMPY_SEEDED = True
     assert hasattr(
         GC, 'transmissions'
     ), "No transmission network found in global context! Run this after the transmission network simulation is done"
     first_time = node.get_first_infection_time()
     if first_time is None:
         return []
     windows = []
     last_time = first_time
     for u, v, t in GC.transmissions:
         if u == node and v == node:
             # Self-event: close the currently open window (if non-empty).
             if last_time is not None and t > last_time:
                 windows.append((last_time, t))
             last_time = None
         elif last_time is None and v == node:
             # Node is infected again: open a new window.
             last_time = t
     # A window still open at the end runs until GC.time.  Compare against
     # GC.time (the window's actual end) rather than the leaked loop variable
     # ``t``, which was unbound for an empty transmission list and otherwise
     # referred to the last event of the whole network.
     if last_time is not None and GC.time > last_time:
         windows.append((last_time, GC.time))
     if not windows:
         windows.append((first_time, GC.time))
     truncexpon_variates = truncexpon.rvs(1, size=num_times)
     out = []
     for variate in truncexpon_variates:
         start, end = choice(windows)
         out.append((variate * (end - start)) + start)
     return out
Пример #4
0
    def _generate_num_repeat(self, scale=2.0):
        """
        Helper function, randomly generate number of repetition for an image.
        Choose an integer with exponential distribution between [min, max].

        Output:
          integer between [min_repeat, max_repeat)
        """
        min_switch = truncexpon.rvs(b=(self.max_repeat - self.min_repeat) /
                                    scale, loc=self.min_repeat, scale=scale).astype(int)

        return min_switch
    def exponential(self, N, T):
        """
        Draw an ``(N, T)`` array of truncated-exponential samples.

        :return: samples from ``truncexpon`` with shape parameter ``self.right``,
            location ``self.mean`` and scale ``self.stdev``, generated with
            ``random_state=self.random_seed``.
        Notes - The plain exponential distribution has support from 0 to
        infinity; truncexpon cuts the standardized distribution off at ``b``.
        With ``loc``/``scale`` applied, scipy places the support at
        ``[loc, loc + b * scale]``.
        NOTE(review): an earlier description gave the support as
        ``[self.left, self.right]`` -- confirm that is what callers expect.
        """
        out_shape = (N, T)
        dist_params = dict(b=self.right, loc=self.mean, scale=self.stdev)
        return truncexpon.rvs(size=out_shape,
                              random_state=self.random_seed,
                              **dist_params)
Пример #6
0
def _bucket_bounds(buckets, i):
    """Return ``(low, high)`` for bucket ``i``; the last bucket reuses the previous bucket's width."""
    low = buckets[i]
    if i + 1 == len(buckets):
        high = low + (buckets[i] - buckets[i - 1])
    else:
        high = buckets[i + 1]
    return low, high


def day_bucket_to_points(bucket_hist, buckets, kind, n_points=10000):
    """
    Transforms one day to points, see `buckets_to_points` docs
    :param bucket_hist: One histogram i.e. one day of data from one metric
    :param buckets: List of buckets for the histogram
    :param kind: Kind of the histogram e.g. 'exponential'
    :param n_points: Number of points to be generated. Greater number of points indicates
    more accurate point distribution and more time that is needed.
    :return: Point representation of one day of data

    For kinds 'exponential' and 'count' the points of a bucket follow a
    truncated exponential distribution starting at the bucket's lower edge;
    for every other kind they are uniform over the bucket. Each bucket
    contributes ``round(density * n_points)`` points.
    """
    if kind == 'categorical':
        # Categorical buckets are replaced by their positional indices.
        buckets = list(range(len(buckets)))
    exponential_like = kind in ['exponential', 'count']
    if not exponential_like and len(bucket_hist) != len(buckets):
        buckets = list(range(len(bucket_hist)))

    points = []
    for i, dens in enumerate(bucket_hist):
        low, high = _bucket_bounds(buckets, i)
        count = max(int(round(dens * n_points, 0)), 0)
        if count == 0:
            continue
        if exponential_like:
            # truncexpon's shape parameter is the LENGTH of the support, so
            # it must be the bucket width; passing ``high`` (as before) let
            # points spill up to ``low + high``.
            width = high - low
            if width > 0:
                points += truncexpon.rvs(width, size=count, loc=low).tolist()
            else:
                # Degenerate (zero-width) bucket: no valid shape parameter,
                # so put all mass on the bucket edge like the uniform case.
                points += [float(low)] * count
        else:
            points += np.random.uniform(
                high=high, low=low, size=count
            ).tolist()
    return points
Пример #7
0
# Display the probability density function (``pdf``) between the 1st and
# 99th percentiles of the distribution with shape parameter ``b``:

x = np.linspace(truncexpon.ppf(0.01, b), truncexpon.ppf(0.99, b), 100)
ax.plot(x, truncexpon.pdf(x, b), 'r-', lw=5, alpha=0.6, label='truncexpon pdf')

# Alternatively, the distribution object can be called (as a function)
# to fix the shape, location and scale parameters. This returns a "frozen"
# RV object holding the given parameters fixed.

# Freeze the distribution and display the frozen ``pdf``:

rv = truncexpon(b)
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

# Check accuracy of ``cdf`` and ``ppf`` (the round trip should reproduce
# the input probabilities):

vals = truncexpon.ppf([0.001, 0.5, 0.999], b)
np.allclose([0.001, 0.5, 0.999], truncexpon.cdf(vals, b))
# True

# Generate random numbers:

r = truncexpon.rvs(b, size=1000)

# And compare the histogram.  ``density=True`` replaces the ``normed``
# keyword, which current matplotlib releases no longer accept:

ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
ax.legend(loc='best', frameon=False)
plt.show()
Пример #8
0
 def soc_trunc_filter(traffic, min_soc):
     """Return how many of ``traffic`` truncexpon draws (shape 1) lie below ``min_soc``.

     The draws use a fixed ``random_state``, so the same ``traffic`` always
     yields the same sequence.
     """
     # TODO random seed might be counter productive
     draws = truncexpon.rvs(1, size=traffic, random_state=49816317)
     below_threshold = draws < min_soc
     return int(below_threshold.sum())
Пример #9
0
 def absorption_profile(size):
     """Exponential random absorption profile.

     Draws ``size`` truncated-exponential values with scale
     ``attenuation_length`` and shape ``thickness / attenuation_length``
     (both read from the enclosing scope).
     """
     shape_b = thickness / attenuation_length
     return truncexpon.rvs(shape_b, size=size, scale=attenuation_length)
Пример #10
0
 def absorption_profile(size):
     # Exponential random absorption profile: freeze the truncated
     # exponential (shape thickness/attenuation_length, scale
     # attenuation_length, both taken from the enclosing scope) and draw
     # ``size`` values from it.
     frozen_profile = truncexpon(thickness/attenuation_length,
                                 scale=attenuation_length)
     return frozen_profile.rvs(size=size)
Пример #11
0
# Rate constant ('lambda') of every Q tripeptide; tripeptides that have no
# entry in aa_properties get the placeholder value -1.
kQ = np.array([
    (aa_properties[trp]['lambda'] if trp in aa_properties else -1)
    for trp in tripepsQ
])

# Reorder both tripeptide sets by ascending rate constant.
idxN, idxQ = np.argsort(kN), np.argsort(kQ)
tripepsN, kN = tripepsN[idxN], kN[idxN]
tripepsQ, kQ = tripepsQ[idxQ], kQ[idxQ]

N = 20000
# 0.99 quantile of an exponential distribution with rate k: -ln(1 - 0.99) / k.
TmaxN = -np.log(1 - 0.99) / kN
TmaxQ = -np.log(1 - 0.99) / kQ
# For each tripeptide draw an equal share of N times from a truncated
# exponential with shape 2.5 and scale Tmax / 2.5 (support [0, Tmax]).
tN = np.concatenate([
    truncexpon.rvs(2.5, loc=0, scale=tm / 2.5, size=N // len(tripepsN))
    for tm in TmaxN
])
tQ = np.concatenate([
    truncexpon.rvs(2.5, loc=0, scale=tm / 2.5, size=N // len(tripepsQ))
    for tm in TmaxQ
])
sim_lim = 1e-7
deamid_cutoff = 0

# Noise levels to simulate, from largest to smallest.
sigmas = [1, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.15, 0.1, 0.05, 0.005]
bootstraps = 500  # for each level of noise, repeat the experiment
bootstrap_size = 100
# Result accumulators (sigmas x bootstraps).
corrsN_1, corrsQ_1 = [], []
corrsN_2, corrsQ_2 = [], []
corrsN_3 = []
def truncated_exponential_1(b):
    """Return a single ``truncexpon`` variate with shape parameter ``b`` (default loc and scale)."""
    variate = truncexpon.rvs(b=b)
    return variate
Пример #13
0
def g_rvs(num_rvs, limit=4.5):
    """Draw ``num_rvs`` values from ``truncexpon`` with an infinite shape parameter, shifted by ``limit``.

    With ``b = inf`` there is no upper truncation, so every value is at
    least ``limit``.
    """
    upper_bound = np.inf
    return truncexpon.rvs(upper_bound, loc=limit, size=num_rvs)
Пример #14
0
def transmission_neutral_coalescent_tree(transmission_network,
                                         sample_times,
                                         rate=1):
    '''Sample a tree under the pure-neutral coalescent model constrained to a given transmission network with given patient sampling times.

    Args:
        ``transmission_network`` (``list``): The transmission network as a ``list`` of ``(u,v,t)`` tuples denoting the transmission from ``u`` to ``v`` at time ``t``. The transmission network must be sorted in ascending order of time

        ``sample_times`` (``dict``): The times at which each individual was sampled (i.e., the times of the leaves) as a ``dict`` in which keys are individuals from ``transmission_network`` and the value associated with an individual ``u`` is a ``list`` of times at which ``u`` was sampled (i.e., the times of the leaves from individual ``u``)

        ``rate`` (``float``): The rate of the Poisson process of coalescing two lineages

    Returns:
        A ``Tree`` object storing the sampled pure-neutral tree, where leaves are labeled ``ID|u|t``, where ``ID`` is a unique identifier for the leaf, ``u`` is the corresponding individual from ``transmission_network``, and ``t`` is the sample time (which equals the leaf's distance from the root). If there are multiple seed individuals (infected beforehand), a tree will be output for each. The value actually returned is a ``list`` of these ``Tree`` objects.

    Raises:
        ``TypeError``: If ``transmission_network`` is not a ``list``, ``sample_times`` is not a ``dict``, a value of ``sample_times`` is not a ``list``/``set``/``float``/``int``, or a sample time is not a ``float``/``int``

        ``ValueError``: If the network is not sorted by time, a recipient appears more than once, a link is missing, or seeds remain unvisited at the end

        ``KeyError``: If a transmission recipient has no entry in ``sample_times``

    Note:
        ``sample_times`` is modified in place: scalar and ``set`` values of visited recipients are replaced by ``list`` values.
    '''
    if not isinstance(transmission_network, list):
        raise TypeError(
            "transmission_network must be a list, but it was a %s" %
            str(type(transmission_network)))
    if not isinstance(sample_times, dict):
        raise TypeError("sample_times must be a dict, but it was a %s" %
                        str(type(sample_times)))
    # Bookkeeping, all keyed by individual unless noted otherwise:
    time = dict()  # node -> time of that node (leaf sample time or coalescence time)
    ID = 0  # running counter used in leaf labels
    root = dict()  # individual -> root node of the subtree sampled within it
    leaves = dict()  # individual -> nodes that act as leaves while processing it
    infected_by = dict()  # individual -> list of individuals it infected
    to_visit = set()  # infectors seen so far that have not yet been processed as a recipient
    infection_time = dict()  # individual -> time at which it was infected
    # Walk the transmissions from the latest to the earliest, so every
    # individual infected by ``v`` has been handled before ``v`` itself.
    for i in range(len(transmission_network) - 1, -1, -1):
        # check_transmission_event is defined elsewhere; presumably it
        # validates the (u, v, t) tuple -- confirm against its implementation.
        check_transmission_event(transmission_network[i])
        if i != 0 and transmission_network[i][2] < transmission_network[i -
                                                                        1][2]:
            raise ValueError(
                "transmission_network must be sorted in ascending order of time"
            )
        u, v, t = transmission_network[i]
        if v in infection_time:
            raise ValueError(
                "Encountered duplicate transmission recipient: %s" % str(v))
        else:
            infection_time[v] = t
        if u not in infected_by:
            infected_by[u] = list()
        if v not in infected_by:
            infected_by[v] = list()
        if v not in sample_times:
            raise KeyError("Individual not in sample_times: %s" % str(v))
        # Normalize the recipient's sample times to a list (mutates the
        # caller's dict).
        if not isinstance(sample_times[v], list):
            if isinstance(sample_times[v], float) or isinstance(
                    sample_times[v], int):
                sample_times[v] = [sample_times[v]]
            elif isinstance(sample_times[v], set):
                sample_times[v] = list(sample_times[v])
            else:
                raise TypeError("Values in sample_times must be list")
        # An infector of None (or the string 'None') is not recorded as an
        # infector, so such recipients end up as roots of their own tree.
        if u is not None and u != 'None':
            infected_by[u].append(v)
            to_visit.add(u)
        # The "leaves" inside v are the subtree roots of everyone v infected
        # plus one new node per sample taken from v.
        leaves[v] = list()
        for w in infected_by[v]:
            if w not in root:
                raise ValueError("Missing links in transmission_network")
            leaves[v].append(root[w])
            del root[w]
        for st in sample_times[v]:
            if not isinstance(st, float) and not isinstance(st, int):
                raise TypeError("Values in sample_times must be list of float")
            newnode = Node(label='%d|%s|%f' % (ID, str(v), st))
            ID += 1
            time[newnode] = st
            leaves[v].append(newnode)
        # Process from the most recent time backwards.
        leaves[v].sort(key=lambda x: time[x], reverse=True)
        # ``Set`` is not a Python builtin; it is defined elsewhere. It is used
        # here through add(), pop() and len() only.
        lineages = Set()
        curr_time = time[leaves[v][0]]
        for w in leaves[v]:
            if time[w] < curr_time:
                # Coalesce the currently active lineages in the interval
                # between curr_time and time[w].
                while len(lineages) > 1:
                    # Total pairwise coalescence rate for k lineages: rate * k*(k-1)/2.
                    L = rate * len(lineages) * (len(lineages) - 1) / 2.
                    coal_time = curr_time - exponential(1. / L)
                    if coal_time >= time[w]:
                        c1 = lineages.pop()
                        c2 = lineages.pop()
                        newnode = Node()
                        time[newnode] = coal_time
                        curr_time = coal_time
                        newnode.add_child(c1)
                        newnode.add_child(c2)
                        lineages.add(newnode)
                    # NOTE(review): when the drawn time falls before time[w]
                    # nothing changes and the loop simply draws again, so it
                    # only ends once a single lineage remains -- confirm that
                    # forcing full coalescence here (no ``break``) is intended.
                curr_time = time[w]
            lineages.add(w)
        # Merge whatever is left into a single lineage, using a truncated
        # exponential waiting time for each merge.
        while len(lineages) != 1:
            L = rate * len(lineages) * (len(lineages) - 1) / 2.
            # NOTE(review): scipy's truncexpon shape parameter is expressed in
            # units of ``scale``, so this draw is bounded by
            # (curr_time - t) / L rather than curr_time - t -- confirm whether
            # the shape should be (curr_time - t) * L.
            coal_time = curr_time - truncexpon.rvs(curr_time - t, scale=1. / L)
            c1 = lineages.pop()
            c2 = lineages.pop()
            newnode = Node()
            time[newnode] = coal_time
            curr_time = coal_time
            newnode.add_child(c1)
            newnode.add_child(c2)
            lineages.add(newnode)
        # The single remaining lineage is the root of v's within-host subtree.
        root[v] = lineages.pop()
        to_visit.discard(v)
        del leaves[v]
    # Every infector must itself have appeared as a recipient.
    if len(to_visit) != 0:
        raise ValueError(
            "Malformed transmission network. Missing the following seeds:\n%s"
            % '\n'.join(str(v) for v in to_visit))
    # Wrap each remaining root in a Tree and convert node times to edge lengths.
    out = list()
    for k, r in root.items():
        if r.is_root():
            tmp = Tree()
            tmp.root = r
            out.append(tmp)
            for node in tmp.traverse_preorder():
                if node.is_root():
                    # Root edge: time since the infection of individual k.
                    node.edge_length = time[node] - infection_time[k]
                else:
                    node.edge_length = time[node] - time[node.parent]
    return out
Пример #15
0
    aa_properties[trp]['lambda'] if trp in aa_properties else -1
    for trp in tripepsQ
])

# Reorder both tripeptide sets by ascending rate constant.
idxN, idxQ = np.argsort(kN), np.argsort(kQ)
tripepsN, kN = tripepsN[idxN], kN[idxN]
tripepsQ, kQ = tripepsQ[idxQ], kQ[idxQ]

N = 20000
# 0.99 quantile of an exponential distribution with rate k: -ln(1 - 0.99) / k.
TmaxN = -np.log(1 - 0.99) / kN
TmaxQ = -np.log(1 - 0.99) / kQ
# For each tripeptide draw an equal share of N times from a truncated
# exponential whose support is [0, Tmax] (shape b, scale Tmax / b).
tN = np.concatenate([
    truncexpon.rvs(2.35, loc=0, scale=tm / 2.35, size=N // len(tripepsN))
    for tm in TmaxN
])
tQ = np.concatenate([
    truncexpon.rvs(3, loc=0, scale=tm / 3, size=N // len(tripepsQ))
    for tm in TmaxQ
])
sim_lim = 1e-7
deamid_cutoff = 0

# Single noise level, shared by every tripeptide.
s = 0.2
print('Simulating with sigma = {}'.format(s))
sigmasN = np.repeat(s, len(tripepsN))
sigmasQ = np.repeat(s, len(tripepsQ))

# Resample 2000 of the N times (with replacement) and run the simulation on
# them; arguments passed are rate constants, sigmas, times and sim_lim.
tN_s = tN[np.random.randint(len(tN), size=2000)]
DN = rr.simulate_deamidation(kN, sigmasN, tN_s, sim_lim)