def ATANPIItv(self, a):
    """Apply ``atanpi`` to a scalar or a ``"lo;hi"`` interval given as text.

    Parameters:
        a: a number, a numeric string, or a ``"lo;hi"`` string. The marker
           ``"nan;nan"`` is passed through unchanged.

    Returns:
        The hull of the result formatted as ``"lo;hi"``. A single-point
        result is returned as ``"x;x"``, matching ``ABSItv`` and ``DIVItv``.

    Raises:
        ValueError: if a bound cannot be converted with ``float``.
    """
    aa = str(a)
    if aa == "nan;nan":
        return "nan;nan"

    # down/up presumably evaluate the callable with the rounding mode set
    # toward -inf / +inf so the enclosure stays conservative -- TODO confirm.
    if aa.count(";") == 0:
        lower = down(lambda: float(a))
        upper = up(lambda: float(a))
    else:
        # Split the stringified value so non-str inputs behave like strings.
        lower = down(lambda: float(aa.split(";")[0]))
        upper = up(lambda: float(aa.split(";")[1]))

    res = str(interval.hull([atanpi(interval([lower, upper]))]))
    # The parsing below expects text shaped like "...[lo, hi]...".
    inner = res.split("[")[1]
    if res.count(" ") == 0:
        # Single-point hull prints as "[x]": there is no ", " separator, so
        # the two-bound parse below would raise IndexError.
        point = inner.split("]")[0]
        return point + ";" + point
    low_text = inner.split(",")[0]
    high_text = inner.split(" ")[1].split("]")[0]
    return low_text + ";" + high_text
def test_cosh(self):
    """Check ``imath.cosh`` on scalars and on intervals on either side of 0."""
    cosh = imath.cosh

    self.assertEqual(cosh(0), interval[1])
    assert cosh(2) in (imath.exp(2) + imath.exp(-2)) / 2

    # On [1, 2] the result is the hull of the endpoint values.
    endpoint_hull = interval.hull((cosh(1), cosh(2)))
    assert cosh(interval[1, 2]) == endpoint_hull

    # The mirrored interval gives the same enclosure.
    assert cosh(interval[-2, -1]) == cosh(interval[1, 2])

    # Intervals containing 0 are expected to start at 1.
    hull_from_one = interval.hull((interval[1], cosh(2)))
    assert cosh(interval[-2, 1]) == hull_from_one
    assert cosh(interval[-1, 2]) == hull_from_one
def test_cosh(self):
    """Exercise ``imath.cosh`` for point arguments and interval arguments."""
    self.assertEqual(imath.cosh(0), interval[1])

    exp_based = (imath.exp(2) + imath.exp(-2)) / 2
    assert imath.cosh(2) in exp_based

    positive = interval[1, 2]
    assert imath.cosh(positive) == interval.hull(
        (imath.cosh(1), imath.cosh(2)))
    assert imath.cosh(interval[-2, -1]) == imath.cosh(positive)

    # Both intervals contain 0 and reach magnitude 2, so the same hull is
    # expected for each of them.
    for straddling in (interval[-2, 1], interval[-1, 2]):
        assert imath.cosh(straddling) == interval.hull(
            (interval[1], imath.cosh(2)))
def __init__(self, x, y):
    """Build the object from the hulls of two operands' ``a`` and ``b`` parts.

    ``self.a`` is the hull of ``x.a`` and ``y.a``; ``self.b`` is the hull of
    ``x.b`` and ``y.b``. The extreme values of both hulls are cached in
    ``real_max``/``real_min`` (from ``a``) and ``im_max``/``im_min``
    (from ``b``).
    """
    hull_a = interval.hull((x.a, y.a))
    hull_b = interval.hull((x.b, y.b))
    self.a = hull_a
    self.b = hull_b

    # max()/min() are applied twice: first over the components of the
    # interval, then over the two bounds of the selected component.
    self.real_max = max(max(hull_a))
    self.real_min = min(min(hull_a))
    self.im_max = max(max(hull_b))
    self.im_min = min(min(hull_b))
def phi(x):
    """Return a pair of bounds built from two interval functions of ``x``.

    ``g1`` is evaluated on ``x[0]`` and ``g2`` on ``x[1]``. The first returned
    value is the largest lower bound of the two results and the second the
    largest upper bound, after passing both through ``interval[...]``.
    """

    def g1(x):
        return x[0] * imath.sin(x[0]) + 0.1 * (x[0]**2) + 1.0

    def g2(x):
        return imath.cos(x[1]) + 0.1 * (x[1]**2)

    def enclosing_component(value):
        # Hull of all components of ``value``, as its single component.
        return interval.hull([interval(part) for part in value])[0]

    images = [g1(x), g2(x)]
    bounds = interval[max(enclosing_component(img)[0] for img in images),
                      max(enclosing_component(img)[1] for img in images)]
    return bounds[0][0], bounds[0][1]
def __step(self, box):
    """Compute one contraction step for the variable ``self.__v_name``.

    The function is evaluated over ``box``. If that value is empty or does
    not contain 0, or if ``d_eval`` (presumably the derivative enclosure)
    is empty, an empty interval is returned. Otherwise the function is
    evaluated at the sample point ``c`` and ``c - fc / df`` is intersected
    with the current domain, using the two-part result of ``ext_div``.

    ``box`` is modified temporarily but holds its original domain again
    when the method returns normally.
    """
    dom = box[self.__v_name]

    value = self.fun.eval(box)
    if is_empty(value) or not is_superset(value, 0):
        return interval()

    slope = self.fun.d_eval(self.__v_id, box)
    if is_empty(slope):
        return interval()

    # Evaluate at the sample point by collapsing the domain to it.
    c = self.sample_fun(box[self.__v_name])  # TODO
    box[self.__v_name] = interval[c]
    value_at_c = self.fun.eval(box)
    box[self.__v_name] = dom

    left, right = ext_div(value_at_c, slope)
    left = (c - left) & dom
    right = (c - right) & dom

    if is_empty(left):
        return right
    return interval.hull([left, right])
def contract(self, box):
    """Run a forward evaluation and a backward propagation of the constraint.

    ``self.__constr`` is unpacked as ``(op, left, right)``; the first item
    of each side is used as the node id. Supported operators are ``=``,
    ``>``, ``>=``, ``<`` and ``<=``; any other operator skips the backward
    phase. The forward and backward tables are printed to stdout.
    """
    op, lhs, rhs = self.__constr
    lhs = lhs[0]
    rhs = rhs[0]

    # forward propagation
    self.__fwd_eval(lhs, box)
    self.__fwd_eval(rhs, box)
    print('fwd:')
    print(self.__fwd)
    print()

    # backward propagation
    fwd = self.__fwd
    bwd = self.__bwd
    if op == '=':
        # Both sides must lie in the intersection of their forward values.
        shared = fwd[lhs] & fwd[rhs]
        bwd[lhs] = shared
        self.__bwd_propag(lhs, box)
        bwd[rhs] = shared
        self.__bwd_propag(rhs, box)
    elif op in ('>', '>='):
        # lhs is cut with hull(rhs, +inf), rhs with hull(-inf, lhs).
        bwd[lhs] = fwd[lhs] & interval.hull([fwd[rhs], interval[inf]])
        self.__bwd_propag(lhs, box)
        bwd[rhs] = fwd[rhs] & interval.hull([interval[-inf], fwd[lhs]])
        self.__bwd_propag(rhs, box)
    elif op in ('<', '<='):
        # Mirror image of the previous case.
        bwd[lhs] = fwd[lhs] & interval.hull([interval[-inf], fwd[rhs]])
        self.__bwd_propag(lhs, box)
        bwd[rhs] = fwd[rhs] & interval.hull([fwd[lhs], interval[inf]])
        self.__bwd_propag(rhs, box)

    print('bwd:')
    print(self.__bwd)
    print()
def bound_root(self, x):
    """Bound this polynomial over ``x`` using the roots of its derivative.

    If the naive bound of the derivative over ``x`` does not change sign,
    the result is the interval spanned by the values at the two endpoints
    of ``x``. Otherwise ``x.newton`` is asked for the derivative's roots
    and the value at each of them is merged into that endpoint interval.
    """
    diff = self.diffPoly()
    slope = diff.bound_naive(x)
    sign_is_constant = slope[0].inf >= 0 or slope[0].sup <= 0

    if sign_is_constant:
        critical = ()
    else:
        critical = x.newton(diff, diff.diffPoly())

    bound = interval[interval(self(x[0].inf)), interval(self(x[0].sup))]
    for point in critical:
        bound = interval.hull((bound, self(interval(point))))
    return bound
def element_inv(n, x0, D):
    """Build a ``Taylor`` object of order ``n`` for the reciprocal on ``D``.

    The polynomial part uses the ``'inv'`` coefficients of order 0..n at
    ``x0``. The remainder is ``coef1 * (D - x0)**(n + 1)`` with ``coef1``
    the order ``n + 1`` coefficient over ``D``. When ``coef1`` does not
    change sign, the remainder is also intersected with the hull of the
    errors ``1/t - P`` measured at both endpoints of ``D`` and at ``x0``.
    """
    coef = [Taylor.coeff('inv', order, x0, 1) for order in range(n + 1)]
    P = Poly(coef, n)
    coef1 = Taylor.coeff('inv', n + 1, D, 1)

    if not (coef1[0].inf >= 0 or coef1[0].sup <= 0):
        return Taylor(P, coef1 * ((D - x0)**(n + 1)), x0, D)

    left = interval(D[0].inf)
    right = interval(D[0].sup)
    err_left = interval(1) / left - P.bound(left - x0)
    err_right = interval(1) / right - P.bound(right - x0)
    err_centre = interval(1) / interval(x0) - P.bound(interval(0))
    endpoint_hull = interval.hull((err_left, err_right, err_centre))
    remainder = endpoint_hull & (coef1 * ((D - x0)**(n + 1)))
    return Taylor(P, remainder, x0, D)
def element(f, n, x0, D):
    """Build a ``Taylor`` object of order ``n`` for the ``imath`` function ``f``.

    Parameters:
        f: name of the function as a string; it is passed to
           ``Taylor.coeff`` and looked up as an attribute of ``imath``.
        n: order of the polynomial part.
        x0: expansion point.
        D: interval domain.

    Returns:
        ``Taylor(P, I, x0, D)``, where ``P`` holds the coefficients of order
        0..n and ``I`` is the remainder ``coef1 * (D - x0)**(n + 1)``. When
        ``coef1`` does not change sign, ``I`` is also intersected with the
        hull of the errors ``f - P`` at both endpoints of ``D`` and at ``x0``.

    Raises:
        AttributeError: if ``imath`` has no attribute named ``f``.
    """
    coef = [Taylor.coeff(f, i, x0, 1) for i in range(n + 1)]
    P = Poly(coef, n)
    coef1 = Taylor.coeff(f, n + 1, D, 1)

    # Plain attribute lookup instead of eval(): same result for a function
    # name, without evaluating arbitrary text.
    func = getattr(imath, f)

    lagrange = coef1 * ((D - x0)**(n + 1))
    if coef1[0].inf >= 0 or coef1[0].sup <= 0:
        a = interval(D[0].inf)
        b = interval(D[0].sup)
        I1 = func(a) - P.bound_best(a - x0, ['n', 'H'])
        I2 = func(b) - P.bound_best(b - x0, ['n', 'H'])
        I0 = func(interval(x0)) - P.bound(interval(0))
        I = interval.hull((I1, I2, I0)) & lagrange
    else:
        I = lagrange
    return Taylor(P, I, x0, D)
def __shrink_upper(self, box):
    """Repeatedly contract ``box`` until its upper bound is consistent.

    The Newton contractor is switched to ``sample_sup`` and applied in a
    loop. The loop ends when ``__is_consistent_u`` accepts the box or the
    variable's domain becomes empty. The domain is then rebuilt from the
    original lower bound and whatever domain remains. Otherwise the domain
    is replaced by ``interval[midpoint, inf]`` and the loop continues.
    """
    self.__newton.sample_fun = sample_sup
    name = self.__v_name
    original = box[name]

    while True:
        self.__newton.contract(box)
        done = self.__is_consistent_u(box) or is_empty(box[name])
        if not done:
            current = box[name]
            # NOTE(review): this keeps the half between the lower bound and
            # the midpoint; since the lower bound is "restored" below, the
            # intent may have been current[0].sup -- confirm.
            box[name] = interval[current.midpoint, current[0].inf]
            continue
        # restore the lb
        box[name] = interval.hull([interval[original[0].inf, box[name]]])
        return
def UNIONItv(self, *args):
    """Return the hull of the union of all arguments as text.

    Parameters:
        *args: numbers, numeric strings, ``"lo;hi"`` strings, or nested
            tuples of those. ``None``, empty entries and ``"nan;nan"`` are
            ignored.

    Returns:
        ``"lo;hi"`` for a proper interval, the single value as text when
        the hull is a single point, or ``"nan;nan"`` when nothing usable
        was supplied.

    Raises:
        ValueError: if an entry cannot be converted with ``float``.
    """
    from interval import interval

    # The textual form of ``args`` is used to flatten nested tuples:
    # once parentheses, quotes and blanks are removed, every value is one
    # comma-separated token.
    flat = str(args)
    for junk in ("(", ")", "'", " "):
        flat = flat.replace(junk, "")

    somme = interval()
    for cour in flat.split(","):
        if cour in ("", "None", "nan;nan"):
            continue
        # down/up presumably evaluate the callable with directed rounding
        # so the bounds stay conservative -- TODO confirm.
        if cour.count(";") == 0:
            lower = down(lambda: float(cour))
            upper = up(lambda: float(cour))
        else:
            lower = down(lambda: float(cour.split(";")[0]))
            upper = up(lambda: float(cour.split(";")[1]))
        somme = somme | interval([lower, upper])

    if len(somme) == 0:
        # Nothing was accumulated. The text of an empty interval has no
        # "[" and used to make the parsing below raise IndexError.
        return "nan;nan"

    res = str(interval.hull([somme]))
    inner = res.split("[")[1]
    if res.count(",") == 0:
        # Single-point hull, printed as "[x]".
        return str(inner.split("]")[0])
    low_text = inner.split(",")[0]
    high_text = inner.split(" ")[1].split("]")[0]
    return low_text + ";" + high_text
def DIVItv(self, a, b):
    """Divide ``a`` by ``b``, both given as scalar or ``"lo;hi"`` strings.

    ``"nan;nan"`` on either side is propagated. Otherwise ``a`` is
    multiplied by the inverse of ``b`` and the hull of the product is
    returned as ``"lo;hi"``; a single-point result is returned as ``"x;x"``.
    """
    if a == "nan;nan" or b == "nan;nan":
        return "nan;nan"

    def _bounds(text):
        # Lower bound through ``down`` and upper bound through ``up``.
        if text.count(";") == 0:
            return down(lambda: float(text)), up(lambda: float(text))
        return (down(lambda: float(text.split(";")[0])),
                up(lambda: float(text.split(";")[1])))

    a1, a2 = _bounds(a)
    b1, b2 = _bounds(b)

    from interval import interval
    reciprocal = interval([b1, b2]).inverse()
    quotient = interval([a1, a2]) * reciprocal
    text = str(interval.hull([quotient]))

    inner = text.split("[")[1]
    if text.count(" ") == 0:
        # Single-point hull, printed as "[x]".
        point = inner.split("]")[0]
        return point + ";" + point
    low_text = inner.split(",")[0]
    high_text = inner.split(" ")[1].split("]")[0]
    return low_text + ";" + high_text
def ABSItv(self, a):
    """Return the absolute value of a scalar or ``"lo;hi"`` interval as text.

    ``"nan;nan"`` is passed through. The result is the hull of ``abs`` of
    the parsed interval, formatted as ``"lo;hi"``; a single-point result is
    returned as ``"x;x"``.
    """
    text = str(a)
    if text == "nan;nan":
        return "nan;nan"

    if text.count(";") != 0:
        lower = down(lambda: float(a.split(";")[0]))
        upper = up(lambda: float(a.split(";")[1]))
    else:
        lower = down(lambda: float(text))
        upper = up(lambda: float(text))

    magnitude = abs(interval([lower, upper]))
    shown = str(interval.hull([magnitude]))

    inner = shown.split("[")[1]
    if shown.count(" ") == 0:
        # Single-point hull, printed as "[x]".
        point = inner.split("]")[0]
        return point + ";" + point
    low_text = inner.split(",")[0]
    high_text = inner.split(" ")[1].split("]")[0]
    return low_text + ";" + high_text
def test_hull(self):
    """``interval.hull`` of two multi-part intervals is one enclosing interval."""
    first = interval([1, 3], [4, 6])
    second = interval([2, 5], 9)
    enclosing = interval.hull((first, second))
    self.assertEqual(interval([1, 9]), enclosing)
def test_hull(self):
    """The hull of ``[1,3] u [4,6]`` and ``[2,5] u {9}`` must be ``[1, 9]``."""
    operands = (
        interval([1, 3], [4, 6]),
        interval([2, 5], 9),
    )
    expected = interval([1, 9])
    self.assertEqual(expected, interval.hull(operands))
def __bwd_propag(self, n_id, box):
    """Propagate the backward value of node ``n_id`` down to its operands.

    Parameters:
        n_id: key of the node in ``self.dag``. The node is a sequence whose
            first item is the operator tag and whose remaining items are
            operand node ids (or a constant / variable name for ``'C'`` /
            ``'V'``).
        box: mapping from variable name to domain; ``'V'`` nodes intersect
            their entry with the propagated value.

    Raises:
        AssertionError: for an unsupported node tag, or when the ``sqrt``
            rule leaves an empty operand domain.
    """
    n = self.dag[n_id]
    fwd = self.__fwd
    bwd = self.__bwd
    rec = self.__bwd_propag

    if n[0] == '+':
        bwd[n[1]] = bwd[n_id] - fwd[n[2]]
        rec(n[1], box)
        bwd[n[2]] = bwd[n_id] - fwd[n[1]]
        rec(n[2], box)
    elif n[0] == '-':
        bwd[n[1]] = bwd[n_id] + fwd[n[2]]
        rec(n[1], box)
        bwd[n[2]] = fwd[n[1]] - bwd[n_id]
        rec(n[2], box)
    elif n[0] == '*':
        bwd[n[1]] = bwd[n_id] / fwd[n[2]]
        rec(n[1], box)
        bwd[n[2]] = bwd[n_id] / fwd[n[1]]
        rec(n[2], box)
    elif n[0] == '/':
        bwd[n[1]] = bwd[n_id] * fwd[n[2]]
        rec(n[1], box)
        bwd[n[2]] = fwd[n[1]] / bwd[n_id]
        rec(n[2], box)
    elif n[0] == '^':
        # The exponent is read from the second field of the exponent node.
        i = self.dag[n[2]][1]
        if i % 2 == 0:
            # An even power has a positive and a negative root branch;
            # each is intersected with the forward value of the base.
            p = root(bwd[n_id], i)
            pos = p & fwd[n[1]]
            neg = (-p) & fwd[n[1]]
            # The base is infeasible only when BOTH branches are empty.
            # Testing with ``or`` discarded the base whenever a single
            # branch was infeasible, e.g. x**2 in [4, 9] with x in [1, 5].
            if is_empty(pos) and is_empty(neg):
                bwd[n[1]] = interval()
            else:
                bwd[n[1]] = interval.hull([pos, neg])
        else:
            bwd[n[1]] = root(bwd[n_id], i)
        rec(n[1], box)
    elif n[0] == 'sqrt':
        if is_empty(bwd[n_id]) or bwd[n_id][0].sup < 0:
            bwd[n[1]] = interval()
        elif bwd[n_id][0].inf < 0:
            # Only the non-negative part of the value can be a square root.
            i = interval([0, bwd[n_id][0].sup])
            bwd[n[1]] &= i * i
        else:
            bwd[n[1]] &= bwd[n_id] * bwd[n_id]
        assert (not is_empty(bwd[n[1]]))  # TODO
    #elif n[0] == 'sin':
    elif n[0] == 'C':
        bwd[n_id] &= n[1]
    elif n[0] == 'V':
        box[n[1]] &= bwd[n_id]
    else:
        print('unsupported node: ' + str(n))
        assert (False)
def test_hull(self):
    """``interval.hull`` must merge overlapping and disjoint parts into ``[1, 9]``."""
    left = interval([1, 3], [4, 6])
    right = interval([2, 5], 9)
    merged = interval.hull((left, right))
    assert interval([1, 9]) == merged
def mutant_locations(self, loc, count=1, max=False):
    '''
    Return an iterator over candidate mutation sets located within loc.

    Candidate positions come from the self._mutant_locations dict, which is
    built on first use. Only positions whose interval overlaps loc are
    kept. The candidates are shuffled with util.irandomize and combined
    count at a time, so count=1 gives single mutations, count=2 gives pairs
    of mutations, and so on.

    loc may be an interval or a tuple accepted by interval(). An empty
    interval yields an empty iterator.

    Each emitted item is a frozenset of (loc_tuple, mutation) pairs. Sets
    that use the same position twice, or that are already in self.mut_sets,
    are filtered out. When loc overlaps the first exon, only sets accepted
    by mutate.check_translation are emitted.

    The max parameter is not used by this function.
    '''
    #first, make sure my self._mutant_locations dict is instantiated
    if not hasattr(self, '_mutant_locations'):
        self._mutant_locations = mutate.mutant_locations(self)
    #deal with interval() versus tuple inputs
    if isinstance(loc, interval):
        if len(loc) == 0:
            return iter([])
        loc_ivl = loc
        loc = interval.hull([loc_ivl]).to_tuple()
    else:
        loc_ivl = interval(loc)
    #create an iterator that returns all keys for _mutant_locations that are
    #in this location range
    mut_ivls = (interval(ml) for ml in self._mutant_locations.keys())
    #now loc iter will output a non-random set of mutation locations which are
    #keys to the _mutation_locations dict
    #(each item is an (interval, overlaps) pair; itemgetter(1) keeps the pairs
    # whose overlap result is truthy)
    loc_iter = ifilter(itemgetter(1), ((ivl, ivl.overlaps(loc_ivl)) for \
                                       ivl in mut_ivls))
    #change interval obj into loc tuple
    loc_tup = lambda loc: loc[0].to_tuple()
    #get the mutation set (the values) for a loc tuple
    loc_muts = lambda loc: self._mutant_locations[loc_tup(loc)]
    #expand the mutation set into individual mutations for a loc tuple
    loc_mset = lambda loc: ((loc_tup(loc), i) for i in loc_muts(loc))
    #put them all together for a randomized list of generators, one generator
    #for each loc tuple
    pos_mut_sets = map(lambda loc: (loc_mset(loc)), loc_iter)
    #random_seed is not defined in this function; presumably a module-level
    #value -- TODO confirm
    emit_sets = combinations(util.irandomize(
        chain.from_iterable(pos_mut_sets), seed=random_seed), count)
    emit_sets = imap(frozenset, emit_sets)
    #keep a set only if every mutation in it sits at a different position and
    #the set has not been recorded in self.mut_sets
    is_unique_pos = \
        lambda mset: (
            len([m[0] for m in mset]) == len(set([m[0] for m in mset]))
            and set(mset) not in self.mut_sets)
    mut_iter = util.irandomize(
        ifilter(is_unique_pos,
                util.irandomize(emit_sets, seed=random_seed)),
        seed=random_seed)
    # if this feature overlaps exons
    #expand the motif to codons, so that we can check that mutants are
    # synonymous
    if interval(self.exon_list[0].extract_pos()).overlaps(loc_ivl):
        codon_loc = \
            (interval(mutate.expand_motif_to_codons(self, loc)) \
             & interval(self.exon_list[0].extract_pos())).to_tuple()
        #check all mutations for synonymousness
        seq_str = str(self.seq)[slice(*codon_loc)]
        #the outer lambda binds seq_str and codon_loc for the inner predicate
        is_synon = lambda seq_str, codon_loc: lambda mut_tups: \
            mutate.check_translation(\
                string.upper(mutate.tups_to_str(seq_str, codon_loc, mut_tups)),
                seq_str)
        is_synon = is_synon(seq_str, codon_loc)
        return util.irandomize(ifilter(lambda mut: is_synon(mut), mut_iter),
                               seed=random_seed)
    else:
        return mut_iter
def im_radius(self):
    """
    Returns the radius of this complex interval's imaginary part

    Computed as the largest bound minus the smallest bound of the hull of
    ``self.b``.
    """
    imag = self.b
    span = interval.hull((imag, imag))
    upper = max(max(span))
    lower = min(min(span))
    return upper - lower
def gis(bedfiles, names=None, prefix="similarity", sim_thresh=0.5):
    """
    Calculate genomic similarity of BED files

    Parameters
    ----------
    bedfiles : List<str>
        Paths to the BED files to compare
    names : List<str> or NoneType
        Name for each of the input BED files; defaults to `Sample_<i>`
    prefix : str
        Output file prefix; the table is written to `<prefix>.tsv`
    sim_thresh : float
        Minimum similarity threshold to consider recording a locus

    Returns
    -------
    pandas.DataFrame
        One row per recorded locus with the columns `chr`, `start`, `end`,
        `similarity`, followed by one column per sample holding that
        sample's `Bed.counter` value at the time of recording.
    """
    # number of BED files being compared
    n = len(bedfiles)
    # store sample indexes as `names` if `names` is not defined
    if names is None:
        names = ["Sample_" + str(i) for i in range(n)]
    # store similarity matrix
    sim_mat = np.identity(n)
    # columns of information to store (list() so any sequence of names works)
    column_names = ["chr", "start", "end"] + ["similarity"] + list(names)
    # initialize `bed` objects
    beds = [Bed(b) for b in bedfiles]
    # get first intervals from each file
    intvls = [b.next() for b in beds]
    # initialize the matrix
    for i, j in combinations(range(n), 2):
        sim_mat[i, j] = similarity(intvls[i], intvls[j])
    sim_mat = symmetrize(sim_mat)
    # store minimum similarity for each interval, and which sample it comes from
    minsim = [{
        "idx": j,
        "s": sim_mat[i, j]
    } for i, j in enumerate(np.argmin(sim_mat, axis=1))]
    # records to keep for printing
    records = []
    # iterate over intervals from the sorted BED files
    while True:
        minsim_set = min([ms["s"] for ms in minsim])
        chrom = intvls[0].chr
        # skip if not all intervals are on the same chromosome
        # find interval that spans entire set of intervals
        if np.all([intvl.chr == chrom for intvl in intvls]):
            # record this set if the similarity of the set passes the threshold
            if minsim_set >= sim_thresh:
                hull = interval.hull([intvl.interval for intvl in intvls])
                set_locus = GenomicInterval(chrom, hull[0].inf, hull[0].sup)
                values = [
                    set_locus.chr,
                    set_locus.inf,
                    set_locus.sup,
                    minsim_set,
                ] + [b.counter for b in beds]
                records.append(dict(zip(column_names, values)))
        else:
            # update to latest chromosome and skip remaining intervals
            # get latest chromosome (via `natsorted` to account for chromosome names)
            newchrom = natsorted([intvl.chr for intvl in intvls])[-1]
            for i in range(n):
                if intvls[i].chr == newchrom:
                    skip_idx = i
                    break
            # keep track of which samples to update
            samples_to_update = list(range(skip_idx)) + list(
                range(skip_idx + 1, n))
            # NOTE(review): samples already on `newchrom` (other than
            # `skip_idx`) are advanced too, and `sim_mat` is not recomputed
            # for the samples moved here -- confirm this is intended.
            exhausted = False
            for i in samples_to_update:
                while True:
                    # iterate through intervals in BED files until all samples are on the new chromosome
                    intvls[i] = beds[i].next()
                    if intvls[i] is None:
                        # `next()` returns None at end of file (see the check
                        # further down); reading `.chr` from it used to raise
                        # AttributeError here.
                        exhausted = True
                        break
                    if intvls[i].chr == newchrom:
                        break
                if exhausted:
                    break
            if exhausted:
                # one file ran out of intervals: stop and save what we have
                break
        # find sample with the smallest upper bound
        update_idx = np.argmin([intvl.sup for intvl in intvls])
        # pop this interval
        intvls[update_idx] = beds[update_idx].next()
        # check that we're not at the end of the file
        if intvls[update_idx] is None:
            break
        # recalculate column of sim_mat (calc once, ensure sim_mat is symmetric)
        for i in range(update_idx):
            sim_mat[i, update_idx] = similarity(intvls[i], intvls[update_idx])
            sim_mat[update_idx, i] = sim_mat[i, update_idx]
        for i in range(update_idx + 1, n):
            sim_mat[i, update_idx] = similarity(intvls[i], intvls[update_idx])
            sim_mat[update_idx, i] = sim_mat[i, update_idx]
        # update minsim for (update_idx)-th sample
        minsim[update_idx]["idx"] = np.argmin(sim_mat[:, update_idx])
        minsim[update_idx]["s"] = sim_mat[update_idx,
                                          minsim[update_idx]["idx"]]
        # update minsim for any sample where minsim[j]["idx"] == update_idx
        for i in [i for i, ms in enumerate(minsim) if ms["idx"] == update_idx]:
            minsim[i]["idx"] = np.argmin(sim_mat[:, i])
            minsim[i]["s"] = sim_mat[i, minsim[i]["idx"]]
    # save records as a DataFrame
    df = pd.DataFrame(records, columns=column_names)
    # save to output
    df.to_csv(prefix + ".tsv", index=False, sep="\t")
    return df
def _hull(w, z):
    """
    Returns the hull of two complex intervals

    The ``a`` parts and the ``b`` parts of ``w`` and ``z`` are hulled
    separately and wrapped in a new ``ComplexInterval``.
    """
    hull_a = interval.hull((w.a, z.a))
    hull_b = interval.hull((w.b, z.b))
    return ComplexInterval(hull_a, hull_b)
def mutate_all_positions(self, loc):
    '''
    Generate mutants that change every codon and/or nucleotide within loc.

    loc may be an interval or a tuple accepted by interval(). For every
    exonic codon that has alternative codons in the codon tables, and for
    every intronic position found in self._mutant_locations, one random
    chooser is created. Each yielded value is a frozenset holding the
    choice of every chooser, as (loc_tuple, mutation) pairs.

    A mutant that was already yielded is skipped. After 20 such repeats
    have been skipped in total, the next repeat ends the generator.
    '''
    #first, make sure my self._mutant_locations dict is instantiated
    if not hasattr(self, '_mutant_locations'):
        self._mutant_locations = mutate.mutant_locations(self)
    #deal with interval() versus tuple inputs
    if isinstance(loc, interval):
        loc_ivl = loc
        loc = interval.hull([loc_ivl]).to_tuple()
    else:
        loc_ivl = interval(loc)
    ivl_len = loc_ivl.sum_len()
    (e_coords, i_coords) = mutate.get_motif_boundaries(loc, self)
    #mutant choices will be a list of random.choice lambda functions
    #that randomly chooses a different codon for every position or a different
    #nucleotide for every intronic base
    mutant_choices = set()
    for codon_loc in e_coords:
        #go through every codon in codon_loc
        for c_loc in range(codon_loc[0], codon_loc[1], 3):
            codon = str(self.seq[c_loc:(c_loc + 3)]).upper()
            #get other codons
            bckt = mutate.codon_back_table()
            fwdt = mutate.codon_fwd_table()
            other_codons = bckt[fwdt[codon]]
            other_codons = other_codons.difference((codon,))
            if len(other_codons) == 0:
                continue
            #convert these codons into mut tuples (cmut_tuples)
            # (one codon might be two or even three tuples)
            cmut_tuples = ()
            for other_cod in other_codons:
                cod_tup = ()
                for diff in util.str_diff(other_cod, codon):
                    diff_loc = (c_loc + diff, c_loc + diff + 1)
                    cod_tup += ((diff_loc, other_cod[diff]),)
                cmut_tuples += (cod_tup,)
            #finally store a lambda function that randomly chooses a
            #different codon for this position, using a unique-state
            #random generator
            #(random_seed is not defined in this function; presumably a
            # module-level value -- TODO confirm)
            rgen = random.Random()
            rgen.seed(random_seed ^ hash(cmut_tuples) ^ hash(loc))
            #the outer lambda binds this iteration's tuples and generator
            codon_choice = lambda cmt, rgen: lambda: rgen.choice(cmt)
            mutant_choices.add(codon_choice(cmut_tuples, rgen))
    for intron_loc in i_coords:
        intron_ivl = interval(intron_loc)
        mut_ivls = (interval(ml) for ml in self._mutant_locations.keys())
        loc_list = filter(itemgetter(1), \
            [(ivl, ivl.overlaps(intron_ivl)) for ivl in mut_ivls])
        #change interval obj into loc tuple
        loc_tup = lambda loc: loc[0].to_tuple()
        #get the mutation set (the values) for a loc tuple
        loc_muts = lambda loc: self._mutant_locations[loc_tup(loc)]
        #expand the mutation set into individual mutations for a loc tuple
        loc_mset = lambda loc, rgen: \
            lambda: rgen.choice([((loc_tup(loc), i),) for i in loc_muts(loc)])
        #generate independently seeded random number gens for each pos
        rgens = [random.Random() for i in loc_list]
        [rg.seed((random_seed, loc)) for rg, loc in zip(rgens, loc_list)]
        pos_rnd_muts = map(lambda loc, rgen: loc_mset(loc, rgen),
                           loc_list, rgens)
        mutant_choices.update(pos_rnd_muts)
    #now that we have a mutant choices list with one function for every
    # codon/nt, we need to create a generator that calls each function in the
    # list once only
    #These two must live outside the loop: when they were re-created on every
    #pass, no mutant ever counted as "seen" and the generator never ended.
    yielded = set()
    seen_count = 0
    while True:
        next_mut = frozenset(
            chain.from_iterable(map(lambda f: f(), mutant_choices)))
        if next_mut not in yielded:
            yielded.add(next_mut)
            yield next_mut
        elif seen_count < 20:
            seen_count += 1
        else:
            #a plain return ends the generator; raising StopIteration inside
            #a generator body becomes a RuntimeError under PEP 479
            return
def re_radius(self):
    """
    Returns the radius of this complex interval's real part

    Computed as the largest bound minus the smallest bound of the hull of
    ``self.a``.
    """
    real = self.a
    span = interval.hull((real, real))
    upper = max(max(span))
    lower = min(min(span))
    return upper - lower