def looped(self):
    """Return a circular version of the Dseqrecord object.

    The underlying Dseq object has to have compatible ends.

    The features of the returned record are independent copies of the
    features of this record, so shifting their locations for the circular
    coordinate system does not modify the features of ``self``. The
    qualifiers of each copied feature are the same object as the qualifiers
    of the corresponding original feature.

    Returns
    -------
    Dseqrecord
        A copy of this record whose sequence is ``self.seq.looped()`` and
        whose feature locations are shifted by the overhang of the linear
        sequence and split into two parts where they cross the origin.

    Examples
    --------
    >>> from pydna.dseqrecord import Dseqrecord
    >>> a=Dseqrecord("aaa")
    >>> a
    Dseqrecord(-3)
    >>> b=a.looped()
    >>> b
    Dseqrecord(o3)

    See also
    --------
    pydna.dseq.Dseq.looped
    """
    new = _copy.copy(self)
    for key, value in list(self.__dict__.items()):
        setattr(new, key, value)

    # The shallow copy above shares the feature list (and every feature in
    # it) with self; without private copies the location arithmetic below
    # would silently rewrite the features of the linear record as well.
    new.features = _copy.deepcopy(self.features)

    new._seq = self.seq.looped()
    five_prime = self.seq.five_prime_end()

    # Direction of the coordinate shift depends on which kind of end the
    # linear sequence has; any other end type leaves locations untouched.
    if five_prime[0] == "5'":
        shift = self.seq.ovhg
    elif five_prime[0] == "3'":
        shift = -self.seq.ovhg
    else:
        shift = None

    for fn, fo in zip(new.features, self.features):
        if shift is not None:
            fn.location = fn.location + shift

        if fn.location.start < 0:
            # Feature starts before the origin: split into tail + head.
            loc1 = _FeatureLocation(
                len(new) + fn.location.start, len(new), strand=fn.strand
            )
            loc2 = _FeatureLocation(0, fn.location.end, strand=fn.strand)
            fn.location = _CompoundLocation([loc1, loc2])

        if fn.location.end > len(new):
            # Feature runs past the end: split into tail + head.
            loc1 = _FeatureLocation(fn.location.start, len(new), strand=fn.strand)
            loc2 = _FeatureLocation(
                0, fn.location.end - len(new), strand=fn.strand
            )
            fn.location = _CompoundLocation([loc1, loc2])

        fn.qualifiers = fo.qualifiers

    return new
def assemble_circular(self):
    """Return the circular assembly products found in the assembly graph.

    Every simple cycle of ``self.G`` is rotated so that it starts at its
    node with the lowest ``"order"`` attribute. For each cycle, every
    combination of parallel edges is turned into a sequence by joining the
    ``e["seq"][e["piece"]]`` slices of the edges. A product is skipped when
    its upper-cased sequence, or the reverse complement of it, has already
    been recorded.

    Features carried by the edges are copied, shifted by the accumulated
    length of the preceding pieces and then normalised to the circular
    coordinate system: features lying entirely at or beyond the end of the
    sequence are shifted back by its length, features crossing the end are
    split in two parts.

    Returns
    -------
    list
        ``_Contig`` objects created with ``linear=False, circular=True``,
        sorted by length, longest first.
    """
    cps = {}  # upper-cased product sequence -> assembly data
    cpsrc = {}  # reverse complement of that key -> same assembly data

    cpaths_sorted = []
    for cpath in sorted(_nx.simple_cycles(self.G), key=len):
        # Rotate the cycle so it begins at the node with the lowest order.
        order, node = min((self.G.nodes[node]["order"], node) for node in cpath)
        i = cpath.index(node)
        cpaths_sorted.append((order, cpath[i:] + cpath[:i]))
    cpaths_sorted.sort()

    for _, cp in cpaths_sorted:
        # Close the path so that the last edge leads back to the first node.
        cp += cp[0:1]
        cycle_nodes = set(cp)

        # edgelol is a list of lists of all parallel edges along cp.
        edgelol = [
            [(u, v, d) for d in self.G[u][v].values()]
            for u, v in zip(cp, cp[1:])
        ]

        for edges in _itertools.product(*edgelol):
            # Skip combinations where an edge ends exactly where the next
            # edge on the same sequence starts.
            if any(
                (e["seq"], e["piece"].stop) == (z["seq"], z["piece"].start)
                for (_u, _v, e), (_x, _y, z) in zip(edges, edges[1:])
            ):
                continue

            ct = "".join(e["seq"][e["piece"]] for _u, _v, e in edges)
            key = ct.upper()

            if key in cps or key in cpsrc:
                continue  # TODO: cpsrc not needed?

            sg = _nx.DiGraph()
            sg.add_edges_from(edges)
            sg.add_nodes_from(
                (n, d) for n, d in self.G.nodes(data=True) if n in cycle_nodes
            )

            edgefeatures = []
            offset = 0
            for _u, _v, e in edges:
                feats = _deepcopy(e["features"])
                for feat in feats:
                    feat.location += offset
                edgefeatures.extend(feats)
                offset += e["piece"].stop - e["piece"].start

            size = len(ct)
            for f in edgefeatures:
                # Position ``size`` is the origin of the circular molecule,
                # so a feature starting exactly there is shifted back whole
                # rather than split into an empty part plus the remainder.
                if f.location.start >= size and f.location.end > size:
                    f.location += -size
                elif f.location.end > size:
                    # NOTE(review): the two parts are created without a
                    # strand -- confirm whether the strand of the original
                    # location should be carried over.
                    f.location = _CompoundLocation(
                        (
                            _FeatureLocation(f.location.start, _ExactPosition(size)),
                            _FeatureLocation(_ExactPosition(0), f.location.end - size),
                        )
                    )

            cps[key] = cpsrc[_rc(key)] = (
                ct,
                edgefeatures,
                sg,
                {n: self.nodemap[n] for n in cp[:-1]},
                cp,
            )

    return sorted(
        (
            _Contig.from_string(
                data[0],
                features=data[1],
                graph=data[2],
                nodemap=data[3],
                linear=False,
                circular=True,
            )
            for data in cps.values()
        ),
        key=len,
        reverse=True,
    )
def __init__(self, primers, template, limit=13, **kwargs):
    r"""Set up an Anneal object from an iterable of primers and a template.

    A deep copy of the template is stored, every primer is searched for on
    both strands and a ``primer_bind`` feature is added to the stored
    template for each annealing position found.

    Parameters
    ----------
    primers : iterable of :class:`Primer` or Biopython SeqRecord like objects
        Primer sequences 5'-3'.

    template : Dseqrecord
        The template sequence 5'-3'.

    limit : int, optional
        Limit length of the annealing part of the primers.

    **kwargs
        Stored unchanged in ``self.kwargs``.

    Attributes
    ----------
    products: list
        A list of Amplicon objects, one for each primer pair that may
        form a PCR product.

    forward_primers : list
        Annealing primers found on the crick strand, sorted by ascending
        position.

    reverse_primers : list
        Annealing primers found on the watson strand, sorted by descending
        position.

    Examples
    --------
    >>> from pydna.readers import read
    >>> from pydna.amplify import Anneal
    >>> from pydna.dseqrecord import Dseqrecord as Ds
    >>> t = Ds("tacactcaccgtctatcattatcta"
    ...        "ctatcgactgtatcatctgatagcac")
    >>> from Bio.SeqRecord import SeqRecord
    >>> p1 = read(">p1\ntacactcaccgtctatcattatc", ds = False)
    >>> p2 = read(">p2\ngtgctatcagatgatacagtcg", ds = False)
    >>> ann = Anneal((p1, p2), t)
    >>> print(ann.report())
    Template name 51 nt linear:
    p1 anneals forward (--->) at 23
    p2 anneals reverse (<---) at 29
    >>> ann.products
    [Amplicon(51)]
    >>> amplicon_list = ann.products
    >>> amplicon = amplicon_list.pop()
    >>> amplicon
    Amplicon(51)
    >>> print(amplicon.figure())  # doctest: +NORMALIZE_WHITESPACE
    5tacactcaccgtctatcattatc...cgactgtatcatctgatagcac3
                               ||||||||||||||||||||||
                              3gctgacatagtagactatcgtg5
    5tacactcaccgtctatcattatc3
     |||||||||||||||||||||||
    3atgtgagtggcagatagtaatag...gctgacatagtagactatcgtg5
    >>> print(amplicon)
    Dseqrecord
    circular: False
    size: 51
    ID: 51bp U96-TO06Y6pFs74SQx8M1IVTBiY
    Name: 51bp_PCR_prod
    Description: pcr product_p1_p2
    Number of features: 2
    /molecule_type=DNA
    Dseq(-51)
    taca..gcac
    atgt..cgtg
    >>> print(amplicon.program())  # doctest: +NORMALIZE_WHITESPACE
    <BLANKLINE>
    |95°C|95°C               |    |tmf:59.5
    |____|_____          72°C|72°C|tmr:59.7
    |5min|30s  \ 47.7°C _____|____|30s/kb
    |    |      \______/ 0: 1|5min|GC 39%
    |    |       30s         |    |51bp
    """
    self.primers = primers
    self.template = _copy.deepcopy(template)
    self.limit = limit
    self.kwargs = kwargs
    self._products = None
    self.forward_primers = []
    self.reverse_primers = []

    template_seq = self.template.seq
    watson_len = len(template_seq.watson)
    crick_len = len(template_seq.crick)

    # Non-linear templates are searched on doubled strands; the
    # ``pos < length`` filters below keep one copy of each hit
    # (presumably so that sites spanning the origin are found).
    if self.template.linear:
        watson = template_seq.watson
        crick = template_seq.crick
    else:
        watson = template_seq.watson + template_seq.watson
        crick = template_seq.crick + template_seq.crick

    for primer in self.primers:
        for pos, footprint in _annealing_positions(
            str(primer.seq), crick, self.limit
        ):
            if pos < crick_len:
                self.forward_primers.append(
                    _Primer(
                        primer,
                        position=crick_len - pos - min(self.template.seq.ovhg, 0),
                        footprint=footprint,
                    )
                )
        for pos, footprint in _annealing_positions(
            str(primer.seq), watson, self.limit
        ):
            if pos < watson_len:
                self.reverse_primers.append(
                    _Primer(
                        primer,
                        position=pos + max(0, self.template.seq.ovhg),
                        footprint=footprint,
                    )
                )

    by_position = _operator.attrgetter("position")
    self.forward_primers.sort(key=by_position)
    self.reverse_primers.sort(key=by_position, reverse=True)

    template_len = len(self.template)

    for fwd in self.forward_primers:
        if fwd.position - fwd._fp >= 0:
            feature = _SeqFeature(
                _FeatureLocation(fwd.position - fwd._fp, fwd.position),
                type="primer_bind",
                strand=1,
                qualifiers={
                    "label": [fwd.name],
                    "ApEinfo_fwdcolor": ["#baffa3"],
                    "ApEinfo_revcolor": ["#ffbaba"],
                },
            )
        else:
            # The footprint starts before position 0, so the feature is
            # written as a join of the template tail and the template head.
            # NOTE(review): unlike the branch above no strand is passed
            # here -- confirm this is intended.
            start = template_len - fwd._fp + fwd.position
            end = start + fwd._fp - template_len
            feature = _SeqFeature(
                _CompoundLocation(
                    [
                        _FeatureLocation(start, template_len),
                        _FeatureLocation(0, end),
                    ]
                ),
                type="primer_bind",
                location_operator="join",
                qualifiers={
                    "label": [fwd.name],
                    "ApEinfo_fwdcolor": ["#baffa3"],
                    "ApEinfo_revcolor": ["#ffbaba"],
                },
            )
        self.template.features.append(feature)

    for rev in self.reverse_primers:
        if rev.position + rev._fp <= template_len:
            feature = _SeqFeature(
                _FeatureLocation(rev.position, rev.position + rev._fp),
                type="primer_bind",
                strand=-1,
                qualifiers={
                    "label": [rev.name],
                    "ApEinfo_fwdcolor": ["#baffa3"],
                    "ApEinfo_revcolor": ["#ffbaba"],
                },
            )
        else:
            # The footprint runs past the end of the template, so the
            # feature is a join of the template head and the template tail.
            # NOTE(review): only the label qualifier is set here, without
            # the ApE colour qualifiers used elsewhere -- confirm intended.
            start = rev.position
            end = rev.position + rev._fp - template_len
            feature = _SeqFeature(
                _CompoundLocation(
                    [
                        _FeatureLocation(0, end),
                        _FeatureLocation(start, template_len),
                    ]
                ),
                type="primer_bind",
                location_operator="join",
                strand=-1,
                qualifiers={"label": [rev.name]},
            )
        self.template.features.append(feature)
def map_trace_files(self, pth, limit=25):
    """Map sequencing trace files onto this record as ``trace`` features.

    All files matching the glob pattern ``pth`` are read with
    ``SeqIO.read(name, "abi")`` and lower-cased. If ``self.map_target`` has
    a ``step`` attribute it is used directly as a slice; if it has an
    ``extract`` attribute a slice is built from the start and end of its
    location. When such an area exists, only reads that contain the area
    sequence (or its reverse complement) are mapped, and the reads are
    stored in ``self.matching_reads`` / ``self.not_matching_reads``;
    otherwise both attributes are set to ``None`` and all reads are mapped.

    For every read sharing at least one common substring with the record
    sequence, a feature of type ``"trace"`` labelled with the file name is
    appended to ``self.features``.

    Parameters
    ----------
    pth : str, bytes or os.PathLike
        Glob pattern selecting the trace files.

    limit : int, optional
        Passed to ``_common_sub_strings`` as its third argument.

    Returns
    -------
    list
        File names of the reads for which a feature was added.

    Raises
    ------
    ValueError
        If no file matches ``pth``.
    """
    import glob
    import os

    # glob.glob only understands str/bytes patterns; os.fspath lets callers
    # pass pathlib.Path (or any other path-like) patterns as well.
    pattern = os.fspath(pth)

    traces = []
    for name in glob.glob(pattern):
        trace = SeqIO.read(name, "abi").lower()
        trace.annotations["filename"] = trace.fname = name
        traces.append(trace)
    if not traces:
        raise ValueError("No trace files found in {}".format(pth))

    if hasattr(self.map_target, "step"):
        area = self.map_target
    elif hasattr(self.map_target, "extract"):
        area = slice(self.map_target.location.start, self.map_target.location.end)
    else:
        area = None  # TODO allow other objects as well and do some checks on map target

    if area:
        self.matching_reads = []
        self.not_matching_reads = []
        target = str(self[area].seq).lower()
        target_rc = str(self[area].seq.rc()).lower()
        for trace in traces:
            read_seq = str(trace.seq)
            if target in read_seq or target_rc in read_seq:
                self.matching_reads.append(trace)
            else:
                self.not_matching_reads.append(trace)
        reads = self.matching_reads
    else:
        self.matching_reads = None
        self.not_matching_reads = None
        reads = traces

    own_seq = str(self.seq).lower()
    matching_reads = []

    for read_ in reads:
        matches = _common_sub_strings(own_seq, str(read_.seq), limit)

        if not matches:
            continue

        # Keep the first match, then every match that starts beyond the end
        # of the match preceding it in ``matches``, in both sequences.
        newmatches = [matches[0]]
        for (g, f, h), x in zip(matches, matches[1:]):
            if g + h < x[0] and f + h < x[1]:
                newmatches.append(x)

        matching_reads.append(read_)

        if len(newmatches) > 1:
            loc = _CompoundLocation(
                [_FeatureLocation(m[0], m[0] + m[2]) for m in newmatches]
            )
        else:
            start, _, length = newmatches[0]
            loc = _FeatureLocation(start, start + length)

        self.features.append(
            _SeqFeature(
                loc,
                qualifiers={"label": [read_.annotations["filename"]]},
                type="trace",
            )
        )

    return [x.annotations["filename"] for x in matching_reads]
def __init__(
    self,
    primers,
    template,
    limit=13,
    primerc=1000.0,  # nM
    saltc=50,  # mM
    **kwargs
):
    r"""Set up an Anneal object from an iterable of primers and a template.

    A deep copy of the template is stored, every primer is searched for on
    both strands and a ``primer_bind`` feature is added to the stored
    template for each annealing position found.

    Parameters
    ----------
    primers : iterable of :class:`Primer` or Biopython SeqRecord like objects
        Primer sequences 5'-3'.

    template : Dseqrecord
        The template sequence 5'-3'.

    limit : int, optional
        Limit length of the annealing part of the primers.

    primerc : float, optional
        Primer concentration in nM, set to 1000.0 nM by default.
        Stored in ``self.primerc``.

    saltc : float, optional
        Salt concentration (monovalent cations) in mM, set to 50 mM by
        default. Stored in ``self.saltc``.

    **kwargs
        Stored in ``self.kwargs`` as a ``defaultdict(str)``, so missing
        keys read as the empty string.

    Attributes
    ----------
    products: list
        A list of Amplicon objects, one for each primer pair that may
        form a PCR product.

    forward_primers : list
        Annealing primers found on the crick strand, sorted by ascending
        position.

    reverse_primers : list
        Annealing primers found on the watson strand, sorted by descending
        position.

    Examples
    --------
    >>> from pydna.readers import read
    >>> from pydna.amplify import Anneal
    >>> from pydna.dseqrecord import Dseqrecord
    >>> template = Dseqrecord("tacactcaccgtctatcattatctactatcgactgtatcatctgatagcac")
    >>> from Bio.SeqRecord import SeqRecord
    >>> p1 = read(">p1\ntacactcaccgtctatcattatc", ds = False)
    >>> p2 = read(">p2\ngtgctatcagatgatacagtcg", ds = False)
    >>> ann = Anneal((p1, p2), template)
    >>> print(ann.report())
    Template name 51 nt linear:
    Primer p1 anneals forward at position 23
    <BLANKLINE>
    Primer p2 anneals reverse at position 29
    >>> ann.products
    [Amplicon(51)]
    >>> amplicon_list = ann.products
    >>> amplicon = amplicon_list.pop()
    >>> amplicon
    Amplicon(51)
    >>> print(amplicon.figure())  # doctest: +NORMALIZE_WHITESPACE
    5tacactcaccgtctatcattatc...cgactgtatcatctgatagcac3
                               |||||||||||||||||||||| tm 55.9 (dbd) 60.5
                              3gctgacatagtagactatcgtg5
    5tacactcaccgtctatcattatc3
     ||||||||||||||||||||||| tm 54.6 (dbd) 58.8
    3atgtgagtggcagatagtaatag...gctgacatagtagactatcgtg5
    >>> amplicon.annotations['date'] = '02-FEB-2013'   # Set the date for this example to pass the doctest
    >>> print(amplicon)
    Dseqrecord
    circular: False
    size: 51
    ID: 51bp U96-TO06Y6pFs74SQx8M1IVTBiY
    Name: 51bp_PCR_prod
    Description: pcr product_p1_p2
    Number of features: 2
    /date=02-FEB-2013
    Dseq(-51)
    taca..gcac
    atgt..cgtg
    >>> print(amplicon.program())  # doctest: +NORMALIZE_WHITESPACE
    <BLANKLINE>
    Taq (rate 30 nt/s) 35 cycles             |51bp
    95.0°C    |95.0°C                 |      |Tm formula: Biopython Tm_NN
    |_________|_____          72.0°C  |72.0°C|SaltC 50mM
    | 03min00s|30s  \         ________|______|Primer1C 1.0µM
    |         |      \ 45.4°C/ 0min 2s| 5min |Primer2C 1.0µM
    |         |       \_____/         |      |GC 39%
    |         |         30s           |      |4-12°C
    """
    self.primers = primers
    self.primerc = primerc
    self.saltc = saltc
    self.template = _copy.deepcopy(template)
    self.limit = limit
    self.kwargs = defaultdict(str, kwargs)
    self._products = None
    self.forward_primers = []
    self.reverse_primers = []

    template_seq = self.template.seq
    watson_len = len(template_seq.watson)
    crick_len = len(template_seq.crick)

    # Non-linear templates are searched on doubled strands; the
    # ``pos < length`` filters below keep one copy of each hit
    # (presumably so that sites spanning the origin are found).
    if self.template.linear:
        watson = template_seq.watson
        crick = template_seq.crick
    else:
        watson = template_seq.watson + template_seq.watson
        crick = template_seq.crick + template_seq.crick

    for primer in self.primers:
        for pos, footprint in _annealing_positions(
            str(primer.seq), crick, self.limit
        ):
            if pos < crick_len:
                self.forward_primers.append(
                    _Primer(
                        primer,
                        position=crick_len - pos - min(self.template.seq.ovhg, 0),
                        footprint=footprint,
                    )
                )
        for pos, footprint in _annealing_positions(
            str(primer.seq), watson, self.limit
        ):
            if pos < watson_len:
                self.reverse_primers.append(
                    _Primer(
                        primer,
                        position=pos + max(0, self.template.seq.ovhg),
                        footprint=footprint,
                    )
                )

    by_position = _operator.attrgetter("position")
    self.forward_primers.sort(key=by_position)
    self.reverse_primers.sort(key=by_position, reverse=True)

    template_len = len(self.template)

    for fwd in self.forward_primers:
        if fwd.position - fwd._fp >= 0:
            feature = _SeqFeature(
                _FeatureLocation(fwd.position - fwd._fp, fwd.position),
                type="primer_bind",
                strand=1,
                qualifiers={
                    "label": [fwd.name],
                    "ApEinfo_fwdcolor": ["#baffa3"],
                    "ApEinfo_revcolor": ["#ffbaba"],
                },
            )
        else:
            # The footprint starts before position 0, so the feature is
            # written as a join of the template tail and the template head.
            # NOTE(review): unlike the branch above no strand is passed
            # here -- confirm this is intended.
            start = template_len - fwd._fp + fwd.position
            end = start + fwd._fp - template_len
            feature = _SeqFeature(
                _CompoundLocation(
                    [
                        _FeatureLocation(start, template_len),
                        _FeatureLocation(0, end),
                    ]
                ),
                type="primer_bind",
                location_operator="join",
                qualifiers={
                    "label": [fwd.name],
                    "ApEinfo_fwdcolor": ["#baffa3"],
                    "ApEinfo_revcolor": ["#ffbaba"],
                },
            )
        self.template.features.append(feature)

    for rev in self.reverse_primers:
        if rev.position + rev._fp <= template_len:
            feature = _SeqFeature(
                _FeatureLocation(rev.position, rev.position + rev._fp),
                type="primer_bind",
                strand=-1,
                qualifiers={
                    "label": [rev.name],
                    "ApEinfo_fwdcolor": ["#baffa3"],
                    "ApEinfo_revcolor": ["#ffbaba"],
                },
            )
        else:
            # The footprint runs past the end of the template, so the
            # feature is a join of the template head and the template tail.
            # NOTE(review): only the label qualifier is set here, without
            # the ApE colour qualifiers used elsewhere -- confirm intended.
            start = rev.position
            end = rev.position + rev._fp - template_len
            feature = _SeqFeature(
                _CompoundLocation(
                    [
                        _FeatureLocation(0, end),
                        _FeatureLocation(start, template_len),
                    ]
                ),
                type="primer_bind",
                location_operator="join",
                strand=-1,
                qualifiers={"label": [rev.name]},
            )
        self.template.features.append(feature)