def ab1toassembly(filename1, filename2):
    """Read two AB1 files and return the result of assembling them.

    The first read's sequence, confidences and traces are handed to
    ``assemble`` unchanged.  The second read's are first passed through
    ``rcbases``, ``rcconfidences`` and ``rctraces`` respectively
    (presumably reverse-complementing each track -- confirm against
    those helpers).
    """
    forward = ab1.read(filename1)
    reverse = ab1.read(filename2)

    # Evaluated in the same order the arguments are passed to assemble().
    fwd_bases = forward['sequence']
    fwd_confidences = forward['confidences']
    fwd_traces = forward['traces']
    rev_bases = rcbases(reverse['sequence'])
    rev_confidences = rcconfidences(reverse['confidences'])
    rev_traces = rctraces(reverse['traces'])

    return assemble(fwd_bases, fwd_confidences, fwd_traces,
                    rev_bases, rev_confidences, rev_traces)
def ab1toassembly(filename1, filename2):
    """Assemble the reads stored in two AB1 files.

    Both files are loaded with ``ab1.read``.  The first read is used as
    is; each track of the second read goes through its matching ``rc*``
    helper (``rcbases``, ``rcconfidences``, ``rctraces``) before the six
    tracks are given to ``assemble``, whose result is returned.
    """
    first = ab1.read(filename1)
    second = ab1.read(filename2)

    first_tracks = (
        first["sequence"],
        first["confidences"],
        first["traces"],
    )
    second_tracks = (
        rcbases(second["sequence"]),
        rcconfidences(second["confidences"]),
        rctraces(second["traces"]),
    )
    # Positional order: read 1 tracks followed by the rc'd read 2 tracks.
    return assemble(*(first_tracks + second_tracks))
def assemble(read1, read2, *extra_seqs):
    """Build a track set from two AB1 reads plus optional extra sequences.

    Both inputs are loaded with ``ab1.read``.  The second read's sequence,
    confidences and traces are passed through ``tracks.revcomp`` before
    use.  ``contig.contig`` supplies, for each read, an offset and a
    sequence; each read's confidences and traces are then run through
    ``tracks.regap`` against that sequence.

    Args:
        read1: Value passed to ``ab1.read`` for the first read
            (presumably an AB1 file path).
        read2: Value passed to ``ab1.read`` for the second read.
        *extra_seqs: ``(name, sequence)`` pairs.  When the contig result
            carries a reference, each sequence is aligned against it with
            ``fasta.fasta`` and placed relative to the reference offset;
            otherwise it is appended unaligned at offset 0.

    Returns:
        A ``(trackset, strands)`` tuple where ``trackset`` is the
        populated ``tracks.TrackSet`` and ``strands`` is
        ``ref['strands']`` from the contig result.  If exactly one extra
        sequence was given, a reference exists, and the two differ
        somewhere in their overlap, a final ``'mismatches'`` track marks
        differing positions with ``'X'``.
    """
    tracks1 = ab1.read(read1)
    tracks2 = ab1.read(read2)
    ref = contig.contig(
        tracks1['sequence'],
        tracks1['confidences'],
        tracks.revcomp(tracks2['sequence']),
        tracks.revcomp(tracks2['confidences']),
    )

    t = tracks.TrackSet()
    read1_offset, read1_sequence = ref['read1']
    read2_offset, read2_sequence = ref['read2']
    read1_confs = tracks.regap(read1_sequence, tracks1['confidences'])
    read2_confs = tracks.regap(read2_sequence,
                               tracks.revcomp(tracks2['confidences']))
    read1_traces = tracks.regap(read1_sequence, tracks1['traces'])
    read2_traces = tracks.regap(read2_sequence,
                                tracks.revcomp(tracks2['traces']))
    t.extend([
        tracks.TrackEntry('read 1 traces', read1_offset, read1_traces),
        tracks.TrackEntry('read 1 confidences', read1_offset, read1_confs),
        tracks.TrackEntry('read 1 bases', read1_offset, read1_sequence),
        tracks.TrackEntry('read 2 traces', read2_offset, read2_traces),
        tracks.TrackEntry('read 2 confidences', read2_offset, read2_confs),
        tracks.TrackEntry('read 2 bases', read2_offset, read2_sequence),
    ])

    # Identity test against the None singleton rather than `!= None`.
    has_reference = ref['reference'] is not None
    if has_reference:
        reference_offset, reference_sequence = ref['reference']
        t.append(tracks.TrackEntry('reference', reference_offset,
                                   reference_sequence))

    for name, s in extra_seqs:
        if has_reference:
            (ref_start, _), (seq_start, saligned) = fasta.fasta(
                reference_sequence, s)
            # Shift by the difference of the two alignment offsets so the
            # extra sequence is positioned relative to the reference track.
            t.append(tracks.TrackEntry(
                name,
                reference_offset + seq_start - ref_start,
                tracks.sequence(saligned)))
        else:
            t.append(tracks.TrackEntry(name, 0, s))

    # Now add an assembly of the lab sequence and reference sequence
    if len(extra_seqs) == 1 and has_reference:
        # With one extra sequence the last two tracks appended are the
        # reference followed by that (lab) sequence.
        labtrack = t[-1]
        reftrack = t[-2]
        offset = max(labtrack.offset, reftrack.offset)
        loffset = offset - labtrack.offset
        roffset = offset - reftrack.offset
        # Invariant of taking the max: neither skip is negative and at
        # least one of them is zero.
        assert loffset >= 0 and roffset >= 0 and (loffset == 0 or roffset == 0)
        bases = tracks.sequence(''.join(
            ' ' if a == b else 'X'
            for a, b in zip(labtrack.track[loffset:],
                            reftrack.track[roffset:])))
        # Only emit the track when there is at least one disagreement.
        if 'X' in bases:
            t.append(tracks.TrackEntry('mismatches', offset, bases))

    return (t, ref['strands'])
def ab1tohtml(ab1filename):
    """Return an HTML page (as a string) rendering one AB1 file.

    The file is loaded with ``ab1.read``; its traces, confidences and
    sequence each become an ``aflist`` track of an ``Assembly``, which
    ``renderassembly`` turns into the page body.
    """
    record = ab1.read(ab1filename)

    # (track label, key in the AB1 record, gap marker, track class)
    track_specs = [
        ('traces', 'traces', None, 'svg'),
        ('confidences', 'confidences', None, 'integer'),
        ('bases', 'sequence', '-', 'nucleotide'),
    ]
    assembly = Assembly([
        (label, aflist(0, record[key], gap=gap, trackclass=trackclass))
        for label, key, gap, trackclass in track_specs
    ])

    return ("<!DOCTYPE html>" + "<html><body>"
            + renderassembly(assembly)
            + "</body></html>")
def ab1tohtml(ab1filename):
    """Render the AB1 file *ab1filename* as a complete HTML document.

    Three tracks -- traces, confidences and bases -- are built with
    ``aflist`` from the data returned by ``ab1.read``, wrapped in an
    ``Assembly`` and rendered by ``renderassembly``.  The rendered markup
    is placed between a doctype/``<html><body>`` prefix and the closing
    tags, and the resulting string is returned.
    """
    data = ab1.read(ab1filename)

    traces_track = aflist(0, data['traces'], gap=None, trackclass='svg')
    confidences_track = aflist(0, data['confidences'], gap=None,
                               trackclass='integer')
    bases_track = aflist(0, data['sequence'], gap='-',
                         trackclass='nucleotide')

    assembly = Assembly([
        ('traces', traces_track),
        ('confidences', confidences_track),
        ('bases', bases_track),
    ])

    page = "<!DOCTYPE html>" + "<html><body>"
    page = page + renderassembly(assembly)
    return page + "</body></html>"
'vent', r'\b[Bb]acteri(um|a)', r'sp\.', r'str\.' ])) def is_unclassified(s): """Is string *s* containining and organism description ill posed?""" return re.search(unclassified_regex, s) and True or False # workup should be a dictionary with the keys "accession", "workup", # "pat_name", "amp_name", "seq_key", as selected directly from the # workups view of the database. In production, it will be found in a # JSON file written in the directory. def generate_report(lookup_fun, assembled_render, strandwise_render): def f((workup, read1path, read2path), omit_blast=False): read1 = ab1.read(read1path) read2 = ab1.read(read2path) assembly = contig.assemble(read1['sequence'], read1['confidences'], read1['traces'], read2['sequence'], read2['confidences'], read2['traces']) if 'contig' in assembly: if not omit_blast: v = lookup_fun(''.join(assembly['contig'].values), save_path=os.path.join(workup['path'], 'blast.xml'), save_json=os.path.join(workup['path'], 'blast.json')) body = assembled_render(workup, assembly, v, omit_blast=False) else: body = assembled_render(workup, assembly,
r'sp\.', r'str\.'])) def is_unclassified(s): """Is string *s* containining and organism description ill posed?""" return re.search(unclassified_regex, s) and True or False # workup should be a dictionary with the keys "accession", "workup", # "pat_name", "amp_name", "seq_key", as selected directly from the # workups view of the database. In production, it will be found in a # JSON file written in the directory. def generate_report(lookup_fun, assembled_render, strandwise_render): def f((workup, read1path, read2path), omit_blast=False): read1 = ab1.read(read1path) read2 = ab1.read(read2path) assembly = contig.assemble(read1['sequence'], read1['confidences'], read1['traces'], read2['sequence'], read2['confidences'], read2['traces']) if 'contig' in assembly: if not omit_blast: v = lookup_fun(''.join(assembly['contig'].values), save_path=os.path.join(workup['path'], 'blast.xml'), save_json=os.path.join(workup['path'], 'blast.json')) body = assembled_render(workup, assembly, v, omit_blast=False) else: body = assembled_render(workup, assembly, None, omit_blast=True) return ('assembled', body) else: if not omit_blast: v1 = lookup_fun(''.join(assembly['bases 1'].values), save_path=workup['path'], save_json=os.path.join(workup['path'], 'blast.json'))