def process_report(report):
    """
    Parse a PIT XML report and collect its unique mutants.

    The parsed report root is iterated once; every child element is turned
    into a Mutant built from its "detected" and "status" attributes plus the
    text of its first nine sub-elements. Only the first mutant seen for a
    given key() is kept.

    Returns a dict mapping mutant key -> Mutant. No score is computed here.
    """
    unique_mutants = {}
    for node in parse_report(report):
        detected = node.attrib.get("detected")
        status = node.attrib.get("status")
        # Sub-elements 0..8 are passed positionally, in document order.
        texts = [node[position].text for position in range(9)]
        candidate = Mutant(detected, status, *texts)
        identity = candidate.key()
        if identity in unique_mutants:
            # A mutant with this key was already recorded; keep the first.
            continue
        unique_mutants[identity] = candidate
    return unique_mutants
def api_mutant():
    """
    Handle a mutant-detection request whose JSON body carries a 'dna' key.

    Outcomes, in the order they are checked:
      * 400, body {"error": false}  - the body could not be read or the
        Mutant object could not be built from it.
      * 403, body {"error": <result>} - validate_adn_chain() returned a
        falsy value (wrong length or wrong letters).
      * 403, body false / 200, body true - the chain is already stored with
        status 0 / status 1 respectively.
      * otherwise the chain is evaluated with create_dna_chain(), stored via
        save_dna(), and answered with 200/true or 403/false.
    """
    # Header name must be the real HTTP header ("Content-Type"); the
    # previous "ContentType" key was sent as an unknown header and left the
    # error bodies with the default content type.
    json_headers = {'Content-Type': 'application/json'}

    def verdict_response(is_mutant):
        # 200 + "true" for a mutant, 403 + "false" otherwise.
        return app.response_class(status=200 if is_mutant else 403,
                                  mimetype='application/json',
                                  response=json.dumps(is_mutant))

    try:
        content = request.get_json()
        print(content, file=sys.stderr)
        adn = content.get('dna')
        mutant_classes = Mutant(adn)
    except Exception as e:
        # Request boundary: any failure while reading the payload (including
        # a missing/non-JSON body, where content is None) becomes a 400.
        print(e)
        result = False
        return json.dumps({'error': result}), 400, json_headers

    # validate_adn_chain() checks that the DNA chain has the right length
    # and letters: truthy when the format is correct, falsy when it is not.
    result = mutant_classes.validate_adn_chain()
    if not result:
        return json.dumps({'error': result}), 403, json_headers

    # Check whether this DNA chain is already stored in the database. If it
    # is, answer with the stored status; otherwise evaluate and store it.
    stored = mutant_classes.validate_exist_dna()
    status = stored['status']
    if status == 0:
        return verdict_response(False)
    if status == 1:
        return verdict_response(True)

    if mutant_classes.create_dna_chain():
        mutant_classes.save_dna(dna_status=1)
        return verdict_response(True)

    mutant_classes.save_dna(dna_status=0)
    return verdict_response(False)
class MutantsTest(unittest.TestCase):
    """
    Checks for Mutant.is_mutant, Stats.get_stats_dna and the HTTP routes.

    NOTE(review): none of the check methods below start with "test", so the
    default unittest loader will not collect them - confirm a custom loader
    or naming rule is configured.
    NOTE(review): self.client is the imported `app` object itself; if `app`
    is a Flask application rather than a test client, .get()/.post() will
    not return response objects - confirm.
    """

    def setUp(self):
        # Fresh collaborators for every check.
        self.client = app
        self.humano = Mutant()
        self.estadistica = Stats()

    # --- is_mutant: DNA is passed as the text of a JSON list -------------

    def humano_no_mutante(self):
        dna = '["AAATTT","CCCGGG","CTCAGT","ACAGGC","AATTCG","CCCATG"]'
        self.assertFalse(self.humano.is_mutant(dna))

    def humano_mutante(self):
        dna = '["AAAAAT","CCCGGG","CTCAGT","ACAGGC","AATTCG","CCCATG"]'
        self.assertTrue(self.humano.is_mutant(dna))

    def humano_invalido(self):
        # Third row contains letters outside A/C/G/T.
        dna = '["AAATTT","CCCGGG","CTCHHT","ACAGGC","AATTCG","CCCATG"]'
        self.assertFalse(self.humano.is_mutant(dna))

    def humano_mutante_horizontal(self):
        dna = '["AAAAAT","CCCGCG","CTCAGT","ACAGGC","AATTCG","CCCATG"]'
        self.assertTrue(self.humano.is_mutant(dna))

    def humano_mutante_vertical(self):
        dna = '["AAATTT","ACCGGG","ATCAGT","ACAGGC","AATTCG","CCCATG"]'
        self.assertTrue(self.humano.is_mutant(dna))

    def humano_mutante_diagonal(self):
        dna = '["AAATTT","CACGGG","CTAAGT","ACAAGC","AATTAG","CCCATA"]'
        self.assertTrue(self.humano.is_mutant(dna))

    # --- statistics --------------------------------------------------------

    def obtener_estadisticas(self):
        stats = self.estadistica.get_stats_dna()
        assert stats is not None

    # --- HTTP routes -------------------------------------------------------

    def method_invalid(self):
        reply = self.client.get('/mutant/')
        self.assertEqual(400, reply.status_code)

    def method_ok_no_json(self):
        reply = self.client.post('/mutant/')
        self.assertEqual(400, reply.status_code)

    def stats_ok(self):
        reply = self.client.get('/stats')
        self.assertEqual(200, reply.status_code)
def Write(self, version):
    """
    Write() generates an excel file for the MutantSet.

    The workbook is named "<screen>_<fragment>.xlsx". It gets a "Summary"
    sheet with the set's parameters, followed by one sheet per mutant type.
    version is the version number of the package and is recorded on the
    summary sheet.
    """
    target = "_".join((self.screen, self.fragment)) + ".xlsx"
    workbook = xlsxwriter.Workbook(target, {
        'strings_to_numbers': True,
    })

    # Summary information, one (label, value) row each, in this order.
    summary_rows = [
        ("screen", self.screen),
        ("fragment", self.fragment),
        ("version", version),
        ("synth_from", self.synth_from),
        ("synth_to", self.synth_to),
        ("mut_from", self.mut_from),
        ("mut_to", self.mut_to),
        ("synth_nt_sequence", self.synth_nt_sequence),
        ("mut_nt_sequence", self.coding_sequence.coding_sequence),
        ("aa_sequence", self.coding_sequence.aa_sequence),
        ("mut_from_0", self.mut_from_0),
        ("mut_to_0", self.mut_to_0),
    ]
    summary = WorksheetWriter(workbook.add_worksheet("Summary"))
    for label, value in summary_rows:
        summary.write(label, value)

    # (worksheet title, Mutant.type value shown on that sheet)
    sheet_specs = (
        ('Point Mutations', 'point'),
        ('In-frame Insertions', 'insertion'),
        ('In-frame Deletions', 'deletion'),
        ('WT Controls', 'WT'),
    )
    for title, wanted_type in sheet_specs:
        page = WorksheetWriter(workbook.add_worksheet(title))
        page.worksheet.freeze_panes(1, 0)  # keep the header row visible
        page.write(Mutant.TableHeader().split())
        of_this_type = [m for m in self.mutants if m.type == wanted_type]
        for entry in of_this_type:
            # Rows are emitted for bar coded mutants and for mutants that
            # did not pass the filters.
            if entry.barcode or not entry.passes_filters:
                page.write(entry.AsTuple())

    workbook.close()
def kunkel_full(protocol, params):
    """
    Build the Kunkel workflow up to covered agar plates.

    Reads params["construct_setup"] ('growth_media', 'ssDNA' and the
    'mutant_upload' rows), requests synthesis of every distinct oligo
    sequence, assembles the constructs, transforms them and covers every
    plate returned by transform(). 'num_colonies' is not read or forwarded
    in this variant.

    Raises RuntimeError when a not-yet-seen sequence uses an oligo label
    that is already a ref on the protocol.
    """
    setup = params["construct_setup"]
    growth_media = setup['growth_media']
    ssDNA = setup['ssDNA']
    upload_rows = setup['mutant_upload']

    # Group the oligo descriptions of each mutant under its label.
    oligos_by_mutant = {}
    for entry in upload_rows:
        oligos_by_mutant.setdefault(entry["mutant_label"], []).append({
            "sequence": entry["sequence"],
            "purification": entry["purification"],
            "scale": entry["scale"],
            "oligo_label": entry["oligo_label"]
        })

    # One synthesis request (and one destination tube) per distinct sequence.
    synthesis_by_sequence = {}
    for entry in upload_rows:
        if entry["sequence"] in synthesis_by_sequence:
            continue
        if entry["oligo_label"] in protocol.refs.keys():
            raise RuntimeError("You cannot specify two different "
                               "oligos to be synthesized with the "
                               "same name %s" % entry['oligo_label'])
        tube = protocol.ref(entry["oligo_label"], None, "micro-2.0",
                            storage="cold_4")
        synthesis_by_sequence[entry["sequence"]] = {
            "sequence": entry["sequence"],
            "purification": entry["purification"],
            "scale": entry["scale"],
            "destination": tube.well(0)
        }

    # Make mutant objects for accessibility.
    mutant_constructs = []
    for label, oligo_rows in oligos_by_mutant.items():
        construct = Mutant(label)
        for oligo_row in oligo_rows:
            request = synthesis_by_sequence[oligo_row["sequence"]]
            construct.add_oligos(request["destination"])
        mutant_constructs.append(construct)

    oligos_to_synthesize = []
    for request in synthesis_by_sequence.values():
        scale_default(len(request["sequence"]), request["scale"],
                      request["destination"].container.name)
        oligos_to_synthesize.append(request)
    protocol.oligosynthesize(oligos_to_synthesize)

    construct_summaries = [
        {'mutant_name': construct.name, 'oligos': construct.oligos}
        for construct in mutant_constructs
    ]
    annealing_plate = assemble(protocol, {
        'ssDNA': ssDNA,
        'constructs': construct_summaries,
        'mutant_objs': mutant_constructs
    })
    protocol.unseal(annealing_plate)

    # Get agar plates back from the transform protocol and cover each one.
    agar_plates = transform(protocol, {
        'growth_media': growth_media,
        'constructs': [construct.anneal_well
                       for construct in mutant_constructs],
        'mutant_objs': mutant_constructs
    })
    for plate in agar_plates:
        protocol.cover(plate)
def classFactory(iface):
    """
    Create the Mutant object for the given interface handle.

    The import is performed inside the function, so the `mutant` module is
    only loaded when the factory is actually called.
    NOTE(review): presumably the plugin entry point called by the host
    application with its interface object - confirm.
    """
    from mutant import Mutant as plugin_class
    plugin = plugin_class(iface)
    return plugin
from flask import Flask, request, abort
from mutant import Mutant, Stats
import os

# Application object and the shared collaborators used by the routes.
app = Flask(__name__)
port = int(os.environ.get('PORT', 5000))  # listening port, 5000 if unset
humano = Mutant()
estadisticas = Stats()


@app.route('/')
def index():
    """Landing route: answers with a fixed identification string."""
    return 'MeLi Challenge - Juan Fresneda'


@app.route('/mutant/', methods=['POST'])
def mutant():
    """
    Classify the DNA posted as JSON under the 'dna' key.

    Aborts with 400 when the body is empty/falsy or lacks 'dna', and also
    when humano.validate_dna() is truthy; aborts with 403 when the DNA is
    not mutant; returns the text 'true' otherwise.
    """
    payload = request.json
    if not payload or 'dna' not in payload:
        abort(400)

    dna = request.get_json()['dna']
    # NOTE(review): a truthy validate_dna() leads to 400, so it presumably
    # reports *invalid* input - confirm against Mutant.validate_dna.
    if humano.validate_dna(dna):
        abort(400)
    if not humano.is_mutant(dna):
        abort(403)
    return 'true'
def __init__(self, screen, fragment, basesequence, synth_from, synth_to,
             mut_from, mut_to, first_aa_coord, num_barcodes=0, ms=[],
             ifi=[], ifd=[], ms_barcode_choices=[], id_barcode_choices=[]):
    """
    Set up a MutantSet from a base sequence and two coordinate ranges.

    synth_from/synth_to and mut_from/mut_to are 1-based coordinates into
    basesequence. ms, ifi and ifd are copied (never mutated), so the list
    defaults are safe. ms_barcode_choices and id_barcode_choices are
    accepted but not used in this constructor.

    Raises AssertionError if the 5' overhang, coding sequence and 3'
    overhang lengths do not add up to the synthesized length.
    """
    # Name of the MutantSet.
    self.screen = screen
    self.fragment = fragment

    # 1-based coordinates into the whole base sequence: synthesis range
    # first, mutagenesis range second.
    self.synth_from, self.synth_to = synth_from, synth_to
    self.mut_from, self.mut_to = mut_from, mut_to

    # Coordinate in the WT protein of the first amino acid being mutated;
    # used for the GVCF notation of the protein mutations.
    self.first_aa_coord = first_aa_coord

    # Number of bar codes per mutation to generate.
    self.num_barcodes = num_barcodes

    # Requested missense / in-frame insertion / in-frame deletion lists.
    # Empty lists mean "generate all of them".
    self.ms = ms[:]
    self.ifi = ifi[:]
    self.ifd = ifd[:]

    # 0-based slice bounds derived from the 1-based coordinates.
    synth_start = self.synth_from - 1
    mut_start = self.mut_from - 1

    # Sequence being synthesized, and the overhangs on either side of the
    # coding region.
    self.synth_nt_sequence = basesequence[synth_start:self.synth_to]
    self.overhang_seq_5 = basesequence[synth_start:mut_start]
    self.overhang_seq_3 = basesequence[self.mut_to:self.synth_to]

    # Sequence being mutated.
    self.coding_sequence = Mutant(self.screen, self.fragment,
                                  basesequence[mut_start:self.mut_to],
                                  self.overhang_seq_5, self.overhang_seq_3,
                                  self.first_aa_coord)

    # 5' overhang + coding sequence + 3' overhang must be exactly as long
    # as the synthesized sequence.
    len_5 = len(self.overhang_seq_5)
    len_coding = len(self.coding_sequence.coding_sequence)
    len_3 = len(self.overhang_seq_3)
    len_synth = len(self.synth_nt_sequence)
    assert len_5 + len_coding + len_3 == len_synth, (
        "Length of fragments (%d, %d, %d) != total synthesized length (%d)"
        % (len_5, len_coding, len_3, len_synth))

    # Mutagenesis coords, 0-based, relative to the synthesized sequence.
    self.mut_from_0 = self.mut_from - self.synth_from
    self.mut_to_0 = self.mut_from_0 + (self.mut_to - self.mut_from)

    # List of mutants to be generated.
    self.mutants = []
class MutantSet:
    """
    MutantSet contains the base sequence and parameters for a mutant
    screen. Nucleotide 1 of each mutation range must be the first
    nucleotide of a codon.

    base sequence   ----------------------------------------------------
    synth sequence         --------------------------------------
    mutated seq                   ---------------------------
    5' overhang seq        -------
    3' overhang seq                                          ----

    The synth sequence may not be in frame because the 5' overhang may
    not be a multiple of 3 in length, but the mutate sequence must be in
    frame. The mutate sequence is represented by a Mutant object, as are
    all the mutants derived from it.

    Revised 7/13/2017
    Scaling down screen due to lab technical issues. Now using few bar
    codes and re-using them. Specifying in frame insertions (ifi's) and
    in-frame deletions (ifd's). If ifi's or ifd's not specified then
    generate all of them, otherwise only use those that are specified.

    7/27/2017 - added ms_barcode_choices and id_barcode_choices
    parameters, intended to specify a list of barcodes for ms and
    ifi/ifd mutants respectively. The constructor below accepts them but
    does not use them.

    All progress messages are printed with the single-argument
    parenthesized form of print, which produces the same output under
    Python 2 and Python 3.
    """

    def __init__(self, screen, fragment, basesequence, synth_from, synth_to,
                 mut_from, mut_to, first_aa_coord, num_barcodes=0, ms=[],
                 ifi=[], ifd=[], ms_barcode_choices=[],
                 id_barcode_choices=[]):
        """
        Store the screen parameters and build the wild-type coding Mutant.

        The list defaults are only ever copied, never mutated, so sharing
        them between calls is harmless.

        Raises AssertionError if the overhang and coding lengths do not
        add up to the synthesized length.
        """
        # Name of the MutantSet.
        self.screen = screen
        self.fragment = fragment

        # Sequence synthesis coords, 1-based, applies to entire
        # base sequence.
        self.synth_from = synth_from
        self.synth_to = synth_to

        # Sequence mutagenesis coords, 1-based, applies to entire
        # base sequence.
        self.mut_from = mut_from
        self.mut_to = mut_to

        # The coordinate in the WT protein of the first amino acid
        # that is getting mutated. This is used for the GVCF notation
        # of the protein mutations.
        self.first_aa_coord = first_aa_coord

        # Store the number of bar codes per mutation to generate.
        self.num_barcodes = num_barcodes

        # Store the lists of missense, in-frame insertions (ifi's) and
        # in-frame deletions (ifd's) to be generated. If these
        # lists are empty then generate all of them.
        self.ms = ms[:]
        self.ifi = ifi[:]
        self.ifd = ifd[:]

        # Sequence being synthesized.
        self.synth_nt_sequence = basesequence[self.synth_from -
                                              1:self.synth_to]

        # Overhang sequences before and after coding regions.
        self.overhang_seq_5 = basesequence[self.synth_from - 1:self.mut_from
                                           - 1]
        self.overhang_seq_3 = basesequence[self.mut_to:self.synth_to]

        # Sequence being mutated.
        self.coding_sequence = Mutant(
            self.screen, self.fragment,
            basesequence[self.mut_from - 1:self.mut_to],
            self.overhang_seq_5, self.overhang_seq_3, self.first_aa_coord)

        # The overhang_seq_5 + coding_sequence.coding_sequence +
        # overhang_seq_3 should be as long as the synthesized sequence.
        assert len(self.overhang_seq_5) + len(
            self.coding_sequence.coding_sequence
        ) + len(self.overhang_seq_3) == len(
            self.synth_nt_sequence
        ), "Length of fragments (%d, %d, %d) != total synthesized length (%d)" % (
            len(self.overhang_seq_5),
            len(self.coding_sequence.coding_sequence),
            len(self.overhang_seq_3), len(self.synth_nt_sequence))

        # Mutagenesis coords, 0-based, applies to synthesized
        # sequence.
        self.mut_from_0 = self.mut_from - self.synth_from
        self.mut_to_0 = self.mut_from_0 + (self.mut_to - self.mut_from)

        # List of mutants to be generated.
        self.mutants = []

    def __str__(self):
        """Return a one-line summary; long sequences show 5 nt per end."""
        return "Screen: %s, fragment: %s, synth_from: %d, synth_to: %d, mut_from: %d, mut_to: %d, synth_nt_sequence %s...%s, mut_nt_sequence %s...%s, aa_sequence %s, mut_from_0: %d, mut_to_0: %d, first_aa_coord: %d" % (
            self.screen, self.fragment, self.synth_from, self.synth_to,
            self.mut_from, self.mut_to, self.synth_nt_sequence[0:5],
            self.synth_nt_sequence[-5:],
            self.coding_sequence.coding_sequence[0:5],
            self.coding_sequence.coding_sequence[-5:],
            self.coding_sequence.aa_sequence, self.mut_from_0,
            self.mut_to_0, self.first_aa_coord)

    def Aa(self, i):
        """
        Returns amino acid i (zero-based) from the amino acid sequence.
        """
        return self.coding_sequence.aa_sequence[i]

    def GenerateMutants(self):
        """
        Populates list of all Mutants in this MutantSet. These
        represent the missense substitutions possible via single
        nucleotide substitutions, not all single nucleotide
        substitutions.

        The list starts with the wild-type coding sequence, followed by
        the point, insertion and deletion mutants, in that order.
        """
        print("Generating mutants for mutant set %s %s." %
              (self.screen, self.fragment))
        self.mutants = [self.coding_sequence]
        self.mutants += self.coding_sequence.PointMutants(self.ms)
        self.mutants += self.coding_sequence.InsertionMutants(self.ifi)
        self.mutants += self.coding_sequence.DeletionMutants(self.ifd)
        print("Generated %d mutants." % len(self.mutants))

    def FilterMutants(self):
        """
        Applies filters to every mutant and reports how many pass.

        Mutants that fail are kept in the list; ApplyFilters() records the
        outcome on each mutant's passes_filters attribute.
        """
        print("Applying filters to mutant set %s %s." %
              (self.screen, self.fragment))
        passed_filters = 0
        for m in self.mutants:
            m.ApplyFilters()
            if m.passes_filters:
                passed_filters += 1
        print("%d mutants pass filters." % passed_filters)

    def CreateBarcodes(self):
        """
        Creates bar codes for each Mutant object. Replaces this
        mutantset's list of mutants with a new and expanded list of
        bar coded mutants, then appends one bar coded copy of the WT
        coding sequence per bar coded mutant.

        Raises AssertionError (re-raised after printing diagnostics) when
        applying a bar code to the WT copy fails its assertion.
        """
        print("Creating bar codes for mutant set %s %s." %
              (self.screen, self.fragment))
        orig_mutant_list = self.mutants[:]
        self.mutants = []

        # Create bar coded versions of mutants for each mutant in the list.
        for mutant in orig_mutant_list:
            self.mutants.append(mutant)
            if mutant.passes_filters:
                self.mutants += mutant.GenerateBarcodedMutants()
            else:
                print("NOT generating bar codes for %s mutant %s." %
                      (mutant.type, mutant.name))

        # Then, for each bar coded mutant, create a copy of the WT
        # coding sequence that has the same bar code.
        print("Creating barcoded copies of WT sequence.")
        barcode_wt = []
        barcode_problems = 0
        for mutant in self.mutants:
            if mutant.barcode:
                barcode = mutant.AdjustedBarcode()
                wt = copy.deepcopy(self.coding_sequence)
                print("Applying barcode %s from %s to WT." %
                      (FormatBarcode(barcode), mutant.name))
                try:
                    wt.ApplyBarcode(barcode)
                    wt.ApplyFilters()
                    barcode_wt.append(wt)
                except AssertionError:
                    # Dump both sequences for diagnosis, then propagate.
                    # Because of the re-raise, the problem count printed
                    # below is only ever reached with a value of 0.
                    barcode_problems += 1
                    print("AssertionError:")
                    print("aa sequence changed after bar code applied.")
                    print("%-5s: %s" % ('mut', SpacedAa(mutant.aa_sequence)))
                    print("%-5s: %s" % ('mut', mutant.coding_sequence))
                    print("%-5s: %s" %
                          ('orig',
                           SpacedAa(self.coding_sequence.aa_sequence)))
                    print("%-5s: %s" %
                          ('orig', self.coding_sequence.coding_sequence))
                    print("%-5s: %s" %
                          ('alt', SpacedAa(Translate(wt.coding_sequence))))
                    print("%-5s: %s" % ('alt', wt.coding_sequence))
                    raise
        print("Generated %d barcoded WT sequences, %d problems." %
              (len(barcode_wt), barcode_problems))
        self.mutants += barcode_wt
        print("Done - mutant set contains %d mutants." % len(self.mutants))

    def Write(self, version):
        """
        Write() generates an excel file for the MutantSet.
        version is the version number of the package.

        The file is named "<screen>_<fragment>.xlsx" and holds a summary
        sheet plus one sheet per mutant type.
        """
        filename = self.screen + '_' + self.fragment + ".xlsx"
        workbook = xlsxwriter.Workbook(filename, {
            'strings_to_numbers': True,
        })

        # Summary information.
        summary = WorksheetWriter(workbook.add_worksheet("Summary"))
        summary.write("screen", self.screen)
        summary.write("fragment", self.fragment)
        summary.write("version", version)
        summary.write("synth_from", self.synth_from)
        summary.write("synth_to", self.synth_to)
        summary.write("mut_from", self.mut_from)
        summary.write("mut_to", self.mut_to)
        summary.write("synth_nt_sequence", self.synth_nt_sequence)
        summary.write("mut_nt_sequence",
                      self.coding_sequence.coding_sequence)
        summary.write("aa_sequence", self.coding_sequence.aa_sequence)
        summary.write("mut_from_0", self.mut_from_0)
        summary.write("mut_to_0", self.mut_to_0)

        # (worksheet title, Mutant.type value listed on that sheet)
        pages = [
            ('Point Mutations', 'point'),
            ('In-frame Insertions', 'insertion'),
            ('In-frame Deletions', 'deletion'),
            ('WT Controls', 'WT'),
        ]

        for (worksheet_name, mutant_type) in pages:
            page = WorksheetWriter(workbook.add_worksheet(worksheet_name))
            page.worksheet.freeze_panes(1, 0)
            page.write(Mutant.TableHeader().split())
            for m in filter(lambda x: x.type == mutant_type, self.mutants):
                # Only bar coded mutants and mutants that failed the
                # filters get a row.
                if m.barcode or not m.passes_filters:
                    page.write(m.AsTuple())

        workbook.close()

    def AsFasta(self):
        """
        AsFasta() writes the mutant set as a fasta file named
        "<screen>_<fragment>.fasta".

        Every mutant is written, regardless of bar code or filter status.
        """
        filename = self.screen + '_' + self.fragment + ".fasta"
        print("Writing fasta file for mutant set %s %s to %s." %
              (self.screen, self.fragment, filename))
        # The context manager closes the file even if a mutant's AsFasta()
        # raises part-way through.
        with open(filename, "w") as ofs:
            for m in self.mutants:
                ofs.write(m.AsFasta())
def setUp(self):
    """
    Prepare the fixtures: the application handle, a Mutant and a Stats.

    NOTE(review): self.client is bound to `app` itself, not to a test
    client derived from it - confirm that is what the checks expect.
    """
    # Handle used to issue requests.
    self.client = app
    # Object whose DNA classification is under test.
    self.humano = Mutant()
    # Object whose statistics query is under test.
    self.estadistica = Stats()
def kunkel_full(protocol, params):
    """
    Build the complete Kunkel mutagenesis workflow on `protocol`.

    Steps, in order: request synthesis of every distinct oligo sequence,
    assemble the constructs, transform them, optionally sequence the
    growth wells, and optionally post-process the growth plate.

    params keys read here:
      params["construct_setup"]: 'growth_media', 'num_colonies', 'ssDNA'
          and 'mutant_upload' (rows with 'mutant_label', 'sequence',
          'purification', 'scale', 'oligo_label').
      params["sequencing"]: rows with 'seq_choice'; rows equal to
          "No sequencing." are skipped.
      params["other_processing"]["other_processing"]: one of
          "No processing.", "Miniprep", "Return Colonies",
          "Return Colonies Glycerol".

    Raises RuntimeError when a not-yet-seen sequence uses an oligo label
    that is already a ref on the protocol.
    """
    construct_setup = params["construct_setup"]
    growth_media = construct_setup['growth_media']
    num_colonies = construct_setup['num_colonies']
    ssDNA = construct_setup['ssDNA']
    mutant_upload = construct_setup['mutant_upload']

    # Group the oligo descriptions of each mutant under its label. The
    # first row for a label creates the list; later rows extend it.
    construct_collect = {}
    for csv_row in mutant_upload:
        construct_collect.setdefault(csv_row["mutant_label"], []).append({
            "sequence": csv_row["sequence"],
            "purification": csv_row["purification"],
            "scale": csv_row["scale"],
            "oligo_label": csv_row["oligo_label"]
        })

    # One synthesis request (and one destination tube) per distinct
    # sequence; repeated sequences reuse the first request.
    oligo_collect = {}
    for row in mutant_upload:
        if row["sequence"] in oligo_collect:
            continue
        if row["oligo_label"] in protocol.refs.keys():
            raise RuntimeError("You cannot specify two different "
                               "oligos to be synthesized with the "
                               "same name %s" % row['oligo_label'])
        oligo_collect[row["sequence"]] = {
            "sequence": row["sequence"],
            "purification": row["purification"],
            "scale": row["scale"],
            "destination": protocol.ref(row["oligo_label"], None,
                                        "micro-2.0",
                                        storage="cold_4").well(0)
        }

    # Make mutant objects for accessibility.
    mutant_constructs = []
    for mut, mut_oligos in construct_collect.items():
        mutant = Mutant(mut)
        for oligo in mut_oligos:
            mutant.add_oligos(oligo_collect[oligo["sequence"]]["destination"])
        mutant_constructs.append(mutant)

    oligos_to_synthesize = []
    for oligo in oligo_collect.values():
        scale_default(len(oligo["sequence"]), oligo["scale"],
                      oligo["destination"].container.name)
        oligos_to_synthesize.append(oligo)
    protocol.oligosynthesize(oligos_to_synthesize)

    assemble_params = {
        'ssDNA': ssDNA,
        'constructs': [{
            'mutant_name': mu.name,
            'oligos': mu.oligos
        } for mu in mutant_constructs],
        'mutant_objs': mutant_constructs
    }
    annealing_plate = assemble(protocol, assemble_params)
    protocol.unseal(annealing_plate)

    transform_params = {
        'num_colonies': num_colonies,
        'growth_media': growth_media,
        'constructs': [mu.anneal_well for mu in mutant_constructs],
        'mutant_objs': mutant_constructs
    }
    growth_plate = transform(protocol, transform_params)

    seq_primers = []
    for seq_primer in params["sequencing"]:
        if seq_primer["seq_choice"] != "No sequencing.":
            # Make temp container with name of stock primer.
            primer = protocol.ref(seq_primer["seq_choice"], None,
                                  "micro-1.5", discard=True).well(0)
            primer.set_name(seq_primer["seq_choice"])
            seq_primers.append(primer)

    # Every construct is sequenced with the same list of primers.
    sequence_params = {
        'seq_set': [{
            'growth_wells': WellGroup(list(mu.growth_wells)),
            'seq_primers': seq_primers
        } for mu in mutant_constructs]
    }

    if seq_primers:
        protocol.uncover(growth_plate)
        sequence(protocol, sequence_params)
        protocol.cover(growth_plate, lid="low_evaporation")

    # The selected option is a single value, so the branches below are
    # mutually exclusive.
    processing = params["other_processing"]["other_processing"]
    if processing != "No processing.":
        protocol.uncover(growth_plate)

        if processing == "Miniprep":
            mini_samples = [{"sample": w, "name": w.name}
                            for mu in mutant_constructs
                            for w in mu.growth_wells]
            miniprep_params = {
                "type": "Miniprep",
                "media": growth_media,
                'samples': mini_samples,
                "growth_plate": growth_plate
            }
            plasmidprep(protocol, miniprep_params)

        elif processing == "Return Colonies":
            return_plate = protocol.ref(
                "return_plate_%s" % printdatetime(time=False),
                cont_type='96-pcr', storage='cold_4')
            for mut in mutant_constructs:
                for g_well in mut.growth_wells:
                    protocol.transfer(g_well,
                                      return_plate.well(g_well.index),
                                      "30:microliter")
                    return_plate.well(g_well.index).set_name(g_well.name)
            protocol.seal(return_plate)
            protocol.cover(growth_plate, lid="low_evaporation")

        elif processing == "Return Colonies Glycerol":
            return_plate = protocol.ref(
                "return_plate_glycerol_%s" % printdatetime(time=False),
                cont_type='96-pcr', storage='cold_4')
            # All culture transfers are issued first, then the provisions
            # from resource "rs17rrhqpsxyh2" into the same wells.
            for mut in mutant_constructs:
                for g_well in mut.growth_wells:
                    protocol.transfer(g_well,
                                      return_plate.well(g_well.index),
                                      "30:microliter")
                    return_plate.well(g_well.index).set_name(g_well.name)
            for mut in mutant_constructs:
                for g_well in mut.growth_wells:
                    protocol.provision("rs17rrhqpsxyh2",
                                       return_plate.well(g_well.index),
                                       "30:microliter")
            protocol.seal(return_plate)
            protocol.cover(growth_plate, lid="low_evaporation")