def setUp(self):
    """Build the fixtures shared by the tests.

    Creates an import ticket; three each of Source, Cds, Trna and Tmrna
    features; a "flat_file" genome holding the first two features of
    each type; a "mysql" genome with only its type set; two genome
    pairs; a bundle referencing the ticket, both genomes and both
    pairs; and four evaluations (two "correct", two "error").
    """

    def tagged(feature, feature_id):
        # Give a freshly built feature its id and hand it back.
        feature.id = feature_id
        return feature

    self.ticket1 = ticket.ImportTicket()

    self.src1 = tagged(source.Source(), "L5_SRC_1")
    self.src2 = tagged(source.Source(), "L5_SRC_2")
    self.src3 = tagged(source.Source(), "L5_SRC_3")

    self.cds1 = tagged(cds.Cds(), "L5_CDS_1")
    self.cds2 = tagged(cds.Cds(), "L5_CDS_2")
    self.cds3 = tagged(cds.Cds(), "L5_CDS_3")

    self.trna1 = tagged(trna.Trna(), "L5_TRNA_1")
    self.trna2 = tagged(trna.Trna(), "L5_TRNA_2")
    self.trna3 = tagged(trna.Trna(), "L5_TRNA_3")

    self.tmrna1 = tagged(tmrna.Tmrna(), "L5_TMRNA_1")
    self.tmrna2 = tagged(tmrna.Tmrna(), "L5_TMRNA_2")
    self.tmrna3 = tagged(tmrna.Tmrna(), "L5_TMRNA_3")

    # The third feature of each type is deliberately left out of the genome.
    flat_file_gnm = genome.Genome()
    flat_file_gnm.type = "flat_file"
    flat_file_gnm.cds_features = [self.cds1, self.cds2]
    flat_file_gnm.source_features = [self.src1, self.src2]
    flat_file_gnm.trna_features = [self.trna1, self.trna2]
    flat_file_gnm.tmrna_features = [self.tmrna1, self.tmrna2]
    self.genome1 = flat_file_gnm

    mysql_gnm = genome.Genome()
    mysql_gnm.type = "mysql"
    self.genome2 = mysql_gnm

    self.genome_pair1 = genomepair.GenomePair()
    self.genome_pair2 = genomepair.GenomePair()

    # Genomes are keyed in the bundle by their type string.
    bndl = bundle.Bundle()
    bndl.ticket = self.ticket1
    for gnm in (self.genome1, self.genome2):
        bndl.genome_dict[gnm.type] = gnm
    bndl.genome_pair_dict["genome_pair1"] = self.genome_pair1
    bndl.genome_pair_dict["genome_pair2"] = self.genome_pair2
    self.bndl = bndl

    new_eval = evaluation.Evaluation
    self.eval_correct1 = new_eval(status="correct")
    self.eval_correct2 = new_eval(status="correct")
    self.eval_error1 = new_eval(status="error")
    self.eval_error2 = new_eval(status="error")
def test_create_genome_statements_3(self):
    """Verify that an 'add' ticket for a genome with two CDS features
    yields a list of three statements."""
    # One row per CDS feature:
    # (start, stop, length, name, translation, orientation, locus_tag)
    cds_rows = (
        (10, 100, 1000, "1", "AGGPT", "F", "SEA_L5_001"),
        (100, 1000, 10000, "2", "AKKQE", "R", "SEA_L5_002"),
    )
    features = []
    for start, stop, length, name, translation, orientation, tag in cds_rows:
        ftr = cds.Cds()
        ftr.genome_id = "L5"
        ftr.start = start
        ftr.stop = stop
        ftr.parts = 1
        ftr.length = length
        ftr.name = name
        ftr.type = "CDS"
        ftr.translation = translation
        ftr.orientation = orientation
        ftr.description = "description"
        ftr.locus_tag = tag
        features.append(ftr)

    gnm = self.genome1
    gnm.id = "L5"
    gnm.name = "L5_Draft"
    gnm.host_genus = "Mycobacterium"
    gnm.annotation_status = "final"
    gnm.accession = "ABC123"
    gnm.seq = "ATCG"
    gnm.length = 4
    gnm.gc = 0.5001
    gnm.date = '1/1/2000'
    gnm.retrieve_record = "1"
    gnm.annotation_author = "1"
    gnm.cluster = "A"
    gnm.subcluster = "A2"
    gnm.cds_features = features

    statements = mysqldb.create_genome_statements(gnm, tkt_type="add")
    self.assertEqual(len(statements), 3)
def test_create_gene_table_insert_2(self):
    """Verify the gene table INSERT statement when locus_tag is empty."""
    # The function under test only returns a string, but the statement
    # is also executed and the row read back, so the test confirms the
    # statement is usable in MySQL.
    feature = cds.Cds()
    feature.id = "SEA_TRIXIE_123"
    feature.genome_id = "Trixie"
    feature.start = 5
    feature.stop = 10
    feature.parts = 1
    feature.translation_length = 20
    feature.name = "Int"
    feature.type = "CDS"
    feature.translation = Seq("ACKLG", IUPAC.protein)
    feature.orientation = "F"
    feature.description = "integrase"
    feature.locus_tag = ""

    statement = mysqldb.create_gene_table_insert(feature)
    test_db_utils.execute(statement)
    rows = test_db_utils.get_data(self.gene_query)
    results = rows[0]

    exp = ("""INSERT INTO gene """
           """(GeneID, PhageID, Start, Stop, Length, Name, """
           """Translation, Orientation, Notes, LocusTag, Parts) """
           """VALUES """
           """("SEA_TRIXIE_123", "Trixie", 5, 10, 20, "Int", """
           """"ACKLG", "F", "integrase", NULL, 1);""")

    with self.subTest():
        self.assertEqual(statement, exp)

    # The empty locus tag is expected to come back from the database as None.
    expected_columns = (
        ("GeneID", "SEA_TRIXIE_123"),
        ("LocusTag", None),
    )
    for column, expected in expected_columns:
        with self.subTest():
            self.assertEqual(results[column], expected)
def test_create_gene_table_insert_1(self):
    """Verify the gene table INSERT statement when locus_tag is not
    empty and the description contains a "'"."""
    # The function under test only returns a string, but the statement
    # is also executed and the row read back, so the test confirms the
    # statement is usable in MySQL.
    feature = cds.Cds()
    feature.id = "SEA_TRIXIE_123"
    feature.genome_id = "Trixie"
    feature.start = 5
    feature.stop = 10
    feature.parts = 1
    feature.translation_length = 20
    feature.length = 200
    feature.name = "Int"
    feature.type = "CDS"
    feature.translation = Seq("ACKLG", IUPAC.protein)
    feature.orientation = "F"
    feature.description = "5' nucleotide phosphatase"
    feature.locus_tag = "TAG1"

    statement = mysqldb.create_gene_table_insert(feature)
    test_db_utils.execute(statement)
    rows = test_db_utils.get_data(GENE_QUERY2)
    results = rows[0]

    exp = ("""INSERT INTO gene """
           """(GeneID, PhageID, Start, Stop, Length, Name, """
           """Translation, Orientation, Notes, LocusTag, Parts) """
           """VALUES """
           """("SEA_TRIXIE_123", "Trixie", 5, 10, 200, "Int", """
           """"ACKLG", "F", "5' nucleotide phosphatase", "TAG1", 1);""")

    with self.subTest():
        self.assertEqual(statement, exp)

    # (column, expected value, needs utf-8 decoding before comparison)
    checks = (
        ("GeneID", "SEA_TRIXIE_123", False),
        ("PhageID", "Trixie", False),
        ("Start", 5, False),
        ("Stop", 10, False),
        ("Parts", 1, False),
        ("Length", 200, False),
        ("Name", "Int", False),
        ("Translation", "ACKLG", True),
        ("Orientation", "F", False),
        ("Notes", "5' nucleotide phosphatase", True),
        ("LocusTag", "TAG1", False),
    )
    for column, expected, needs_decode in checks:
        # Lookup and decode stay inside the subTest so that a failure
        # in one column does not stop the remaining checks.
        with self.subTest():
            actual = results[column]
            if needs_decode:
                actual = actual.decode("utf-8")
            self.assertEqual(actual, expected)
def setUp(self):
    """
    Builds the objects used to unit test the export_db pipeline:
    phage names, Genome objects, Cds objects, a SeqRecord and a
    version dictionary.
    """
    # Test phage names
    self.names = ["TestPhage_1", "TestPhage_2", "TestPhage_3"]

    # One Genome object per phage name
    genomes = []
    for phage_name in (self.names[0], self.names[1], self.names[2]):
        test_phage = genome.Genome()
        test_phage.name = phage_name
        genomes.append(test_phage)
    self.genomes = genomes

    # Three Cds objects that differ only in their coordinates
    cds_list = []
    for start, stop in ((1, 2), (2, 3), (3, 4)):
        test_cds = cds.Cds()
        test_cds.start = start
        test_cds.stop = stop
        test_cds.coordinate_format = "0_half_open"
        test_cds.orientation = 1
        cds_list.append(test_cds)
    self.cds_list = cds_list

    # A SeqRecord with an empty "comment" annotation
    record = SeqRecord(Seq("ATGC"))
    record.annotations.update({"comment": ()})
    self.test_seqrecord = record

    self.test_version_dictionary = {
        "Version": "Test",
        "SchemaVersion": "Test",
    }
def setUp(self):
    """Build the fixtures shared by the tests.

    Creates an import ticket; three Source and three Cds features; a
    "flat_file" genome to which the first two features of each type
    are added; a "mysql" genome with only its type set; two genome
    pairs; a bundle referencing the ticket, both genomes and both
    pairs; and four evals (two "correct", two "error").
    """

    def tagged(feature, feature_id):
        # Give a freshly built feature its id and hand it back.
        feature.id = feature_id
        return feature

    self.ticket1 = ticket.ImportTicket()

    self.src1 = tagged(source.Source(), "L5_SRC_1")
    self.src2 = tagged(source.Source(), "L5_SRC_2")
    self.src3 = tagged(source.Source(), "L5_SRC_3")

    self.cds1 = tagged(cds.Cds(), "L5_CDS_1")
    self.cds2 = tagged(cds.Cds(), "L5_CDS_2")
    self.cds3 = tagged(cds.Cds(), "L5_CDS_3")

    # Features are added to the containers the Genome already has;
    # the third feature of each type is deliberately left out.
    flat_file_gnm = genome.Genome()
    flat_file_gnm.type = "flat_file"
    flat_file_gnm.cds_features.extend([self.cds1, self.cds2])
    flat_file_gnm.source_features.extend([self.src1, self.src2])
    self.genome1 = flat_file_gnm

    mysql_gnm = genome.Genome()
    mysql_gnm.type = "mysql"
    self.genome2 = mysql_gnm

    self.genome_pair1 = genomepair.GenomePair()
    self.genome_pair2 = genomepair.GenomePair()

    # Genomes are keyed in the bundle by their type string.
    bndl = bundle.Bundle()
    bndl.ticket = self.ticket1
    for gnm in (self.genome1, self.genome2):
        bndl.genome_dict[gnm.type] = gnm
    bndl.genome_pair_dict["genome_pair1"] = self.genome_pair1
    bndl.genome_pair_dict["genome_pair2"] = self.genome_pair2
    self.bndl = bndl

    new_eval = eval.Eval
    self.eval_correct1 = new_eval(status="correct")
    self.eval_correct2 = new_eval(status="correct")
    self.eval_error1 = new_eval(status="error")
    self.eval_error2 = new_eval(status="error")
def setUp(self):
    """Create a fresh CdsPair and two fresh Cds objects for each test."""
    self.cds_pair = cdspair.CdsPair()
    self.cds1, self.cds2 = cds.Cds(), cds.Cds()
def _first_qualifier(seqfeature, key, default=""):
    """Return the first value of a SeqFeature qualifier, or a default.

    Only lookup failures (no qualifiers attribute, missing key, empty
    or non-subscriptable value) fall back to the default, so that
    KeyboardInterrupt and SystemExit are never swallowed.
    """
    try:
        return seqfeature.qualifiers[key][0]
    except (AttributeError, KeyError, IndexError, TypeError):
        return default


def parse_cds_seqfeature(seqfeature):
    """Parse data from a Biopython CDS SeqFeature object into a Cds object.

    Qualifiers that are absent from the seqfeature fall back to a
    default: an empty string for locus_tag, translation, product,
    function, note and gene, and 0 for transl_table.

    :param seqfeature: Biopython SeqFeature
    :type seqfeature: SeqFeature
    :returns: A pdm_utils Cds object
    :rtype: Cds
    """
    generic_words = {"gp", "orf", ""}

    cds_ftr = cds.Cds()
    cds_ftr.seqfeature = seqfeature

    cds_ftr.set_locus_tag(_first_qualifier(seqfeature, "locus_tag"))

    cds_ftr.set_orientation(seqfeature.strand, "fr_short", case=True)
    cds_ftr.start, cds_ftr.stop, cds_ftr.parts = parse_coordinates(seqfeature)

    # Coordinate format for GenBank flat file features parsed by Biopython
    # are 0-based half open intervals.
    cds_ftr.coordinate_format = "0_half_open"

    # For translation, convert it to a Biopython Seq object.
    translation = _first_qualifier(seqfeature, "translation")
    translation = Seq(translation, Alphabet.IUPAC.protein)
    cds_ftr.set_translation(translation)
    cds_ftr.set_nucleotide_length()

    translation_table = _first_qualifier(seqfeature, "transl_table", 0)
    cds_ftr.set_translation_table(translation_table)

    # The three description qualifiers are parsed in the same way and
    # in the same order as before: product, function, note.
    for field in ("product", "function", "note"):
        value = _first_qualifier(seqfeature, field)
        cds_ftr.set_description_field(field, value, generic_words)

    cds_ftr.gene = _first_qualifier(seqfeature, "gene")
    cds_ftr.set_name()
    return cds_ftr
def setUp(self):
    """Create a fresh Cds feature, Source feature and Genome for each test."""
    cds_feature = cds.Cds()
    source_feature = source.Source()
    gnm = genome.Genome()

    self.cds_ftr = cds_feature
    self.src_ftr = source_feature
    self.gnm = gnm
def parse_gene_table_data(data_dict, trans_table=11):
    """Parse a MySQL database dictionary to create a Cds object.

    Each field is copied independently on a best-effort basis: if a
    column is missing from data_dict, or its value cannot be converted
    or stored, that field is skipped and parsing continues with the
    next one. Only ordinary errors are skipped; KeyboardInterrupt and
    SystemExit propagate to the caller.

    :param data_dict:
        Dictionary of data retrieved from the gene table.
    :type data_dict: dict
    :param trans_table:
        The translation table that can be used to translate CDS features.
    :type trans_table: int
    :returns: A pdm_utils Cds object.
    :rtype: Cds
    """
    ftr = cds.Cds()
    ftr.type = "CDS"

    try:
        ftr.id = data_dict["GeneID"]
    except Exception:
        pass

    try:
        ftr.genome_id = data_dict["PhageID"]
    except Exception:
        pass

    try:
        ftr.start = int(data_dict["Start"])
    except Exception:
        pass

    try:
        ftr.stop = int(data_dict["Stop"])
    except Exception:
        pass

    try:
        ftr.parts = int(data_dict["Parts"])
    except Exception:
        pass

    ftr.coordinate_format = "0_half_open"

    try:
        ftr.length = int(data_dict["Length"])
    except Exception:
        pass

    try:
        ftr.name = data_dict["Name"]
    except Exception:
        pass

    # Translation and Notes are decoded from bytes before being stored.
    try:
        ftr.set_translation(data_dict["Translation"].decode("utf-8"))
    except Exception:
        pass

    try:
        ftr.orientation = data_dict["Orientation"]
    except Exception:
        pass

    try:
        ftr.description = data_dict["Notes"].decode("utf-8")
    except Exception:
        pass

    try:
        ftr.set_locus_tag(data_dict["LocusTag"])
    except Exception:
        pass

    try:
        ftr.pham_id = int(data_dict["PhamID"])
    except Exception:
        pass

    try:
        ftr.domain_status = int(data_dict["DomainStatus"])
    except Exception:
        pass

    # The translation table comes from the argument, not from data_dict.
    try:
        ftr.translation_table = trans_table
    except Exception:
        pass

    return ftr
def setUp(self):
    """Build the genome, CDS, tRNA and tmRNA fixtures used by the tests.

    The two CDS features differ in coordinates, length, name,
    translation, orientation and locus tag. The two tRNA features are
    given identical values, as are the two tmRNA features.
    """

    def build_cds(start, stop, length, name, translation, orientation, tag):
        ftr = cds.Cds()
        ftr.genome_id = "L5"
        ftr.start = start
        ftr.stop = stop
        ftr.parts = 1
        ftr.length = length
        ftr.name = name
        ftr.type = "CDS"
        ftr.translation = translation
        ftr.orientation = orientation
        ftr.description = "description"
        ftr.locus_tag = tag
        return ftr

    def build_trna():
        ftr = trna.Trna()
        ftr.id = "Trixie_1"
        ftr.genome_id = "Trixie"
        ftr.name = "1"
        ftr.locus_tag = "TAG1"
        ftr.start = 5
        ftr.stop = 10
        ftr.length = 200
        ftr.orientation = "F"
        ftr.note = "misc"
        ftr.amino_acid = "Ala"
        ftr.anticodon = "AAA"
        ftr.structure = "random"
        ftr.use = "aragorn"
        return ftr

    def build_tmrna():
        ftr = tmrna.Tmrna()
        ftr.id = "Trixie_1"
        ftr.genome_id = "Trixie"
        ftr.name = "1"
        ftr.locus_tag = "TAG1"
        ftr.start = 5
        ftr.stop = 10
        ftr.length = 200
        ftr.orientation = "F"
        ftr.note = "misc"
        ftr.peptide_tag = "random"
        return ftr

    gnm = genome.Genome()
    gnm.id = "L5"
    gnm.name = "L5_Draft"
    gnm.host_genus = "Mycobacterium"
    gnm.annotation_status = "final"
    gnm.accession = "ABC123"
    gnm.seq = "ATCG"
    gnm.length = 4
    gnm.gc = 0.5001
    gnm.date = '1/1/2000'
    gnm.retrieve_record = "1"
    gnm.annotation_author = "1"
    gnm.cluster = "A"
    gnm.subcluster = "A2"
    self.genome1 = gnm

    self.cds1 = build_cds(10, 100, 1000, "1", "AGGPT", "F", "SEA_L5_001")
    self.cds2 = build_cds(100, 1000, 10000, "2", "AKKQE", "R", "SEA_L5_002")
    # The feature lists are kept on the test case, not attached to genome1.
    self.cds_features = [self.cds1, self.cds2]

    self.trna1 = build_trna()
    self.trna2 = build_trna()
    self.trna_features = [self.trna1, self.trna2]

    self.tmrna1 = build_tmrna()
    self.tmrna2 = build_tmrna()
    self.tmrna_features = [self.tmrna1, self.tmrna2]