def test_finds_sequence_on_specified_genome(self):
    """blast_genome on one genome reports only the hit on that genome's fragment.

    The same sequence is also stored on a fragment of a second genome; the
    single expected hit must reference the first genome's fragment.
    """
    s1 = 'atcggtatcttctatgcgtatgcgtcatgattatatatattagcggcatg'
    s2 = 'agcgtcgatgcatgagtcgatcggcagtcgtgtagtcgtcgtatgcgtta'

    g1 = Genome(name='Foo')
    g1.save()
    f1 = Fragment.create_with_sequence('Bar', s1)
    f2 = Fragment.create_with_sequence('Baz', s2)
    Genome_Fragment(genome=g1, fragment=f1, inherited=False).save()
    Genome_Fragment(genome=g1, fragment=f2, inherited=False).save()

    g2 = Genome(name='Far')
    g2.save()
    f3 = Fragment.create_with_sequence('Bar', s1)
    Genome_Fragment(genome=g2, fragment=f3, inherited=False).save()

    # Delete each fragment's file on its own: inside one shared try block a
    # failure on the first unlink would skip the remaining deletions.
    for fragment in (f1, f2, f3):
        try:
            os.unlink(fragment_fasta_fn(fragment))
        except OSError:
            # best effort: the file may not exist
            pass
    build_all_genome_dbs(refresh=True)

    # reload the genome after the databases have been rebuilt
    g1 = Genome.objects.get(pk=g1.id)
    query = s1[6:20] + 'aaaaaaaaa'
    r = blast_genome(g1, 'blastn', query)

    # only returns hit from genome
    self.assertEqual(len(r), 1)
    hit = r[0]
    self.assertEqual(hit.fragment_id, f1.id)
    # the matching part of the query is s1[6:20]; expected positions are 1-based
    self.assertEqual(hit.query_start, 1)
    self.assertEqual(hit.query_end, 14)
    self.assertEqual(hit.subject_start, 7)
    self.assertEqual(hit.subject_end, 20)
    self.assertEqual(hit.strand(), 1)
def test_blast_aligns_sequence_to_antisense_strand(self):
    """The blast endpoint reports a reverse-complement match with start > end.

    The query is the reverse complement of s1[6:20] followed by filler, so the
    expected subject coordinates run from 20 down to 7.
    """
    s1 = 'atcggtatcttctatgcgtatgcgtcatgattatatatattagcggcatg'
    g1 = Genome(name='Foo')
    g1.save()
    f1 = Fragment.create_with_sequence('Bar', s1)
    Genome_Fragment(genome=g1, fragment=f1, inherited=False).save()

    try:
        os.unlink(fragment_fasta_fn(f1))
    except OSError:
        # best effort: the file may not exist
        pass
    build_all_genome_dbs(refresh=True)

    query = str(Seq(s1[6:20]).reverse_complement()) + 'tttttttttt'
    payload = json.dumps(dict(program='blastn', query=query))
    res = self.client.post('/edge/genomes/%s/blast/' % g1.id,
                           data=payload,
                           content_type='application/json')
    self.assertEqual(res.status_code, 200)
    d = json.loads(res.content)

    # only returns hit from genome
    self.assertEqual(len(d), 1)
    hit = d[0]
    self.assertEqual(hit['fragment_id'], f1.id)
    self.assertEqual(hit['query_start'], 1)
    self.assertEqual(hit['query_end'], 14)
    self.assertEqual(hit['subject_start'], 20)
    self.assertEqual(hit['subject_end'], 7)
def build_genome(self, circular, *templates):
    """Create a genome named 'Foo' with one fragment per template sequence.

    circular: passed to Fragment.create_with_sequence for every fragment.
    templates: sequences; each becomes a fragment named 'Bar' on the genome.

    Returns the genome re-read from the database after
    build_all_genome_dbs(refresh=True) has run.
    """
    g = Genome(name='Foo')
    g.save()
    for seq in templates:
        f = Fragment.create_with_sequence('Bar', seq, circular=circular)
        Genome_Fragment(genome=g, fragment=f, inherited=False).save()
        try:
            os.unlink(fragment_fasta_fn(f))
        except OSError:
            # best effort: the file may not exist; other errors (e.g.
            # KeyboardInterrupt) must not be swallowed
            pass
    build_all_genome_dbs(refresh=True)
    return Genome.objects.get(pk=g.id)
def test_blast_finds_sequence_on_specified_genome(self):
    """The blast endpoint of each genome reports only that genome's own fragment.

    Two genomes carry a fragment with the same sequence; posting the same query
    to each genome's endpoint must return the fragment attached to that genome.
    """
    s1 = get_random_sequence(200)
    s2 = get_random_sequence(200)

    g1 = Genome(name="Foo")
    g1.save()
    f1 = Fragment.create_with_sequence("Bar", s1)
    f2 = Fragment.create_with_sequence("Baz", s2)
    Genome_Fragment(genome=g1, fragment=f1, inherited=False).save()
    Genome_Fragment(genome=g1, fragment=f2, inherited=False).save()

    g2 = Genome(name="Far")
    g2.save()
    f3 = Fragment.create_with_sequence("Bar", s1)
    Genome_Fragment(genome=g2, fragment=f3, inherited=False).save()

    # Delete each fragment's file on its own: inside one shared try block a
    # failure on the first unlink would skip the remaining deletions.
    for fragment in (f1, f2, f3):
        try:
            os.unlink(fragment_fasta_fn(fragment))
        except OSError:
            # best effort: the file may not exist
            pass
    build_all_genome_dbs(refresh=True)

    query = s1[6:20] + "aaaaaaaaa"
    payload = json.dumps(dict(program="blastn", query=query))

    res = self.client.post(
        "/edge/genomes/%s/blast/" % g1.id,
        data=payload,
        content_type="application/json",
    )
    self.assertEqual(res.status_code, 200)
    d = json.loads(res.content)

    # only returns hit from genome
    self.assertEqual(len(d), 1)
    self.assertEqual(d[0]["fragment_id"], f1.id)
    self.assertEqual(d[0]["query_start"], 1)
    self.assertEqual(d[0]["query_end"], 14)
    self.assertEqual(d[0]["subject_start"], 7)
    self.assertEqual(d[0]["subject_end"], 20)

    # blast in other genome works too
    res = self.client.post(
        "/edge/genomes/%s/blast/" % g2.id,
        data=payload,
        content_type="application/json",
    )
    self.assertEqual(res.status_code, 200)
    d = json.loads(res.content)
    self.assertEqual(len(d), 1)
    self.assertEqual(d[0]["fragment_id"], f3.id)
def test_blast_finds_sequence_on_specified_genome(self):
    """The blast endpoint of each genome reports only that genome's own fragment.

    Two genomes carry a fragment with the same sequence; posting the same query
    to each genome's endpoint must return the fragment attached to that genome.
    """
    s1 = 'atcggtatcttctatgcgtatgcgtcatgattatatatattagcggcatg'
    s2 = 'agcgtcgatgcatgagtcgatcggcagtcgtgtagtcgtcgtatgcgtta'

    g1 = Genome(name='Foo')
    g1.save()
    f1 = Fragment.create_with_sequence('Bar', s1)
    f2 = Fragment.create_with_sequence('Baz', s2)
    Genome_Fragment(genome=g1, fragment=f1, inherited=False).save()
    Genome_Fragment(genome=g1, fragment=f2, inherited=False).save()

    g2 = Genome(name='Far')
    g2.save()
    f3 = Fragment.create_with_sequence('Bar', s1)
    Genome_Fragment(genome=g2, fragment=f3, inherited=False).save()

    # Delete each fragment's file on its own: inside one shared try block a
    # failure on the first unlink would skip the remaining deletions.
    for fragment in (f1, f2, f3):
        try:
            os.unlink(fragment_fasta_fn(fragment))
        except OSError:
            # best effort: the file may not exist
            pass
    build_all_genome_dbs(refresh=True)

    query = s1[6:20] + 'aaaaaaaaa'
    payload = json.dumps(dict(program='blastn', query=query))

    res = self.client.post('/edge/genomes/%s/blast/' % g1.id,
                           data=payload,
                           content_type='application/json')
    self.assertEqual(res.status_code, 200)
    d = json.loads(res.content)

    # only returns hit from genome
    self.assertEqual(len(d), 1)
    self.assertEqual(d[0]['fragment_id'], f1.id)
    self.assertEqual(d[0]['query_start'], 1)
    self.assertEqual(d[0]['query_end'], 14)
    self.assertEqual(d[0]['subject_start'], 7)
    self.assertEqual(d[0]['subject_end'], 20)

    # blast in other genome works too
    res = self.client.post('/edge/genomes/%s/blast/' % g2.id,
                           data=payload,
                           content_type='application/json')
    self.assertEqual(res.status_code, 200)
    d = json.loads(res.content)
    self.assertEqual(len(d), 1)
    self.assertEqual(d[0]['fragment_id'], f3.id)
def test_blast_finds_sequence_on_specified_genome(self):
    """Posting a query to a genome's blast URL returns only hits on that genome.

    g1 and g2 each own a fragment built from the same sequence; the response
    for g1 must name g1's fragment and the response for g2 must name g2's.
    """
    s1 = 'atcggtatcttctatgcgtatgcgtcatgattatatatattagcggcatg'
    s2 = 'agcgtcgatgcatgagtcgatcggcagtcgtgtagtcgtcgtatgcgtta'

    g1 = Genome(name='Foo')
    g1.save()
    f1 = Fragment.create_with_sequence('Bar', s1)
    f2 = Fragment.create_with_sequence('Baz', s2)
    Genome_Fragment(genome=g1, fragment=f1, inherited=False).save()
    Genome_Fragment(genome=g1, fragment=f2, inherited=False).save()

    g2 = Genome(name='Far')
    g2.save()
    f3 = Fragment.create_with_sequence('Bar', s1)
    Genome_Fragment(genome=g2, fragment=f3, inherited=False).save()

    # One try per file, so a failed unlink does not prevent the later ones
    # (the original single try block stopped at the first failure).
    for fragment in (f1, f2, f3):
        try:
            os.unlink(fragment_fasta_fn(fragment))
        except OSError:
            # best effort: the file may not exist
            pass
    build_all_genome_dbs(refresh=True)

    query = s1[6:20] + 'aaaaaaaaa'
    body = json.dumps(dict(program='blastn', query=query))

    res = self.client.post('/edge/genomes/%s/blast/' % g1.id,
                           data=body, content_type='application/json')
    self.assertEqual(res.status_code, 200)
    d = json.loads(res.content)

    # only returns hit from genome
    self.assertEqual(len(d), 1)
    self.assertEqual(d[0]['fragment_id'], f1.id)
    self.assertEqual(d[0]['query_start'], 1)
    self.assertEqual(d[0]['query_end'], 14)
    self.assertEqual(d[0]['subject_start'], 7)
    self.assertEqual(d[0]['subject_end'], 20)

    # blast in other genome works too
    res = self.client.post('/edge/genomes/%s/blast/' % g2.id,
                           data=body, content_type='application/json')
    self.assertEqual(res.status_code, 200)
    d = json.loads(res.content)
    self.assertEqual(len(d), 1)
    self.assertEqual(d[0]['fragment_id'], f3.id)
def test_does_not_return_duplicate_hits_for_circular_fragments(self):
    """A query matching once inside a circular fragment yields exactly one hit."""
    s1 = get_random_sequence(200)
    g1 = Genome(name="Foo")
    g1.save()
    f1 = Fragment.create_with_sequence("Bar", s1, circular=True)
    Genome_Fragment(genome=g1, fragment=f1, inherited=False).save()

    try:
        os.unlink(fragment_fasta_fn(f1))
    except OSError:
        # best effort: the file may not exist
        pass
    build_all_genome_dbs(refresh=True)

    # reload the genome after the databases have been rebuilt
    g1 = Genome.objects.get(pk=g1.id)
    query = s1[5:20] + "tttttttttt"
    r = blast_genome(g1, "blastn", query)
    self.assertEqual(len(r), 1)
def test_does_not_return_duplicate_hits_for_circular_fragments(self):
    """A query matching once inside a circular fragment yields exactly one hit."""
    s1 = 'atcggtatcttctatgcgtatgcgtcatgattatatatattagcggcatg'
    g1 = Genome(name='Foo')
    g1.save()
    f1 = Fragment.create_with_sequence('Bar', s1, circular=True)
    Genome_Fragment(genome=g1, fragment=f1, inherited=False).save()

    try:
        os.unlink(fragment_fasta_fn(f1))
    except OSError:
        # best effort: the file may not exist
        pass
    build_all_genome_dbs(refresh=True)

    # reload the genome after the databases have been rebuilt
    g1 = Genome.objects.get(pk=g1.id)
    query = s1[5:20] + 'tttttttttt'
    r = blast_genome(g1, 'blastn', query)
    self.assertEqual(len(r), 1)
def test_does_not_align_sequence_across_boundry_for_non_circular_fragment(self):
    """Hits on a linear fragment never extend past the fragment's ends.

    The query joins the last 10 and first 10 bases of the fragment; every
    reported subject coordinate must stay within 1..len(s1).
    """
    s1 = get_random_sequence(200)
    g1 = Genome(name="Foo")
    g1.save()
    f1 = Fragment.create_with_sequence("Bar", s1, circular=False)
    Genome_Fragment(genome=g1, fragment=f1, inherited=False).save()

    try:
        os.unlink(fragment_fasta_fn(f1))
    except OSError:
        # best effort: the file may not exist
        pass
    build_all_genome_dbs(refresh=True)

    # reload the genome after the databases have been rebuilt
    g1 = Genome.objects.get(pk=g1.id)
    query = (s1[-10:] + s1[0:10]) + "tttttttttt"
    res = blast_genome(g1, "blastn", query)

    for r in res:
        self.assertTrue(0 < r.subject_start <= len(s1))
        self.assertTrue(0 < r.subject_end <= len(s1))
def test_does_not_align_sequence_across_boundry_for_non_circular_fragment(self):
    """Hits on a linear fragment never extend past the fragment's ends.

    The query joins the last 10 and first 10 bases of the fragment; every
    reported subject coordinate must stay within 1..len(s1).
    """
    s1 = 'atcggtatcttctatgcgtatgcgtcatgattatatatattagcggcatg'
    g1 = Genome(name='Foo')
    g1.save()
    f1 = Fragment.create_with_sequence('Bar', s1, circular=False)
    Genome_Fragment(genome=g1, fragment=f1, inherited=False).save()

    try:
        os.unlink(fragment_fasta_fn(f1))
    except OSError:
        # best effort: the file may not exist
        pass
    build_all_genome_dbs(refresh=True)

    # reload the genome after the databases have been rebuilt
    g1 = Genome.objects.get(pk=g1.id)
    query = (s1[-10:] + s1[0:10]) + 'tttttttttt'
    res = blast_genome(g1, 'blastn', query)

    for r in res:
        self.assertTrue(0 < r.subject_start <= len(s1))
        self.assertTrue(0 < r.subject_end <= len(s1))
def test_aligns_sequence_to_antisense_strand(self):
    """blast_genome reports a reverse-complement match on strand -1.

    The query is the reverse complement of s1[6:20] followed by filler, so the
    expected subject coordinates run from 20 down to 7.
    """
    s1 = 'atcggtatcttctatgcgtatgcgtcatgattatatatattagcggcatg'
    g1 = Genome(name='Foo')
    g1.save()
    f1 = Fragment.create_with_sequence('Bar', s1)
    Genome_Fragment(genome=g1, fragment=f1, inherited=False).save()

    try:
        os.unlink(fragment_fasta_fn(f1))
    except OSError:
        # best effort: the file may not exist
        pass
    build_all_genome_dbs(refresh=True)

    # reload the genome after the databases have been rebuilt
    g1 = Genome.objects.get(pk=g1.id)
    query = str(Seq(s1[6:20]).reverse_complement()) + 'tttttttttt'
    r = blast_genome(g1, 'blastn', query)

    self.assertEqual(len(r), 1)
    hit = r[0]
    self.assertEqual(hit.fragment_id, f1.id)
    self.assertEqual(hit.query_start, 1)
    self.assertEqual(hit.query_end, 14)
    self.assertEqual(hit.subject_start, 20)
    self.assertEqual(hit.subject_end, 7)
    self.assertEqual(hit.strand(), -1)
def test_does_not_align_sequence_across_boundry_for_non_circular_fragment(
        self):
    """Subject coordinates of hits on a linear fragment stay within 1..len(s1).

    The query is the fragment's last 10 bases followed by its first 10 bases
    and a filler tail.
    """
    s1 = 'atcggtatcttctatgcgtatgcgtcatgattatatatattagcggcatg'
    g1 = Genome(name='Foo')
    g1.save()
    f1 = Fragment.create_with_sequence('Bar', s1, circular=False)
    Genome_Fragment(genome=g1, fragment=f1, inherited=False).save()

    try:
        os.unlink(fragment_fasta_fn(f1))
    except OSError:
        # best effort: the file may not exist
        pass
    build_all_genome_dbs(refresh=True)

    # reload the genome after the databases have been rebuilt
    g1 = Genome.objects.get(pk=g1.id)
    query = (s1[-10:] + s1[0:10]) + 'tttttttttt'
    res = blast_genome(g1, 'blastn', query)

    upper = len(s1)
    for r in res:
        self.assertTrue(0 < r.subject_start <= upper)
        self.assertTrue(0 < r.subject_end <= upper)
def test_aligns_sequence_to_antisense_strand(self):
    """blast_genome reports a reverse-complement match on strand -1.

    The query is the reverse complement of s1[6:20] followed by filler, so the
    expected subject coordinates run from 20 down to 7.
    """
    s1 = get_random_sequence(200)
    g1 = Genome(name="Foo")
    g1.save()
    f1 = Fragment.create_with_sequence("Bar", s1)
    Genome_Fragment(genome=g1, fragment=f1, inherited=False).save()

    try:
        os.unlink(fragment_fasta_fn(f1))
    except OSError:
        # best effort: the file may not exist
        pass
    build_all_genome_dbs(refresh=True)

    # reload the genome after the databases have been rebuilt
    g1 = Genome.objects.get(pk=g1.id)
    query = str(Seq(s1[6:20]).reverse_complement()) + "tttttttttt"
    r = blast_genome(g1, "blastn", query)

    self.assertEqual(len(r), 1)
    hit = r[0]
    self.assertEqual(hit.fragment_id, f1.id)
    self.assertEqual(hit.query_start, 1)
    self.assertEqual(hit.query_end, 14)
    self.assertEqual(hit.subject_start, 20)
    self.assertEqual(hit.subject_end, 7)
    self.assertEqual(hit.strand(), -1)
def test_aligns_sequence_across_boundry_for_circular_fragment(self):
    """A query spanning the end/start junction of a circular fragment is found.

    The query is the fragment's last 10 bases followed by its first 10 bases
    and a filler tail. The expected hit covers query positions 1..20, with a
    subject range that starts 10 bases before the fragment end and ends at
    len(s1) + 10.
    """
    s1 = 'atcggtatctactatgcgtatgcgtcatgattatatatattagcggcatg'
    g1 = Genome(name='Foo')
    g1.save()
    f1 = Fragment.create_with_sequence('Bar', s1, circular=True)
    Genome_Fragment(genome=g1, fragment=f1, inherited=False).save()

    try:
        os.unlink(fragment_fasta_fn(f1))
    except OSError:
        # best effort: the file may not exist
        pass
    build_all_genome_dbs(refresh=True)

    # reload the genome after the databases have been rebuilt
    g1 = Genome.objects.get(pk=g1.id)
    query = (s1[-10:] + s1[0:10]) + 'ttttttttttt'
    res = blast_genome(g1, 'blastn', query)

    # we are not removing redundant matches when matching across circular
    # boundaries, since blasting across circular boundary of a genome is a
    # rare case. so in this particular case, you will find two results, one
    # for the end of the query at the start of the genome, one for across
    # the circular boundary.
    found = False
    for r in res:
        if r.query_start == 1 and r.query_end == 20:
            self.assertEqual(r.fragment_id, f1.id)
            self.assertEqual(r.query_start, 1)
            self.assertEqual(r.query_end, 20)
            self.assertEqual(r.subject_start, len(s1) - 10 + 1)
            self.assertEqual(r.subject_end, len(s1) + 10)
            self.assertEqual(r.fragment_length, len(s1))
            self.assertEqual(r.strand(), 1)
            found = True
            break
    self.assertTrue(found)
def test_aligns_sequence_across_boundry_for_circular_fragment(self):
    """A query spanning the end/start junction of a circular fragment is found.

    The query is the fragment's last 10 bases followed by its first 10 bases
    and a filler tail. The expected hit covers query positions 1..20, with a
    subject range that starts 10 bases before the fragment end and ends at
    len(s1) + 10.
    """
    s1 = get_random_sequence(200)
    g1 = Genome(name="Foo")
    g1.save()
    f1 = Fragment.create_with_sequence("Bar", s1, circular=True)
    Genome_Fragment(genome=g1, fragment=f1, inherited=False).save()

    try:
        os.unlink(fragment_fasta_fn(f1))
    except OSError:
        # best effort: the file may not exist
        pass
    build_all_genome_dbs(refresh=True)

    # reload the genome after the databases have been rebuilt
    g1 = Genome.objects.get(pk=g1.id)
    query = (s1[-10:] + s1[0:10]) + "ttttttttttt"
    res = blast_genome(g1, "blastn", query)

    # we are not removing redundant matches when matching across circular
    # boundaries, since blasting across circular boundary of a genome is a
    # rare case. so in this particular case, you will find two results, one
    # for the end of the query at the start of the genome, one for across
    # the circular boundary.
    found = False
    for r in res:
        if r.query_start == 1 and r.query_end == 20:
            self.assertEqual(r.fragment_id, f1.id)
            self.assertEqual(r.query_start, 1)
            self.assertEqual(r.query_end, 20)
            self.assertEqual(r.subject_start, len(s1) - 10 + 1)
            self.assertEqual(r.subject_end, len(s1) + 10)
            self.assertEqual(r.fragment_length, len(s1))
            self.assertEqual(r.strand(), 1)
            found = True
            break
    self.assertTrue(found)
def __test_verification_primers(self, template, middle, cassette, arm_len, is_reversed):
    """Check designed verification primers before and after recombination.

    Builds a linear genome from ``template``, has find_swap_region design
    primers for ``cassette``, then runs pcr_from_genome with every primer pair
    on the unmodified genome and on the genome returned by recombine.

    template: sequence used to build the genome.
    middle: sequence expected inside cassette-primer products on the
        unmodified genome.
    cassette: sequence passed to find_swap_region and recombine.
    arm_len: arm length passed to find_swap_region and recombine.
    is_reversed: when true, products from the modified genome are compared
        against the reverse complement of ``cassette``.
    """
    from edge.pcr import pcr_from_genome

    def pcr_product(genome, primer):
        # first element of the pcr_from_genome result; None means no product
        return pcr_from_genome(genome,
                               primer['PRIMER_LEFT_SEQUENCE'],
                               primer['PRIMER_RIGHT_SEQUENCE'])[0]

    g = self.build_genome(False, template)
    r = find_swap_region(g, cassette, arm_len, design_primers=True)

    self.assertEqual(len(r), 1)
    region = r[0]
    self.assertEqual(len(region.verification_cassette), 5)
    self.assertEqual(len(region.verification_front), 5)
    self.assertEqual(len(region.verification_back), 5)

    # cassette verification primers should work on unmodified genome
    for primer in region.verification_cassette:
        product = pcr_product(g, primer)
        self.assertIsNotNone(product)
        # assertIn reports a proper failure; ``index(...) >= 0`` could never
        # be False because index raises when the value is missing
        self.assertIn(middle, product)

    # front verification primers should NOT produce product
    for primer in region.verification_front:
        self.assertIsNone(pcr_product(g, primer))

    # back verification primers should NOT produce product
    for primer in region.verification_back:
        self.assertIsNone(pcr_product(g, primer))

    # do recombination, then try primers again on modified genome
    c = recombine(g, cassette, arm_len)

    for f in c.fragments.all():
        try:
            os.unlink(fragment_fasta_fn(f))
        except OSError:
            # best effort: the file may not exist
            pass
    build_all_genome_dbs(refresh=True)
    # reload to get blastdb
    c = Genome.objects.get(pk=c.id)

    if is_reversed:
        cassette = str(Seq(cassette).reverse_complement())

    # cassette verification primers should work on modified genome, finding cassette
    for primer in region.verification_cassette:
        product = pcr_product(c, primer)
        self.assertIsNotNone(product)
        self.assertIn(cassette, product)

    # front verification primers should find a product including front of cassette
    for primer in region.verification_front:
        product = pcr_product(c, primer)
        self.assertIsNotNone(product)
        self.assertIn(cassette[0:edge.recombine.CHECK_JUNCTION_LEFT_DN], product)

    # back verification primers should find a product including back of cassette
    for primer in region.verification_back:
        product = pcr_product(c, primer)
        self.assertIsNotNone(product)
        self.assertIn(cassette[-edge.recombine.CHECK_JUNCTION_RIGHT_UP:], product)
def __test_verification_primers(self, template, middle, cassette, arm_len, is_reversed):
    """Exercise the verification primers from find_swap_region around a recombination.

    On the unmodified genome the cassette primers must yield a product that
    contains ``middle``, while the front and back primers must yield none.
    After recombine, all three primer sets must yield products containing the
    cassette (or its front / back junction slice).

    template: sequence used to build the linear test genome.
    middle: sequence expected in cassette-primer products before recombination.
    cassette: sequence passed to find_swap_region and recombine.
    arm_len: arm length passed to find_swap_region and recombine.
    is_reversed: when true, post-recombination products are checked against
        the reverse complement of ``cassette``.
    """
    from edge.pcr import pcr_from_genome

    def amplify(genome, primer):
        # first element of the pcr_from_genome result; None means no product
        left = primer['PRIMER_LEFT_SEQUENCE']
        right = primer['PRIMER_RIGHT_SEQUENCE']
        return pcr_from_genome(genome, left, right)[0]

    g = self.build_genome(False, template)
    r = find_swap_region(g, cassette, arm_len, design_primers=True)

    self.assertEqual(len(r), 1)
    self.assertEqual(len(r[0].verification_cassette), 5)
    self.assertEqual(len(r[0].verification_front), 5)
    self.assertEqual(len(r[0].verification_back), 5)

    # cassette verification primers should work on unmodified genome
    for primer in r[0].verification_cassette:
        amplicon = amplify(g, primer)
        self.assertIsNotNone(amplicon)
        # ``index(...) >= 0`` was always True (index raises on a miss), so
        # use a real membership assertion instead
        self.assertIn(middle, amplicon)

    # front verification primers should NOT produce product
    for primer in r[0].verification_front:
        self.assertIsNone(amplify(g, primer))

    # back verification primers should NOT produce product
    for primer in r[0].verification_back:
        self.assertIsNone(amplify(g, primer))

    # do recombination, then try primers again on modified genome
    c = recombine(g, cassette, arm_len)

    for f in c.fragments.all():
        try:
            os.unlink(fragment_fasta_fn(f))
        except OSError:
            # best effort: the file may not exist
            pass
    build_all_genome_dbs(refresh=True)
    # reload to get blastdb
    c = Genome.objects.get(pk=c.id)

    if is_reversed:
        cassette = str(Seq(cassette).reverse_complement())

    front_of_cassette = cassette[0:edge.recombine.CHECK_JUNCTION_LEFT_DN]
    back_of_cassette = cassette[-edge.recombine.CHECK_JUNCTION_RIGHT_UP:]

    # cassette verification primers should work on modified genome, finding cassette
    for primer in r[0].verification_cassette:
        amplicon = amplify(c, primer)
        self.assertIsNotNone(amplicon)
        self.assertIn(cassette, amplicon)

    # front verification primers should find a product including front of cassette
    for primer in r[0].verification_front:
        amplicon = amplify(c, primer)
        self.assertIsNotNone(amplicon)
        self.assertIn(front_of_cassette, amplicon)

    # back verification primers should find a product including back of cassette
    for primer in r[0].verification_back:
        amplicon = amplify(c, primer)
        self.assertIsNotNone(amplicon)
        self.assertIn(back_of_cassette, amplicon)
def handle(self, *args, **options):
    """Run build_all_genome_dbs with its default arguments.

    Positional arguments and options are accepted but not used.
    """
    build_all_genome_dbs()
def handle(self, *args, **options):
    """Run build_all_genome_dbs, refreshing when options['refresh'] is truthy.

    Raises KeyError if 'refresh' is absent from ``options``.
    """
    refresh = True if options['refresh'] else False
    build_all_genome_dbs(refresh=refresh)