示例#1
0
    def build_fragment(self):
        """Create a new Fragment and fill it with this object's sequence.

        A ``Fragment`` is created with ``circular=False`` and no parent, saved,
        and replaced by its ``indexed_fragment()``. The sequence is then
        appended piece by piece, with piece boundaries placed at every feature
        start and just past every feature end.

        Returns:
            The indexed fragment after all pieces have been appended.
        """
        # pre-chunk the fragment sequence at feature start and end locations.
        # there should be no need to further divide any chunk during import.
        # NOTE(review): the "+ 1" / "- 1" arithmetic suggests 1-based feature
        # coordinates with inclusive ends -- confirm against the feature parser.
        break_points = sorted(set(
            [f[0] for f in self.__features] +
            [f[1] + 1 for f in self.__features]))

        chunk_sizes = []
        # Leading piece before the first break point, if that point is past
        # position 1.
        if break_points and break_points[0] > 1:
            chunk_sizes.append(break_points[0] - 1)
        # One piece between each pair of consecutive break points.
        chunk_sizes.extend(
            nxt - cur for cur, nxt in zip(break_points, break_points[1:]))
        # Parenthesised single-argument form prints the same text whether
        # ``print`` is a statement (Python 2) or a function (Python 3).
        print('%d chunks' % (len(chunk_sizes),))

        new_fragment = Fragment(name=self.__rec.id, circular=False, parent=None, start_chunk=None)
        new_fragment.save()
        new_fragment = new_fragment.indexed_fragment()

        # ``prev`` carries the value returned by the previous append call and
        # ``flen`` the number of sequence characters appended so far.
        prev = None
        flen = 0
        seqlen = len(self.__sequence)
        for sz in chunk_sizes:
            prev = new_fragment._append_to_fragment(prev, flen, self.__sequence[flen:flen + sz])
            flen += sz
        if flen < seqlen:
            # Whatever follows the last break point goes in as one final piece;
            # the return value is not needed because nothing is appended after.
            new_fragment._append_to_fragment(prev, flen, self.__sequence[flen:seqlen])

        return new_fragment
示例#2
0
    def build_fragment(self):
        """Build and return an indexed Fragment containing the whole sequence.

        The fragment is created non-circular (``circular=False``) with no
        parent and saved before any sequence is appended. Sequence data is
        appended in pieces whose boundaries fall at each feature start and at
        the position right after each feature end.

        Returns:
            The result of ``indexed_fragment()`` on the saved fragment, after
            every piece of the sequence has been appended to it.
        """
        # pre-chunk the fragment sequence at feature start and end locations.
        # there should be no need to further divide any chunk during import.
        starts = [f[0] for f in self.__features]
        # NOTE(review): "+ 1" implies feature ends are inclusive -- confirm.
        past_ends = [f[1] + 1 for f in self.__features]
        break_points = sorted(set(starts + past_ends))

        chunk_sizes = []
        previous_point = None
        for point in break_points:
            if previous_point is None:
                # First break point: emit the leading piece only when the
                # point lies beyond position 1.
                if point > 1:
                    chunk_sizes.append(point - 1)
            else:
                chunk_sizes.append(point - previous_point)
            previous_point = point
        # Works both as a Python 2 print statement with a parenthesised
        # expression and as a Python 3 function call; output is unchanged.
        print('%d chunks' % (len(chunk_sizes), ))

        new_fragment = Fragment(name=self.__rec.id,
                                circular=False,
                                parent=None,
                                start_chunk=None)
        new_fragment.save()
        new_fragment = new_fragment.indexed_fragment()

        # Each append call receives the previous call's return value and the
        # count of characters appended so far.
        prev = None
        flen = 0
        seqlen = len(self.__sequence)
        for sz in chunk_sizes:
            prev = new_fragment._append_to_fragment(
                prev, flen, self.__sequence[flen:flen + sz])
            flen += sz
        if flen < seqlen:
            # Tail of the sequence after the last break point; the return
            # value is intentionally discarded since no further append follows.
            new_fragment._append_to_fragment(prev, flen,
                                             self.__sequence[flen:seqlen])

        return new_fragment
示例#3
0
    def test_finds_genomes_with_specified_fragment_ids(self):
        """Exercise the ``f`` (fragment id) query filter of the genome list.

        Fixture: genome g1 is linked to f1 and f2; genome g2 is linked to f1
        (inherited) and f3, where f3 has f2 as its parent. The expectations
        below are consistent with the endpoint listing a genome only when the
        requested ids are exactly that genome's fragments, and returning an
        empty list for malformed input.
        """
        g1 = Genome(name="Foo")
        g1.save()
        g2 = Genome(name="Bar")
        g2.save()
        f1 = Fragment(circular=True, name="FooF1")
        f1.save()
        f2 = Fragment(circular=True, name="FooF2")
        f2.save()
        f3 = Fragment(circular=True, name="FooF3", parent=f2)
        f3.save()
        Genome_Fragment(genome=g1, fragment=f1, inherited=False).save()
        Genome_Fragment(genome=g1, fragment=f2, inherited=False).save()
        Genome_Fragment(genome=g2, fragment=f1, inherited=True).save()
        Genome_Fragment(genome=g2, fragment=f3, inherited=False).save()

        url = reverse("genome_list")

        def fetch(query=""):
            # GET the list URL plus the given query string, require HTTP 200
            # and hand back the decoded JSON body.
            res = self.client.get(url + query)
            # assertEqual, not the assertEquals alias removed in Python 3.12.
            self.assertEqual(res.status_code, 200)
            return json.loads(res.content)

        # no filter, return both genomes
        d = fetch()
        self.assertCountEqual([g["id"] for g in d], [g1.id, g2.id])

        # looking for f1 and f2
        d = fetch("?f=%d&f=%d" % (f1.id, f2.id))
        self.assertCountEqual([g["id"] for g in d], [g1.id])

        # looking for f1 and f3
        d = fetch("?f=%d&f=%d" % (f1.id, f3.id))
        self.assertCountEqual([g["id"] for g in d], [g2.id])

        # looking for f2 and f3: no genome holds both, expect nothing
        d = fetch("?f=%d&f=%d" % (f2.id, f3.id))
        self.assertEqual(d, [])

        # looking for f1 alone: both genomes hold more than f1, expect nothing
        d = fetch("?f=%d" % (f1.id,))
        self.assertEqual(d, [])

        # bad input, return []
        d = fetch("?f=[1,2,3]")
        self.assertEqual(d, [])
示例#4
0
文件: test_views.py 项目: Chris7/edge
    def test_finds_genomes_with_specified_fragment_ids(self):
        """Check filtering of ``/edge/genomes/`` by ``f`` fragment-id parameters.

        Fixture: g1 is linked to f1 and f2; g2 is linked to f1 (inherited) and
        f3 (whose parent is f2). Each request must answer HTTP 200; the
        expected bodies are spelled out per case below.
        """
        from edge.models import Genome, Fragment, Genome_Fragment

        g1 = Genome(name='Foo')
        g1.save()
        g2 = Genome(name='Bar')
        g2.save()
        f1 = Fragment(circular=True, name='FooF1')
        f1.save()
        f2 = Fragment(circular=True, name='FooF2')
        f2.save()
        f3 = Fragment(circular=True, name='FooF3', parent=f2)
        f3.save()
        Genome_Fragment(genome=g1, fragment=f1, inherited=False).save()
        Genome_Fragment(genome=g1, fragment=f2, inherited=False).save()
        Genome_Fragment(genome=g2, fragment=f1, inherited=True).save()
        Genome_Fragment(genome=g2, fragment=f3, inherited=False).save()

        # NOTE: assertEqual replaces the deprecated assertEquals alias; it is
        # available on both Python 2.7 and Python 3.

        # no filter, return both genomes
        res = self.client.get('/edge/genomes/')
        self.assertEqual(res.status_code, 200)
        d = json.loads(res.content)
        self.assertItemsEqual([g['id'] for g in d], [g1.id, g2.id])

        # looking for f1 and f2
        res = self.client.get('/edge/genomes/?f=%d&f=%d' % (f1.id, f2.id))
        self.assertEqual(res.status_code, 200)
        d = json.loads(res.content)
        self.assertItemsEqual([g['id'] for g in d], [g1.id])

        # looking for f1 and f3
        res = self.client.get('/edge/genomes/?f=%d&f=%d' % (f1.id, f3.id))
        self.assertEqual(res.status_code, 200)
        d = json.loads(res.content)
        self.assertItemsEqual([g['id'] for g in d], [g2.id])

        # looking for f2 and f3: no genome is linked to both
        res = self.client.get('/edge/genomes/?f=%d&f=%d' % (f2.id, f3.id))
        self.assertEqual(res.status_code, 200)
        d = json.loads(res.content)
        self.assertEqual(d, [])

        # looking for f1 only: expected to match no genome, since each genome
        # holding f1 also holds another fragment
        res = self.client.get('/edge/genomes/?f=%d' % (f1.id,))
        self.assertEqual(res.status_code, 200)
        d = json.loads(res.content)
        self.assertEqual(d, [])

        # bad input, return []
        res = self.client.get('/edge/genomes/?f=[1,2,3]')
        self.assertEqual(res.status_code, 200)
        d = json.loads(res.content)
        self.assertEqual(d, [])
示例#5
0
    def test_finds_genomes_with_specified_fragment_ids(self):
        """Verify the ``f`` fragment-id filter on the ``/edge/genomes/`` list.

        Fixture: g1 is linked to f1 and f2; g2 is linked to f1 (inherited) and
        f3, with f3 derived from f2. Every request is expected to return HTTP
        200; only the JSON body differs between cases.
        """
        g1 = Genome(name='Foo')
        g1.save()
        g2 = Genome(name='Bar')
        g2.save()
        f1 = Fragment(circular=True, name='FooF1')
        f1.save()
        f2 = Fragment(circular=True, name='FooF2')
        f2.save()
        f3 = Fragment(circular=True, name='FooF3', parent=f2)
        f3.save()
        Genome_Fragment(genome=g1, fragment=f1, inherited=False).save()
        Genome_Fragment(genome=g1, fragment=f2, inherited=False).save()
        Genome_Fragment(genome=g2, fragment=f1, inherited=True).save()
        Genome_Fragment(genome=g2, fragment=f3, inherited=False).save()

        def listing(query=''):
            # Request the genome list with the given query string, require
            # HTTP 200 and return the decoded JSON body.
            res = self.client.get('/edge/genomes/' + query)
            # assertEqual instead of the deprecated assertEquals alias.
            self.assertEqual(res.status_code, 200)
            return json.loads(res.content)

        # no filter, return both genomes
        d = listing()
        self.assertItemsEqual([g['id'] for g in d], [g1.id, g2.id])

        # looking for f1 and f2
        d = listing('?f=%d&f=%d' % (f1.id, f2.id))
        self.assertItemsEqual([g['id'] for g in d], [g1.id])

        # looking for f1 and f3
        d = listing('?f=%d&f=%d' % (f1.id, f3.id))
        self.assertItemsEqual([g['id'] for g in d], [g2.id])

        # looking for f2 and f3: no genome is linked to both
        d = listing('?f=%d&f=%d' % (f2.id, f3.id))
        self.assertEqual(d, [])

        # looking for f1 only: expected to match nothing
        d = listing('?f=%d' % (f1.id, ))
        self.assertEqual(d, [])

        # bad input, return []
        d = listing('?f=[1,2,3]')
        self.assertEqual(d, [])
示例#6
0
    def build_fragment(self, reference_based=True, dirn='.'):
        """Create a Fragment for the current record and populate its chunks.

        Chunk boundaries are placed at the start and just past the end of every
        feature and subfeature, with a trailing chunk added so the sizes add up
        to the stored sequence length.

        Args:
            reference_based: When true, the full record sequence and ``dirn``
                are handed to ``Chunk.CHUNK_REFERENCE_CLASS.generate_from_fragment``
                and the chunks are created in bulk from the computed sizes.
                When false, the sequence text is appended chunk by chunk, and
                any chunk at or above 1,000,000 characters is first split.
            dirn: Passed through unchanged to ``generate_from_fragment``.

        Returns:
            The indexed fragment (``Fragment.indexed_fragment()``).
        """
        # pre-chunk the fragment sequence at feature start and end locations.
        # there should be no need to further divide any chunk during import.
        boundaries = set()
        for feat in self.__features:
            feat_name = feat[2]
            boundaries.add(feat[0])
            boundaries.add(feat[1] + 1)
            for sub in self.__subfeatures_dict[feat_name]:
                boundaries.add(sub[0])
                boundaries.add(sub[1] + 1)
        break_points = sorted(boundaries)

        chunk_sizes = []
        # Leading chunk before the first boundary, when that boundary is past
        # position 1.
        if break_points and break_points[0] > 1:
            chunk_sizes.append(break_points[0] - 1)
        # One chunk between each pair of consecutive boundaries.
        chunk_sizes.extend(
            later - earlier
            for earlier, later in zip(break_points, break_points[1:])
        )

        # Trailing chunk covering whatever lies beyond the last boundary.
        covered = sum(chunk_sizes)
        if covered < self.__seqlen:
            chunk_sizes.append(self.__seqlen - covered)

        # Circularity is read from the first record feature whose type is one
        # of the names below; later features are not examined.
        fragment_circular = False
        for rec_feature in self.__rec.features:
            if rec_feature.type.upper() not in ['REGION', 'CHR', 'CHROM', 'CHROMOSOME']:
                continue
            qualifiers = rec_feature.qualifiers
            if 'Is_circular' in qualifiers:
                fragment_circular = qualifiers['Is_circular'][0].upper() == 'TRUE'
            break

        new_fragment = Fragment(
            name=self.__rec.id,
            circular=fragment_circular,
            parent=None,
            start_chunk=None,
        )
        new_fragment.save()
        print("Fragment %s" % (new_fragment.id))
        new_fragment = new_fragment.indexed_fragment()

        if reference_based:
            print("%d chunks" % (len(chunk_sizes),))
            started = time.time()
            Chunk.CHUNK_REFERENCE_CLASS.generate_from_fragment(
                new_fragment, str(self.__rec.seq), dirn=dirn
            )
            print("Reference file generation took %s seconds" % (time.time() - started))

            new_fragment._bulk_create_fragment_chunks(chunk_sizes)
            return new_fragment

        # divide chunks bigger than a certain threshold to smaller chunks, to
        # allow insertion of sequence into database. e.g. MySQL has a packet
        # size that prevents chunks that are too large from being inserted.
        chunk_size_limit = 1000000
        bounded_sizes = []
        for size in chunk_sizes:
            if size < chunk_size_limit:
                bounded_sizes.append(size)
                continue
            remaining = size
            while remaining > 0:
                bounded_sizes.append(min(remaining, chunk_size_limit))
                remaining -= chunk_size_limit
        chunk_sizes = bounded_sizes
        print("%d chunks" % (len(chunk_sizes),))

        # Each append call receives the previous call's return value and the
        # number of sequence characters appended so far.
        prev = None
        offset = 0
        for size in chunk_sizes:
            started = time.time()
            stop = offset + size
            prev = new_fragment._append_to_fragment(
                prev, offset, str(self.__rec.seq[offset:stop])
            )
            offset = stop
            print("add chunk to fragment: %.4f\r" % (time.time() - started,), end="")

        return new_fragment