Пример #1
0
def functional_table():
    form = FunctionClassFilterForm()
    form.function_class.choices = [('cog', 'Cog'),
                    ('pfam', 'Pfam'),
                    ('tigrfam', 'TigrFam'),
                    ('all', 'All')
                ]

    form.select_sample_groups.choices = [(sample_set.name, sample_set.name) for sample_set in  SampleSet.query.all()]

    type_identifiers = []
    if form.validate_on_submit():
        function_class = form.function_class.data
        if function_class == 'all':
            function_class = None
        limit = form.limit.data
        if limit == 'all':
            limit = None
        else:
            limit = int(limit)

        filter_alternative = form.filter_alternative.data
        if filter_alternative == 'filter_with_type_identifiers':
            for type_identifier in form.type_identifiers.entries:
                if type_identifier.data != '':
                    type_identifiers.append(type_identifier.data)
        elif filter_alternative == 'filter_with_search':
            search_string = form.search_annotations
            if search_string.data != '':
                q = _search_query(search_string.data)
                type_identifiers = [a.type_identifier for a in q.all()]


        sample_sets = form.select_sample_groups.data
        if len(sample_sets) > 0:
            samples = [sample.scilifelab_code for sample in Sample.all_from_sample_sets(sample_sets)]
        else:
            samples = None

        download_action = False
        if form.submit_download.data:
            download_action = True
            download_select = form.download_select.data
    else:
        function_class=None
        limit=20
        samples = None
        download_action = False

    if len(form.type_identifiers) == 0:
        form.type_identifiers.append_entry()

    if type_identifiers == []:
        type_identifiers = None

    samples, table = Annotation.rpkm_table(limit=limit, samples=samples, function_class=function_class, type_identifiers=type_identifiers)
    samples = sorted(samples, key=lambda x: x.scilifelab_code)
    sample_scilifelab_codes = [sample.scilifelab_code for sample in samples]
    if download_action:
        if download_select == 'Gene List':
            # Fetch all contributing genes for all the annotations in the table
            annotation_ids = [annotation.id for annotation, sample in table.items()]
            genes_per_annotation = Annotation.genes_per_annotation(annotation_ids)
            csv_output = '\n'.join(
                    [','.join([gene.name, annotation.type_identifier]) \
                            for gene, annotation in genes_per_annotation])
            r = make_response(csv_output)
            r.headers["Content-Disposition"] = "attachment; filename=gene_list.csv"
            r.headers["Content-Type"] = "text/csv"
            return r
    return render_template('functional_table.html',
            table=table,
            samples=samples,
            sample_scilifelab_codes = sample_scilifelab_codes,
            form=form
        )
Пример #2
0
    def test_annotation_rpkm_table(self):
        annotation_types = [("Cog", {'class': Cog}),
                ("Pfam", {'class': Pfam}),
                ("TigrFam", {'class': TigrFam}),
                ("EcNumber", {'class': EcNumber})]

        nr_annotation_types = len(annotation_types)
        annotation_sources = {}
        for annotation_type, type_d in annotation_types:
            annotation_sources[annotation_type]= AnnotationSource(
                    annotation_type,
                    "v1.0",
                    "rpsblast",
                    "e_value=0.000001"
                )

        sample1 = Sample("P1993_101", None, None)
        sample2 = Sample("P1993_102", None, None)
        nr_samples = 2
        for i in range(50):
            gene1 = Gene("gene1{}".format(i), None)
            gene2 = Gene("gene2{}".format(i), None)

            gene_count1 = GeneCount(gene1, sample1, 0.001)
            gene_count2 = GeneCount(gene1, sample2, 0.01)
            gene_count3 = GeneCount(gene2, sample1, 0.002)
            gene_count4 = GeneCount(gene2, sample2, 0.02)

            for annotation_type, type_d in annotation_types:
                if annotation_type == 'Cog':
                    type_id = str(i)
                    type_id = "0"*(4-len(type_id))+type_id
                    annotation = type_d['class'](annotation_type.upper() + type_id, "H")
                elif annotation_type == 'EcNumber':
                    if i > 25:
                        type_id = "0.0.2.{}".format(i)
                    else:
                        type_id = "0.0.0.{}".format(i)
                    annotation = type_d['class'](type_id)
                else:
                    type_id = str(i)
                    type_id = "0"*(4-len(type_id))+type_id
                    annotation = type_d['class'](annotation_type.upper() + type_id)

                annotation_mode = i % 3
                gene_annotations = []
                if annotation_mode in [0,1]:
                    gene_annotations.append(GeneAnnotation(
                            annotation,
                            gene1,
                            annotation_sources[annotation_type]
                        ))
                if annotation_mode in [1,2]:
                    gene_annotations.append(GeneAnnotation(
                            annotation,
                            gene2,
                            annotation_sources[annotation_type]
                        ))
                self.session.add_all(gene_annotations)

            self.session.add(gene1)
            self.session.add(gene2)
        self.session.commit()
        refresh_all_mat_views()
        samples, rows = Annotation.rpkm_table()
        assert len(samples) == 2
        assert len(rows) == 20 # Default limit
        samples, rows = Annotation.rpkm_table(limit=100)
        assert len(samples) == 2
        assert len(rows) == 100
        samples, rows = Annotation.rpkm_table(limit=None)
        assert len(samples) == 2
        assert len(rows) == nr_annotation_types * 50

        for annotation, sample_d in rows.items():
            # sample_d should be a ordered dict
            assert ["P1993_101", "P1993_102"] == [sample.scilifelab_code for sample in sample_d.keys()]
        rpkms = [[rpkm for sample, rpkm in sample_d.items()] for annotation, sample_d in rows.items()]

        rpkms_flat = []
        for rpkm_row in rpkms:
            rpkms_flat += rpkm_row

        assert len(rpkms_flat) == nr_annotation_types * nr_samples * 50

        # Annotations sorted by total rpkm over all samples
        # and the rpkm values should be summed over all genes for that annotation
        # there should be roughly equal numbers of the three different counts
        for i, row in enumerate(rpkms[:67]):
            assert row == [0.003, 0.03]
        for row in rpkms[69:130]:
            assert row == [0.002, 0.02]
        for row in rpkms[150:200]:
            assert row == [0.001, 0.01]

        # possible to filter on function classes
        for annotation_type, type_d in annotation_types:
            samples, rows = Annotation.rpkm_table(limit=None, function_class=annotation_type.lower())
            assert len(rows) == 50
            for key in rows.keys():
                assert annotation_type[:3].lower() == key.annotation_type[:3]

        # possible to filter on samples
        for sample in [sample1, sample2]:
            samples, rows = Annotation.rpkm_table(samples=[sample.scilifelab_code], limit=None)
            assert len(rows) == 200
            assert len(samples) == 1
            assert samples[0] == sample
            for annotation, sample_d in rows.items():
                assert list(sample_d.keys()) == [sample]

            rpkms = [[rpkm for sample, rpkm in sample_d.items()] for annotation, sample_d in rows.items()]
            if sample.scilifelab_code == "P1993_101":
                for i, row in enumerate(rpkms[:65]):
                    assert row == [0.003]
                for row in rpkms[69:130]:
                    assert row == [0.002]
                for row in rpkms[150:200]:
                    assert row == [0.001]
            else:
                for row in rpkms[:67]:
                    assert row == [0.03]
                for row in rpkms[69:130]:
                    assert row == [0.02]
                for row in rpkms[150:200]:
                    assert row == [0.01]

        # possible to filter on sample and function class at the same time
        for annotation_type, type_d in annotation_types:
            for sample in [sample1, sample2]:
                samples, rows = Annotation.rpkm_table(limit=None, function_class=annotation_type.lower(), samples=[sample.scilifelab_code])
                assert len(rows) == 50
                for key in rows.keys():
                    assert annotation_type.lower()[:3] == key.annotation_type[:3]

                assert len(samples) == 1
                assert samples[0] == sample
                for annotation, sample_d in rows.items():
                    assert list(sample_d.keys()) == [sample]

                rpkms = [[rpkm for sample, rpkm in sample_d.items()] for annotation, sample_d in rows.items()]
                if sample.scilifelab_code == "P1993_101":
                    for row in rpkms[:9]:
                        assert row == [0.003]
                    for row in rpkms[19:29]:
                        assert row == [0.002]
                    for row in rpkms[39:]:
                        assert row == [0.001]
                else:
                    for row in rpkms[:9]:
                        assert row == [0.03]
                    for row in rpkms[19:29]:
                        assert row == [0.02]
                    for row in rpkms[39:]:
                        assert row == [0.01]

        # possible to filter on individual annotations
        annotation_ids = ["COG0001", "TIGRFAM0004", "COG0003", "PFAM0002", "0.0.2.26"]

        for r in range(5):
            for type_identifiers in itertools.combinations(annotation_ids, r+1):

                samples, rows = Annotation.rpkm_table(limit=None, type_identifiers=list(type_identifiers))
                assert len(samples) == 2
                assert len(rows) == len(type_identifiers)
                assert set([key.type_identifier for key in rows.keys()]) == set(type_identifiers)