Exemplo n.º 1
0
    def test_annotate_variants(self):
        """
        Annotate a file with observation frequencies.

        We first import a subset of the observations with coverage and mark
        the sample as active. The exome variation data source is then
        annotated using a single query named ``GLOBAL`` built from the
        expression ``*``, and the ``GLOBAL_VN`` / ``GLOBAL_VF`` INFO fields
        of every record in the resulting VCF are compared to the expected
        values.
        """
        with self.fixture.data(AnnotationData, CoverageData, DataSourceData,
                               VariationData) as data:
            # The assertions directly after `.delay()` rely on the task
            # having finished by then (presumably tasks run eagerly under
            # the test configuration -- confirm).
            coverage = Coverage.query.get(
                data.CoverageData.exome_subset_coverage.id)
            tasks.import_coverage.delay(coverage.id)
            assert coverage.task_done

            variation = Variation.query.get(
                data.VariationData.exome_subset_variation.id)
            tasks.import_variation.delay(variation.id)
            assert variation.task_done

            variation.sample.active = True

            annotation = Annotation.query.get(
                data.AnnotationData.exome_annotation.id)
            query = Query('GLOBAL', expressions.parse('*'))
            db.session.add(query)
            annotation.queries = [query]

            db.session.commit()

            data_source = DataSource.query.get(
                data.DataSourceData.exome_variation.id)
            annotated_file = StringIO.StringIO()

            # First pass: only the `records` value is needed, it is handed to
            # `annotate_variants` as `original_records`. The handle gets its
            # own name so it does not rebind the fixture handle `data`.
            with data_source.data() as variants:
                _checksum, records = utils.digest(variants)

            # Second pass: the source is opened again for the annotation.
            with data_source.data() as variants:
                tasks.annotate_variants(variants, annotated_file,
                                        original_filetype=data_source.filetype,
                                        annotated_filetype='vcf',
                                        original_records=records,
                                        queries=[query])

            lines = annotated_file.getvalue().split('\n')
            reader = vcf.Reader(lines)

            # One (GLOBAL_VN, GLOBAL_VF) pair per record, in reader order.
            expected = [([1], [1.0]),
                        ([1], [1.0]),
                        ([1], [1.0]),
                        ([0], [0.0]),
                        ([0], [0.0]),
                        ([0], [0.0]),
                        ([1], [1.0]),
                        ([1], [1.0]),
                        ([1], [1.0]),
                        ([1], [1.0]),
                        ([1], [1.0]),
                        ([1, 1], [1.0, 0.0]),
                        ([0], [0.0]),
                        ([1], [0.0]),
                        ([1], [1.0]),
                        ([1], [1.0])]

            assert_equal([(record.INFO['GLOBAL_VN'], record.INFO['GLOBAL_VF'])
                          for record in reader],
                         expected)
Exemplo n.º 2
0
    def test_annotate_variants_multi_one_sample_one_sample(self):
        """
        Annotate a file with observation frequencies against multiple
        samples, using two queries that each select a single sample.

        We first import two subsets of the observations with coverage (the
        exome subset and the exome subsubset) and mark both samples as
        active. The exome variation data source is then annotated with query
        ``QA`` (``sample:<id>`` of the subset sample) and query ``QB``
        (``sample:<id>`` of the subsubset sample), and the ``*_VN`` / ``*_VF``
        INFO fields of every record in the resulting VCF are compared to the
        expected values for each query.
        """
        with self.fixture.data(AnnotationData, CoverageData, DataSourceData,
                               VariationData) as data:
            # The assertions directly after `.delay()` rely on the task
            # having finished by then (presumably tasks run eagerly under
            # the test configuration -- confirm).
            coverage = Coverage.query.get(
                data.CoverageData.exome_subset_coverage.id)
            tasks.import_coverage.delay(coverage.id)
            assert coverage.task_done

            variation = Variation.query.get(
                data.VariationData.exome_subset_variation.id)
            tasks.import_variation.delay(variation.id)
            assert variation.task_done

            sample_a = variation.sample

            coverage = Coverage.query.get(
                data.CoverageData.exome_subsubset_coverage.id)
            tasks.import_coverage.delay(coverage.id)
            assert coverage.task_done

            variation = Variation.query.get(
                data.VariationData.exome_subsubset_variation.id)
            tasks.import_variation.delay(variation.id)
            assert variation.task_done

            sample_b = variation.sample

            sample_a.active = True
            sample_b.active = True

            annotation = Annotation.query.get(
                data.AnnotationData.exome_annotation.id)

            query_a = Query('QA', expressions.parse(
                'sample:%i' % data.SampleData.exome_subset_sample.id))
            db.session.add(query_a)

            query_b = Query('QB', expressions.parse(
                'sample:%i' % data.SampleData.exome_subsubset_sample.id))
            db.session.add(query_b)
            annotation.queries = [query_a, query_b]

            db.session.commit()

            data_source = DataSource.query.get(
                data.DataSourceData.exome_variation.id)
            annotated_file = StringIO.StringIO()

            # First pass: only the `records` value is needed, it is handed to
            # `annotate_variants` as `original_records`. The handle gets its
            # own name so it does not rebind the fixture handle `data`.
            with data_source.data() as variants:
                _checksum, records = utils.digest(variants)

            # Second pass: the source is opened again for the annotation.
            with data_source.data() as variants:
                tasks.annotate_variants(variants, annotated_file,
                                        original_filetype=data_source.filetype,
                                        annotated_filetype='vcf',
                                        original_records=records,
                                        queries=[query_a, query_b])

            # Parse the annotated output once; both checks below read
            # different INFO fields from the same records.
            lines = annotated_file.getvalue().split('\n')
            annotated_records = list(vcf.Reader(lines))

            # One (QA_VN, QA_VF) pair per record, in reader order.
            expected_a = [([1], [1.0]),
                          ([1], [1.0]),
                          ([1], [1.0]),
                          ([0], [0.0]),
                          ([0], [0.0]),
                          ([0], [0.0]),
                          ([1], [1.0]),
                          ([1], [1.0]),
                          ([1], [1.0]),
                          ([1], [1.0]),
                          ([1], [1.0]),
                          ([1, 1], [1.0, 0.0]),
                          ([0], [0.0]),
                          ([1], [0.0]),
                          ([1], [1.0]),
                          ([1], [1.0])]

            assert_equal([(record.INFO['QA_VN'], record.INFO['QA_VF'])
                          for record in annotated_records],
                         expected_a)

            # One (QB_VN, QB_VF) pair per record, in reader order.
            expected_b = [([1], [1.0]),
                          ([1], [0.0]),
                          ([1], [1.0]),
                          ([0], [0.0]),
                          ([0], [0.0]),
                          ([0], [0.0]),
                          ([1], [1.0]),
                          ([0], [0.0]),
                          ([1], [1.0]),
                          ([1], [0.0]),
                          ([1], [1.0]),
                          ([1, 1], [1.0, 0.0]),
                          ([0], [0.0]),
                          ([1], [0.0]),
                          ([1], [0.0]),
                          ([1], [0.0])]

            assert_equal([(record.INFO['QB_VN'], record.INFO['QB_VF'])
                          for record in annotated_records],
                         expected_b)