def test_annotate_variants(self):
    """
    Annotate a file with observation frequencies.

    A subset of the observations is imported first (coverage, then
    variation) and its sample is activated. The exome variation data
    source is then annotated as VCF using a single query named ``GLOBAL``
    with expression ``*``, and the ``GLOBAL_VN`` / ``GLOBAL_VF`` INFO
    fields of every resulting record are compared to the expected values.
    """
    with self.fixture.data(AnnotationData, CoverageData, DataSourceData,
                           VariationData) as data:
        # Import the coverage of the subset sample.
        # NOTE(review): the assertion right after .delay() presumably relies
        # on tasks being executed eagerly in the test setup -- confirm.
        coverage = Coverage.query.get(
            data.CoverageData.exome_subset_coverage.id)
        tasks.import_coverage.delay(coverage.id)
        assert coverage.task_done

        # Import the observations of the same sample.
        variation = Variation.query.get(
            data.VariationData.exome_subset_variation.id)
        tasks.import_variation.delay(variation.id)
        assert variation.task_done

        variation.sample.active = True

        # Attach the query to the annotation and persist everything,
        # including the sample activation above.
        annotation = Annotation.query.get(
            data.AnnotationData.exome_annotation.id)
        query = Query('GLOBAL', expressions.parse('*'))
        db.session.add(query)
        annotation.queries = [query]
        db.session.commit()

        data_source = DataSource.query.get(
            data.DataSourceData.exome_variation.id)
        annotated_file = StringIO.StringIO()

        # First pass over the data source: only the record count is needed,
        # the checksum returned by digest() is not used here.
        with data_source.data() as source_data:
            _, records = utils.digest(source_data)

        # Second pass: write the annotated VCF into the in-memory buffer.
        with data_source.data() as source_data:
            tasks.annotate_variants(source_data, annotated_file,
                                    original_filetype=data_source.filetype,
                                    annotated_filetype='vcf',
                                    original_records=records,
                                    queries=[query])

        lines = annotated_file.getvalue().split('\n')
        reader = vcf.Reader(lines)

        # One (GLOBAL_VN, GLOBAL_VF) pair per record, in file order.
        assert_equal([(record.INFO['GLOBAL_VN'], record.INFO['GLOBAL_VF'])
                      for record in reader],
                     [([1], [1.0]),
                      ([1], [1.0]),
                      ([1], [1.0]),
                      ([0], [0.0]),
                      ([0], [0.0]),
                      ([0], [0.0]),
                      ([1], [1.0]),
                      ([1], [1.0]),
                      ([1], [1.0]),
                      ([1], [1.0]),
                      ([1], [1.0]),
                      ([1, 1], [1.0, 0.0]),
                      ([0], [0.0]),
                      ([1], [0.0]),
                      ([1], [1.0]),
                      ([1], [1.0])])
def test_annotate_variants_multi_one_sample_one_sample(self):
    """
    Annotate a file with observation frequencies against multiple samples,
    using two queries that each select a single sample.

    Two subsets of the observations are imported first (each with coverage
    and variation) and both samples are activated. The exome variation data
    source is then annotated as VCF with query ``QA`` (selecting the subset
    sample) and query ``QB`` (selecting the subsubset sample), and the
    ``*_VN`` / ``*_VF`` INFO fields of both queries are compared to the
    expected values.
    """
    with self.fixture.data(AnnotationData, CoverageData, DataSourceData,
                           VariationData) as data:
        # First sample: the exome subset.
        # NOTE(review): the assertions right after .delay() presumably rely
        # on tasks being executed eagerly in the test setup -- confirm.
        coverage_a = Coverage.query.get(
            data.CoverageData.exome_subset_coverage.id)
        tasks.import_coverage.delay(coverage_a.id)
        assert coverage_a.task_done

        variation_a = Variation.query.get(
            data.VariationData.exome_subset_variation.id)
        tasks.import_variation.delay(variation_a.id)
        assert variation_a.task_done

        sample_a = variation_a.sample

        # Second sample: the exome subsubset.
        coverage_b = Coverage.query.get(
            data.CoverageData.exome_subsubset_coverage.id)
        tasks.import_coverage.delay(coverage_b.id)
        assert coverage_b.task_done

        variation_b = Variation.query.get(
            data.VariationData.exome_subsubset_variation.id)
        tasks.import_variation.delay(variation_b.id)
        assert variation_b.task_done

        sample_b = variation_b.sample

        sample_a.active = True
        sample_b.active = True

        annotation = Annotation.query.get(
            data.AnnotationData.exome_annotation.id)

        # NOTE(review): SampleData is read from the fixture here although it
        # is not passed to self.fixture.data() above -- confirm the fixture
        # exposes data sets that are only loaded as references.
        query_a = Query('QA', expressions.parse(
            'sample:%i' % data.SampleData.exome_subset_sample.id))
        db.session.add(query_a)

        query_b = Query('QB', expressions.parse(
            'sample:%i' % data.SampleData.exome_subsubset_sample.id))
        db.session.add(query_b)

        # Attach both queries and persist everything, including the sample
        # activations above.
        annotation.queries = [query_a, query_b]
        db.session.commit()

        data_source = DataSource.query.get(
            data.DataSourceData.exome_variation.id)
        annotated_file = StringIO.StringIO()

        # First pass over the data source: only the record count is needed,
        # the checksum returned by digest() is not used here.
        with data_source.data() as source_data:
            _, records = utils.digest(source_data)

        # Second pass: write the annotated VCF into the in-memory buffer.
        with data_source.data() as source_data:
            tasks.annotate_variants(source_data, annotated_file,
                                    original_filetype=data_source.filetype,
                                    annotated_filetype='vcf',
                                    original_records=records,
                                    queries=[query_a, query_b])

        lines = annotated_file.getvalue().split('\n')

        # One (QA_VN, QA_VF) pair per record, in file order.
        reader = vcf.Reader(lines)
        assert_equal([(record.INFO['QA_VN'], record.INFO['QA_VF'])
                      for record in reader],
                     [([1], [1.0]),
                      ([1], [1.0]),
                      ([1], [1.0]),
                      ([0], [0.0]),
                      ([0], [0.0]),
                      ([0], [0.0]),
                      ([1], [1.0]),
                      ([1], [1.0]),
                      ([1], [1.0]),
                      ([1], [1.0]),
                      ([1], [1.0]),
                      ([1, 1], [1.0, 0.0]),
                      ([0], [0.0]),
                      ([1], [0.0]),
                      ([1], [1.0]),
                      ([1], [1.0])])

        # A fresh reader is needed to iterate over the records a second
        # time, now for the (QB_VN, QB_VF) pairs.
        reader = vcf.Reader(lines)
        assert_equal([(record.INFO['QB_VN'], record.INFO['QB_VF'])
                      for record in reader],
                     [([1], [1.0]),
                      ([1], [0.0]),
                      ([1], [1.0]),
                      ([0], [0.0]),
                      ([0], [0.0]),
                      ([0], [0.0]),
                      ([1], [1.0]),
                      ([0], [0.0]),
                      ([1], [1.0]),
                      ([1], [0.0]),
                      ([1], [1.0]),
                      ([1, 1], [1.0, 0.0]),
                      ([0], [0.0]),
                      ([1], [0.0]),
                      ([1], [0.0]),
                      ([1], [0.0])])