Пример #1
0
def get_bx_ids():
    """Collect the BX_ID values found in every ready-input report file.

    Each file name returned by ``get_reports_files(ARGS.ready_input_dir)`` is
    read in turn:

    * A path ending in ``.tsv`` is parsed as a tab-delimited table whose
      ``BX_ID`` column holds comma-separated integers.  Its ids are stored
      under the fixed source name ``"ENIGMA"``.
    * Any other file is parsed with ``vcf.Reader``.  The source name is the
      file name with its last four characters (the ``.vcf`` suffix) removed,
      and the ids are taken from each record's ``INFO['BX_ID']``.

    A ``ValueError`` raised while iterating a VCF file is printed and stops
    the reading of that file; ids gathered before the error are kept.

    Returns:
        dict: source name -> list of int ids, in the order they were read.
    """
    bx_ids = {}

    for file_name in get_reports_files(ARGS.ready_input_dir):
        file_path = abspath(ARGS.ready_input_dir + file_name)
        if file_path.endswith('.tsv'):
            source = "ENIGMA"
            bx_ids[source] = []
            # The context manager closes the handle even if parsing fails.
            with open(file_path, "r") as tsv_handle:
                for report in csv.DictReader(tsv_handle, delimiter='\t'):
                    # Build the list first so a bad value adds nothing for
                    # this row; a list (not map) also works on Python 3.
                    ids = [int(bx_id) for bx_id in report['BX_ID'].split(',')]
                    bx_ids[source].extend(ids)
        else:
            suffix = '.vcf'
            source = file_name[:len(file_name) - len(suffix)]
            bx_ids[source] = []
            with open(file_path, 'r') as vcf_handle:
                vcf_reader = vcf.Reader(vcf_handle, strict_whitespace=True)
                try:
                    for record in vcf_reader:
                        ids = [int(bx_id) for bx_id in record.INFO['BX_ID']]
                        bx_ids[source].extend(ids)
                except ValueError as e:
                    # Best effort: report the problem and keep what was read.
                    print(e)

    return bx_ids
Пример #2
0
    def test_aggregate_reports_maintains_proper_variant_effect_lovd_formatting(self):
        """Aggregate only LOVD.vcf and check the Variant_effect_LOVD values.

        For every aggregated report, the first element of its
        Variant_effect_LOVD field must be either '?/.' or '+/+'.
        """
        # Keep only the LOVD file from the discovered report files.
        lovd_paths = []
        for name in aggregate_reports.get_reports_files(INPUT_DIRECTORY):
            if name == 'LOVD.vcf':
                lovd_paths.append(INPUT_DIRECTORY + name)

        aggregated = aggregate_reports.aggregate_reports(
            lovd_paths, self.columns, self.genome_regions_symbol_dict)

        effect_column = self.columns.index("Variant_effect_LOVD")
        allowed_effects = ['?/.', '+/+']

        for row in aggregated:
            self.assertIn(row[effect_column][0], allowed_effects)
    def test_aggregate_reports_maintains_proper_variant_effect_lovd_formatting(self):
        """Aggregate only LOVD.vcf and check the Variant_effect_LOVD values.

        For every aggregated report, the first element of its
        Variant_effect_LOVD field must be either '?/.' or '+/+'.
        """
        # Keep only the LOVD file from the discovered report files.
        lovd_paths = []
        for name in aggregate_reports.get_reports_files(INPUT_DIRECTORY):
            if name == 'LOVD.vcf':
                lovd_paths.append(INPUT_DIRECTORY + name)

        aggregated = aggregate_reports.aggregate_reports(lovd_paths, self.columns)

        effect_column = self.columns.index("Variant_effect_LOVD")
        allowed_effects = ['?/.', '+/+']

        for row in aggregated:
            self.assertIn(row[effect_column][0], allowed_effects)
Пример #4
0
    def test_aggregate_reports(self):
        """Aggregate every report file and verify the per-source report counts.

        The total number of aggregated reports must be twice the number of
        entries in ``self.sources``, and each of those sources must occur
        exactly twice in the "Source" column.
        """
        paths = []
        for name in aggregate_reports.get_reports_files(INPUT_DIRECTORY):
            paths.append(INPUT_DIRECTORY + name)
        aggregated = aggregate_reports.aggregate_reports(
            paths, self.columns, self.genome_regions_symbol_dict)

        # Each test file contains two reports, so all of them must show up.
        actual_total = len(aggregated)
        expected_total = len(self.sources) * 2
        self.assertEqual(actual_total, expected_total)

        # Tally how many reports were produced for each source.
        source_idx = self.columns.index("Source")
        counts = {}
        for row in aggregated:
            key = row[source_idx]
            counts[key] = counts.get(key, 0) + 1

        for source in self.sources:
            self.assertEqual(counts[source], 2)
    def test_aggregate_reports(self):
        """Aggregate every report file and verify the per-source report counts.

        The total number of aggregated reports must be twice the number of
        entries in ``self.sources``, and each of those sources must occur
        exactly twice in the "Source" column.
        """
        paths = []
        for name in aggregate_reports.get_reports_files(INPUT_DIRECTORY):
            paths.append(INPUT_DIRECTORY + name)
        aggregated = aggregate_reports.aggregate_reports(paths, self.columns)

        # Each test file contains two reports, so all of them must show up.
        actual_total = len(aggregated)
        expected_total = len(self.sources) * 2
        self.assertEqual(actual_total, expected_total)

        # Tally how many reports were produced for each source.
        source_idx = self.columns.index("Source")
        counts = {}
        for row in aggregated:
            key = row[source_idx]
            counts[key] = counts.get(key, 0) + 1

        for source in self.sources:
            self.assertEqual(counts[source], 2)
Пример #6
0
 def test_get_reports_files(self):
     """Check the report-file listing for INPUT_DIRECTORY.

     Exactly nine entries are expected, and "1000_Genomesready.vcf" must not
     be one of them.
     """
     discovered = aggregate_reports.get_reports_files(INPUT_DIRECTORY)
     found_count = len(discovered)
     self.assertEqual(found_count, 9)
     self.assertNotIn("1000_Genomesready.vcf", discovered)
 def test_get_reports_files(self):
     """Check the report-file listing for INPUT_DIRECTORY.

     Exactly nine entries are expected, and "1000_Genomesready.vcf" must not
     be one of them.
     """
     discovered = aggregate_reports.get_reports_files(INPUT_DIRECTORY)
     found_count = len(discovered)
     self.assertEqual(found_count, 9)
     self.assertNotIn("1000_Genomesready.vcf", discovered)