Пример #1
0
def main(input_filepath, output_mappings_filepath, output_curation_filepath,
         filters, zooma_host, oxo_target_list, oxo_distance):
    """Process every distinct trait name from the input and write the results.

    Trait names returned by ``parse_trait_names`` are counted, each distinct
    name is wrapped in a ``Trait`` together with its frequency, every trait is
    run through ``process_trait`` in a pool of worker processes, and the
    processed traits are handed to ``output_trait`` in their original order.

    :param input_filepath: passed to ``parse_trait_names``.
    :param output_mappings_filepath: path of the tab-separated mappings file;
        it is (re)created and starts with a header row.
    :param output_curation_filepath: path of the tab-separated curation file;
        it is (re)created without a header row.
    :param filters: forwarded unchanged to ``process_trait``.
    :param zooma_host: forwarded unchanged to ``process_trait``.
    :param oxo_target_list: forwarded unchanged to ``process_trait``.
    :param oxo_distance: forwarded unchanged to ``process_trait``.
    """
    logger.info('Started parsing trait names')
    trait_names_list = parse_trait_names(input_filepath)
    trait_names_counter = Counter(trait_names_list)
    logger.info("Loaded {} trait names".format(len(trait_names_counter)))

    with open(output_mappings_filepath, "w", newline='') as mapping_file, \
            open(output_curation_filepath, "wt") as curation_file:
        mapping_writer = csv.writer(mapping_file, delimiter="\t")
        mapping_writer.writerow(["#clinvar_trait_name", "uri", "label"])
        curation_writer = csv.writer(curation_file, delimiter="\t")

        logger.info('Processing trait names in parallel')
        process_trait_args = [
            (Trait(trait_name, freq), filters, zooma_host, oxo_target_list,
             oxo_distance)
            for trait_name, freq in trait_names_counter.items()
        ]

        # Pool.apply() blocks until its single call has finished, so calling
        # it in a loop runs the traits one after another. starmap() submits
        # the whole batch to the workers and still returns the results in
        # input order. The context manager releases the worker processes.
        with multiprocessing.Pool(processes=12) as trait_process_pool:
            processed_trait_list = trait_process_pool.starmap(
                process_trait, process_trait_args)

        for trait in processed_trait_list:
            output_trait(trait, mapping_writer, curation_writer)

    logger.info('Finished processing trait names')
Пример #2
0
def main(input_filepath, output_mappings_filepath, output_curation_filepath, filters, zooma_host, oxo_target_list,
         oxo_distance):
    """Process every parsed trait and write the specific ones to the outputs.

    The traits returned by ``parse_trait_names`` are run through
    ``process_trait`` in a pool of worker processes. Each processed trait whose
    lower-cased name is not listed in ``ClinVarTrait.NONSPECIFIC_TRAITS`` is
    then handed to ``output_trait``, in the original order.

    :param input_filepath: passed to ``parse_trait_names``.
    :param output_mappings_filepath: path of the tab-separated mappings file;
        it is (re)created and starts with a header row.
    :param output_curation_filepath: path of the tab-separated curation file;
        it is (re)created without a header row.
    :param filters: forwarded unchanged to ``process_trait``.
    :param zooma_host: forwarded unchanged to ``process_trait``.
    :param oxo_target_list: forwarded unchanged to ``process_trait``.
    :param oxo_distance: forwarded unchanged to ``process_trait``.
    """
    logger.info('Started parsing trait names')
    trait_list = parse_trait_names(input_filepath)
    logger.info("Loaded {} trait names".format(len(trait_list)))

    with open(output_mappings_filepath, "w", newline='') as mapping_file, \
            open(output_curation_filepath, "wt") as curation_file:
        mapping_writer = csv.writer(mapping_file, delimiter="\t")
        mapping_writer.writerow(["#clinvar_trait_name", "uri", "label"])
        curation_writer = csv.writer(curation_file, delimiter="\t")

        logger.info('Processing trait names in parallel')
        process_trait_args = [
            (trait, filters, zooma_host, oxo_target_list, oxo_distance)
            for trait in trait_list
        ]

        # Pool.apply() blocks until its single call has finished, so calling
        # it in a loop runs the traits one after another. starmap() submits
        # the whole batch to the workers and still returns the results in
        # input order. The context manager releases the worker processes.
        with multiprocessing.Pool(processes=24) as trait_process_pool:
            processed_trait_list = trait_process_pool.starmap(
                process_trait, process_trait_args)

        logger.info('Writing output with the processed traits')
        for trait in processed_trait_list:
            # Remove non-specific trait names which should never be output
            if trait.name.lower() not in ClinVarTrait.NONSPECIFIC_TRAITS:
                output_trait(trait, mapping_writer, curation_writer)

    logger.info('Finished processing trait names')
 def test_trait_names_parsing(self):
     # The fixture is described as holding two records, one with a Pathogenic
     # variant and one with a Benign variant; trait names from *both* records
     # are expected back from the parser.
     resource_path = os.path.join(
         os.path.dirname(__file__),
         '../evidence_string_generation/resources/test_clinvar_record.xml.gz'
     )

     # Collect the name of every parsed trait, keeping the parser's order.
     parsed_names = []
     for parsed_trait in trait_names_parsing.parse_trait_names(resource_path):
         parsed_names.append(parsed_trait.name)

     self.assertEqual(parsed_names, ['leber congenital amaurosis 13'])
 def test_trait_names_parsing(self):
     # The fixture is described as holding two records, one with a Pathogenic
     # variant and one with a Benign variant.
     # NOTE(review): the previous comment said only the Pathogenic record's
     # trait names should be returned, but it was cut off mid-sentence —
     # confirm the intended filtering against the parser.
     fixture_dir = os.path.dirname(__file__)
     fixture_path = os.path.join(fixture_dir, 'resources/variant_summary.tsv.gz')

     # Both sides are sorted, so the parser's output order is irrelevant.
     actual_names = sorted(
         parsed_trait.name
         for parsed_trait in trait_names_parsing.parse_trait_names(fixture_path)
     )
     expected_names = sorted([
         'breast-ovarian cancer, familial 1',
         'hereditary breast and ovarian cancer syndrome',
         'hereditary cancer-predisposing syndrome',
         'not provided',
     ])
     self.assertEqual(actual_names, expected_names)
Пример #5
0
def main(input_filepath, output_mappings_filepath, output_curation_filepath, filters, zooma_host,
         oxo_target_list, oxo_distance):
    """Process each distinct trait name one at a time and write the results.

    Trait names from ``parse_trait_names`` are counted; every distinct name is
    wrapped in a ``Trait`` with its frequency, passed through
    ``process_trait`` and then to ``output_trait``. A progress bar wraps the
    iteration. The mappings file starts with a header row; the curation file
    does not.
    """
    name_frequencies = Counter(parse_trait_names(input_filepath))

    with open(output_mappings_filepath, "w", newline='') as mappings_out, \
            open(output_curation_filepath, "wt") as curation_out:
        mapping_writer = csv.writer(mappings_out, delimiter="\t")
        mapping_writer.writerow(["#clinvar_trait_name", "uri", "label"])
        curation_writer = csv.writer(curation_out, delimiter="\t")

        progress = progressbar.ProgressBar(
            max_value=len(name_frequencies),
            widgets=[progressbar.AdaptiveETA(samples=1000)],
        )

        for name, count in progress(name_frequencies.items()):
            processed = process_trait(
                Trait(name, count), filters, zooma_host, oxo_target_list, oxo_distance)
            output_trait(processed, mapping_writer, curation_writer)
Пример #6
0
def main(input_filepath, output_mappings_filepath, output_curation_filepath,
         filters, zooma_host, oxo_target_list, oxo_distance):
    """Sequentially process the counted trait names and write both outputs.

    The names returned by ``parse_trait_names`` are tallied with ``Counter``.
    For each (name, frequency) pair a ``Trait`` is built, run through
    ``process_trait`` and handed to ``output_trait`` together with the two
    tab-separated writers. Iteration is wrapped in a progress bar.
    """
    parsed_names = parse_trait_names(input_filepath)
    frequency_by_name = Counter(parsed_names)

    # Arguments that are identical for every process_trait call.
    shared_args = (filters, zooma_host, oxo_target_list, oxo_distance)

    with open(output_mappings_filepath, "w", newline='') as mapping_handle, \
            open(output_curation_filepath, "wt") as curation_handle:
        mapping_writer = csv.writer(mapping_handle, delimiter="\t")
        # Only the mappings file receives a header row.
        mapping_writer.writerow(["#clinvar_trait_name", "uri", "label"])
        curation_writer = csv.writer(curation_handle, delimiter="\t")

        eta_widgets = [progressbar.AdaptiveETA(samples=1000)]
        tracker = progressbar.ProgressBar(max_value=len(frequency_by_name),
                                          widgets=eta_widgets)

        for item in tracker(frequency_by_name.items()):
            trait_name, freq = item
            raw_trait = Trait(trait_name, freq)
            finished_trait = process_trait(raw_trait, *shared_args)
            output_trait(finished_trait, mapping_writer, curation_writer)