def main(input_filepath, output_mappings_filepath, output_curation_filepath, filters, zooma_host, oxo_target_list,
         oxo_distance):
    """Parse trait names, process each one in a worker pool, and write the results as two TSV files.

    :param input_filepath: Path handed to ``parse_trait_names`` to obtain the list of trait names.
    :param output_mappings_filepath: Path of the tab-separated mappings file. It is (re)created and starts with
        the header row ``#clinvar_trait_name``, ``uri``, ``label``.
    :param output_curation_filepath: Path of the tab-separated curation file. It is (re)created without a header.
    :param filters: Forwarded unchanged to ``process_trait``.
    :param zooma_host: Forwarded unchanged to ``process_trait``.
    :param oxo_target_list: Forwarded unchanged to ``process_trait``.
    :param oxo_distance: Forwarded unchanged to ``process_trait``.
    """
    logger.info('Started parsing trait names')
    # Collapse repeated names so that every distinct trait is processed once, together with its frequency.
    trait_names_counter = Counter(parse_trait_names(input_filepath))
    logger.info("Loaded %s trait names", len(trait_names_counter))

    # Both files are consumed by csv.writer, so both are opened with newline='' as the csv module requires;
    # otherwise an extra '\r' is emitted on platforms whose line separator is '\r\n'.
    with open(output_mappings_filepath, "w", newline='') as mapping_file, \
            open(output_curation_filepath, "w", newline='') as curation_file:
        mapping_writer = csv.writer(mapping_file, delimiter="\t")
        mapping_writer.writerow(["#clinvar_trait_name", "uri", "label"])
        curation_writer = csv.writer(curation_file, delimiter="\t")

        logger.info('Processing trait names in parallel')
        process_args = [
            (Trait(trait_name, freq), filters, zooma_host, oxo_target_list, oxo_distance)
            for trait_name, freq in trait_names_counter.items()
        ]
        # Pool.apply() blocks until its single call has finished, so calling it once per trait runs the traits
        # strictly one after another. starmap() hands the whole batch to the workers at once and returns the
        # results in input order. The context manager shuts the worker processes down afterwards.
        with multiprocessing.Pool(processes=12) as trait_process_pool:
            processed_trait_list = trait_process_pool.starmap(process_trait, process_args)

        # Writing stays in the parent process so that rows are emitted sequentially, in input order.
        for trait in processed_trait_list:
            output_trait(trait, mapping_writer, curation_writer)

    logger.info('Finished processing trait names')
def main(input_filepath, output_mappings_filepath, output_curation_filepath, filters, zooma_host, oxo_target_list,
         oxo_distance):
    """Load traits, process them in a worker pool, and write mappings and curation TSV files.

    Traits whose lower-cased name is listed in ``ClinVarTrait.NONSPECIFIC_TRAITS`` are still processed but are
    never written to either output file.

    :param input_filepath: Path handed to ``parse_trait_names``; the returned items are used as traits directly.
    :param output_mappings_filepath: Path of the tab-separated mappings file. It is (re)created and starts with
        the header row ``#clinvar_trait_name``, ``uri``, ``label``.
    :param output_curation_filepath: Path of the tab-separated curation file. It is (re)created without a header.
    :param filters: Forwarded unchanged to ``process_trait``.
    :param zooma_host: Forwarded unchanged to ``process_trait``.
    :param oxo_target_list: Forwarded unchanged to ``process_trait``.
    :param oxo_distance: Forwarded unchanged to ``process_trait``.
    """
    logger.info('Started parsing trait names')
    trait_list = parse_trait_names(input_filepath)
    logger.info("Loaded %s trait names", len(trait_list))

    # csv.writer expects its file objects to be opened with newline=''; without it an extra '\r' is written on
    # platforms whose line separator is '\r\n'. Apply this to both outputs, not only the mappings file.
    with open(output_mappings_filepath, "w", newline='') as mapping_file, \
            open(output_curation_filepath, "w", newline='') as curation_file:
        mapping_writer = csv.writer(mapping_file, delimiter="\t")
        mapping_writer.writerow(["#clinvar_trait_name", "uri", "label"])
        curation_writer = csv.writer(curation_file, delimiter="\t")

        logger.info('Processing trait names in parallel')
        # Pool.apply() waits for each call to complete before returning, so a comprehension of apply() calls
        # processes traits one at a time. starmap() dispatches all of them to the workers and preserves input
        # order in its result. Leaving the ``with`` block releases the worker processes.
        with multiprocessing.Pool(processes=24) as trait_process_pool:
            processed_trait_list = trait_process_pool.starmap(
                process_trait,
                ((trait, filters, zooma_host, oxo_target_list, oxo_distance) for trait in trait_list),
            )

        logger.info('Writing output with the processed traits')
        for trait in processed_trait_list:
            # Remove non-specific trait names which should never be output
            if trait.name.lower() in ClinVarTrait.NONSPECIFIC_TRAITS:
                continue
            output_trait(trait, mapping_writer, curation_writer)

    logger.info('Finished processing trait names')
def main(input_filepath, output_mappings_filepath, output_curation_filepath, filters, zooma_host, oxo_target_list,
         oxo_distance):
    """Process every distinct trait name sequentially and write the results as two TSV files.

    A progress bar showing an adaptive ETA is displayed while the traits are processed.

    :param input_filepath: Path handed to ``parse_trait_names`` to obtain the list of trait names.
    :param output_mappings_filepath: Path of the tab-separated mappings file. It is (re)created and starts with
        the header row ``#clinvar_trait_name``, ``uri``, ``label``.
    :param output_curation_filepath: Path of the tab-separated curation file. It is (re)created without a header.
    :param filters: Forwarded unchanged to ``process_trait``.
    :param zooma_host: Forwarded unchanged to ``process_trait``.
    :param oxo_target_list: Forwarded unchanged to ``process_trait``.
    :param oxo_distance: Forwarded unchanged to ``process_trait``.
    """
    # Count duplicates so that each distinct name is processed once and carries its frequency.
    trait_names_counter = Counter(parse_trait_names(input_filepath))

    # Both files feed a csv.writer, so both need newline=''; otherwise an extra '\r' is written on platforms
    # whose line separator is '\r\n'.
    with open(output_mappings_filepath, "w", newline='') as mapping_file, \
            open(output_curation_filepath, "w", newline='') as curation_file:
        mapping_writer = csv.writer(mapping_file, delimiter="\t")
        mapping_writer.writerow(["#clinvar_trait_name", "uri", "label"])
        curation_writer = csv.writer(curation_file, delimiter="\t")

        bar = progressbar.ProgressBar(
            max_value=len(trait_names_counter),
            widgets=[progressbar.AdaptiveETA(samples=1000)],
        )
        for trait_name, freq in bar(trait_names_counter.items()):
            trait = process_trait(Trait(trait_name, freq), filters, zooma_host, oxo_target_list, oxo_distance)
            output_trait(trait, mapping_writer, curation_writer)
def main(input_filepath, output_mappings_filepath, output_curation_filepath, filters, zooma_host, oxo_target_list,
         oxo_distance):
    """Run the trait processing pipeline serially, writing one mappings TSV and one curation TSV.

    Trait names are read via ``parse_trait_names`` and de-duplicated with their occurrence counts. Each distinct
    name is wrapped in a ``Trait``, passed through ``process_trait`` and handed to ``output_trait`` together with
    both writers. Progress is reported through a ``progressbar`` adaptive ETA widget.

    :param input_filepath: Path handed to ``parse_trait_names``.
    :param output_mappings_filepath: Destination of the tab-separated mappings file; overwritten if present. Its
        first row is the header ``#clinvar_trait_name``, ``uri``, ``label``.
    :param output_curation_filepath: Destination of the tab-separated curation file; overwritten if present.
    :param filters: Forwarded unchanged to ``process_trait``.
    :param zooma_host: Forwarded unchanged to ``process_trait``.
    :param oxo_target_list: Forwarded unchanged to ``process_trait``.
    :param oxo_distance: Forwarded unchanged to ``process_trait``.
    """
    name_frequencies = Counter(parse_trait_names(input_filepath))
    eta_bar = progressbar.ProgressBar(
        max_value=len(name_frequencies),
        widgets=[progressbar.AdaptiveETA(samples=1000)],
    )

    # The csv module documents that files given to csv.writer should be opened with newline=''. The curation
    # file previously lacked it, which yields '\r\r\n' row endings on platforms that translate '\n' to '\r\n'.
    with open(output_mappings_filepath, "w", newline='') as mapping_file, \
            open(output_curation_filepath, "wt", newline='') as curation_file:
        mapping_writer = csv.writer(mapping_file, delimiter="\t")
        curation_writer = csv.writer(curation_file, delimiter="\t")
        mapping_writer.writerow(["#clinvar_trait_name", "uri", "label"])

        for trait_name, freq in eta_bar(name_frequencies.items()):
            unprocessed_trait = Trait(trait_name, freq)
            processed_trait = process_trait(unprocessed_trait, filters, zooma_host, oxo_target_list, oxo_distance)
            output_trait(processed_trait, mapping_writer, curation_writer)