def validate_assocs(ont, file, outfile, p, args):
    gafwriter = GafWriter(file=outfile)
    with open(file) as gafsource:
        associations = p.association_generator(file=gafsource)
        for assoc in associations:
            gafwriter.write_assoc(assoc)

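# Usage sketch (illustrative only, not part of the original source). Assumes
# GafWriter, GafParser, and assocparser are imported from ontobio.io and that
# an ontology object has already been loaded; the file names and the
# `args=None` placeholder are hypothetical.
def _example_validate_assocs(ont):
    config = assocparser.AssocParserConfig(ontology=ont)
    parser = GafParser(config=config)
    with open("mgi_valid.gaf", "w") as out:
        validate_assocs(ont, "mgi.gaf", out, parser, args=None)
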
def produce_gaf(dataset, source_gaf, ontology_graph, gpipath=None, paint=False, group="unknown"):
    filtered_associations = open(os.path.join(os.path.split(source_gaf)[0], "{}_noiea.gaf".format(dataset)), "w")
    config = assocparser.AssocParserConfig(
        ontology=ontology_graph,
        filter_out_evidence=["IEA"],
        filtered_evidence_file=filtered_associations,
        gpi_authority_path=gpipath,
        paint=paint
    )
    validated_gaf_path = os.path.join(os.path.split(source_gaf)[0], "{}_valid.gaf".format(dataset))
    outfile = open(validated_gaf_path, "w")
    gafwriter = GafWriter(file=outfile)
    click.echo("Validating source GAF: {}".format(source_gaf))
    parser = GafParser(config=config, group=group, dataset=dataset)

    with open(source_gaf) as sg:
        lines = sum(1 for line in sg)
    with open(source_gaf) as gaf:
        with click.progressbar(iterable=parser.association_generator(file=gaf), length=lines) as associations:
            for assoc in associations:
                gafwriter.write_assoc(assoc)
    outfile.close()
    filtered_associations.close()

    with open(os.path.join(os.path.split(source_gaf)[0], "{}.report.md".format(dataset)), "w") as report_md:
        report_md.write(parser.report.to_markdown())

    with open(os.path.join(os.path.split(source_gaf)[0], "{}.report.json".format(dataset)), "w") as report_json:
        report_json.write(json.dumps(parser.report.to_report_json(), indent=4))

    return [validated_gaf_path, filtered_associations.name]

def produce_gaf(dataset, source_gaf, ontology_graph, gpipath=None, paint=False, group="unknown",
                rule_metadata=None, goref_metadata=None, db_entities=None, group_idspace=None,
                format="gaf", suppress_rule_reporting_tags=[], annotation_inferences=None):
    filtered_associations = open(os.path.join(os.path.split(source_gaf)[0], "{}_noiea.gaf".format(dataset)), "w")
    config = assocparser.AssocParserConfig(
        ontology=ontology_graph,
        filter_out_evidence=["IEA"],
        filtered_evidence_file=filtered_associations,
        gpi_authority_path=gpipath,
        paint=paint,
        rule_metadata=rule_metadata,
        goref_metadata=goref_metadata,
        entity_idspaces=db_entities,
        group_idspace=group_idspace,
        suppress_rule_reporting_tags=suppress_rule_reporting_tags,
        annotation_inferences=annotation_inferences
    )
    split_source = os.path.split(source_gaf)[0]
    validated_gaf_path = os.path.join(split_source, "{}_valid.gaf".format(dataset))
    outfile = open(validated_gaf_path, "w")
    gafwriter = GafWriter(file=outfile, source=dataset)
    click.echo("Validating source {}: {}".format(format, source_gaf))
    parser = create_parser(config, group, dataset, format)

    with open(source_gaf) as sg:
        lines = sum(1 for line in sg)
    with open(source_gaf) as gaf:
        with click.progressbar(iterable=parser.association_generator(file=gaf), length=lines) as associations:
            for assoc in associations:
                gafwriter.write_assoc(assoc)
    outfile.close()
    filtered_associations.close()

    with open(os.path.join(split_source, "{}.report.md".format(dataset)), "w") as report_md:
        report_md.write(parser.report.to_markdown())

    with open(os.path.join(split_source, "{}.report.json".format(dataset)), "w") as report_json:
        report_json.write(json.dumps(parser.report.to_report_json(), indent=4))

    return [validated_gaf_path, filtered_associations.name]

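# `create_parser` is referenced above but not defined in this section. A
# minimal sketch of what such a factory might look like, assuming ontobio's
# GafParser and GpadParser (from ontobio.io.gafparser / ontobio.io.gpadparser);
# the real function's dispatch and keyword arguments may differ.
def _create_parser_sketch(config, group, dataset, format="gaf"):
    if format == "gpad":
        return GpadParser(config=config)
    return GafParser(config=config, group=group, dataset=dataset)
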
def infer(ontology_path, target, gaf):
    ontology_graph = ontology(ontology_path)
    writer = GafWriter(file=target)
    assoc_generator = gafparser_generator(ontology_graph, gaf)
    line_count = 0
    for association in assoc_generator:
        if association["relation"]["id"] != ENABLES:
            continue  # Skip all non enables annotations

        inferred_associations = materialize_inferences(ontology_graph, association)
        if len(inferred_associations) > 0:
            click.echo("Materialized {} associations".format(len(inferred_associations)))

        for inferred in inferred_associations:
            writer.write_assoc(inferred)

        line_count += 1
        if line_count % 100 == 0:
            click.echo("Processed {} lines".format(line_count))

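# `gafparser_generator`, `materialize_inferences`, and ENABLES are referenced
# above but defined elsewhere in this module. A minimal sketch of what
# `gafparser_generator` might look like, assuming ontobio's GafParser and
# AssocParserConfig; the real helper may configure the parser differently.
def _gafparser_generator_sketch(ontology_graph, gaf_path):
    config = assocparser.AssocParserConfig(ontology=ontology_graph)
    parser = GafParser(config=config)
    with open(gaf_path) as gaf_file:
        for association in parser.association_generator(file=gaf_file):
            yield association
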
def produce_gaf(dataset, source_gaf, ontology_graph, gpipath=None, paint=False, group="unknown",
                rule_metadata=None, goref_metadata=None, db_entities=None, group_idspace=None,
                format="gaf", suppress_rule_reporting_tags=[], annotation_inferences=None,
                group_metadata=None, extensions_constraints=None, rule_contexts=[]):
    filtered_associations = open(
        os.path.join(os.path.split(source_gaf)[0], "{}_noiea.gaf".format(dataset)), "w")
    config = assocparser.AssocParserConfig(
        ontology=ontology_graph,
        filter_out_evidence=["IEA"],
        filtered_evidence_file=filtered_associations,
        gpi_authority_path=gpipath,
        paint=paint,
        rule_metadata=rule_metadata,
        goref_metadata=goref_metadata,
        entity_idspaces=db_entities,
        group_idspace=group_idspace,
        suppress_rule_reporting_tags=suppress_rule_reporting_tags,
        annotation_inferences=annotation_inferences,
        group_metadata=group_metadata,
        extensions_constraints=extensions_constraints,
        rule_contexts=rule_contexts)
    logger.info("Producing {}".format(source_gaf))
    # logger.info("AssocParserConfig used: {}".format(config))
    split_source = os.path.split(source_gaf)[0]
    validated_gaf_path = os.path.join(split_source, "{}_valid.gaf".format(dataset))
    outfile = open(validated_gaf_path, "w")
    gafwriter = GafWriter(file=outfile, source=dataset)
    click.echo("Validating source {}: {}".format(format, source_gaf))
    parser = create_parser(config, group, dataset, format)

    with open(source_gaf) as sg:
        lines = sum(1 for line in sg)
    with open(source_gaf) as gaf:
        with click.progressbar(iterable=parser.association_generator(file=gaf), length=lines) as associations:
            for assoc in associations:
                gafwriter.write_assoc(assoc)
    outfile.close()
    filtered_associations.close()

    report_markdown_path = os.path.join(split_source, "{}.report.md".format(dataset))
    logger.info("About to write markdown report to {}".format(report_markdown_path))
    with open(report_markdown_path, "w") as report_md:
        logger.info("Opened for writing {}".format(report_markdown_path))
        report_md.write(parser.report.to_markdown())
    logger.info("markdown {} written out".format(report_markdown_path))
    logger.info("Markdown current stack:")
    if logger.getEffectiveLevel() == logging.INFO:
        traceback.print_stack()

    report_json_path = os.path.join(split_source, "{}.report.json".format(dataset))
    logger.info("About to write json report to {}".format(report_json_path))
    with open(report_json_path, "w") as report_json:
        logger.info("Opened for writing {}".format(report_json_path))
        report_json.write(json.dumps(parser.report.to_report_json(), indent=4))
    logger.info("json {} written out".format(report_json_path))
    logger.info("gorule-13 first 10 messages: {}".format(
        json.dumps(parser.report.to_report_json()["messages"].get("gorule-0000013", [])[:10], indent=4)))
    logger.info("json current stack:")
    if logger.getEffectiveLevel() == logging.INFO:
        traceback.print_stack()

    return [validated_gaf_path, filtered_associations.name]

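# Usage sketch (illustrative only, not part of the original pipeline). Assumes
# the ontology graph is built with ontobio's OntologyFactory
# (from ontobio.ontol_factory import OntologyFactory); the GO JSON path and the
# "mgi" dataset/GAF paths are placeholders.
def _example_produce_gaf():
    ontology_graph = OntologyFactory().create("go-ontology.json")
    valid_gaf, noiea_gaf = produce_gaf("mgi", "datasets/mgi.gaf", ontology_graph, group="mgi")
    print("Validated GAF: {}; IEA-filtered GAF: {}".format(valid_gaf, noiea_gaf))
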
def find_iea_iba_matches(self, filename):
    all_bp_ev_counts = {}
    grouped_annots = {}
    leftover_annots = []
    log(filename)
    mod_annots = self.parser.parse(filename, skipheader=True)
    self.all_annots = self.all_annots + mod_annots
    for a in mod_annots:
        term = a["object"]["id"]
        aspect = a["aspect"]
        if aspect == "P" or aspect == "F":
            grouped_annots, using_annot = file_away(grouped_annots, a)
            if aspect == "P":
                if not using_annot:
                    leftover_annots.append(a)
                evidence_code = a["evidence"]["type"]
                if evidence_code not in all_bp_ev_counts:
                    all_bp_ev_counts[evidence_code] = 1
                else:
                    all_bp_ev_counts[evidence_code] += 1

    dismissed_annots = []
    # match_rows = []
    # NOTE: `args` is assumed to be a module-level argparse namespace, not a parameter of this method.
    base_f = os.path.basename(filename)
    match_outfile = base_f + "_matches.tsv"
    if args.match_output_suffix:
        match_outfile = "{}.{}.tsv".format(base_f, args.match_output_suffix)
    with open(match_outfile, 'w') as mof:
        writer = csv.writer(mof, delimiter="\t")
        for ec in grouped_annots:
            match_rows = []
            ### For each evi_code, count unique annots that have with_matches (flatten dict)
            log("BP {} withs count: {}".format(
                ec, len(match_aspect(flatten_with_dict(grouped_annots[ec], uniqify=True), 'P'))))
            ### Loop through with_value annots, segregate BPs from MFs;
            ### if len(BPs) > 0 and len(MFs) > 0 this with_value set gets written out
            for with_value in grouped_annots[ec]:
                bp_annots = match_aspect(grouped_annots[ec][with_value], 'P')
                mf_annots = match_aspect(grouped_annots[ec][with_value], 'F')
                if len(bp_annots) < 1:
                    grouped_annots[ec][with_value] = []  # Delete this key
                elif len(mf_annots) < 1:
                    dismissed_annots = dismissed_annots + bp_annots  # Cleanup (uniqify, remove annots promoted elsewhere) later
                    grouped_annots[ec][with_value] = []  # Delete this key
                else:
                    # Continue on promoting
                    for a in bp_annots:
                        gene_id = a["subject"]["id"]
                        gene_id_bits = gene_id.split(":")
                        id_ns = gene_id_bits[0]
                        id = gene_id_bits[-1]
                        # Find 'with-matched' MF annotations to same gene product
                        gene_mf_annots = annots_by_subject(mf_annots, gene_id)
                        if len(gene_mf_annots) == 0:
                            # Should probably add this BP annot back to unused list
                            if a not in leftover_annots:
                                leftover_annots.append(a)
                            continue
                        gene_symbol = a["subject"]["label"]
                        relation = first_qualifier(a)
                        bp_term = a["object"]["id"]
                        bp_term_label = self.ontology.label(bp_term)
                        bp_evidence_code = a["evidence"]["type"]
                        bp_reference = ",".join(a["evidence"]["has_supporting_reference"])
                        bp_assigned_by = a["provided_by"]
                        for mfa in gene_mf_annots:
                            mf_term = mfa["object"]["id"]
                            mf_term_label = self.ontology.label(mf_term)
                            mf_evidence_code = mfa["evidence"]["type"]
                            mf_reference = ",".join(mfa["evidence"]["has_supporting_reference"])
                            mf_assigned_by = mfa["provided_by"]
                            out_fields = [
                                with_value,
                                id_ns,
                                id,
                                gene_symbol,
                                relation,
                                bp_term,
                                bp_term_label,
                                bp_evidence_code,
                                bp_reference,
                                bp_assigned_by,
                                mf_term,
                                mf_term_label,
                                mf_evidence_code,
                                mf_reference,
                                mf_assigned_by
                            ]
                            match_rows.append(out_fields)
            match_rows.sort(key=lambda k: k[2])
            for mr in match_rows:
                writer.writerow(mr)

    # print("Total:", len(all_annots))
    # print("Leftovers:", len(leftover_annots))
    all_promoted_annots = []
    for ev in grouped_annots:
        promoted_bp_annots = match_aspect(flatten_with_dict(grouped_annots[ev], uniqify=True), 'P')
        all_promoted_annots = all_promoted_annots + promoted_bp_annots
        log("{} {} BP annotations inputted".format(all_bp_ev_counts[ev], ev))
        # 5000 IEA BP annotations ‘involved in’
        log("{} {} BP annotations ‘involved in’".format(len(promoted_bp_annots), ev))
    # self.all_promoted_annots[filename] = all_promoted_annots
    self.all_promoted_annots = self.all_promoted_annots + all_promoted_annots

    ## Something going on below is super slow!
    ### Cleanup leftovers
    for da in dismissed_annots:
        if da not in leftover_annots and da not in all_promoted_annots:
            leftover_annots.append(da)
    # print("Leftovers:", len(leftover_annots))
    log("Leftovers: {}".format(len(leftover_annots)))
    outfile = base_f + "_leftovers.gaf"
    if args.leftover_output:
        outfile = "{}.{}_leftovers.gaf".format(base_f, args.leftover_output)
    with open(outfile, "w") as lf:
        gaf_writer = GafWriter(lf)
        for a in leftover_annots:
            gaf_writer.write_assoc(a)

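# `file_away`, `flatten_with_dict`, `match_aspect`, `annots_by_subject`, and
# `first_qualifier` are helpers defined elsewhere in this script. Minimal
# sketches of the two simplest ones, inferred from how they are called above;
# the real implementations may differ.
def _match_aspect_sketch(annots, aspect):
    # Keep only annotations whose GAF aspect ("P", "F", or "C") matches.
    return [a for a in annots if a["aspect"] == aspect]

def _annots_by_subject_sketch(annots, subject_id):
    # Keep only annotations whose subject (gene product) id matches.
    return [a for a in annots if a["subject"]["id"] == subject_id]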