def compare_associations(assocs1, assocs2, output, file1, file2):
    """
    Report every association of ``assocs1`` that has no match in ``assocs2``.

    Two associations match when they agree on subject id, object id,
    normalized relation, negation flag, evidence type, supporting references
    and with/support-from values. Entries that are plain dicts are ignored
    on both sides.

    A markdown report is written to ``<output>_compare_report`` and a short
    summary is printed to stdout.

    :param assocs1: List of GoAssociations to compare from file1
    :type assocs1: List[GoAssociation]
    :param assocs2: List of GoAssociations to compare from file2
    :type assocs2: List[GoAssociation]
    :param file1: The file name of the file provided in the click for reporting purposes.
    :type file1: str
    :param file2: The file name of the file provided in the click for reporting purposes.
    :type file2: str
    :param output: Prefix of the reported files for reporting purposes.
    :type output: str
    """

    def _match_key(assoc):
        # The tuple of fields that decides whether two associations are "the same".
        return (
            str(assoc.subject.id),
            str(assoc.object.id),
            normalize_relation(assoc.relation),
            assoc.negated,
            assoc.evidence.type,
            assoc.evidence._supporting_reference_to_str(),
            assoc.evidence._with_support_from_to_str(),
        )

    report_path = output + "_compare_report"

    # The file is opened up front (as before) so an unwritable path fails
    # early; the context manager guarantees it is closed even on error.
    with open(report_path, "w") as compare_report_file:
        processed_associations = len(assocs1)
        report = Report()

        known_keys = {_match_key(x) for x in assocs2 if not isinstance(x, dict)}
        difference = [
            y for y in assocs1
            if not isinstance(y, dict) and _match_key(y) not in known_keys
        ]

        for diff in difference:
            report.add_association(diff)
            report.n_lines = report.n_lines + 1
            report.error(diff.source_line, qc.ResultType.ERROR,
                         "line from %s has NO match in %s" % (file1, file2), "")

        md_report, number_of_messages = markdown_report(report, processed_associations)

        s = "\n\n## DIFF SUMMARY\n\n"
        s += "This report generated on {}\n\n".format(datetime.date.today())
        s += "  * Total Unmatched Associations: {}\n".format(number_of_messages)
        s += "  * Total Associations Compared: " + str(len(assocs1)) + "\n"
        s += "  * See report: " + report_path + "\n"

        print(s)
        compare_report_file.write(md_report)
class GpiParser(EntityParser):
    """Line-oriented parser that turns GPI lines into entity dictionaries."""

    def __init__(self, config=None):
        """
        Arguments:
        ---------

        config : a AssocParserConfig object
            When omitted, a default ``AssocParserConfig()`` is created.
        """
        if config is None:
            config = AssocParserConfig()
        self.config = config
        self.report = Report()

    def parse_line(self, line):
        """Parses a single line of a GPI.

        Return a tuple `(processed_line, entities)`. Typically
        there will be a single entity, but in some cases there
        may be none (invalid line) or multiple (disjunctive clause in
        annotation extensions)

        Lines with fewer than 7 columns are reported as errors and yield no
        entities. Lines with 7 to 9 columns are padded with empty strings up
        to 10 columns. Lines with more than 10 columns are truncated to 10
        columns and a warning is reported.

        Note: most applications will only need to call this directly if they require fine-grained control of parsing. For most purposes,
        :method:`parse_file` can be used over the whole file

        Arguments
        ---------
        line : str
            A single tab-separated line from a GPI file

        """
        min_columns = 7
        num_columns = 10

        vals = line.split("\t")

        if len(vals) < min_columns:
            self.report.error(line, Report.WRONG_NUMBER_OF_COLUMNS, "")
            return line, []

        if len(vals) > num_columns:
            # Extra columns used to crash the unpacking below with ValueError;
            # drop them and warn, as the GAF/GPAD line parsers do.
            self.report.warning(
                line, Report.WRONG_NUMBER_OF_COLUMNS, "",
                msg="There were more than 10 columns in this line. Proceeding by cutting off extra columns after column 10.")
            vals = vals[:num_columns]
        elif len(vals) < num_columns:
            # Trailing columns are optional: pad so the unpacking always works.
            vals += [""] * (num_columns - len(vals))

        [
            db,
            db_object_id,
            db_object_symbol,
            db_object_name,
            db_object_synonym,
            db_object_type,
            taxon,
            parent_object_id,
            xrefs,
            _properties  # parsed out of the line but not used below
        ] = vals

        split_line = assocparser.SplitLine(line=line, values=vals, taxon=taxon)

        ## --
        ## db + db_object_id. CARD=1
        ## --
        entity_id = self._pair_to_id(db, db_object_id)
        if not self._validate_id(entity_id, split_line, ENTITY):
            return line, []

        ## --
        ## db_object_synonym CARD=0..*
        ## --
        synonyms = db_object_synonym.split("|") if db_object_synonym != "" else []

        # TODO: DRY
        if parent_object_id == "":
            parents = []
        else:
            parents = [self._normalize_id(x) for x in parent_object_id.split("|")]
            for p in parents:
                # The result is ignored here, so a failed parent check does
                # not make this method drop the entity.
                self._validate_id(p, split_line, ENTITY)

        xref_ids = xrefs.split("|") if xrefs != "" else []

        obj = {
            'id': entity_id,
            'label': db_object_symbol,
            'full_name': db_object_name,
            'synonyms': synonyms,
            'type': db_object_type,
            'parents': parents,
            'xrefs': xref_ids,
            'taxon': {
                'id': self._taxon_id(taxon, split_line)
            }
        }
        return line, [obj]
def to_association(gaf_line: List[str], report=None, group="unknown", dataset="unknown", qualifier_parser=assocparser.Qualifier2_1(), bio_entities=None) -> assocparser.ParseResult:
    """
    Convert one GAF line, already split into columns, into a GoAssociation.

    Lines with more than 17 columns are truncated (with a warning); lines
    with 15 or 16 columns are padded with empty strings to 17. The caller's
    list is never modified.

    On any validation failure an error is added to the report and a
    ``ParseResult(source_line, [], True, report=report)`` is returned. On
    success the result is ``ParseResult(source_line, [association], False,
    report=report)``.

    :param gaf_line: the column values of a single GAF line
    :param report: Report to collect messages in; a new one is created from
        ``group`` and ``dataset`` when None
    :param group: group name used only when a new Report is created
    :param dataset: dataset name used only when a new Report is created
    :param qualifier_parser: validator whose ``validate`` is applied to the
        qualifier column (index 3)
    :param bio_entities: lookup of known entities by subject curie; when it
        holds an entry that differs from the subject built from the line,
        that entry replaces the subject. Defaults to an empty BioEntities.
    """
    report = Report(group=group, dataset=dataset) if report is None else report
    bio_entities = collections.BioEntities(dict()) if bio_entities is None else bio_entities
    source_line = "\t".join(gaf_line)

    def _skip():
        # Result returned for every line that fails validation.
        return assocparser.ParseResult(source_line, [], True, report=report)

    if source_line == "":
        report.error(source_line, "Blank Line", "EMPTY", "Blank lines are not allowed", rule=1)
        return _skip()

    if len(gaf_line) > 17:
        # If we see more than 17 columns, we will just cut off the columns after column 17
        report.warning(source_line, assocparser.Report.WRONG_NUMBER_OF_COLUMNS, "",
            msg="There were more than 17 columns in this line. Proceeding by cutting off extra columns after column 17.",
            rule=1)
        gaf_line = gaf_line[:17]

    if 17 > len(gaf_line) >= 15:
        # Build a new list instead of `+=` so the caller's list is not mutated.
        gaf_line = gaf_line + [""] * (17 - len(gaf_line))

    if len(gaf_line) != 17:
        report.error(source_line, assocparser.Report.WRONG_NUMBER_OF_COLUMNS, "",
            msg="There were {columns} columns found in this line, and there should be 15 (for GAF v1) or 17 (for GAF v2)".format(columns=len(gaf_line)), rule=1)
        return _skip()

    ## check for missing columns
    ## We use indeces here because we run GO RULES before we split the vals into individual variables
    DB_INDEX = 0
    DB_OBJECT_INDEX = 1
    TAXON_INDEX = 12
    REFERENCE_INDEX = 5
    if gaf_line[DB_INDEX] == "":
        report.error(source_line, Report.INVALID_IDSPACE, "EMPTY", "col1 is empty", taxon=gaf_line[TAXON_INDEX], rule=1)
        return _skip()
    if gaf_line[DB_OBJECT_INDEX] == "":
        report.error(source_line, Report.INVALID_ID, "EMPTY", "col2 is empty", taxon=gaf_line[TAXON_INDEX], rule=1)
        return _skip()
    if gaf_line[REFERENCE_INDEX] == "":
        report.error(source_line, Report.INVALID_ID, "EMPTY", "reference column 6 is empty", taxon=gaf_line[TAXON_INDEX], rule=1)
        return _skip()

    parsed_taxons_result = gaf_line_validators["taxon"].validate(gaf_line[TAXON_INDEX])  # type: assocparser.ValidateResult
    if not parsed_taxons_result.valid:
        report.error(source_line, Report.INVALID_TAXON, parsed_taxons_result.original, parsed_taxons_result.message, taxon=parsed_taxons_result.original, rule=1)
        return _skip()

    taxon = parsed_taxons_result.parsed[0]

    # parse_date returns None for an unusable date; it is handed the report,
    # so no extra error is added here.
    date = assocparser.parse_date(gaf_line[13], report, source_line)
    if date is None:
        return _skip()

    interacting_taxon = parsed_taxons_result.parsed[1] if len(parsed_taxons_result.parsed) == 2 else None
    subject_curie = association.Curie(gaf_line[0], gaf_line[1])
    subject = association.Subject(subject_curie, gaf_line[2], [gaf_line[9]], gaf_line[10].split("|"), [association.map_gp_type_label_to_curie(gaf_line[11])], taxon)
    gpi_entity = bio_entities.get(subject_curie)
    if gpi_entity is not None and subject != gpi_entity:
        subject = gpi_entity

    # column 4 is qualifiers -> index 3
    # For allowed, see http://geneontology.org/docs/go-annotations/#annotation-qualifiers
    # We use the below validate to check validity of qualifiers, not as much to *parse* them into the GoAssociation object.
    # For GoAssociation we will use the qualifiers list from _parse_qualifier below. This is fine because it does not include `NOT`, etc
    # This is confusing, and we can fix later on by consolidating qualifier and relation in GoAssociation.
    parsed_qualifiers = qualifier_parser.validate(gaf_line[3])
    if not parsed_qualifiers.valid:
        report.error(source_line, Report.INVALID_QUALIFIER, parsed_qualifiers.original, parsed_qualifiers.message, taxon=gaf_line[TAXON_INDEX], rule=1)
        return _skip()

    aspect = gaf_line[8]
    negated, relation_label, qualifiers = assocparser._parse_qualifier(gaf_line[3], aspect)
    # Note: Relation label is grabbed from qualifiers, if any exist in _parse_qualifier
    qualifiers = [association.Curie.from_str(curie_util.contract_uri(relations.lookup_label(q), strict=False)[0]) for q in qualifiers]

    # Check the parsed CURIE itself: a freshly constructed Term is never an
    # Error, so testing the Term could not detect a malformed GO id.
    term_curie = association.Curie.from_str(gaf_line[4])
    if isinstance(term_curie, association.Error):
        report.error(source_line, Report.INVALID_SYMBOL, gaf_line[4], "Problem parsing GO Term", taxon=gaf_line[TAXON_INDEX], rule=1)
        return _skip()
    go_term = association.Term(term_curie, taxon)

    # References
    references = [association.Curie.from_str(e) for e in gaf_line[5].split("|") if e]
    for r in references:
        if isinstance(r, association.Error):
            report.error(source_line, Report.INVALID_SYMBOL, gaf_line[5], "Problem parsing references", taxon=gaf_line[TAXON_INDEX], rule=1)
            return _skip()

    # The trailing None makes gorefs[0] safe when there is no GO_REF reference.
    gorefs = [ref for ref in references if ref.namespace == "GO_REF"] + [None]
    eco_curie = ecomap.coderef_to_ecoclass(gaf_line[6], reference=gorefs[0])
    if eco_curie is None:
        report.error(source_line, Report.UNKNOWN_EVIDENCE_CLASS, gaf_line[6], msg="Expecting a known ECO GAF code, e.g ISS", rule=1)
        return _skip()

    withfroms = association.ConjunctiveSet.str_to_conjunctions(gaf_line[7])
    if isinstance(withfroms, association.Error):
        report.error(source_line, Report.INVALID_SYMBOL, gaf_line[7], "Problem parsing with/from", taxon=gaf_line[TAXON_INDEX], rule=1)
        return _skip()

    evidence_type = association.Curie.from_str(eco_curie)
    if isinstance(evidence_type, association.Error):
        report.error(source_line, Report.INVALID_SYMBOL, gaf_line[6], "Problem parsing evidence type", taxon=gaf_line[TAXON_INDEX], rule=1)
        return _skip()

    evidence = association.Evidence(evidence_type, references, withfroms)
    reference_errors = [e for e in evidence.has_supporting_reference if isinstance(e, association.Error)]
    if reference_errors:
        report.error(source_line, Report.INVALID_SYMBOL, gaf_line[5], reference_errors[0].info, taxon=str(taxon), rule=1)
        return _skip()

    subject_extensions = []
    if gaf_line[16]:
        subject_filler = association.Curie.from_str(gaf_line[16])
        if isinstance(subject_filler, association.Error):
            report.error(source_line, assocparser.Report.INVALID_ID, gaf_line[16], subject_filler.info, taxon=str(taxon), rule=1)
            return _skip()
        # filler is not an Error, so keep moving
        subject_extensions.append(association.ExtensionUnit(association.Curie.from_str("rdfs:subClassOf"), subject_filler))

    conjunctions = []
    if gaf_line[15]:
        conjunctions = association.ConjunctiveSet.str_to_conjunctions(
            gaf_line[15],
            conjunct_element_builder=association.ExtensionUnit.from_str)

        if isinstance(conjunctions, association.Error):
            report.error(source_line, Report.EXTENSION_SYNTAX_ERROR, conjunctions.info, "extensions should be relation(curie) and relation should have corresponding URI", taxon=str(taxon), rule=1)
            return _skip()

    relation_uri = relations.lookup_label(relation_label)
    if relation_uri is None:
        report.error(source_line, assocparser.Report.INVALID_QUALIFIER, relation_label, "Could not find CURIE for relation `{}`".format(relation_label), taxon=str(taxon), rule=1)
        return _skip()

    # We don't have to check that this is well formed because we're grabbing it from the known relations URI map.
    relation_curie = association.Curie.from_str(curie_util.contract_uri(relation_uri)[0])

    a = association.GoAssociation(
        # Rebuilt from the normalized (truncated/padded) columns.
        source_line="\t".join(gaf_line),
        subject=subject,
        relation=relation_curie,
        object=go_term,
        negated=negated,
        qualifiers=qualifiers,
        aspect=aspect,
        interacting_taxon=interacting_taxon,
        evidence=evidence,
        subject_extensions=subject_extensions,
        object_extensions=conjunctions,
        provided_by=gaf_line[14],
        date=date,
        properties={})

    return assocparser.ParseResult(source_line, [a], False, report=report)
def to_association(gaf_line: List[str], report=None, group="unknown", dataset="unknown") -> assocparser.ParseResult:
    """
    Convert one GAF line, already split into columns, into a GoAssociation.

    Lines with more than 17 columns are truncated (with a warning); lines
    with 15 or 16 columns are padded with empty strings to 17. The caller's
    list is never modified.

    On any validation failure an error is added to the report and a
    ``ParseResult(source_line, [], True, report=report)`` is returned. On
    success the result is ``ParseResult(source_line, [association], False,
    report=report)``.

    :param gaf_line: the column values of a single GAF line
    :param report: Report to collect messages in; a new one is created from
        ``group`` and ``dataset`` when None
    :param group: group name used only when a new Report is created
    :param dataset: dataset name used only when a new Report is created
    """
    report = Report(group=group, dataset=dataset) if report is None else report
    source_line = "\t".join(gaf_line)

    def _skip():
        # Result returned for every line that fails validation.
        return assocparser.ParseResult(source_line, [], True, report=report)

    if source_line == "":
        report.error(source_line, "Blank Line", "EMPTY", "Blank lines are not allowed", rule=1)
        return _skip()

    if len(gaf_line) > 17:
        # If we see more than 17 columns, we will just cut off the columns after column 17
        report.warning(
            source_line,
            assocparser.Report.WRONG_NUMBER_OF_COLUMNS,
            "",
            msg="There were more than 17 columns in this line. Proceeding by cutting off extra columns after column 17.",
            rule=1)
        gaf_line = gaf_line[:17]

    if 17 > len(gaf_line) >= 15:
        # Build a new list instead of `+=` so the caller's list is not mutated.
        gaf_line = gaf_line + [""] * (17 - len(gaf_line))

    if len(gaf_line) != 17:
        report.error(
            source_line,
            assocparser.Report.WRONG_NUMBER_OF_COLUMNS,
            "",
            msg="There were {columns} columns found in this line, and there should be 15 (for GAF v1) or 17 (for GAF v2)".format(columns=len(gaf_line)),
            rule=1)
        return _skip()

    ## check for missing columns
    ## We use indeces here because we run GO RULES before we split the vals into individual variables
    DB_INDEX = 0
    DB_OBJECT_INDEX = 1
    TAXON_INDEX = 12
    REFERENCE_INDEX = 5
    if gaf_line[DB_INDEX] == "":
        report.error(source_line, Report.INVALID_IDSPACE, "EMPTY", "col1 is empty", taxon=gaf_line[TAXON_INDEX], rule=1)
        return _skip()
    if gaf_line[DB_OBJECT_INDEX] == "":
        report.error(source_line, Report.INVALID_ID, "EMPTY", "col2 is empty", taxon=gaf_line[TAXON_INDEX], rule=1)
        return _skip()
    if gaf_line[TAXON_INDEX] == "":
        report.error(source_line, Report.INVALID_TAXON, "EMPTY", "taxon column is empty", taxon=gaf_line[TAXON_INDEX], rule=1)
        return _skip()
    if gaf_line[REFERENCE_INDEX] == "":
        report.error(source_line, Report.INVALID_ID, "EMPTY", "reference column 6 is empty", taxon=gaf_line[TAXON_INDEX], rule=1)
        return _skip()

    # The taxon column may hold "taxon:A|taxon:B"; a second entry, when there
    # are exactly two, is used as the interacting taxon.
    taxon = gaf_line[12].split("|")
    taxon_curie = taxon[0].replace("taxon", "NCBITaxon")
    interacting_taxon = taxon[1].replace("taxon", "NCBITaxon") if len(taxon) == 2 else None

    subject_curie = "{db}:{id}".format(db=gaf_line[0], id=gaf_line[1])
    subject = association.Subject(subject_curie, gaf_line[2], gaf_line[9], gaf_line[10].split("|"), gaf_line[11], taxon_curie)
    aspect = gaf_line[8]
    negated, relation, qualifiers = assocparser._parse_qualifier(gaf_line[3], aspect)

    # For allowed, see http://geneontology.org/docs/go-annotations/#annotation-qualifiers
    for q in qualifiers:
        if q not in allowed_qualifiers:
            report.error(
                source_line,
                Report.INVALID_QUALIFIER,
                q,
                "Qualifiers must be `contributes_to`, `colocalizes_with`, or `NOT`",
                taxon=gaf_line[TAXON_INDEX],
                rule=1)
            return _skip()

    go_term = association.Term(gaf_line[4], taxon_curie)
    evidence = association.Evidence(
        ecomap.coderef_to_ecoclass(gaf_line[6]),
        [e for e in gaf_line[5].split("|") if e],
        [e for e in gaf_line[7].split("|") if e])

    subject_extensions = [association.ExtensionUnit("rdfs:subClassOf", gaf_line[16])] if gaf_line[16] else []

    # Column 16: "|" separates alternative conjunctions, "," separates the
    # relation(term) units inside one conjunction.
    conjunctions = []
    if gaf_line[15]:
        for conjuncts in gaf_line[15].split("|"):
            extension_units = []
            for u in conjuncts.split(","):
                parsed = relation_tuple.findall(u)
                if len(parsed) == 1:
                    rel, term = parsed[0]
                    extension_units.append(association.ExtensionUnit(rel, term))
                else:
                    # Otherwise, something went bad with the regex, and it's a bad parse
                    # Report the raw taxon column (a string) like every other
                    # call here, not the split list held in `taxon`.
                    report.error(source_line, Report.EXTENSION_SYNTAX_ERROR, u, "extensions should be relation(curie)", taxon=gaf_line[TAXON_INDEX], rule=1)
                    return _skip()

            conjunctions.append(association.ExtensionConjunctions(extension_units))

    object_extensions = association.ExtensionExpression(conjunctions)
    looked_up_rel = relations.lookup_label(relation)
    if looked_up_rel is None:
        report.error(
            source_line,
            assocparser.Report.INVALID_QUALIFIER,
            relation,
            "Qualifer must be \"colocalizes_with\", \"contributes_to\", or \"NOT\"",
            taxon=gaf_line[TAXON_INDEX],
            rule=1)
        return _skip()

    a = association.GoAssociation(
        # Rebuilt from the normalized (truncated/padded) columns.
        source_line="\t".join(gaf_line),
        subject=subject,
        relation=curie_util.contract_uri(looked_up_rel)[0],
        object=go_term,
        negated=negated,
        qualifiers=qualifiers,
        aspect=aspect,
        interacting_taxon=interacting_taxon,
        evidence=evidence,
        subject_extensions=subject_extensions,
        object_extensions=object_extensions,
        provided_by=gaf_line[14],
        date=gaf_line[13],
        properties={})

    return assocparser.ParseResult(source_line, [a], False, report=report)
def to_association(gpad_line: List[str], report=None, group="unknown", dataset="unknown") -> assocparser.ParseResult:
    """
    Convert one GPAD line, already split into columns, into a GoAssociation.

    Lines with more than 12 columns are truncated (with a warning); lines
    with 10 or 11 columns are padded with empty strings to 12. The caller's
    list is never modified.

    On any validation failure an error is added to the report and a
    ``ParseResult(source_line, [], True, report=report)`` is returned. On
    success the result is ``ParseResult(source_line, [association], False,
    report=report)``.

    :param gpad_line: the column values of a single GPAD line
    :param report: Report to collect messages in; a new one is created from
        ``group`` and ``dataset`` when None
    :param group: group name used only when a new Report is created
    :param dataset: dataset name used only when a new Report is created
    """
    report = Report(group=group, dataset=dataset) if report is None else report
    source_line = "\t".join(gpad_line)

    def _skip():
        # Result returned for every line that fails validation.
        return assocparser.ParseResult(source_line, [], True, report=report)

    if len(gpad_line) > 12:
        report.warning(
            source_line,
            assocparser.Report.WRONG_NUMBER_OF_COLUMNS,
            "",
            msg="There were more than 12 columns in this line. Proceeding by cutting off extra columns.",
            rule=1)
        gpad_line = gpad_line[:12]

    if 12 > len(gpad_line) >= 10:
        # Build a new list instead of `+=` so the caller's list is not mutated.
        gpad_line = gpad_line + [""] * (12 - len(gpad_line))

    if len(gpad_line) != 12:
        report.error(
            source_line,
            assocparser.Report.WRONG_NUMBER_OF_COLUMNS,
            "",
            msg="There were {columns} columns found in this line, and there should be between 10 and 12".format(columns=len(gpad_line)))
        return _skip()

    ## check for missing columns
    ## We use indeces here because we run GO RULES before we split the vals into individual variables
    DB_INDEX = 0
    DB_OBJECT_INDEX = 1
    QUALIFIER = 2
    REFERENCE_INDEX = 4
    EVIDENCE_INDEX = 5
    EXTENSION_INDEX = 10   # column 11: annotation extensions
    PROPERTIES_INDEX = 11  # column 12: annotation properties
    if gpad_line[DB_INDEX] == "":
        report.error(source_line, Report.INVALID_IDSPACE, "EMPTY", "col1 is empty", rule=1)
        return _skip()
    if gpad_line[DB_OBJECT_INDEX] == "":
        report.error(source_line, Report.INVALID_ID, "EMPTY", "col2 is empty", rule=1)
        return _skip()
    if gpad_line[QUALIFIER] == "":
        # An empty qualifier is a qualifier problem, not a taxon problem.
        report.error(source_line, Report.INVALID_QUALIFIER, "EMPTY", "qualifier column is empty", rule=1)
        return _skip()
    if gpad_line[REFERENCE_INDEX] == "":
        report.error(source_line, Report.INVALID_ID, "EMPTY", "reference column is empty", rule=1)
        return _skip()
    if gpad_line[EVIDENCE_INDEX] == "":
        report.error(source_line, Report.INVALID_ID, "EMPTY", "Evidence column is empty", rule=1)
        # Skip the line like every other empty-column check above.
        return _skip()

    # No taxon is read from the line in this function; error reports below
    # are given an empty taxon.
    taxon = ""
    subject_curie = "{db}:{id}".format(db=gpad_line[0], id=gpad_line[1])
    subject = association.Subject(subject_curie, "", "", [], "", "")
    go_term = association.Term(gpad_line[3], "")
    evidence = association.Evidence(
        gpad_line[5],
        [e for e in gpad_line[4].split("|") if e],
        [e for e in gpad_line[6].split("|") if e])

    raw_qs = gpad_line[2].split("|")
    negated = "NOT" in raw_qs

    looked_up_qualifiers = [relations.lookup_label(q) for q in raw_qs if q != "NOT"]
    if None in looked_up_qualifiers:
        report.error(source_line, Report.INVALID_QUALIFIER, raw_qs, "Could not find a URI for qualifier", taxon=taxon, rule=1)
        return _skip()

    qualifiers = [curie_util.contract_uri(q)[0] for q in looked_up_qualifiers]

    # "|" separates alternative conjunctions, "," separates the
    # relation(term) units inside one conjunction.
    conjunctions = []
    if gpad_line[EXTENSION_INDEX]:
        for conjuncts in gpad_line[EXTENSION_INDEX].split("|"):
            extension_units = []
            for u in conjuncts.split(","):
                parsed = relation_tuple.findall(u)
                if len(parsed) == 1:
                    rel, term = parsed[0]
                    extension_units.append(association.ExtensionUnit(rel, term))
                else:
                    # Otherwise, something went bad with the regex, and it's a bad parse
                    report.error(source_line, Report.EXTENSION_SYNTAX_ERROR, u, "extensions should be relation(curie)", taxon=taxon, rule=1)
                    return _skip()

            conjunctions.append(association.ExtensionConjunctions(extension_units))

    object_extensions = association.ExtensionExpression(conjunctions)

    # Properties are "key=value" pairs separated by "|". partition keeps any
    # "=" inside the value and tolerates a bare key (value becomes "").
    properties = {}
    for prop in gpad_line[PROPERTIES_INDEX].split("|"):
        if not prop:
            continue
        key, _, value = prop.partition("=")
        properties[key] = value

    a = association.GoAssociation(
        # Rebuilt from the normalized (truncated/padded) columns.
        source_line="\t".join(gpad_line),
        subject=subject,
        relation="",
        object=go_term,
        negated=negated,
        qualifiers=qualifiers,
        aspect=None,
        interacting_taxon=gpad_line[7],
        evidence=evidence,
        subject_extensions=[],
        object_extensions=object_extensions,
        provided_by=gpad_line[9],
        date=gpad_line[8],
        properties=properties)

    return assocparser.ParseResult(source_line, [a], False, report=report)
def to_association(
        gaf_line: List[str],
        report=None,
        group="unknown",
        dataset="unknown",
        qualifier_parser=Qualifier2_1()) -> assocparser.ParseResult:
    """
    Convert one GAF line, already split into columns, into a GoAssociation.

    Lines with more than 17 columns are truncated (with a warning); lines
    with 15 or 16 columns are padded with empty strings to 17. The caller's
    list is never modified.

    On any validation failure an error is added to the report and a
    ``ParseResult(source_line, [], True, report=report)`` is returned. On
    success the result is ``ParseResult(source_line, [association], False,
    report=report)``.

    :param gaf_line: the column values of a single GAF line
    :param report: Report to collect messages in; a new one is created from
        ``group`` and ``dataset`` when None
    :param group: group name used only when a new Report is created
    :param dataset: dataset name used only when a new Report is created
    :param qualifier_parser: validator whose ``validate`` is applied to the
        qualifier column (index 3)
    """
    report = Report(group=group, dataset=dataset) if report is None else report
    source_line = "\t".join(gaf_line)

    def _skip():
        # Result returned for every line that fails validation.
        return assocparser.ParseResult(source_line, [], True, report=report)

    if source_line == "":
        report.error(source_line, "Blank Line", "EMPTY", "Blank lines are not allowed", rule=1)
        return _skip()

    if len(gaf_line) > 17:
        # If we see more than 17 columns, we will just cut off the columns after column 17
        report.warning(
            source_line,
            assocparser.Report.WRONG_NUMBER_OF_COLUMNS,
            "",
            msg="There were more than 17 columns in this line. Proceeding by cutting off extra columns after column 17.",
            rule=1)
        gaf_line = gaf_line[:17]

    if 17 > len(gaf_line) >= 15:
        # Build a new list instead of `+=` so the caller's list is not mutated.
        gaf_line = gaf_line + [""] * (17 - len(gaf_line))

    if len(gaf_line) != 17:
        report.error(
            source_line,
            assocparser.Report.WRONG_NUMBER_OF_COLUMNS,
            "",
            msg="There were {columns} columns found in this line, and there should be 15 (for GAF v1) or 17 (for GAF v2)".format(columns=len(gaf_line)),
            rule=1)
        return _skip()

    ## check for missing columns
    ## We use indeces here because we run GO RULES before we split the vals into individual variables
    DB_INDEX = 0
    DB_OBJECT_INDEX = 1
    TAXON_INDEX = 12
    REFERENCE_INDEX = 5
    if gaf_line[DB_INDEX] == "":
        report.error(source_line, Report.INVALID_IDSPACE, "EMPTY", "col1 is empty", taxon=gaf_line[TAXON_INDEX], rule=1)
        return _skip()
    if gaf_line[DB_OBJECT_INDEX] == "":
        report.error(source_line, Report.INVALID_ID, "EMPTY", "col2 is empty", taxon=gaf_line[TAXON_INDEX], rule=1)
        return _skip()
    if gaf_line[TAXON_INDEX] == "":
        report.error(source_line, Report.INVALID_TAXON, "EMPTY", "taxon column is empty", taxon=gaf_line[TAXON_INDEX], rule=1)
        return _skip()
    if gaf_line[REFERENCE_INDEX] == "":
        report.error(source_line, Report.INVALID_ID, "EMPTY", "reference column 6 is empty", taxon=gaf_line[TAXON_INDEX], rule=1)
        return _skip()

    # The taxon column may hold "taxon:A|taxon:B"; a second entry, when there
    # are exactly two, is used as the interacting taxon.
    taxon = gaf_line[12].split("|")
    taxon_curie = taxon[0].replace("taxon", "NCBITaxon")

    # _normalize_gaf_date returns None for an unusable date; it is handed the
    # report, so no extra error is added here.
    date = assocparser._normalize_gaf_date(gaf_line[13], report, taxon_curie, source_line)
    if date is None:
        return _skip()

    interacting_taxon = taxon[1].replace("taxon", "NCBITaxon") if len(taxon) == 2 else None
    subject_curie = "{db}:{id}".format(db=gaf_line[0], id=gaf_line[1])
    subject = association.Subject(subject_curie, gaf_line[2], gaf_line[9], gaf_line[10].split("|"), gaf_line[11], taxon_curie)
    aspect = gaf_line[8]
    negated, relation, qualifiers = assocparser._parse_qualifier(gaf_line[3], aspect)

    # column 4 is qualifiers -> index 3
    # For allowed, see http://geneontology.org/docs/go-annotations/#annotation-qualifiers
    parsed_qualifiers = qualifier_parser.validate(gaf_line[3])
    if not parsed_qualifiers.valid:
        report.error(source_line, Report.INVALID_QUALIFIER, parsed_qualifiers.original, parsed_qualifiers.message, taxon=gaf_line[TAXON_INDEX], rule=1)
        return _skip()

    go_term = association.Term(gaf_line[4], taxon_curie)
    evidence = association.Evidence(
        ecomap.coderef_to_ecoclass(gaf_line[6]),
        [e for e in gaf_line[5].split("|") if e],
        association.ConjunctiveSet.str_to_conjunctions(gaf_line[7]))

    subject_extensions = [association.ExtensionUnit("rdfs:subClassOf", gaf_line[16])] if gaf_line[16] else []

    conjunctions = []
    if gaf_line[15]:
        conjunctions = association.ConjunctiveSet.str_to_conjunctions(
            gaf_line[15],
            conjunct_element_builder=association.ExtensionUnit.from_str)

        if isinstance(conjunctions, association.Error):
            # Report the raw taxon column (a string) like every other call
            # here, not the split list held in `taxon`.
            report.error(source_line, Report.EXTENSION_SYNTAX_ERROR, conjunctions.info, "extensions should be relation(curie)", taxon=gaf_line[TAXON_INDEX], rule=1)
            return _skip()

    looked_up_rel = relations.lookup_label(relation)
    if looked_up_rel is None:
        report.error(source_line, assocparser.Report.INVALID_QUALIFIER, relation, "Could not find CURIE for relation `{}`".format(relation), taxon=gaf_line[TAXON_INDEX], rule=1)
        return _skip()

    a = association.GoAssociation(
        # Rebuilt from the normalized (truncated/padded) columns.
        source_line="\t".join(gaf_line),
        subject=subject,
        relation=curie_util.contract_uri(looked_up_rel)[0],
        object=go_term,
        negated=negated,
        qualifiers=qualifiers,
        aspect=aspect,
        interacting_taxon=interacting_taxon,
        evidence=evidence,
        subject_extensions=subject_extensions,
        object_extensions=conjunctions,
        provided_by=gaf_line[14],
        date=date,
        properties={})

    return assocparser.ParseResult(source_line, [a], False, report=report)