示例#1
0
def read_gaf_csv(filename, version) -> pd.DataFrame:
    """Load a GAF file into a DataFrame of five annotation columns.

    The file is read as tab-separated text without a header row; lines
    starting with ``!`` are treated as comments. Only the columns
    ``DB_Object_ID``, ``Qualifier``, ``GO_ID``, ``Evidence_code`` and
    ``DB_Reference`` are kept. Values in ``Evidence_code`` that equal the
    third element of an ``EcoMap.mappings()`` entry are replaced by the first
    element of ``ecoclass_to_coderef`` for that value.

    Args:
        filename: Path or buffer accepted by ``pandas.read_csv``.
        version: Unused here; kept so existing callers keep working.

    Returns:
        The trimmed DataFrame with translated evidence codes.
    """
    # One name per GAF column. Every entry needs its own trailing comma:
    # adjacent string literals are silently concatenated, which would leave
    # one name too few and make pandas treat the first data column as the
    # index, shifting every label by one.
    gaf_columns = [
        "DB",
        "DB_Object_ID",
        "DB_Object_Symbol",
        "Qualifier",
        "GO_ID",
        "DB_Reference",
        "Evidence_code",
        "With_or_From",
        "Aspect",
        "DB_Object_Name",
        "DB_Object_Synonym",
        "DB_Object_Type",
        "Taxon",
        "Date",
        "Assigned_By",
        "Annotation_Extension",
        "Gene_Product_Form_ID",
    ]
    ecomapping = ecomap.EcoMap()
    data_frame = pd.read_csv(filename,
                             comment='!',
                             sep='\t',
                             header=None,
                             na_filter=False,
                             names=gaf_columns).fillna("")
    new_df = data_frame.filter(['DB_Object_ID', 'Qualifier', 'GO_ID', 'Evidence_code', 'DB_Reference'], axis=1)

    # Translate each mapped value at most once per mapping instead of
    # re-running the whole-column replace for every matching row.
    evidence = new_df['Evidence_code']
    for eco_code in ecomapping.mappings():
        eco_class = eco_code[2]
        if (evidence == eco_class).any():
            evidence = evidence.replace([eco_class],
                                        ecomapping.ecoclass_to_coderef(eco_class)[0])
    new_df['Evidence_code'] = evidence
    return new_df
示例#2
0
    def __init__(self,
                 remove_double_prefixes=False,
                 ontology=None,
                 repair_obsoletes=True,
                 entity_map=None,
                 valid_taxa=None,
                 class_idspaces=None,
                 entity_idspaces=None,
                 ecomap=ecomap.EcoMap(),
                 exclude_relations=None,
                 include_relations=None,
                 filter_out_evidence=None,
                 filtered_evidence_file=None,
                 gpi_authority_path=None,
                 paint=False):
        """Store the parser configuration options on the instance.

        Every argument is saved under an attribute of the same name.
        ``exclude_relations``, ``include_relations`` and
        ``filter_out_evidence`` become a fresh empty list when passed as
        ``None``.

        NOTE: the default for ``ecomap`` is evaluated once, when the method is
        defined, so every instance that does not pass its own shares that
        single object.
        """
        self.remove_double_prefixes = remove_double_prefixes
        self.ontology = ontology
        self.repair_obsoletes = repair_obsoletes
        self.entity_map = entity_map
        self.valid_taxa = valid_taxa
        self.class_idspaces = class_idspaces
        # Previously accepted but silently dropped; keep it so the value the
        # caller supplied is available on the instance.
        self.entity_idspaces = entity_idspaces
        self.ecomap = ecomap
        self.include_relations = include_relations
        self.exclude_relations = exclude_relations
        self.filter_out_evidence = filter_out_evidence
        self.filtered_evidence_file = filtered_evidence_file
        self.gpi_authority_path = gpi_authority_path
        self.paint = paint
        # Give each instance its own list rather than leaving None behind.
        if self.exclude_relations is None:
            self.exclude_relations = []
        if self.include_relations is None:
            self.include_relations = []
        if self.filter_out_evidence is None:
            self.filter_out_evidence = []
示例#3
0
    def __init__(self, file=None, version=GPAD_1_2):
        """Set up the writer and emit the GPAD version header.

        ``version`` is kept only when it is ``GPAD_1_2`` or ``GPAD_2_0``;
        any other value falls back to ``GPAD_1_2``. The header line is passed
        to ``self._write`` before the ``EcoMap`` instance is created.
        """
        supported_versions = [GPAD_1_2, GPAD_2_0]
        self.file = file
        self.version = version if version in supported_versions else GPAD_1_2

        version_header = "!gpa-version: {}\n".format(self.version)
        self._write(version_header)
        self.ecomap = ecomap.EcoMap()
示例#4
0
    def __init__(self,
                 remove_double_prefixes=False,
                 ontology=None,
                 repair_obsoletes=True,
                 entity_map=None,
                 valid_taxa=None,
                 class_idspaces=None,
                 entity_idspaces=None,
                 group_idspace=None,
                 ecomap=ecomap.EcoMap(),
                 exclude_relations=None,
                 include_relations=None,
                 filter_out_evidence=None,
                 filtered_evidence_file=None,
                 gpi_authority_path=None,
                 paint=False,
                 rule_metadata=None,
                 goref_metadata=None,
                 group_metadata=None,
                 dbxrefs=None,
                 suppress_rule_reporting_tags=None,
                 annotation_inferences=None,
                 extensions_constraints=None,
                 rule_contexts=None):
        """Store the parser configuration options on the instance.

        Arguments are saved under attributes of the same name, except:

        * ``rule_metadata`` defaults to a new empty dict, and
          ``suppress_rule_reporting_tags``, ``rule_contexts``,
          ``exclude_relations``, ``include_relations`` and
          ``filter_out_evidence`` default to new empty lists. Each instance
          gets its own container instead of sharing one default object.
        * ``group_idspace`` is converted to a ``set`` unless it is ``None``.
        * ``extensions_constraints`` is passed, together with ``ontology``,
          through ``AssocParserConfig._compute_constraint_subclasses``.
        * ``dbxrefs`` is accepted but not stored by this constructor.

        NOTE: the default for ``ecomap`` is evaluated once, when the method is
        defined, so every instance that does not pass its own shares that
        single object.
        """
        self.remove_double_prefixes = remove_double_prefixes
        self.ontology = ontology
        self.repair_obsoletes = repair_obsoletes
        self.entity_map = entity_map
        self.valid_taxa = valid_taxa
        self.class_idspaces = class_idspaces
        self.ecomap = ecomap
        self.include_relations = [] if include_relations is None else include_relations
        self.exclude_relations = [] if exclude_relations is None else exclude_relations
        self.filter_out_evidence = [] if filter_out_evidence is None else filter_out_evidence
        self.filtered_evidence_file = filtered_evidence_file
        self.gpi_authority_path = gpi_authority_path
        self.paint = paint
        # NOTE(review): an older comment in this method described "a dictionary
        # from ruleid: `gorule-0000001` to title strings"; presumably that is
        # what rule_metadata holds -- confirm against callers.
        self.rule_metadata = {} if rule_metadata is None else rule_metadata
        self.goref_metadata = goref_metadata
        self.group_metadata = group_metadata
        self.suppress_rule_reporting_tags = (
            [] if suppress_rule_reporting_tags is None else suppress_rule_reporting_tags)
        self.annotation_inferences = annotation_inferences
        self.entity_idspaces = entity_idspaces
        self.extensions_constraints = AssocParserConfig._compute_constraint_subclasses(
            extensions_constraints, ontology)
        self.group_idspace = None if group_idspace is None else set(
            group_idspace)
        self.rule_contexts = [] if rule_contexts is None else rule_contexts
示例#5
0
    def __init__(self,
                 remove_double_prefixes=False,
                 ontology=None,
                 repair_obsoletes=True,
                 entity_map=None,
                 valid_taxa=None,
                 class_idspaces=None,
                 entity_idspaces=None,
                 group_idspace=None,
                 ecomap=ecomap.EcoMap(),
                 exclude_relations=None,
                 include_relations=None,
                 filter_out_evidence=None,
                 filtered_evidence_file=None,
                 gpi_authority_path=None,
                 paint=False,
                 rule_titles=None,
                 dbxrefs=None):
        """Record the parser configuration options as instance attributes.

        Arguments are stored under their own names. ``entity_idspaces`` and
        ``group_idspace`` are turned into sets unless ``None``, and the three
        relation/evidence filters fall back to a new empty list when ``None``.
        ``dbxrefs`` is accepted but not stored by this constructor.
        """
        # Plain pass-through options.
        self.remove_double_prefixes = remove_double_prefixes
        self.ontology = ontology
        self.repair_obsoletes = repair_obsoletes
        self.entity_map = entity_map
        self.valid_taxa = valid_taxa
        self.class_idspaces = class_idspaces
        self.ecomap = ecomap
        self.filtered_evidence_file = filtered_evidence_file
        self.gpi_authority_path = gpi_authority_path
        self.paint = paint
        # This is a dictionary from ruleid: `gorule-0000001` to title strings
        self.rule_titles = rule_titles

        # Id spaces are held as sets when supplied.
        self.entity_idspaces = set(entity_idspaces) if entity_idspaces is not None else None
        self.group_idspace = set(group_idspace) if group_idspace is not None else None

        # List-valued filters: never leave None on the instance.
        self.include_relations = [] if include_relations is None else include_relations
        self.exclude_relations = [] if exclude_relations is None else exclude_relations
        self.filter_out_evidence = [] if filter_out_evidence is None else filter_out_evidence
示例#6
0
def read_gpad_csv(filename, version) -> pd.DataFrame:
    """Load a GPAD file into a DataFrame of six annotation columns.

    The file is read as tab-separated text without a header row; lines
    starting with ``!`` are treated as comments.

    * When ``version`` starts with ``"1"`` the columns are named from
      ``gpad_1_2_format``; ``db`` and ``subject`` are joined with ``":"`` into
      a single ``subject`` column, followed by ``qualifiers``, ``relation``,
      ``object``, ``evidence_code`` and ``reference``.
    * Otherwise the columns are named from ``gpad_2_0_format`` and
      ``subject``, ``negation``, ``relation``, ``object``, ``evidence_code``
      and ``reference`` are kept.

    Values in ``evidence_code`` that equal the third element of an
    ``EcoMap.mappings()`` entry are replaced by the first element of
    ``ecoclass_to_coderef`` for that value. Finally every ``subject`` is run
    through ``GpadParser._normalize_id`` with ``remove_double_prefixes``
    enabled.

    Args:
        filename: Path or buffer accepted by ``pandas.read_csv``.
        version: GPAD version string; only its first character is inspected.

    Returns:
        The trimmed, translated and id-normalized DataFrame.
    """
    if version.startswith("1"):
        data_frame = pd.read_csv(filename,
                                 comment='!',
                                 sep='\t',
                                 header=None,
                                 na_filter=False,
                                 names=gpad_1_2_format).fillna("")
        df = data_frame.filter(['db', 'subject', 'qualifiers', 'relation', 'object', 'evidence_code', 'reference'], axis=1)
        # GPAD 1.x splits the identifier over two columns; merge them.
        df['concat_column'] = df['db'] + ":" + df['subject']
        new_df = df.filter(
            ['concat_column', 'qualifiers', 'relation', 'object', 'evidence_code', 'reference']
        ).rename(columns={'concat_column': 'subject'})
    else:
        data_frame = pd.read_csv(filename,
                                 comment='!',
                                 sep='\t',
                                 header=None,
                                 na_filter=False,
                                 names=gpad_2_0_format).fillna("")
        new_df = data_frame.filter(['subject', 'negation', 'relation', 'object', 'evidence_code', 'reference'], axis=1)

    # Translate each mapped value at most once per mapping instead of
    # re-running the whole-column replace for every matching row.
    ecomapping = ecomap.EcoMap()
    evidence = new_df['evidence_code']
    for eco_code in ecomapping.mappings():
        eco_class = eco_code[2]
        if (evidence == eco_class).any():
            evidence = evidence.replace([eco_class],
                                        ecomapping.ecoclass_to_coderef(eco_class)[0])
    new_df['evidence_code'] = evidence

    # normalize ids
    config = assocparser.AssocParserConfig()
    config.remove_double_prefixes = True
    parser = gpadparser.GpadParser(config=config)
    # Element-wise map works on the column's own index, so it does not rely on
    # row labels being 0..n-1 the way positional ``.at[i, ...]`` writes do.
    new_df['subject'] = new_df['subject'].map(parser._normalize_id)

    return new_df
示例#7
0
import logging
from typing import List
from ontobio.io import assocparser
from ontobio.io.gpadparser import GpadParser
from ontobio.model.association import GoAssociation, Date
from ontobio.rdfgen.gocamgen import errors
from ontobio import ecomap

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Shared EcoMap instance, created once at import time.
ecomapping = ecomap.EcoMap()
# Result of looking up the "IPI" code reference in the EcoMap
# (presumably the ECO class id for the IPI evidence code -- confirm).
ipi_eco = ecomapping.coderef_to_ecoclass("IPI")

# Shared GpadParser built with its default arguments at import time.
GPAD_PARSER = GpadParser()
BINDING_ROOT = "GO:0005488"  # binding
# Constant-style alias for ipi_eco.
IPI_ECO_CODE = ipi_eco


class GoAssocWithFrom:
    """
    Holds with/from column values split into a
    header arrangement and a line arrangement,
    for explicit placement in annotation assertions.
    """
    def __init__(self, header=None, line=None):
        # Treat a missing argument as an empty collection.
        # NOTE(review): ``line`` is normalized here but not stored anywhere in
        # the visible code; the definition may continue beyond this excerpt.
        line = [] if line is None else line
        header = [] if header is None else header
        # sorted() returns a new list, so the caller's sequence is untouched.
        self.header = sorted(header)
示例#8
0
 def __init__(self, file=None):
     """Store ``file``, emit the GPAD 1.1 version header, and create an EcoMap.

     The header is passed to ``self._write`` (defined elsewhere in the class)
     before ``self.ecomap`` is assigned.
     """
     self.file = file
     self._write("!gpa-version: 1.1\n")
     self.ecomap = ecomap.EcoMap()
示例#9
0
 def __init__(self):
     """Initialize the rule with id GORULE:0000043 and a fresh EcoMap.

     The id, title and ``FailMode.SOFT`` are forwarded to the parent
     constructor; the EcoMap is kept on ``self.ecomapping``.
     """
     super().__init__(
         "GORULE:0000043",
         "Check for valid combination of evidence code and GO_REF",
         FailMode.SOFT)
     self.ecomapping = ecomap.EcoMap()
示例#10
0
    def __init__(self,
                 remove_double_prefixes=False,
                 ontology=None,
                 repair_obsoletes=True,
                 entity_map=None,
                 valid_taxa=None,
                 class_idspaces=None,
                 entity_idspaces=None,
                 group_idspace=None,
                 ecomap=ecomap.EcoMap(),
                 exclude_relations=None,
                 include_relations=None,
                 filter_out_evidence=None,
                 filtered_evidence_file=None,
                 gpi_authority_path=None,
                 paint=False,
                 rule_metadata=None,
                 goref_metadata=None,
                 group_metadata=None,
                 dbxrefs=None,
                 suppress_rule_reporting_tags=None,
                 annotation_inferences=None,
                 extensions_constraints=None,
                 rule_contexts=None,
                 rule_set=None):
        """Store the parser configuration options on the instance.

        Arguments are saved under attributes of the same name, except:

        * ``rule_metadata`` defaults to a new empty dict, and
          ``suppress_rule_reporting_tags``, ``rule_contexts``,
          ``exclude_relations``, ``include_relations`` and
          ``filter_out_evidence`` default to new empty lists. Each instance
          gets its own container instead of sharing one default object.
        * ``group_idspace`` is converted to a ``set`` unless it is ``None``.
        * ``extensions_constraints`` is passed, together with ``ontology``,
          through ``AssocParserConfig._compute_constraint_subclasses``.
        * ``rule_set`` is wrapped in a ``RuleSet``: ``None`` becomes
          ``RuleSet([])``, ``RuleSet.ALL`` becomes ``RuleSet(None)``, and any
          other value is passed to ``RuleSet`` unchanged.
        * ``dbxrefs`` is accepted but not stored by this constructor.

        NOTE: the default for ``ecomap`` is evaluated once, when the method is
        defined, so every instance that does not pass its own shares that
        single object.
        """
        self.remove_double_prefixes = remove_double_prefixes
        self.ontology = ontology
        self.repair_obsoletes = repair_obsoletes
        self.entity_map = entity_map
        self.valid_taxa = valid_taxa
        self.class_idspaces = class_idspaces
        self.ecomap = ecomap
        self.include_relations = [] if include_relations is None else include_relations
        self.exclude_relations = [] if exclude_relations is None else exclude_relations
        self.filter_out_evidence = [] if filter_out_evidence is None else filter_out_evidence
        self.filtered_evidence_file = filtered_evidence_file
        self.gpi_authority_path = gpi_authority_path
        self.paint = paint
        # NOTE(review): an older comment in this method described "a dictionary
        # from ruleid: `gorule-0000001` to title strings"; presumably that is
        # what rule_metadata holds -- confirm against callers.
        self.rule_metadata = {} if rule_metadata is None else rule_metadata
        self.goref_metadata = goref_metadata
        self.group_metadata = group_metadata
        self.suppress_rule_reporting_tags = (
            [] if suppress_rule_reporting_tags is None else suppress_rule_reporting_tags)
        self.annotation_inferences = annotation_inferences
        self.entity_idspaces = entity_idspaces
        self.extensions_constraints = AssocParserConfig._compute_constraint_subclasses(
            extensions_constraints, ontology)
        self.group_idspace = None if group_idspace is None else set(
            group_idspace)
        self.rule_contexts = [] if rule_contexts is None else rule_contexts
        # We'll say that the default None should run no rules, so let's set the rule_set to []
        if rule_set is None:
            self.rule_set = RuleSet([])
        elif rule_set == RuleSet.ALL:
            # None here means all rules
            self.rule_set = RuleSet(None)
        else:
            self.rule_set = RuleSet(rule_set)