Пример #1
0
    def make_association_id(definedby, sub, pred, obj, attributes=None):
        """
        Build a deterministic identifier for an OBAN-style association.

        The defining resource, subject, predicate and object, followed by
        any extra ``attributes`` (which subclasses of Assoc may supply),
        are joined with ``+`` and the joined string is passed to
        ``GraphUtils.digest_id``.  Components that are ``None`` are left out
        of the joined string altogether; empty strings are kept as they are.

        Note this is equivalent to a RDF blank node

        :param definedby: The (data) resource that provided the annotation
        :param sub: subject of the association
        :param pred: predicate of the association
        :param obj: object of the association
        :param attributes: optional sequence of further strings appended
            to the digested components

        :return: the digest prefixed with ``MONARCH:``

        """

        parts = [definedby, sub, pred, obj]
        if attributes is not None and len(attributes) > 0:
            parts.extend(attributes)

        # None components are skipped rather than rendered as empty fields
        digest_input = '+'.join(part for part in parts if part is not None)

        return 'MONARCH:' + GraphUtils.digest_id(digest_input)
Пример #2
0
    def make_association_id(definedby, sub, pred, obj, attributes=None):
        """
        Create the identifier of an OBAN-style association from its parts.

        All parts of the association (and, when given, the additional
        ``attributes`` a subclass of Assoc wants included) are concatenated
        with ``+`` and digested by ``GraphUtils.digest_id``.  Any part that
        is ``None`` is dropped before concatenation; empty strings are not.

        Note this is equivalent to a RDF blank node

        :param definedby: The (data) resource that provided the annotation
        :param sub: subject of the association
        :param pred: predicate of the association
        :param obj: object of the association
        :param attributes: optional sequence of extra strings to digest

        :return: ``MONARCH:`` followed by the digest

        """

        components = [definedby, sub, pred, obj]
        if attributes is not None and len(attributes) > 0:
            components = components + list(attributes)

        retained = []
        for component in components:
            if component is not None:
                retained.append(component)

        digest = GraphUtils.digest_id('+'.join(retained))
        assoc_id = ':'.join(['MONARCH', digest])
        # sanity check kept from the original implementation
        assert assoc_id is not None
        return assoc_id
Пример #3
0
    def get_uniprot_entrez_id_map(self):
        """
        Return a dict mapping UniProtKB accessions to a single gene CURIE.

        A YAML cache named ``id_map_<digest of self.tax_ids>.yaml`` under
        ``self.rawdir`` is loaded when it exists and its ctime is later
        than the ctime of the big mapping file.  Otherwise
        ``self.fetch_from_url`` is called for the big file, which is then
        read as gzipped tab-separated text, filtered to the taxa in
        ``self.tax_ids``, and the resulting map is written to the cache.

        Only 1:1 identifiers are kept.  An Entrez gene id is preferred and
        an Ensembl id is the fallback; values that are empty (after
        stripping whitespace) or contain ``;`` are not used.

        :return: dict of UniProtKB-AC -> ``NCBIGene:<id>`` or ``ENSEMBL:<id>``
        """
        taxon_digest = GraphUtils.digest_id(str(self.tax_ids))
        id_map = {}
        smallfile = '/'.join((self.rawdir, 'id_map_' + taxon_digest + '.yaml'))
        bigfile = '/'.join((self.rawdir, self.files['id-map']['file']))

        # if the processed smallfile exists and is newer use it instead
        # NOTE(review): getctime(bigfile) raises OSError when the cache
        # exists but the big file does not - confirm that cannot happen.
        if os.path.isfile(smallfile) and \
                os.path.getctime(smallfile) > os.path.getctime(bigfile):
            LOG.info("Using the cheap mapping file %s", smallfile)
            with open(smallfile, 'r') as fh:
                id_map = yaml.safe_load(fh)
        else:
            LOG.info(
                "Expensive Mapping from Uniprot ids to Entrez/ENSEMBL gene ids for %s",
                str(self.tax_ids))
            self.fetch_from_url(self.files['id-map']['url'], bigfile)
            with gzip.open(bigfile, 'rb') as csvfile:
                csv.field_size_limit(sys.maxsize)
                filereader = csv.reader(  # warning this file is over 10GB unzipped
                    io.TextIOWrapper(csvfile, newline=""),
                    delimiter='\t',
                    quotechar='\"')
                for row in filereader:
                    # The full unpacking names every column of the file and
                    # raises ValueError on rows without exactly 22 fields;
                    # only four of the values are used below.
                    (uniprotkb_ac, uniprotkb_id, geneid, refseq, gi, pdb, go,
                     uniref100, unifref90, uniref50, uniparc, pir, ncbitaxon,
                     mim, unigene, pubmed, embl, embl_cds, ensembl,
                     ensembl_trs, ensembl_pro, other_pubmed) = row
                    if str(ncbitaxon) not in self.tax_ids:
                        continue
                    # Strip BEFORE testing for emptiness so a column that
                    # holds only whitespace counts as empty instead of
                    # producing a bare 'NCBIGene:' entry.
                    uniprotkb_ac = uniprotkb_ac.strip()
                    geneid = geneid.strip()
                    ensembl = ensembl.strip()
                    # neither empty nor a ';' separated list
                    if geneid != '' and ';' not in geneid:
                        id_map[uniprotkb_ac] = 'NCBIGene:' + geneid
                    elif ensembl != '' and ';' not in ensembl:
                        id_map[uniprotkb_ac] = 'ENSEMBL:' + ensembl

            LOG.info("Writing id_map out as %s", smallfile)
            with open(smallfile, 'w') as fh:
                yaml.dump(id_map, fh)

        LOG.info("Acquired %i 1:1 uniprot to [entrez|ensembl] mappings",
                 len(id_map))

        return id_map
Пример #4
0
    def get_uniprot_entrez_id_map(self):
        """
        Return a dict mapping UniProtKB accessions to a single gene CURIE.

        The YAML cache ``id_map_<digest of self.tax_ids>.yaml`` in
        ``self.rawdir`` is loaded when it exists and its ctime is later
        than that of the big mapping file.  Otherwise
        ``self.fetch_from_url`` is called for the big file, which is read
        as gzipped tab-separated text and filtered to rows whose taxon,
        converted with ``int()``, is in ``self.tax_ids``; the resulting map
        is then written to the cache.

        Only 1:1 identifiers are kept.  An Entrez gene id is preferred and
        an Ensembl id is the fallback; values that are empty (after
        stripping whitespace) or contain ``;`` are not used.

        :return: dict of UniProtKB-AC -> ``NCBIGene:<id>`` or ``ENSEMBL:<id>``
        :raises ValueError: if a row does not have exactly 22 fields or its
            taxon column is not an integer
        """
        taxon_digest = GraphUtils.digest_id(str(self.tax_ids))
        id_map = {}
        smallfile = '/'.join((self.rawdir, 'id_map_' + taxon_digest + '.yaml'))
        bigfile = '/'.join((self.rawdir, self.files['id-map']['file']))

        # if the processed smallfile exists and is newer use it instead
        # NOTE(review): getctime(bigfile) raises OSError when the cache
        # exists but the big file does not - confirm that cannot happen.
        if os.path.isfile(smallfile) and \
                os.path.getctime(smallfile) > os.path.getctime(bigfile):
            LOG.info("Using the cheap mapping file %s", smallfile)
            with open(smallfile, 'r') as fh:
                id_map = yaml.safe_load(fh)
        else:
            LOG.info(
                "Expensive Mapping from Uniprot ids to Entrez/ENSEMBL gene ids for %s",
                str(self.tax_ids))
            self.fetch_from_url(self.files['id-map']['url'], bigfile)
            with gzip.open(bigfile, 'rb') as csvfile:
                csv.field_size_limit(sys.maxsize)
                filereader = csv.reader(  # warning this file is over 10GB unzipped
                    io.TextIOWrapper(
                        csvfile, newline=""), delimiter='\t', quotechar='\"')
                for row in filereader:
                    # Unpacking all 22 columns documents the file layout
                    # and rejects rows of any other width; only four of
                    # the values are used below.
                    (uniprotkb_ac, uniprotkb_id, geneid, refseq, gi, pdb, go,
                     uniref100, unifref90, uniref50, uniparc, pir, ncbitaxon, mim,
                     unigene, pubmed, embl, embl_cds, ensembl, ensembl_trs,
                     ensembl_pro, other_pubmed) = row
                    if int(ncbitaxon) not in self.tax_ids:
                        continue
                    # Normalise whitespace first: testing the raw column
                    # against '' let a blank-padded value through and
                    # stored a bare 'NCBIGene:' CURIE.
                    uniprotkb_ac = uniprotkb_ac.strip()
                    geneid = geneid.strip()
                    ensembl = ensembl.strip()
                    # neither empty nor a ';' separated list
                    if geneid != '' and ';' not in geneid:
                        id_map[uniprotkb_ac] = 'NCBIGene:' + geneid
                    elif ensembl != '' and ';' not in ensembl:
                        id_map[uniprotkb_ac] = 'ENSEMBL:' + ensembl

            LOG.info("Writing id_map out as %s", smallfile)
            with open(smallfile, 'w') as fh:
                yaml.dump(id_map, fh)

        LOG.info(
            "Acquired %i 1:1 uniprot to [entrez|ensembl] mappings", len(id_map))

        return id_map
Пример #5
0
class Pathway():
    """
    Convenience methods for working with gene and protein collections
    in the context of pathways.
    """
    def __init__(self, graph):
        """
        :param graph: the Graph that receives the pathway triples
        :raises ValueError: when ``graph`` is not a Graph instance
        """
        if not isinstance(graph, Graph):
            raise ValueError("{} is not a graph".format(graph))
        self.graph = graph
        self.model = Model(self.graph)
        self.globaltt = self.graph.globaltt
        self.globaltcid = self.graph.globaltcid
        self.curie_map = self.graph.curie_map
        self.gut = GraphUtils(self.curie_map)

    def addPathway(
            self, pathway_id, pathway_label, pathway_type=None,
            pathway_description=None):
        """
        Adds a pathway as a class.

        The class is added with ``pathway_type`` as its type argument; when
        no type is given the global term ``cellular_process`` is used.
        In every case the pathway is also made a subclass of the global
        term ``pathway``.

        :param pathway_id:
        :param pathway_label:
        :param pathway_type:
        :param pathway_description:
        :return:
        """
        parent_type = pathway_type
        if parent_type is None:
            parent_type = self.globaltt['cellular_process']

        self.model.addClassToGraph(
            pathway_id, pathway_label, parent_type, pathway_description)
        self.model.addSubClass(pathway_id, self.globaltt['pathway'])

    def addGeneToPathway(self, gene_id, pathway_id):
        """
        Link a gene to a pathway through an intermediate 'gene product'
        blank node:

        gene_id RO:has_gene_product _gene_product
        _gene_product RO:involved_in pathway_id

        The blank node id is ``_:`` plus the digest of the gene id (with
        its colons removed) followed by ``product``.  The node is typed as
        a gene product and given a label triple whose object is
        ``pathway_id``.
        NOTE(review): labelling the gene product with the pathway id looks
        unusual - confirm that it is intended.

        :param gene_id:
        :param pathway_id:
        :return:
        """
        terms = self.globaltt
        digest = self.gut.digest_id(gene_id.replace(':', '') + 'product')
        product_bnode = '_:' + digest

        self.model.addIndividualToGraph(
            product_bnode, None, terms['gene_product'])
        self.graph.addTriple(product_bnode, terms['label'], pathway_id)

        self.graph.addTriple(
            gene_id, terms['has gene product'], product_bnode)
        self.addComponentToPathway(product_bnode, pathway_id)

    def addComponentToPathway(self, component_id, pathway_id):
        """
        State that a component is involved in a pathway.

        This can be used directly when the component is directly involved
        in the pathway.  If a transforming event is performed on the
        component first, then addGeneToPathway should be used instead.

        :param component_id:
        :param pathway_id:
        :return:
        """
        involved_in = self.globaltt['involved in']
        self.graph.addTriple(component_id, involved_in, pathway_id)
Пример #6
0
class Model():
    """
    Utility class to add common triples to a graph
    (subClassOf, type, label, sameAs)

    Each method translates its arguments into one or more
    ``self.graph.addTriple`` calls; predicates and fixed objects are looked
    up by key in the graph's ``globaltt`` table.
    """
    def __init__(self, graph):
        """
        :param graph: the Graph the triples are added to
        :raises ValueError: when ``graph`` is not a Graph instance
        """
        if isinstance(graph, Graph):
            self.graph = graph
            self.globaltt = self.graph.globaltt
            self.globaltcid = self.graph.globaltcid
            self.curie_map = self.graph.curie_map

        else:
            raise ValueError("{} is not a graph".format(graph))

        # constructed with None rather than with self.curie_map
        self.gut = GraphUtils(None)  # self.curie_map

    def addTriple(self,
                  subject_id,
                  predicate_id,
                  obj,
                  object_is_literal=False,
                  literal_type=None,
                  subject_category=None,
                  object_category=None):
        """
        Pass a triple straight through to ``self.graph.addTriple``.

        :param subject_id:
        :param predicate_id:
        :param obj:
        :param object_is_literal:
        :param literal_type:
        :param subject_category: a biolink category CURIE for the subject
        :param object_category: a biolink category CURIE for the object
        :return:
        """
        self.graph.addTriple(subject_id,
                             predicate_id,
                             obj,
                             object_is_literal,
                             literal_type,
                             subject_category=subject_category,
                             object_category=object_category)

    def addType(self,
                subject_id,
                subject_type,
                subject_category=None,
                subject_type_category=None):
        """
        Add a (subject, type, subject_type) triple.

        :param subject_id:
        :param subject_type:
        :param subject_category: a biolink category CURIE for subject_id
        :param subject_type_category: a biolink category CURIE for
            subject_type
        :return:
        """
        self.graph.addTriple(subject_id,
                             self.globaltt['type'],
                             subject_type,
                             subject_category=subject_category,
                             object_category=subject_type_category)

    def addLabel(self, subject_id, label, subject_category=None):
        """
        Add a literal label to the subject unless the label is ''.

        NOTE(review): a None label is not filtered out here (unlike in
        addClassToGraph) - confirm whether callers rely on that.

        :param subject_id:
        :param label:
        :param subject_category: a biolink category CURIE for subject_id
        :return:
        """
        if label != '':
            self.graph.addTriple(subject_id,
                                 self.globaltt['label'],
                                 label,
                                 object_is_literal=True,
                                 subject_category=subject_category)
        # warn

    def addClassToGraph(self,
                        class_id,
                        label=None,
                        class_type=None,
                        description=None,
                        class_category=None,
                        class_type_category=None):
        """
        Add a class node to the graph.  The node always gets
        *(node, type, owl:Class)
        and, depending on the arguments,
        *(node, label, literal(label)) if a non-empty label is given
        *if a type is added,
            then the node will be an OWL:subclassOf that the type
        *if a non-empty description is provided,
            it will also get added as a dc:description
        :param class_id:
        :param label:
        :param class_type:
        :param description:
        :param class_category: a biolink category CURIE for class
        :param class_type_category: a biolink category CURIE for class type
        :return:
        :raises ValueError: if class_id is None

        """
        if class_id is None:
            raise ValueError("class_id is None")

        self.graph.addTriple(class_id,
                             self.globaltt['type'],
                             self.globaltt['class'],
                             subject_category=class_category)
        if label is not None and label != '':
            self.graph.addTriple(class_id,
                                 self.globaltt['label'],
                                 label,
                                 object_is_literal=True)

        if class_type is not None:
            self.graph.addTriple(class_id,
                                 self.globaltt['subclass_of'],
                                 class_type,
                                 object_category=class_type_category)
        if description is not None and description != '':
            self.graph.addTriple(class_id,
                                 self.globaltt['description'],
                                 description,
                                 object_is_literal=True)

    def addIndividualToGraph(self,
                             ind_id,
                             label,
                             ind_type=None,
                             description=None,
                             ind_category=None,
                             ind_type_category=None):
        """
        Add an individual to the graph.

        A non-empty label and a non-empty description are added as
        literals.  The individual is typed with ``ind_type`` when given,
        otherwise with the global term ``named_individual``.

        :param ind_id:
        :param label:
        :param ind_type:
        :param description:
        :param ind_category: a biolink category CURIE for ind_id
        :param ind_type_category: a biolink category CURIE for ind_type
        :return:
        """
        if label is not None and label != '':
            self.graph.addTriple(ind_id,
                                 self.globaltt['label'],
                                 label,
                                 object_is_literal=True)
        if ind_type is not None:
            self.graph.addTriple(ind_id,
                                 self.globaltt['type'],
                                 ind_type,
                                 object_is_literal=False,
                                 subject_category=ind_category,
                                 object_category=ind_type_category)
        else:
            self.graph.addTriple(ind_id,
                                 self.globaltt['type'],
                                 self.globaltt['named_individual'],
                                 subject_category=ind_category)
        if description is not None and description != '':
            self.graph.addTriple(ind_id,
                                 self.globaltt['description'],
                                 description,
                                 object_is_literal=True)

    def addEquivalentClass(self,
                           sub,
                           obj,
                           subject_category=None,
                           object_category=None):
        """
        Add a (sub, equivalent_class, obj) triple.

        :param sub:
        :param obj:
        :param subject_category: a biolink category CURIE for sub
        :param object_category: a biolink category CURIE for obj
        :return:
        """
        self.graph.addTriple(sub,
                             self.globaltt['equivalent_class'],
                             obj,
                             object_is_literal=False,
                             subject_category=subject_category,
                             object_category=object_category)

    def addSameIndividual(self,
                          sub,
                          obj,
                          subject_category=None,
                          object_category=None):
        """
        Add a (sub, same_as, obj) triple.

        :param sub:
        :param obj:
        :param subject_category: a biolink category CURIE for sub
        :param object_category: a biolink category CURIE for obj
        :return:
        """
        self.graph.addTriple(sub,
                             self.globaltt['same_as'],
                             obj,
                             object_is_literal=False,
                             subject_category=subject_category,
                             object_category=object_category)

    def addOWLPropertyClassRestriction(self,
                                       class_id,
                                       property_id,
                                       property_value,
                                       class_category=None,
                                       property_id_category=None,
                                       property_value_category=None):
        """
        Make ``class_id`` a subclass of a restriction
        (on_property property_id, some_values_from property_value).

        The restriction is held by a blank node whose id is the digest of
        ``<property_id>-<property_value>``; that same string is attached to
        the blank node as its label.

        :param class_id:
        :param property_id:
        :param property_value:
        :param class_category: a biolink category CURIE for class_id
        :param property_id_category: a biolink category CURIE for
            property_id
        :param property_value_category: a biolink category CURIE for
            property_value
        :return:
        """
        # make a bnode to hold the property restrictions
        uniq_str = '-'.join((property_id, property_value))
        bnode = ':'.join(('_', self.gut.digest_id(uniq_str)))

        self.graph.addTriple(bnode, self.globaltt['type'],
                             self.globaltt['restriction'])
        self.graph.addTriple(bnode, self.globaltt['label'], uniq_str)
        self.graph.addTriple(bnode,
                             self.globaltt['on_property'],
                             property_id,
                             object_category=property_id_category)
        self.graph.addTriple(bnode,
                             self.globaltt['some_values_from'],
                             property_value,
                             object_category=property_value_category)
        self.graph.addTriple(class_id,
                             self.globaltt['subclass_of'],
                             bnode,
                             object_is_literal=False,
                             subject_category=class_category)

    def addPerson(self, person_id, person_label=None):
        """
        Type ``person_id`` as a person and, when a non-empty label is
        given, add it as a literal label.

        :param person_id:
        :param person_label:
        :return:
        """
        self.graph.addTriple(person_id, self.globaltt['type'],
                             self.globaltt['person'])
        if person_label is not None and person_label != '':
            self.graph.addTriple(person_id,
                                 self.globaltt['label'],
                                 person_label,
                                 object_is_literal=True)

    def addDeprecatedClass(self,
                           old_id,
                           new_ids=None,
                           old_id_category=None,
                           new_ids_category=None):
        """
        Will mark the oldid as a deprecated class.
        if one newid is supplied, it will mark it as replaced by.
        if >1 newid is supplied, it will mark it with consider properties
        :param old_id: str - the class id to deprecate
        :param new_ids: list - the class list that is
                       the replacement(s) of the old class.  Not required.
        :param old_id_category: a biolink category CURIE for old id
        :param new_ids_category: a biolink category CURIE for new ids
        :return: None

        """
        self.graph.addTriple(old_id,
                             self.globaltt['type'],
                             self.globaltt['class'],
                             subject_category=old_id_category)

        self._addReplacementIds(old_id,
                                new_ids,
                                new_ids_category=new_ids_category)

    def _addReplacementIds(self, old_id, new_ids, new_ids_category=None):
        """
        Flag ``old_id`` as deprecated and point at its replacement(s).

        A single replacement (a bare string or a one-element sequence) is
        recorded with 'term replaced by'; several replacements are each
        recorded with 'consider'.  None or an empty sequence adds only the
        deprecation flag.

        :param old_id: the id being deprecated
        :param new_ids: str, sequence of str, or None
        :param new_ids_category: a biolink category CURIE for the new ids
        :return:
        """

        self.graph.addTriple(old_id,
                             self.globaltt['deprecated'],
                             True,
                             object_is_literal=True,
                             literal_type='xsd:boolean')

        if new_ids is not None:
            if isinstance(new_ids, str):
                # a bare string is a single replacement; it carries the
                # category just like the one-element sequence case below
                self.graph.addTriple(old_id,
                                     self.globaltt['term replaced by'],
                                     new_ids,
                                     object_category=new_ids_category)
            elif len(new_ids) == 1:
                self.graph.addTriple(old_id,
                                     self.globaltt['term replaced by'],
                                     new_ids[0],
                                     object_category=new_ids_category)
            elif new_ids:
                for new_id in new_ids:
                    self.graph.addTriple(old_id,
                                         self.globaltt['consider'],
                                         new_id,
                                         object_category=new_ids_category)

    def addDeprecatedIndividual(self,
                                old_id,
                                new_ids=None,
                                old_id_category=None,
                                new_id_category=None):
        """
        Will mark the oldid as a deprecated individual.
        if one newid is supplied, it will mark it as replaced by.
        if >1 newid is supplied, it will mark it with consider properties
        :param old_id: the individual id to deprecate
        :param new_ids: the individual idlist that is the replacement(s) of
                       the old individual.  Not required.
        :param old_id_category: a biolink category CURIE for old id
        :param new_id_category: a biolink category CURIE for new ids
        :return:

        """
        self.graph.addTriple(old_id,
                             self.globaltt['type'],
                             self.globaltt['named_individual'],
                             subject_category=old_id_category)

        self._addReplacementIds(old_id,
                                new_ids,
                                new_ids_category=new_id_category)

    def addSubClass(self,
                    child_id,
                    parent_id,
                    child_category=None,
                    parent_category=None):
        """
        Add a (child_id, subclass_of, parent_id) triple.

        :param child_id:
        :param parent_id:
        :param child_category: a biolink category CURIE for child_id
        :param parent_category: a biolink category CURIE for parent_id
        :return:
        """
        self.graph.addTriple(child_id,
                             self.globaltt['subclass_of'],
                             parent_id,
                             object_is_literal=False,
                             subject_category=child_category,
                             object_category=parent_category)

    def addSynonym(self,
                   class_id,
                   synonym,
                   synonym_type=None,
                   class_category=None):
        """
        Add the synonym as a property of the class cid.
        Assume it is an exact synonym, unless otherwise specified.
        Nothing is added when the synonym is None or ''.
        :param class_id: class id
        :param synonym: the literal synonym label
        :param synonym_type: the CURIE of the synonym type (not the URI)
        :param class_category: biolink category CURIE for class_id
        (no biolink category is possible for synonym, since this is added to the triple
        as a literal)
        :return:

        """
        if synonym_type is None:
            synonym_type = self.globaltt['has_exact_synonym']

        if synonym is not None and synonym != '':
            self.graph.addTriple(class_id,
                                 synonym_type,
                                 synonym,
                                 object_is_literal=True,
                                 subject_category=class_category)
            # todo warn

    def addDefinition(self, class_id, definition, class_category=None):
        """
        Add a literal definition to the class.

        :param class_id:
        :param definition:
        :param class_category: a biolink category CURIE for class_id
        :return:
        """
        self.graph.addTriple(class_id,
                             self.globaltt['definition'],
                             definition,
                             object_is_literal=True,
                             subject_category=class_category)

    def addXref(self,
                class_id,
                xref_id,
                xref_as_literal=False,
                class_category=None,
                xref_category=None):
        """
        Add a database cross reference from ``class_id`` to ``xref_id``.

        :param class_id:
        :param xref_id:
        :param xref_as_literal: passed on as ``object_is_literal``
        :param class_category: a biolink category CURIE for class_id
        :param xref_category: a biolink category CURIE for xref_id
        :return:
        """
        self.graph.addTriple(class_id,
                             self.globaltt['database_cross_reference'],
                             xref_id,
                             object_is_literal=xref_as_literal,
                             subject_category=class_category,
                             object_category=xref_category)

    def addDepiction(self, subject_id, image_url):
        """
        Add a (subject_id, depiction, image_url) triple.

        :param subject_id:
        :param image_url:
        :return:
        """
        self.graph.addTriple(subject_id, self.globaltt['depiction'], image_url)

    def addComment(self, subject_id, comment, subject_category=None):
        """
        Add the whitespace-stripped comment as a literal.

        :param subject_id:
        :param comment: str; must not be None since it is stripped
        :param subject_category: a biolink category CURIE for subject_id
        :return:
        """
        self.graph.addTriple(subject_id,
                             self.globaltt['comment'],
                             comment.strip(),
                             object_is_literal=True,
                             subject_category=subject_category)

    def addDescription(self, subject_id, description, subject_category=None):
        """
        Add the whitespace-stripped description as a literal.

        Nothing is added when the description is None or is empty after
        stripping.

        :param subject_id:
        :param description:
        :param subject_category: a biolink category CURIE for subject_id
        :return:
        """
        # the None test has to come before strip(); the other way round
        # a None description raises AttributeError
        if description is None:
            return
        description = description.strip()
        if description != '':
            self.graph.addTriple(subject_id,
                                 self.globaltt['description'],
                                 description,
                                 object_is_literal=True,
                                 subject_category=subject_category)
            # todo: warn; but only when we can say where it came from

    def addOntologyDeclaration(self, ontology_id):
        """
        Type ``ontology_id`` as an ontology.

        :param ontology_id:
        :return:
        """
        self.graph.addTriple(ontology_id, self.globaltt['type'],
                             self.globaltt['ontology'])

    def addOWLVersionIRI(self, ontology_id, version_iri):
        """
        Add the version IRI of the ontology (as a non-literal object).

        :param ontology_id:
        :param version_iri:
        :return:
        """
        self.graph.addTriple(ontology_id,
                             self.globaltt['version_iri'],
                             version_iri,
                             object_is_literal=False)

    def addOWLVersionInfo(self, ontology_id, version_info):
        """
        Add the version info of the ontology as a literal.

        :param ontology_id:
        :param version_info:
        :return:
        """
        self.graph.addTriple(ontology_id,
                             self.globaltt['version_info'],
                             version_info,
                             object_is_literal=True)

    def makeLeader(self, node_id):
        """
        Add an annotation property to the given ```node_id```
        to be the clique_leader.
        This is a monarchism.
        :param node_id:
        :return:
        """
        self.graph.addTriple(node_id,
                             self.globaltt['clique_leader'],
                             True,
                             object_is_literal=True,
                             literal_type='xsd:boolean')

    def addBlankNodeAnnotation(self, node_id):
        """
        Add an annotation property to the given ```node_id```
        to be a pseudo blank node.
        This is a monarchism.
        :param node_id:
        :return:
        """
        self.graph.addTriple(node_id,
                             self.globaltt['is_anonymous'],
                             True,
                             object_is_literal=True,
                             literal_type='xsd:boolean')

    def _addSexSpecificity(self, subject_id, sex, subject_category=None):
        """
        Add sex specificity to a subject (eg association node)

        In our modeling we use this to add a qualifier to a triple
        for example, this genotype to phenotype association
        is specific to this sex (see MGI, IMPC)

        This expects the client to define the ontology term
        for sex (eg PATO)

        Note this class is probably not the right place for this
        method, but putting here until a better home is found
        :param subject_id:
        :param sex:
        :param subject_category: a biolink category CURIE for subject_id
        :return:
        """
        self.graph.addTriple(subject_id,
                             self.globaltt['has_sex_specificty'],
                             sex,
                             subject_category=subject_category,
                             object_category=blv.terms['BiologicalSex'])
Пример #7
0
    def get_uniprot_entrez_id_map(self):
        """
        Return a dict mapping UniProtKB accessions to a single gene CURIE.

        The YAML cache ``id_map_<digest of self.tax_ids>.yaml`` in
        ``self.rawdir`` is loaded when it exists and its ctime is later
        than that of the big mapping file.  Otherwise
        ``self.fetch_from_url`` is called for the big file, which is read
        as gzipped tab-separated text and filtered to the taxa in
        ``self.tax_ids``; the resulting map is then written to the cache.

        Column positions come from ``self.files['id-map']['columns']``.
        Only 1:1 identifiers are kept: an Entrez gene id is preferred and
        an Ensembl id is the fallback; values that are empty or contain
        ``;`` are not used, and such rows are counted and reported.

        :return: dict of UniProtKB-AC -> ``NCBIGene:<id>`` or ``ENSEMBL:<id>``
        :raises ValueError: if one of the four required column names is
            missing from the configured column list
        """
        src_key = 'id-map'
        taxon_digest = GraphUtils.digest_id(str(self.tax_ids))
        id_map = {}
        smallfile = '/'.join((self.rawdir, 'id_map_' + taxon_digest + '.yaml'))
        bigfile = '/'.join((self.rawdir, self.files[src_key]['file']))

        # if processed smallfile exists and is newer than bigfile then use it instead
        if os.path.isfile(smallfile) and \
                os.path.getctime(smallfile) > os.path.getctime(bigfile):
            LOG.info("Using the cheap mapping file %s", smallfile)
            with open(smallfile, 'r') as yamlreader:
                id_map = yaml.safe_load(yamlreader)
        else:
            LOG.info(
                "Expensive Mapping from Uniprot IDs to Entrez/ENSEMBL gene ids for %s",
                self.tax_ids)
            self.fetch_from_url(self.files[src_key]['url'], bigfile)
            col = self.files[src_key]['columns']
            # Resolve the column positions once, up front: the lookups are
            # loop invariant and the file has far too many rows to repeat
            # four list searches on each of them.  Only these four of the
            # configured columns are consumed.
            ac_idx = col.index('UniProtKB-AC')
            geneid_idx = col.index('GeneID (EntrezGene)')
            taxon_idx = col.index('NCBI-taxon')
            ensembl_idx = col.index('Ensembl')
            unmapped_uniprot = 0
            with gzip.open(bigfile, 'rb') as csvfile:
                csv.field_size_limit(sys.maxsize)
                reader = csv.reader(  # warning this file is over 10GB unzipped
                    io.TextIOWrapper(csvfile, newline=""),
                    delimiter='\t', quotechar='\"')
                for row in reader:
                    uniprotkb_ac = row[ac_idx].strip()
                    geneid = row[geneid_idx].strip()
                    ncbitaxon = row[taxon_idx].strip()
                    ensembl = row[ensembl_idx].strip()

                    if ncbitaxon not in self.tax_ids:
                        continue

                    # neither empty nor a list
                    if geneid != '' and ';' not in geneid:
                        id_map[uniprotkb_ac] = 'NCBIGene:' + geneid
                    elif ensembl != '' and ';' not in ensembl:
                        id_map[uniprotkb_ac] = 'ENSEMBL:' + ensembl
                    else:
                        unmapped_uniprot += 1

            LOG.info("Writing id_map out as %s", smallfile)
            with open(smallfile, 'w') as yamlwriter:
                yaml.dump(id_map, yamlwriter)
            LOG.warning('Did not find 1:1 gene IDs for %i uniprots', unmapped_uniprot)
        LOG.info(
            "Acquired %i 1:1 uniprot to [entrez|ensembl] mappings", len(id_map))

        return id_map
Пример #8
0
class Feature():
    """
    Dealing with genomic features here.  By default they are all faldo:Regions.
    We use SO for typing genomic features. At the moment,
    RO:has_subsequence is the default relationship
    between the regions, but this should be tested/verified.

    A feature holds an identifier, label, type and description plus optional
    start/stop locations; the add* methods write triples to the wrapped graph.

    TODO:
    the graph additions are in the addXToFeature functions,
    but should be separated.
    TODO:
    this will need to be extended to properly deal with
    fuzzy positions in faldo.

    """

    def __init__(
            self,
            graph,
            feature_id=None,
            label=None,
            feature_type=None,
            description=None,
            feature_category=None
    ):
        """
        :param graph: the Graph instance that triples are written to
        :param feature_id: identifier of the feature
        :param label: label of the feature
        :param feature_type: type identifier of the feature
        :param description: description of the feature
        :param feature_category: a biolink category CURIE for the feature
        :raises ValueError: if graph is not a Graph instance

        """
        if not isinstance(graph, Graph):
            raise ValueError("{} is not a graph".format(graph))
        self.graph = graph
        self.model = Model(self.graph)
        self.globaltt = self.graph.globaltt
        self.globaltcid = self.graph.globaltcid
        self.curie_map = self.graph.curie_map
        self.gfxutl = GraphUtils(self.curie_map)
        self.fid = feature_id
        self.feature_category = feature_category
        self.label = label
        self.ftype = feature_type
        self.description = description
        # locations are filled in by addFeatureStartLocation/addFeatureEndLocation
        self.start = None
        self.stop = None
        self.taxon = None

    def addFeatureStartLocation(
            self, coordinate, reference_id, strand=None, position_types=None
    ):
        """
        Adds coordinate details for the start of this feature.
        Nothing is written to the graph here; the location is only stored
        on the instance as
        {coordinate : integer, reference : reference_id, types = []}
        :param coordinate:
        :param reference_id:
        :param strand:
        :param position_types:

        """
        self.start = self._getLocation(coordinate, reference_id, strand, position_types)

    def addFeatureEndLocation(
            self, coordinate, reference_id, strand=None, position_types=None
    ):
        """
        Adds the coordinate details for the end of this feature.
        Nothing is written to the graph here; the location is only stored
        on the instance.
        :param coordinate:
        :param reference_id:
        :param strand:
        :param position_types:

        """
        self.stop = self._getLocation(coordinate, reference_id, strand, position_types)

    def _getLocation(self, coordinate, reference_id, strand, position_types):
        """
        Make an object for the location, which has:
        {coordinate : integer, reference : reference_id, types = []}
        where the strand is indicated in the type array
        :param coordinate:
        :param reference_id:
        :param strand:
        :param position_types:
        :return: dict with the keys 'coordinate', 'reference' and 'type'

        """
        loc_types = []
        strand_id = self._getStrandType(strand)
        if strand_id is not None:
            loc_types.append(strand_id)
        if position_types is not None:
            # extends the fresh list; the caller's list is not modified
            loc_types += position_types
        if position_types == []:
            # an explicitly empty type list gets the generic Position type
            loc_types.append(self.globaltt['Position'])

        return {
            'coordinate': coordinate,
            'reference': reference_id,
            'type': loc_types,
        }

    def _getStrandType(self, strand):
        """
        Map a strand symbol ('+', '-', '.') to its translation-table term.
        :param strand:
        :return: the mapped term, or None when strand is None or unrecognised
            (an unrecognised value is logged as a warning)
        """
        if strand == '+':
            return self.globaltt['plus_strand']
        if strand == '-':
            return self.globaltt['minus_strand']
        if strand == '.':
            return self.globaltt['both_strand']
        if strand is not None:  # None is assumed to mean Unknown
            LOG.warning("strand type could not be mapped: %s", str(strand))
        return None

    def addFeatureToGraph(
            self, add_region=True, region_id=None, feature_as_class=False,
            feature_category=None):
        """
        We make the assumption here that all features are instances.
        The features are located on a region,
        which begins and ends with faldo:Position
        The feature locations leverage the Faldo model,
        which has a general structure like:
        Triples:
        feature_id a feature_type (individual)
        faldo:location region_id
        region_id a faldo:region
        faldo:begin start_position
        faldo:end end_position
        start_position a
        (any of: faldo:(((Both|Plus|Minus)Strand)|Exact)Position)
        faldo:position Integer(numeric position)
        faldo:reference reference_id
        end_position a
        (any of: faldo:(((Both|Plus|Minus)Strand)|Exact)Position)
        faldo:position Integer(numeric position)
        faldo:reference reference_id

        If neither a start nor a stop location is set, no separate region
        is made and the feature itself is typed as the region.

        :param add_region [True]
        :param region_id [None]  generated from the locations when omitted
        :param feature_as_class [False]
        :param feature_category: a biolink category CURIE for feature;
            defaults to the category given at construction
        """

        if feature_category is None:
            feature_category = self.feature_category

        if feature_as_class:
            self.model.addClassToGraph(
                self.fid, self.label, self.ftype, self.description,
                class_category=feature_category)
        else:
            self.model.addIndividualToGraph(
                self.fid, self.label, self.ftype, self.description,
                ind_category=feature_category)

        if self.start is None and self.stop is None:
            add_region = False

        if add_region:
            if region_id is None:
                # only derive an id when the caller did not supply one
                region_id = self._makeRegionId()

            self.graph.addTriple(
                self.fid,
                self.globaltt['location'],
                region_id,
                subject_category=feature_category
            )
            self.model.addIndividualToGraph(region_id, None, self.globaltt['Region'])
        else:
            region_id = self.fid
            # NOTE(review): this branch reads the 'region' key while the branch
            # above reads 'Region' -- confirm both exist in the translation table
            self.model.addType(region_id, self.globaltt['region'])

        # add the start/end positions to the region
        beginp = endp = None
        if self.start is not None:
            beginp = self._makePositionId(
                self.start['reference'], self.start['coordinate'], self.start['type'])
            self.addPositionToGraph(
                self.start['reference'], self.start['coordinate'], self.start['type'],
            )

        if self.stop is not None:
            endp = self._makePositionId(
                self.stop['reference'], self.stop['coordinate'], self.stop['type'])
            self.addPositionToGraph(
                self.stop['reference'], self.stop['coordinate'], self.stop['type'])

        self.addRegionPositionToGraph(region_id, beginp, endp)

    def _makeRegionId(self):
        """
        Build a blank-node curie for the region spanned by this feature and
        attach the human readable region string to it as a label.
        Expects at least one of self.start / self.stop to be set.
        :return: bnode_curie
        """
        # take the reference from whichever end is known; previously a feature
        # with only a stop location failed here on self.start being None
        anchor = self.start if self.start is not None else self.stop
        regionchr = re.sub(r'\w+\:_?', '', anchor['reference'])

        # in case the values are undefined
        # if we know only one of the coordinates,
        # then we'll add an "unknown" other.
        st = sp = 'UN'
        strand = None
        if self.start is not None and self.start['coordinate'] is not None:
            st = str(self.start['coordinate'])
            strand = self._getStrandStringFromPositionTypes(self.start['type'])
        if self.stop is not None and self.stop['coordinate'] is not None:
            sp = str(self.stop['coordinate'])
            # NOTE(review): the stop strand only replaces an already known
            # start strand and is never used as a fallback when the start
            # strand is unknown -- confirm this is intended
            if strand is not None:
                strand = self._getStrandStringFromPositionTypes(
                    self.stop['type'])
        # assume that the strand is the same for both start and stop.
        # this will need to be fixed in the future
        region_items = [regionchr, st, sp]
        if strand is not None:
            region_items += [strand]
        rid = '-'.join(region_items)
        rid = re.sub(r'\w+\:', '', rid, count=1)  # replace the id prefix
        # blank node, bnode
        rid = rid + "-Region"
        curie = '_:' + self.gfxutl.digest_id(rid)
        self.model.addLabel(curie, rid)
        return curie

    def _getStrandStringFromPositionTypes(self, tylist):
        """
        Find which strand term, if any, is present in a list of position types.
        :param tylist: list of position type identifiers
        :return: 'plus', 'minus', 'both', or None when no strand term is found
        """
        for term_key, strand_name in (
                ('plus_strand', 'plus'),
                ('minus_strand', 'minus'),
                ('both_strand', 'both')):
            if self.globaltt[term_key] in tylist:
                return strand_name
        return None  # no strand term among the types

    def _makePositionId(self, reference, coordinate, types=None):
        """
        Note that positions should have a reference (we will enforce).
        Only exact positions need a coordinate.
        The readable form of the id is attached to the curie as a label.
        :param reference:
        :param coordinate:
        :param types:
        :return: bnode_curie, or None (with an error logged) when there is
            no reference
        """
        # blank node, bnode
        if reference is None:
            LOG.error("Trying to make position with no reference.")
            return None

        reference = re.sub(r'\w+\:', '', reference, count=1)
        # startswith, unlike indexing, is safe when nothing is left
        # of the reference after the prefix is removed
        if reference.startswith('_'):
            # in this case the reference is a bnode curie as well
            # ... this is a bad smell of over modeling
            reference = reference[1:]
        unique_words = reference
        if coordinate is not None:
            # just in case it isn't a string already
            unique_words = '-'.join((unique_words, str(coordinate)))
        if types is not None:
            tstring = self._getStrandStringFromPositionTypes(types)
            if tstring is not None:
                unique_words = '-'.join((unique_words, tstring))

        curie = '_:' + self.gfxutl.digest_id(unique_words)

        # attach the wordage via a label
        # I want to see more of this (TEC 201905)
        # including a type should be mandatory as well
        self.model.addLabel(curie, unique_words)
        return curie

    def addRegionPositionToGraph(self, region_id, begin_position_id, end_position_id):
        """
        Link a region to its begin and end positions.
        A position id that is None is skipped without a warning.
        :param region_id:
        :param begin_position_id:
        :param end_position_id:
        """
        if begin_position_id is not None:
            self.graph.addTriple(region_id, self.globaltt['begin'], begin_position_id)

        if end_position_id is not None:
            self.graph.addTriple(region_id, self.globaltt['end'], end_position_id)

    def addPositionToGraph(
            self, reference_id, position, position_types=None, strand=None
    ):
        """
        Add the positional information to the graph, following the faldo model.
        We assume that if the strand is None,
        we give it a generic "Position" only.
        Triples:
        my_position a (any of: faldo:(((Both|Plus|Minus)Strand)|Exact)Position)
        faldo:position Integer(numeric position)
        faldo:reference reference_id

        :param reference_id:
        :param position:
        :param position_types:
        :param strand: a strand symbol, or a value that already starts
            with 'faldo'

        :return:  Identifier of the position created

        """
        pos_id = self._makePositionId(reference_id, position, position_types)
        if position is not None:
            self.graph.addTriple(
                pos_id,
                self.globaltt['position'],
                position,
                object_is_literal=True,
                literal_type="xsd:integer"
            )
        self.graph.addTriple(
            pos_id, self.globaltt['reference'], reference_id
        )
        if position_types is not None:
            for pos_type in position_types:
                self.model.addType(pos_id, pos_type)

        strnd = None
        if strand is not None:
            strnd = strand
            if not re.match(r'faldo', strand):
                # not already mapped to faldo, so expect we need to map it
                strnd = self._getStrandType(strand)
        # fall back to the generic Position only when nothing else types it
        if strnd is None and (position_types is None or position_types == []):
            strnd = self.globaltt['Position']

        if strnd is not None:
            self.model.addType(pos_id, strnd)

        return pos_id

    def addSubsequenceOfFeature(
            self, parentid, subject_category=None, object_category=None
    ):
        """
        This will add reciprocal triples like:
        feature <is subsequence of> parent
        parent has_subsequence feature
        :param parentid:
        :param subject_category: a biolink category CURIE for this feature
        :param object_category: a biolink category CURIE for the parent

        :return:

        """
        self.graph.addTriple(
            self.fid,
            self.globaltt['is subsequence of'],
            parentid,
            subject_category=subject_category,
            object_category=object_category
        )
        # this should be expected to be done in reasoning not ETL
        # (the categories swap because subject and object swap)
        self.graph.addTriple(
            parentid,
            self.globaltt['has subsequence'],
            self.fid,
            subject_category=object_category,
            object_category=subject_category
        )

    def addTaxonToFeature(self, taxonid):
        """
        Given the taxon id, this will add the following triple:
        feature in_taxon taxonid
        The taxon id is also remembered on the instance.
        :param taxonid:
        :return:
        """
        self.taxon = taxonid
        self.graph.addTriple(
            self.fid,
            self.globaltt['in taxon'],
            self.taxon,
            subject_category=self.feature_category
        )

    def addFeatureProperty(self, property_type, feature_property):
        """
        Add the triple: feature property_type feature_property
        :param property_type: predicate identifier
        :param feature_property: object of the triple
        """
        self.graph.addTriple(
            self.fid,
            property_type,
            feature_property,
            subject_category=self.feature_category
        )
Пример #9
0
class G2PAssoc(Assoc):
    """
    A specific association class for defining Genotype-to-Phenotype
    relationships. This assumes that a graph is created outside of this class,
    and nodes get added.
    By default, an association will assume the "has_phenotype" relationship,
    unless otherwise specified.
    Note that genotypes are expected to be
    created and defined outside of this association,
    most likely by calling methods in the Genotype() class.

    """
    def __init__(self,
                 graph,
                 definedby,
                 entity_id,
                 phenotype_id,
                 rel=None,
                 entity_category=None,
                 phenotype_category=None):
        """
        :param graph: passed through to the parent constructor
        :param definedby: passed through to the parent constructor
        :param entity_id: subject of the association
        :param phenotype_id: object of the association
        :param rel: relationship; the 'has phenotype' term when omitted
        :param entity_category: stored as the subject category
        :param phenotype_category: stored as the object category

        """
        super().__init__(graph, definedby)
        self.entity_id = entity_id
        self.phenotype_id = phenotype_id

        relationship = self.globaltt['has phenotype'] if rel is None else rel

        # optional qualifiers; populated through set_stage/set_environment
        self.start_stage_id = None
        self.end_stage_id = None
        self.environment_id = None
        self.stage_process_id = None

        self.set_subject(entity_id)
        self.set_object(phenotype_id)
        self.set_relationship(relationship)

        self.subject_category = entity_category
        self.object_category = phenotype_category
        self.gut = GraphUtils(None)

    def set_stage(self, start_stage_id, end_stage_id):
        """
        Record the start and/or end stage; None and blank values are ignored.
        :param start_stage_id:
        :param end_stage_id:
        """
        if start_stage_id is not None and start_stage_id.strip():
            self.start_stage_id = start_stage_id
        if end_stage_id is not None and end_stage_id.strip():
            self.end_stage_id = end_stage_id

    def set_environment(self, environment_id):
        """
        Record the environment; a None or blank value is ignored.
        :param environment_id:
        """
        if environment_id is None:
            return
        if environment_id.strip():
            self.environment_id = environment_id

    def set_association_id(self, assoc_id=None):
        """
        Use the given association id, or generate one with make_g2p_id().
        :param assoc_id:
        """
        self.assoc_id = self.make_g2p_id() if assoc_id is None else assoc_id

    def add_association_to_graph(self,
                                 entity_category=None,
                                 phenotype_category=None):
        """
        Overrides  Association by including bnode support

        The reified relationship between a genotype (or any genotype part)
        and a phenotype is decorated with some provenance information.
        This makes the assumption that
        both the genotype and phenotype are classes.

        currently hardcoded to map the annotation to the monarch namespace
        :param entity_category: a biolink category CURIE for self.sub
        :param phenotype_category: a biolink category CURIE for self.obj
        :return:
        """
        # NOTE(review): entity_category and phenotype_category are accepted
        # but are not used anywhere in this method body
        # is this kosher?
        Assoc.add_association_to_graph(self)

        # make a blank stage
        # NOTE(review): start is tested for truthiness, end for not-None
        if self.start_stage_id or (self.end_stage_id is not None):
            stage_label = '-'.join(
                (str(self.start_stage_id), str(self.end_stage_id)))
            stage_bnode = ':'.join(  # bnode
                ('_', self.gut.digest_id(stage_label)))
            self.model.addIndividualToGraph(
                stage_bnode, None, self.globaltt['developmental_process'])
            self.graph.addTriple(
                stage_bnode, self.globaltt['label'], stage_label)

            # NOTE(review): both triples are written even when only one of
            # the two stage ids is set, so the other object is None
            self.graph.addTriple(
                stage_bnode, self.globaltt['starts during'], self.start_stage_id)
            self.graph.addTriple(
                stage_bnode, self.globaltt['ends during'], self.end_stage_id)

            self.stage_process_id = stage_bnode
            self.graph.addTriple(
                self.assoc_id, self.globaltt['has_qualifier'],
                self.stage_process_id)

        if self.environment_id is not None:
            self.graph.addTriple(
                self.assoc_id, self.globaltt['has_qualifier'],
                self.environment_id)

    def make_g2p_id(self):
        """
        Make an association id for phenotypic associations that is defined by:
        source of association +
        (Annot subject) +
        relationship +
        phenotype/disease +
        environment +
        start stage +
        end stage

        :return: the id produced by make_association_id

        """
        qualifiers = [
            self.environment_id, self.start_stage_id, self.end_stage_id
        ]
        return self.make_association_id(
            self.definedby, self.entity_id, self.rel, self.phenotype_id,
            qualifiers)
Пример #10
0
class Genotype():
    """
    These methods provide convenient methods to
    add items related to a genotype and it's parts to a supplied graph.
    They follow the patterns set out in
    GENO https://github.com/monarch-initiative/GENO-ontology.
    For specific sequence features,
    we use the GenomicFeature class to create them.

    """
    def __init__(self, graph):
        """
        :param graph: the Graph instance that triples are written to
        :raises ValueError: if graph is not a Graph instance
        """
        if not isinstance(graph, Graph):
            raise ValueError("{} is not a graph".format(graph))
        self.graph = graph
        self.model = Model(self.graph)
        # lookups borrowed from the graph
        self.globaltt = self.graph.globaltt
        self.globaltcid = self.graph.globaltcid
        self.curie_map = self.graph.curie_map
        self.gut = GraphUtils(self.curie_map)

    def addGenotype(self,
                    genotype_id,
                    genotype_label,
                    genotype_type=None,
                    genotype_description=None):
        """
        If a genotype_type is not supplied,
        we will default to 'intrinsic genotype'
        :param genotype_id:
        :param genotype_label:
        :param genotype_type:
        :param genotype_description:
        :return:

        """
        if genotype_type is None:
            genotype_type = self.globaltt['intrinsic genotype']

        self.model.addIndividualToGraph(genotype_id, genotype_label,
                                        genotype_type, genotype_description)

    def addAllele(self,
                  allele_id,
                  allele_label,
                  allele_type=None,
                  allele_description=None):
        """
        Make an allele object.
        If no allele_type is added, it will default to a geno:allele
        :param allele_id: curie for allele (required)
        :param allele_label: label for allele (required)
        :param allele_type: id for an allele type (optional,
        recommended SO or GENO class)
        :param allele_description: a free-text description of the allele
        :return:

        """

        # TODO should we accept a list of allele types?
        if allele_type is None:
            allele_type = self.globaltt['allele']  # TODO is this a good idea?
        self.model.addIndividualToGraph(allele_id, allele_label, allele_type,
                                        allele_description)

    def addGene(self,
                gene_id,
                gene_label=None,
                gene_type=None,
                gene_description=None):
        ''' genes are classes '''
        if gene_type is None:
            gene_type = self.globaltt['gene']
        self.model.addClassToGraph(gene_id, gene_label, gene_type,
                                   gene_description)

    def addConstruct(self,
                     construct_id,
                     construct_label,
                     construct_type=None,
                     construct_description=None,
                     construct_category=None,
                     construct_type_category=None):
        """
        :param construct_id:
        :param construct_label:
        :param construct_type:
        :param construct_description:
        :param construct_category: a biolink category CURIE for construct_id
        :param construct_type_category: a biolink category CURIE for construct_type
        :return:

        """
        # TODO add base type for construct
        # if (constrcut_type is None):
        #    construct_type=self.construct_base_type
        self.model.addIndividualToGraph(
            construct_id,
            construct_label,
            construct_type,
            construct_description,
            ind_category=construct_category,
            ind_type_category=construct_type_category)

    def addDerivesFrom(self,
                       child_id,
                       parent_id,
                       child_category=None,
                       parent_category=None):
        """
        We add a derives_from relationship between the child and parent id.
        Examples of uses include between:
        an allele and a construct or strain here,
        a cell line and it's parent genotype.  Adding the parent and child to
        the graph should happen outside of this function call to ensure graph
        integrity.
        :param child_id:
        :param parent_id:
        :return:

        """

        self.graph.addTriple(child_id,
                             self.globaltt['derives_from'],
                             parent_id,
                             subject_category=child_category,
                             object_category=parent_category)

    def addSequenceDerivesFrom(self,
                               child_id,
                               parent_id,
                               child_category=None,
                               parent_category=None):
        self.graph.addTriple(child_id,
                             self.globaltt['sequence_derives_from'],
                             parent_id,
                             subject_category=child_category,
                             object_category=parent_category)

        return

    def addAlleleOfGene(self, allele_id, gene_id, rel_id=None):
        """
        We make the assumption here that if the relationship is not provided,
        it is a
        GENO:is_allele_of.

        Here, the allele should be a variant_locus, not a sequence alteration.
        :param allele_id:
        :param gene_id:
        :param rel_id:
        :return:

        """
        if rel_id is None:
            rel_id = self.globaltt["is_allele_of"]
        self.graph.addTriple(allele_id, rel_id, gene_id)

    def addAffectedLocus(self, allele_id, gene_id, rel_id=None):
        """
        We make the assumption here that if the relationship is not provided,
        it is a
        GENO:has_affected_feature.

        Here, the allele should be a variant_locus, not a sequence alteration.
        :param allele_id:
        :param gene_id:
        :param rel_id:
        :return:

        """
        if rel_id is None:
            rel_id = self.globaltt['has_affected_feature']
        self.graph.addTriple(allele_id, rel_id, gene_id)

    def addGeneProduct(self,
                       sequence_id,
                       product_id,
                       product_label=None,
                       product_type=None,
                       sequence_category=None,
                       product_category=None):
        """
        Add gene/variant/allele has_gene_product relationship
        Can be used to either describe a gene to transcript relationship
        or gene to protein
        :param sequence_id:
        :param product_id:
        :param product_label:
        :param product_type:
        :param sequence_category: bl category CURIE for seq_id [blv.terms.Gene].value
        :param product_category: biolink category CURIE for product_id
        :return:

        """
        if product_label is not None and product_type is not None:
            self.model.addIndividualToGraph(product_id,
                                            product_label,
                                            product_type,
                                            ind_category=product_category)
        self.graph.addTriple(sequence_id,
                             self.globaltt['has gene product'],
                             product_id,
                             subject_category=sequence_category,
                             object_category=product_category)

    def addPolypeptide(self,
                       polypeptide_id,
                       polypeptide_label=None,
                       transcript_id=None,
                       polypeptide_type=None):
        """
        :param polypeptide_id:
        :param polypeptide_label:
        :param polypeptide_type:
        :param transcript_id:
        :return:

        """
        if polypeptide_type is None:
            polypeptide_type = self.globaltt['polypeptide']
        self.model.addIndividualToGraph(polypeptide_id, polypeptide_label,
                                        polypeptide_type)
        if transcript_id is not None:
            self.graph.addTriple(transcript_id, self.globaltt['translates_to'],
                                 polypeptide_id)

    def addPartsToVSLC(self,
                       vslc_id,
                       allele1_id,
                       allele2_id,
                       zygosity_id=None,
                       allele1_rel=None,
                       allele2_rel=None):
        """
        Here we add the parts to the VSLC.  While traditionally alleles
        (reference or variant loci) are traditionally added, you can add any
        node (such as sequence_alterations for unlocated variations) to a vslc
        if they are known to be paired.  However, if a sequence_alteration's
        loci is unknown, it probably should be added directly to the GVC.
        :param vslc_id:
        :param allele1_id:
        :param allele2_id:
        :param zygosity_id:
        :param allele1_rel:
        :param allele2_rel:
        :return:

        """

        # vslc has parts allele1/allele2

        if allele1_id is not None:
            self.addParts(allele1_id, vslc_id, allele1_rel)
        if allele2_id is not None and allele2_id.strip() != '':
            self.addParts(allele2_id, vslc_id, allele2_rel)

        # figure out zygosity if it's not supplied
        if zygosity_id is None:
            if allele1_id == allele2_id:
                zygosity_id = self.globaltt['homozygous']
            else:
                zygosity_id = self.globaltt['heterozygous']

        if zygosity_id is not None:
            self.graph.addTriple(vslc_id, self.globaltt['has_zygosity'],
                                 zygosity_id)

    def addVSLCtoParent(self,
                        vslc_id,
                        parent_id,
                        part_category=None,
                        parent_category=None):
        """
        The VSLC can either be added to a genotype or to a GVC.
        The vslc is added as a part of the parent.
        :param vslc_id:
        :param parent_id:
        :param part_category: a biolink category CURIE for part
        :param parent_category: a biolink category CURIE for parent
        :return:
        """

        self.addParts(vslc_id,
                      parent_id,
                      self.globaltt['has_variant_part'],
                      part_category=part_category,
                      parent_category=parent_category)

    def addParts(self,
                 part_id,
                 parent_id,
                 part_relationship=None,
                 part_category=None,
                 parent_category=None):
        """
        This will add a has_part (or subproperty) relationship between
        a parent_id and the supplied part.
        By default the relationship will be BFO:has_part,
        but any relationship could be given here.
        :param part_id:
        :param parent_id:
        :param part_relationship:
        :param part_category: a biolink vocab curie for part_id
        :param parent_category: a biolink vocab curie for parent_id
        :return:

        """
        if part_relationship is None:
            part_relationship = self.globaltt['has_part']
        # Fail loudly if parent or child identifiers are None
        if parent_id is None:
            raise TypeError('Attempt to pass None as parent')
        elif part_id is None:
            raise TypeError('Attempt to pass None as child')
        elif part_relationship is None:
            part_relationship = self.globaltt['has_part']

        self.graph.addTriple(parent_id,
                             part_relationship,
                             part_id,
                             subject_category=parent_category,
                             object_category=part_category)

    def addSequenceAlteration(self,
                              sa_id,
                              sa_label,
                              sa_type=None,
                              sa_description=None):

        if sa_type is None:
            sa_type = self.globaltt['sequence_alteration']

        self.model.addIndividualToGraph(sa_id, sa_label, sa_type,
                                        sa_description)

    def addSequenceAlterationToVariantLocus(self, sa_id, vl_id):
        self.addParts(sa_id, vl_id, self.globaltt['has_variant_part'])

    def addGenomicBackground(self,
                             background_id,
                             background_label,
                             background_type=None,
                             background_description=None):
        if background_type is None:
            background_type = self.globaltt['genomic_background']
        self.model.addIndividualToGraph(background_id, background_label,
                                        background_type,
                                        background_description)

    def addGenomicBackgroundToGenotype(self,
                                       background_id,
                                       genotype_id,
                                       background_type=None):
        if background_type is None:
            background_type = self.globaltt['genomic_background']
        self.model.addType(background_id, background_type)
        self.addParts(background_id, genotype_id,
                      self.globaltt['has_reference_part'])

    def addTaxon(self, taxon_id, genopart_id, genopart_category=None):
        """
        The supplied geno part will have the specified taxon added with
        RO:in_taxon relation.
        Generally the taxon is associated with a genomic_background,
        but could be added to any genotype part (including a gene,
        regulatory element, or sequence alteration).

        The triple written is <genopart_id> 'in taxon' <taxon_id>.

        :param taxon_id: identifier of the taxon (object of the triple)
        :param genopart_id: identifier of the geno part (subject of the
            triple)
        :param genopart_category: a biolink term for genopart_id; it is
            forwarded to the graph as the subject's category
        :return: None

        """
        # genopart_category used to be accepted but silently dropped;
        # pass it on so the category supplied by callers reaches the graph
        self.graph.addTriple(genopart_id,
                             self.globaltt['in taxon'],
                             taxon_id,
                             subject_category=genopart_category)

    def addGeneTargetingReagentToGenotype(self, reagent_id, genotype_id):
        """
        Attach a gene-targeting reagent to a genotype.

        Writes the single triple
        <genotype_id> has_variant_part <reagent_id>.
        For example, a morphant reagent can be attached to a genotype
        this way (presumably an extrinsic genotype).

        :param reagent_id: identifier of the reagent (object of the triple)
        :param genotype_id: identifier of the genotype (subject of the
            triple)
        :return: None

        """
        predicate = self.globaltt['has_variant_part']
        self.graph.addTriple(genotype_id, predicate, reagent_id)

    def addGeneTargetingReagent(self,
                                reagent_id,
                                reagent_label,
                                reagent_type,
                                gene_id,
                                description=None,
                                reagent_category=None):
        """
        Add a gene-targeting reagent and link it to a gene.

        The reagent is registered through model.addIndividualToGraph and
        then the triple <reagent_id> targets_gene <gene_id> is written.

        :param reagent_id: identifier of the reagent
        :param reagent_label: label handed to the model
        :param reagent_type: type handed to the model (no default is
            applied here)
        :param gene_id: identifier of the targeted gene
        :param description: optional description handed to the model
        :param reagent_category: handed to the model as ind_category
        :return: None

        """

        # TODO add default type to reagent_type
        self.model.addIndividualToGraph(
            reagent_id, reagent_label, reagent_type, description,
            ind_category=reagent_category)

        targets_gene = self.globaltt['targets_gene']
        self.graph.addTriple(reagent_id, targets_gene, gene_id)

    def addReagentTargetedGene(self,
                               reagent_id,
                               gene_id,
                               targeted_gene_id=None,
                               targeted_gene_label=None,
                               description=None,
                               reagent_category=None):
        """
        Create the instance of a gene that is targeted by a molecular
        reagent (such as a morpholino or rnai).

        If no instance id is supplied, one is derived by joining
        '_' + gene_id + '-' + reagent_id and stripping every ':'.
        The instance is registered with the 'reagent_targeted_gene' type.

        Triples written:

        <targeted_gene_id> is_expression_variant_of <gene_id>
            (only when gene_id is not None)
        <targeted_gene_id> is_targeted_by <reagent_id>

        :param reagent_id: identifier of the reagent
        :param gene_id: identifier of the gene; must not be None when
            targeted_gene_id is omitted, since the id is built from it
        :param targeted_gene_id: optional identifier for the instance
        :param targeted_gene_label: label handed to the model
        :param description: optional description handed to the model
        :param reagent_category: handed to the model as ind_category
        :return: None

        """

        # akin to a variant locus
        # is this some sort of pseudo bnode?
        if targeted_gene_id is None:
            derived_id = '_' + gene_id + '-' + reagent_id
            targeted_gene_id = derived_id.replace(":", "")

        self.model.addIndividualToGraph(
            targeted_gene_id, targeted_gene_label,
            self.globaltt['reagent_targeted_gene'], description,
            ind_category=reagent_category)

        if gene_id is not None:
            variant_of = self.globaltt['is_expression_variant_of']
            self.graph.addTriple(targeted_gene_id, variant_of, gene_id)

        targeted_by = self.globaltt['is_targeted_by']
        self.graph.addTriple(targeted_gene_id, targeted_by, reagent_id)

    def addTargetedGeneSubregion(self,
                                 tgs_id,
                                 tgs_label,
                                 tgs_type=None,
                                 tgs_description=None):
        """
        Register a targeted gene subregion through
        model.addIndividualToGraph.

        :param tgs_id: identifier of the subregion
        :param tgs_label: label handed to the model
        :param tgs_type: type to assign; when None, the global translation
            table entry 'targeted_gene_subregion' is used instead
        :param tgs_description: optional description handed to the model
        :return: None

        """
        subregion_type = (self.globaltt['targeted_gene_subregion']
                          if tgs_type is None else tgs_type)

        self.model.addIndividualToGraph(
            tgs_id, tgs_label, subregion_type, tgs_description)

    def addMemberOfPopulation(self, member_id, population_id):
        """
        Record that a population has the given member.

        Writes the triple
        <population_id> has_member_with_allelotype <member_id>.

        :param member_id: identifier of the member (object of the triple)
        :param population_id: identifier of the population (subject of
            the triple)
        :return: None

        """
        predicate = self.globaltt['has_member_with_allelotype']
        self.graph.addTriple(population_id, predicate, member_id)

    def addTargetedGeneComplement(self,
                                  tgc_id,
                                  tgc_label,
                                  tgc_type=None,
                                  tgc_description=None):
        """
        Register a targeted gene complement through
        model.addIndividualToGraph.

        :param tgc_id: identifier of the complement
        :param tgc_label: label handed to the model
        :param tgc_type: type to assign; when None, the global translation
            table entry 'targeted_gene_complement' is used instead
        :param tgc_description: optional description handed to the model
        :return: None

        """
        complement_type = (self.globaltt['targeted_gene_complement']
                           if tgc_type is None else tgc_type)

        self.model.addIndividualToGraph(
            tgc_id, tgc_label, complement_type, tgc_description)

    def addGenome(self, taxon_num, taxon_label=None, genome_id=None):
        """
        Add a genome class for a taxon, labelled '<taxon label> genome'.

        The taxon label is resolved against self.globaltcid under the key
        'NCBITaxon:' + taxon_num:
        - no label given, key known: the table's label is used
        - no label given, key unknown: a warning is logged and taxon_num
          itself serves as the label
        - label given but different from the table's: two warnings are
          logged and the given label is kept

        :param taxon_num: taxon number as a string (it is concatenated
            onto the 'NCBITaxon:' prefix)
        :param taxon_label: optional label for the taxon
        :param genome_id: optional genome identifier; when None it is
            obtained from makeGenomeID(taxon_num)
        :return: None

        """
        ncbitaxon = 'NCBITaxon:' + taxon_num
        is_known = ncbitaxon in self.globaltcid

        if taxon_label is None and is_known:
            taxon_label = self.globaltcid[ncbitaxon]
        elif taxon_label is None:
            logging.warning('Add ' + ncbitaxon +
                            ' to global translation table')
            taxon_label = taxon_num
        elif is_known and taxon_label != self.globaltcid[ncbitaxon]:
            # supplied label disagrees with the table: flag it both ways
            logging.warning('"' + self.globaltcid[ncbitaxon] +
                            '" may need updating from "' + taxon_label +
                            '" in global translation table')
            logging.warning(
                '"' + taxon_label + '": " ' + self.globaltcid[ncbitaxon] +
                '"' + ' may need to be added to a local translation table')

        genome_label = taxon_label + ' genome'
        if genome_id is None:
            genome_id = self.makeGenomeID(taxon_num)

        self.model.addClassToGraph(
            genome_id, genome_label, self.globaltt['genome'])

    def addReferenceGenome(self, build_id, build_label, taxon_id):
        """
        Add a reference genome build and link it to its genome and taxon.

        The build is registered through model.addIndividualToGraph with
        the 'reference_genome' type, additionally typed with the genome id
        derived from taxon_id, and finally given its taxon via addTaxon.
        The biolink 'GenomeBuild' term is supplied as the build's category
        in all three calls.

        :param build_id: identifier of the genome build
        :param build_label: label handed to the model
        :param taxon_id: taxon identifier as a string; when it starts with
            a digit it is prefixed with 'NCBITaxon:' before being used as
            the taxon (the genome id is derived from the unprefixed value)
        :return: None

        """
        build_category = blv.terms['GenomeBuild']
        genome_id = self.makeGenomeID(taxon_id)

        # The fourth positional argument of addIndividualToGraph is the
        # description (see its other callers in this class), so the
        # category has to be passed by keyword rather than positionally.
        self.model.addIndividualToGraph(build_id, build_label,
                                        self.globaltt['reference_genome'],
                                        ind_category=build_category)
        self.model.addType(build_id,
                           genome_id,
                           subject_category=build_category)

        # re.match anchors at the start only: any id beginning with a
        # digit is treated as a bare taxon number
        if re.match(r'[0-9]+', taxon_id):
            taxon_id = 'NCBITaxon:' + taxon_id

        self.addTaxon(taxon_id,
                      build_id,
                      genopart_category=build_category)

    @staticmethod
    def makeGenomeID(taxon_id):
        # scrub off the taxon prefix.  put it in base space
        # TODO: revisit as yet another BNODE?
        # should never be called if a real genome iri exists
        # should create the opaque bode and label together
        # genome_id = re.sub(r'.*\:', '_:', taxon_id) + 'genome'
        genome_id = '_:' + taxon_id + 'genome'
        return genome_id

    def addChromosome(self,
                      chrom,
                      tax_id,
                      tax_label=None,
                      build_id=None,
                      build_label=None):
        """
        Add a taxon-level chromosome class and, optionally, its
        build-specific instance.

        The chromosome is always added with model.addClassToGraph
        (passing the 'chromosome' term as third argument) and the taxon is
        attached to the genome id derived from tax_id.
        When build_id is supplied, a build-specific chromosome is added as
        an individual typed by the taxon-level chromosome id, and it is
        registered with the build in both directions (member / member-of).

        :param chrom: chromosome name or number
        :param tax_id: taxon identifier used for the chromosome and
            genome ids
        :param tax_label: optional taxon label used in the chromosome label
        :param build_id: optional identifier of a genome build
        :param build_label: optional build label; defaults to build_id
        :return: None

        """
        family = Family(self.graph)

        # first, make the chromosome class, at the taxon level
        chr_id = makeChromID(str(chrom), tax_id)
        if tax_label is None:
            chr_label = makeChromLabel(chrom)
        else:
            chr_label = makeChromLabel(chrom, tax_label)

        genome_id = self.makeGenomeID(tax_id)
        self.model.addClassToGraph(
            chr_id, chr_label, self.globaltt['chromosome'])
        self.addTaxon(tax_id, genome_id)  # add the taxon to the genome

        if build_id is None:
            # nothing build-specific to add
            return

        # the build-specific chromosome
        chrinbuild_id = makeChromID(chrom, build_id)
        label_source = build_id if build_label is None else build_label
        chrinbuild_label = makeChromLabel(chrom, label_source)

        # add the build-specific chromosome as an instance of the chr class
        self.model.addIndividualToGraph(
            chrinbuild_id, chrinbuild_label, chr_id)

        # add the build-specific chromosome
        # as a member of the build (both ways)
        family.addMember(
            build_id, chrinbuild_id,
            group_category=blv.terms['GenomeBuild'])
        family.addMemberOf(
            chrinbuild_id, build_id,
            group_category=blv.terms['GenomeBuild'])

    def addChromosomeClass(self, chrom_num, taxon_id, taxon_label):
        """
        Add the generic (taxon-level) chromosome class.

        Every occurrence of 'NCBITaxon:' is removed from taxon_id before
        the chromosome id is built with the 'CHR' prefix argument.

        :param chrom_num: chromosome name or number
        :param taxon_id: taxon identifier, with or without 'NCBITaxon:'
        :param taxon_label: taxon label used in the chromosome label
        :return: None

        """
        bare_taxon = re.sub('NCBITaxon:', '', taxon_id)

        # the chrom class (generic) id
        class_id = makeChromID(chrom_num, bare_taxon, 'CHR')
        class_label = makeChromLabel(chrom_num, taxon_label)

        self.model.addClassToGraph(
            class_id, class_label, self.globaltt['chromosome'])

    def addChromosomeInstance(self,
                              chr_num,
                              reference_id,
                              reference_label,
                              chr_type=None):
        """
        Add the supplied chromosome as an instance within the given
        reference.

        The instance is registered with the 'chromosome' type, optionally
        given chr_type as an additional type, and registered with the
        reference in both directions (member / member-of).

        :param chr_num: chromosome name or number (converted with str())
        :param reference_id: for example, a build id like UCSC:hg19
        :param reference_label: label of the reference, used in the
            chromosome label
        :param chr_type: this is the class that this is an instance of.
            typically a genome-specific chr
        :return: None

        """
        family = Family(self.graph)

        chrom_name = str(chr_num)
        chr_id = makeChromID(chrom_name, reference_id, 'MONARCH')
        chr_label = makeChromLabel(chrom_name, reference_label)

        self.model.addIndividualToGraph(
            chr_id, chr_label, self.globaltt['chromosome'])
        if chr_type is not None:
            self.model.addType(chr_id, chr_type)

        # add the build-specific chromosome
        # as a member of the build  (both ways)
        family.addMember(
            reference_id, chr_id,
            group_category=blv.terms['GenomeBuild'])
        family.addMemberOf(chr_id, reference_id)

    @staticmethod
    def make_variant_locus_label(gene_label, allele_label):
        if gene_label is None:
            gene_label = ''
        label = gene_label.strip() + '<' + allele_label.strip() + '>'

        return label

    def make_vslc_label(self, gene_label, allele1_label, allele2_label):
        """
        Make a Variant Single Locus Complement (VSLC) label in
        monarch-style: '<top>/<bottom>'.

        Top is the variant locus label for allele 1; bottom is the one
        for allele 2, or an empty string when allele2_label is None.

        :param gene_label: label of the gene, may be None
        :param allele1_label: label of the first allele
        :param allele2_label: label of the second allele, may be None
        :return: the label string, or None (after logging an error) when
            all three inputs are None
        """
        supplied = (gene_label, allele1_label, allele2_label)
        if all(part is None for part in supplied):
            LOG.error("Not enough info to make vslc label")
            return None

        top = self.make_variant_locus_label(gene_label, allele1_label)
        if allele2_label is None:
            bottom = ''
        else:
            bottom = self.make_variant_locus_label(gene_label, allele2_label)

        return top + '/' + bottom

    def make_experimental_model_with_genotype(self, genotype_id,
                                              genotype_label, taxon_id,
                                              taxon_label):
        """
        Add an experimental model (animal) that carries a genotype.

        The animal id is a blank-node style id: '_:' followed by the
        digest of '<taxon_id>-with-<genotype_id>' with every ':' removed.
        The animal is registered with taxon_id as its type, labelled
        '<genotype_label> <taxon_label>', and linked to the genotype with
        the 'has_genotype' relation.

        :param genotype_id: identifier of the genotype
        :param genotype_label: label of the genotype
        :param taxon_id: identifier of the taxon
        :param taxon_label: label of the taxon
        :return: the generated animal id

        """
        raw_key = (taxon_id + '-with-' + genotype_id).replace(':', '')
        # bnode
        animal_id = '_:' + self.gut.digest_id(raw_key)

        animal_label = genotype_label + ' ' + taxon_label
        self.model.addIndividualToGraph(animal_id, animal_label, taxon_id)

        has_genotype = self.globaltt['has_genotype']
        self.graph.addTriple(animal_id, has_genotype, genotype_id)
        return animal_id