Python GraphUtils.getNode примеры использования

Язык программирования: Python

Пространство имен/Пакет: dipper.utils.GraphUtils

Класс/Тип: GraphUtils

Метод/Функция: getNode

Примеров на hotexamples.com: 8

Python GraphUtils.getNode - 8 примеров найдено. Это лучшие примеры Python кода для dipper.utils.GraphUtils.GraphUtils.getNode, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

loadProperties(22)

addSynonym(19)

loadAllProperties(19)

addEquivalentClass(14)

addDescription(12)

GraphUtils(10)

getNode(8)

digest_id(8)

addXref(8)

addDefinition(5)

addType(5)

addTriple(4)

makeLeader(4)

addDeprecatedClass(4)

addMemberOf(3)

addPage(3)

addSubclass(3)

addDeprecatedIndividual(3)

add_property_axioms(3)

addComment(3)

get_properties_from_graph(3)

write(2)

addOWLPropertyClassRestriction(2)

addClassToGraph(2)

addSameIndividual(2)

addMember(2)

addTitle(2)

addIndividualToGraph(2)

addDepiction(2)

addPerson(1)

loadObjectProperties(1)

addOntologyDeclaration(1)

addOWLVersionInfo(1)

addOWLVersionIRI(1)

write_raw_triples(1)

Пример #1

Показать файл

Файл: NCBIGene.py Проект: d3borah/dipper

    def _get_gene2pubmed(self, limit):
        """
        Loops through the gene2pubmed file and adds a simple triple to say that a given publication
        is_about a gene.  Publications are added as NamedIndividuals.
        :param limit:
        :return:
        """

        gu = GraphUtils(curie_map.get())
        if self.testMode:
            g = self.testgraph
        else:
            g = self.graph
        is_about = gu.getNode(gu.object_properties['is_about'])
        logger.info("Processing Gene records")
        line_counter = 0
        myfile = '/'.join((self.rawdir, self.files['gene2pubmed']['file']))
        logger.info("FILE: %s", myfile)
        with gzip.open(myfile, 'rb') as f:
            for line in f:
                # skip comments
                line = line.decode().strip()
                if re.match('^#', line):
                    continue
                (tax_num, gene_num, pubmed_num) = line.split('\t')

                ##### set filter=None in init if you don't want to have a filter
                #if self.filter is not None:
                #    if ((self.filter == 'taxids' and (int(tax_num) not in self.tax_ids))
                #       or (self.filter == 'geneids' and (int(gene_num) not in self.gene_ids))):
                #        continue
                ##### end filter

                if self.testMode and int(gene_num) not in self.gene_ids:
                    continue

                if int(tax_num) not in self.tax_ids:
                    continue

                if gene_num == '-' or pubmed_num == '-':
                    continue

                line_counter += 1
                gene_id = ':'.join(('NCBIGene', gene_num))
                pubmed_id = ':'.join(('PMID', pubmed_num))

                # add the gene, in case it hasn't before
                gu.addClassToGraph(g, gene_id, None)
                # add the publication as a NamedIndividual
                gu.addIndividualToGraph(g, pubmed_id, None, None)  # add type publication
                self.graph.add((gu.getNode(pubmed_id), is_about, gu.getNode(gene_id)))

                if not self.testMode and limit is not None and line_counter > limit:
                    break

        return

Пример #2

Показать файл

Файл: Reference.py Проект: JervenBolleman/dipper

    def addRefToGraph(self, g):

        gu = GraphUtils(curie_map.get())

        n = self.short_citation
        if n is None:
            n = self.title

        if self.ref_url is not None:
            ref_uri = URIRef(self.ref_url)
            g.add((ref_uri, DC['title'], Literal(self.title)))
            g.add((ref_uri, RDF['type'], gu.getNode(self.ref_type)))
            g.add((ref_uri, RDFS['label'], Literal(n)))
        elif self.ref_id is not None:
            gu.addIndividualToGraph(g, self.ref_id, n, self.ref_type)
            if self.title is not None:
                gu.addTitle(g, self.ref_id, self.title)
        else:
            # should never be true
            logger.error("You are missing an identifier for a reference.")

        # TODO what is the property here to add the date?
        # if self.year is not None:
        #    gu.addTriple()

        # if self.author_list is not None:
        #    for a in self.author_list:
        #        gu.addTriple(
        #           g, self.ref_id, self.props['has_author'], a, True)
        return

Пример #3

Показать файл

Файл: test_ctd.py Проект: d3borah/dipper

    def test_therapeutic_relationship(self):
        from dipper.utils.TestUtils import TestUtils
        from dipper.utils.GraphUtils import GraphUtils
        from dipper import curie_map

        # Make testutils object and load ttl
        test_query = TestUtils(self.source.graph)
        test_query.load_testgraph_from_turtle(self.source)

        # Expected structure
        # TODO can this be unified OBAN and the Annot models to be automatically generated?
        sparql_query = """
                       SELECT ?assoc ?pubmed ?disease ?chemical
                       WHERE {
                       ?assoc a Annotation: ;
                           dc:evidence OBO:ECO_0000033 ;
                           dc:source ?pubmed ;
                           :hasObject ?disease ;
                           :hasPredicate OBO:RO_0002606 ;
                           :hasSubject ?chemical .}
                       """

        # SPARQL variables to check
        gu = GraphUtils(curie_map.get())
        chem_id = 'MESH:D009538'
        chem_uri = gu.getNode(chem_id)
        disease_id = 'OMIM:188890'
        disease_uri = gu.getNode(disease_id)
        eco = 'ECO:0000033'
        rel_id = gu.object_properties['substance_that_treats']
        pubmed_id = 'PMID:16785264'
        pubmed_uri = gu.getNode(pubmed_id)

        # consider replacing with make_ctd_chem_disease_assoc_id()
        assoc_id = self.source.make_association_id('ctd', chem_id, rel_id, disease_id, eco, pubmed_id)
        assoc_uri = gu.getNode(assoc_id)

        # One of the expected outputs from query
        expected_output = [assoc_uri, pubmed_uri, disease_uri, chem_uri]

        # Query graph
        sparql_output = test_query.query_graph(sparql_query)

        self.assertTrue(expected_output in sparql_output, "did not find expected association: " + assoc_id +
                        " found: " + pprint.pformat(sparql_output))

        logger.info("Test query data finished.")

Пример #4

Показать файл

Файл: test_interactions.py Проект: JervenBolleman/dipper

    def test_therapeutic_relationship(self):
        from dipper.utils.TestUtils import TestUtils
        from dipper.utils.GraphUtils import GraphUtils

        # Make testutils object and load bindings
        test_query = TestUtils(self.ctd.graph)
        self.ctd.load_bindings()

        # Expected structure
        sparql_query = """
                       SELECT ?assoc ?pubmed ?disease ?chemical
                       WHERE {
                       ?assoc a Annotation: ;
                           dc:evidence OBO:ECO_0000033 ;
                           dc:source ?pubmed ;
                           :hasObject ?disease ;
                           :hasPredicate OBO:RO_0002606 ;
                           :hasSubject ?chemical .}
                       """

        # SPARQL variables to check
        gu = GraphUtils(curie_map.get())
        chem_id = 'MESH:D009538'
        chem_uri = gu.getNode(chem_id)
        disease_id = 'OMIM:188890'
        disease_uri = gu.getNode(disease_id)
        pubmed_id = 'PMID:16785264'
        pubmed_uri = gu.getNode(pubmed_id)
        rel_id = gu.object_properties['substance_that_treats']
        eco = 'ECO:0000033'
        # TODO PYLINT  make_association_id() does not exist in CTD
        # there is "_make_association()" with a different sig

        assoc_id = self.ctd.make_association_id(
            'ctd', chem_id, rel_id, disease_id, eco, pubmed_id)
        assoc_uri = gu.getNode(assoc_id)

        # Expected output from query
        expected_output = [assoc_uri, pubmed_uri, disease_uri, chem_uri]

        # Query graph
        sparql_output = test_query.query_graph(sparql_query)

        self.assertTrue(expected_output in sparql_output)

        logger.info("Test finished.")

Пример #5

Показать файл

Файл: test_general.py Проект: d3borah/dipper

    def test_curieprefixes(self):
        """
        This will ensure that we can create identifiers for all of the defined curie prefixes using the
        GraphUtils.getNode() method
        :return:
        """
        from dipper.utils.GraphUtils import GraphUtils

        gu = GraphUtils(self.curie_map)

        # add one id per curie as classes to the graph
        for p in self.curie_map.keys():
            testid = p+':testme'
            n = gu.getNode(testid)
            m = "prefix \""+p+"\" has an error...can't create graph node"
            self.assertTrue(n is not None, m)

        return

Пример #6

Показать файл

Файл: GenomicFeature.py Проект: JervenBolleman/dipper

class Feature():
    """
    Dealing with genomic features here.  By default they are all faldo:Regions.
    We use SO for typing genomic features. At the moment,
    RO:has_subsequence is the default relationship
    between the regions, but this should be tested/verified.

    TODO:
        the graph additions are in the addXToFeature functions,
        but should be separated.
    TODO:
        this will need to be extended to properly deal with
        fuzzy positions in faldo.
    """

    object_properties = {
        'location': 'faldo:location',
        'begin': 'faldo:begin',
        'end': 'faldo:end',
        'reference': 'faldo:reference',
        'gene_product_of': 'RO:0002204',
        'has_gene_product': 'RO:0002205',
        'is_about': 'IAO:00000136',
        'has_subsequence': 'RO:0002524',
        'is_subsequence_of': 'RO:0002525',
        'has_staining_intensity': 'GENO:0000207',
        # was GENO:0000626 (staining_intensity),
        # but changing to has_sequence_attribute
        'upstream_of_sequence_of': 'RO:0002528',
        'downstream_of_sequence_of': 'RO:0002529'

    }

    data_properties = {
        'position': 'faldo:position',
    }

    annotation_properties = {}

    properties = object_properties.copy()
    properties.update(data_properties)
    properties.update(annotation_properties)

    types = {
        'region': 'faldo:Region',
        'Position': 'faldo:Position',
        # big P for Position type.  little p for position property
        'FuzzyPosition': 'faldo:FuzzyPosition',
        'chromosome': 'SO:0000340',
        'chromosome_arm': 'SO:0000105',
        'chromosome_band': 'SO:0000341',
        'chromosome_part': 'SO:0000830',
        'long_chromosome_arm': 'GENO:0000629',
        'short_chromosome_arm': 'GENO:0000628',
        'chromosome_region': 'GENO:0000614',
        'chromosome_subband': 'GENO:0000616',
        'centromere': 'SO:0000577',
        'plus_strand': 'faldo:PlusStrandPosition',
        'minus_strand': 'faldo:MinusStrandPosition',
        'both_strand': 'faldo:BothStrandPosition',
        'score': 'SO:0001685',
        # FIXME - score is not a good solution, too generic
        'reference_genome': 'SO:0001505',
        'genome': 'SO:0001026',
        'assembly_component': 'SO:0000143',
        'SNP': 'SO:0000694',

        # the following are sequence attributes:
        'band_intensity':  'GENO:0000618',
        'gneg': 'GENO:0000620',
        'gpos': 'GENO:0000619',
        'gpos100': 'GENO:0000622',
        'gpos75': 'GENO:0000623',
        'gpos50': 'GENO:0000624',
        'gpos25': 'GENO:0000625',
        'gvar': 'GENO:0000621',
        'gpos33': 'GENO:0000633',
        'gpos66': 'GENO:0000632'
    }

    def __init__(self, id, label, type, description=None):
        self.id = id
        self.label = label
        self.type = type
        self.description = description
        self.gu = GraphUtils(curie_map.get())
        self.start = None
        self.stop = None
        self.nobnodes = True  # TODO remove this before official release
        return

    def addFeatureStartLocation(
            self, coordinate, reference_id, strand=None,
            position_types=None):
        """
        Adds coordinate details for the start of this feature.
        :param coordinate:
        :param reference_id:
        :param strand:
        :param position_types:
        :return:
        """

        # make an object for the start, which has:
        # {coordinate : integer, reference : reference_id, types = []}
        self.start = self._getLocation(coordinate, reference_id, strand,
                                       position_types)

        return

    def addFeatureEndLocation(
            self, coordinate, reference_id, strand=None,
            position_types=None):
        """
        Adds the coordinate details for the end of this feature
        :param coordinate:
        :param reference_id:
        :param strand:
        :return:
        """

        self.stop = self._getLocation(coordinate, reference_id, strand,
                                      position_types)

        return

    def _getLocation(self, coordinate, reference_id, strand, position_types):
        """
        Make an object for the location, which has:
        {coordinate : integer, reference : reference_id, types = []}
        where the strand is indicated in the type array
        :param coordinate:
        :param reference_id:
        :param strand:
        :param position_types:
        :return:
        """

        loc = {}
        loc['coordinate'] = coordinate
        loc['reference'] = reference_id
        loc['type'] = []
        strand_id = self._getStrandType(strand)
        if strand_id is not None:
            loc['type'].append(strand_id)
        if position_types is not None:
            loc['type'] += position_types
        if position_types == []:
            loc['type'].append(self.types['Position'])

        return loc

    def _getStrandType(self, strand):
        """

        :param strand:
        :return:
        """

        # TODO make this a dictionary/enum:  PLUS, MINUS, BOTH, UNKNOWN
        strand_id = None
        if strand == '+':
            strand_id = self.types['plus_strand']
        elif strand == '-':
            strand_id = self.types['minus_strand']
        elif strand == '.':
            strand_id = self.types['both_strand']
        elif strand is None:  # assume this is Unknown
            pass
        else:
            logger.warning("strand type could not be mapped: %s", str(strand))

        return strand_id

    def addFeatureToGraph(
            self, graph, add_region=True, region_id=None,
            feature_as_class=False):
        """
        We make the assumption here that all features are instances.
        The features are located on a region,
            which begins and ends with faldo:Position
        The feature locations leverage the Faldo model,
            which has a general structure like:
        Triples:
        feature_id a feature_type (individual)
            faldo:location region_id
        region_id a faldo:region
            faldo:begin start_position
            faldo:end end_position
        start_position a
            (any of: faldo:(((Both|Plus|Minus)Strand)|Exact)Position)
            faldo:position Integer(numeric position)
            faldo:reference reference_id
        end_position a
            (any of: faldo:(((Both|Plus|Minus)Strand)|Exact)Position)
            faldo:position Integer(numeric position)
            faldo:reference reference_id

        :param graph:
        :return:
        """

        if feature_as_class:
            self.gu.addClassToGraph(graph, self.id, self.label, self.type,
                                    self.description)
        else:
            self.gu.addIndividualToGraph(graph, self.id, self.label, self.type,
                                         self.description)

        if self.start is None and self.stop is None:
            add_region = False

        if add_region:
            # create a region that has the begin/end positions
            regionchr = re.sub(r'\w+\:_?', '', self.start['reference'])
            if region_id is None:
                # in case the values are undefined
                # if we know only one of the coordinates,
                # then we'll add an "unknown" other.
                st = sp = 'UN'
                strand = None
                if self.start is not None and \
                        self.start['coordinate'] is not None:
                    st = str(self.start['coordinate'])
                    strand = self._getStrandStringFromPositionTypes(
                        self.start['type'])
                if self.stop is not None and\
                        self.stop['coordinate'] is not None:
                    sp = str(self.stop['coordinate'])
                    if strand is not None:
                        strand = self._getStrandStringFromPositionTypes(
                            self.stop['type'])
                # assume that the strand is the same for both start and stop.
                # this will need to be fixed in the future
                region_items = [regionchr, st, sp]
                if strand is not None:
                    region_items += [strand]
                region_id = '-'.join(region_items)
                rid = region_id
                rid = re.sub(r'\w+\:', '', rid, 1)  # replace the id prefix
                rid = '_'+rid+"-Region"
                region_id = rid
                if self.nobnodes:
                    region_id = ':'+region_id
            self.gu.addTriple(graph, self.id, self.properties['location'],
                              region_id)
            self.gu.addIndividualToGraph(
                graph, region_id, None, 'faldo:Region')
        else:
            region_id = self.id
            self.gu.addType(graph, region_id, 'faldo:Region')

        # add the start/end positions to the region
        beginp = endp = None
        if self.start is not None:
            beginp = self._makePositionId(self.start['reference'],
                                          self.start['coordinate'],
                                          self.start['type'])
            self.addPositionToGraph(graph,
                                    self.start['reference'],
                                    self.start['coordinate'],
                                    self.start['type'])

        if self.stop is not None:
            endp = self._makePositionId(self.stop['reference'],
                                        self.stop['coordinate'],
                                        self.stop['type'])
            self.addPositionToGraph(graph,
                                    self.stop['reference'],
                                    self.stop['coordinate'],
                                    self.stop['type'])

        self.addRegionPositionToGraph(graph, region_id, beginp, endp)

        # {coordinate : integer, reference : reference_id, types = []}

        return

    def _getStrandStringFromPositionTypes(self, tylist):
        strand = None
        if self.types['plus_strand'] in tylist:
            strand = 'plus'
        elif self.types['minus_strand'] in tylist:
            strand = 'minus'
        elif self.types['both_strand'] in tylist:
            strand = 'both'
        else:
            strand = None  # it is stranded, but we don't know what it is

        return strand

    def _makePositionId(self, reference, coordinate, types=None):
        """
        Note that positions should have a reference (we will enforce).
            Only exact positions need a coordinate.
        :param reference:
        :param coordinate:
        :param types:
        :return:
        """

        if reference is None:
            logger.error("Trying to make position with no reference.")
            return None

        i = '_'
        if self.nobnodes:
            i = ':'+i
        reference = re.sub(r'\w+\:', '', reference, 1)
        if re.match(r'^_', reference):
            # this is in the case if the reference is a bnode
            reference = re.sub(r'^_', '', reference)
        i += reference
        if coordinate is not None:
            # just in case it isn't a string already
            i = '-'.join((i, str(coordinate)))
        if types is not None:
            tstring = self._getStrandStringFromPositionTypes(types)
            if tstring is not None:
                i = '-'.join((i, tstring))

        return i

    def addRegionPositionToGraph(
            self, graph, region_id, begin_position_id,
            end_position_id):

        if begin_position_id is None:
            pass
            # logger.warn(
            #   "No begin position specified for region %s", region_id)
        else:
            self.gu.addTriple(graph, region_id, self.properties['begin'],
                              begin_position_id)

        if end_position_id is None:
            pass
            # logger.warn("No end position specified for region %s", region_id)
        else:
            self.gu.addTriple(graph, region_id, self.properties['end'],
                              end_position_id)

        return

    def addPositionToGraph(
            self, graph, reference_id, position,
            position_types=None, strand=None):
        """
        Add the positional information to the graph, following the faldo model.
        We assume that if the strand is None,
            we give it a generic "Position" only.
        Triples:
        my_position a (any of: faldo:(((Both|Plus|Minus)Strand)|Exact)Position)
            faldo:position Integer(numeric position)
            faldo:reference reference_id

        :param graph:
        :param reference_id:
        :param position:
        :param position_types:
        :param strand:
        :return:  Identifier of the position created
        """

        iid = self._makePositionId(reference_id, position, position_types)
        n = self.gu.getNode(iid)
        pos = self.gu.getNode(self.properties['position'])
        ref = self.gu.getNode(self.properties['reference'])
        if position is not None:
            graph.add((n, pos, Literal(position, datatype=XSD['integer'])))
        graph.add((n, ref, self.gu.getNode(reference_id)))
        if position_types is not None:
            for t in position_types:
                graph.add((n, RDF['type'], self.gu.getNode(t)))
        s = None
        if strand is not None:
            s = strand
            if not re.match(r'faldo', strand):
                # not already mapped to faldo, so expect we need to map it
                s = self._getStrandType(strand)
        # else:
        #    s = self.types['both_strand']
        if s is None and (position_types is None or position_types == []):
            s = self.types['Position']

        if s is not None:
            graph.add((n, RDF['type'], self.gu.getNode(s)))

        return iid

    def addSubsequenceOfFeature(self, graph, parentid):
        """
        This will add reciprocal triples like:
        feature is_subsequence_of parent
        parent has_subsequence feature
        :param graph:
        :param parentid:
        :return:
        """
        self.gu.addTriple(
            graph, self.id, self.properties['is_subsequence_of'], parentid)
        self.gu.addTriple(
            graph, parentid, self.properties['has_subsequence'], self.id)

        return

    def addTaxonToFeature(self, graph, taxonid):
        """
        Given the taxon id, this will add the following triple:
        feature in_taxon taxonid
        :param graph:
        :param taxonid:
        :return:
        """
        # TEC: should taxon be set in __init__()?
        self.taxon = taxonid
        self.gu.addTriple(
            graph, self.id, Assoc.properties['in_taxon'], self.taxon)

        return

    def loadAllProperties(self, graph):

        prop_dict = {
            Assoc(None).ANNOTPROP: self.annotation_properties,
            Assoc(None).OBJECTPROP: self.object_properties,
            Assoc(None).DATAPROP: self.data_properties
        }

        for p in prop_dict:
            self.gu.loadProperties(graph, prop_dict.get(p), p)

        return

    def addFeatureProperty(self, graph, property_type, property):
        self.gu.addTriple(graph, self.id, property_type, property)
        return

    def setNoBNodes(self, nobnodes):
        self.nobnodes = nobnodes
        return

Пример #7

Показать файл

Файл: Genotype.py Проект: d3borah/dipper

class Genotype():
    """
    These methods provide convenient methods to add items related to a genotype and it's parts to a supplied graph.
    They follow the patterns set out in GENO https://github.com/monarch-initiative/GENO-ontology.
    For specific sequence features, we use the GenomicFeature class to create them.
    """

    # special genotype parts mapped to their GENO and SO classes that we explicitly reference here
    genoparts = {
        'intrinsic_genotype': 'GENO:0000000',
        'extrinsic_genotype': 'GENO:0000524',
        'effective_genotype': 'GENO:0000525',
        'genomic_background': 'GENO:0000611',
        'genomic_variation_complement': 'GENO:0000009',
        'karyotype_variation_complement': 'GENO:0000644',
        'variant_single_locus_complement': 'GENO:0000030',
        'variant_locus': 'GENO:0000002',
        'reference_locus': 'GENO:0000036',
        'allele': 'GENO:0000008',
        'gene': 'SO:0000704',
        'QTL': 'SO:0000771',
        'transgene': 'SO:0000902',
        'pseudogene': 'SO:0000336',
        'cytogenetic marker': 'SO:0000341',
        'sequence_feature': 'SO:0000110',
        'sequence_alteration': 'SO:0001059',
        'insertion': 'SO:0000667',
        'deletion': 'SO:0000159',
        'substitution': 'SO:1000002',
        'duplication': 'SO:1000035',
        'translocation': 'SO:0000199',
        'inversion': 'SO:1000036',
        'tandem_duplication': 'SO:1000173',
        'point_mutation': 'SO:1000008',
        'population': 'PCO:0000001',  # population
        'family': 'PCO:0000020',  # family
        'wildtype': 'GENO:0000511',
        'reagent_targeted_gene': 'GENO:0000504',
        'targeted_gene_subregion' : 'GENO:0000534',
        'targeted_gene_complement' : 'GENO:0000527',
        'biological_region' : 'SO:0001411',
        'missense_variant': 'SO:0001583',
        'transcript': 'SO:0000233',
        'polypeptide': 'SO:0000104',
        'cDNA': 'SO:0000756',
        'sequence_variant_causing_loss_of_function_of_polypeptide': 'SO:1000118',
        'sequence_variant_causing_gain_of_function_of_polypeptide': 'SO:1000125',
        'sequence_variant_causing_inactive_catalytic_site': 'SO:1000120',
        'sequence_variant_affecting_polypeptide_function': 'SO:1000117',
        'regulatory_transgene_feature': 'GENO:0000638',
        'coding_transgene_feature': 'GENO:0000637',
        'protein_coding_gene': 'SO:0001217',
        'ncRNA_gene': 'SO:0001263'
    }

    object_properties = {
        'is_mutant_of': 'GENO:0000440',
        'derives_from': 'RO:0001000',
        'has_alternate_part': 'GENO:0000382',
        'has_reference_part': 'GENO:0000385',
        'in_taxon': 'RO:0002162',
        'has_zygosity': 'GENO:0000608',
        'is_sequence_variant_instance_of': 'GENO:0000408',  # links a alternate locus (instance) to a gene (class)
        'targets_instance_of': 'GENO:0000414',
        'is_reference_instance_of': 'GENO:0000610',
        'has_part': 'BFO:0000051',
        'has_member_with_allelotype': 'GENO:0000225',  # use this when relating populations
        'is_allelotype_of': 'GENO:0000206',
        'has_genotype': 'GENO:0000222',
        'has_phenotype': 'RO:0002200',
        'transcribed_to': 'RO:0002205',
        'translates_to': 'RO:0002513',
        'is_targeted_expression_variant_of' : 'GENO:0000443',
        'is_transgene_variant_of': 'GENO:0000444',
        'has_expression-variant_part' : 'GENO:0000532',
        'targeted_by' : 'GENO:0000634',  # between a (reagent-targeted gene) and a morpholino
        'derives_sequence_from_gene': 'GENO:0000639',   # FIXME should this just be subsequence of?
        'feature_to_gene_relation': 'GENO:0000418'
    }

    annotation_properties = {
        # TODO change properties with https://github.com/monarch-initiative/GENO-ontology/issues/21
        'reference_nucleotide': 'GENO:reference_nucleotide',  # Made up term
        'reference_amino_acid': 'GENO:reference_amino_acid',  # Made up term
        'altered_nucleotide': 'GENO:altered_nucleotide',  # Made up term
        'results_in_amino_acid_change': 'GENO:results_in_amino_acid_change'  # Made up term
    }

    zygosity = {
        'homoplasmic': 'GENO:0000602',
        'heterozygous': 'GENO:0000135',
        'indeterminate': 'GENO:0000137',
        'heteroplasmic': 'GENO:0000603',
        'hemizygous-y': 'GENO:0000604',
        'hemizygous-x': 'GENO:0000605',
        'homozygous': 'GENO:0000136',
        'hemizygous': 'GENO:0000606',
        'complex_heterozygous': 'GENO:0000402',
        'simple_heterozygous': 'GENO:0000458'
    }

    properties = object_properties.copy()
    properties.update(annotation_properties)

    def __init__(self, graph):

        self.gu = GraphUtils(curie_map.get())

        self.graph = graph

        self.gu.loadProperties(self.graph, self.object_properties, self.gu.OBJPROP)

        return

    def addGenotype(self, genotype_id, genotype_label, genotype_type=None, genotype_description=None):
        """
        If a genotype_type is not supplied, we will default to 'intrinsic_genotype'
        :param genotype_id:
        :param genotype_label:
        :param genotype_type:
        :param genotype_description:
        :return:
        """
        if genotype_type is None:
            genotype_type = self.genoparts['intrinsic_genotype']

        self.gu.addIndividualToGraph(self.graph, genotype_id, genotype_label, genotype_type, genotype_description)

        return

    def addAllele(self, allele_id, allele_label, allele_type=None, allele_description=None):
        """
        Make an allele object. If no allele_type is added, it will default to a geno:allele
        :param allele_id: curie for allele (required)
        :param allele_label: label for allele (required)
        :param allele_type: id for an allele type (optional, recommended SO or GENO class)
        :param allele_description: a free-text description of the allele
        :return:
        """
        # TODO should we accept a list of allele types?
        if (allele_type is None):
            allele_type = self.genoparts['allele']  #TODO is this a good idea?
        self.gu.addIndividualToGraph(self.graph, allele_id, allele_label, allele_type, allele_description)

        return

    def addGene(self, gene_id, gene_label, gene_type=None, gene_description=None):
        if gene_type is None:
            gene_type = self.genoparts['gene']
        # genes are classes
        self.gu.addClassToGraph(self.graph, gene_id, gene_label, gene_type, gene_description)

        return

    def addConstruct(self, construct_id, construct_label, construct_type=None, construct_description=None):
        # TODO add base type for construct
        # if (constrcut_type is None):
        #    constrcut_type=self.construct_base_type
        self.gu.addIndividualToGraph(self.graph, construct_id, construct_label, construct_type, construct_description)

        return

    def addDerivesFrom(self, child_id, parent_id):
        """
        We add a derives_from relationship between the child and parent id.  Examples of uses include between:
        an allele and a construct or strain here, a cell line and it's parent genotype.  Adding the
        parent and child to the graph should happen outside of this function call to
        ensure graph integrity.
        :param child_id:
        :param parent_id:
        :return:
        """

        self.gu.addTriple(self.graph, child_id, self.properties['derives_from'], parent_id)

        return

    def addSequenceDerivesFrom(self, child_id, parent_id):
        self.gu.addTriple(self.graph, child_id, self.properties['derives_sequence_from_gene'], parent_id)
        return

    def addAlleleOfGene(self, allele_id, gene_id, rel_id=None):
        """
        We make the assumption here that if the relationship is not provided, it is a
        GENO:is_sequence_variant_instance_of.

        Here, the allele should be a variant_locus, not a sequence alteration.
        :param allele_id:
        :param gene_id:
        :param rel_id:
        :return:
        """
        if (rel_id is None):
            rel_id = self.properties['is_sequence_variant_instance_of']
        self.gu.addTriple(self.graph, allele_id, rel_id, gene_id)
        return

    def addTranscript(self, variant_id, transcript_id, transcript_label=None, transcript_type=None):
        """
        Add gene/variant/allele transcribes_to relationship
        :param variant_id:
        :param transcript_id:
        :param transcript_label:
        :param transcript_type:
        :return:
        """
        self.gu.addIndividualToGraph(self.graph, transcript_id, transcript_label, transcript_type)
        self.gu.addTriple(self.graph, variant_id, self.properties['transcribed_to'], transcript_id)

        return

    def addPolypeptide(self, polypeptide_id, polypeptide_label=None, transcript_id=None, polypeptide_type=None, ):
        """
        :param polypeptide_id:
        :param polypeptide_label:
        :param polypeptide_type:
        :param transcript_id:
        :return:
        """
        if polypeptide_type is None:
            polypeptide_type = self.genoparts['polypeptide']
        self.gu.addIndividualToGraph(self.graph, polypeptide_id, polypeptide_label, polypeptide_type)
        if transcript_id is not None:
            self.gu.addTriple(self.graph, transcript_id, self.properties['translates_to'], polypeptide_id)

        return


    def addPartsToVSLC(self, vslc_id, allele1_id, allele2_id, zygosity_id=None, allele1_rel=None, allele2_rel=None):
        """
        Here we add the parts to the VSLC.  While traditionally alleles (reference or variant loci) are
        traditionally added, you can add any node (such as sequence_alterations for unlocated variations)
        to a vslc if they are known to be paired.  However, if a sequence_alteration's loci is unknown,
        it probably should be added directly to the GVC.
        :param vslc_id:
        :param allele1_id:
        :param allele2_id:
        :param zygosity_id:
        :param allele1_rel:
        :param allele2_rel:
        :return:
        """

        # vslc has parts allele1/allele2
        gu = self.gu

        vslc = gu.getNode(vslc_id)
        if allele1_id is not None:
            self.addParts(allele1_id, vslc_id, allele1_rel)
        if allele2_id is not None and allele2_id.strip() != '':
            self.addParts(allele2_id, vslc_id, allele2_rel)

        # figure out zygosity if it's not supplied
        if zygosity_id is None:
            if allele1_id == allele2_id:
                zygosity_id = self.zygosity['homozygous']
            else:
                zygosity_id = self.zygosity['heterozygous']

        if zygosity_id is not None:
            gu.addTriple(self.graph, vslc_id, self.properties['has_zygosity'], zygosity_id)

        return

    def addVSLCtoParent(self, vslc_id, parent_id):
        """
        The VSLC can either be added to a genotype or to a GVC.  The vslc is added as a part of the parent.
        :param vslc_id:
        :param parent_id:
        :return:
        """
        self.addParts(vslc_id, parent_id, self.properties['has_alternate_part'])

        return

    def addParts(self, part_id, parent_id, part_relationship=None):
        """
        This will add a has_part (or subproperty) relationship between a parent_id and the supplied part.
        By default the relationship will be BFO:has_part, but any relationship could be given here.
        :param part_id:
        :param parent_id:
        :param part_relationship:
        :return:
        """
        if part_relationship is None:
            part_relationship = self.properties['has_part']

        self.gu.addTriple(self.graph, parent_id, part_relationship, part_id)

        return

    def addSequenceAlteration(self, sa_id, sa_label, sa_type=None, sa_description=None):
        if sa_type is None:
            sa_type = self.genoparts['sequence_alteration']
        self.gu.addIndividualToGraph(self.graph, sa_id, sa_label, sa_type, sa_description)

        return

    def addSequenceAlterationToVariantLocus(self, sa_id, vl_id):
        self.addParts(sa_id, vl_id, self.properties['has_alternate_part'])
        return

    def addGenomicBackground(self, background_id, background_label, background_type=None, background_description=None):
        if background_type is None:
            background_type = self.genoparts['genomic_background']
        self.gu.addIndividualToGraph(self.graph, background_id, background_label, background_type, background_description)

        return

    def addGenomicBackgroundToGenotype(self, background_id, genotype_id):
        self.gu.addType(self.graph, background_id, self.genoparts['genomic_background'])
        self.addParts(background_id, genotype_id, self.object_properties['has_reference_part'])

        return

    def addTaxon(self, taxon_id, genopart_id):
        """
        The supplied geno part will have the specified taxon added with RO:in_taxon relation.
        Generally the taxon is associated with a genomic_background, but could be added to any
        genotype part (including a gene, regulatory element, or sequence alteration).
        :param taxon_id:
        :param genopart_id:
        :return:
        """
        in_taxon = self.gu.getNode(self.properties['in_taxon'])
        s = self.gu.getNode(genopart_id)
        self.graph.add((s, in_taxon, self.gu.getNode(taxon_id)))

        return

    def addGeneTargetingReagentToGenotype(self, reagent_id, genotype_id):
        # for example, add a morphant reagent thingy to the genotype, assuming it's a extrinsic_genotype
        p = self.object_properties['has_expression-variant_part']
        self.gu.addTriple(self.graph, genotype_id, p, reagent_id)

        return

    def addGeneTargetingReagent(self, reagent_id, reagent_label, reagent_type, gene_id, description=None):
        """
        Here, a gene-targeting reagent is added.  The actual targets of this reagent should be added separately.
        :param reagent_id:
        :param reagent_label:
        :param reagent_type:
        :return:
        """
        # TODO add default type to reagent_type
        self.gu.addIndividualToGraph(self.graph, reagent_id, reagent_label, reagent_type, description)

        self.gu.addTriple(self.graph, reagent_id, self.object_properties['targets_instance_of'], gene_id)

        return

    def addReagentTargetedGene(self, reagent_id, gene_id, targeted_gene_id=None, targeted_gene_label=None,
                               description=None):
        """
        This will create the instance of a gene that is targeted by a molecular reagent (such as a morpholino or rnai).
        If an instance id is not supplied, we will create it as an anonymous individual which is of the
        type GENO:reagent_targeted_gene.  We will also add the targets relationship between the reagent and gene class.

        <targeted_gene_id> a GENO:reagent_targeted_gene
            rdf:label targeted_gene_label
            dc:description description
        <reagent_id> GENO:targets_instance_of <gene_id>

        :param reagent_id:
        :param gene_id:
        :param targeted_gene_id:
        :return:
        """

        # akin to a variant locus
        if (targeted_gene_id is None):
            targeted_gene_id = '_' + gene_id + '-' + reagent_id
        self.gu.addIndividualToGraph(self.graph, targeted_gene_id, targeted_gene_label,
                                     self.genoparts['reagent_targeted_gene'], description)

        self.gu.addTriple(self.graph, targeted_gene_id,
                          self.object_properties['is_targeted_expression_variant_of'], gene_id)

        self.gu.addTriple(self.graph, targeted_gene_id, self.object_properties['targeted_by'], reagent_id)

        return

    def addTargetedGeneSubregion(self, tgs_id, tgs_label, tgs_type=None, tgs_description=None):
        if tgs_type is None:
            tgs_type = self.genoparts['targeted_gene_subregion']
        self.gu.addIndividualToGraph(self.graph, tgs_id, tgs_label, tgs_type, tgs_description)

    def addMemberOfPopulation(self, member_id, population_id):
        self.gu.addTriple(self.graph, population_id,
                          self.properties['has_member_with_allelotype'], member_id)

        return


    def addTargetedGeneComplement(self, tgc_id, tgc_label, tgc_type=None, tgc_description=None):
        if tgc_type is None:
            tgc_type = self.genoparts['targeted_gene_complement']
        self.gu.addIndividualToGraph(self.graph, tgc_id, tgc_label, tgc_type, tgc_description)

        return


    def addGenome(self, taxon_id, taxon_label=None):
        if taxon_label is None:
            taxon_label = taxon_id
        genome_label = taxon_label+' genome'
        genome_id = self.makeGenomeID(taxon_id)
        self.gu.addClassToGraph(self.graph, genome_id, genome_label, Feature.types['genome'])

        return

    def addReferenceGenome(self, build_id, build_label, taxon_id):
        genome_id = self.makeGenomeID(taxon_id)
        self.gu.addIndividualToGraph(self.graph, build_id, build_label, Feature.types['reference_genome'])
        self.gu.addType(self.graph, build_id, genome_id)
        self.addTaxon(taxon_id, build_id)

        return

    def makeGenomeID(self, taxon_id):
        # scrub off the taxon prefix.  put it in base space

        genome_id = re.sub('.*\:', ':', taxon_id) + 'genome'

        return genome_id

    def addChromosome(self, chr, tax_id, tax_label=None, build_id=None, build_label=None):
        # if it's just the chromosome, add it as an instance of a SO:chromosome, and add it to the genome.
        # if a build is included, punn the chromosome as a subclass of SO:chromsome, and
        # make the build-specific chromosome an instance of the supplied chr.  The chr then becomes part of the
        # build or genome.

        # first, make the chromosome class, at the taxon level
        chr_id = makeChromID(str(chr), tax_id)
        if tax_label is not None:
            chr_label = makeChromLabel(chr, tax_label)
        else:
            chr_label = makeChromLabel(chr)
        genome_id = self.makeGenomeID(tax_id)
        self.gu.addClassToGraph(self.graph, chr_id, chr_label, Feature.types['chromosome'])
        self.addTaxon(tax_id, genome_id)  # add the taxon to the genome

        if build_id is not None:
            chrinbuild_id = makeChromID(chr, build_id)  # the build-specific chromosome
            if build_label is None:
                build_label = build_id
            chrinbuild_label = makeChromLabel(chr, build_label)
            # add the build-specific chromosome as an instance of the chr class
            self.gu.addIndividualToGraph(self.graph, chrinbuild_id, chrinbuild_label, chr_id)

            # add the build-specific chromosome as a member of the build  (both ways)
            self.gu.addMember(self.graph, build_id, chrinbuild_id)
            self.gu.addMemberOf(self.graph, chrinbuild_id, build_id)

        return

    def addChromosomeClass(self, chrom_num, taxon_id, taxon_label):
        taxon = re.sub('NCBITaxon:', '', taxon_id)
        chrom_class_id = makeChromID(chrom_num, taxon, 'CHR')  # the chrom class (generic) id
        chrom_class_label = makeChromLabel(chrom_num, taxon_label)
        self.gu.addClassToGraph(self.graph, chrom_class_id, chrom_class_label,
                                Feature.types['chromosome'])

        return

    def addChromosomeInstance(self, chr_num, reference_id, reference_label, chr_type=None):
        """
        Add the supplied chromosome as an instance within the given reference
        :param chr:
        :param reference_id: for example, a build id like UCSC:hg19
        :param reference_label:
        :param chr_type: this is the class that this is an instance of.  typically a genome-specific chr
        :return:
        """

        chr_id = makeChromID(str(chr_num), reference_id, 'MONARCH')
        chr_label = makeChromLabel(str(chr_num), reference_label)

        self.gu.addIndividualToGraph(self.graph, chr_id, chr_label, Feature.types['chromosome'])
        self.gu.addType(self.graph, chr_id, chr_type)

        # add the build-specific chromosome as a member of the build  (both ways)
        self.gu.addMember(self.graph, reference_id, chr_id)
        self.gu.addMemberOf(self.graph, chr_id, reference_id)

        return

    def make_variant_locus_label(self, gene_label, allele_label):
        if gene_label is None:
            gene_label = ''
        label = gene_label.strip()+'<' + allele_label.strip() + '>'

        return label

    def make_vslc_label(self, gene_label, allele1_label, allele2_label):
        """
        Make a Variant Single Locus Complement (VSLC) in monarch-style.
        :param gene_label:
        :param allele1_label:
        :param allele2_label:
        :return:
        """
        vslc_label = ''

        if (gene_label is None and allele1_label is None and allele2_label is None):
            logger.error("Not enough info to make vslc label")
            return None

        top = self.make_variant_locus_label(gene_label, allele1_label)
        bottom = ''
        if allele2_label is not None:
            bottom = self.make_variant_locus_label(gene_label, allele2_label)

        vslc_label = '/'.join((top, bottom))

        return vslc_label

Пример #8

Показать файл

Файл: Association.py Проект: JervenBolleman/dipper

class Assoc:
    """
    An abstract class for OBAN (Monarch)-style associations,
    to enable attribution of source and evidence
    on statements.
    """

    assoc_types = {
        'association': 'OBAN:association'
    }

    annotation_properties = {
        'replaced_by': 'IAO:0100001',
        'consider': 'OIO:consider',
        'hasExactSynonym': 'OIO:hasExactSynonym',
        'hasRelatedSynonym': 'OIO:hasRelatedSynonym',
        'definition': 'IAO:0000115',
        'has_xref': 'OIO:hasDbXref',
    }

    object_properties = {
        'has_disposition': 'GENO:0000208',
        'has_phenotype': 'RO:0002200',
        'in_taxon': 'RO:0002162',
        'has_quality': 'RO:0000086',
        'towards': 'RO:0002503',
        'has_subject': 'OBAN:association_has_subject',
        'has_object': 'OBAN:association_has_object',
        'has_predicate': 'OBAN:association_has_object_property',
        'is_about': 'IAO:00000136',
        'has_evidence': 'RO:0002558',
        'has_source': 'dc:source',
        'has_provenance': 'OBAN:has_provenance'
    }

    datatype_properties = {
        'position': 'faldo:position',
        'has_measurement': 'IAO:0000004'
    }

    properties = annotation_properties.copy()
    properties.update(object_properties)
    properties.update(datatype_properties)

    OWLCLASS = OWL['Class']
    OWLIND = OWL['NamedIndividual']
    OBJECTPROP = OWL['ObjectProperty']
    ANNOTPROP = OWL['AnnotationProperty']
    DATAPROP = OWL['DatatypeProperty']

    SUBCLASS = RDFS['subClassOf']
    BASE = Namespace(curie_map.get()[''])

    def __init__(self, definedby):
        self.cu = CurieUtil(curie_map.get())
        self.gu = GraphUtils(curie_map.get())

        # core parts of the association
        self.definedby = definedby
        self.sub = self.obj = self.rel = None
        self.assoc_id = None

        self.description = None
        self.source = []
        self.evidence = []
        # this is going to be used for the refactored evidence/provenance
        self.provenance = []

        self.score = None
        self.score_type = None
        self.score_unit = None

        return

    def get_properties(self):
        return self.properties

    def _is_valid(self):

        # check if sub/obj/rel are none...throw error
        if self.sub is None:
            raise ValueError('No subject set for this association')
        if self.obj is None:
            raise ValueError('No object set for this association')
        if self.rel is None:
            raise ValueError('No relation set for this association')

        return True

    def _add_basic_association_to_graph(self, g):

        if not self._is_valid():
            return

        # first, add the direct triple
        # anonymous (blank) nodes are indicated with underscore
        s = self.gu.getNode(self.sub)
        o = self.gu.getNode(self.obj)
        p = self.gu.getNode(self.rel)

        if s is None:
            logging.error(
                "Unable to retrieve graph node for Subject %s ", self.sub)
            return
                
        elif p is None:
            logging.error(
                "Unable to retrieve graph node for Predicate %s ", self.rel)
            return
                
        elif o is None:
            logging.error(
                "Unable to retrieve graph node for Object %s ", self.obj)
            return
        else:
            g.add((s, p, o))

        if self.assoc_id is None:
            self.set_association_id()

        node = self.gu.getNode(self.assoc_id)
        g.add((node, RDF['type'],
               self.gu.getNode(self.assoc_types['association'])))

        self.gu.addTriple(g, self.assoc_id,
                          self.object_properties['has_subject'], self.sub)
        self.gu.addTriple(g, self.assoc_id,
                          self.object_properties['has_object'], self.obj)
        self.gu.addTriple(g, self.assoc_id,
                          self.object_properties['has_predicate'], self.rel)

        if self.description is not None:
            self.gu.addDescription(g, self.assoc_id, self.description)

        if self.evidence is not None and len(self.evidence) > 0:
            for e in self.evidence:
                self.gu.addTriple(g, self.assoc_id,
                                  self.object_properties['has_evidence'], e)

        if self.source is not None and len(self.source) > 0:
            for s in self.source:
                if re.match('http', s):
                    # TODO assume that the source is a publication?
                    # use Reference class here
                    self.gu.addTriple(g, self.assoc_id,
                                      self.object_properties['has_source'], s,
                                      True)
                else:
                    self.gu.addTriple(g, self.assoc_id,
                                      self.object_properties['has_source'], s)

        if self.provenance is not None and len(self.provenance) > 0:
            for p in self.provenance:
                self.gu.addTriple(g, self.assoc_id,
                                  self.object_properties['has_provenance'], p)

        if self.score is not None:
            self.gu.addTriple(
                g, self.assoc_id, self.properties['has_measurement'],
                Literal(self.score, datatype=XSD['float']), True)
            # TODO
            # update with some kind of instance of scoring object
            # that has a unit and type

        return

    def add_association_to_graph(self, g):

        self._add_basic_association_to_graph(g)

        return

    def set_subject(self, identifier):
        self.sub = identifier
        return

    def set_object(self, identifier):
        self.obj = identifier
        return

    def set_relationship(self, identifier):
        self.rel = identifier
        return

    def set_association_id(self, assoc_id=None):
        """
        This will set the association ID based on the internal parts
            of the association.
        To be used in cases where an external association identifier
            should be used.

        :param assoc_id:
        :return:
        """
        if assoc_id is None:
            self.assoc_id = self.make_association_id(self.definedby, self.sub,
                                                     self.rel, self.obj)
        else:
            self.assoc_id = assoc_id

        return

    def get_association_id(self):

        return self.assoc_id

    def set_description(self, description):
        self.description = description

        return

    def set_score(self, score, unit=None, score_type=None):

        self.score = score
        self.score_unit = unit
        self.score_type = score_type

        return

    def add_evidence(self, identifier):
        """
        Add an evidence code to the association object (maintained as a list)
        :param identifier:
        :return:
        """

        if identifier is not None and identifier.strip() != '':
            self.evidence += [identifier]

        return

    def add_source(self, identifier):
        """
        Add a source identifier (such as publication id)
        to the association object (maintained as a list)
        TODO we need to greatly expand this function!

        :param identifier:
        :return:
        """

        if identifier is not None and identifier.strip() != '':
            self.source += [identifier]

        return

    def add_provenance(self, identifier):

        if identifier is not None and identifier.strip() != '':
            self.provenance += [identifier]

        return

    def load_all_properties(self, g):
        props = {
            self.OBJECTPROP: self.object_properties,
            self.ANNOTPROP: self.annotation_properties,
            self.DATAPROP: self.datatype_properties
        }

        for p in props:
            self.gu.loadProperties(g, props[p], p)

        return

    def _get_source_uri(self, pub_id):
        """
        Given some kind of pub_id (which might be a CURIE or url),
        convert it into a proper node.

        :param pub_id:
        :return: source: Well-formed URI for the given identifier (or url)
        """

        source = None
        if re.compile('http').match(pub_id):
            source = URIRef(pub_id)
        else:
            u = self.gu.getNode(pub_id)
            if u is not None:
                source = URIRef(u)
            else:
                logger.error(
                    "An id we don't know how to deal with: %s", pub_id)

        return source

    @staticmethod
    def make_association_id(definedby, subject, predicate, object,
                            attributes=None):
        """
        A method to create unique identifiers for OBAN-style associations,
        based on all the parts of the association
        If any of the items is empty or None, it will convert it to blank.
        It effectively md5 hashes the (+)-joined string from the values.
        Subclasses of Assoc can submit an additional array of attributes
        that will be added to the ID.

        :param definedby: The (data) resource that provided the annotation
        :param subject:
        :param predicate:
        :param object:
        :param attributes:
        :return:
        """

        # note others available:
        #   md5(), sha1(), sha224(), sha256(), sha384(), and sha512()
        # TEC: at our scale, md5 is in danger of having collisions.
        # putting definedby first,
        # as this will usually be the datasource providing the annotation
        # this will end up making the first few parts of the id
        # be the same for all annotations in that resource
        items_to_hash = [definedby, subject, predicate, object]
        if attributes is not None:
            items_to_hash += attributes

        for i, val in enumerate(items_to_hash):
            if val is None:
                items_to_hash[i] = ''

        byte_string = '+'.join(items_to_hash).encode("utf-8")

        # TODO put this in a util?
        return ':'.join(('MONARCH', hashlib.md5(byte_string).hexdigest()))