Пример #1
0
 def fixHasAltId(g):
     """Point legacy alternative-id / ChEBI-id URIs in ``g`` at oboInOwl.

     ``g`` is wrapped with ``makeGraph`` (binding the oboInOwl, NIFCHEM and
     BIRNANN prefixes) and each legacy curie is swapped for its oboInOwl
     counterpart through ``replace_uriref``.
     """
     prefixes = makePrefixes('oboInOwl', 'NIFCHEM', 'BIRNANN')
     wrapped = makeGraph('', graph=g, prefixes=prefixes)
     # (legacy curie, oboInOwl curie) pairs, applied in this order
     renames = (
         ('NIFCHEM:hasAlternativeId', 'oboInOwl:hasAlternativeId'),
         ('BIRNANN:ChEBIid', 'oboInOwl:id'),
     )
     for legacy, current in renames:
         wrapped.replace_uriref(legacy, current)
Пример #2
0
class MBA(HBA):
    """Allen Mouse Brain Atlas parcellation scheme.

    Reuses the data retrieval and processing inherited from ``HBA`` and only
    overrides the ontology metadata, the identifier prefix, the root
    structure id and the ``datamunge`` hook.
    """

    # ontology level metadata
    ont = OntMeta(PARC,
                  'mbaslim',
                  'Allen Mouse Brain Atlas Ontology',
                  'MBA 2011 v2',
                  'This file is automatically generated from the Allen Brain Atlas API.' + NOTICE,
                  TODAY)

    # root concept for this parcellation (mouse, adult)
    concept = PScheme(ILXREPLACE(ont.name),
                      'Allen Mouse Brain Atlas parcellation concept',
                      'NCBITaxon:10090',
                      ADULT)

    # the atlas artifact the labels come from
    atlas = PSArtifact(ILXREPLACE(ont.name + 'atlas'),
                       'Allen Mouse Brain Atlas v2',
                       '2011 v2',
                       'November 2011',
                       'http://mouse.brain-map.org/static/atlas',
                       'http://help.brain-map.org/download/attachments/2818169/AllenReferenceAtlas_v2_2011.pdf?version=1&modificationDate=1319667383440',  # yay no doi! wat
                       tuple(),
                       tuple())

    PREFIX = 'MBA'
    PREFIXES = makePrefixes('NIFRID')
    # FIXME hack to allow both HBA and MBA
    PREFIXES[PREFIX] = 'http://api.brain-map.org/api/v2/data/Structure/'
    ROOT = 997

    @classmethod
    def datamunge(cls, data):
        """Relabel, in place, the first node of ``data`` whose id is ``ROOT``.

        Nothing happens when no node carries the root id.
        """
        root_label = 'allen mouse brain atlas parcellation root'
        root = next((entry for entry in data if entry['id'] == cls.ROOT), None)
        if root is None:
            return
        root['name'] = root_label
        root['safe_name'] = root_label
        root['acronym'] = 'mbaroot'
Пример #3
0
class genericPScheme:
    """Template for generating one parcellation scheme ontology file.

    Subclasses replace the ``ont``, ``concept`` and ``atlas`` placeholders
    with *instances* of ``OntMeta``, ``PScheme`` and ``PSArtifact`` and
    override ``datagetter`` / ``dataproc`` (optionally ``datamunge`` and
    ``validate``).  "Instantiating" a subclass runs the whole pipeline and
    returns ``(ontid, atlas)`` rather than an instance.
    """
    # Placeholders: the classes themselves are deliberately not valid values,
    # so a subclass that forgets to override one fails the check in __new__.
    ont = OntMeta
    concept = PScheme
    atlas = PSArtifact
    PREFIXES = makePrefixes('', 'ilx', 'owl', 'skos', 'BIRNLEX', 'NCBITaxon',
                            'ILXREPLACE')

    def __new__(cls, validate=False):
        """Run the generation pipeline for ``cls``.

        Args:
            validate: when true (or when the class defines a truthy
                ``VALIDATE`` attribute) ``cls.validate`` is called on the
                written graph.

        Returns:
            A ``(ontid, atlas)`` tuple: the ontology identifier built from
            ``cls.ont`` and the class's ``atlas`` attribute.

        Raises:
            TypeError: if ``ont``, ``concept`` or ``atlas`` is not an
                instance of its expected type.
        """
        error = 'Expected %s got %s'
        # isinstance (rather than an exact type comparison) also admits
        # subclasses of the metadata types; the bare placeholder classes
        # still fail because a class is not an instance of itself.
        for value, expected in ((cls.ont, OntMeta),
                                (cls.concept, PScheme),
                                (cls.atlas, PSArtifact)):
            if not isinstance(value, expected):
                raise TypeError(error % (expected, type(value)))

        ontid = cls.ont.path + cls.ont.filename + '.ttl'
        # Copy so the class level mapping is never mutated; entries from the
        # base class win over subclass entries with the same key.
        PREFIXES = dict(cls.PREFIXES)
        PREFIXES.update(genericPScheme.PREFIXES)
        graph = makeGraph(cls.ont.filename, PREFIXES, writeloc=WRITELOC)
        graph.add_ont(ontid, *cls.ont[2:])
        make_scheme(graph, cls.concept, cls.atlas.curie)
        data = cls.datagetter()
        cls.datamunge(data)
        cls.dataproc(graph, data)
        add_ops(graph)
        graph.write()
        if validate or getattr(cls, 'VALIDATE', False):
            cls.validate(graph)
        return ontid, cls.atlas

    @classmethod
    def datagetter(cls):
        """ example datagetter function, make any local modifications here

        A real implementation returns the collected rows; this template
        always ends by raising NotImplementedError.
        """
        with open('myfile', 'rt') as f:
            rows = list(csv.reader(f))
        rows = [[i for i, v in enumerate(row)] for row in rows]
        raise NotImplementedError('You need to implement this yourlself!')

    @classmethod
    def datamunge(cls, data):
        """ in place modifier of data """
        pass

    @classmethod
    def dataproc(cls, graph, data):
        """ example dataproc function, make any local modifications here """
        for thing in data:
            graph.add_trip(*thing)
        raise NotImplementedError('You need to implement this yourlself!')

    @classmethod
    def validate(cls, graph):
        """ Put any post validation here. """
        raise NotImplementedError('You need to implement this yourlself!')
Пример #4
0
class CoCoMac(genericPScheme):
    """Parcellation scheme built from the CoCoMac BrainSiteAcronyms table."""

    ont = OntMeta(PARC,
                  'cocomacslim',
                  'CoCoMac terminology',
                  'CoCoMac',
                  ('This file is automatically generated from the CoCoMac '
                   'database on the terms from BrainMaps_BrainSiteAcronyms.' + NOTICE),
                  TODAY)
    concept = PScheme(ILXREPLACE(ont.name),
                      'CoCoMac terminology parcellation concept',
                      'NCBITaxon:9544',
                      'ilx:various')
    atlas = PSArtifact(ILXREPLACE(ont.name + 'atlas'),
                       'CoCoMac terminology',
                       None,  #'no version info',
                       None,  #'no date',
                       'http://cocomac.g-node.org',
                       'scholarly things',
                       tuple(),
                       tuple())

    PREFIXES = makePrefixes('NIFRID')
    # looking for better options
    PREFIXES['cocomac'] = 'http://cocomac.g-node.org/services/custom_sql_query.php?sql=SELECT%20*%20from%20BrainMaps_BrainSiteAcronyms%20where%20ID='

    @classmethod
    def datagetter(cls):
        """Fetch the acronym table as JSON; return the header row then the data rows."""
        query_url = 'http://cocomac.g-node.org/services/custom_sql_query.php?sql=SELECT * from BrainMaps_BrainSiteAcronyms;&format=json'
        payload = requests.get(query_url).json()
        rows = [payload['fields']]
        rows.extend(payload['data'].values())
        return rows

    @classmethod
    def dataproc(cls, graph, data):
        """Feed ``data`` through a ``rowParse`` subclass that fills ``graph``.

        The handler methods are named after the table columns; presumably
        ``rowParse`` dispatches each cell to the method named like its
        header -- verify against ``rowParse``.
        """
        class cocomac(rowParse):
            def ID(self, value):
                curie = 'cocomac:' + value
                self.identifier = curie  # safe because reset every row (ish)
                graph.add_class(curie, cls.concept.curie)

            def Key(self, value):
                """Column intentionally ignored."""

            def Summary(self, value):
                """Column intentionally ignored."""

            def Acronym(self, value):
                graph.add_trip(self.identifier, ACRONYM, value)

            def FullName(self, value):
                display = '(%s) ' % cls.ont.shortname + value
                graph.add_trip(self.identifier, rdfs.label, display)
                graph.add_trip(self.identifier, PARCLAB, value)

            def LegacyID(self, value):
                graph.add_trip(self.identifier, ACRONYM, value)

            def BrainInfoID(self, value):
                """Column intentionally ignored."""

        cocomac(data)
Пример #5
0
 def switch_dead(g):
     """Replace every deprecated identifier in ``g`` with its successor.

     For each ``dead -> replacement`` entry of the enclosing ``deads``
     mapping the URI is swapped via ``replace_uriref``, the old identifier is
     kept on the replacement as an ``oboInOwl:hasAlternativeId`` literal, and
     the self-referential ``replacedBy`` triple is removed.
     """
     ng = makeGraph('', graph=g, prefixes=makePrefixes('oboInOwl'))
     for dead, replacement in deads.items():
         ng.replace_uriref(dead, replacement)
         # The oboInOwl property is hasAlternativeId (the spelling the other
         # fix-up passes look for); 'hasAlternateId' was never picked up.
         ng.add_node(replacement, 'oboInOwl:hasAlternativeId',
                     rdflib.Literal(dead, datatype=rdflib.XSD.string))
         # in case the replaced by was already in
         g.remove((replacement, replacedBy, replacement))
Пример #6
0
 def fixIons(g):
     """Swap atom identifiers in ``g`` for the matching ion identifiers.

     There is a series of atom/ion confusions in the source files; the
     charged form is used because that is what biologists are usually
     referring to.
     """
     ng = makeGraph('', graph=g, prefixes=makePrefixes('CHEBI'))
     # calcium (CHEBI:29108) and zinc (CHEBI:29105) are ok, nothing to replace
     atom_to_ion = (
         ('CHEBI:30145', 'CHEBI:49713'),  # lithium
         ('CHEBI:18248', 'CHEBI:29033'),  # iron
         ('CHEBI:26216', 'CHEBI:29103'),  # potassium
         ('CHEBI:26708', 'CHEBI:29101'),  # sodium
     )
     for atom, ion in atom_to_ion:
         ng.replace_uriref(atom, ion)
Пример #7
0
class FMRI(genericPScheme):
    """Parcellation scheme whose nodes are supplied through a ``DATA`` class attribute."""

    PREFIXES = makePrefixes('', 'skos', 'ILXREPLACE')

    @classmethod
    def datagetter(cls):
        """Return the nodes stored on the class as ``DATA``."""
        return cls.DATA

    @classmethod
    def dataproc(cls, graph, data):
        """Add one class per node, labelled with the ontology short name.

        Each node must offer ``.get('index')`` and ``.text``.
        """
        for region in data:
            curie = 'ATLAS:' + region.get('index')
            name = region.text
            shown = '(%s) ' % cls.ont.shortname + name
            graph.add_class(curie, cls.concept.curie, label=shown)
            graph.add_trip(curie, PARCLAB, name)
Пример #8
0
class HBA(genericPScheme):
    """Allen Human Brain Atlas parcellation scheme, built from the Allen API."""

    ont = OntMeta(PARC,
                  'hbaslim',
                  'Allen Human Brain Atlas Ontology',
                  'HBA 2013 v2',
                  'This file is automatically generated from the Allen Brain Atlas API.' + NOTICE,
                  TODAY)
    concept = PScheme(ILXREPLACE(ont.name),
                      'Allen Human Brain Atlas parcellation concept',
                      'NCBITaxon:9606',
                      ADULT)
    atlas = PSArtifact(ILXREPLACE(ont.name + 'atlas'),
                       'Allen Human Brain Atlas v2',
                       '2013 v2',
                       'October 2013',
                       'http://human.brain-map.org/',
                       'http://help.brain-map.org/download/attachments/2818165/HBA_Ontology-and-Nomenclature.pdf?version=1&modificationDate=1382051847989',
                       tuple(),
                       tuple())
    PREFIX = 'HBA'
    PREFIXES = makePrefixes('NIFRID')
    # FIXME hack to allow both HBA and MBA
    PREFIXES[PREFIX] = 'http://api.brain-map.org:80/api/v2/data/Structure/'
    ROOT = 3999
    #VALIDATE = True

    @classmethod
    def datagetter(cls):
        """Download the structure tree below ``ROOT`` and return its ``msg`` list."""
        template = 'http://api.brain-map.org/api/v2/tree_search/Structure/{root}.json?descendants=true'
        response = requests.get(template.format(root=cls.ROOT))
        return response.json()['msg']

    @classmethod
    def dataproc(cls, graph, data):
        """Add a class, its labels and its parent link for every structure."""

        def expand(structure_id):
            # structure ids become curies under this scheme's prefix
            return graph.expand(cls.PREFIX + ':' + str(structure_id))

        for structure in data:
            curie = expand(structure['id'])
            graph.add_class(curie, cls.concept.curie)
            parent_id = structure['parent_structure_id']
            name = structure['name']
            graph.add_trip(curie, rdfs.label,
                           '(%s) ' % cls.ont.shortname + name)
            graph.add_trip(curie, PARCLAB, name)
            graph.add_trip(curie, ACRONYM, structure['acronym'])
            safe_name = structure['safe_name']
            if safe_name != name:
                graph.add_trip(curie, SYNONYM, safe_name)
            if parent_id:
                graph.add_hierarchy(expand(parent_id), PARTOF, curie)

    @classmethod
    def validate(cls, graph):
        """Run ``check_hierarchy`` starting from this scheme's root curie."""
        root_curie = cls.PREFIX + ':' + str(cls.ROOT)
        check_hierarchy(graph, root_curie, PARTOF, PARCLAB)
Пример #9
0
def ncbigene_make():
    """Build the 'ncbigeneslim' ontology from NCBI esummary records.

    Gene ids are read from ``resources/gene-subset-ids.txt`` (the part after
    the first ``:`` on each line), fetched from the NCBI eutils esummary
    endpoint in batches of 100, merged into one mapping, and each record is
    handed to ``ncbi`` together with the new ontology graph, which is then
    written out.
    """
    IDS_FILE = 'resources/gene-subset-ids.txt'
    with open(IDS_FILE, 'rt') as f:  # this came from neuroNER
        ids = [line.split(':')[1].strip() for line in f]

    url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi'
    params = {
        'db': 'gene',
        'retmode': 'json',
        'retmax': 5000,
        'id': None,
    }
    chunks = []
    for i, idset in enumerate(chunk_list(ids, 100)):
        print(i, len(idset))
        # a plain comma separated string; a stray trailing comma used to
        # wrap this value in a 1-tuple
        params['id'] = ','.join(idset)
        chunks.append(requests.post(url, data=params).json())

    # merge every batch into the first one; 'uids' only repeats the keys of
    # the records, so it is dropped rather than accumulated
    base = chunks[0]['result']
    for more in chunks[1:]:
        base.update(more['result'])
    base.pop('uids')

    ng = createOntology(
        'ncbigeneslim',
        'NIF NCBI Gene subset',
        makePrefixes('ILXREPLACE', 'ilx', 'OBOANN', 'NCBIGene', 'NCBITaxon',
                     'skos', 'owl'),
        'ncbigeneslim',
        'This subset is automatically generated from the NCBI Gene database on a subset of terms listed in %s.'
        % IDS_FILE,
        remote_base='http://ontology.neuinfo.org/NIF/')

    for record in base.values():
        ncbi(record, ng)
    ng.write()
Пример #10
0
def parcellation_schemes(ontids_atlases):
    """Write the ontology that imports every generated parcellation scheme.

    ``ontids_atlases`` is an iterable of ``(import_id, atlas)`` pairs; they
    are processed in sorted order.
    """
    meta = OntMeta(
        'http://ontology.neuinfo.org/NIF/ttl/generated/',
        'parcellation',
        'NIF collected parcellation schemes ontology',
        'NIF Parcellations',
        'Brain parcellation schemes as represented by root concepts.',
        TODAY,
    )
    ontology_id = meta.path + meta.filename + '.ttl'
    prefixes = makePrefixes('ilx', 'owl', 'skos', 'OBOANN')
    graph = makeGraph(meta.filename, prefixes, writeloc='/tmp/parc/')
    graph.add_ont(ontology_id, *meta[2:])

    # one owl:imports statement plus the atlas triples per scheme
    for scheme_id, scheme_atlas in sorted(ontids_atlases):
        graph.add_node(ontology_id, rdflib.OWL.imports, scheme_id)
        add_triples(graph, scheme_atlas, make_atlas)

    super_curie = PARC_SUPER[0]
    super_label = PARC_SUPER[1]
    graph.add_class(super_curie, label=super_label)
    graph.write(convert=False)
Пример #11
0
def parcellation_schemes(ontids_atlases):
    """Write the ontology that imports every generated parcellation scheme.

    ``ontids_atlases`` is an iterable of ``(import_id, atlas)`` pairs; they
    are processed in sorted order.  The atlas and parcellation super classes
    are added before the graph is written.
    """
    meta = OntMeta(
        GENERATED,
        'parcellation',
        'NIF collected parcellation schemes ontology',
        'NIF Parcellations',
        'Brain parcellation schemes as represented by root concepts.',
        TODAY,
    )
    ontology_id = meta.path + meta.filename + '.ttl'
    prefixes = makePrefixes('', 'ilx', 'owl', 'skos', 'NIFRID', 'ILXREPLACE')
    graph = makeGraph(meta.filename, prefixes, writeloc=WRITELOC)
    graph.add_ont(ontology_id, *meta[2:])

    # one owl:imports statement plus the atlas triples per scheme
    for scheme_id, scheme_atlas in sorted(ontids_atlases):
        graph.add_trip(ontology_id, owl.imports, scheme_id)
        add_triples(graph, scheme_atlas, make_atlas)

    # shared roots, in the same order as before: atlas first, then parcellation
    for super_curie, super_label in ((ATLAS_SUPER, atname),
                                     (PARC_SUPER, psname)):
        graph.add_class(super_curie, label=super_label)
    graph.write()
Пример #12
0
class PAXRAT6(genericPScheme):
    """Paxinos rat (6th edition) parcellation scheme read from a text file."""

    source = 'resources/paxinos09names.txt'
    ont = OntMeta(
        PARC, 'paxinos_r_s_6', 'Paxinos Rat Parcellation 6th', 'PAXRAT6',
        'This file is automatically generated from ' + source + '.' + NOTICE,
        TODAY)
    concept = PScheme(ILXREPLACE(ont.name),
                      'Paxinos Rat Stereological 6th Ed parcellation concept',
                      'NCBITaxon:10116', ADULT)
    atlas = PSArtifact(
        ILXREPLACE(ont.name + 'atlas'),
        'The Rat Brain in Stereotaxic Coordinates 6th Edition',
        '6th',
        '02-11-2006',  # d-m-y
        None,  # the fact this is missing is very big problem :/
        ('Paxinos, George, Charles RR Watson, and Piers C. Emson.'
         ' "AChE-stained horizontal sections of the rat brain'
         ' in stereotaxic coordinates." Journal of neuroscience'
         ' methods 3, no. 2 (1980): 129-149.'),  # FIXME
        ('Paxinos Rat 6th', ),
        tuple())
    PREFIXES = makePrefixes('NIFRID')
    PREFIXES['PAXRAT'] = interlex_namespace('paxinos/rat/labels')

    @classmethod
    def datagetter(cls):
        """Parse ``cls.source`` into ``[label, abbreviation]`` rows.

        Lines starting with ``#`` are skipped and anything after a ``#`` is
        dropped as a trailing comment.  Lines that are empty once comments
        and whitespace are removed are skipped as well; they used to come
        through as one-element rows that broke the unpacking in
        ``dataproc``.  Each remaining line is split on its last space.
        """
        rows = []
        with open(cls.source, 'rt') as f:
            for raw in f:
                if raw.startswith('#'):
                    continue
                text = raw.split('#', 1)[0].strip()
                if not text:
                    continue  # blank line, or a comment preceded by whitespace
                rows.append(text.rsplit(' ', 1))
        return rows

    @classmethod
    def dataproc(cls, graph, data):
        """Add one class per row, numbered from 1 in file order."""
        for number, (label, abrv) in enumerate(data, 1):
            id_ = 'PAXRAT:' + str(number)
            display = '(%s) ' % cls.ont.shortname + label
            graph.add_class(id_, cls.concept.curie, label=display)
            graph.add_trip(id_, PARCLAB, label)
            # FIXME these are listed as abbreviations in the text
            graph.add_trip(id_, ACRONYM, abrv)
Пример #13
0
    def fixAltIdIsURIRef(g):
        """Turn URI-valued alternative-id / id annotations in ``g`` into literals.

        Every ``oboInOwl:hasAlternativeId`` or ``oboInOwl:id`` triple whose
        object is a URIRef is replaced by the same triple carrying the
        object's qname as an ``xsd:string`` literal.
        """
        hai = ug.expand('oboInOwl:hasAlternativeId')
        i = ug.expand('oboInOwl:id')
        # called only for its effect on g: amazingly the CHEBI prefix is
        # sometimes missing...
        makeGraph('', graph=g, prefixes=makePrefixes('CHEBI'))

        def inner(s, p, o):
            if not isinstance(o, rdflib.URIRef):
                return
            qn = g.namespace_manager.qname(o)
            g.add((s, p, rdflib.Literal(qn, datatype=rdflib.XSD.string)))
            if 'ns' in qn:
                print('WARNING UNKNOWN NAMESPACE BEING SHORTENED', str(o),
                      qn)
            g.remove((s, p, o))

        for predicate in (hai, i):
            # snapshot the matches first: inner() adds and removes triples,
            # which must not happen while the graph is still being iterated
            for s, o in list(g.subject_objects(predicate)):
                inner(s, predicate, o)
Пример #14
0
def chebi_imp():
    """Build the 'chebi-bridge' ontology and strip ChEBI classes from NIF files.

    Steps, in order:

    1. Read the curated ChEBI ids from ``resources/chebi-subset-ids.txt``.
    2. Parse chebislim, chebi-dead, NIF-Chemical and NIF-Molecule (hard coded
       local paths) and merge them one by one into ``g``, recording the
       number of triples per id after every merge (``a1`` .. ``a4``).
    3. Normalise the graphs: swap dead ids for their replacements, move
       alternative-id predicates to oboInOwl and turn URI valued ids into
       string literals.
    4. For ids whose triple counts changed, copy the statements that are in
       the merged graph but not in chebislim into the bridge ontology ``cb``.
    5. Remove hand curated and redundant statements, print the remaining
       subClassOf statements for review, and write ``cb``.
    6. Delete every class declared in chebislim / chebi-dead from
       NIF-Chemical and NIF-Molecule and write those two files.

    The function ends in ``embed()`` (presumably an interactive IPython
    shell -- confirm against the file's imports), so it is meant to be run
    by hand.
    """
    PREFIXES = makePrefixes('definition', 'replacedBy', 'hasRole', 'oboInOwl',
                            'CHEBI', 'owl', 'skos', 'oboInOwl')
    # helper graph used only to expand curies with the prefixes above
    ug = makeGraph('utilgraph', prefixes=PREFIXES)
    with open('resources/chebi-subset-ids.txt', 'rt') as f:
        ids_raw = set((_.strip() for _ in f.readlines()))
        ids = sorted(set((ug.expand(_.strip()) for _ in ids_raw)))

    def check_chebis(g):
        """Return, for every id in ``ids`` (same order), its triple count in ``g``."""
        a = []
        for id_ in ids:
            l = sorted(g.triples((id_, None, None)))
            ll = len(l)
            a.append(ll)
        return a

    def fixIons(g):
        """Replace atom identifiers in ``g`` with the matching ion identifiers."""
        # there are a series of atom/ion confusions that shall be dealt with, solution is to add 'iron' as a synonym to the charged form since that is what the biologists are usually referring to...
        ng = makeGraph('', graph=g, prefixes=makePrefixes('CHEBI'))
        # atom           ion
        # the two bare tuple expressions below do nothing at runtime; they
        # only record the ions that need no replacement
        None, 'CHEBI:29108'  # calcium is ok
        ng.replace_uriref('CHEBI:30145', 'CHEBI:49713')  # lithium
        ng.replace_uriref('CHEBI:18248', 'CHEBI:29033')  # iron
        ng.replace_uriref('CHEBI:26216', 'CHEBI:29103')  # potassium
        ng.replace_uriref('CHEBI:26708', 'CHEBI:29101')  # sodium
        None, 'CHEBI:29105'  # zinc is ok

    # g: everything merged, cg: chebislim, cd: chebi-dead,
    # chemg: NIF-Chemical, molg: NIF-Molecule
    g = rdflib.Graph()
    cg = rdflib.Graph()
    cd = rdflib.Graph()
    chemg = rdflib.Graph()
    molg = rdflib.Graph()

    #g.parse('/home/tom/git/NIF-Ontology/ttl/generated/chebislim.ttl', format='turtle')
    cg.parse('/home/tom/git/NIF-Ontology/ttl/generated/chebislim.ttl',
             format='turtle')
    list(g.add(t) for t in cg)
    a1 = check_chebis(g)

    #g.parse('/home/tom/git/NIF-Ontology/ttl/generated/chebi-dead.ttl', format='turtle')
    cd.parse('/home/tom/git/NIF-Ontology/ttl/generated/chebi-dead.ttl',
             format='turtle')
    list(g.add(t) for t in cd)
    a2 = check_chebis(g)

    #g.parse('/home/tom/git/NIF-Ontology/ttl/NIF-Chemical.ttl', format='turtle')
    chemg.parse('/home/tom/git/NIF-Ontology/ttl/NIF-Chemical.ttl',
                format='turtle')
    # wrapper around chemg, used at the end for del_class() and write()
    chemgg = makeGraph('NIF-Chemical', graph=chemg)
    fixIons(chemg)
    list(g.add(t) for t in chemg)
    a3 = check_chebis(g)

    #g.parse('/home/tom/git/NIF-Ontology/ttl/NIF-Molecule.ttl', format='turtle')
    molg.parse('/home/tom/git/NIF-Ontology/ttl/NIF-Molecule.ttl',
               format='turtle')
    # wrapper around molg, used at the end for del_class() and write()
    molgg = makeGraph('NIF-Molecule', graph=molg)
    fixIons(molg)
    list(g.add(t) for t in molg)
    a4 = check_chebis(g)

    replacedBy = ug.expand('replacedBy:')
    # dead id -> replacement id, taken from chebi-dead
    deads = {s: o for s, o in cd.subject_objects(replacedBy)}

    def switch_dead(g):
        """Swap every dead id in ``g`` for its replacement and keep the old id."""
        ng = makeGraph('', graph=g, prefixes=makePrefixes('oboInOwl'))
        for f, r in deads.items():
            ng.replace_uriref(f, r)
            # NOTE(review): this writes 'hasAlternateId' while the fix-up
            # functions below work on 'hasAlternativeId' -- likely a
            # misspelling, confirm before relying on these annotations
            ng.add_node(r, 'oboInOwl:hasAlternateId',
                        rdflib.Literal(f, datatype=rdflib.XSD.string))
            g.remove(
                (r, replacedBy, r))  # in case the replaced by was already in

    switch_dead(g)
    switch_dead(cg)
    switch_dead(chemg)
    switch_dead(molg)

    def fixHasAltId(g):
        """Move the NIFCHEM / BIRNANN id predicates in ``g`` to oboInOwl."""
        ng = makeGraph('',
                       graph=g,
                       prefixes=makePrefixes('oboInOwl', 'NIFCHEM', 'BIRNANN'))
        ng.replace_uriref('NIFCHEM:hasAlternativeId',
                          'oboInOwl:hasAlternativeId')
        ng.replace_uriref('BIRNANN:ChEBIid', 'oboInOwl:id')

    # NOTE(review): molg is not in this list (nor in the fixAltIdIsURIRef
    # call further down) -- confirm that this is intentional
    list(map(fixHasAltId, (g, cg, chemg)))

    def fixAltIdIsURIRef(g):
        """Replace URIRef objects of hasAlternativeId / id with qname literals."""
        hai = ug.expand('oboInOwl:hasAlternativeId')
        i = ug.expand('oboInOwl:id')
        # result is discarded; the call is made to register the CHEBI prefix
        makeGraph('', graph=g, prefixes=makePrefixes(
            'CHEBI'))  # amazlingly sometimes this is missing...

        def inner(s, p, o):
            if type(o) == rdflib.URIRef:
                qn = g.namespace_manager.qname(o)
                g.add((s, p, rdflib.Literal(qn, datatype=rdflib.XSD.string)))
                if 'ns' in qn:
                    print('WARNING UNKNOWN NAMESPACE BEING SHORTENED', str(o),
                          qn)
                g.remove((s, p, o))

        # NOTE(review): inner() adds and removes triples while the
        # subject_objects() generator is still being consumed; whether that
        # is safe depends on the rdflib store -- verify
        for s, o in g.subject_objects(hai):
            inner(s, hai, o)
        for s, o in g.subject_objects(i):
            inner(s, i, o)

    list(map(fixAltIdIsURIRef, (g, cg, chemg)))

    # per-id triple counts after each of the four merges
    matches = [_ for _ in zip(a1, a2, a3, a4)]
    changed = [len(set(_)) != 1 for _ in matches]
    # ids whose count changed at some stage and that were present in chebislim
    review = [(id_, m) for id_, changed, m in zip(ids, changed, matches)
              if changed and m[0]]
    # for reasons currently lost to implementation details this returns a list of empty lists if run from ipython
    wat_c = [
        set([(s, str(o.toPython()))
             for s, p, o in cg.triples((u, None, None))]) for u, _ in review
    ]
    wat_a = [
        set([(s, str(o.toPython())) for s, p, o in g.triples((u, None, None))])
        for u, _ in review
    ]
    wat_c_ = [
        set(cg.triples((u, None, None))) for u, _ in review
    ]  # for reasons currently lost to implementation details this returns a list of empty lists if run from ipython
    wat_a_ = [
        set(g.triples((u, None, None))) for u, _ in review
    ]  # for reasons currently lost to implementation details this returns a list of empty lists if run from ipython
    # (subject, stringified object) pairs present in the merged graph but not
    # in chebislim, one set per reviewed id; diff_ is the same on raw triples
    diff = [a - c for a, c in zip(wat_a, wat_c)]
    diff_ = [a - c for a, c in zip(wat_a_, wat_c_)]

    cb = createOntology(
        'chebi-bridge',
        'NIF ChEBI bridge',
        makePrefixes('CHEBI', 'BFO1SNAP', 'owl', 'skos', 'dc', 'hasRole',
                     'NIFCHEM', 'oboInOwl', 'NIFMOL', 'OBOANN', 'BIRNANN'),
        'chebibridge',
        ('This bridge file contains additional annotations'
         ' on top of CHEBI identifiers that were originally'
         ' included in NIF-Chemical or NIF-Molecule that have'
         ' not since been added to CHEBI upstream'),
        path='ttl/bridge/',
        #imports=('https://raw.githubusercontent.com/SciCrunch/NIF-Ontology/master/ttl/generated/chebislim.ttl',
        #'https://raw.githubusercontent.com/SciCrunch/NIF-Ontology/master/ttl/generated/chebi-dead.ttl'))
        imports=(
            'http://ontology.neuinfo.org/NIF/ttl/generated/chebislim.ttl',
            'http://ontology.neuinfo.org/NIF/ttl/generated/chebi-dead.ttl'))

    # NOTE(review): 'out' is never used below
    out = []
    for set_ in diff:
        for sub, string in sorted(set_):
            for t in g.triples((sub, None, None)):
                # please not that this process will do things like remove hasStreenName ectasy from CHEBI:1391 since chebislim has it listed as a synonym
                py = t[-1].toPython()
                if py == string and not py.startswith(
                        'ub'
                ):  # ignore restrictions... this is safe because nifmol and nifchem dont have any restrictions...
                    cb.add_recursive(t, g)
        # NOTE(review): 'sub' is the inner loop variable; when set_ is empty
        # it is stale from an earlier set (or undefined on the first one)
        cb.add_class(
            sub
        )  # only need to go at the end because sub is the same for each set

    def hasImplicitSuperclass(s, o):
        """Return True if ``o`` is reachable from ``s`` via subClassOf in ``cg``.

        Falls through (returning None) when it is not.
        """
        for super_ in cg.objects(s, rdflib.RDFS.subClassOf):
            if super_ == o:
                return True
            elif hasImplicitSuperclass(super_, o):
                return True

    # curation decisions after review (see outtc for full list)
    curatedOut = []

    def curateOut(*t):
        """Record triple ``t`` (curies expanded) and delete it from ``cb``."""
        curatedOut.append(
            tuple(
                ug.expand(_) if type(_) is not rdflib.Literal else _
                for _ in t))
        cb.del_trip(*t)

    curateOut(
        'CHEBI:6887', 'rdfs:subClassOf', 'CHEBI:23367'
    )  # defer to the chebi choice of chemical substance over molecular entity since it is classified as a racemate which doesn't quite match the mol ent def
    curateOut(
        'CHEBI:26519', 'rdfs:subClassOf', 'CHEBI:24870'
    )  # some ions may also be free radicals, but all free radicals are not ions!
    #natural product removal since natural product should probably be a role if anything...
    curateOut('CHEBI:18059', 'rdfs:subClassOf', 'CHEBI:33243')
    curateOut('CHEBI:24921', 'rdfs:subClassOf', 'CHEBI:33243')
    curateOut('CHEBI:37332', 'rdfs:subClassOf', 'CHEBI:33243')

    curateOut('CHEBI:50906', 'rdfs:label',
              rdflib.Literal('Chemical role', datatype=rdflib.XSD.string)
              )  # chebi already has a chemical role...
    curateOut(
        'CHEBI:22586', 'rdfs:subClassOf', 'CHEBI:24432'
    )  # antioxidant is already modelled as a chemical role instead of a biological role, the distinction is that the biological roles affect biological processes/property, not chemical processes/property
    curateOut('CHEBI:22720', 'rdfs:subClassOf',
              'CHEBI:27171')  # not all children are bicyclic
    curateOut(
        'CHEBI:23447', 'rdfs:subClassOf', 'CHEBI:17188'
    )  # this one seems obviously flase... all cyclic nucleotides are not nucleoside 5'-monophosphate...
    curateOut(
        'CHEBI:24922', 'rdfs:subClassOf', 'CHEBI:27171'
    )  # not all children are bicyclic, some may be poly, therefore removing
    curateOut(
        'CHEBI:48706', 'rdfs:subClassOf', 'CHEBI:33232'
    )  # removing since antagonist is more incidental and pharmacological role is more appropriate (as chebi has it)
    curateOut('CHEBI:51064', 'rdfs:subClassOf',
              'CHEBI:35338')  # removing since chebi models this with has part
    curateOut(
        'CHEBI:8247', 'rdfs:subClassOf', 'CHEBI:22720'
    )  # the structure is 'fused to' a benzo, but it is not a benzo, chebi has the correct
    #curateOut('CHEBI:9463', 'rdfs:subClassOf', 'CHEBI:50786')  # not sure what to make of this wikipedia says one thing, but chebi says another, very strange... not an anabolic agent?!??! wat no idea

    # review hold over subClassOf statements
    # intc: removed because chebislim already implies them
    # outtc: kept in the bridge, printed below for manual review
    intc = []
    outtc = []
    # NOTE(review): triples are removed from cb.g while it is being iterated;
    # whether that is safe depends on the rdflib store -- verify
    for s, o in cb.g.subject_objects(rdflib.RDFS.subClassOf):
        if str(
                o
        ) == 'http://ontology.neuinfo.org/NIF/Backend/BIRNLex_annotation_properties.owl#_birnlex_retired_class':
            # we need to remove any of the cases where deprecation was misused
            cb.g.remove((s, rdflib.RDFS.subClassOf, o))
        elif hasImplicitSuperclass(s, o):
            cb.g.remove((s, rdflib.RDFS.subClassOf, o))
            intc.append((s, rdflib.RDFS.subClassOf, o))
        else:
            outtc.append((s, rdflib.RDFS.subClassOf, o))

    def qname(trips):
        """Return ``trips`` with every term shortened to its qname."""
        return tuple(
            tuple(cb.g.namespace_manager.qname(_) for _ in t) for t in trips)

    # print each remaining subClassOf statement next to what 'sgv' (defined
    # elsewhere in the file) returns for both ends
    for a, p, b in sorted(qname(outtc)):
        if 'NIFMOL' in b:
            continue  # not considering cases where NIFMOL/NIFCHEM ids are used, that can come later
        s = sgv.findById(a)
        o = sgv.findById(b)
        if s is None or o is None:
            print(a, '=>', s)
            print(b, '=>', o)
        else:
            print(s['labels'], s['curie'])
            print('subClassOf')
            print(o['labels'], o['curie'])
            print((a, p, b))
        print('---------------------')

    cb.write(
    )  # re-add only the missing edges so that we can zap them from NIF-Molecule and NIF-Chemical (recurse is needed...)

    # validation
    # these sets are only computed for inspection in the shell opened by
    # embed() at the end; nothing below reads diff2 .. diff5 or both
    diff2 = set(cb.g) - set(cg)
    diff3 = set(cb.g) - diff2  # should just be all the owl:Class entries
    diff4 = set(cb.g) - set(chemg) | set(cb.g) - set(molg)  # not informative
    diff5 = set(cb.g) - diff4  # not informative
    both = set(chemg) & set(
        molg)  # there is no overlap beyond the owl:Class declarations

    def getChebis(set_):
        """Keep the entries of ``set_`` whose first element contains 'CHEBI_'."""
        return set(t for t in set_ if 'CHEBI_' in t[0])

    def nodt(graph):
        """Return (subject, object) pairs of ``graph`` with literals as plain str."""
        return set((s, str(o) if type(o) is rdflib.Literal else o)
                   for s, p, o in graph)

    # ChEBI statements of NIF-Chemical (cmc) and NIF-Molecule (mmc) that are
    # in none of: the bridge, chebislim, chebi-dead, intc, curatedOut
    cmc = getChebis((((
        (nodt(chemg) - nodt(cb.g)) - nodt(cg)) - nodt(cd)) - nodt(intc)) -
                    nodt(curatedOut))
    cmc = sorted(t for s, o in cmc for t in chemg.triples((s, None, o)))
    mmc = getChebis((((
        (nodt(molg) - nodt(cb.g)) - nodt(cg)) - nodt(cd)) - nodt(intc)) -
                    nodt(curatedOut))
    mmc = sorted(t for s, o in mmc for t in molg.triples((s, None, o)))

    # remove chebi classes from nifchem and nifmol
    def remstuff(sources, targets):
        """Call ``del_class`` on every target for each owl:Class in the sources."""
        for source in sources:
            for id_ in source.subjects(rdflib.RDF.type, rdflib.OWL.Class):
                for target in targets:
                    target.del_class(id_)

    remstuff((cg, cd), (chemgg, molgg))

    chemgg.write()
    molgg.write()

    embed()
Пример #15
0
def swanson():
    """Build the Swanson 2014 partonomy graph (not really a parcellation scheme).

    Reads ``resources/swanson_aligned.txt``, turns every line that does not
    start with ``#`` into a ``(depth, name, citation, next-synonym flag)`` row,
    looks each name up with ``sgv.findByTerm`` to pick the first UBERON curie,
    and runs the rows through the nested ``SP`` row parser. The collected
    nodes are added to a graph made with ``makeGraph`` as classes, the
    per-appendix parent/child sets are added as has-part / part-of
    hierarchies, and the graph is written with ``new_graph.write()``.

    Returns:
        tuple: ``(ontid, None)`` where ``ontid`` is
        ``GENERATED + 'swanson_hierarchies.ttl'``.

    Raises:
        KeyError: from ``SP._row_post`` when a row at depth > 0 has no earlier
            row recorded one level up (presumably triggered while ``rowParse``
            consumes the rows -- confirm against ``rowParse``).
    """
    source = 'resources/swanson_aligned.txt'
    ONT_PATH = GENERATED
    filename = 'swanson_hierarchies'
    ontid = ONT_PATH + filename + '.ttl'
    PREFIXES = makePrefixes('', 'ilx', 'owl', 'skos', 'NIFRID', 'ILXREPLACE')
    PREFIXES.update({
        #'':ontid + '/',  # looking for better options
        'SWAN': interlex_namespace('swanson/nt/term'),
        'SWAA': interlex_namespace('swanson/nt/appendix'),
    })
    new_graph = makeGraph(filename, PREFIXES, writeloc=WRITELOC)
    new_graph.add_ont(
        ontid, 'Swanson brain partomies', 'Swanson 2014 Partonomies',
        'This file is automatically generated from ' + source + '.' + NOTICE,
        TODAY)

    # FIXME citations should really go on the ... anatomy? scheme artifact
    definingCitation = 'Swanson, Larry W. Neuroanatomical Terminology: a lexicon of classical origins and historical foundations. Oxford University Press, USA, 2014.'
    definingCitationID = 'ISBN:9780195340624'
    new_graph.add_trip(ontid, 'NIFRID:definingCitation', definingCitation)
    new_graph.add_trip(ontid, 'NIFRID:definingCitationID', definingCitationID)

    with open(source, 'rt') as f:
        lines = [line.strip() for line in f]

    # join header on page 794
    lines[635] += ' ' + lines.pop(636)
    # fix for capitalization since this header is reused
    fixed = ' or '.join([
        ' ('.join([n.capitalize() for n in part.split(' (')])
        for part in lines[635].lower().split(' or ')
    ]).replace('human', 'HUMAN')
    lines[635] = fixed

    # one row per name: (depth, name, citation, 'NEXT SYN' or None)
    data = []
    for line in lines:
        if line.startswith('#'):
            continue
        # depth is the number of five-dot groups found in the line
        level = line.count('.' * 5)
        line = line.strip('.')
        if ' (' in line:
            if ') or' in line:
                # "name (cite) or name2 (cite2)": emit the first half flagged
                # so that the following row is treated as its synonym
                n1, line = line.split(') or')
                area_name, citationP = n1.strip().split(' (')
                citation = citationP.rstrip(')')
                data.append((level, area_name, citation, 'NEXT SYN'))

            area_name, citationP = line.strip().split(' (')
            citation = citationP.rstrip(')')
        else:
            area_name = line
            citation = None

        data.append((level, area_name, citation, None))

    results = async_getter(sgv.findByTerm, [(d[1], ) for d in data])
    #results = [None] * len(data)
    # keep only UBERON curies per lookup, then take the first one (or None)
    curies = [[r['curie'] for r in res if 'UBERON' in r['curie']]
              if res else [] for res in results]
    output = [c[0] if c else None for c in curies]

    header = ['Depth', 'Name', 'Citation', 'NextSyn', 'Uberon']
    # transpose data to columns, append the uberon column, transpose back
    zoop = [header] + [r for r in zip(*zip(*data), output)] + \
            [(0, 'Appendix END None', None, None, None)]  # needed to add last appendix

    class SP(rowParse):
        """Row parser that accumulates nodes and per-appendix edges.

        The methods named after the header columns receive that column's
        value; ``_row_post`` and ``_end`` are presumably hooks called by
        ``rowParse`` after each row and after the last row -- confirm against
        ``rowParse``.
        """

        def __init__(self):
            self.nodes = defaultdict(dict)
            self._appendix = 0
            self.appendicies = {}
            self._last_at_level = {}
            self.names = defaultdict(set)
            self.children = defaultdict(set)
            self.parents = defaultdict(set)
            self.next_syn = False
            super().__init__(zoop)

        def Depth(self, value):
            # a row index left in next_syn by the previous row marks this
            # row as that row's synonym
            if self.next_syn:
                self.synonym = self.next_syn
            else:
                self.synonym = False
            self.depth = value

        def Name(self, value):
            self.name = value

        def Citation(self, value):
            self.citation = value

        def NextSyn(self, value):
            # remember this row's index so the next row attaches to it
            if value:
                self.next_syn = self._rowind
            else:
                self.next_syn = False

        def Uberon(self, value):
            self.uberon = value

        def _row_post(self):
            # check if we are in the next appendix
            # may want to xref ids between appendicies as well...
            if self.depth == 0:
                if self.name.startswith('Appendix'):
                    if self._appendix:
                        # close out the previous appendix and reset the
                        # per-appendix edge state
                        self.appendicies[self._appendix]['children'] = dict(
                            self.children)
                        self.appendicies[self._appendix]['parents'] = dict(
                            self.parents)
                        self._last_at_level = {}
                        self.children = defaultdict(set)
                        self.parents = defaultdict(set)
                    _, num, apname = self.name.split(' ', 2)
                    if num == 'END':
                        # sentinel row appended to zoop, nothing more to do
                        return
                    self._appendix = int(num)
                    self.appendicies[self._appendix] = {
                        'name':
                        apname.capitalize(),
                        'type':
                        self.citation.capitalize() if self.citation else None
                    }
                    return
                else:
                    if ' [' in self.name:
                        name, taxonB = self.name.split(' [')
                        self.name = name
                        self.appendicies[self._appendix][
                            'taxon'] = taxonB.rstrip(']').capitalize()
                    else:  # top level is animalia
                        self.appendicies[
                            self._appendix]['taxon'] = 'ANIMALIA'.capitalize()

                    self.name = self.name.capitalize()
                    self.citation = self.citation.capitalize()
            # nodes
            if self.synonym:
                self.nodes[self.synonym]['synonym'] = self.name
                self.nodes[self.synonym]['syn-cite'] = self.citation
                self.nodes[self.synonym]['syn-uberon'] = self.uberon
                return
            else:
                if self.citation:  # Transverse Longitudinal etc all @ lvl4
                    self.names[self.name + ' ' + self.citation].add(
                        self._rowind)
                else:
                    # uncited names are disambiguated with the appendix
                    # number and the label of the row one level up
                    self.name += str(self._appendix) + self.nodes[
                        self._last_at_level[self.depth - 1]]['label']
                    # can't return here because they are their own level
                # replace with actually doing something...
                self.nodes[self._rowind]['label'] = self.name
                self.nodes[self._rowind]['citation'] = self.citation
                self.nodes[self._rowind]['uberon'] = self.uberon
            # edges
            self._last_at_level[self.depth] = self._rowind
            # TODO will need something to deal with the Lateral/
            if self.depth > 0:
                try:
                    parent = self._last_at_level[self.depth - 1]
                except KeyError as e:
                    # no row seen one level up: fail with context rather
                    # than carrying on with an unbound ``parent``
                    raise KeyError(
                        'row %s (%r) at depth %s has no parent at depth %s' %
                        (self._rowind, self.name, self.depth,
                         self.depth - 1)) from e
                self.children[parent].add(self._rowind)
                self.parents[self._rowind].add(parent)

        def _end(self):
            # rows sharing the same "name citation" key collapse onto the
            # lowest row index
            replace = {}
            for rows in (sorted(n) for n in self.names.values()
                         if len(n) > 1):
                replace_with, to_replace = rows[0], rows[1:]
                for r in to_replace:
                    replace[r] = replace_with

            for r in replace:
                self.nodes.pop(r)

            for vals in self.appendicies.values():
                children = vals['children']
                parents = vals['parents']
                # need reversed so children are corrected before swap
                for r, rw in reversed(sorted(replace.items())):
                    if r in parents:
                        child = r
                        new_child = rw
                        parent = parents.pop(child)
                        parents[new_child] = parent
                        parent = list(parent)[0]
                        children[parent].remove(child)
                        children[parent].add(new_child)
                    if r in children:
                        parent = r
                        new_parent = rw
                        childs = children.pop(parent)
                        children[new_parent] = childs
                        for child in childs:
                            parents[child] = {new_parent}

            self.nodes = dict(self.nodes)

    sp = SP()
    nbase = PREFIXES['SWAN'] + '%s'
    # NOTE(review): the 'SWA:' ids below are only stored in json_ and do not
    # match a key in PREFIXES ('SWAN'/'SWAA') -- confirm this is intended
    json_ = {'nodes': [], 'edges': []}
    parent = ILXREPLACE('swansonBrainRegionConcept')
    for node, anns in sp.nodes.items():
        nid = nbase % node
        new_graph.add_class(nid, parent, label=anns['label'])
        new_graph.add_trip(nid, 'NIFRID:definingCitation', anns['citation'])
        json_['nodes'].append({'lbl': anns['label'], 'id': 'SWA:' + str(node)})
        #if anns['uberon']:
        #new_graph.add_trip(nid, owl.equivalentClass, anns['uberon'])  # issues arrise here...

    for appendix, data in sp.appendicies.items():
        aid = PREFIXES['SWAA'] + str(appendix)
        new_graph.add_class(aid, label=data['name'].capitalize())
        new_graph.add_trip(
            aid, 'ilx:hasTaxonRank',
            data['taxon'])  # FIXME appendix is the data artifact...
        children = data['children']
        # each appendix gets its own pair of inverse, transitive properties
        ahp = HASPART + str(appendix)
        apo = PARTOF + str(appendix)
        new_graph.add_op(ahp, transitive=True)
        new_graph.add_op(apo, inverse=ahp, transitive=True)
        for parent, childs in children.items(
        ):  # FIXME does this give complete coverage?
            pid = nbase % parent
            for child in childs:
                cid = nbase % child
                new_graph.add_hierarchy(
                    cid, ahp, pid)  # note hierarhcy inverts direction
                new_graph.add_hierarchy(pid, apo, cid)
                json_['edges'].append({
                    'sub': 'SWA:' + str(child),
                    'pred': apo,
                    'obj': 'SWA:' + str(parent)
                })

    new_graph.write()
    if False:  # disabled debugging aid: print a tree per appendix from json_
        Query = namedtuple('Query',
                           ['root', 'relationshipType', 'direction', 'depth'])
        mapping = (1, 1, 1, 1, 30, 83, 69, 70, 74, 1)  # should generate?
        for i, n in enumerate(mapping):
            a, b = creatTree(*Query('SWA:' + str(n), 'ilx:partOf' + str(i + 1),
                                    'INCOMING', 10),
                             json=json_)
            print(a)
    return ontid, None
Пример #16
0
def swanson():
    """Build the Swanson 2014 partonomy graph (not really a parcellation scheme).

    Reads ``resources/swanson_aligned.txt``, turns every line that does not
    start with ``#`` into a ``(depth, name, citation, next-synonym flag)`` row,
    looks each name up with ``sgv.findByTerm`` to pick the first UBERON curie,
    and runs the rows through the nested ``SP`` row parser. The collected
    nodes are added to a graph made with ``makeGraph`` as classes, the
    per-appendix parent/child sets are added as has-part / part-of
    hierarchies, and the graph is written with
    ``new_graph.write(convert=False)``.

    Returns:
        tuple: ``(ontid, None)`` where ``ontid`` is the generated ontology
        url ending in ``swanson_hierarchies.ttl``.

    Raises:
        KeyError: from ``SP._row_post`` when a row at depth > 0 has no earlier
            row recorded one level up (presumably triggered while ``rowParse``
            consumes the rows -- confirm against ``rowParse``).
    """
    ONT_PATH = 'http://ontology.neuinfo.org/NIF/ttl/generated/'
    filename = 'swanson_hierarchies'
    ontid = ONT_PATH + filename + '.ttl'
    PREFIXES = makePrefixes('ilx', 'owl', 'OBOANN', 'UBERON')
    PREFIXES.update({
        '': ontid + '/',  # looking for better options
        'SWAN': 'http://swanson.org/node/',
        'SWAA': 'http://swanson.org/appendix/',
    })
    new_graph = makeGraph(filename, PREFIXES, writeloc='/tmp/parc/')
    new_graph.add_ont(ontid,
                      'Swanson brain partomies',
                      'Swanson 2014 Partonomies',
                      'This file is automatically generated from....',
                      TODAY)

    with open('resources/swanson_aligned.txt', 'rt') as f:
        lines = [line.strip() for line in f]

    # join header on page 794
    lines[635] += ' ' + lines.pop(636)
    # fix for capitalization since this header is reused
    fixed = ' or '.join([
        ' ('.join([n.capitalize() for n in part.split(' (')])
        for part in lines[635].lower().split(' or ')
    ]).replace('human', 'HUMAN')
    lines[635] = fixed

    # one row per name: (depth, name, citation, 'NEXT SYN' or None)
    data = []
    for line in lines:
        if line.startswith('#'):
            continue
        # depth is the number of five-dot groups found in the line
        level = line.count('.' * 5)
        line = line.strip('.')
        if ' (' in line:
            if ') or' in line:
                # "name (cite) or name2 (cite2)": emit the first half flagged
                # so that the following row is treated as its synonym
                n1, line = line.split(') or')
                area_name, citationP = n1.strip().split(' (')
                citation = citationP.rstrip(')')
                data.append((level, area_name, citation, 'NEXT SYN'))

            area_name, citationP = line.strip().split(' (')
            citation = citationP.rstrip(')')
        else:
            area_name = line
            citation = None

        data.append((level, area_name, citation, None))

    results = async_getter(sgv.findByTerm, [(d[1],) for d in data])
    #results = [None] * len(data)
    # keep only UBERON curies per lookup, then take the first one (or None)
    curies = [[r['curie'] for r in res if 'UBERON' in r['curie']]
              if res else [] for res in results]
    output = [c[0] if c else None for c in curies]

    header = ['Depth', 'Name', 'Citation', 'NextSyn', 'Uberon']
    # transpose data to columns, append the uberon column, transpose back
    zoop = [header] + [r for r in zip(*zip(*data), output)] + \
            [(0, 'Appendix END None', None, None, None)]  # needed to add last appendix

    class SP(rowParse):
        """Row parser that accumulates nodes and per-appendix edges.

        The methods named after the header columns receive that column's
        value; ``_row_post`` and ``_end`` are presumably hooks called by
        ``rowParse`` after each row and after the last row -- confirm against
        ``rowParse``.
        """

        def __init__(self):
            self.nodes = defaultdict(dict)
            self._appendix = 0
            self.appendicies = {}
            self._last_at_level = {}
            self.names = defaultdict(set)
            self.children = defaultdict(set)
            self.parents = defaultdict(set)
            self.next_syn = False
            super().__init__(zoop)

        def Depth(self, value):
            # a row index left in next_syn by the previous row marks this
            # row as that row's synonym
            if self.next_syn:
                self.synonym = self.next_syn
            else:
                self.synonym = False
            self.depth = value

        def Name(self, value):
            self.name = value

        def Citation(self, value):
            self.citation = value

        def NextSyn(self, value):
            # remember this row's index so the next row attaches to it
            if value:
                self.next_syn = self._rowind
            else:
                self.next_syn = False

        def Uberon(self, value):
            self.uberon = value

        def _row_post(self):
            # check if we are in the next appendix
            # may want to xref ids between appendicies as well...
            if self.depth == 0:
                if self.name.startswith('Appendix'):
                    if self._appendix:
                        # close out the previous appendix and reset the
                        # per-appendix edge state
                        self.appendicies[self._appendix]['children'] = dict(self.children)
                        self.appendicies[self._appendix]['parents'] = dict(self.parents)
                        self._last_at_level = {}
                        self.children = defaultdict(set)
                        self.parents = defaultdict(set)
                    _, num, apname = self.name.split(' ', 2)
                    if num == 'END':
                        # sentinel row appended to zoop, nothing more to do
                        return
                    self._appendix = int(num)
                    self.appendicies[self._appendix] = {
                        'name': apname.capitalize(),
                        'type': self.citation.capitalize() if self.citation else None}
                    return
                else:
                    if ' [' in self.name:
                        name, taxonB = self.name.split(' [')
                        self.name = name
                        self.appendicies[self._appendix]['taxon'] = taxonB.rstrip(']').capitalize()
                    else:  # top level is animalia
                        self.appendicies[self._appendix]['taxon'] = 'ANIMALIA'.capitalize()

                    self.name = self.name.capitalize()
                    self.citation = self.citation.capitalize()
            # nodes
            if self.synonym:
                self.nodes[self.synonym]['synonym'] = self.name
                self.nodes[self.synonym]['syn-cite'] = self.citation
                self.nodes[self.synonym]['syn-uberon'] = self.uberon
                return
            else:
                if self.citation:  # Transverse Longitudinal etc all @ lvl4
                    self.names[self.name + ' ' + self.citation].add(self._rowind)
                else:
                    # uncited names are disambiguated with the appendix
                    # number and the label of the row one level up
                    self.name += str(self._appendix) + self.nodes[self._last_at_level[self.depth - 1]]['label']
                    # can't return here because they are their own level
                # replace with actually doing something...
                self.nodes[self._rowind]['label'] = self.name
                self.nodes[self._rowind]['citation'] = self.citation
                self.nodes[self._rowind]['uberon'] = self.uberon
            # edges
            self._last_at_level[self.depth] = self._rowind
            # TODO will need something to deal with the Lateral/
            if self.depth > 0:
                try:
                    parent = self._last_at_level[self.depth - 1]
                except KeyError as e:
                    # no row seen one level up: fail with context rather
                    # than carrying on with an unbound ``parent``
                    raise KeyError(
                        'row %s (%r) at depth %s has no parent at depth %s' %
                        (self._rowind, self.name, self.depth,
                         self.depth - 1)) from e
                self.children[parent].add(self._rowind)
                self.parents[self._rowind].add(parent)

        def _end(self):
            # rows sharing the same "name citation" key collapse onto the
            # lowest row index
            replace = {}
            for rows in (sorted(n) for n in self.names.values()
                         if len(n) > 1):
                replace_with, to_replace = rows[0], rows[1:]
                for r in to_replace:
                    replace[r] = replace_with

            for r in replace:
                self.nodes.pop(r)

            for vals in self.appendicies.values():
                children = vals['children']
                parents = vals['parents']
                # need reversed so children are corrected before swap
                for r, rw in reversed(sorted(replace.items())):
                    if r in parents:
                        child = r
                        new_child = rw
                        parent = parents.pop(child)
                        parents[new_child] = parent
                        parent = list(parent)[0]
                        children[parent].remove(child)
                        children[parent].add(new_child)
                    if r in children:
                        parent = r
                        new_parent = rw
                        childs = children.pop(parent)
                        children[new_parent] = childs
                        for child in childs:
                            parents[child] = {new_parent}

            self.nodes = dict(self.nodes)

    sp = SP()
    nbase = 'http://swanson.org/node/%s'
    # NOTE(review): the 'SWA:' ids below are only stored in json_ and do not
    # match a key in PREFIXES ('SWAN'/'SWAA') -- confirm this is intended
    json_ = {'nodes': [], 'edges': []}
    for node, anns in sp.nodes.items():
        nid = nbase % node
        new_graph.add_class(nid, 'ilx:swansonBrainRegionConcept', label=anns['label'])
        new_graph.add_node(nid, 'OBOANN:definingCitation', anns['citation'])
        json_['nodes'].append({'lbl': anns['label'], 'id': 'SWA:' + str(node)})
        #if anns['uberon']:
            #new_graph.add_node(nid, rdflib.OWL.equivalentClass, anns['uberon'])  # issues arrise here...

    for appendix, data in sp.appendicies.items():
        aid = 'http://swanson.org/appendix/%s' % appendix
        new_graph.add_class(aid, label=data['name'].capitalize())
        new_graph.add_node(aid, 'ilx:hasTaxonRank', data['taxon'])  # FIXME appendix is the data artifact...
        children = data['children']
        # each appendix gets its own pair of inverse, transitive properties
        ahp = HASPART + str(appendix)
        apo = PARTOF + str(appendix)
        new_graph.add_op(ahp, transitive=True)
        new_graph.add_op(apo, inverse=ahp, transitive=True)
        for parent, childs in children.items():  # FIXME does this give complete coverage?
            pid = nbase % parent
            for child in childs:
                cid = nbase % child
                new_graph.add_hierarchy(cid, ahp, pid)  # note hierarhcy inverts direction
                new_graph.add_hierarchy(pid, apo, cid)
                json_['edges'].append({'sub': 'SWA:' + str(child), 'pred': apo, 'obj': 'SWA:' + str(parent)})

    new_graph.write(convert=False)
    if False:  # disabled debugging aid: print a tree per appendix from json_
        Query = namedtuple('Query', ['root', 'relationshipType', 'direction', 'depth'])
        mapping = (1, 1, 1, 1, 30, 83, 69, 70, 74, 1)  # should generate?
        for i, n in enumerate(mapping):
            a, b = creatTree(*Query('SWA:' + str(n), 'ilx:partOf' + str(i + 1), 'INCOMING', 10), json=json_)
            print(a)
    return ontid, None
Пример #17
0
class HCP(genericPScheme):
    """Human Connectome Project multi-modal cortical parcellation (HCP-MMP1.0).

    The class attributes describe the ontology, concept and atlas;
    ``datagetter`` loads the rows of ``source`` and ``dataproc`` turns each
    row into triples on the graph it is given.
    """
    source = 'resources/human_connectome_project_2016.csv'
    ont = OntMeta(
        PARC, 'hcp_parcellation', ('Human Connectome Project Multi-Modal '
                                   'human cortical parcellation'),
        'HCP-MMP1.0',
        'This file is automatically generated from ' + source + '.' + NOTICE,
        TODAY)
    concept = PScheme(ILXREPLACE(ont.name), 'HCP parcellation concept',
                      'NCBITaxon:9606', ADULT)
    atlas = PSArtifact(
        ILXREPLACE(ont.name + 'atlas'),
        'Human Connectome Project Multi-Modal human cortical parcellation',
        '1.0',
        '20-07-2016',  # d-m-y
        'awaiting...',
        'doi:10.1038/nature18933',
        ('Human Connectome Project Multi-Modal Parcellation',
         'HCP Multi-Modal Parcellation',
         'Human Connectome Project Multi-Modal Parcellation version 1.0'),
        ('HCP_MMP', ont.shortname))
    # see also https://balsa.wustl.edu/study/show/RVVG
    PREFIXES = makePrefixes('NIFRID')
    PREFIXES['HCPMMP'] = interlex_namespace('hcpmmp/labels')

    @classmethod
    def datagetter(cls):
        """Return all rows of ``cls.source`` as a list of lists of strings."""
        # newline='' lets the csv module do its own newline handling, so
        # quoted fields that contain line breaks are parsed correctly
        with open(cls.source, 'rt', newline='') as f:
            return list(csv.reader(f))

    @classmethod
    def dataproc(cls, graph, data):
        """Add one class per row of ``data`` to ``graph``.

        ``data`` is handed to a ``rowParse`` subclass whose methods are named
        after the csv columns; each method receives that column's value
        (presumably once per row, in column order -- confirm against
        ``rowParse``).
        """

        class hcp2016(rowParse):
            def Parcellation_Index(self, value):
                self.id_ = 'HCPMMP:' + value  # safe because reset every row (ish)
                graph.add_class(self.id_, cls.concept.curie)

            def Area_Name(self, value):
                value = value.strip()
                graph.add_trip(self.id_, ACRONYM, value)

            def Area_Description(self, value):
                value = value.strip()
                # the rdfs label is prefixed with the ontology shortname,
                # the parcellation label is the bare description
                graph.add_trip(self.id_, rdfs.label,
                               '(%s) ' % cls.ont.shortname + value)
                graph.add_trip(self.id_, PARCLAB, value)

            def Newly_Described(self, value):
                if value in ('Yes*', 'Yes'):
                    graph.add_trip(self.id_, 'NIFRID:definingCitation',
                                   'Glasser and Van Essen 2016')

            def Results_Sections(self, value):
                pass

            def Other_Names(self, value):
                for name in value.split(','):
                    name = name.strip()
                    if not name:
                        continue
                    # names of three characters or fewer are stored as acronyms
                    if len(name) <= 3:
                        graph.add_trip(self.id_, ACRONYM, name)
                    else:
                        graph.add_trip(self.id_, SYNONYM, name)

            def Key_Studies(self, value):
                for study in value.split(','):
                    study = study.strip()
                    if study:
                        graph.add_trip(self.id_, 'NIFRID:definingCitation',
                                       study)

        hcp2016(data)
Пример #18
0
    kwargs = {
        'uberon_id':uid,
        'uberon_label':uberon_labs[uid],
        'aba_id':aid,
        'aba_label':abalabs[aid],
        'aba_syns':'\n'.join(sorted(abasyns[aid] + abaacro[aid])),
        'uberon_syns':'\n'.join(insert_uberon)
    }
    return to_format.format(**kwargs)

#text = '\n\n'.join([make_record(uid, aid[0]) for uid, aid in sorted(u_a_map.items()) if aid])

#with open('aba_uberon_syn_review.txt', 'wt') as f:
    #f.write(text)

# summary counts for the Uberon / ABA comparison
print('total uberon terms checked:', len(uberon_labs))
print('total aba terms:           ', len(abalabs))
print('total uberon with aba xref:', sum(1 for a in u_a_map.values() if a))

# bridge ontology that links each Uberon term to its first ABA reference
ubridge = createOntology(
    'uberon-parcellation-mappings',
    'Uberon Parcellation Mappings',
    makePrefixes('owl', 'ilx', 'UBERON', 'MBA'),
)
for u, arefs in u_a_map.items():
    if not arefs:
        continue
    # TODO check for bad assumptions here
    ubridge.add_trip(u, 'ilx:delineatedBy', arefs[0])
    ubridge.add_trip(arefs[0], 'ilx:delineates', u)

ubridge.write()
embed()

Пример #19
0
def chebi_make():
    """Generate the NIF ChEBI slim and ChEBI deprecated-id ontologies.

    Downloads the nightly ``chebi.owl.gz``, keeps the classes whose ids are
    listed in ``resources/chebi-subset-ids.txt``, the classes that name one of
    those ids as ``hasAlternativeId``, and every class reachable from them
    through ``subClassOf`` / ``someValuesFrom`` references. The reduced xml is
    parsed with rdflib and its triples are copied into two ontologies made
    with ``createOntology`` (``chebislim`` and ``chebi-dead``). Both are
    written and an interactive shell is then opened with ``embed()``.

    Raises:
        requests.HTTPError: if the download does not return a success status.
        ValueError: if a requested id has no triples, is not the alternative
            id of another class, and is not listed in ``depwor``.
    """
    PREFIXES = makePrefixes('definition', 'hasRole', 'BFO', 'CHEBI', 'owl',
                            'skos', 'oboInOwl')
    dPREFIXES = makePrefixes('CHEBI', 'replacedBy', 'owl', 'skos')
    ug = makeGraph('utilgraph', prefixes=PREFIXES)

    IDS_FILE = 'resources/chebi-subset-ids.txt'
    with open(IDS_FILE, 'rt') as f:
        ids_raw = {line.strip() for line in f}
        ids = {ug.expand(id_).toPython() for id_ in ids_raw}

    #gzed = requests.get('http://localhost:8000/chebi.owl')
    #raw = BytesIO(gzed.content)
    gzed = requests.get(
        'http://ftp.ebi.ac.uk/pub/databases/chebi/ontology/nightly/chebi.owl.gz'
    )
    # fail here on an http error instead of inside gzip.decompress
    gzed.raise_for_status()
    raw = BytesIO(gzip.decompress(gzed.content))
    t = etree.parse(raw)
    r = t.getroot()
    cs = list(r)  # direct children of the root (getchildren() is deprecated)
    owl_class_tag = '{http://www.w3.org/2002/07/owl#}Class'
    classes = [
        _ for _ in cs if _.tag == owl_class_tag and _.values()[0] in ids
    ]
    ontology = t.xpath("/*[local-name()='RDF']/*[local-name()='Ontology']")
    ops = t.xpath(
        "/*[local-name()='RDF']/*[local-name()='ObjectProperty']")  # TODO
    rpl_check = t.xpath(
        "/*[local-name()='RDF']/*[local-name()='Class']/*[local-name()='hasAlternativeId']"
    )
    rpl_dict = {
        _.text: _.getparent()
        for _ in rpl_check if _.text in ids_raw
    }  # we also need to have any new classes that have replaced old ids
    also_classes = list(rpl_dict.values())

    def rec(start_set, done):
        """Follow class references out of ``start_set``, skipping ``done``.

        Returns a ``(supers, ids_)`` pair: the class elements found at every
        level of the recursion and the set of all referenced ids.
        """
        ids_ = set()
        for c in start_set:
            tree = etree.ElementTree(c)
            # direct superclasses named by an attribute on subClassOf
            ids_.update([
                _.items()[0][1] for _ in tree.xpath(
                    "/*[local-name()='Class']/*[local-name()='subClassOf']")
                if _.items()
            ])
            # classes named inside subClassOf restrictions
            ids_.update([
                _.items()[0][1] for _ in tree.xpath(
                    "/*[local-name()='Class']/*[local-name()='subClassOf']/*[local-name()='Restriction']/*[local-name()='someValuesFrom']"
                ) if _.items()
            ])
        supers = [
            _ for _ in cs if _.tag == owl_class_tag
            and _.values()[0] in ids_ and _ not in done
        ]
        if supers:
            msup, mids = rec(supers, done + supers)
            supers += msup
            ids_.update(mids)
        return supers, ids_

    a = ontology + ops + classes + also_classes
    more, mids = rec(a, a)
    all_ = set(a + more)
    r.clear()  # wipe all the stuff we don't need
    for c in all_:
        r.append(c)
    data = etree.tostring(r)

    g = rdflib.Graph()
    g.parse(
        data=data
    )  # now _this_ is stupidly slow (like 20 minutes of slow) might make more sense to do the xml directly?

    src_version = list(
        g.query(
            'SELECT DISTINCT ?match WHERE { ?temp rdf:type owl:Ontology . ?temp owl:versionIRI ?match . }'
        ))[0][0]

    new_graph = createOntology(
        'chebislim',
        'NIF ChEBI slim',
        PREFIXES,
        'chebislim',
        'This file is generated by pyontutils/slimgen from the full ChEBI nightly at versionIRI %s based on the list of terms in %s.'
        % (src_version, IDS_FILE),
        remote_base='http://ontology.neuinfo.org/NIF/')

    # NOTE(review): the description below reads "resolvablefrom" (missing
    # space); left untouched because it is written into the generated file
    chebi_dead = createOntology(
        'chebi-dead',
        'NIF ChEBI deprecated',
        dPREFIXES,
        'chebidead',
        'This file is generated by pyontutils/slimgen to make deprecated classes resolvablefrom the full ChEBI nightly at versionIRI %s based on the list of terms in %s.'
        % (src_version, IDS_FILE),
        remote_base='http://ontology.neuinfo.org/NIF/')

    # ids allowed to have neither triples nor a replacement
    depwor = {
        'CHEBI:33243': 'natural product',  # FIXME remove these?
        'CHEBI:36809': 'tricyclic antidepressant',
    }

    for id_ in sorted(
            set(ids_raw) | set((ug.g.namespace_manager.qname(_)
                                for _ in mids))):
        eid = ug.expand(id_)
        trips = list(g.triples((eid, None, None)))
        if not trips:
            #looks for the id_ as a literal
            alts = list(
                g.triples((
                    None,
                    rdflib.term.URIRef(
                        'http://www.geneontology.org/formats/oboInOwl#hasAlternativeId'
                    ),
                    rdflib.Literal(
                        id_,
                        datatype=rdflib.term.URIRef(
                            'http://www.w3.org/2001/XMLSchema#string')))))
            if alts:
                replaced_by, _, __ = alts[0]
                if replaced_by.toPython(
                ) not in ids:  #  we need to add any replacment classes to the bridge
                    print('REPLACED BY NEW CLASS', id_)
                    for t in g.triples((replaced_by, None, None)):
                        new_graph.add_recursive(t, g)
                chebi_dead.add_class(id_)
                chebi_dead.add_node(id_, 'replacedBy:', replaced_by)
                chebi_dead.add_node(id_, rdflib.OWL.deprecated, True)
            else:
                if id_ not in depwor:
                    # an ordinary exception so normal error handling sees it
                    raise ValueError('wtf error', id_)
        else:
            for trip in trips:
                new_graph.add_recursive(trip, g)

    # https://github.com/ebi-chebi/ChEBI/issues/3294
    madness = new_graph.expand('oboInOwl:hasRelatedSynonym'), rdflib.Literal(
        '0', datatype=rdflib.namespace.XSD.string)
    for a in new_graph.g.subjects(*madness):
        new_graph.g.remove((a, ) + madness)

    new_graph.write()
    chebi_dead.write()
    embed()
Пример #20
0
from hierarchies import creatTree, flatten
from parcellation import OntMeta

# Clients for the service at localhost:9000/scigraph, both created with
# cache=True (presumably the graph and vocabulary endpoints -- confirm
# against the imports of Graph and Vocabulary).
sgg = Graph(cache=True, basePath='http://localhost:9000/scigraph')
sgv = Vocabulary(cache=True, basePath='http://localhost:9000/scigraph')

# Argument bundle with the four fields named below.
Query = namedtuple('Query', ['root', 'relationshipType', 'direction', 'depth'])

# Full iris of the annotation properties referred to by short name.
CON = 'http://www.geneontology.org/formats/oboInOwl#consider'
DBX = 'http://www.geneontology.org/formats/oboInOwl#hasDbXref'  #FIXME also behaves as objectProperty :/
AID = 'http://www.geneontology.org/formats/oboInOwl#hasAlternativeId'
IRBC = 'http://ontology.neuinfo.org/NIF/Backend/BIRNLex_annotation_properties.owl#isReplacedByClass'

# Base prefix set.
PREFIXES = makePrefixes(
    'UBERON',
    'ro',
    'owl',
    'skos',
)
# NIF specific prefixes; extended with everything in PREFIXES just below.
NIFPREFIXES = makePrefixes(
    'NIFGA',
    'oboInOwl',
    'replacedBy',
)

NIFPREFIXES.update(PREFIXES)

# Input locations: two files in a NIF-Ontology checkout under the user's
# home directory and the remote uberon bridge.
nifga_path = os.path.expanduser('~/git/NIF-Ontology/ttl/NIF-GrossAnatomy.ttl')
uberon_path = os.path.expanduser('~/git/NIF-Ontology/ttl/external/uberon.owl')
uberon_bridge_path = 'http://berkeleybop.org/ontologies/uberon/bridge/uberon-bridge-to-nifstd.owl'
#bridge_path = os.path.expanduser('~/git/NIF-Ontology/ttl/uberon-bridge-to-nifstd.ttl')  # scigraph's got us
Пример #21
0
    kwargs = {
        'uberon_id':uid,
        'uberon_label':uberon_labs[uid],
        'aba_id':aid,
        'aba_label':abalabs[aid],
        'aba_syns':'\n'.join(sorted(abasyns[aid] + abaacro[aid])),
        'uberon_syns':'\n'.join(insert_uberon)
    }
    return to_format.format(**kwargs)

# one review record per Uberon term that has at least one ABA reference
text = '\n\n'.join(
    make_record(uid, aid[0])
    for uid, aid in sorted(u_a_map.items())
    if aid
)

with open('aba_uberon_syn_review.txt', 'wt') as f:
    f.write(text)

# summary counts for the Uberon / ABA comparison
print('total uberon terms checked:', len(uberon_labs))
print('total aba terms:           ', len(abalabs))
print('total uberon with aba xref:', sum(1 for a in u_a_map.values() if a))

# bridge graph that links each Uberon term to its first ABA reference
ubridge = makeGraph(
    'uberon-parcellation-mappings',
    prefixes=makePrefixes('ilx', 'UBERON', 'MBA'),
)
for u, arefs in u_a_map.items():
    if not arefs:
        continue
    # TODO check for bad assumptions here
    ubridge.add_node(u, 'ilx:delineatedBy', arefs[0])
    ubridge.add_node(arefs[0], 'ilx:delineates', u)

ubridge.write()
embed()

Пример #22
0
def chebi_imp():
    """Write the ``chebi-bridge`` graph of triples NIF files add to ChEBI ids.

    Steps, in order:

    1. Read the ids listed in ``chebi-subset-ids.txt`` and expand them with
       the utility graph.
    2. Parse chebislim, chebi-dead, NIF-Chemical and NIF-Molecule cumulatively
       into ``g`` and record, after each stage, how many triples each id has
       as subject.
    3. Keep for review the ids whose count changed between stages and that had
       at least one triple in chebislim alone.
    4. For those ids take the (subject, object-as-string) pairs present in the
       merged graph but absent from chebislim, and copy the matching triples
       into the bridge graph via ``add_recursive``.
    5. Write the bridge graph and call ``embed()``.

    Locals that the function itself never reads again (``chemg``, ``molg``,
    ``out``, ``diff_`` ...) are kept on purpose: presumably they are meant to
    be inspected from the shell started by ``embed()`` -- TODO confirm.
    """
    PREFIXES = makePrefixes('definition',
                            'hasRole',
                            'CHEBI',
                            'owl',
                            'skos',
                            'oboInOwl')
    ug = makeGraph('utilgraph', prefixes=PREFIXES)
    with open('chebi-subset-ids.txt', 'rt') as f:
        ids_raw = {line.strip() for line in f}
        # ids_raw is already stripped, so expand it directly
        ids = sorted({ug.expand(_) for _ in ids_raw})

    def check_chebis(g):
        """Return, for each id in ``ids`` order, the number of triples with that subject."""
        # only the count is needed, so the matches are not sorted
        return [len(list(g.triples((id_, None, None)))) for id_ in ids]

    g = rdflib.Graph()      # accumulates every file below
    cg = rdflib.Graph()     # chebislim only, the baseline for the diff
    chemg = rdflib.Graph()  # NIF-Chemical only
    molg = rdflib.Graph()   # NIF-Molecule only
    g.parse('/home/tom/git/NIF-Ontology/ttl/generated/chebislim.ttl', format='turtle')
    cg.parse('/home/tom/git/NIF-Ontology/ttl/generated/chebislim.ttl', format='turtle')
    a1 = check_chebis(g)
    g.parse('/home/tom/git/NIF-Ontology/ttl/generated/chebi-dead.ttl', format='turtle')
    a2 = check_chebis(g)
    g.parse('/home/tom/git/NIF-Ontology/ttl/NIF-Chemical.ttl', format='turtle')
    chemg.parse('/home/tom/git/NIF-Ontology/ttl/NIF-Chemical.ttl', format='turtle')
    a3 = check_chebis(g)
    g.parse('/home/tom/git/NIF-Ontology/ttl/NIF-Molecule.ttl', format='turtle')
    molg.parse('/home/tom/git/NIF-Ontology/ttl/NIF-Molecule.ttl', format='turtle')
    a4 = check_chebis(g)

    # per-id tuple of counts after each of the four stages
    matches = list(zip(a1, a2, a3, a4))
    # True where the count is not the same at every stage
    changed = [len(set(_)) != 1 for _ in matches]
    # ids whose count changed and that had triples in chebislim to begin with
    review = [(id_, m) for id_, did_change, m in zip(ids, changed, matches)
              if did_change and m[0]]
    # for reasons currently lost to implementation details this returns a list of empty lists if run from ipython
    wat_c = [set([(s, str(o.toPython())) for s, p, o in cg.triples((u, None, None))]) for u, _ in review]
    wat_a = [set([(s, str(o.toPython())) for s, p, o in g.triples((u, None, None))]) for u, _ in review]
    wat_c_ = [set(cg.triples((u, None, None))) for u, _ in review]  # for reasons currently lost to implementation details this returns a list of empty lists if run from ipython
    wat_a_ = [set(g.triples((u, None, None))) for u, _ in review]  # for reasons currently lost to implementation details this returns a list of empty lists if run from ipython
    diff = [a - c for a, c in zip(wat_a, wat_c)]
    diff_ = [a - c for a, c in zip(wat_a_, wat_c_)]

    cb = makeGraph('chebi-bridge', makePrefixes('CHEBI',
                                                'owl',
                                                'skos',
                                                'dc',
                                                'hasRole',
                                                'NIFCHEM',
                                                'NIFMOL',
                                                'OBOANN',
                                                'BIRNANN'))
    out = []
    for set_ in diff:
        if not set_:
            # Nothing was added for this id. Without this guard `sub` below
            # would be unbound (first set) or left over from the previous id.
            continue
        for sub, string in sorted(set_):
            for t in g.triples((sub, None, None)):
                py = t[-1].toPython()
                if py == string and not py.startswith('ub'):  # ignore restrictions... this is safe because nifmol and nifchem dont have any restrictions...
                    cb.add_recursive(t, g)
        cb.add_class(sub)  # only need to go at the end because sub is the same for each set

    cb.write()  # re-add only the missing edges so that we can zap them from NIF-Molecule and NIF-Chemical (recurse is needed...)
    embed()
Пример #23
0
def chebi_make():
    """Generate the ``chebislim`` and ``chebi-dead`` graphs from ChEBI nightly.

    Steps, in order:

    1. Read the wanted ids from ``chebi-subset-ids.txt``.
    2. Download and gunzip the nightly ``chebi.owl.gz`` and parse it as XML.
    3. Select the ontology header, the object properties, the classes whose
       first attribute value is a wanted id, and the classes carrying a
       ``hasAlternativeId`` equal to a wanted id; then recursively pull in the
       classes referenced from ``subClassOf`` / ``someValuesFrom``.
    4. Rebuild the XML root from only those elements and parse it with rdflib.
    5. For every wanted or referenced id, copy its triples into ``chebislim``;
       ids with no triples that appear as an alternative id are recorded in
       ``chebi-dead`` as deprecated and replaced by the owning class.
    6. Write both graphs and call ``embed()``.

    Raises:
        requests.HTTPError: if the download returns an error status.
        ValueError: if an id has no triples, is not an alternative id of any
            class, and is not listed in ``depwor``.
    """
    PREFIXES = makePrefixes('definition',
                            'hasRole',
                            'CHEBI',
                            'owl',
                            'skos',
                            'oboInOwl')
    dPREFIXES = makePrefixes('CHEBI','replacedBy','owl','skos')
    ug = makeGraph('utilgraph', prefixes=PREFIXES)

    IDS_FILE = 'chebi-subset-ids.txt'
    with open(IDS_FILE, 'rt') as f:
        ids_raw = set((_.strip() for _ in f.readlines()))  # ids as written in the file
        ids = set((ug.expand(_.strip()).toPython() for _ in ids_raw))  # same ids, expanded to plain strings

    #gzed = requests.get('http://localhost:8000/chebi.owl')
    #raw = BytesIO(gzed.content)
    gzed = requests.get('http://ftp.ebi.ac.uk/pub/databases/chebi/ontology/nightly/chebi.owl.gz')
    # fail on an HTTP error here instead of trying to gunzip an error page
    gzed.raise_for_status()
    raw = BytesIO(gzip.decompress(gzed.content))
    t = etree.parse(raw)
    r = t.getroot()
    cs = list(r)  # direct children of the root; replaces the deprecated getchildren()
    classes = [_ for _ in cs if _.tag == '{http://www.w3.org/2002/07/owl#}Class' and _.values()[0] in ids]
    ontology = t.xpath("/*[local-name()='RDF']/*[local-name()='Ontology']")
    ops = t.xpath("/*[local-name()='RDF']/*[local-name()='ObjectProperty']")  # TODO
    wanted = [etree.ElementTree(_) for _ in classes]
    rpl_check = t.xpath("/*[local-name()='RDF']/*[local-name()='Class']/*[local-name()='hasAlternativeId']")
    rpl_dict = {_.text:_.getparent() for _ in rpl_check if _.text in ids_raw } # we also need to have any new classes that have replaced old ids
    also_classes = list(rpl_dict.values())

    def rec(start_set, done):
        """Return (classes, ids) referenced from ``start_set``, followed recursively.

        ``done`` lists elements already collected; they are not returned again.
        """
        ids_ = set()
        for c in start_set:
            # first attribute value of each subClassOf element that has attributes
            ids_.update([_.items()[0][1] for _ in etree.ElementTree(c).xpath("/*[local-name()='Class']/*[local-name()='subClassOf']") if _.items()])
            # same for someValuesFrom inside a subClassOf restriction
            ids_.update([_.items()[0][1] for _ in etree.ElementTree(c).xpath("/*[local-name()='Class']/*[local-name()='subClassOf']/*[local-name()='Restriction']/*[local-name()='someValuesFrom']") if _.items()])
        supers = [_ for _ in cs if _.tag == '{http://www.w3.org/2002/07/owl#}Class' and _.values()[0] in ids_ and _ not in done]
        if supers:
            msup, mids = rec(supers, done + supers)
            supers += msup
            ids_.update(mids)
        return supers, ids_

    a = ontology + ops + classes + also_classes
    more, mids = rec(a, a)
    all_ = set(a + more)
    r.clear()  # wipe all the stuff we don't need
    for c in all_:
        r.append(c)
    data = etree.tostring(r)

    g = rdflib.Graph()
    g.parse(data=data)  # now _this_ is stupidly slow (like 20 minutes of slow) might make more sense to do the xml directly?

    # versionIRI of the source ontology, quoted in the generated descriptions
    src_version = list(g.query('SELECT DISTINCT ?match WHERE { ?temp rdf:type owl:Ontology . ?temp owl:versionIRI ?match . }'))[0][0]

    ont = OntMeta('http://ontology.neuinfo.org/NIF/ttl/generated/',
                  'chebislim',
                  'NIF ChEBI slim',
                  'chebislim',
                  'This file is generated by pyontutils/slimgen from the full ChEBI nightly at versionIRI %s based on the list of terms in %s.' % (src_version, IDS_FILE),
                  TODAY)
    # FIXME the description below reads 'resolvablefrom' (missing space); it is
    # left untouched here because it ends up in the generated file
    dont = OntMeta('http://ontology.neuinfo.org/NIF/ttl/generated/',
                  'chebi-dead',
                  'NIF ChEBI deprecated',
                  'chebidead',
                  'This file is generated by pyontutils/slimgen to make deprecated classes resolvablefrom the full ChEBI nightly at versionIRI %s based on the list of terms in %s.' % (src_version, IDS_FILE),
                  TODAY)

    new_graph = makeGraph(ont.filename, PREFIXES)
    ontid = ont.path + ont.filename + '.ttl'
    new_graph.add_ont(ontid, *ont[2:])
    chebi_dead = makeGraph(dont.filename, dPREFIXES)
    dontid = dont.path + dont.filename + '.ttl'
    chebi_dead.add_ont(dontid, *dont[2:])

    # ids that may legitimately have neither triples nor a replacement
    depwor = {'CHEBI:33243':'natural product',  # FIXME remove these?
              'CHEBI:36809':'tricyclic antidepressant',
             }

    for id_ in sorted(set(ids_raw) | set((ug.g.namespace_manager.qname(_) for _ in mids))):
        eid = ug.expand(id_)
        trips = list(g.triples((eid, None, None)))
        if not trips:
            #looks for the id_ as a literal
            alts = list(g.triples((
                None,
                rdflib.term.URIRef('http://www.geneontology.org/formats/oboInOwl#hasAlternativeId'),
                rdflib.Literal(id_, datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')))))
            if alts:
                replaced_by, _, __ = alts[0]
                if replaced_by.toPython() not in ids:  #  we need to add any replacment classes to the bridge
                    print('REPLACED BY NEW CLASS', id_)
                    for t in g.triples((replaced_by, None, None)):
                        new_graph.add_recursive(t, g)
                chebi_dead.add_class(id_)
                chebi_dead.add_node(id_, 'replacedBy:', replaced_by)
                chebi_dead.add_node(id_, rdflib.OWL.deprecated, True)
            else:
                if id_ not in depwor:
                    # ValueError rather than BaseException so that ordinary
                    # `except Exception` handlers see the failure
                    raise ValueError('wtf error', id_)
        else:
            for trip in trips:
                new_graph.add_recursive(trip, g)

    new_graph.write()
    chebi_dead.write()
    embed()
Пример #24
0
#!/usr/bin/env python3.5

import rdflib
from utils import makePrefixes, makeGraph

PREFIXES = makePrefixes('NIFGA', 'NIFSTD', 'owl')

# Load the uberon -> nifstd bridge (RDF/XML).
g = rdflib.Graph()
g.parse(
    'http://purl.obolibrary.org/obo/uberon/bridge/uberon-bridge-to-nifstd.owl',
    format='xml')
name = 'NIFGA-Equivs'
ng = makeGraph(name, PREFIXES)

# For every owl:equivalentClass triple, the original subject is dropped and a
# new one is built from the NIFGA prefix plus the last path segment of the
# object; predicate and object are kept.
nifga_base = PREFIXES['NIFGA']
for _, predicate, target in g.triples((None, rdflib.OWL.equivalentClass, None)):
    fragment = target.rsplit('/', 1)[-1]
    ng.g.add((rdflib.URIRef(nifga_base + fragment), predicate, target))

ng.add_ont('http://ontology.neuinfo.org/NIF/ttl/generated/' + name + '.ttl',
           'NIFGA to NIFSTD mappings')
ng.write()
Пример #25
0
#!/usr/bin/env python3.5

import os
from glob import glob
from rdflib.namespace import SKOS
from parcellation import OntMeta, TODAY
from utils import makeGraph, makePrefixes

PREFIXES = makePrefixes(
    "SCR",
    "MBA",
    "NIFMOL",
    "NIFNEURON",
    "NIFCELL",
    "NIFGA",
    "UBERON",
    "PR",
    "NIFNEURMOR",
    "skos",
    "owl",
)

# Metadata for the generated ontology file.
ont = OntMeta(
    "http://ontology.neuinfo.org/NIF/ttl/generated/",
    "ksdesc-defs",
    "Knolwedge Space Defs",
    "KSDEFS",
    "Definitions from knowledge space descriptions. Generated by pyontutils/ksdesc_bridge.py",
    TODAY,
)

ontid = ont.path + ont.filename + ".ttl"
g = makeGraph(ont.filename, prefixes=PREFIXES)
g.add_ont(ontid, *ont[2:])

# Every entry directly under the ksdesc checkout.
top_level = glob(os.path.expanduser("~/git/ksdesc/") + "*")

for putative_dir in top_level:
    if not os.path.isdir(putative_dir):
        continue
    dir_name = os.path.basename(putative_dir)
    for putative_md in glob(putative_dir + "/*.md"):
        # identifier is "<directory name>:<markdown file name without extension>"
        md_stem = os.path.splitext(os.path.basename(putative_md))[0]
        ident = dir_name + ":" + md_stem
Пример #26
0
#!/usr/bin/env python3.5

import rdflib
from utils import makePrefixes, makeGraph

PREFIXES = makePrefixes('NIFGA', 'NIFSTD', 'owl')

g = rdflib.Graph()
g.parse('http://purl.obolibrary.org/obo/uberon/bridge/uberon-bridge-to-nifstd.owl', format='xml')
name = 'NIFGA-Equivs'
ng = makeGraph(name, PREFIXES)

# Copy each owl:equivalentClass triple with its subject replaced by
# <NIFGA prefix> + <last '/'-separated segment of the object>.
for _subject, pred, obj in g.triples((None, rdflib.OWL.equivalentClass, None)):
    new_subject = rdflib.URIRef(PREFIXES['NIFGA'] + obj.rsplit('/', 1)[-1])
    ng.g.add((new_subject, pred, obj))

ng.add_ont('http://ontology.neuinfo.org/NIF/ttl/generated/' + name + '.ttl', 'NIFGA to NIFSTD mappings')
ng.write()
Пример #27
0
        'uberon_id': uid,
        'uberon_label': uberon_labs[uid],
        'aba_id': aid,
        'aba_label': abalabs[aid],
        'aba_syns': '\n'.join(sorted(abasyns[aid] + abaacro[aid])),
        'uberon_syns': '\n'.join(insert_uberon)
    }
    return to_format.format(**kwargs)


# Records for the uberon ids that have a non-empty ABA xref list, in sorted
# order; the first xref is the one passed to make_record.
mapped = (pair for pair in sorted(u_a_map.items()) if pair[1])
text = '\n\n'.join(make_record(uid, aid[0]) for uid, aid in mapped)

with open('aba_uberon_syn_review.txt', 'wt') as f:
    f.write(text)

print('total uberon terms checked:', len(uberon_labs))
print('total aba terms:           ', len(abalabs))
print('total uberon with aba xref:', sum(1 for a in u_a_map.values() if a))

ubridge = makeGraph('uberon-parcellation-mappings',
                    prefixes=makePrefixes('ilx', 'UBERON', 'MBA'))
for u, arefs in u_a_map.items():
    if not arefs:
        continue
    # TODO check for bad assumptions here
    aref = arefs[0]
    # add the edge in both directions
    ubridge.add_node(u, 'ilx:delineatedBy', aref)
    ubridge.add_node(aref, 'ilx:delineates', u)

ubridge.write()
embed()
Пример #28
0
# CURIE constants.
expression_edge = 'ilx:hasExpressionPhenotype'
expression_defined = 'ilx:ExpressionClassifiedNeuron'
NIFCELL_NEURON = 'NIFCELL:sao1417703748'

# str.format template with one named field per component, concatenated
# without separators.
syntax = '{region}{layer_or_subregion}{expression}{ephys}{molecular}{morph}{cellOrNeuron}'
# str.format template: the value is right-aligned in a width-7 field padded with '0'.
ilx_base = 'ILX:{:0>7}'

PREFIXES = makePrefixes(
    'ilx', 'ILX', 'ILXREPLACE',
    'skos', 'owl', 'dc', 'nsu',
    'NCBITaxon', 'oboInOwl',
    'NIFRID', 'NIFQUAL', 'NIFCELL', 'NIFMOL',
    'UBERON', 'PR',
)


def replace_object(find, replace, graph):
    """Add ``(s, p, replace)`` for every triple in ``graph`` whose object is ``find``.

    Note that this is not a sed 's/find/replace/g'.

    Args:
        find: object to look for; it is passed through ``graph.expand`` first.
        replace: object handed to ``graph.add_trip`` for each match.
        graph: wrapper exposing ``expand``, ``add_trip`` and the underlying
            graph as ``.g``.
    """
    find = graph.expand(find)
    # Materialise the matches before adding anything: ``triples`` is consumed
    # lazily, and inserting into the graph while it is still being iterated is
    # only safe if the store happens to copy its indexes first.
    matches = list(graph.g.triples((None, None, find)))
    for s, p, _ in matches:
        graph.add_trip(s, p, replace)
Пример #29
0
#!/usr/bin/env python3.6

import os
from glob import glob
from rdflib.namespace import SKOS
from parcellation import OntMeta
from utils import TODAY, makeGraph, makePrefixes

PREFIXES = makePrefixes(
    'SCR', 'MBA', 'NIFMOL', 'NIFNEURON', 'NIFCELL', 'NIFGA', 'UBERON', 'PR',
    'NIFNEURMOR', 'skos', 'owl')

# Metadata for the generated ontology file.
ont = OntMeta(
    'http://ontology.neuinfo.org/NIF/ttl/generated/',
    'ksdesc-defs',
    'Knolwedge Space Defs',
    'KSDEFS',
    'Definitions from knowledge space descriptions. Generated by pyontutils/ksdesc_bridge.py',
    TODAY)

ontid = ont.path + ont.filename + '.ttl'
g = makeGraph(ont.filename, prefixes=PREFIXES)
g.add_ont(ontid, *ont[2:])

# Every entry directly under the ksdesc checkout.
top_level = glob(os.path.expanduser('~/git/ksdesc/') + '*')

for putative_dir in top_level:
    if not os.path.isdir(putative_dir):
        continue
    dir_name = os.path.basename(putative_dir)
    for putative_md in glob(putative_dir + '/*.md'):
        # "<directory name>:<markdown file name without extension>"
        md_stem = os.path.splitext(os.path.basename(putative_md))[0]
        ident = dir_name + ':' + md_stem
        print(ident)
        with open(putative_md, 'rt') as f:
            def_ = f.read()
Пример #30
0
from hierarchies import creatTree, flatten
from parcellation import OntMeta, TODAY

# Graph and Vocabulary clients, both on the local scigraph endpoint with
# caching enabled.
sgg = Graph(cache=True, basePath='http://localhost:9000/scigraph')
sgv = Vocabulary(cache=True, basePath='http://localhost:9000/scigraph')

# Record type with the fields root, relationshipType, direction and depth.
Query = namedtuple('Query', 'root relationshipType direction depth')

# Full IRIs of the annotation properties named below.
CON = 'http://www.geneontology.org/formats/oboInOwl#consider'
DBX = 'http://www.geneontology.org/formats/oboInOwl#hasDbXref'  #FIXME also behaves as objectProperty :/
AID = 'http://www.geneontology.org/formats/oboInOwl#hasAlternativeId'
IRBC = 'http://ontology.neuinfo.org/NIF/Backend/BIRNLex_annotation_properties.owl#isReplacedByClass'

# Shared prefix map, and the NIF map extended with it.
PREFIXES = makePrefixes('UBERON', 'ro', 'owl', 'skos')
NIFPREFIXES = makePrefixes('NIFGA', 'oboInOwl', 'replacedBy')
NIFPREFIXES.update(PREFIXES)

# Two files in the local NIF-Ontology checkout and the remote bridge.
nifga_path = os.path.expanduser('~/git/NIF-Ontology/ttl/NIF-GrossAnatomy.ttl')
uberon_path = os.path.expanduser('~/git/NIF-Ontology/ttl/external/uberon.owl')
uberon_bridge_path = 'http://berkeleybop.org/ontologies/uberon/bridge/uberon-bridge-to-nifstd.owl'
#bridge_path = os.path.expanduser('~/git/NIF-Ontology/ttl/uberon-bridge-to-nifstd.ttl')  # scigraph's got us

#uberon_obsolete = {'UBERON:0022988',  # obsolete regional part of thalamaus