Пример #1
0
def main():
    """ Entry point, currently only runs `test()` and then returns.

        Everything below the bare `return` is unreachable scratch code
        for manually exercising `mapTempToIndex` against the local
        ontology repository. """

    #InterLexSneechenator()
    test()

    return
    # testing
    # NOTE(review): nothing from here on executes while the `return`
    # above is in place; `index_graph` is not defined in this function
    # and must come from an enclosing scope, confirm before enabling
    index_graph.bind('ILX', ILX)
    #[index_graph.add((npokb[str(i)], rdf.type, owl.Class)) for i in range(1, 11)]
    #[index_graph.add((npokb[str(i)], ilxtr.hasTemporaryId, TEMP[str(i)])) for i in range(1, 11)]

    # collect (input, output) graph pairs, one per phenotype file
    ios = []
    for eff in ('phenotype-core.ttl', 'phenotypes.ttl'):
        path = auth.get_path('ontology-local-repo') / eff
        input_graph = OntGraph(path=path)
        input_graph.parse()
        output_graph = input_graph.mapTempToIndex(index_graph, ILX, ilxtr)
        ios.append((input_graph, output_graph))

    # only the first pair is compared
    input_graph, output_graph = ios[0]
    a, r, c = output_graph.subjectsChanged(input_graph)
    index_graph.write()
    # [o.write() for i, o, in ios]  # when ready
    #from sparcur.paths import Path
    #Path(index_graph.path).xopen()
    breakpoint()
Пример #2
0
class TestOntGraph(unittest.TestCase):
    """ Tests for OntGraph.subjectsChanged using two single triple graphs. """

    # ts1 and ts2 share subject and predicate and differ only in the object
    ts1 = ((ilxtr.a, ilxtr.b, ilxtr.c), )
    ts2 = ((ilxtr.a, ilxtr.b, ilxtr.d), )

    def populate(self, graph, triples):
        """ Add every triple in triples to graph. """
        for triple in triples:
            graph.add(triple)

    def setUp(self):
        """ Start every test from two fresh empty graphs. """
        self.graph1 = OntGraph()
        self.graph2 = OntGraph()

    def test_subjectsChanged(self):
        """ Same subject with a different object: only the third
            element of the result should be truthy. """
        self.populate(self.graph1, self.ts1)
        self.populate(self.graph2, self.ts2)
        # d keeps the whole result so that it shows up in assertion messages
        # a, r, c are presumably added, removed, changed -- TODO confirm
        d = a, r, c = self.graph1.subjectsChanged(self.graph2)
        assert not a, d
        assert not r, d  # was a second `assert not a`, r was never checked
        assert c, d

    def test_not_subjectsChanged(self):
        """ Identical content in both graphs: every element of the
            result should be falsy. """
        self.populate(self.graph1, self.ts1)
        self.populate(self.graph2, self.ts1)
        d = a, r, c = self.graph1.subjectsChanged(self.graph2)
        assert not a, d
        assert not r, d
        assert not c, d
Пример #3
0
def load_header(filepath, remote=False):
    """ Parse only the owl:Ontology header of an ontology file.

        filepath: local path, or a url when remote is true. A `.ttl`
            suffix selects turtle, anything else is assumed to be rdf/xml.
        remote: when true the content is fetched with requests.get
            instead of being read from disk.

        Returns an OntGraph. The graph is empty when the raw content
        does not mention owl:Ontology or, for xml, when no Ontology
        element is found under the RDF root.

        Errors from open (e.g. FileNotFoundError) are deliberately
        not caught here. """

    oo = b'owl:Ontology'
    path = Path(filepath)
    if path.suffix == '.ttl':
        infmt = 'turtle'
    else:
        infmt = 'xml'  # FIXME assumption

    if remote:
        # TODO nonblocking pull these out, fetch, run inner again until done
        resp = requests.get(filepath)
        raw = resp.text.encode()
    else:
        with open(filepath, 'rb') as f:  # do not catch FileNotFoundErrors
            raw = f.read()

    # always bound so that the final return cannot hit an unbound local
    scratch = OntGraph()
    if oo not in raw:  # we only care if there are imports or an ontology iri
        return scratch

    if infmt == 'turtle':
        # the header is everything before the first '###' marker, when there
        # is no marker the whole document is parsed instead of failing to unpack
        data = raw.split(b'###', 1)[0]
    else:  # assume xml, the old `infmt == None` test could never match
        xml_tree = etree.parse(BytesIO(raw))
        xml_root = xml_tree.getroot()
        xml_ontology = xml_tree.xpath(
            "/*[local-name()='RDF']/*[local-name()='Ontology']")
        if not xml_ontology:
            return scratch

        # reduce the document to the root plus its first Ontology element
        xml_root.clear()
        xml_root.append(xml_ontology[0])
        data = etree.tostring(xml_root)

    scratch.parse(data=data, format=infmt)
    return scratch
Пример #4
0
def _populate_published(curation_export, graphs):
    """ Merge the graphs whose datasets have a TEMP:hasDoi value.

        A graph is included once for every sparc:Dataset subject in it
        that has at least one TEMP:hasDoi object. Prefixes are copied
        into the merged graph except for contributor, sample and
        subject, and triples are taken from `g.data`.

        The metadata graph of curation_export is rewritten to use
        'ontologies/published/' identifiers and populated into the
        merged graph, which is returned. """

    skipped_prefixes = ('contributor', 'sample', 'subject')
    old_part, new_part = 'ontologies/', 'ontologies/published/'

    # datasets = [list(g[:rdf.type:sparc.Dataset]) for g in graphs]
    published_graphs = []
    for graph in graphs:
        for dataset in graph[:rdf.type:sparc.Dataset]:
            if list(graph[dataset:TEMP.hasDoi]):
                published_graphs.append(graph)

    merged = OntGraph()
    for graph in published_graphs:
        prefixes = {}
        for prefix, namespace in dict(graph.namespace_manager).items():
            if prefix not in skipped_prefixes:
                prefixes[prefix] = namespace

        merged.namespace_manager.populate_from(prefixes)
        # g.data excludes the owl:Ontology section
        merged.populate_from_triples(graph.data)
        # TODO switch the rdf:type of metadata section on combination to preserve export related metadata

    mg = curation_export.metadata().graph
    mg.namespace_manager.populate(merged)

    new_bi = rdflib.URIRef(mg.boundIdentifier.replace(old_part, new_part))
    new_vi = rdflib.URIRef(mg.versionIdentifier.replace(old_part, new_part))
    # each pair lists the new value first and the current value second
    label_pair = (
        rdflib.Literal("SPARC Consortium curation export published graph"),
        rdflib.Literal("SPARC Consortium curation export graph"))
    replace_pairs = (
        label_pair,
        (new_bi, mg.boundIdentifier),
        (new_vi, mg.versionIdentifier))

    new_meta = mg.replaceIdentifiers(replace_pairs)
    new_meta.populate(merged)

    return merged
Пример #5
0
def main(g=None, ce_g=None, protcur_export_path=None, curation_export_path=None):
    """ Build and write one graph per protocol in the protcur export.

        g: protcur graph. When None it is parsed from
            protcur_export_path if that is given, otherwise it is
            retrieved from the cassava preview export.
        ce_g: curation export graph, resolved the same way using
            curation_export_path.

        Protocols in ce_g are mapped to the first TEMP:hasDoi value
        of a dataset pointing at them via TEMP:hasProtocol. When
        several datasets qualify the last one iterated wins. """

    if g is None:
        if protcur_export_path:
            g = OntGraph().parse(protcur_export_path)
        else:
            ori = OntResIri('https://cassava.ucsd.edu/sparc/preview/exports/protcur.ttl')
            g = ori.graph

    pids = list(g[:rdf.type:sparc.Protocol])

    if ce_g is None:
        if curation_export_path:
            ce_g = OntGraph().parse(curation_export_path)
        else:
            ce_ori = OntResIri('https://cassava.ucsd.edu/sparc/preview/exports/curation-export.ttl')
            ce_g = ce_ori.graph

    with_published_dataset = {}
    for p in ce_g[:rdf.type:sparc.Protocol]:
        for d in ce_g[:TEMP.hasProtocol:p]:
            # query the dois once per dataset instead of twice
            dois = list(ce_g[d:TEMP.hasDoi:])
            if dois:
                with_published_dataset[p] = dois[0]

    graphs = make_graphs(g, pids, with_published_dataset)
    write_graphs(graphs, path=None)
Пример #6
0
 def test_part_of(self):
     """ The simple triples of UBERON:0008933 must include at
         least one triple containing partOf. """
     term = self.OntTerm('UBERON:0008933',
                         label='primary somatosensory cortex')
     graph = OntGraph()
     for triple in term.triples_simple:
         graph.add(triple)

     graph.debug()
     part_of_triples = [triple for triple in term.triples_simple
                        if partOf in triple]
     assert part_of_triples, 'sadness'
Пример #7
0
def make_graphs(g, pids, published):
    """ Return a list with one new OntGraph per identifier in pids.

        Each graph gets the prefixes of g plus the spjl prefix and is
        filled with the subject graph closure of the identifier after
        it has been passed through tobn together with published. """

    spjl_namespace = 'https://uilx.org/tgbugs/u/sparcur-protcur-json-ld/'

    def subject_graph(pid):
        out = OntGraph()
        out.namespace_manager.populate_from(g)
        out.namespace_manager.bind('spjl', spjl_namespace)
        closure = g.subjectGraphClosure(pid)
        out.populate_from_triples(tobn(closure, published))
        return out

    return [subject_graph(pid) for pid in pids]
Пример #8
0
def normalize_prefixes(graph, curies):
    """ Copy the triples of graph into a new OntGraph whose prefixes
        are populated from curies.

        NOTE curies is modified in place: the empty prefix is dropped
        and the rdf and rdfs prefixes are set before it is used. """

    normalized = OntGraph()
    local_curies = OntCuries.new()

    curies.pop('', None)
    for prefix, namespace in (('rdf', rdf), ('rdfs', rdfs)):
        curies[prefix] = str(namespace)

    local_curies(curies)
    local_curies.populate(normalized)
    for triple in graph:
        normalized.add(triple)

    return normalized
Пример #9
0
 def test_new_index(self):
     """ A newly created index must exist on disk and contain at
         least one subject typed as snchn IndexGraph. """
     wrangler = snch.SneechWrangler(temp_path / 'sneechenator')
     path_index = wrangler.new_index('uri.interlex.org')
     assert path_index.exists(), 'wat'
     g = OntGraph(path=path_index).parse()
     # a sentinel default stands in for catching StopIteration
     nothing = object()
     first = next(g[:rdf.type:snch.snchn.IndexGraph], nothing)
     assert first is not nothing, g.debug()
Пример #10
0
    def populate(self, graph=None):
        """ Add self.triples to graph and populate it from OntCuries.

            When no graph is given a new empty OntGraph is created
            and used instead (handy for debugging). The graph that
            was populated is returned. """

        target = OntGraph() if graph is None else graph
        for triple in self.triples:
            target.add(triple)

        OntCuries.populate(target)
        return target
Пример #11
0
    def make_import_chain(self, ontology='nif.ttl'):
        """ Compute the import tree rooted at ontology.

            Returns (None, None) when ontology does not occur in the
            first element of any triple from get_itrips, otherwise
            stores the first result of import_tree on self.tree and
            self.extra and returns both. """

        itrips = self.get_itrips()
        found = any(ontology in trip[0] for trip in itrips)
        if not found:
            return None, None

        import_graph = OntGraph()
        for trip in itrips:
            import_graph.add(trip)

        ontologies = (ontology,)  # hack around bad code in ontload
        tree, extra = next(import_tree(import_graph, ontologies))
        self.tree = tree
        self.extra = extra
        return self.tree, self.extra
Пример #12
0
    def triples(self):
        """ Yield triples for the blobs in self.data['identifier_metadata'].

            Only blobs with source 'Crossref' are handled. For those
            the doi record is parsed into a scratch graph, an
            owl:sameAs link to the subject of the record is yielded,
            then the whole record, then the locally derived
            predicate/object pairs whose object is not None.

            The first blob without a source, or with any other source,
            logs an error and ends the generator. """

        for blob in self.data['identifier_metadata']:
            ident = blob['id']
            if not isinstance(ident, idlib.Stream):
                ident = idlib.Auto(ident)

            s = ident.asType(rdflib.URIRef)

            if 'source' not in blob:
                msg = f'dont know what to do with {blob} for {ident.identifier}'
                log.error(msg)
                #raise NotImplementedError(msg)
                return

            # FIXME we need to wrap this in our normalized representation
            source = blob['source']
            # FIXME CrossrefConvertor etc. OR put it in idlib as a an alternate ttl
            if source != 'Crossref':
                msg = f'dont know what to do with {source}'
                log.error(msg)
                #raise NotImplementedError(msg)
                return

            # built before the network call so missing keys fail early
            pos = (
                (rdf.type, owl.NamedIndividual),
                (rdf.type, TEMP[blob['type']]),
                (dc.publisher, blob['publisher']),
                #(dc.type, blob['type']),  # FIXME semantify
                (dc.title, blob['title']),
                (dc.date, self.published_online(blob)),  # FIXME .... dangerzone
            )

            record = OntGraph()
            # FIXME idlib streams need to recognize their own type in __new__
            doi = ident if isinstance(ident, idlib.Doi) else idlib.Doi(ident)
            record.parse(data=doi.ttl(), format='ttl')  # FIXME network bad
            doi_predicate = rdflib.term.URIRef(
                'http://prismstandard.org/namespaces/basic/2.1/doi')
            record_subjects = [rs for rs, rp, _ in record
                               if rp == doi_predicate]
            _their_record_s = record_subjects[0]
            yield s, owl.sameAs, _their_record_s
            yield from record

            for p, oraw in pos:
                if oraw is None:
                    continue

                if isinstance(oraw, rdflib.URIRef):
                    o = oraw
                else:
                    o = rdflib.Literal(oraw)

                yield s, p, o
Пример #13
0
    def export_rdf(self, dump_path, latest_path, dataset_blobs):
        """ Export one ttl file per dataset blob into dump_path/datasets.

            dump_path: export folder, the datasets subfolder is
                created here with a plain mkdir().
            latest_path: folder searched for a previously exported
                ttl file with the same dataset id.
            dataset_blobs: iterable of blobs, blob['id'] is used as
                the file name.

            When self.latest is set and a previous file exists it is
            copied into place and parsed as the graph of the export
            object, otherwise the graph is generated and written.

            Returns the list of TriplesExportDataset objects. """

        dataset_dump_path = dump_path / 'datasets'
        dataset_dump_path.mkdir()
        suffix = '.ttl'

        teds = []
        for dataset_blob in dataset_blobs:
            filename = dataset_blob['id']
            filepsuf = (dataset_dump_path / filename).with_suffix(suffix)
            # only latest_path is consulted, the earlier lookup under
            # self.latest_datasets_path was overwritten before it was used
            lfilepsuf = (latest_path / filename).with_suffix(suffix)

            ted = ex.TriplesExportDataset(dataset_blob)
            teds.append(ted)

            if self.latest and lfilepsuf.exists():
                filepsuf.copy_from(lfilepsuf)
                graph = OntGraph(path=lfilepsuf).parse()
                ted._graph = graph
            else:
                ted.graph.write(filepsuf)  # yay OntGraph defaults

            loge.info(f'dataset graph exported to {filepsuf}')

        return teds
Пример #14
0
    def fromYaml(cls, in_path):
        """ Build an instance from a yaml file.

            The file must provide referenceIndex, namespaces (a list
            or a single space separated string) and either include or
            paths, each entry of which needs path and ref keys. When
            both are present include is used.

            Raises ValueError when the selected list is empty. """

        in_path = aug.RepoPath(in_path).resolve()

        with open(in_path, 'rt') as f:
            blob = yaml.safe_load(f)

        # 'include' takes precedence over 'paths'
        key = 'include' if 'include' in blob else 'paths'
        orgs = [
            OntResGit(path=aug.RepoPath(subblob['path']),
                      ref=subblob['ref']) for subblob in blob[key]
        ]

        if not orgs:
            raise ValueError(f'orgs is empty for {in_path}')

        referenceIndex = blob['referenceIndex']
        namespaces = blob['namespaces']
        if isinstance(namespaces, str):
            namespaces = namespaces.split(' ')

        snchf = cls(orgs=orgs,
                    namespaces=namespaces,
                    referenceIndex=referenceIndex)
        return cls(graph=snchf.populate(OntGraph()))
Пример #15
0
 def build_instances(self, instances, dids):
     """ Write allen-cell-instances.ttl next to the configured out graph.

         The ontology iri is the iri of the NeuronACT out graph with
         its last path segment replaced by the file name. The graph
         holds the ontology declaration, the given instances, and the
         allDifferent triples generated from dids. """
     out_folder = Path(self.config.out_graph_path()).parent
     # WOW do I need to implement the new/better way of
     # managing writing collections of neurons to graphs
     neuron_uri = next(NeuronACT.out_graph[:rdf.type:owl.Ontology])
     name = 'allen-cell-instances.ttl'
     base, _last_segment = neuron_uri.rsplit('/', 1)
     uri = rdflib.URIRef(base + '/' + name)

     instance_graph = OntGraph(path=out_folder / name)
     instance_graph.bind('AIBSSPEC', AIBSSPEC)
     instance_graph.bind('npokb', npokb)

     instance_graph.add((uri, rdf.type, owl.Ontology))
     for triple in instances:
         instance_graph.add(triple)

     for triple in allDifferent(None, distinctMembers(*dids)):
         instance_graph.add(triple)

     instance_graph.write()
Пример #16
0
    def graph(self):
        """ Return the graph built from the json-ld form of this
            object. It is built on first use and cached on _graph. """
        if hasattr(self, '_graph'):
            return self._graph

        built = populateFromJsonLd(OntGraph(), self.asJsonLd())
        self._graph = built
        OntCuries.populate(built)
        self.populateHeader(built)
        return self._graph
Пример #17
0
    def debug(self, target=None):
        """ Load the triples produced by calling target into a scratch
            OntGraph that uses the OntCuries prefixes and call its
            debug method. target defaults to self.asOwl. """
        producer = self.asOwl if target is None else target
        scratch = OntGraph(namespace_manager=dict(OntCuries._dict))
        scratch.populate_from_triples(producer()).debug()
Пример #18
0
    def graph(self):
        """ you can populate other graphs, but this one runs once """
        if hasattr(self, '_graph'):
            return self._graph

        # only cache the graph after populate has completed
        fresh = OntGraph()
        self.populate(fresh)
        self._graph = fresh
        return self._graph
Пример #19
0
    def sneechReviewGraph(self,
                          source_graph,
                          namespaces,
                          sneech_file=None,
                          path_out=None):
        """ Build the review graph for source_graph.

            Runs preSneech and reView, then creates an OntGraph at
            path_out with the OntCuries, snchn, sncho and h prefixes,
            populates it from sneech_file when one is given and adds
            the triples from triples_review.

            Returns the review graph and maybe_sneeches. """
        # TODO cache
        presneech = self.preSneech(source_graph, namespaces)
        already, cannot, maybe, sneeches, maybe_sneeches = presneech
        # TODO not entirely sure about the best place to put this ...
        self.reView(source_graph, maybe_sneeches)  # FIXME dump and commit

        review_graph = OntGraph(path=path_out)
        oq.OntCuries.populate(review_graph)
        # FIXME -> curies probably
        local_prefixes = (('snchn', snchn), ('sncho', sncho), ('h', sghashes))
        for prefix, namespace in local_prefixes:
            review_graph.bind(prefix, str(namespace))

        if sneech_file:
            sneech_file.populate(review_graph)

        review_triples = self.triples_review(already, cannot, maybe, sneeches,
                                             sneech_file)
        for triple in review_triples:
            review_graph.add(triple)

        # TODO hasReport -> maybe_sneeches report / reView
        # TODO snchn predicate ordering
        return review_graph, maybe_sneeches
Пример #20
0
 def setUpClass(cls):
     """ Parse the protcur preview export and prepare the three
         protocol queries that are stored on the class. """
     iri = 'https://cassava.ucsd.edu/sparc/preview/exports/protcur.ttl'
     cls.graph = OntGraph().parse(iri, format='ttl')
     cls.nsm = cls.graph.namespace_manager
     templates = SparqlQueries(cls.nsm)
     cls.spaql_templates = templates
     # each query is stored on the class as _q_<template name>
     for template_name in ('protocol_aspects',
                           'protocol_inputs',
                           'protocol_species_dose'):
         query = getattr(templates, template_name)()
         setattr(cls, '_q_' + template_name, query)
Пример #21
0
def graphFromGithub(link, verbose=False):
    """ Parse the turtle file at link with ?raw=true appended and
        return the graph after populating it from OntCuries.
        The link is logged first when verbose is true. """
    # mmmm no validation
    # also caching probably
    if verbose:
        log.info(link)

    raw_link = f'{link}?raw=true'
    graph = OntGraph().parse(raw_link, format='turtle')
    OntCuries.populate(graph)
    return graph
Пример #22
0
def loadall(git_local, repo_name, local=False, dobig=False):
    """ Load every ttl file up to three levels below <repo>/ttl into
        a single OntGraph and return it.

        Unless local is true the owl:imports objects found in the
        graph are then parsed as well, in ten passes since there is
        no clear stopping condition. Those passes are always run
        with the big files enabled, regardless of dobig. """

    ttl_root = os.path.realpath(jpth(jpth(git_local, repo_name), 'ttl'))

    #match = (rdflib.term.URIRef('http://purl.org/dc/elements/1.1/member'),  # iao.owl
    #rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
    #rdflib.term.URIRef('http://www.w3.org/2002/07/owl#AnnotationProperty'))

    # base names of local files and full iris of imports already handled
    done = []
    filenames = []
    for pattern in ('*', '*/*', '*/*/*'):
        filenames.extend(glob(ttl_root + '/' + pattern + '.ttl'))

    graph = OntGraph()
    for filename in filenames:
        print(filename)
        done.append(os.path.basename(filename))
        graph.parse(filename, format='turtle')
        #if match in graph:
        #raise BaseException('Evil file found %s' % f)

    def repeat(dobig=dobig):
        # we don't really know when to stop, so just adjust
        for _subject, imported in graph.subject_objects(owl.imports):
            if os.path.basename(imported) in done or imported in done:
                continue

            #if (o, rdf.type, owl.Ontology) not in graph:
            print(imported)
            done.append(imported)
            if os.path.splitext(imported)[1] == '.ttl':
                fmt = 'turtle'
            else:
                fmt = 'xml'

            if noneMembers(imported, *bigleaves) or dobig:
                graph.parse(imported, format=fmt)
                #if match in graph:
                #raise BaseException('Evil file found %s' % o)

    #if local:
    #repeat(False)
    #else:
    if not local:
        for _pass in range(10):
            repeat(True)

    return graph
Пример #23
0
    def export_protcur(self, dump_path, *hypothesis_groups, no_network=False):
        """ Export protcur data as json-ld and turtle into dump_path.

            The pipeline data gets an owl:Ontology header appended and
            is wrapped with context, meta and prov sections. The result
            is written to protcur.json, converted to protcur.ttl, and
            returned. LATEST_PARTIAL is linked after the json has been
            written and LATEST after the ttl has been written. """
        # FIXME no_network passed in here is dumb
        #if (self.latest and  # FIXME NOTE this only points to the latest integrated release
        #self.latest_protcur_path.exists()):
        #blob_protcur = self.latest_protocols
        #else:

        pipeline = pipes.ProtcurPipeline(*hypothesis_groups,
                                         no_network=no_network)
        # FIXME NOTE this does not do the identifier expansion pass
        protcur = pipeline.data

        context = {**sc.base_context, **sc.protcur_context}
        for dataset_key in ('meta', 'subjects', 'samples', 'contributors'):
            del context[dataset_key]  # FIXME HACK meta @graph for datasets

        # FIXME should probably not be added here since it is obscure ...
        protcur.append({
            '@id': 'https://cassava.ucsd.edu/sparc/ontologies/protcur.ttl',
            '@type': 'owl:Ontology',
        })

        # the count is taken after the header has been appended
        meta = {'count': len(protcur)}  # FIXME adjust to structure
        prov = {
            'timestamp_export_start': self.timestamp,
            'export_system_identifier': Path.sysid,
            'export_hostname': gethostname(),
        }
        # FIXME this should not be defined here so confusing that it is not with the pipeline ...
        blob_protcur = {
            '@context': context,
            'meta': meta,
            'prov': prov,
            '@graph': protcur,  # FIXME regularize elements ?
        }

        dump_path.mkdir(parents=True, exist_ok=True)
        # FIXME TODO make these latest paths accessible
        # probably by splitting protcur export out into
        # its own class
        export_root = dump_path.parent
        latest_path = export_root / 'LATEST'
        latest_partial_path = export_root / 'LATEST_PARTIAL'

        json_path = dump_path / 'protcur.json'
        with open(json_path, 'wt') as f:
            json.dump(blob_protcur, f, sort_keys=True, indent=2, cls=JEncode)

        symlink_latest(dump_path, latest_partial_path)

        json_ld_graph = populateFromJsonLd(OntGraph(), json_path)
        json_ld_graph.write(json_path.with_suffix('.ttl'))

        symlink_latest(dump_path, latest_path)

        return blob_protcur
Пример #24
0
def simplify(collapse, blob):
    """ Collapse chains of edges in blob['edges'] in place.

        collapse: iterable of predicate sequences, each sequence
            describes one chain of predicates to look for. It is
            compared to a list with ==, so presumably each sequence
            is a list -- TODO confirm against callers.
        blob: dict with an 'edges' list of dicts that have a 'pred'
            key. Any 'meta' key on an edge that matches one of the
            predicates is removed.

        Whatever zap returns for the matched paths is removed from
        blob['edges'] at the end. Returns the same blob object. """
    to_remove = []
    for coll in collapse:
        exclude = set(p for p in coll)
        # edges whose predicate is one of the predicates in this chain
        candidates = [e for e in blob['edges'] if e['pred'] in exclude]
        for c in candidates:
            # make sure we can remove the edges later
            # if they have meta the match will fail
            if 'meta' in c:
                c.pop('meta')

        if candidates:
            # build a graph of the candidate edges only and split it
            # into weakly connected components
            edges = [Edge.fromOboGraph(c) for c in candidates]
            g = OntGraph().populate_from_triples(e.asRdf() for e in edges)
            nxg = egl.rdflib_to_networkx_multidigraph(g)
            connected = list(nx.weakly_connected_components(nxg))  # FIXME may not be minimal
            # objects of the edges that carry the last predicate of the chain
            ends = [e.asRdf()[-1] for e in edges if e.p == coll[-1]]
            for c in connected:
                #log.debug('\n' + pformat(c))
                nxgt = nx.MultiDiGraph()
                nxgt.add_edges_from(nxg.edges(c, keys=True))
                # NOTE(review): ordered_nodes is never read below, the call may
                # still matter if sorting fails on cyclic input -- confirm
                # before removing
                ordered_nodes = list(nx.topological_sort(nxgt))
                # simple paths from any node to an end node that have exactly
                # one edge per predicate in the chain
                paths = [p
                         for n in nxgt.nodes()
                         for e in ends
                         for p in list(nx.all_simple_paths(nxgt, n, e))
                         if len(p) == len(coll) + 1]

                for path in sorted(paths):
                    ordered_edges = nxgt.edges(path, keys=True)
                    # keep only the edges with both endpoints on the path
                    oe2 = [Edge.fromNx(e) for e in ordered_edges if all([n in path for n in e[:2]])]
                    predicates = [e.p for e in oe2]
                    #log.debug('\n' + pformat(oe2))
                    if predicates == coll: #in collapse:
                        to_remove.extend(zap(path, predicates, oe2, blob))
                    else:  # have to retain this branch to handle cases where the end predicate is duplicated
                        log.error('\n' + pformat(predicates) +
                                    '\n' + pformat(coll))
                        for preds in [coll]:
                            # look for the chain as a contiguous run inside predicates
                            sublist_start = listIn(predicates, preds)
                            if sublist_start is not None:
                                i = sublist_start
                                j = i + len(preds)
                                npath = path[i:j + 1]  # + 1 to include final node
                                oe2 = oe2[i:j]
                                predicates = predicates[i:j]
                                to_remove.extend(zap(npath, predicates, oe2, blob))

    # the membership check guards against removing the same edge twice
    for r in to_remove:
        if r in blob['edges']:
            blob['edges'].remove(r)

    #log.debug('\n' + pformat(blob['edges']))
    return blob  # note that this is in place modification so sort of superfluous
Пример #25
0
    def default(self):
        """ Build the table of rows for stimulation.ttl.

            The first row is the header, followed by one row per
            URIRef subject typed owl:ObjectProperty and then
            owl:Class, each sorted with natsort. When
            self.options.update is set the rows are upserted into
            Defs and committed. Returns the rows. """
        ttl_path = auth.get_path('ontology-local-repo') / 'ttl/stimulation.ttl'
        g = OntGraph().parse(ttl_path)
        # NOTE(review): preds is not used below, it is kept because the
        # qname calls may have side effects on the graph -- confirm
        preds = sorted(set(g.qname(p) for p in g.predicates()))

        rows = [[
            'id', 'rdf:type', 'rdfs:domain', 'rdfs:range', 'rdfs:label',
            'NIFRID:synonym', 'NIFRID:abbrev', 'definition:', 'editorNote:',
            'rdfs:comment'
        ]]

        for type_ in (owl.ObjectProperty, owl.Class):
            for s in sorted(g[:rdf.type:type_], key=natsort):
                if not isinstance(s, rdflib.URIRef):
                    continue

                row = [g.qname(s), g.qname(type_)]
                row.extend(fun(g, s) for fun in funs)
                rows.append(row)

        defs = Defs(readonly=not self.options.update)
        if self.options.update:
            defs.upsert(*rows)  # FIXME upsert broken on header reordering ?
            defs.commit()

        return rows
Пример #26
0
def main():
    """ Download PCL submission 7 from bioportal, pass it through
        fixns and to_ebm, and write the result to /tmp/pCL.ttl. """
    apikey = auth.user_config.secrets("bioportal")
    url = (
        'http://data.bioontology.org/ontologies/'
        f'PCL/submissions/7/download?apikey={apikey}'
    )
    source = fixns(OntGraph().parse(url, format='application/rdf+xml'))

    output = OntGraph()
    # the output graph gets the prefixes of the fixed source graph
    source.namespace_manager.populate(output)
    output.populate_from_triples(to_ebm(source))
    output.write('/tmp/pCL.ttl')
Пример #27
0
 def graph(self):
     """ Return a new OntGraph populated from OntCuries and from
         this object, with the local, apinatomy and elements
         prefixes bound afterwards. """
     populated = OntGraph()
     OntCuries.populate(populated)
     self.populate(populated)
     local_prefixes = (
         ('local', self.context),
         ('apinatomy', readable),  # FIXME populate from store
         ('elements', elements),
     )
     for prefix, namespace in local_prefixes:
         populated.bind(prefix, namespace)

     return populated
Пример #28
0
    def asOwl(self, identifier_function=lambda self: rdflib.BNode()):
        """ Yield the triples produced by self.combinator for the
            member phenotypes, first for members that are
            PhenotypeCollection instances and then for members that
            are PhenotypeBase instances.

            The subject comes from identifier_function(self), by
            default a new blank node. When collection members are
            present their triples are dumped via a scratch graph and
            execution stops at a breakpoint before they are yielded. """
        # FIXME maybe just use the render function? and control all
        # of this as part of the renderer?
        s = identifier_function(self)
        for member_type in (PhenotypeCollection, PhenotypeBase):
            combinators = [phenotype.combinator for phenotype in self
                           if isinstance(phenotype, member_type)]
            if member_type == PhenotypeCollection and combinators:
                collected = list(self.combinator(s, *combinators))
                OntGraph().populate_from_triples(collected).debug()
                breakpoint()  # XXX

            yield from self.combinator(s, *combinators)
Пример #29
0
 def loadData(cls):
     """ corresponds to the list of FMA ids from organParts
         for all organs in the sparc organsList

         Returns the set of raw node ids, without blank nodes and
         owl:Nothing, and the set of the same ids after expansion
         with the uPREFIXES namespaces. """
     g = OntGraph()
     g.namespace_manager.populate_from(uPREFIXES)  # cls._ghead except fma doesn't define FMA:
     organ_list = cls.sgd.prod_sparc_organList()
     top_ids = [node['id'] for node in organ_list['nodes']]
     res = Async()(deferred(by_organ)(i, cls.sgd) for i in top_ids)
     #res = [by_organ(i, cls.sgd) for i in top_ids]
     #res_stats(res)  # why are there dupes? now we know!
     ids_raw = set()
     for _organ, result in res:
         for node in result['nodes']:
             node_id = node['id']
             if not node_id.startswith('_:') and node_id != 'owl:Nothing':
                 ids_raw.add(node_id)

     expand = g.namespace_manager.expand
     ids = {expand(raw_id).toPython() for raw_id in ids_raw}
     return ids_raw, ids
Пример #30
0
    def COMMENCE(self, *, namespaces=tuple(), orgs=tuple(), sneech_file=None, path_out=None, **kwargs):
        if sneech_file is not None and not orgs:
            return sneech_file.COMMENCE(self, path_out)

        if not orgs:
            raise TypeError('orgs cannot be empty!')

        source_graph = OntGraph()
        for org in orgs:
            org.populate(source_graph)

        #derp = g.namespace_manager.store.namespace
        #namespaces = [derp(p) for p in prefixes]  # FIXME prefix vs namespace
        rg, maybe_sneeches = self.sneechReviewGraph(source_graph, namespaces,  sneech_file, path_out)