Example #1
0
    def graph(self):
        """Return this object's OntGraph, building and caching it on first use.

        The graph is populated from the JSON-LD serialization of self,
        has curies registered, and gets a header; the result is memoized
        on ``self._graph`` so subsequent calls are cheap.
        """
        try:
            return self._graph
        except AttributeError:
            pass  # first access: fall through and build the graph

        built = populateFromJsonLd(OntGraph(), self.asJsonLd())
        OntCuries.populate(built)
        self.populateHeader(built)
        self._graph = built
        return built
Example #2
0
    def export_protcur(self, dump_path, *hypothesis_groups, no_network=False):
        """Export protcur annotations to protcur.json and a derived protcur.ttl.

        Writes protcur.json under ``dump_path``, symlinks LATEST_PARTIAL,
        serializes the graph to protcur.ttl, then symlinks LATEST.

        :param dump_path: directory the export files are written into;
            created (with parents) if it does not exist
        :param hypothesis_groups: hypothesis group ids for ProtcurPipeline
        :param no_network: forwarded to ProtcurPipeline
            (FIXME no_network passed in here is dumb)
        :return: the json-serializable protcur blob that was dumped
        """
        pipeline = pipes.ProtcurPipeline(*hypothesis_groups,
                                         no_network=no_network)
        # FIXME NOTE this does not do the identifier expansion pass
        protcur = pipeline.data
        context = {
            **sc.base_context,
            **sc.protcur_context,
        }
        for f in ('meta', 'subjects', 'samples', 'contributors'):
            # guard the pop so an absent key cannot raise KeyError;
            # matches the guarded form used by the other implementation
            # of this export
            if f in context:
                context.pop(f)  # FIXME HACK meta @graph for datasets

        ontology_header = {  # FIXME should probably not be added here since it is obscure ...
            '@id': 'https://cassava.ucsd.edu/sparc/ontologies/protcur.ttl',
            '@type': 'owl:Ontology',
        }

        protcur.append(ontology_header)

        blob_protcur = {  # FIXME this should not be defined here so confusing that it is not with the pipeline ...
            '@context': context,
            'meta': {
                'count': len(protcur)
            },  # FIXME adjust to structure
            'prov': {
                'timestamp_export_start': self.timestamp,
                'export_system_identifier': Path.sysid,
                'export_hostname': gethostname(),
            },
            '@graph': protcur,  # FIXME regularize elements ?
        }

        dump_path.mkdir(parents=True, exist_ok=True)
        # FIXME TODO make these latest paths accessible
        # probably by splitting protcur export out into
        # its own class
        latest_path = dump_path.parent / 'LATEST'
        latest_partial_path = dump_path.parent / 'LATEST_PARTIAL'
        fn = dump_path / 'protcur.json'
        with open(fn, 'wt') as f:
            json.dump(blob_protcur, f, sort_keys=True, indent=2, cls=JEncode)

        # partial: json is on disk but the ttl has not been written yet
        symlink_latest(dump_path, latest_partial_path)

        populateFromJsonLd(OntGraph(), fn).write(fn.with_suffix('.ttl'))

        symlink_latest(dump_path, latest_path)

        return blob_protcur
Example #3
0
    def export_other_formats(self, dump_path, filepath_json, blob_ir,
                             blob_export_json, *rest):
        """Export the non-json artifacts derived from a completed json export.

        Runs, in order: jsonld export, identifier metadata (and its rdf),
        per-dataset rdf, protcur, a merged summary ttl, xml, and disco.
        The ordering matters: protcur runs after rdf export because rdf
        export side effects the annotation sync (see comment below).

        :param dump_path: directory the timestamped export artifacts go into
        :param filepath_json: path of the primary json export; sibling
            outputs (the .ttl, xml, disco) are derived from its path
        :param blob_ir: intermediate-representation blob; its 'datasets'
            key holds the list of dataset blobs used by most sub-exports
        :param blob_export_json: exported json blob passed to export_jsonld
        :param rest: exactly (summary, previous_latest,
            previous_latest_datasets); summary is unpacked but not used
            in this method body
        """
        summary, previous_latest, previous_latest_datasets = rest
        dataset_blobs = blob_ir['datasets']

        # jsonld
        blob_export_jsonld = self.export_jsonld(filepath_json,
                                                blob_export_json)

        # identifier metadata
        blob_id_met = self.export_identifier_metadata(dump_path,
                                                      previous_latest,
                                                      dataset_blobs)
        teim = self.export_identifier_rdf(dump_path, blob_id_met)

        # rdf
        teds = self.export_rdf(dump_path, previous_latest_datasets,
                               dataset_blobs)
        tes = ex.TriplesExportSummary(blob_ir, teds=teds + [teim])

        # protcur  # FIXME running after because rdf export side effects anno sync
        blob_protcur = self.export_protcur(
            dump_path, 'sparc-curation')  # FIXME  # handle orthogonally

        # merge the protcur graph into the summary graph by re-reading
        # the json file that export_protcur just wrote
        blob_protcur_path = dump_path / 'protcur.json'  # FIXME SIGH
        populateFromJsonLd(tes.graph,
                           blob_protcur_path)  # this makes me so happy

        # summary ttl is written as bytes produced by the triples exporter
        with open(filepath_json.with_suffix('.ttl'), 'wb') as f:
            f.write(tes.ttl)

        # protocol  # handled orthogonally ??
        #blob_protocol = self.export_protocols(dump_path, dataset_blobs, blob_protcur)

        # xml
        self.export_xml(filepath_json, dataset_blobs)

        # disco
        self.export_disco(filepath_json, dataset_blobs, teds)
Example #4
0
def main():
    """Build dandi.ttl from the *.yaml term files under the current directory.

    Writes dandi-raw.ttl first, then replaces Literal objects of a few
    well-known predicates with their curie-expanded URI forms and writes
    the result as dandi.ttl.
    """
    terms_root = aug.LocalPath.cwd()
    graph = OntGraph()

    for yaml_path in terms_root.rglob('*.yaml'):
        populateFromJsonLd(graph, path_yaml(yaml_path))

    graph.write('dandi-raw.ttl')

    predicates = (schema.domainIncludes, schema.rangeIncludes,
                  rdfs.subClassOf, rdf.type)
    # collect the triples first so we never mutate the graph while
    # iterating it
    to_remove = []
    for pred in predicates:
        for subj, obj in graph[:pred:]:
            to_remove.append((subj, pred, obj))

    to_add = []
    for subj, pred, obj in to_remove:
        if isinstance(obj, rdflib.Literal):
            # literal objects hold curies; expand them to full URIs
            obj = graph.namespace_manager.expand(obj.toPython())
        to_add.append((subj, pred, obj))

    for triple in to_remove:
        graph.remove(triple)

    for triple in to_add:
        graph.add(triple)

    # TODO ontology metadata header section
    graph.write('dandi.ttl')
Example #5
0
    def export_protcur(
            self,
            dump_path,
            *hypothesis_groups,
            rerun_protcur_export=False,
            # FIXME no_network passed in here is dumb
            no_network=False,
            # FIXME direct= is a hack
            direct=False):
        """Export protcur annotations to protcur.json and a derived protcur.ttl.

        Skips the export entirely when a latest export already exists and
        no annotation is newer than it (unless ``rerun_protcur_export``).

        :param dump_path: directory the export files are written into;
            created (with parents) if it does not exist
        :param hypothesis_groups: hypothesis group ids for ProtcurPipeline
        :param rerun_protcur_export: force a re-export even when no
            annotations are newer than the latest existing export
        :param no_network: forwarded to ProtcurPipeline
        :param direct: when False and this exporter's base path is not the
            protcur base path, delegate to a correctly-based ExportProtcur;
            in that case the return value is a (blob, export) tuple rather
            than just the blob
        :raises ValueError: when no annotations have been fetched
        :return: the json-serializable protcur blob (or (blob, export)
            on the delegation path, see ``direct``)
        """
        if not direct and self.export_base != self.export_protcur_base:
            # workaround to set the correct export base path
            nargs = {**self._args}
            nargs['export_base'] = self.export_protcur_base
            export = ExportProtcur(**nargs)
            # forward rerun_protcur_export so a forced rerun requested on
            # this path is not silently dropped by the delegate
            return export.export_protcur(
                export.dump_path,
                *hypothesis_groups,
                rerun_protcur_export=rerun_protcur_export,
                no_network=no_network), export

        pipeline = pipes.ProtcurPipeline(*hypothesis_groups,
                                         no_network=no_network)
        annos = pipeline.load()
        if not annos:
            msg = ('No annos. Did you remember to run\n'
                   'python -m sparcur.simple.fetch_annotations')
            raise ValueError(msg)

        if self.latest_export_path.exists():
            # FIXME this only points to the latest integrated release
            # which is not what we want, we need the latest protcur to be independent
            blob_protcur = self.latest_export
            t_lex = blob_protcur['prov']['timestamp_export_start']
            # NOTE(review): timestamps are compared as ISO strings; the
            # '+00:00' -> 'Z' normalization keeps both sides in the same
            # textual form so lexical order matches temporal order
            t_lup = max(a.updated for a in annos).replace('+00:00', 'Z')
            new_annos_here = t_lex < t_lup  # <= is pretty much impossible
            if not (new_annos_here or rerun_protcur_export):
                # nothing new to export: reuse the latest blob as-is
                return blob_protcur

        # FIXME NOTE this does not do the identifier expansion pass
        protcur = pipeline._make_blob(annos=annos)
        context = {
            **sc.base_context,
            **sc.protcur_context,
        }
        for f in ('meta', 'subjects', 'samples', 'contributors'):
            # subjects samples and contributors no longer included in context directly
            if f in context:
                context.pop(f)  # FIXME HACK meta @graph for datasets

        ontology_header = {  # FIXME should probably not be added here since it is obscure ...
            '@id': 'https://cassava.ucsd.edu/sparc/ontologies/protcur.ttl',
            '@type': 'owl:Ontology',
        }

        protcur.append(ontology_header)

        blob_protcur = {  # FIXME this should not be defined here so confusing that it is not with the pipeline ...
            '@context': context,
            'meta': {
                'count': len(protcur)
            },  # FIXME adjust to structure
            'prov': {
                'timestamp_export_start': self.timestamp,
                'export_system_identifier': Path.sysid,
                'export_hostname': gethostname(),
            },
            '@graph': protcur,  # FIXME regularize elements ?
        }

        dump_path.mkdir(parents=True, exist_ok=True)
        # FIXME TODO make these latest paths accessible
        # probably by splitting protcur export out into
        # its own class
        latest_path = dump_path.parent / 'LATEST'
        latest_partial_path = dump_path.parent / 'LATEST_PARTIAL'
        fn = dump_path / 'protcur.json'
        with open(fn, 'wt') as f:
            json.dump(blob_protcur, f, sort_keys=True, indent=2, cls=JEncode)

        # partial: json is on disk but the ttl has not been written yet
        symlink_latest(dump_path, latest_partial_path)

        g = populateFromJsonLd(OntGraph(), fn).write(fn.with_suffix('.ttl'))

        symlink_latest(dump_path, latest_path)

        return blob_protcur