Пример #1
0
    def triples(self):
        """Yield the triples for this graph and for each of its resources.

        Sets ``self.iri`` as a side effect.  Resource blobs without a
        ``'class'`` key are logged and skipped, blobs whose class is
        ``'Graph'`` are skipped silently, and ``'External'`` resources are
        held back and emitted only after every other resource.
        """
        self.iri = rdflib.URIRef(f'https://apinatomy.org/uris/models/{self.id}')
        yield self.iri, rdf.type, readable.Graph
        yield self.iri, readable.name, rdflib.Literal(self.name)
        yield self.iri, readable.abbreviation, rdflib.Literal(self.abbreviation)

        pending_externals = []
        for key, blob in self.resources.items():
            if 'class' not in blob:
                logd.warning(f'no class in\n{blob!r} for {key}')
                continue

            class_name = blob['class']
            if class_name == 'Graph':
                continue

            # the handler for a resource is the attribute of self that is
            # named after the blob's class
            resource = getattr(self, class_name)(
                blob, self.context, self.label_suffix)
            if class_name == 'External':
                # defer lookup
                pending_externals.append(resource)
            else:
                yield from resource.triples()

        # NOTE(review): presumably this touches ``_term`` on every external
        # as one batch before their triples are generated -- Async/deferred
        # are defined elsewhere, confirm their semantics there
        Async()(deferred(lambda owner: owner._term)(ext)
                for ext in pending_externals)
        for ext in pending_externals:
            yield from ext.triples()
Пример #2
0
        def mkval(cell):
            """Return ``AutoId`` of the cell's hyperlink, or the raw value.

            A cell without a hyperlink is logged as unhandled and its
            ``value`` is returned unchanged.
            """
            link = cell.hyperlink
            if link is None:
                logd.warning(f'unhandled value {cell.value}')
                return cell.value

            return AutoId(link)
Пример #3
0
        def protocol_url_or_doi(self, value):
            """Yield triples describing the protocol identified by ``value``.

            ``value`` is handled in one of three ways:

            * an ``idlib.Pio`` is used as the protocol identifier directly;
            * an ``idlib.Doi`` has its own ``triples_gen`` triples yielded
              first, is then dereferenced as an ``idlib.Pio``, and two
              triples linking the doi subject and the protocol are yielded;
            * anything else is passed to ``idlib.Pio(...)``.

            The generator stops early, without yielding a
            ``sparc.Protocol`` triple, when the identifier is malformed or
            when the doi lookup fails before producing any triple.
            Otherwise it yields the triples of ``pioid.uri_api_int``, an
            ``rdf.type sparc.Protocol`` triple and, when protocol data
            could be retrieved, a label and a step count.
            """
            #_, s = self.c.protocol_url_or_doi(value)
            #yield s, rdf.type, owl.NamedIndividual
            #yield s, rdf.type, sparc.Protocol
            log.debug(value)
            if not isinstance(value, idlib.Pio):
                if isinstance(value, idlib.Doi):
                    try:
                        # t tracks the last triple yielded so that we can
                        # tell whether anything was produced before a failure
                        t = None
                        for t in value.triples_gen:
                            yield t
                    except idlib.exc.RemoteError as e:
                        if t is None:
                            # we already logged this error during id dereferencing
                            return

                    # the subject of the last doi triple is reused as the
                    # subject/object of the linking triples below
                    # NOTE(review): if triples_gen finishes without yielding
                    # and without raising, t is still None here and this
                    # unpacking raises TypeError -- confirm that cannot happen
                    ds, _, _ = t
                    try:
                        pioid = value.dereference(asType=idlib.Pio)
                        s = self.c.l(pioid)
                        yield ds, TEMP.dereferencesTo, s
                        yield s, TEMP.hasDoi, ds
                    except idlib.exc.MalformedIdentifierError as e:
                        log.warning(e)
                        return
                else:
                    try:
                        pioid = idlib.Pio(
                            value
                        )  # FIXME :/ should be handled in Pio directly probably?
                    except idlib.exc.MalformedIdentifierError as e:
                        logd.warning(e)
                        return
            else:
                pioid = value

            try:
                pioid_int = pioid.uri_api_int
                s = self.c.l(pioid_int)
                yield from pioid_int.triples_gen
                # FIXME needs to be a pipeline so that we can export errors
                try:
                    data = pioid.data()
                except (OntId.BadCurieError,
                        idlib.exc.MalformedIdentifierError) as e:
                    loge.error(e)  # FIXME export errors ...
                    data = None
            except idlib.exc.RemoteError as e:  # FIXME sandbox violation
                # fall back to the subject built from pioid itself and carry
                # on without protocol data
                loge.exception(e)
                s = self.c.l(pioid)
                data = None

            yield s, rdf.type, sparc.Protocol

            # label and step count are only emitted when data is truthy
            if data:
                yield s, rdfs.label, rdflib.Literal(pioid.label)
                nsteps = len(data['steps'])
                yield s, TEMP.protocolHasNumberOfSteps, rdflib.Literal(nsteps)
Пример #4
0
        def mkval(cell):
            """Return the term for the cell's hyperlink, or the raw value.

            A cell without a hyperlink is logged as an unhandled technique
            and its ``value`` is returned unchanged.
            """
            link = cell.hyperlink
            if link is None:
                logd.warning(f'unhandled technique {cell.value}')
                return cell.value

            identifier = OntId(link)
            if identifier.prefix == 'TEMP':
                # TEMP identifiers are only reported, they are still
                # converted with asTerm() like every other identifier
                logd.warning(f'{cell.value} -> {identifier!r}')

            return identifier.asTerm()
Пример #5
0
    def triples(self):
        """Yield the graph type triple followed by each resource's triples.

        Sets ``self.iri`` as a side effect.  Blobs without a ``'class'``
        key and blobs whose class is ``'Graph'`` produce a warning and
        contribute no triples.
        """
        self.iri = rdflib.URIRef(
            f'https://apinatomy.org/uris/models/{self.id}')
        yield self.iri, rdf.type, readable.Graph
        for _key, blob in self.resources.items():
            if 'class' in blob:
                class_name = blob['class']
                if class_name != 'Graph':
                    # the handler for a resource is the attribute of self
                    # that is named after the blob's class
                    resource = getattr(self, class_name)(blob, self.context)
                    yield from resource.triples()
                else:
                    log.warning('Graph is in resources itself')
            else:
                logd.warning(f'no class in\n{blob!r}')
Пример #6
0
        def mkval(cell):
            """Convert a cell to an ``idlib.Pio`` or ``idlib.Doi`` if possible.

            The cell's hyperlink is used as the candidate identifier, with
            the cell's value as the fallback when there is no hyperlink
            and the value is truthy.  ``idlib.Pio`` is tried before
            ``idlib.Doi``.  When there is no candidate or neither
            constructor accepts it, a warning is logged and the raw cell
            value is returned.
            """
            raw = cell.value
            candidate = cell.hyperlink
            if candidate is None:
                candidate = raw or None

            if candidate is not None:
                for make_id in (idlib.Pio, idlib.Doi):
                    try:
                        return make_id(candidate)
                    except idlib.exc.IdlibError:
                        continue

            logd.warning(f'unhandled value {cell.value}')
            return raw
Пример #7
0
    def validate_path_json_metadata(cls, path_meta_blob):
        """Assign a ``'status'`` to every entry of ``path_meta_blob['data']``.

        An entry whose basename is in ``cls._banned_basenames`` gets
        ``'banned'``.  Otherwise the status is looked up from the entry's
        mimetype (``'magic_mimetype'`` is preferred over ``'mimetype'``):
        a mimetype missing from the lookup table gives ``'known'`` and an
        entry without any mimetype gives ``'unknown'``.

        Problems are recorded on a ``HasErrors`` instance and embedded into
        ``path_meta_blob`` at the end when ``he._errors_set`` is truthy.
        The blob is modified in place and nothing is returned.
        """
        from sparcur.core import HasErrors  # FIXME
        stage = cls.__name__ + '.validate_path_json_metadata'
        he = HasErrors(pipeline_stage=stage)
        # SIGH this overhead is 2 function calls and a branch
        mimetypes, _suffixes = cls._file_type_status_lookup()
        for i, path_meta in enumerate(path_meta_blob['data']):
            if path_meta['basename'] in cls._banned_basenames:
                msg = f'illegal file detect {path_meta["basename"]}'
                dsrp = path_meta['dataset_relative_path']
                if he.addError(msg, path=dsrp, json_path=('data', i)):
                    logd.error(msg)
                path_meta['status'] = 'banned'
                continue

            has_magic = 'magic_mimetype' in path_meta
            has_plain = 'mimetype' in path_meta
            if has_magic:
                # FIXME NOT clear whether magic_mimetype should be used by itself
                # usually magic and file extension together work, magic by itself
                # can give some completely bonkers results
                source = 'magic_mimetype'
                mimetype = path_meta['magic_mimetype']
                if has_plain:
                    muggle_mimetype = path_meta['mimetype']
                    if mimetype != muggle_mimetype:
                        msg = f'mime types do not match {mimetype} != {muggle_mimetype}'
                        dsrp = path_meta['dataset_relative_path']
                        if he.addError(msg, path=dsrp, json_path=('data', i)):
                            log.error(msg)
            elif has_plain:
                source = 'mimetype'
                mimetype = path_meta['mimetype']
            else:
                mimetype = None

            if mimetype is None:
                status = 'unknown'
                dsrp = path_meta['dataset_relative_path']
                if not isinstance(dsrp, str):
                    extensions = dsrp.suffixes
                    msg = f'unknown mimetype {"".join(extensions)}'
                    cls._unknown_suffixes.add(tuple(extensions))
                elif dsrp:
                    msg = f'unknown mimetype {path_meta["basename"]}'
                else:
                    msg = 'FIXME top level folder needs a mimetype!'
                if he.addError(msg, path=dsrp, json_path=('data', i)):
                    logd.warning(msg)
            else:
                try:
                    status = mimetypes[mimetype]
                    if status == 'banned':
                        msg = f'banned mimetype detected {mimetype}'
                        dsrp = path_meta['dataset_relative_path']
                        if he.addError(msg,
                                       path=dsrp,
                                       json_path=('data', i, source)):
                            logd.error(msg)
                except KeyError:
                    # a mimetype that is not in the lookup table counts as
                    # known, and is reported once per distinct mimetype
                    status = 'known'
                    if mimetype not in cls._unclassified_mimes:
                        cls._unclassified_mimes.add(mimetype)
                        log.info(f'unclassified mimetype {mimetype}')

            path_meta['status'] = status

        if he._errors_set:
            he.embedErrors(path_meta_blob)