Exemplo n.º 1
0
    def normv(v):
        if isinstance(v, str) and v.startswith('http'):
            # needed for loading from json that has been serialized
            # rather than from our internal representation
            # probably better to centralized the reload ...
            oid = OntId(v)
            if oid.prefix in want_prefixes:
                return OntTerm(v).asCell()
            else:
                return oid.iri

        if isinstance(v, idlib.Stream):
            if hasattr(v, 'asCell'):
                return v.asCell()
            else:
                loge.debug(
                    f'{type(v)} does not implement an asCell representation')
                return v.asType(str)

        if isinstance(v, OntId):
            if not isinstance(v, OntTerm):
                v = OntTerm(v)

            v = v.asCell()
        if isinstance(v, list) or isinstance(v, tuple):
            v = ','.join(
                json.dumps(_, cls=JEncode) if isinstance(_, dict) else normv(_)
                for _ in v)
            v = v.replace('\n', ' ').replace('\t', ' ')
        elif any(isinstance(v, c) for c in (int, float, str)):
            v = str(v)
            v = v.replace('\n', ' ').replace('\t',
                                             ' ')  # FIXME tests to catch this

        elif isinstance(v, dict):
            v = json.dumps(v, cls=JEncode)

        return v
Exemplo n.º 2
0
    def added(self):
        data = super().added
        if data['meta'] == {'techniques': []}:
            breakpoint()

        # FIXME conditional lifts ...
        if 'award_number' not in data['meta']:
            am = self.lifters.award_manual
            if am:
                data['meta']['award_number'] = am

        if 'modality' not in data['meta']:
            m = self.lifters.modality
            if m:
                data['meta']['modality'] = m

        if False and 'organ' not in data['meta']:
            # skip here, now attached directly to award
            if 'award_number' in data['meta']:
                an = data['meta']['award_number']
                o = self.lifters.organ(an)
                if o:
                    if o != 'othertargets':
                        o = OntId(o)
                        if o.prefix == 'FMA':
                            ot = OntTerm(o)
                            o = next(
                                OntTerm.query(label=ot.label, prefix='UBERON'))

                    data['meta']['organ'] = o

        if 'organ' not in data['meta'] or data['meta'][
                'organ'] == 'othertargets':
            o = self.lifters.organ_term
            if o:
                if isinstance(o, str):
                    o = o,

                out = tuple()
                for _o in o:
                    _o = OntId(_o)
                    if _o.prefix == 'FMA':
                        ot = OntTerm(_o)
                        _o = next(
                            OntTerm.query(label=ot.label, prefix='UBERON'))

                    out += (_o, )

                data['meta']['organ'] = out

        if 'protocol_url_or_doi' not in data['meta']:
            if self.lifters.protocol_uris:
                data['meta']['protocol_url_or_doi'] = tuple(
                    self.lifters.protocol_uris)

        else:
            if not isinstance(data['meta']['protocol_url_or_doi'], tuple):
                _test_path = deque(['meta', 'protocol_url_or_doi'])
                if not [e for e in data['errors'] if e['path'] == _test_path]:
                    raise ext.ShouldNotHappenError('urg')

            else:
                data['meta']['protocol_url_or_doi'] += tuple(
                    self.lifters.protocol_uris)
                data['meta']['protocol_url_or_doi'] = tuple(
                    sorted(set(data['meta']['protocol_url_or_doi'])))  # ick

        return data
Exemplo n.º 3
0
    def _process(self, contributor):
        # get member if we can find them
        he = dat.HasErrors(pipeline_stage=self.__class__.__name__ + '.data')
        if 'name' in contributor and 'first_name' in contributor:
            name = contributor['name']
            if ';' in name:
                msg = f'Bad symbol in name {name!r}'
                he.addError(msg)
                logd.error(msg)

            fn = contributor['first_name']
            ln = contributor['last_name']
            if ' ' in fn:
                fn, mn = fn.split(' ', 1)
                mn, _mn = mn.rstrip('.'), mn
                if mn != _mn:
                    he.addError(
                        f'Middle initials don\'t need periods :) {name!r}',
                        logfunc=logd.error)
                contributor['middle_name'] = mn
                contributor['first_name'] = fn

            if ' ' in ln:
                msg = f'Malformed last_name {ln!r}'
                he.addError(msg)
                logd.error(msg)
                ln = ln.replace(' ', '-')

            failover = f'{fn}-{ln}'
            member = self.member(fn, ln)

            if member is not None:
                userid = OntId('https://api.blackfynn.io/users/' + member.id)
                contributor['blackfynn_user_id'] = userid

        else:
            member = None
            failover = 'no-orcid-no-name'
            log.warning(f'No name!' + lj(contributor))

        orcid = None
        if 'contributor_orcid_id' in contributor:
            orcid = contributor['contributor_orcid_id']
            if type(orcid) == str and 'orcid.org' in orcid:
                orcid = OrcidId(orcid)  # FIXME reloading from json

            if isinstance(orcid, OrcidId):
                s = orcid
            else:  # it's not an orcid or its a bad orcid
                orcid = None

        if orcid is None:
            if member is not None:
                s = userid
            else:
                log.debug(lj(contributor))
                s = OntId(self.dsid + '/contributors/' + failover)

        contributor['id'] = s
        he.embedErrors(contributor)

        # lifting + adding
        if 'contributor_affiliation' in contributor:
            ca = contributor['contributor_affiliation']
            maybe_ror = self.lifters.affiliations(ca)
            if maybe_ror is not None:
                contributor['affiliation'] = maybe_ror
Exemplo n.º 4
0
    def triples_gen(self):
        rm = self._source

        # FIXME there doesn't seem to be a section that tells me the name
        # of top level model so I have to know its name beforhand
        # the id is in the model, having the id in the resource map
        # prevents issues if these things get sent decoupled
        id = rm['id']
        mid = id.replace(' ', '-')

        links = rm[id]['links']
        #linknodes = [n for n in rm[id]['nodes'] if n['class'] == 'Link']  # visible confusion

        st = []
        from_to = []
        ot = None
        yield from self.apinatbase()
        for link in links:
            if 'conveyingType' in link:
                if link['conveyingType'] == 'ADVECTIVE':
                    p_is = TEMP.isAdvectivelyConnectedTo
                    p_from = TEMP.advectivelyConnectsFrom
                    p_to = TEMP.advectivelyConnectsTo
                    p_cmat = TEMP.advectivelyConnectsMaterial
                    diffusive = False
                elif link['conveyingType'] == 'DIFFUSIVE':
                    p_is = TEMP.isDiffusivelyConnectedTo
                    p_from = TEMP.diffusivelyConnectsFrom
                    p_to = TEMP.diffusivelyConnectsTo
                    p_cmat = TEMP.diffusivelyConnectsMaterial
                    diffusive = True
                else:
                    log.critical(f'unhandled conveying type {link}')
                    continue

                source = link['source']
                target = link['target']
                ok = True
                if len(from_to) == 2:  # otherwise
                    st = []
                    from_to = []
                for i, e in enumerate((source, target)):
                    ed = rm[e]
                    if 'external' not in ed:
                        if not i and from_to:
                            # TODO make sure the intermediate ids match
                            pass
                        else:
                            ok = False
                            break
                    else:
                        st.append(e)
                        from_to.append(OntId(ed['external'][0]))

                conveying = link['conveyingLyph']
                cd = rm[conveying]
                if 'external' in cd:
                    old_ot = ot
                    ot = OntTerm(cd['external'][0])
                    yield ot.u, rdf.type, owl.Class
                    yield ot.u, TEMP.internalId, rdflib.Literal(conveying)
                    yield ot.u, rdfs.label, rdflib.Literal(ot.label)

                    yield from self.materialTriples(
                        ot.u, link, p_cmat)  # FIXME locate this correctly

                    if ok:
                        u, d = from_to
                        if st[0] == source:
                            yield u, rdfs.label, rdflib.Literal(
                                OntTerm(u).label)
                            yield u, rdf.type, owl.Class
                            yield from cmb.restriction.serialize(
                                ot.u, p_from, u)

                        if st[1] == target:
                            yield d, rdfs.label, rdflib.Literal(
                                OntTerm(d).label)
                            yield d, rdf.type, owl.Class
                            yield from cmb.restriction.serialize(ot.u, p_to, d)

                    if old_ot is not None and old_ot != ot:
                        yield from cmb.restriction.serialize(
                            ot.u, p_from, old_ot.u)

                if diffusive:
                    # we can try to hack this using named individuals
                    # but it is not going to do exactly what is desired
                    s_link = TEMP[f'ApiNATOMY/{mid}/{link["id"]}']
                    s_cd = TEMP[f'ApiNATOMY/{mid}/{cd["id"]}']
                    yield s_link, rdf.type, owl.NamedIndividual
                    yield s_link, rdf.type, TEMP.diffusiveLink  # FIXME I'm not sure these go in the model ...
                    yield s_cd, rdf.type, owl.NamedIndividual
                    if 'external' in cd and cd['external']:
                        oid = OntId(cd['external'][0])
                        yield s_cd, rdf.type, oid.u
                        ot = oid.asTerm()
                        if ot.label:
                            yield oid.u, rdfs.label, ot.label

                    else:
                        yield s_cd, rdf.type, TEMP.conveyingLyph
                        for icd in cd['inCoalescences']:
                            dcd = rm[icd]
                            log.info(lj(dcd))
                            s_icd = TEMP[f'ApiNATOMY/{mid}/{dcd["id"]}']
                            yield s_cd, TEMP.partOfCoalescence, s_icd
                            yield s_icd, rdf.type, owl.NamedIndividual
                            yield s_icd, rdf.type, TEMP[
                                'ApiNATOMY/Coalescence']
                            if 'external' in dcd and dcd['external']:
                                oid = OntId(dcd['external'][0])
                                yield s_icd, rdf.type, oid.u
                                ot = oid.asTerm()
                                if ot.label:
                                    yield oid.u, rdfs.label, ot.label

                            for lyphid in dcd['lyphs']:
                                ild = rm[lyphid]
                                log.info(lj(ild))
                                if 'external' in ild and ild['external']:
                                    yield s_icd, TEMP.hasLyphWithMaterial, OntId(
                                        ild['external'][0])

                if not ok:
                    logd.info(f'{source} {target} issue')
                    continue

                for inid, e in zip(st, from_to):
                    yield e.u, rdf.type, owl.Class
                    yield e.u, rdfs.label, rdflib.Literal(OntTerm(e).label)
                    yield e.u, TEMP.internalId, rdflib.Literal(inid)

                f, t = from_to
                yield from cmb.restriction.serialize(f.u, p_is, t.u)
Exemplo n.º 5
0
 def triples_external(self):
     if 'externals' in self.blob:
         for external in self.blob['external']:
             yield self.s, rdf.type, OntId(external).URIRef
Exemplo n.º 6
0
    def added(self):
        data = super().added
        if data['meta'] == {'techniques': []}:
            breakpoint()

        # FIXME conditional lifts ...
        if 'award_number' not in data['meta']:
            am = self.lifters.award_manual
            if am:
                data['meta']['award_number'] = am

        if 'modality' not in data['meta']:
            m = self.lifters.modality
            if m:
                data['meta']['modality'] = m

        if False and 'organ' not in data['meta']:
            # skip here, now attached directly to award
            if 'award_number' in data['meta']:
                an = data['meta']['award_number']
                o = self.lifters.organ(an)
                if o:
                    if o != 'othertargets':
                        o = OntId(o)
                        if o.prefix == 'FMA':
                            ot = OntTerm(o)
                            o = next(OntTerm.query(label=ot.label, prefix='UBERON'))

                    data['meta']['organ'] = o

        if 'organ' not in data['meta'] or data['meta']['organ'] == 'othertargets':
            o = self.lifters.organ_term
            if o:
                if isinstance(o, str):
                    o = o,

                out = tuple()
                for _o in o:
                    _o = OntId(_o)
                    if _o.prefix == 'FMA':
                        ot = OntTerm(_o)
                        _o = next(OntTerm.query(label=ot.label, prefix='UBERON'))

                    out += (_o,)

                data['meta']['organ'] = out

        if 'protocol_url_or_doi' not in data['meta']:
            if self.lifters.protocol_uris:
                data['meta']['protocol_url_or_doi'] = tuple(self.lifters.protocol_uris)

        else:
            if not isinstance(data['meta']['protocol_url_or_doi'], tuple):
                _test_path = deque(['meta', 'protocol_url_or_doi'])
                if not [e for e in data['errors']
                        if 'path' in e and e['path'] == _test_path]:
                    raise ext.ShouldNotHappenError('urg')

            else:
                data['meta']['protocol_url_or_doi'] += tuple(self.lifters.protocol_uris)
                data['meta']['protocol_url_or_doi'] = tuple(sorted(set(data['meta']['protocol_url_or_doi'])))  # ick


        # FIXME this is a really bad way to do this :/ maybe stick the folder in data['prov'] ?
        # and indeed, when we added PipelineStart this shifted and broke everything
        local = (self
                 .previous_pipeline.pipelines[0]
                 .previous_pipeline.pipelines[0]
                 .previous_pipeline.pipelines[0]
                 .path)
        remote = local.remote
        if 'doi' not in data['meta']:
            doi = remote.doi
            if doi is not None:
                try:
                    metadata = doi.metadata()
                    if metadata is not None:
                        data['meta']['doi'] = doi.identifier
                except requests.exceptions.HTTPError:
                    data['meta']['doi'] = None
                    pass
            else:
                data['meta']['doi'] = None

        if 'status' not in data:
            data['status'] = {}

        if 'status_on_platform' not in data['status']:
            data['status']['status_on_platform'] = remote.bfobject.status

        return data