示例#1
0
    def _rowhack(self, ns, rdflib):
        # can't overwrite pyontutils.sheets.Row like we do in other cases sigh
        curie = self.curie().value
        if not curie:
            return

        oid = OntId(curie)
        s = oid.u

        _lon = (lambda v: None if not v else rdflib.Literal(v))
        lon = lambda c: _lon(c.value)

        _blon = lambda v: _lon(True if v == 'TRUE' else False
                               )  # sigh sheets datatypes
        blon = lambda c: _blon(c.value)

        _oon = (lambda v: None if not v else OntId(v).u)
        oon = lambda c: _oon(c.value)

        self.type().value
        self.mistake().value
        pos = (
            (ns.ilxtr.curationInternal, blon(self.curation_internal())),
            (ns.definition, lon(self.definition())),
            (ns.editorNote, lon(self.notes())),
            (ns.rdf.type, oon(self.rdf_type())),
            (ns.replacedBy, oon(self.replacedby_())),
            (ns.ilxtr.futureType, oon(self.future_type())),
        )

        for p, o in pos:
            if o is not None:
                yield s, p, o
示例#2
0
    def _process(self, contributor):
        """Normalize one contributor record in place and assign its ``id``.

        The record is mutated: ``first_name`` / ``middle_name`` may be
        rewritten, ``blackfynn_user_id`` is added when a member is found,
        ``id`` is always set, and any collected errors are embedded at the
        end.  Nothing is returned.

        The ``id`` is, in order of preference: a usable orcid, the matched
        member's user id, or an id built from the dataset id and a
        name-derived failover string.
        """
        # get member if we can find them
        he = dat.HasErrors(pipeline_stage=self.__class__.__name__ + '.data')
        has_name = 'name' in contributor and 'first_name' in contributor
        if not has_name:
            member = None
            failover = 'no-orcid-no-name'
            log.warning(f'No name!' + lj(contributor))
        else:
            name = contributor['name']
            if ';' in name:
                msg = f'Bad symbol in name {name!r}'
                he.addError(msg)
                logd.error(msg)

            first = contributor['first_name']
            last = contributor['last_name']
            if ' ' in first:
                # everything after the first space is treated as the middle name
                first, middle = first.split(' ', 1)
                contributor['middle_name'] = middle
                contributor['first_name'] = first

            if ' ' in last:
                msg = f'Malformed last_name {last!r}'
                he.addError(msg)
                logd.error(msg)
                last = last.replace(' ', '-')

            failover = f'{first}-{last}'
            member = self.member(first, last)
            if member is not None:
                userid = OntId('https://api.blackfynn.io/users/' + member.id)
                contributor['blackfynn_user_id'] = userid

        orcid = None
        if 'contributor_orcid_id' in contributor:
            candidate = contributor['contributor_orcid_id']
            # exact type check on purpose: only plain strings get wrapped
            if type(candidate) == str and 'orcid.org' in candidate:
                candidate = OrcidId(candidate)  # FIXME reloading from json

            # anything that is not an orcid (or is a bad orcid) is dropped
            if isinstance(candidate, OrcidId):
                orcid = candidate

        if orcid is not None:
            s = orcid
        elif member is not None:
            s = userid
        else:
            log.debug(lj(contributor))
            s = OntId(self.dsid + '/contributors/' + failover)

        contributor['id'] = s
        he.embedErrors(contributor)
示例#3
0
    def __init__(self, *args, **kwargs):
        """Initialize via the parent class, then derive the subject ``self.s``.

        Ids starting with ``PMID:`` are warned about and get ``_c_term``
        replaced; the subject is derived from ``self.id`` either way.
        """
        super().__init__(*args, **kwargs)
        # PLEASE DO NOT PUT PMIDs as external ids!!!
        # FIXME idlib PMID(thing) urg the regex state machine is so simple ;_;
        is_pmid = self.id.startswith('PMID:')
        if is_pmid:
            log.warning('PMIDs should never be External IDs!')
            self._c_term = fake

        self.s = OntId(self.id).URIRef
示例#4
0
    def added(self):
        """Return the parent's ``added`` data with missing metadata lifted in.

        ``award_number``, ``modality`` and ``organ`` are only filled when
        absent from ``data['meta']`` (``organ`` is also refilled when it is
        the placeholder ``'othertargets'``).  FMA organ identifiers are
        swapped for the UBERON term that has the same label.
        """
        def as_uberon(raw):
            # FMA ids are mapped to UBERON by label; anything else is kept
            oid = OntId(raw)
            if oid.prefix != 'FMA':
                return oid

            label = OntTerm(oid).label
            return next(OntTerm.query(label=label, prefix='UBERON')).OntTerm

        data = super().added
        meta = data['meta']

        # FIXME conditional lifts ...
        if 'award_number' not in meta:
            award = self.lifters.award_manual
            if award:
                meta['award_number'] = award

        if 'modality' not in meta:
            modality = self.lifters.modality
            if modality:
                meta['modality'] = modality

        if 'organ' not in meta and 'award_number' in meta:
            organ = self.lifters.organ(meta['award_number'])
            if organ:
                # the placeholder value is stored untouched
                if organ != 'othertargets':
                    organ = as_uberon(organ)

                meta['organ'] = organ

        if 'organ' not in meta or meta['organ'] == 'othertargets':
            terms = self.lifters.organ_term
            if terms:
                if isinstance(terms, str):
                    terms = (terms,)

                meta['organ'] = tuple(as_uberon(term) for term in terms)

        return data
示例#5
0
    def l(self, value):
        """Convert ``value`` into the object form used for triples.

        Identifier-like values become uri references, expressions and
        quantities pass through unchanged, json dicts carrying a known
        ``type`` are rebuilt, and everything else becomes a literal.

        Raises ``ValueError`` for a dict without a recognized ``type``.
        """
        if isinstance(value, idlib.Stream) and hasattr(value, '_id_class'):
            # FIXME not every stream provides asUri
            convert = value.asUri if hasattr(value, 'asUri') else value.asType
            return convert(rdflib.URIRef)

        if isinstance(value, OntId):
            return value.u

        if isinstance(value, (ProtcurExpression, Quantity)):
            return value

        if isinstance(value, str) and value.startswith('http'):
            return OntId(value).u

        if not isinstance(value, dict):
            return rdflib.Literal(value)

        # FIXME this is too late to convert?
        # the json dicts carry a type field that selects the loader
        if 'type' in value:
            kind = value['type']
            if kind == 'quantity':
                return self.pyru._Quant.fromJson(value)

            if kind == 'range':
                return self.pyru.Range.fromJson(value)

            if kind == 'identifier':
                return fromJson(value).asType(rdflib.URIRef)

        raise ValueError(value)
示例#6
0
    def triples_protcur(self, protocol_subject):
        """Yield triples derived from the protcur annotations of a protocol.

        First the number of annotations is yielded for ``protocol_subject``.
        Then, for each distinct UBERON term found in a ``protc:input``
        annotation, a triple linking the dataset to that term is yielded
        together with the annotation triples describing where it came from.
        """
        annos = list(self._protcur(str(protocol_subject)))

        anatomy = []
        for anno in annos:
            if anno.astType != 'protc:input':
                continue

            text = str(anno)
            if '(protc:input (term UBERON' not in text:
                continue

            # the curie suffix runs from after 'UBERON:' to the next space
            suffix = text.split('UBERON:', 1)[-1].split(' ', 1)[0]
            anatomy.append((anno, OntId('UBERON:' + suffix)))

        dataset_subject = rdflib.URIRef(self.uri_api)
        count = rdflib.Literal(len(annos))
        yield protocol_subject, TEMP.hasNumberOfProtcurAnnotations, count

        seen = set()
        for anno, term in anatomy:
            if term in seen:
                continue

            seen.add(term)
            o = term.u
            triple = (dataset_subject, TEMP.involvesAnatomicalRegion, o)
            share_link = rdflib.URIRef(anno.shareLink)
            if anno.value != o:
                av = ((ilxtr.annotationValue, rdflib.Literal(anno.value)), )
            else:
                av = tuple()

            notes = []
            for note in anno.curatorNotes:
                notes.append((ilxtr.curatorNote, rdflib.Literal(note)))

            prov = [(ilxtr.hasAnnotation, share_link)]
            yield triple
            yield from cmb.annotation(triple, *av, *notes, *prov)()
示例#7
0
 def organ_term(self, dataset_id):
     """Return the organ terms recorded for ``dataset_id`` as a tuple of ids.

     Returns ``None`` when there is no row for the dataset or the row has
     no organ term.  The cell is split on single spaces and empty pieces
     are dropped.
     """
     row = self._lookup(dataset_id)
     if not row:
         return None

     raw = row.organ_term
     if not raw:
         return None

     return tuple(OntId(piece) for piece in raw.split(' ') if piece)
示例#8
0
        def normv(v):
            """Flatten ``v`` into a value suitable for a tabular cell.

            Sequences are joined with commas (recursively), dicts are
            dumped as json, and newlines and tabs in text are replaced by
            spaces.  Values of any other type are returned unchanged.
            """
            if isinstance(v, str) and v.startswith('http'):
                # needed for loading from json that has been serialized
                # rather than from our internal representation
                # probably better to centralize the reload ...
                oid = OntId(v)
                if oid.prefix in want_prefixes:
                    return OntTerm(v).tabular()

                return oid.iri

            if isinstance(v, OntId):
                term = v if isinstance(v, OntTerm) else OntTerm(v)
                v = term.tabular()

            if isinstance(v, (list, tuple)):
                cells = [
                    json.dumps(item, cls=JEncode)
                    if isinstance(item, dict) else normv(item)
                    for item in v
                ]
                joined = ','.join(cells)
                return joined.replace('\n', ' ').replace('\t', ' ')

            if isinstance(v, (int, float, str)):
                # FIXME tests to catch this
                return str(v).replace('\n', ' ').replace('\t', ' ')

            if isinstance(v, dict):
                return json.dumps(v, cls=JEncode)

            return v
示例#9
0
    def l(self, value):
        """Convert ``value`` into the object form used for triples.

        Identifier-like values become uri references, expressions and
        quantities pass through unchanged, json dicts carrying a known
        ``type`` are rebuilt, and everything else becomes a literal.

        Raises ``ValueError`` for a dict without a recognized ``type``.
        """
        if isinstance(value, BlackfynnId):
            # FIXME this has to come first due ordering and impl issues with
            # bfpnids being streams but not supporting all sorts of stuff
            return rdflib.URIRef(value.uri_api)

        if isinstance(value, idlib.Stream) and hasattr(value, '_id_class'):
            # FIXME not every stream provides asUri
            convert = value.asUri if hasattr(value, 'asUri') else value.asType
            return convert(rdflib.URIRef)

        if isinstance(value, OntId):
            return value.u

        if isinstance(value, (ProtcurExpression, Quantity)):
            return value

        if isinstance(value, str) and value.startswith('http'):
            return OntId(value).u

        if not isinstance(value, dict):
            return rdflib.Literal(value)

        # FIXME this is too late to convert?
        # the json dicts carry a type field that selects the loader
        if 'type' in value:
            kind = value['type']
            if kind == 'quantity':
                return self.pyru._Quant.fromJson(value)

            if kind == 'range':
                return self.pyru.Range.fromJson(value)

            if kind == 'identifier':
                return fromJson(value).asType(rdflib.URIRef)

        raise ValueError(value)
示例#10
0
 def organ_term(self, dataset_id):
     """Return the organ terms recorded for ``dataset_id`` as a tuple of ids.

     Returns ``None`` when there is no row for the dataset or its
     ``organ_term`` cell is empty.  The cell is split on single spaces;
     empty pieces and ``na`` (any case) are dropped.
     """
     row = self._lookup(dataset_id)
     # the column position is resolved from the header before the row check
     column = self.byCol.header.index('organ_term')
     if not row:
         return None

     raw = row[column]
     if not raw:
         return None

     pieces = (piece for piece in raw.split(' ')
               if piece and piece.lower() != 'na')
     return tuple(OntId(piece) for piece in pieces)
示例#11
0
    def triples_objects_ordered(self):
        """Yield triples for the ordered multi-valued keys of ``self.blob``.

        Keys in ``objects_ordered_succession`` produce one triple per
        adjacent pair of values using the predicate mapped to the key.
        Keys in ``objects_ordered`` produce an rdf list hanging off
        ``self.s``.  Missing keys and empty values produce nothing.
        """
        def resolve(key, values):
            # a bare string would be iterated character by character
            assert not isinstance(values, str), f'{values} in {key}'
            return [OntId(self.context[v.replace(' ', '-')]).URIRef
                    for v in values]

        for key, predicate in self.objects_ordered_succession.items():
            if key not in self.blob:
                continue

            values = self.blob[key]
            if not values:
                continue

            objects = resolve(key, values)
            for current, following in zip(objects, objects[1:]):
                yield current, predicate, following

        for key in self.objects_ordered:
            if key not in self.blob:
                continue

            values = self.blob[key]
            if not values:
                continue

            objects = resolve(key, values)
            # NOTE scigraph does not translate rdf lists
            yield from cmb.olist(*objects)(self.s, ordered[key])
示例#12
0
        def mkval(cell):
            """Return the term for a cell's hyperlink, or its raw value.

            Cells without a hyperlink are logged as unhandled and their
            value is returned as is.  Hyperlinks that resolve to the TEMP
            prefix are logged but still converted to a term.
            """
            link = cell.hyperlink
            if link is None:
                logd.warning(f'unhandled technique {cell.value}')
                return cell.value

            oid = OntId(link)
            if oid.prefix == 'TEMP':
                logd.warning(f'{cell.value} -> {oid!r}')

            return oid.asTerm()
示例#13
0
        def award_number(self, value):
            """Yield the type triples for the award identified by ``value``.

            The subject is the second element returned by
            ``self.c.award_number(value)``.
            """
            # NOTE the organ lookup that used to follow was placed after an
            # unconditional return and could never run, so it was dropped.
            _, s = self.c.award_number(value)
            yield s, a, owl.NamedIndividual
            yield s, a, TEMP.FundedResearchProject
示例#14
0
 def organ_term(self, dataset_id):
     """Return the organ terms recorded for ``dataset_id`` as a tuple of ids.

     Returns ``None`` when there is no row, when the cell is empty, or
     when a piece of the cell is not a valid curie (the cell value is
     logged in that case).  Empty pieces and ``na`` (any case) are dropped.
     """
     row = self._lookup(dataset_id)
     if not row:
         return None

     raw = row.organ_term().value
     if not raw:
         return None

     try:
         return tuple(
             OntId(piece) for piece in raw.split(' ')
             if piece and piece.lower() != 'na')
     except OntId.BadCurieError:
         log.error(raw)
         return None
示例#15
0
    def fromRdf(cls, uri, graph, context=None):
        """Build an instance from the triples whose subject is ``uri``.

        The blob ``id`` is the curie of ``uri``.  ``rdf:type`` maps to
        ``class: External``, ``rdfs:label`` maps to ``name``, and any other
        predicate uses the last path segment of its iri as the key.  Keys
        listed in ``cls.objects_multi`` accumulate into lists.

        Raises ``NotImplementedError`` for an object that is neither a
        literal nor a uri reference.
        """
        blob = {'id': OntId(uri).curie}
        for p, o in graph[uri]:
            if p == rdf.type:
                key = 'class'
                value = 'External'
            else:
                if p == rdfs.label:
                    key = 'name'
                else:
                    _, key = p.rsplit('/', 1)

                if isinstance(o, rdflib.Literal):
                    value = o.toPython()
                elif isinstance(o, rdflib.URIRef):
                    oid = OntId(o)
                    if oid.prefix == 'local':
                        value = oid.suffix
                    elif oid.prefix == 'apinatomy':  # FIXME hrm?
                        value = oid.suffix
                    else:
                        value = oid.curie  # FIXME external is tricky
                        log.warning(f'{oid!r}')
                else:
                    # without this branch `value` would be unbound, or worse
                    # silently reuse the value from the previous triple
                    raise NotImplementedError(f'{o!r}')

            if key in cls.objects_multi:
                blob.setdefault(key, []).append(value)
            else:
                blob[key] = value

        return cls(blob, context)
示例#16
0
    def triples_objects_multi(self):
        """Yield triples for the multi-valued keys present in ``self.blob``.

        External and inherited-external values are parsed as ontology ids;
        values with an unknown prefix are logged and skipped.  External
        values additionally yield an ``annotates`` triple pointing back at
        ``self.s``.  All other values are looked up in ``self.context``.
        """
        external_keys = ('external', 'inheritedExternal')
        for key in self.objects_multi:
            if key not in self.blob:
                continue

            values = self.blob[key]
            assert not isinstance(values, str), f'{values} in {key}'
            for value in values:
                if key in external_keys:
                    try:
                        o = OntId(value).URIRef
                    except OntId.UnknownPrefixError as e:
                        log.exception(e)
                        continue

                    if key == 'external':
                        yield o, readable.annotates, self.s
                else:
                    # FIXME require no spaces in internal ids
                    o = self.context[value.replace(' ', '-')]

                yield self.s, readable[key], o
示例#17
0
    def fromRdf(cls, uri, graph, context=None):
        """Build an instance from the triples whose subject is ``uri``.

        The blob ``id`` is the last path segment of ``uri``.  ``rdf:type``
        objects other than ``owl:NamedIndividual`` set ``class``; any other
        predicate uses the last path segment of its iri as the key.  Keys
        listed in ``cls.objects_multi`` accumulate into lists.

        Raises ``NotImplementedError`` for blank-node or otherwise
        unsupported objects, and for any key in ``cls.objects_ordered``.
        """
        _, local_id = uri.rsplit('/', 1)
        blob = {'id': local_id}
        for p, o in graph[uri]:
            if p == rdf.type:
                if o == owl.NamedIndividual:
                    continue  # TODO s rdf:type apinatomy:External ??

                key = 'class'
                _, value = o.rsplit('/', 1)
            else:
                _, key = p.rsplit('/', 1)

                if isinstance(o, rdflib.Literal):
                    value = o.toPython()
                elif isinstance(o, rdflib.URIRef):
                    oid = OntId(o)
                    if oid.prefix == 'local':
                        value = oid.suffix
                    elif oid.prefix == 'apinatomy':  # FIXME hrm?
                        value = oid.suffix
                    else:
                        value = oid.curie  # FIXME external is tricky
                        log.warning(f'{oid!r}')
                elif isinstance(o, rdflib.BNode):
                    raise NotImplementedError('a bit more complex ...')
                else:
                    raise NotImplementedError(f'{o}')

            # ordered representation takes priority; rebuilding it is not
            # supported yet, so fail loudly rather than produce a wrong blob
            if key in cls.objects_ordered:
                raise NotImplementedError('TODO this is quite a bit more complex')

            if key in cls.objects_multi:
                blob.setdefault(key, []).append(value)
            else:
                blob[key] = value

        return cls(blob, context)
示例#18
0
    def _psd(self, rec, dsi):
        type = rec['type']
        spec_id = rec['specimen_id']
        if type == 'SampleDirs':
            sid = self.primary_key(spec_id)
        elif type == 'SubjectDirs':
            sid = self.subject_id(spec_id)
        else:
            raise NotImplementedError(f'wat {type}')

        for drp in rec['dirs']:
            path_record = dsi[drp]
            collection_id = path_record['remote_id']
            #p = (self.data['prov']['export_project_path'] /
            #self.data['meta']['folder_name'] /
            #drp)
            #cid = p.cache.cache.uri_api
            cid = OntId(collection_id).u
            yield sid, TEMP.hasFolderAboutIt, cid
示例#19
0
    def l(self, value):
        """Convert ``value`` into the object form used for triples.

        Ontology ids and http strings become uri references, expressions
        and quantities pass through unchanged, json dicts typed as
        ``quantity`` or ``range`` are rebuilt, and everything else becomes
        a literal.

        Raises ``ValueError`` for a dict without a recognized ``type``.
        """
        if isinstance(value, OntId):
            return value.u

        if isinstance(value, (Expr, Quantity)):
            return value

        if isinstance(value, str) and value.startswith('http'):
            return OntId(value).u

        if not isinstance(value, dict):
            return rdflib.Literal(value)

        # FIXME this is too late to convert?
        # the json dicts carry a type field that selects the loader
        if 'type' in value:
            kind = value['type']
            if kind == 'quantity':
                return Quantity.fromJson(value)

            if kind == 'range':
                return Range.fromJson(value)

        raise ValueError(value)
示例#20
0
    def triples_gen(self):
        """Yield triples describing the links of the resource map.

        The resource map is read from ``self._source``.  After the base
        triples from ``self.apinatbase()``, every link that has a
        ``conveyingType`` of ``ADVECTIVE`` or ``DIFFUSIVE`` is processed;
        other conveying types are logged and skipped, and links without
        the key are ignored.

        ``st``, ``from_to`` and ``ot`` are carried over from one link to
        the next, so the order of ``links`` affects the output.
        """
        rm = self._source

        # FIXME there doesn't seem to be a section that tells me the name
        # of top level model so I have to know its name beforhand
        # the id is in the model, having the id in the resource map
        # prevents issues if these things get sent decoupled
        id = rm['id']
        mid = id.replace(' ', '-')  # id with spaces replaced, used in TEMP iris below

        links = rm[id]['links']
        #linknodes = [n for n in rm[id]['nodes'] if n['class'] == 'Link']  # visible confusion

        st = []  # internal ids of the endpoints collected so far
        from_to = []  # external ids matching the entries of st
        ot = None  # term of the most recent conveying lyph with an external id
        yield from self.apinatbase()
        for link in links:
            if 'conveyingType' in link:
                # choose the predicate family for this kind of link
                if link['conveyingType'] == 'ADVECTIVE':
                    p_is = TEMP.isAdvectivelyConnectedTo
                    p_from = TEMP.advectivelyConnectsFrom
                    p_to = TEMP.advectivelyConnectsTo
                    p_cmat = TEMP.advectivelyConnectsMaterial
                    diffusive = False
                elif link['conveyingType'] == 'DIFFUSIVE':
                    p_is = TEMP.isDiffusivelyConnectedTo
                    p_from = TEMP.diffusivelyConnectsFrom
                    p_to = TEMP.diffusivelyConnectsTo
                    p_cmat = TEMP.diffusivelyConnectsMaterial
                    diffusive = True
                else:
                    log.critical(f'unhandled conveying type {link}')
                    continue

                source = link['source']
                target = link['target']
                ok = True
                # a complete pair from an earlier link is discarded here,
                # a partial one (single entry) is kept
                if len(from_to) == 2:  # otherwise
                    st = []
                    from_to = []
                for i, e in enumerate((source, target)):
                    ed = rm[e]
                    if 'external' not in ed:
                        # a source without an external id is tolerated only
                        # when an endpoint was carried over from before
                        if not i and from_to:
                            # TODO make sure the intermediate ids match
                            pass
                        else:
                            ok = False
                            break
                    else:
                        # NOTE(review): if one endpoint was carried over and
                        # both source and target have external ids, from_to
                        # ends up with three entries and the two-way
                        # unpacking below would fail - confirm whether the
                        # data can produce that
                        st.append(e)
                        from_to.append(OntId(ed['external'][0]))

                conveying = link['conveyingLyph']
                cd = rm[conveying]
                if 'external' in cd:
                    old_ot = ot
                    ot = OntTerm(cd['external'][0])
                    yield ot.u, rdf.type, owl.Class
                    yield ot.u, TEMP.internalId, rdflib.Literal(conveying)
                    yield ot.u, rdfs.label, rdflib.Literal(ot.label)

                    yield from self.materialTriples(
                        ot.u, link, p_cmat)  # FIXME locate this correctly

                    if ok:
                        u, d = from_to
                        if st[0] == source:
                            yield u, rdfs.label, rdflib.Literal(
                                OntTerm(u).label)
                            yield u, rdf.type, owl.Class
                            yield from cmb.restriction.serialize(
                                ot.u, p_from, u)

                        if st[1] == target:
                            yield d, rdfs.label, rdflib.Literal(
                                OntTerm(d).label)
                            yield d, rdf.type, owl.Class
                            yield from cmb.restriction.serialize(ot.u, p_to, d)

                    # connect this conveying lyph to the previous one when
                    # they differ
                    if old_ot is not None and old_ot != ot:
                        yield from cmb.restriction.serialize(
                            ot.u, p_from, old_ot.u)

                if diffusive:
                    # we can try to hack this using named individuals
                    # but it is not going to do exactly what is desired
                    s_link = TEMP[f'ApiNATOMY/{mid}/{link["id"]}']
                    s_cd = TEMP[f'ApiNATOMY/{mid}/{cd["id"]}']
                    yield s_link, rdf.type, owl.NamedIndividual
                    yield s_link, rdf.type, TEMP.diffusiveLink  # FIXME I'm not sure these go in the model ...
                    yield s_cd, rdf.type, owl.NamedIndividual
                    if 'external' in cd and cd['external']:
                        oid = OntId(cd['external'][0])
                        yield s_cd, rdf.type, oid.u
                        # NOTE(review): this rebinds ot, which the next link
                        # reads as old_ot - confirm that is intended
                        ot = oid.asTerm()
                        if ot.label:
                            # NOTE(review): the label is yielded bare here
                            # but wrapped in rdflib.Literal above - verify
                            yield oid.u, rdfs.label, ot.label

                    else:
                        yield s_cd, rdf.type, TEMP.conveyingLyph
                        for icd in cd['inCoalescences']:
                            dcd = rm[icd]
                            log.info(lj(dcd))
                            s_icd = TEMP[f'ApiNATOMY/{mid}/{dcd["id"]}']
                            yield s_cd, TEMP.partOfCoalescence, s_icd
                            yield s_icd, rdf.type, owl.NamedIndividual
                            yield s_icd, rdf.type, TEMP[
                                'ApiNATOMY/Coalescence']
                            if 'external' in dcd and dcd['external']:
                                oid = OntId(dcd['external'][0])
                                yield s_icd, rdf.type, oid.u
                                ot = oid.asTerm()
                                if ot.label:
                                    # NOTE(review): bare label again, see above
                                    yield oid.u, rdfs.label, ot.label

                            for lyphid in dcd['lyphs']:
                                ild = rm[lyphid]
                                log.info(lj(ild))
                                if 'external' in ild and ild['external']:
                                    # NOTE(review): the object is the OntId
                                    # itself rather than its .u form used
                                    # everywhere else - verify
                                    yield s_icd, TEMP.hasLyphWithMaterial, OntId(
                                        ild['external'][0])

                if not ok:
                    logd.info(f'{source} {target} issue')
                    continue

                # both endpoints are known: describe them and connect them
                for inid, e in zip(st, from_to):
                    yield e.u, rdf.type, owl.Class
                    yield e.u, rdfs.label, rdflib.Literal(OntTerm(e).label)
                    yield e.u, TEMP.internalId, rdflib.Literal(inid)

                f, t = from_to
                yield from cmb.restriction.serialize(f.u, p_is, t.u)
示例#21
0
    def added(self):
        """Return the parent's ``added`` data with missing fields lifted in.

        Fills ``award_number``, ``modality``, ``organ``,
        ``protocol_url_or_doi`` and ``doi`` in ``data['meta']`` when they
        are absent, merges lifted protocol uris into an existing tuple, and
        records ``status_on_platform`` under ``data['status']``.

        Raises ``ext.ShouldNotHappenError`` when ``protocol_url_or_doi`` is
        present, is not a tuple, and no error was recorded for that path.
        """
        data = super().added
        meta = data['meta']

        # FIXME conditional lifts ...
        if 'award_number' not in meta:
            am = self.lifters.award_manual
            if am:
                meta['award_number'] = am

        if 'modality' not in meta:
            m = self.lifters.modality
            if m:
                meta['modality'] = m

        # NOTE organ is not lifted from the award number here,
        # it is now attached directly to the award

        if 'organ' not in meta or meta['organ'] == 'othertargets':
            o = self.lifters.organ_term
            if o:
                if isinstance(o, str):
                    o = o,

                out = tuple()
                for _o in o:
                    _o = OntId(_o)
                    if _o.prefix == 'FMA':
                        # swap FMA ids for the UBERON result with the same label
                        ot = OntTerm(_o)
                        _o = next(OntTerm.query(label=ot.label, prefix='UBERON'))

                    out += (_o,)

                meta['organ'] = out

        if 'protocol_url_or_doi' not in meta:
            if self.lifters.protocol_uris:
                meta['protocol_url_or_doi'] = tuple(self.lifters.protocol_uris)

        elif not isinstance(meta['protocol_url_or_doi'], tuple):
            # a malformed value is only acceptable if it was already reported
            _test_path = deque(['meta', 'protocol_url_or_doi'])
            if not any('path' in e and e['path'] == _test_path
                       for e in data['errors']):
                raise ext.ShouldNotHappenError('urg')

        else:
            merged = (meta['protocol_url_or_doi']
                      + tuple(self.lifters.protocol_uris))
            meta['protocol_url_or_doi'] = tuple(sorted(set(merged)))  # ick

        # FIXME this is a really bad way to do this :/ maybe stick the folder in data['prov'] ?
        # and indeed, when we added PipelineStart this shifted and broke everything
        local = (self
                 .previous_pipeline.pipelines[0]
                 .previous_pipeline.pipelines[0]
                 .previous_pipeline.pipelines[0]
                 .path)
        remote = local.remote
        if 'doi' not in meta:
            doi = remote.doi
            if doi is None:
                meta['doi'] = None
            else:
                try:
                    # the doi is only recorded when its metadata can be fetched
                    metadata = doi.metadata()
                    if metadata is not None:
                        meta['doi'] = doi.identifier
                except requests.exceptions.HTTPError:
                    meta['doi'] = None

        if 'status' not in data:
            data['status'] = {}

        if 'status_on_platform' not in data['status']:
            data['status']['status_on_platform'] = remote.bfobject.status

        return data
示例#22
0
 def triples_external(self):
     """Yield an ``rdf:type`` triple for every external id in ``self.blob``.

     Nothing is yielded when the blob has no ``external`` key.
     """
     # the guard must test the same key that is read below, otherwise the
     # loop either never runs or fails with a KeyError
     if 'external' in self.blob:
         for external in self.blob['external']:
             yield self.s, rdf.type, OntId(external).URIRef
示例#23
0
    def added(self):
        """Return the parent's ``added`` data with missing fields lifted in.

        Fills ``award_number``, ``modality``, ``organ`` and
        ``protocol_url_or_doi`` in ``data['meta']`` when they are absent and
        merges lifted protocol uris into an existing tuple.

        Raises ``ext.ShouldNotHappenError`` when ``protocol_url_or_doi`` is
        present, is not a tuple, and no error was recorded for that path.
        """
        data = super().added
        meta = data['meta']

        # FIXME conditional lifts ...
        if 'award_number' not in meta:
            am = self.lifters.award_manual
            if am:
                meta['award_number'] = am

        if 'modality' not in meta:
            m = self.lifters.modality
            if m:
                meta['modality'] = m

        # NOTE organ is not lifted from the award number here,
        # it is now attached directly to the award

        if 'organ' not in meta or meta['organ'] == 'othertargets':
            o = self.lifters.organ_term
            if o:
                if isinstance(o, str):
                    o = o,

                out = tuple()
                for _o in o:
                    _o = OntId(_o)
                    if _o.prefix == 'FMA':
                        # swap FMA ids for the UBERON result with the same label
                        ot = OntTerm(_o)
                        _o = next(
                            OntTerm.query(label=ot.label, prefix='UBERON'))

                    out += (_o, )

                meta['organ'] = out

        if 'protocol_url_or_doi' not in meta:
            if self.lifters.protocol_uris:
                meta['protocol_url_or_doi'] = tuple(
                    self.lifters.protocol_uris)

        elif not isinstance(meta['protocol_url_or_doi'], tuple):
            # a malformed value is only acceptable if it was already reported;
            # error records without a path are skipped instead of raising
            _test_path = deque(['meta', 'protocol_url_or_doi'])
            if not any('path' in e and e['path'] == _test_path
                       for e in data['errors']):
                raise ext.ShouldNotHappenError('urg')

        else:
            merged = (meta['protocol_url_or_doi']
                      + tuple(self.lifters.protocol_uris))
            meta['protocol_url_or_doi'] = tuple(sorted(set(merged)))  # ick

        return data
示例#24
0
    def _process(self, contributor):
        """Normalize one contributor record in place and assign its ``id``.

        The record is mutated: ``first_name`` / ``middle_name`` may be
        rewritten, ``blackfynn_user_id`` is added when a member is found,
        ``id`` is always set, collected errors are embedded, and
        ``affiliation`` is added when the affiliation lifter returns a
        match.  Nothing is returned.

        The ``id`` is, in order of preference: a usable orcid, the matched
        member's user id, or an id built from the dataset id and a
        name-derived failover string.
        """
        # get member if we can find them
        he = dat.HasErrors(pipeline_stage=self.__class__.__name__ + '.data')
        has_name = ('contributor_name' in contributor
                    and 'first_name' in contributor)
        if not has_name:
            member = None
            failover = 'no-orcid-no-name'
            log.warning(f'No name!' + lj(contributor))
        else:
            name = contributor['contributor_name']
            if ';' in name:
                msg = f'Bad symbol in name {name!r}'
                he.addError(msg)
                logd.error(msg)

            first = contributor['first_name']
            last = contributor['last_name']
            if ' ' in first:
                # everything after the first space is treated as the middle name
                first, middle_raw = first.split(' ', 1)
                middle = middle_raw.rstrip('.')
                if middle != middle_raw:
                    he.addError(f'Middle initials don\'t need periods :) {name!r}',
                                logfunc=logd.error)

                contributor['middle_name'] = middle
                contributor['first_name'] = first

            if ' ' in last:
                msg = f'Malformed last_name {last!r}'
                he.addError(msg)
                logd.error(msg)
                last = last.replace(' ', '-')

            failover = f'{first}-{last}'
            member = self.member(first, last)
            if member is not None:
                userid = OntId('https://api.blackfynn.io/users/' + member.id)
                contributor['blackfynn_user_id'] = userid

        orcid = None
        if 'contributor_orcid_id' in contributor:
            candidate = contributor['contributor_orcid_id']
            # exact type check on purpose: only plain strings get wrapped
            if type(candidate) == str and 'orcid.org' in candidate:
                candidate = idlib.Orcid(candidate)  # FIXME reloading from json

            # anything that is not an orcid (or is a bad orcid) is dropped
            if isinstance(candidate, idlib.Orcid):
                orcid = candidate

        if orcid is not None:
            s = orcid
        elif member is not None:
            s = userid
        else:
            log.debug(lj(contributor))
            s = OntId(self.dsid + '/contributors/' + failover)

        contributor['id'] = s
        he.embedErrors(contributor)

        # lifting + adding
        if 'contributor_affiliation' in contributor:
            affiliation = contributor['contributor_affiliation']
            maybe_ror = self.lifters.affiliations(affiliation)
            if maybe_ror is not None:
                contributor['affiliation'] = maybe_ror