def _rowhack(self, ns, rdflib):
    # can't overwrite pyontutils.sheets.Row like we do in other cases sigh
    curie = self.curie().value
    if not curie:
        return

    oid = OntId(curie)
    s = oid.u
    _lon = (lambda v: None if not v else rdflib.Literal(v))
    lon = lambda c: _lon(c.value)
    _blon = lambda v: _lon(True if v == 'TRUE' else False)  # sigh sheets datatypes
    blon = lambda c: _blon(c.value)
    _oon = (lambda v: None if not v else OntId(v).u)
    oon = lambda c: _oon(c.value)
    self.type().value  # accessed but currently unused
    self.mistake().value  # accessed but currently unused
    pos = (
        (ns.ilxtr.curationInternal, blon(self.curation_internal())),
        (ns.definition, lon(self.definition())),
        (ns.editorNote, lon(self.notes())),
        (ns.rdf.type, oon(self.rdf_type())),
        (ns.replacedBy, oon(self.replacedby_())),
        (ns.ilxtr.futureType, oon(self.future_type())),
    )
    for p, o in pos:
        if o is not None:
            yield s, p, o
def _process(self, contributor):
    # get member if we can find them
    he = dat.HasErrors(pipeline_stage=self.__class__.__name__ + '.data')
    if 'name' in contributor and 'first_name' in contributor:
        name = contributor['name']
        if ';' in name:
            msg = f'Bad symbol in name {name!r}'
            he.addError(msg)
            logd.error(msg)

        fn = contributor['first_name']
        ln = contributor['last_name']
        if ' ' in fn:
            fn, mn = fn.split(' ', 1)
            contributor['middle_name'] = mn
            contributor['first_name'] = fn

        if ' ' in ln:
            msg = f'Malformed last_name {ln!r}'
            he.addError(msg)
            logd.error(msg)
            ln = ln.replace(' ', '-')

        failover = f'{fn}-{ln}'
        member = self.member(fn, ln)
        if member is not None:
            userid = OntId('https://api.blackfynn.io/users/' + member.id)
            contributor['blackfynn_user_id'] = userid

    else:
        member = None
        failover = 'no-orcid-no-name'
        log.warning('No name!' + lj(contributor))

    orcid = None
    if 'contributor_orcid_id' in contributor:
        orcid = contributor['contributor_orcid_id']
        if type(orcid) == str and 'orcid.org' in orcid:
            orcid = OrcidId(orcid)  # FIXME reloading from json

        if isinstance(orcid, OrcidId):
            s = orcid
        else:  # it's not an orcid or it's a bad orcid
            orcid = None

    if orcid is None:
        if member is not None:
            s = userid
        else:
            log.debug(lj(contributor))
            s = OntId(self.dsid + '/contributors/' + failover)

    contributor['id'] = s
    he.embedErrors(contributor)
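# A standalone sketch (not part of the pipeline) of the name normalization
# performed in _process above: a space in first_name is treated as a
# first/middle split and spaces in last_name are collapsed to hyphens.
# The names below are hypothetical examples.
def split_name(first_name, last_name):
    middle_name = None
    if ' ' in first_name:
        first_name, middle_name = first_name.split(' ', 1)

    if ' ' in last_name:
        last_name = last_name.replace(' ', '-')

    return first_name, middle_name, last_name


print(split_name('Jane Q', 'van Dyke'))  # -> ('Jane', 'Q', 'van-Dyke')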
def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)
    # PLEASE DO NOT PUT PMIDs as external ids!!!
    # FIXME idlib PMID(thing) urg the regex state machine is so simple ;_;
    if self.id.startswith('PMID:'):
        log.warning('PMIDs should never be External IDs!')
        self._c_term = fake
        self.s = OntId(self.id).URIRef
        return

    self.s = OntId(self.id).URIRef
def added(self):
    data = super().added
    # FIXME conditional lifts ...
    if 'award_number' not in data['meta']:
        am = self.lifters.award_manual
        if am:
            data['meta']['award_number'] = am

    if 'modality' not in data['meta']:
        m = self.lifters.modality
        if m:
            data['meta']['modality'] = m

    if 'organ' not in data['meta']:
        if 'award_number' in data['meta']:
            an = data['meta']['award_number']
            o = self.lifters.organ(an)
            if o:
                if o != 'othertargets':
                    o = OntId(o)
                    if o.prefix == 'FMA':
                        ot = OntTerm(o)
                        o = next(OntTerm.query(label=ot.label, prefix='UBERON')).OntTerm

                data['meta']['organ'] = o

    if 'organ' not in data['meta'] or data['meta']['organ'] == 'othertargets':
        o = self.lifters.organ_term
        if o:
            if isinstance(o, str):
                o = o,

            out = tuple()
            for _o in o:
                _o = OntId(_o)
                if _o.prefix == 'FMA':
                    ot = OntTerm(_o)
                    _o = next(OntTerm.query(label=ot.label, prefix='UBERON')).OntTerm

                out += (_o,)

            data['meta']['organ'] = out

    return data
def l(self, value):
    if isinstance(value, idlib.Stream) and hasattr(value, '_id_class'):
        if hasattr(value, 'asUri'):  # FIXME
            return value.asUri(rdflib.URIRef)
        else:
            return value.asType(rdflib.URIRef)

    if isinstance(value, OntId):
        return value.u
    if isinstance(value, ProtcurExpression):
        return value
    if isinstance(value, Quantity):
        return value
    elif isinstance(value, str) and value.startswith('http'):
        return OntId(value).u
    elif isinstance(value, dict):
        # FIXME this is too late to convert?
        # NOPE! This idiot put a type field in his json dicts!
        if 'type' in value:
            if value['type'] == 'quantity':
                return self.pyru._Quant.fromJson(value)
            elif value['type'] == 'range':
                return self.pyru.Range.fromJson(value)
            elif value['type'] == 'identifier':
                return fromJson(value).asType(rdflib.URIRef)

        raise ValueError(value)
    else:
        return rdflib.Literal(value)
def triples_protcur(self, protocol_subject):
    ps = list(self._protcur(str(protocol_subject)))
    anatomy = [(p, OntId('UBERON:' + str(p).split('UBERON:', 1)[-1].split(' ', 1)[0]))
               for p in ps
               if p.astType == 'protc:input' and '(protc:input (term UBERON' in str(p)]
    #breakpoint()
    dataset_subject = rdflib.URIRef(self.uri_api)
    yield protocol_subject, TEMP.hasNumberOfProtcurAnnotations, rdflib.Literal(len(ps))
    done = set()
    for anno, term in anatomy:
        if term in done:
            continue

        done.add(term)
        o = term.u
        t = dataset_subject, TEMP.involvesAnatomicalRegion, o
        sl = rdflib.URIRef(anno.shareLink)
        av = (((ilxtr.annotationValue, rdflib.Literal(anno.value)),)
              if anno.value != o else tuple())
        notes = [(ilxtr.curatorNote, rdflib.Literal(n)) for n in anno.curatorNotes]
        prov = [(ilxtr.hasAnnotation, sl)]
        yield t
        yield from cmb.annotation(t, *av, *notes, *prov)()
def organ_term(self, dataset_id):
    row = self._lookup(dataset_id)
    if row:
        ot = row.organ_term if row.organ_term else None
        if ot:
            ts = tuple(OntId(t) for t in ot.split(' ') if t)
            return ts
def normv(v):
    if isinstance(v, str) and v.startswith('http'):
        # needed for loading from json that has been serialized
        # rather than from our internal representation
        # probably better to centralize the reload ...
        oid = OntId(v)
        if oid.prefix in want_prefixes:
            return OntTerm(v).tabular()
        else:
            return oid.iri

    if isinstance(v, OntId):
        if not isinstance(v, OntTerm):
            v = OntTerm(v)

        v = v.tabular()
    if isinstance(v, list) or isinstance(v, tuple):
        v = ','.join(json.dumps(_, cls=JEncode)
                     if isinstance(_, dict)
                     else normv(_)
                     for _ in v)
        v = v.replace('\n', ' ').replace('\t', ' ')
    elif any(isinstance(v, c) for c in (int, float, str)):
        v = str(v)
        v = v.replace('\n', ' ').replace('\t', ' ')  # FIXME tests to catch this
    elif isinstance(v, dict):
        v = json.dumps(v, cls=JEncode)

    return v
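# A simplified, self-contained sketch of the flattening behavior in normv
# above: dicts become JSON, lists/tuples become comma-joined cells, and
# scalars become whitespace-normalized strings. The identifier handling
# (OntId, OntTerm, want_prefixes, JEncode) is deliberately omitted here.
import json


def normv_sketch(v):
    if isinstance(v, (list, tuple)):
        v = ','.join(json.dumps(_) if isinstance(_, dict) else normv_sketch(_)
                     for _ in v)
        return v.replace('\n', ' ').replace('\t', ' ')
    elif isinstance(v, (int, float, str)):
        return str(v).replace('\n', ' ').replace('\t', ' ')
    elif isinstance(v, dict):
        return json.dumps(v)

    return v


print(normv_sketch(['a\tb', 3, {'k': 'v'}]))  # -> a b,3,{"k": "v"}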
def l(self, value):
    if isinstance(value, BlackfynnId):
        # FIXME this has to come first due ordering and impl issues with
        # bfpnids being streams but not supporting all sorts of stuff
        return rdflib.URIRef(value.uri_api)
    if isinstance(value, idlib.Stream) and hasattr(value, '_id_class'):
        if hasattr(value, 'asUri'):  # FIXME
            return value.asUri(rdflib.URIRef)
        else:
            return value.asType(rdflib.URIRef)

    if isinstance(value, OntId):
        return value.u
    if isinstance(value, ProtcurExpression):
        return value
    if isinstance(value, Quantity):
        return value
    elif isinstance(value, str) and value.startswith('http'):
        return OntId(value).u
    elif isinstance(value, dict):
        # FIXME this is too late to convert?
        # NOPE! This idiot put a type field in his json dicts!
        if 'type' in value:
            if value['type'] == 'quantity':
                return self.pyru._Quant.fromJson(value)
            elif value['type'] == 'range':
                return self.pyru.Range.fromJson(value)
            elif value['type'] == 'identifier':
                return fromJson(value).asType(rdflib.URIRef)

        raise ValueError(value)
    else:
        return rdflib.Literal(value)
def organ_term(self, dataset_id):
    row = self._lookup(dataset_id)
    organ_term = self.byCol.header.index('organ_term')
    if row:
        ot = row[organ_term] if row[organ_term] else None
        if ot:
            ts = tuple(OntId(t) for t in ot.split(' ')
                       if t and t.lower() != 'na')
            return ts
def triples_objects_ordered(self):
    for key, predicate in self.objects_ordered_succession.items():
        if key in self.blob:
            values = self.blob[key]
            if values:
                assert not isinstance(values, str), f'{values} in {key}'
                objects = [OntId(self.context[v.replace(' ', '-')]).URIRef
                           for v in values]
                for s, o in zip(objects[:-1], objects[1:]):
                    yield s, predicate, o

    for key in self.objects_ordered:
        if key in self.blob:
            values = self.blob[key]
            if values:
                assert not isinstance(values, str), f'{values} in {key}'
                objects = [OntId(self.context[v.replace(' ', '-')]).URIRef
                           for v in values]
                # NOTE scigraph does not translate rdf lists
                yield from cmb.olist(*objects)(self.s, ordered[key])
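# A minimal sketch of the succession pattern used in triples_objects_ordered
# above: consecutive members of an ordered value list are linked pairwise via
# zip(objects[:-1], objects[1:]). Only rdflib is used; the namespace, the
# succession predicate, and the member IRIs are hypothetical examples.
import rdflib

EX = rdflib.Namespace('http://example.org/apinat/')
nextChainElement = EX.nextChainElement  # stand-in succession predicate

objects = [EX['segment-1'], EX['segment-2'], EX['segment-3']]


def succession_triples(objects, predicate):
    # each element points at the element that follows it
    for s, o in zip(objects[:-1], objects[1:]):
        yield s, predicate, o


g = rdflib.Graph()
for t in succession_triples(objects, nextChainElement):
    g.add(t)

print(g.serialize(format='nt'))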
def mkval(cell):
    hl = cell.hyperlink
    if hl is not None:
        oid = OntId(hl)
        if oid.prefix == 'TEMP':
            logd.warning(f'{cell.value} -> {oid!r}')
            #return OntTerm(curie=f'lex:{quote(cell.value)}')

        #else:
        return oid.asTerm()
    else:
        logd.warning(f'unhandled technique {cell.value}')
        return cell.value
def award_number(self, value):
    _, s = self.c.award_number(value)
    yield s, a, owl.NamedIndividual
    yield s, a, TEMP.FundedResearchProject
    return  # NOTE everything below is currently unreachable
    o = self.integrator.organ(value)
    if o:
        if o != 'othertargets':
            o = OntId(o)
            if o.prefix == 'FMA':
                ot = OntTerm(o)
                o = next(OntTerm.query(label=ot.label, prefix='UBERON')).OntTerm

            yield s, isAbout, o.u
def organ_term(self, dataset_id):
    row = self._lookup(dataset_id)
    if row:
        organ_term = row.organ_term()
        otv = organ_term.value
        ot = otv if otv else None
        if ot:
            try:
                ts = tuple(OntId(t) for t in ot.split(' ')
                           if t and t.lower() != 'na')
                return ts
            except OntId.BadCurieError:
                log.error(ot)
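# A minimal sketch of the cell parsing shared by the organ_term variants
# above: a sheet cell holding space-separated curies (possibly with 'NA'
# placeholders) is split into a tuple of identifiers. OntId is replaced by a
# plain passthrough here and the cell value is a hypothetical example.
def parse_organ_cell(value, make_id=str):
    if not value:
        return None

    return tuple(make_id(t) for t in value.split(' ')
                 if t and t.lower() != 'na')


print(parse_organ_cell('UBERON:0000948 NA UBERON:0002048'))
# -> ('UBERON:0000948', 'UBERON:0002048')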
@classmethod
def fromRdf(cls, uri, graph, context=None):
    oid = OntId(uri)
    id = oid.curie
    blob = {'id': id}
    for p, o in graph[uri]:
        if p == rdf.type:
            key = 'class'
            value = 'External'
        else:
            if p == rdfs.label:
                key = 'name'
            else:
                _, key = p.rsplit('/', 1)

            if isinstance(o, rdflib.Literal):
                value = o.toPython()
            elif isinstance(o, rdflib.URIRef):
                oid = OntId(o)
                if oid.prefix == 'local':
                    value = oid.suffix
                elif oid.prefix == 'apinatomy':  # FIXME hrm?
                    value = oid.suffix
                else:
                    value = oid.curie  # FIXME external is tricky
                    log.warning(f'{oid!r}')

        if key in cls.objects_multi:
            if key in blob:
                blob[key].append(value)
            else:
                blob[key] = [value]
        else:
            blob[key] = value

    return cls(blob, context)
def triples_objects_multi(self):
    for key in self.objects_multi:
        if key in self.blob:
            values = self.blob[key]
            assert not isinstance(values, str), f'{values} in {key}'
            for value in values:
                if key == 'external':
                    try:
                        o = OntId(value).URIRef
                        yield o, readable.annotates, self.s
                    except OntId.UnknownPrefixError as e:
                        log.exception(e)
                        continue
                elif key == 'inheritedExternal':
                    try:
                        o = OntId(value).URIRef
                    except OntId.UnknownPrefixError as e:
                        log.exception(e)
                        continue
                else:
                    value = value.replace(' ', '-')  # FIXME require no spaces in internal ids
                    o = self.context[value]

                yield self.s, readable[key], o
@classmethod
def fromRdf(cls, uri, graph, context=None):
    _, id = uri.rsplit('/', 1)
    blob = {'id': id}
    for p, o in graph[uri]:
        if p == rdf.type:
            if o != owl.NamedIndividual:
                key = 'class'
                _, value = o.rsplit('/', 1)
            else:
                continue  # TODO s rdf:type apinatomy:External ??

        else:
            _, key = p.rsplit('/', 1)
            if isinstance(o, rdflib.Literal):
                value = o.toPython()
            elif isinstance(o, rdflib.URIRef):
                oid = OntId(o)
                if oid.prefix == 'local':
                    value = oid.suffix
                elif oid.prefix == 'apinatomy':  # FIXME hrm?
                    value = oid.suffix
                else:
                    value = oid.curie  # FIXME external is tricky
                    log.warning(f'{oid!r}')
            elif isinstance(o, rdflib.BNode):
                raise NotImplementedError('a bit more complex ...')
            else:
                raise NotImplementedError(f'{o}')

        if key in cls.objects_ordered:  # ordered representation takes priority
            raise NotImplementedError('TODO this is quite a bit more complex')
            if key in blob:
                blob[key].append(value)
            else:
                blob[key] = [value]
        elif key in cls.objects_multi:
            if key in blob:
                blob[key].append(value)
            else:
                blob[key] = [value]
        else:
            blob[key] = value

    return cls(blob, context)
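# A small sketch of the blob accumulation rule used by both fromRdf variants
# above: keys declared as multi-valued collect into lists, everything else is
# a plain assignment. objects_multi and the key/value pairs here are
# hypothetical examples.
objects_multi = {'external', 'materials'}


def accumulate(blob, key, value):
    if key in objects_multi:
        blob.setdefault(key, []).append(value)
    else:
        blob[key] = value

    return blob


blob = {}
for k, v in [('id', 'lyph-1'), ('external', 'UBERON:0000948'), ('external', 'FMA:7088')]:
    accumulate(blob, k, v)

print(blob)  # {'id': 'lyph-1', 'external': ['UBERON:0000948', 'FMA:7088']}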
def _psd(self, rec, dsi):
    type = rec['type']
    spec_id = rec['specimen_id']
    if type == 'SampleDirs':
        sid = self.primary_key(spec_id)
    elif type == 'SubjectDirs':
        sid = self.subject_id(spec_id)
    else:
        raise NotImplementedError(f'wat {type}')

    for drp in rec['dirs']:
        path_record = dsi[drp]
        collection_id = path_record['remote_id']
        #p = (self.data['prov']['export_project_path'] /
             #self.data['meta']['folder_name'] /
             #drp)
        #cid = p.cache.cache.uri_api
        cid = OntId(collection_id).u
        yield sid, TEMP.hasFolderAboutIt, cid
def l(self, value):
    if isinstance(value, OntId):
        return value.u
    if isinstance(value, Expr):
        return value
    if isinstance(value, Quantity):
        return value
    elif isinstance(value, str) and value.startswith('http'):
        return OntId(value).u
    elif isinstance(value, dict):
        # FIXME this is too late to convert?
        # NOPE! This idiot put a type field in his json dicts!
        if 'type' in value:
            if value['type'] == 'quantity':
                return Quantity.fromJson(value)
            elif value['type'] == 'range':
                return Range.fromJson(value)

        raise ValueError(value)
    else:
        return rdflib.Literal(value)
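# A reduced, self-contained sketch of the coercion logic in the l variants
# above: strings that look like IRIs become URIRefs, dicts are dispatched on
# their 'type' field, and everything else falls through to an rdflib Literal.
# The Quantity and Range cases are stubbed out here because they live in
# external libraries.
import rdflib


def coerce(value):
    if isinstance(value, str) and value.startswith('http'):
        return rdflib.URIRef(value)
    elif isinstance(value, dict):
        if value.get('type') == 'quantity':
            raise NotImplementedError('would call Quantity.fromJson(value)')
        elif value.get('type') == 'range':
            raise NotImplementedError('would call Range.fromJson(value)')

        raise ValueError(value)
    else:
        return rdflib.Literal(value)


print(coerce('http://example.org/thing'))  # URIRef
print(coerce(42))                          # Literal with xsd:integer datatype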
def triples_gen(self):
    rm = self._source

    # FIXME there doesn't seem to be a section that tells me the name
    # of the top level model so I have to know its name beforehand
    # the id is in the model, having the id in the resource map
    # prevents issues if these things get sent decoupled
    id = rm['id']
    mid = id.replace(' ', '-')

    links = rm[id]['links']
    #linknodes = [n for n in rm[id]['nodes'] if n['class'] == 'Link']  # visible confusion

    st = []
    from_to = []
    ot = None
    yield from self.apinatbase()
    for link in links:
        if 'conveyingType' in link:
            if link['conveyingType'] == 'ADVECTIVE':
                p_is = TEMP.isAdvectivelyConnectedTo
                p_from = TEMP.advectivelyConnectsFrom
                p_to = TEMP.advectivelyConnectsTo
                p_cmat = TEMP.advectivelyConnectsMaterial
                diffusive = False
            elif link['conveyingType'] == 'DIFFUSIVE':
                p_is = TEMP.isDiffusivelyConnectedTo
                p_from = TEMP.diffusivelyConnectsFrom
                p_to = TEMP.diffusivelyConnectsTo
                p_cmat = TEMP.diffusivelyConnectsMaterial
                diffusive = True
            else:
                log.critical(f'unhandled conveying type {link}')
                continue

            source = link['source']
            target = link['target']
            ok = True
            if len(from_to) == 2:  # otherwise
                st = []
                from_to = []

            for i, e in enumerate((source, target)):
                ed = rm[e]
                if 'external' not in ed:
                    if not i and from_to:
                        # TODO make sure the intermediate ids match
                        pass
                    else:
                        ok = False
                        break
                else:
                    st.append(e)
                    from_to.append(OntId(ed['external'][0]))

            conveying = link['conveyingLyph']
            cd = rm[conveying]
            if 'external' in cd:
                old_ot = ot
                ot = OntTerm(cd['external'][0])
                yield ot.u, rdf.type, owl.Class
                yield ot.u, TEMP.internalId, rdflib.Literal(conveying)
                yield ot.u, rdfs.label, rdflib.Literal(ot.label)

                yield from self.materialTriples(ot.u, link, p_cmat)  # FIXME locate this correctly

                if ok:
                    u, d = from_to
                    if st[0] == source:
                        yield u, rdfs.label, rdflib.Literal(OntTerm(u).label)
                        yield u, rdf.type, owl.Class
                        yield from cmb.restriction.serialize(ot.u, p_from, u)

                    if st[1] == target:
                        yield d, rdfs.label, rdflib.Literal(OntTerm(d).label)
                        yield d, rdf.type, owl.Class
                        yield from cmb.restriction.serialize(ot.u, p_to, d)

                if old_ot is not None and old_ot != ot:
                    yield from cmb.restriction.serialize(ot.u, p_from, old_ot.u)

            if diffusive:
                # we can try to hack this using named individuals
                # but it is not going to do exactly what is desired
                s_link = TEMP[f'ApiNATOMY/{mid}/{link["id"]}']
                s_cd = TEMP[f'ApiNATOMY/{mid}/{cd["id"]}']
                yield s_link, rdf.type, owl.NamedIndividual
                yield s_link, rdf.type, TEMP.diffusiveLink  # FIXME I'm not sure these go in the model ...
                yield s_cd, rdf.type, owl.NamedIndividual
                if 'external' in cd and cd['external']:
                    oid = OntId(cd['external'][0])
                    yield s_cd, rdf.type, oid.u
                    ot = oid.asTerm()
                    if ot.label:
                        yield oid.u, rdfs.label, ot.label

                else:
                    yield s_cd, rdf.type, TEMP.conveyingLyph

                for icd in cd['inCoalescences']:
                    dcd = rm[icd]
                    log.info(lj(dcd))
                    s_icd = TEMP[f'ApiNATOMY/{mid}/{dcd["id"]}']
                    yield s_cd, TEMP.partOfCoalescence, s_icd
                    yield s_icd, rdf.type, owl.NamedIndividual
                    yield s_icd, rdf.type, TEMP['ApiNATOMY/Coalescence']
                    if 'external' in dcd and dcd['external']:
                        oid = OntId(dcd['external'][0])
                        yield s_icd, rdf.type, oid.u
                        ot = oid.asTerm()
                        if ot.label:
                            yield oid.u, rdfs.label, ot.label

                    for lyphid in dcd['lyphs']:
                        ild = rm[lyphid]
                        log.info(lj(ild))
                        if 'external' in ild and ild['external']:
                            yield s_icd, TEMP.hasLyphWithMaterial, OntId(ild['external'][0])

            if not ok:
                logd.info(f'{source} {target} issue')
                continue

            for inid, e in zip(st, from_to):
                yield e.u, rdf.type, owl.Class
                yield e.u, rdfs.label, rdflib.Literal(OntTerm(e).label)
                yield e.u, TEMP.internalId, rdflib.Literal(inid)

            f, t = from_to
            yield from cmb.restriction.serialize(f.u, p_is, t.u)
def added(self):
    data = super().added
    if data['meta'] == {'techniques': []}:
        breakpoint()

    # FIXME conditional lifts ...
    if 'award_number' not in data['meta']:
        am = self.lifters.award_manual
        if am:
            data['meta']['award_number'] = am

    if 'modality' not in data['meta']:
        m = self.lifters.modality
        if m:
            data['meta']['modality'] = m

    if False and 'organ' not in data['meta']:
        # skip here, now attached directly to award
        if 'award_number' in data['meta']:
            an = data['meta']['award_number']
            o = self.lifters.organ(an)
            if o:
                if o != 'othertargets':
                    o = OntId(o)
                    if o.prefix == 'FMA':
                        ot = OntTerm(o)
                        o = next(OntTerm.query(label=ot.label, prefix='UBERON'))

                data['meta']['organ'] = o

    if 'organ' not in data['meta'] or data['meta']['organ'] == 'othertargets':
        o = self.lifters.organ_term
        if o:
            if isinstance(o, str):
                o = o,

            out = tuple()
            for _o in o:
                _o = OntId(_o)
                if _o.prefix == 'FMA':
                    ot = OntTerm(_o)
                    _o = next(OntTerm.query(label=ot.label, prefix='UBERON'))

                out += (_o,)

            data['meta']['organ'] = out

    if 'protocol_url_or_doi' not in data['meta']:
        if self.lifters.protocol_uris:
            data['meta']['protocol_url_or_doi'] = tuple(self.lifters.protocol_uris)

    else:
        if not isinstance(data['meta']['protocol_url_or_doi'], tuple):
            _test_path = deque(['meta', 'protocol_url_or_doi'])
            if not [e for e in data['errors']
                    if 'path' in e and e['path'] == _test_path]:
                raise ext.ShouldNotHappenError('urg')

        else:
            data['meta']['protocol_url_or_doi'] += tuple(self.lifters.protocol_uris)
            data['meta']['protocol_url_or_doi'] = tuple(
                sorted(set(data['meta']['protocol_url_or_doi'])))  # ick

    # FIXME this is a really bad way to do this :/ maybe stick the folder in data['prov'] ?
    # and indeed, when we added PipelineStart this shifted and broke everything
    local = (self
             .previous_pipeline.pipelines[0]
             .previous_pipeline.pipelines[0]
             .previous_pipeline.pipelines[0]
             .path)
    remote = local.remote
    if 'doi' not in data['meta']:
        doi = remote.doi
        if doi is not None:
            try:
                metadata = doi.metadata()
                if metadata is not None:
                    data['meta']['doi'] = doi.identifier
            except requests.exceptions.HTTPError:
                data['meta']['doi'] = None
        else:
            data['meta']['doi'] = None

    if 'status' not in data:
        data['status'] = {}

    if 'status_on_platform' not in data['status']:
        data['status']['status_on_platform'] = remote.bfobject.status

    return data
def triples_external(self):
    # NOTE the original guard checked 'externals' while the loop read
    # 'external'; the singular key is what the rest of this class uses
    if 'external' in self.blob:
        for external in self.blob['external']:
            yield self.s, rdf.type, OntId(external).URIRef
def added(self):
    data = super().added
    if data['meta'] == {'techniques': []}:
        breakpoint()

    # FIXME conditional lifts ...
    if 'award_number' not in data['meta']:
        am = self.lifters.award_manual
        if am:
            data['meta']['award_number'] = am

    if 'modality' not in data['meta']:
        m = self.lifters.modality
        if m:
            data['meta']['modality'] = m

    if False and 'organ' not in data['meta']:
        # skip here, now attached directly to award
        if 'award_number' in data['meta']:
            an = data['meta']['award_number']
            o = self.lifters.organ(an)
            if o:
                if o != 'othertargets':
                    o = OntId(o)
                    if o.prefix == 'FMA':
                        ot = OntTerm(o)
                        o = next(OntTerm.query(label=ot.label, prefix='UBERON'))

                data['meta']['organ'] = o

    if 'organ' not in data['meta'] or data['meta']['organ'] == 'othertargets':
        o = self.lifters.organ_term
        if o:
            if isinstance(o, str):
                o = o,

            out = tuple()
            for _o in o:
                _o = OntId(_o)
                if _o.prefix == 'FMA':
                    ot = OntTerm(_o)
                    _o = next(OntTerm.query(label=ot.label, prefix='UBERON'))

                out += (_o,)

            data['meta']['organ'] = out

    if 'protocol_url_or_doi' not in data['meta']:
        if self.lifters.protocol_uris:
            data['meta']['protocol_url_or_doi'] = tuple(self.lifters.protocol_uris)

    else:
        if not isinstance(data['meta']['protocol_url_or_doi'], tuple):
            _test_path = deque(['meta', 'protocol_url_or_doi'])
            if not [e for e in data['errors'] if e['path'] == _test_path]:
                raise ext.ShouldNotHappenError('urg')

        else:
            data['meta']['protocol_url_or_doi'] += tuple(self.lifters.protocol_uris)
            data['meta']['protocol_url_or_doi'] = tuple(
                sorted(set(data['meta']['protocol_url_or_doi'])))  # ick

    return data
def _process(self, contributor):
    # get member if we can find them
    he = dat.HasErrors(pipeline_stage=self.__class__.__name__ + '.data')
    if 'contributor_name' in contributor and 'first_name' in contributor:
        name = contributor['contributor_name']
        if ';' in name:
            msg = f'Bad symbol in name {name!r}'
            he.addError(msg)
            logd.error(msg)

        fn = contributor['first_name']
        ln = contributor['last_name']
        if ' ' in fn:
            fn, mn = fn.split(' ', 1)
            mn, _mn = mn.rstrip('.'), mn
            if mn != _mn:
                he.addError(f'Middle initials don\'t need periods :) {name!r}',
                            logfunc=logd.error)

            contributor['middle_name'] = mn
            contributor['first_name'] = fn

        if ' ' in ln:
            msg = f'Malformed last_name {ln!r}'
            he.addError(msg)
            logd.error(msg)
            ln = ln.replace(' ', '-')

        failover = f'{fn}-{ln}'
        member = self.member(fn, ln)
        if member is not None:
            userid = OntId('https://api.blackfynn.io/users/' + member.id)
            contributor['blackfynn_user_id'] = userid

    else:
        member = None
        failover = 'no-orcid-no-name'
        log.warning('No name!' + lj(contributor))

    orcid = None
    if 'contributor_orcid_id' in contributor:
        orcid = contributor['contributor_orcid_id']
        if type(orcid) == str and 'orcid.org' in orcid:
            orcid = idlib.Orcid(orcid)  # FIXME reloading from json

        if isinstance(orcid, idlib.Orcid):
            s = orcid
        else:  # it's not an orcid or it's a bad orcid
            orcid = None

    if orcid is None:
        if member is not None:
            s = userid
        else:
            log.debug(lj(contributor))
            s = OntId(self.dsid + '/contributors/' + failover)

    contributor['id'] = s
    he.embedErrors(contributor)

    # lifting + adding
    if 'contributor_affiliation' in contributor:
        ca = contributor['contributor_affiliation']
        maybe_ror = self.lifters.affiliations(ca)
        if maybe_ror is not None:
            contributor['affiliation'] = maybe_ror