def get(self, uri):
    """ Fetch uri over the network and return the decoded json body.

        The request is sent with ``self._pio_header`` as its headers.

        Returns the result of ``resp.json()`` when ``resp.ok`` is true.
        Otherwise the failure is logged, recorded with ``self.addError``
        when the body provides ``status_code`` and ``error_message``, and
        None is returned.

        Raises whatever ``resp.json()`` raises when an ok response can
        not be decoded, the exception is logged before it propagates.
    """
    #juri = uri + '.json'
    logd.info(uri)
    log.debug('going to network for protocols')
    resp = requests.get(uri, headers=self._pio_header)
    #log.info(str(resp.request.headers))
    if resp.ok:
        try:
            return resp.json()  # the api is reasonably consistent
        except Exception as e:
            # no breakpoint() here, it would stall unattended runs
            log.exception(e)
            raise  # bare raise keeps the original traceback

    try:
        j = resp.json()
        sc = j['status_code']
        em = j['error_message']
        msg = f'protocol issue {uri} {resp.status_code} {sc} {em} {self.id!r}'
        logd.error(msg)
        self.addError(msg)
        # can't return here because of the cache
    except Exception as e:
        # best effort only: the error body may be missing or malformed
        # Exception rather than BaseException so that KeyboardInterrupt
        # and SystemExit are not swallowed
        log.exception(e)

    logd.error(f'protocol no access {uri} {self.id!r}')
    return None
def _derive(data, derives, source_key_optional=True, allow_empty=False):
    # OLD
    """ Compute new values from values already in data and add them to data.

        derives is a list with the following structure
        [[[source-path, ...], derive-function, [target-path, ...]], ...]

        For each entry the source path is followed through data, the
        derive function is called on the value found at the last key, and
        each returned value is added at the matching target path with
        ``adops.add(..., fail_on_exists=True)``.

        source_key_optional: when true a missing final source key is
        logged and the function returns immediately, when false
        ``exc.NoSourcePathError`` is raised.

        allow_empty: when false a derived value that is None, or that has
        ``__iter__`` and a length of zero, raises ValueError.

        Raises ``exc.NoSourcePathError`` when a source path key other than
        the first is missing, and TypeError when the derive function does
        not return one value per target path.
    """
    # TODO this is an implementation of copy that has semantics for handling lists
    for source_path, function, target_paths in derives:
        source_prefixes = source_path[:-1]
        source_key = source_path[-1]
        source = data
        failed = False
        for i, node_key in enumerate(source_prefixes):
            log.debug(lj(source))
            if node_key in source:
                source = source[node_key]
            else:
                msg = f'did not find {node_key} in {source.keys()}'
                if not i:
                    # a missing first key only skips this derive entry
                    log.error(msg)
                    failed = True
                    break

                raise exc.NoSourcePathError(msg)

            if isinstance(source, (list, tuple)):
                # hand the rest of the path to each element of the sequence
                new_source_path = source_prefixes[i + 1:] + [source_key]
                new_target_paths = [tp[i + 1:] for tp in target_paths]
                new_derives = [(new_source_path, function, new_target_paths)]
                # NOTE(review): allow_empty is not forwarded here, confirm
                # whether _DictTransformer.derive should receive it
                for sub_source in source:
                    _DictTransformer.derive(
                        sub_source, new_derives,
                        source_key_optional=source_key_optional)

                # NOTE(review): this also skips any remaining entries in
                # derives, confirm that is intended
                return  # no more to do here

        if failed:
            continue  # sometimes things are missing we continue to others

        if source_key not in source:
            msg = f'did not find {source_key} in {source.keys()}'
            if source_key_optional:
                return logd.info(msg)
            else:
                raise exc.NoSourcePathError(msg)

        source_value = source[source_key]
        new_values = function(source_value)
        if len(new_values) != len(target_paths):
            # was source_paths, an undefined name that raised NameError
            # and hid the TypeError below
            log.debug(f'{source_path} {target_paths}')
            raise TypeError(f'wrong number of values returned for {function}\n'
                            f'was {len(new_values)} expect {len(target_paths)}')

        #temp = b'__temporary'
        #data[temp] = {}  # bytes ensure no collisions
        for target_path, value in zip(target_paths, new_values):
            if (not allow_empty and
                (value is None or
                 hasattr(value, '__iter__') and not len(value))):
                raise ValueError(f'value to add to {target_path} may not be empty!')

            adops.add(data, target_path, value, fail_on_exists=True)
def _get_protocol_json(self, uri):
    """ Retrieve the protocols.io api json for the protocol at uri.

        uri is passed through ``get_right_id`` and the ``slug`` of the
        result is appended to the v3 protocols api endpoint. The request
        is sent with ``self._pio_header`` as its headers.

        Returns the result of ``resp.json()`` when ``resp.ok`` is true.
        Returns None when the identifier does not contain 'protocols.io'
        or when the response is not ok, in both cases the problem is
        logged and, where a message is available, recorded with
        ``self.addError``.

        Raises whatever ``resp.json()`` raises when an ok response can
        not be decoded, the exception is logged before it propagates.
    """
    #juri = uri + '.json'
    logd.info(uri)
    pi = get_right_id(uri)
    if 'protocols.io' in pi:
        pioid = pi.slug  # FIXME normalize before we ever get here ...
        log.info(pioid)
    else:
        msg = f'protocol uri is not from protocols.io {pi} {self.id}'
        logd.error(msg)
        self.addError(msg)
        return None

    #uri_path = uri.rsplit('/', 1)[-1]
    apiuri = 'https://protocols.io/api/v3/protocols/' + pioid
    #'https://www.protocols.io/api/v3/groups/sparc/protocols'
    #apiuri = 'https://www.protocols.io/api/v3/filemanager/folders?top'
    #print(apiuri, header)
    log.debug('going to network for protocols')
    resp = requests.get(apiuri, headers=self._pio_header)
    #log.info(str(resp.request.headers))
    if resp.ok:
        try:
            return resp.json()  # the api is reasonably consistent
        except Exception as e:
            # no breakpoint() here, it would stall unattended runs
            log.exception(e)
            raise  # bare raise keeps the original traceback

    try:
        j = resp.json()
        sc = j['status_code']
        em = j['error_message']
        msg = f'protocol issue {uri} {resp.status_code} {sc} {em} {self.id!r}'
        logd.error(msg)
        self.addError(msg)
        # can't return here because of the cache
    except Exception as e:
        # best effort only: the error body may be missing or malformed
        # Exception rather than BaseException so that KeyboardInterrupt
        # and SystemExit are not swallowed
        log.exception(e)

    logd.error(f'protocol no access {uri} {self.id!r}')
    return None
def triples_gen(self):
    """ Generate 3-tuples describing the links of the model in self._source.

        Yields everything from ``self.apinatbase()`` first and then, for
        each link that has a 'conveyingType' of ADVECTIVE or DIFFUSIVE,
        tuples for the conveying lyph, the link endpoints and the
        connection between them. Links with any other conveying type are
        logged as critical and skipped.

        ``st``, ``from_to`` and ``ot`` are not reset for every link, so
        the output for a link can depend on the links that came before it.
    """
    rm = self._source

    # FIXME there doesn't seem to be a section that tells me the name
    # of top level model so I have to know its name beforhand
    # the id is in the model, having the id in the resource map
    # prevents issues if these things get sent decoupled
    id = rm['id']
    mid = id.replace(' ', '-')  # used to build the TEMP identifiers below

    links = rm[id]['links']
    #linknodes = [n for n in rm[id]['nodes'] if n['class'] == 'Link']  # visible confusion

    st = []  # resource map keys of the endpoints that have 'external'
    from_to = []  # OntIds for the same endpoints, parallel to st
    ot = None  # term for the most recent conveying lyph
    yield from self.apinatbase()
    for link in links:
        if 'conveyingType' in link:
            # select the predicates that match the conveying type
            if link['conveyingType'] == 'ADVECTIVE':
                p_is = TEMP.isAdvectivelyConnectedTo
                p_from = TEMP.advectivelyConnectsFrom
                p_to = TEMP.advectivelyConnectsTo
                p_cmat = TEMP.advectivelyConnectsMaterial
                diffusive = False
            elif link['conveyingType'] == 'DIFFUSIVE':
                p_is = TEMP.isDiffusivelyConnectedTo
                p_from = TEMP.diffusivelyConnectsFrom
                p_to = TEMP.diffusivelyConnectsTo
                p_cmat = TEMP.diffusivelyConnectsMaterial
                diffusive = True
            else:
                log.critical(f'unhandled conveying type {link}')
                continue

            source = link['source']
            target = link['target']
            ok = True
            if len(from_to) == 2:  # otherwise
                # a complete pair was collected for an earlier link
                st = []
                from_to = []

            for i, e in enumerate((source, target)):
                ed = rm[e]
                if 'external' not in ed:
                    # only the source (i == 0) may lack 'external', and
                    # only when from_to still holds an earlier entry
                    if not i and from_to:
                        # TODO make sure the intermediate ids match
                        pass
                    else:
                        ok = False
                        break
                else:
                    st.append(e)
                    from_to.append(OntId(ed['external'][0]))

            conveying = link['conveyingLyph']
            cd = rm[conveying]
            if 'external' in cd:
                old_ot = ot
                ot = OntTerm(cd['external'][0])
                yield ot.u, rdf.type, owl.Class
                yield ot.u, TEMP.internalId, rdflib.Literal(conveying)
                yield ot.u, rdfs.label, rdflib.Literal(ot.label)

                yield from self.materialTriples(
                    ot.u, link, p_cmat)  # FIXME locate this correctly

                if ok:
                    # NOTE(review): assumes from_to holds exactly two
                    # entries here, otherwise the unpacking raises
                    u, d = from_to
                    if st[0] == source:
                        yield u, rdfs.label, rdflib.Literal(
                            OntTerm(u).label)
                        yield u, rdf.type, owl.Class
                        yield from cmb.restriction.serialize(
                            ot.u, p_from, u)

                    if st[1] == target:
                        yield d, rdfs.label, rdflib.Literal(
                            OntTerm(d).label)
                        yield d, rdf.type, owl.Class
                        yield from cmb.restriction.serialize(ot.u, p_to, d)

                # connect this conveying term to the previous one when
                # they differ
                if old_ot is not None and old_ot != ot:
                    yield from cmb.restriction.serialize(
                        ot.u, p_from, old_ot.u)

            if diffusive:
                # we can try to hack this using named individuals
                # but it is not going to do exactly what is desired
                s_link = TEMP[f'ApiNATOMY/{mid}/{link["id"]}']
                s_cd = TEMP[f'ApiNATOMY/{mid}/{cd["id"]}']
                yield s_link, rdf.type, owl.NamedIndividual
                yield s_link, rdf.type, TEMP.diffusiveLink  # FIXME I'm not sure these go in the model ...
                yield s_cd, rdf.type, owl.NamedIndividual
                if 'external' in cd and cd['external']:
                    oid = OntId(cd['external'][0])
                    yield s_cd, rdf.type, oid.u
                    # NOTE(review): this rebinds ot, which the next link
                    # reads as old_ot, confirm that is intended
                    ot = oid.asTerm()
                    if ot.label:
                        # NOTE(review): the label is not wrapped in
                        # rdflib.Literal here unlike the yields above
                        yield oid.u, rdfs.label, ot.label

                else:
                    yield s_cd, rdf.type, TEMP.conveyingLyph
                    for icd in cd['inCoalescences']:
                        dcd = rm[icd]
                        log.info(lj(dcd))
                        s_icd = TEMP[f'ApiNATOMY/{mid}/{dcd["id"]}']
                        yield s_cd, TEMP.partOfCoalescence, s_icd
                        yield s_icd, rdf.type, owl.NamedIndividual
                        yield s_icd, rdf.type, TEMP[
                            'ApiNATOMY/Coalescence']
                        if 'external' in dcd and dcd['external']:
                            oid = OntId(dcd['external'][0])
                            yield s_icd, rdf.type, oid.u
                            ot = oid.asTerm()
                            if ot.label:
                                yield oid.u, rdfs.label, ot.label

                        for lyphid in dcd['lyphs']:
                            ild = rm[lyphid]
                            log.info(lj(ild))
                            if 'external' in ild and ild['external']:
                                # NOTE(review): the object is an OntId,
                                # other yields use the .u attribute
                                yield s_icd, TEMP.hasLyphWithMaterial, OntId(
                                    ild['external'][0])

            if not ok:
                logd.info(f'{source} {target} issue')
                continue

            # describe both endpoints and then the connection itself
            for inid, e in zip(st, from_to):
                yield e.u, rdf.type, owl.Class
                yield e.u, rdfs.label, rdflib.Literal(OntTerm(e).label)
                yield e.u, TEMP.internalId, rdflib.Literal(inid)

            f, t = from_to
            yield from cmb.restriction.serialize(f.u, p_is, t.u)