def run_reasoner(self):
    """Materialize the OWL-RL deductive closure over this graph plus the
    MIS base graph and write the result to the cache as nifttl.

    Side effects: writes ``reasoned-curation-export.ttl`` under the
    configured cache-path.
    """
    base_graph = self._mis_graph()
    reasoned = self._mis_graph()
    # load this object's triples into both graphs; only the reasoned
    # copy is expanded, base_graph mirrors the pre-closure state
    for triple in self.triples():
        base_graph.add(triple)
        reasoned.add(triple)

    rdfc.DeductiveClosure(rdfc.OWLRL_Semantics).expand(reasoned)
    out_path = auth.get_path('cache-path') / 'reasoned-curation-export.ttl'
    with open(out_path, 'wb') as f:
        f.write(reasoned.serialize(format='nifttl'))
def __init__(self, *args, **kwargs):
    """Resolve the google service account file for this session.

    Picks the readonly or read-write service account file from secrets
    depending on the ``readonly`` kwarg, stashes it on ``self._saf`` for
    the duration of ``super().__init__``, and always clears it afterward
    so the credential path does not linger on the instance.
    """
    self._cache_path = auth.get_path('cache-path') / 'google_sheets'
    if not self._only_cache:
        try:
            # readonly=True is default so we take this branch if not set
            if 'readonly' not in kwargs or kwargs['readonly']:
                self._saf = auth.get_path(
                    'google-api-service-account-file-readonly')
            else:
                self._saf = auth.get_path(
                    'google-api-service-account-file-rw')
        except KeyError as e:
            # secrets entry missing is survivable; parent may have
            # another way to authenticate
            log.warning(e)  # fixed: log.warn is a deprecated alias
        except Exception as e:
            log.exception(e)

    try:
        super().__init__(*args, **kwargs)
    finally:
        # never keep the service account file path around after init
        self._saf = None
def latest_ir(org_id=None):
    """Return the latest exported internal representation.

    When ``org_id`` is not given, it falls back to the configured
    blackfynn-organization from secrets.
    """
    if org_id is None:
        org_id = auth.get('blackfynn-organization')

    exporter = Export(auth.get_path('export-path'),
                      None, None, None,
                      latest=True,
                      org_id=org_id)
    return exporter.latest_ir
def setup(cls, creds_file=None):
    """Authenticate against protocols.io and wire a shared instance into
    every class listed in ``cls._instance_wanted_by``.

    Raises TypeError when no creds_file is given and none is in secrets.
    """
    if creds_file is None:
        try:
            creds_file = auth.get_path('protocols-io-api-creds-file')
        except KeyError as e:
            raise TypeError('creds_file is a required argument'
                            ' unless you have it in secrets') from e

    _pio_creds = get_protocols_io_auth(creds_file)
    # NOTE(review): the near-identical ProtocolData.setup uses
    # _pio_creds.access_token here instead of .token — confirm which
    # attribute get_protocols_io_auth actually provides
    cls._pio_header = QuietDict(
        {'Authorization': 'Bearer ' + _pio_creds.token})
    shared = cls()
    for wants in cls._instance_wanted_by:
        wants._protocol_data = shared
def _file_type_status_lookup(cls):
    """Build and memoize (on the class) two lookup tables derived from
    resources/mimetypes.json: mimetype -> status and suffix -> status.

    Returns the (mimetypes, suffixes) pair of dicts on every call;
    the file is only read once per class.
    """
    import json  # FIXME
    if not hasattr(cls, '_sigh_ftslu'):
        resources = auth.get_path('resources')
        with open(resources / 'mimetypes.json', 'rt') as f:
            classification = json.load(f)

        def _mts(obj):
            # an entry's 'mimetype' may be a single string or a sequence
            mt = obj['mimetype']
            return mt if is_list_or_tuple(mt) else (mt,)

        mimetypes = {}
        suffixes = {}
        # later entries overwrite earlier ones, same as the original
        # dict-comprehension construction order
        for status, objs in classification.items():
            for obj in objs:
                suffixes[obj['suffix']] = status
                for mimetype in _mts(obj):
                    mimetypes[mimetype] = status

        cls._mimetypes_lu, cls._suffixes_lu = mimetypes, suffixes
        cls._sigh_ftslu = True

    return cls._mimetypes_lu, cls._suffixes_lu
class RorInst(URIInstrumentation, RorId):
    """Instrumented ROR identifier: dereferences the id against the ROR
    API (disk-cached) and exposes the registry record as properties and
    as RDF triples."""

    @property
    def data(self):
        # full registry record for this id; network fetch is hidden
        # behind the cache decorator on _data
        return self._data(self.suffix)

    @cache(Path(auth.get_path('cache-path'), 'ror_json'), create=True)
    def _data(self, suffix):
        # TODO data endpoint prefix ?? vs data endpoint pattern ...
        # NOTE(review): silently returns None on a non-ok response;
        # downstream properties will then raise on subscripting
        resp = requests.get(RorId(prefix='ror.api', suffix=suffix))
        if resp.ok:
            return resp.json()

    @property
    def name(self):
        return self.data['name']

    label = name  # map their schema to ours

    def asExternalId(self, id_class):
        # Return this record's external id wrapped in id_class, or None
        # when the record carries no id of that kind.
        eids = self.data['external_ids']
        if id_class._ror_key in eids:
            eid_record = eids[id_class._ror_key]
            if eid_record['preferred']:
                eid = eid_record['preferred']
            else:
                eid_all = eid_record['all']
                # https://github.com/ror-community/ror-api/issues/53
                # 'all' is sometimes a bare string instead of a list
                if isinstance(eid_all, str):
                    eid = eid_all
                else:
                    eid = eid_all[0]

            return id_class(eid)

    # ROR 'types' values mapped onto our ontology classes
    _type_map = {
        'Education': TEMP.Institution,
        'Healthcare': TEMP.Institution,
        'Facility': TEMP.CoreFacility,
        'Nonprofit': TEMP.Nonprofit,
        'Other': TEMP.Institution,
    }

    @property
    def institutionTypes(self):
        # Yield one mapped class per entry in the record's 'types';
        # raises TypeError if the record has no 'types' key at all.
        if 'types' in self.data:
            for t in self.data['types']:
                if t == 'Other':
                    # keep an eye on what gets classified as Other
                    log.info(self.label)

                yield self._type_map[t]

        else:
            log.critical(self.data)
            raise TypeError('wat')

    @property
    def synonyms(self):
        # FIXME how to deal with type conversion an a saner way ...
        d = self.data
        yield from [rdflib.Literal(s) for s in d['aliases']]
        yield from [rdflib.Literal(s) for s in d['acronyms']]
        yield from [rdflib.Literal(l['label'], lang=l['iso639'])
                    for l in d['labels']]

    @property
    def triples_gen(self):
        """ produce a triplified version of the record """
        s = self.u
        a = rdf.type
        yield s, a, owl.NamedIndividual
        for o in self.institutionTypes:
            yield s, a, o

        yield s, rdfs.label, rdflib.Literal(self.label)
        for o in self.synonyms:
            # FIXME this looses information about synonym type
            yield s, NIFRID.synonym, o
# NOTE(review): this span is the interior of a function whose header is not
# visible here; project_path, clidoc, exc, parse_defaults, Options, Report
# and Summary are all bound outside this view.
raise exc.NotInProjectError(f'{project_path}')

# default option values parsed out of the docopt usage string;
# flags with no argument default to None
defaults = {o.name: o.value if o.argcount else None
            for o in parse_defaults(clidoc)}
# synthetic argv equivalent for driving Report outside the cli
args = {'server': True,
        '--raw': False,
        '--latest': True,
        '--sort-count-desc': True,
        '--project-path': project_path,
        '--tab-table': False,
        '<path>': [],
        '--verbose': False,
        '--export-path': auth.get_path('export-path'),
        '--partial': False,
        '--open': False,
        }
options = Options(args, defaults)
report = Report(options)

# set report paths that would normally be populated from Main
report.cwd = options.project_path
report.project_path = options.project_path
report.project_id = project_path.cache.id  # FIXME should not have to do this manually?
report.anchor = project_path.cache
report.summary = Summary(options.project_path)
report._timestamp = None  # FIXME
report._folder_timestamp = None  # FIXME
class ProtocolData(dat.HasErrors):
    # this class is best used as a helper class not as a __call__ class
    """Fetch and cache protocols.io protocol records for a dataset.

    ``setup`` must be called once to obtain the api token before any
    instance can go to the network."""

    # classes that receive the shared instance created by setup()
    _instance_wanted_by = PioInst, PioUserInst

    def __init__(self, id=None):  # FIXME lots of ways to use this class ...
        self.id = id  # still needed for the converters use case :/
        # FIXME protocol data shouldn't need do know anything about
        # what dataset is using it, >_<
        super().__init__(pipeline_stage=self.__class__)

    def protocol(self, uri):
        return self._get_protocol_json(uri)

    __call__ = protocol

    @classmethod
    def setup(cls, creds_file=None):
        # Authenticate against protocols.io and hand a shared instance
        # to every class in _instance_wanted_by.
        if creds_file is None:
            try:
                creds_file = auth.get_path('protocols-io-api-creds-file')
            except KeyError as e:
                raise TypeError('creds_file is a required argument'
                                ' unless you have it in secrets') from e

        _pio_creds = get_protocols_io_auth(creds_file)
        cls._pio_header = QuietDict(
            {'Authorization': 'Bearer ' + _pio_creds.access_token})
        _inst = cls()
        for wants in cls._instance_wanted_by:
            wants._protocol_data = _inst

    @classmethod
    def cache_path(cls):
        return config.protocol_cache_path

    @property
    def protocol_uris_resolved(self):
        # memoize the fully resolved uris on the instance
        if not hasattr(self, '_c_protocol_uris_resolved'):
            self._c_protocol_uris_resolved = list(self._protocol_uris_resolved)

        return self._c_protocol_uris_resolved

    @property
    def _protocol_uris_resolved(self):
        # FIXME quite slow ...
        # walk each redirect chain to its end and yield the final uri
        for start_uri in self.protocol_uris:
            log.debug(start_uri)
            for end_uri in resolution_chain(start_uri):
                pass
            else:
                yield end_uri

    @property
    def protocol_annotations(self):
        for uri in self.protocol_uris_resolved:
            yield from protc.byIri(uri, prefix=True)

    @property
    def protocol_jsons(self):
        for uri in self.protocol_uris_resolved:
            yield self._get_protocol_json(uri)

    @cache(auth.get_path('cache-path') / 'protocol_json', create=True)
    def get(self, uri):
        # Fetch a raw uri with the pio auth header; returns the parsed
        # json on success, None on failure (error recorded via addError).
        #juri = uri + '.json'
        logd.info(uri)
        log.debug('going to network for protocols')
        resp = requests.get(uri, headers=self._pio_header)
        #log.info(str(resp.request.headers))
        if resp.ok:
            try:
                j = resp.json()  # the api is reasonably consistent
            except BaseException as e:
                log.exception(e)
                breakpoint()
                raise e

            return j
        else:
            try:
                j = resp.json()
                sc = j['status_code']
                em = j['error_message']
                msg = f'protocol issue {uri} {resp.status_code} {sc} {em} {self.id!r}'
                logd.error(msg)
                self.addError(msg)
                # can't return here because of the cache
            except BaseException as e:
                # error body was not json; NOTE(review): the 'no access'
                # log below only fires on this unparsable-error path
                log.exception(e)
                logd.error(f'protocol no access {uri} {self.id!r}')

    @cache(auth.get_path('cache-path') / 'protocol_json', create=True)
    def _get_protocol_json(self, uri):
        # Resolve the uri to a protocols.io slug and fetch the record
        # via the v3 api; returns None for non-pio uris or api failures.
        #juri = uri + '.json'
        logd.info(uri)
        pi = get_right_id(uri)
        if 'protocols.io' in pi:
            pioid = pi.slug  # FIXME normalize before we ever get here ...
            log.info(pioid)
        else:
            msg = f'protocol uri is not from protocols.io {pi} {self.id}'
            logd.error(msg)
            self.addError(msg)
            return

        #uri_path = uri.rsplit('/', 1)[-1]
        apiuri = 'https://www.protocols.io/api/v3/protocols/' + pioid
        #'https://www.protocols.io/api/v3/groups/sparc/protocols'
        #apiuri = 'https://www.protocols.io/api/v3/filemanager/folders?top'
        #print(apiuri, header)
        log.debug('going to network for protocols')
        resp = requests.get(apiuri, headers=self._pio_header)
        #log.info(str(resp.request.headers))
        if resp.ok:
            try:
                j = resp.json()  # the api is reasonably consistent
            except BaseException as e:
                log.exception(e)
                breakpoint()
                raise e

            return j
        else:
            try:
                j = resp.json()
                sc = j['status_code']
                em = j['error_message']
                msg = f'protocol issue {uri} {resp.status_code} {sc} {em} {self.id!r}'
                logd.error(msg)
                self.addError(msg)
                # can't return here because of the cache
            except BaseException as e:
                log.exception(e)
                logd.error(f'protocol no access {uri} {self.id!r}')
class OrganData:
    """ retrieve SPARC investigator data """
    # Scrapes the NIH common fund nervous-system graphic to map award
    # numbers to organs, with json disk caches and an optional manual
    # override sheet.

    url = ('https://commonfund.nih.gov/sites/default/'
           'files/sparc_nervous_system_graphic/main.html')

    def organ(self, award_number):
        # Return the organ iri(s) for an award; logs (not raises) on a
        # bad award number and returns None in that case.
        if award_number in self.manual and award_number not in self.sourced:
            log.warning(f'used manual organ mapping for {award_number}')

        try:
            return self.award_to_organ[award_number]
        except KeyError as e:
            logd.error(f'bad award_number {award_number}')

    __call__ = organ

    # bubble-id fragment -> organ term; entries commented out have no
    # ontology mapping yet
    organ_lookup = {
        'bladder': OntId('FMA:15900'),
        'brain': OntId('UBERON:0000955'),
        #'computer': OntId(''),
        'heart': OntId('FMA:7088'),
        'kidneys': OntId('FMA:7203'),
        'largeintestine': OntId('FMA:7201'),
        'liver': OntId('FMA:7197'),
        'lung': OntId('FMA:7195'),
        'malerepro': OntId('UBERON:0000079'),
        #'othertargets': OntId(''),
        'pancreas': OntId('FMA:7198'),
        'smallintestine': OntId('FMA:7200'),
        'spleen': OntId('FMA:7196'),
        'stomach': OntId('FMA:7148'),
        'vagus nerve': OntId('FMA:5731'),
        #'uterus': OntId('')
        '': None,
    }

    cache = auth.get_path('cache-path') / 'sparc-award-by-organ.json'
    old_cache = auth.get_path('cache-path') / 'award-mappings-old-to-new.json'

    def __init__(self, path=config.organ_html_path, organs_sheet=None):  # FIXME bad passing in organs
        # bs4 imported lazily so the module loads without it
        from bs4 import BeautifulSoup
        self._BeautifulSoup = BeautifulSoup
        self.path = path
        if not self.cache.exists():
            # first run: scrape and persist both mappings
            self.overview()
            with open(self.cache, 'wt') as f:
                json.dump(self.normalized, f)

            with open(self.old_cache, 'wt') as f:
                json.dump(self.former_to_current, f)
        else:
            with open(self.cache, 'rt') as f:
                self.normalized = json.load(f)

            with open(self.old_cache, 'rt') as f:
                self.former_to_current = json.load(f)

        if organs_sheet is not None:
            self._org = organs_sheet
            bc = self._org.byCol
            # manual award -> [organ terms]; award column wins over the
            # award_manual column when both are present
            self.manual = {award if award else (award_manual if award_manual else None):
                           [OntId(t) for t in organ_term.split(' ') if t]
                           for award, award_manual, organ_term in
                           zip(bc.award, bc.award_manual, bc.organ_term)
                           if organ_term}
        else:
            self.manual = {}

        # invert normalized organ -> awards into award -> organ
        self.sourced = {v: k for k, vs in self.normalized.items() for v in vs}
        self.award_to_organ = {**self.sourced, **self.manual}  # manual override

    def overview(self):
        # Parse the graphic html (local file if present, else network)
        # and populate self.raw, self.normalized and self.former_to_current.
        if self.path.exists():
            with open(self.path, 'rb') as f:
                soup = self._BeautifulSoup(f.read(), 'lxml')
        else:
            resp = requests.get(self.url)
            soup = self._BeautifulSoup(resp.content, 'lxml')

        self.raw = {}
        self.former_to_current = {}
        # each organ is a div whose id ends with '-bubble'
        for bsoup in soup.find_all(
                'div', {'id': lambda v: v and v.endswith('-bubble')}):
            organ, *_rest = bsoup['id'].split('-')
            logd.debug(_rest)
            award_list = self.raw[organ] = []
            for asoup in bsoup.find_all('a'):
                href = asoup['href']
                log.debug(href)
                parts = urlparse(href)
                query = parse_qs(parts.query)
                if 'projectnumber' in query:
                    award_list.extend(query['projectnumber'])
                elif 'aid' in query:
                    #aid = [int(a) for a in query['aid']]
                    #json = self.reporter(aid)
                    # reporter links require a page scrape to recover the
                    # award number (and possibly a former number)
                    award, former = self.reporter(href)
                    award_list.append(award)
                    if former is not None:
                        award_list.append(former)  # for this usecase this is ok
                        self.former_to_current[former] = award
                elif query:
                    log.debug(lj(query))

        # double NormAward application is intentional in the original;
        # NOTE(review): presumably idempotent — confirm
        self.former_to_current = {
            nml.NormAward(nml.NormAward(k)): nml.NormAward(nml.NormAward(v))
            for k, v in self.former_to_current.items()}
        self._normalized = {}
        self.normalized = {}
        # two passes: first normalize awards, then map organ keys to iris
        for frm, to in ((self.raw, self._normalized),
                        (self._normalized, self.normalized)):
            for organ, awards in frm.items():
                if organ in self.organ_lookup:
                    organ = self.organ_lookup[organ].iri

                to[organ] = [nml.NormAward(a) for a in awards]

    def _reporter(self, aids):
        # can't seem to get this to cooperate
        base = ('https://api.federalreporter.nih.gov'
                '/v1/projects/FetchBySmApplIds')
        resp = requests.post(base, json=aids, headers={
            'Accept': 'application/json',
            'Content-Type': 'application/json'
        })
        breakpoint()
        return resp.json()

    def reporter(self, href):
        # Scrape an individual reporter page; returns [award, former]
        # where former is None unless the page marks a former number,
        # and ['', None] when no details table is found.
        resp = requests.get(href)
        soup = self._BeautifulSoup(resp.content, 'lxml')
        #id = soup.find_all('span', {'id': 'spnPNUMB'})
        table = soup.find_all('table', {'summary': 'Details'})
        if table:
            text = table[0].find_all('td')[1].text.strip()
            if 'Former' in text:
                award, rest = text.split(' ', 1)
                rest, former = text.rsplit(' ', 1)
                return [award, former]
            else:
                return [text, None]
        else:
            return ['', None]