def _process(self, contributor):
    # get member if we can find them
    he = dat.HasErrors(pipeline_stage=self.__class__.__name__ + '.data')
    if 'name' in contributor and 'first_name' in contributor:
        name = contributor['name']
        if ';' in name:
            msg = f'Bad symbol in name {name!r}'
            he.addError(msg)
            logd.error(msg)

        fn = contributor['first_name']
        ln = contributor['last_name']
        if ' ' in fn:
            # a space in first_name means a middle name is embedded in it
            fn, mn = fn.split(' ', 1)
            contributor['middle_name'] = mn
            contributor['first_name'] = fn

        if ' ' in ln:
            msg = f'Malformed last_name {ln!r}'
            he.addError(msg)
            logd.error(msg)
            ln = ln.replace(' ', '-')

        failover = f'{fn}-{ln}'
        member = self.member(fn, ln)

        if member is not None:
            userid = OntId('https://api.blackfynn.io/users/' + member.id)
            contributor['blackfynn_user_id'] = userid

    else:
        member = None
        failover = 'no-orcid-no-name'
        log.warning('No name!' + lj(contributor))

    orcid = None
    if 'contributor_orcid_id' in contributor:
        orcid = contributor['contributor_orcid_id']
        if isinstance(orcid, str) and 'orcid.org' in orcid:
            orcid = OrcidId(orcid)  # FIXME reloading from json

        if isinstance(orcid, OrcidId):
            s = orcid
        else:  # it's not an orcid or it's a bad orcid
            orcid = None

    if orcid is None:
        if member is not None:
            s = userid
        else:
            log.debug(lj(contributor))
            s = OntId(self.dsid + '/contributors/' + failover)

    contributor['id'] = s
    he.embedErrors(contributor)
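# A minimal, self-contained sketch of the name normalization performed above,
# assuming only plain string input; the helper name `split_name` and the
# example values are hypothetical, not part of the pipeline API.
def split_name(first_name, last_name):
    """Split a space-containing first_name into first + middle, hyphenate a
    space-containing last_name, and build the failover identifier fragment,
    mirroring the logic in _process."""
    middle_name = None
    if ' ' in first_name:
        first_name, middle_name = first_name.split(' ', 1)

    if ' ' in last_name:
        last_name = last_name.replace(' ', '-')

    failover = f'{first_name}-{last_name}'
    return first_name, middle_name, last_name, failover


# e.g. split_name('Jane Q', 'van Dyke') -> ('Jane', 'Q', 'van-Dyke', 'Jane-van-Dyke')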
def _indexes(cls, data):
    """ compute submission and curation error indexes """
    errors = get_all_errors(data)
    submission_errors = []
    curation_errors = []
    for error in reversed(errors):
        if error in submission_errors or error in curation_errors:
            log.debug('error detected multiple times not counting '
                      'subsequent occurrences' + lj(error))
            continue

        stage = error['pipeline_stage']
        message = error['message']
        if stage in cls._submission:
            submission_errors.append(error)
        elif stage in cls._curation:
            curation_errors.append(error)
        else:
            raise ValueError(f'Unhandled stage {stage} {message}')

    si = len(submission_errors)
    ci = len(curation_errors)
    data['status'] = {}
    data['status']['submission_index'] = si
    data['status']['curation_index'] = ci
    data['status']['error_index'] = si + ci
    data['status']['submission_errors'] = submission_errors
    data['status']['curation_errors'] = curation_errors
    return si + ci
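# Hedged illustration of the index computation above: classify error dicts by
# pipeline_stage into submission vs curation buckets and report counts. The
# function name, stage sets, and error dicts here are made up for the example,
# not the pipeline's own API.
def classify_errors(errors, submission_stages, curation_stages):
    submission_errors, curation_errors = [], []
    for error in errors:
        if error in submission_errors or error in curation_errors:
            continue  # only count the first occurrence of a duplicate error

        stage = error['pipeline_stage']
        if stage in submission_stages:
            submission_errors.append(error)
        elif stage in curation_stages:
            curation_errors.append(error)
        else:
            raise ValueError(f'Unhandled stage {stage}')

    return {'submission_index': len(submission_errors),
            'curation_index': len(curation_errors),
            'error_index': len(submission_errors) + len(curation_errors),
            'submission_errors': submission_errors,
            'curation_errors': curation_errors}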
def triples_gen(self):
    rm = self._source

    # FIXME there doesn't seem to be a section that tells me the name
    # of the top level model so I have to know its name beforehand
    # the id is in the model, having the id in the resource map
    # prevents issues if these things get sent decoupled
    id = rm['id']
    mid = id.replace(' ', '-')

    links = rm[id]['links']
    #linknodes = [n for n in rm[id]['nodes'] if n['class'] == 'Link']  # visible confusion

    st = []
    from_to = []
    ot = None
    yield from self.apinatbase()
    for link in links:
        if 'conveyingType' in link:
            if link['conveyingType'] == 'ADVECTIVE':
                p_is = TEMP.isAdvectivelyConnectedTo
                p_from = TEMP.advectivelyConnectsFrom
                p_to = TEMP.advectivelyConnectsTo
                p_cmat = TEMP.advectivelyConnectsMaterial
                diffusive = False
            elif link['conveyingType'] == 'DIFFUSIVE':
                p_is = TEMP.isDiffusivelyConnectedTo
                p_from = TEMP.diffusivelyConnectsFrom
                p_to = TEMP.diffusivelyConnectsTo
                p_cmat = TEMP.diffusivelyConnectsMaterial
                diffusive = True
            else:
                log.critical(f'unhandled conveying type {link}')
                continue

            source = link['source']
            target = link['target']
            ok = True
            if len(from_to) == 2:  # otherwise
                st = []
                from_to = []

            for i, e in enumerate((source, target)):
                ed = rm[e]
                if 'external' not in ed:
                    if not i and from_to:
                        # TODO make sure the intermediate ids match
                        pass
                    else:
                        ok = False
                        break
                else:
                    st.append(e)
                    from_to.append(OntId(ed['external'][0]))

            conveying = link['conveyingLyph']
            cd = rm[conveying]
            if 'external' in cd:
                old_ot = ot
                ot = OntTerm(cd['external'][0])
                yield ot.u, rdf.type, owl.Class
                yield ot.u, TEMP.internalId, rdflib.Literal(conveying)
                yield ot.u, rdfs.label, rdflib.Literal(ot.label)

                yield from self.materialTriples(
                    ot.u, link, p_cmat)  # FIXME locate this correctly

                if ok:
                    u, d = from_to
                    if st[0] == source:
                        yield u, rdfs.label, rdflib.Literal(OntTerm(u).label)
                        yield u, rdf.type, owl.Class
                        yield from cmb.restriction.serialize(ot.u, p_from, u)

                    if st[1] == target:
                        yield d, rdfs.label, rdflib.Literal(OntTerm(d).label)
                        yield d, rdf.type, owl.Class
                        yield from cmb.restriction.serialize(ot.u, p_to, d)

                if old_ot is not None and old_ot != ot:
                    yield from cmb.restriction.serialize(ot.u, p_from, old_ot.u)

            if diffusive:
                # we can try to hack this using named individuals
                # but it is not going to do exactly what is desired
                s_link = TEMP[f'ApiNATOMY/{mid}/{link["id"]}']
                s_cd = TEMP[f'ApiNATOMY/{mid}/{cd["id"]}']
                yield s_link, rdf.type, owl.NamedIndividual
                yield s_link, rdf.type, TEMP.diffusiveLink  # FIXME I'm not sure these go in the model ...
                yield s_cd, rdf.type, owl.NamedIndividual
                if 'external' in cd and cd['external']:
                    oid = OntId(cd['external'][0])
                    yield s_cd, rdf.type, oid.u
                    ot = oid.asTerm()
                    if ot.label:
                        yield oid.u, rdfs.label, ot.label

                else:
                    yield s_cd, rdf.type, TEMP.conveyingLyph

                for icd in cd['inCoalescences']:
                    dcd = rm[icd]
                    log.info(lj(dcd))
                    s_icd = TEMP[f'ApiNATOMY/{mid}/{dcd["id"]}']
                    yield s_cd, TEMP.partOfCoalescence, s_icd
                    yield s_icd, rdf.type, owl.NamedIndividual
                    yield s_icd, rdf.type, TEMP['ApiNATOMY/Coalescence']
                    if 'external' in dcd and dcd['external']:
                        oid = OntId(dcd['external'][0])
                        yield s_icd, rdf.type, oid.u
                        ot = oid.asTerm()
                        if ot.label:
                            yield oid.u, rdfs.label, ot.label

                    for lyphid in dcd['lyphs']:
                        ild = rm[lyphid]
                        log.info(lj(ild))
                        if 'external' in ild and ild['external']:
                            yield s_icd, TEMP.hasLyphWithMaterial, OntId(ild['external'][0])

            if not ok:
                logd.info(f'{source} {target} issue')
                continue

            for inid, e in zip(st, from_to):
                yield e.u, rdf.type, owl.Class
                yield e.u, rdfs.label, rdflib.Literal(OntTerm(e).label)
                yield e.u, TEMP.internalId, rdflib.Literal(inid)

            f, t = from_to
            yield from cmb.restriction.serialize(f.u, p_is, t.u)
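# Hedged usage sketch: triples_gen yields (s, p, o) tuples, so a caller can
# accumulate them into an rdflib Graph and serialize the result. Only rdflib is
# used below; the hand-made triples stand in for triples_gen output, and the
# namespace URI is an assumption for illustration.
import rdflib
from rdflib.namespace import RDF, RDFS, OWL


def graph_from_triples(triples):
    g = rdflib.Graph()
    for t in triples:
        g.add(t)  # each t is an (s, p, o) tuple of rdflib terms
    return g


ex = rdflib.Namespace('http://example.org/apinat/')
g = graph_from_triples([
    (ex.lyph1, RDF.type, OWL.Class),
    (ex.lyph1, RDFS.label, rdflib.Literal('example lyph')),
])
print(g.serialize(format='turtle'))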
def _process(self, contributor):
    # get member if we can find them
    he = dat.HasErrors(pipeline_stage=self.__class__.__name__ + '.data')
    if 'contributor_name' in contributor and 'first_name' in contributor:
        name = contributor['contributor_name']
        if ';' in name:
            msg = f'Bad symbol in name {name!r}'
            he.addError(msg)
            logd.error(msg)

        fn = contributor['first_name']
        ln = contributor['last_name']
        if ' ' in fn:
            # a space in first_name means a middle name is embedded in it
            fn, mn = fn.split(' ', 1)
            mn, _mn = mn.rstrip('.'), mn
            if mn != _mn:
                he.addError(f'Middle initials don\'t need periods :) {name!r}',
                            logfunc=logd.error)

            contributor['middle_name'] = mn
            contributor['first_name'] = fn

        if ' ' in ln:
            msg = f'Malformed last_name {ln!r}'
            he.addError(msg)
            logd.error(msg)
            ln = ln.replace(' ', '-')

        failover = f'{fn}-{ln}'
        member = self.member(fn, ln)

        if member is not None:
            userid = OntId('https://api.blackfynn.io/users/' + member.id)
            contributor['blackfynn_user_id'] = userid

    else:
        member = None
        failover = 'no-orcid-no-name'
        log.warning('No name!' + lj(contributor))

    orcid = None
    if 'contributor_orcid_id' in contributor:
        orcid = contributor['contributor_orcid_id']
        if isinstance(orcid, str) and 'orcid.org' in orcid:
            orcid = idlib.Orcid(orcid)  # FIXME reloading from json

        if isinstance(orcid, idlib.Orcid):
            s = orcid
        else:  # it's not an orcid or it's a bad orcid
            orcid = None

    if orcid is None:
        if member is not None:
            s = userid
        else:
            log.debug(lj(contributor))
            s = OntId(self.dsid + '/contributors/' + failover)

    contributor['id'] = s
    he.embedErrors(contributor)

    # lifting + adding
    if 'contributor_affiliation' in contributor:
        ca = contributor['contributor_affiliation']
        maybe_ror = self.lifters.affiliations(ca)
        if maybe_ror is not None:
            contributor['affiliation'] = maybe_ror
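# Small self-contained sketch of the middle-initial handling added in this
# version of _process: strip a trailing period from the middle name and report
# whether anything was stripped so the caller can emit a warning. The function
# name and example values are hypothetical.
def normalize_middle(first_name):
    if ' ' not in first_name:
        return first_name, None, False

    fn, mn = first_name.split(' ', 1)
    stripped = mn.rstrip('.')
    return fn, stripped, stripped != mn


# normalize_middle('Jane Q.') -> ('Jane', 'Q', True)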
def _indexes(cls, data):
    """ compute submission and curation error indexes """
    errors = get_all_errors(data)
    submission_errors = []
    curation_errors = []
    for error in reversed(errors):
        if error in submission_errors or error in curation_errors:
            log.debug('error detected multiple times not counting '
                      'subsequent occurrences' + lj(error))
            continue

        if 'blame' not in error:
            breakpoint()  # debug guard: every error is expected to carry a blame

        blame = error['blame']
        stage = error['pipeline_stage']
        message = error['message']

        blamed = False
        if blame is not None:
            if blame in cls._blame:
                blame_target = cls._blame[blame]
                if blame_target == cls._blame_stage:
                    pass  # defer to the stage-based classification below
                elif blame_target == cls._blame_everyone:
                    submission_errors.append(error)
                    curation_errors.append(error)
                    blamed = True
                elif blame_target == cls._blame_submission:
                    submission_errors.append(error)
                    blamed = True
                elif blame_target == cls._blame_curation:
                    curation_errors.append(error)
                    blamed = True
                else:
                    raise ValueError(f'Unhandled blame target {blame_target}\n{message}')

            else:
                raise ValueError(f'Unhandled blame type {blame}\n{message}')

        if stage in cls._submission:
            if not blamed:
                submission_errors.append(error)
        elif stage in cls._curation:
            if not blamed:
                curation_errors.append(error)
        else:
            if blame not in ('pipeline', 'submission', 'debug'):
                raise ValueError(f'Unhandled stage {stage}\n{message}')

    si = len(submission_errors)
    ci = len(curation_errors)
    if 'status' not in data:
        data['status'] = {}

    data['status']['submission_index'] = si
    data['status']['curation_index'] = ci
    data['status']['error_index'] = si + ci
    data['status']['submission_errors'] = submission_errors
    data['status']['curation_errors'] = curation_errors
    return si + ci
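# Hedged sketch of the blame routing added in this version of _indexes: a
# blame target picks the bucket(s) an error lands in, or defers to the
# stage-based rules. The string labels and function name are illustrative
# stand-ins for the cls._blame_* sentinels, not the pipeline's actual values.
def route_by_blame(blame_target, error, submission_errors, curation_errors):
    """Return True if the error was assigned by blame alone."""
    if blame_target == 'stage':
        return False  # defer to stage-based classification
    if blame_target == 'everyone':
        submission_errors.append(error)
        curation_errors.append(error)
    elif blame_target == 'submission':
        submission_errors.append(error)
    elif blame_target == 'curation':
        curation_errors.append(error)
    else:
        raise ValueError(f'Unhandled blame target {blame_target}')

    return True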