def map(attr, predicate):
    """Yield phenotypes for the values held in one cell of the current row.

    ``attr`` names a zero-argument accessor on ``self`` that returns the
    cell; ``predicate`` is forwarded to ``convert_cell`` and to every
    phenotype that is built. Nothing is yielded when the cell value is falsy.

    Yields ``phenotype.asIndicator()`` for a single object, or a plain tuple
    ``(op, *phenotypes)`` when ``convert_cell`` produced a tuple.
    """
    # NOTE: ``self`` is not a parameter here, it is resolved from the
    # surrounding scope at call time.
    value = getattr(self, attr)().value
    if not value:
        return

    # the conversion is fully materialised before anything is yielded
    converted = list(self.sheet.convert_cell(value, predicate=predicate))
    for iri, label in converted:
        if ',' not in value and label != value:
            log.warning(f'label mismatch {label!r} != {value!r}')

        if iri is None:
            if label != 'bed nucleus of stria terminalis juxtacapsular nucleus':
                log.debug(f'nothing found for {label}')
                continue

            # hard coded fallback for the one label handled specially here
            iri = OntTerm('UBERON:0011173', label='anterior division of bed nuclei of stria terminalis')

        if isinstance(iri, tuple):
            op, *operands = iri
            # TODO need combinators in future version for union/intersection of object
            # NOTE(review): LacksObject members are passed through as-is here,
            # while the scalar branch below converts with asURIRef() - confirm
            # that the difference is intended.
            members = []
            for operand in operands:
                if operand is None:
                    continue

                make = NegPhenotype if isinstance(operand, LacksObject) else Phenotype
                members.append(make(operand, predicate))

            yield (op, *members)
            continue

        if isinstance(iri, LacksObject):
            phenotype = NegPhenotype(iri.asURIRef(), predicate)
        else:
            phenotype = Phenotype(iri, predicate)

        yield phenotype.asIndicator()
def mapCell(cls, cell, syns=False):
    """Resolve the text of a cell to an ``(iri, label)`` pair.

    A cell containing ``:`` and no space is treated as an identifier and
    handed to ``OntTerm``. Anything else is looked up with
    ``cls.sgv.findByTerm`` restricted to a fixed list of prefixes, then with
    ``cls.query`` by label, and finally retried once with synonym search
    enabled.

    Returns ``(None, None)`` when nothing is found, or when ``http`` appears
    somewhere other than the start of an identifier-like cell.
    """
    search_prefixes = (
        'UBERON',
        'CHEBI',
        'PR',
        'NCBITaxon',
        'NCBIGene',
        'ilxtr',
        'NIFEXT',
        'SAO',
        'NLXMOL',
        'BIRNLEX',
    )

    looks_like_identifier = ':' in cell and ' ' not in cell
    if looks_like_identifier:
        log.debug(cell)
        if 'http' not in cell:
            # FIXME need better error message in ontquery
            term = OntTerm(cell, exclude_prefix=('FMA',))
        elif cell.startswith('http'):
            term = OntTerm(iri=cell)
        else:
            return None, None  # garbage with http inline

        return term.u, term.label

    records = cls.sgv.findByTerm(cell, searchSynonyms=syns, prefix=search_prefixes)
    candidates = [record for record in records if not record['deprecated']]
    #printD(cell, candidates)

    if not candidates:
        log.debug(f'{cell}')
        by_label = list(cls.query(label=cell, exclude_prefix=('FMA',)))
        if by_label:
            term = by_label[0]
            return term.u, term.label

        if not syns:
            # second and last attempt, this time matching synonyms as well
            return cls.mapCell(cell, syns=True)

        return None, None

    if len(candidates) > 1:
        #printD('WARNING', candidates)
        chosen = select_by_curie_rank(candidates)
    else:
        chosen = candidates[0]

    return rdflib.URIRef(chosen['iri']), chosen['labels'][0]
def neuron_existing(self):
    """Return the already known neuron for this row, if there is one.

    The lookup in ``self.sheet.existing`` is tried first with the identifier
    expanded from the curie cell (when that cell has a value), then with the
    alignment label, falling back to the plain label when the alignment label
    is empty. The result of the final ``get`` is returned as is, so it is
    ``None`` when nothing matches.
    """
    curie_value = self.curie().value
    if curie_value:
        by_id = self.sheet.existing.get(OntTerm(curie_value).u)
        if by_id:
            return by_id

    alignment = self.alignment_label().value
    return self.sheet.existing.get(alignment or self.label().value)
def neuron_existing(self):
    """Return the already known neuron for this row, stamped with its id.

    The lookup in ``self.sheet.existing`` is tried first with the identifier
    expanded from the curie cell, then with the alignment label, falling back
    to the plain label when the alignment label is empty. Whenever the curie
    cell has a value and a neuron is found, that identifier is written to the
    neuron's ``id_`` attribute before it is returned.
    """
    curie_value = self.curie().value
    if curie_value:
        id_ = OntTerm(curie_value).u
        by_id = self.sheet.existing.get(id_)
        if by_id:
            by_id.id_ = id_
            return by_id

    alignment = self.alignment_label().value
    by_label = self.sheet.existing.get(alignment or self.label().value)
    if by_label and curie_value:
        # found by label only, still record the identifier from the sheet
        by_label.id_ = id_

    return by_label
def entailed_molecular_phenotypes(self):
    """Yield phenotypes derived from the comma separated molecular cell.

    Every label in the cell is looked up with ``self.sheet.sgv.findByTerm``;
    the first hit is turned into an ``OntTerm`` and its phenotype is yielded.
    Two specific curies additionally yield a receptor phenotype.
    """
    labels = self.exhasmolecularphenotype().value.split(',')

    # FIXME hack
    # NOTE(review): these two are emitted unconditionally and can be emitted
    # again from the loop below - confirm that the duplication is intended.
    yield OntTerm(curie='ilxtr:GABAReceptor').asPhenotype()
    yield OntTerm(curie='ilxtr:glutamateReceptor').asPhenotype()

    # curie of a matched term -> curie of the receptor yielded alongside it
    receptor_for = {
        'NLXMOL:1006001': 'ilxtr:GABAReceptor',
        'SAO:1164727693': 'ilxtr:glutamateReceptor',
    }

    for raw_label in labels:
        hits = self.sheet.sgv.findByTerm(raw_label.strip())
        if not hits:
            continue

        term = OntTerm(iri=hits[0]['iri'])
        yield term.asPhenotype()

        receptor = receptor_for.get(term.curie)
        if receptor is not None:
            yield OntTerm(curie=receptor).asPhenotype()
def mapCell(cls, cell, syns=False, predicate=None):
    """Resolve the text of a cell to an ``(iri, label)`` pair.

    Two literal values are mapped directly. A cell containing ``:`` and no
    space is treated as an identifier and handed to ``OntTerm``. Anything
    else is looked up with ``cls.sgv.findByTerm`` and the best ranked,
    non-deprecated record wins; if there is none ``cls.query`` is tried by
    label, and finally the whole lookup is retried once with synonyms.

    ``predicate`` only influences the prefix order: for predicates listed in
    ``Phenotype._molecular_predicates`` the first prefix is moved to the
    second to last position.

    Returns ``(None, None)`` when nothing is found, or when ``http`` appears
    somewhere other than the start of an identifier-like cell.
    """
    search_prefixes = (
        'UBERON',
        'CHEBI',
        'PR',
        'NCBIGene',
        'NCBITaxon',
        'ilxtr',
        'NIFEXT',
        'SAO',
        'NLXMOL',
        'BIRNLEX',
    )
    if predicate and predicate in Phenotype._molecular_predicates:
        # uberon syns pollute molecular results so move it to one before birnlex
        first, *middle, last = search_prefixes
        search_prefixes = (*middle, first, last)

    if cell == 'contralateral':
        return ilxtr.Contralateral, cell  # XXX FIXME only BSPO has this right now

    if cell.lower() == 'gaba receptor role':
        return ilxtr.GABAReceptor, cell

    if ':' in cell and ' ' not in cell:
        log.debug(cell)
        if 'http' not in cell:
            # FIXME need better error message in ontquery
            term = OntTerm(cell, exclude_prefix=('FMA',))
        elif cell.startswith('http'):
            term = OntTerm(iri=cell)
        else:
            return None, None  # garbage with http inline

        return term.u, term.label

    if cell == 'Vertebrata':  # search syns
        syns = True

    def rank_key(record):
        """Sort key: one flag per prefix, in order, then an exact label flag.

        The nth flag is true when the record's curie starts with the nth
        prefix, so sorting the keys in descending order puts records for
        earlier prefixes first. The trailing flag prefers a record whose
        labels contain the cell text among records with the same prefix.
        """
        prefix_flags = tuple(record['curie'].startswith(prefix)
                             for prefix in search_prefixes)
        exact_label = 'labels' in record and cell in record['labels']
        return (*prefix_flags, exact_label)

    records = cls.sgv.findByTerm(cell, searchSynonyms=syns, prefix=search_prefixes)
    ranked = sorted((record for record in records if not record['deprecated']),
                    key=rank_key,
                    reverse=True)
    #printD(cell, ranked)

    if not ranked:
        log.debug(f'{cell}')
        by_label = list(cls.query(label=cell, exclude_prefix=('FMA',)))
        if by_label:
            term = by_label[0]
            return term.u, term.label

        if not syns:
            # second and last attempt, this time matching synonyms as well
            return cls.mapCell(cell, syns=True, predicate=predicate)

        return None, None

    # one or many candidates, the best ranked record is always the first
    best = ranked[0]
    return rdflib.URIRef(best['iri']), best['labels'][0]
def loop_internal(j, header, cell): nonlocal id nonlocal current_neuron nonlocal do_release notes = list(process_note(get_note(i + 1, j, self.cells_index))) # + 1 since headers is removed if notes and not header.startswith('has'): _predicate = self.convert_other(header) if cell: _object = rdflib.Literal(cell) # FIXME curies etc. else: _object = rdf.nil other_notes[_predicate, _object] = notes if header == 'curie': id = OntId(cell).u if cell else None return elif header == 'label': if id == OntId('NIFEXT:66').u: breakpoint() label_neuron = cell if cell in self.existing: current_neuron = self.existing[cell] elif cell: # TODO self.new.append(cell) else: raise ValueError(cell) # wat return elif header == 'Status': # TODO if cell == 'Yes': do_release = True elif cell == 'Maybe': pass elif cell == 'Not yet': pass elif cell == 'Delete': pass else: pass return elif header == 'PMID': # TODO return elif header == 'Other reference': # TODO return elif header == 'Other label': # TODO return elif header == 'definition': return # FIXME single space differences between the spreadsheet and the source if cell: definition_neuron = rdflib.Literal(cell) elif header == 'synonyms': if cell: synonyms_neuron = [rdflib.Literal(s.strip()) # FIXME bare comma is extremely dangerous for s in cell.split(',')] return elif header in self.skip: return objects = [] if cell: predicate = self.convert_header(header) if predicate is None: log.debug(f'{(header, cell, notes)}') for object, label in self.convert_cell(cell): if predicate in NeuronCUT._molecular_predicates: if isinstance(object, tuple): op, *rest = object rest = [OntTerm(o).asIndicator().URIRef for o in rest] object = op, *rest elif object: log.debug(f'{object!r}') object = OntTerm(object).asIndicator().URIRef if isinstance(label, tuple): # LogicalPhenotype case _err = [] for l in label: if self.lower_check(l, cell): _err.append((cell, label)) if _err: self.errors.extend(_err) else: objects.append(object) elif self.lower_check(label, cell): 
self.errors.append((cell, label)) elif str(id) == object: self.errors.append((header, cell, object, label)) object = None else: objects.append(object) if notes: # FIXME this is a hack to only attach to the last value # since we can't distinguish at the moment wat[predicate, object] = notes if object is not None: # object aka iri can be none if we don't find anything object_notes[object] = notes else: predicate_notes[predicate] = notes # FIXME it might also be simpler in some cases # to have this be object_notes[object] = notes # because we are much less likely to have the same # phenotype appear attached to the different dimensions # FIXME comma sep is weak here because the # reference is technically ambiguous # might be an argument for the denormalized form ... # or perhaps having another sheet for cases like that else: return if predicate and objects: for object in objects: # FIXME has layer location phenotype if isinstance(object, tuple): op, *rest = object pes = (Phenotype(r, predicate) for r in rest) # FIXME nonhomogenous phenotypes phenotypes.append(LogicalPhenotype(op, *pes)) elif object: phenotypes.append(Phenotype(object, predicate)) else: self.errors.append((object, predicate, cell)) elif objects: self.errors.append((header, objects)) else: self.errors.append((header, cell))