class SwansonLabels(ParcOnt):  # FIXME not labels...
    # Parcellation ontology for the Swanson 2014 partonomies.
    filename = 'swanson'
    name = 'Swanson 2014 partonomies'
    shortname = 'swannt'
    imports = parcCore,
    prefixes = {**makePrefixes('NIFRID', 'ilxtr', 'prov'),
                'swanr': interlex_namespace('swanson/uris/readable/'),
                'SWAN': interlex_namespace('swanson/uris/neuroanatomical-terminology/terms/'),
                'SWAA': interlex_namespace('swanson/uris/neuroanatomical-terminology/appendix/'),}
    sources = SwansonAppendix,
    namespace = prefixes['SWAN']
    root = LabelRoot(iri=nsExact(namespace),  # FIXME this is not really a label in the strict sense
                     label='Swanson label root',
                     shortname=shortname,
                     definingArtifacts=(s.artifact.iri for s in sources),)

    def _triples(self):
        # Re-emit the triples of the swanson hierarchies graph, rerooting
        # subClassOf links onto this ontology's label root and wrapping
        # rdfs:label values as Label records.
        skip_subject = rdflib.URIRef(
            'http://ontology.neuinfo.org/NIF/ttl/generated/swanson_hierarchies.ttl')
        for subj, pred, obj in swanson().g:
            #if pred != rdf.type and obj != owl.Ontology:
            if subj == skip_subject:
                # drop the ontology-header triples of the source file
                continue
            if pred == rdfs.subClassOf and obj == ilxtr.swansonBrainRegionConcept:
                yield subj, pred, self.root.iri
            elif pred == rdfs.label:
                yield subj, pred, Label(label=obj, labelRoot=self.root).rdfs_label
                yield subj, skos.prefLabel, obj
            else:
                yield subj, pred, obj
def __init__(self, input):
    """ Build the output graph for transgenic lines.

        *input* is the raw neuron data; it is stashed unmodified on
        self.neuron_data for later processing. """
    # provider-specific curie prefixes for transgenic line identifiers
    provider_prefixes = {
        'JAX': 'http://jaxmice.jax.org/strain/',
        'MMRRC': 'http://www.mmrrc.org/catalog/getSDS.jsp?mmrrc_id=',
        'AIBS': 'http://api.brain-map.org/api/v2/data/TransgenicLine/',
    }
    # merge with the standard prefixes needed by the output ontology
    prefixes = {**provider_prefixes,
                **makePrefixes('definition', 'ilxtr', 'owl')}
    self.g = makeGraph('transgenic-lines', prefixes=prefixes)
    self.neuron_data = input
class PaxRegion(RegionsBase): __pythonOnly = True # TODO path = 'ttl/generated/parcellation/' filename = 'paxinos-rat-regions' name = 'Paxinos & Watson Rat Parcellation Regions' shortname = 'paxratr' comment = ('Intersection between labels and atlases for all regions ' 'delineated using Paxinos and Watson\'s methodology.') prefixes = {**makePrefixes('NIFRID', 'ilxtr', 'prov', 'ILXREPLACE')} # sources need to go in the order with which we want the labels to take precedence (ie in this case 6e > 4e) #sources = PaxSrAr_6(), PaxSr_6(), PaxSrAr_4(), PaxTree_6() # tree has been successfully used for crossreferencing, additional terms need to be left out at the moment (see in_tree_not_in_six) root = RegionRoot( iri=TEMP[ 'FIXME'], # FIXME these should probably be EquivalentTo Parcellation Region HasLabel some label HasAtlas some atlas... label='Paxinos rat parcellation region root', shortname=shortname, ) # atlas version # label identifier # figures things = {} @classmethod def addthing(cls, thing, value): cls.things[thing] = value
class PaxMouseLabels(PaxLabels): """ Compilation of all labels used to name mouse brain regions in atlases created using Paxinos and Franklin\'s methodology.""" # TODO FIXME align indexes where possible to paxrat??? filename = 'paxinos-mus-labels' name = 'Paxinos & Franklin Mouse Parcellation Labels' shortname = 'paxmus' namespace = PAXMUS prefixes = { **makePrefixes('NIFRID', 'ilxtr', 'prov', 'dcterms'), 'PAXMUS': str(PAXMUS), 'paxmusver': str(paxmusver), } sources = PaxMFix, PaxMSrAr_2, PaxMSrAr_3 root = LabelRoot( iri=nsExact(namespace), # PAXMUS['0'], label='Paxinos mouse parcellation label root', shortname=shortname, definingArtifactsS=(Artifacts.PaxMouseAt.iri, ), ) _merge = { '4/5Cb': '4&5Cb', '5N': 'Mo5', '12N': '12', 'AngT': 'Ang', 'ANS': 'Acc', 'ASt': 'AStr', 'hif': 'hf', 'MnM': 'MMn', 'MoDG': 'Mol', 'och': 'ox', 'PHA': 'PH', # FIXME PH is reused in 3rd 'ST': 'BST', 'STIA': 'BSTIA', 'STLD': 'BSTLD', 'STLI': 'BSTLI', 'STLJ': 'BSTLJ', 'STLP': 'BSTLP', 'STLV': 'BSTLV', 'STMA': 'BSTMA', 'STMP': 'BSTMP', 'STMPI': 'BSTMPI', 'STMPL': 'BSTMPL', 'STMPM': 'BSTMPM', 'STMV': 'BSTMV', 'STS': 'BSTS', }
def main():
    """ Fetch the BlueBrain NAT modeling dictionary csv and serialize it
        as the 'measures' ontology via rowParse. """
    source = 'https://raw.githubusercontent.com/BlueBrain/nat/master/nat/data/modelingDictionary.csv'
    delimiter = ';'

    resp = requests.get(source)
    # Skip empty rows and comment rows. Guard r[0] explicitly: the old
    # test r[0][0] != '#' raised IndexError when the first cell was an
    # empty string.
    rows = [r for r in csv.reader(resp.text.split('\n'), delimiter=delimiter)
            if r and r[0] and not r[0].startswith('#')]
    header = ['Record_ID', 'parent_category', 'name', 'description', 'required_tags']

    PREFIXES = makePrefixes('owl', 'skos', 'ILX', 'definition')
    graph = makeGraph('measures', prefixes=PREFIXES)

    class nat(rowParse):
        """ rowParse driver: each method is invoked with the value of the
            column whose name matches the method, once per row. """

        def Record_ID(self, value):
            print(value)
            self.old_id = value
            self._id = TEMP[value]

        def parent_category(self, value):
            self.super_old_id = value
            self.super_id = TEMP[value]

        def name(self, value):
            # keep the raw value as a hidden label, humanize for rdfs:label
            self.hidden = value
            self.label = value.replace('_', ' ')

        def description(self, value):
            self.definition = value

        def required_tags(self, value):
            pass  # intentionally ignored

        def _row_post(self):
            # called after every row: emit the class and its annotations
            graph.add_class(self._id, self.super_id, label=self.label)
            graph.add_trip(self._id, 'skos:hiddenLabel', self.hidden)
            graph.add_trip(self._id, 'definition:', self.definition)

    nat(rows, header)  # side effect: populates graph row by row
    graph.write()


if __name__ == '__main__':
    # was breakpoint(), which made the script a no-op when run directly
    main()
def main():
    """ Round-trip demo/test of the combinator machinery (EquivalentClass,
        Restriction, annotation, flattenTriples) against the uberon and
        neuron-circuit-role bridge files. """
    import rdflib
    from pyontutils.core import makeGraph, makePrefixes, log
    from pyontutils.config import auth
    ub = auth.get_path('ontology-local-repo') / 'ttl/bridge/uberon-bridge.ttl'
    ncrb = auth.get_path('ontology-local-repo') / 'ttl/NIF-Neuron-Circuit-Role-Bridge.ttl'
    if not ub.exists() or not ncrb.exists():
        # just skip this if we can't find the files
        log.warning(f'missing file {ub} or {ncrb}')
        return

    graph = rdflib.Graph()
    graph.parse(ub.as_posix(), format='turtle')
    graph.parse(ncrb.as_posix(), format='ttl')
    ecgraph = rdflib.Graph()
    # NOTE(review): EquivalentClass, oc_, Restriction, etc. come from
    # module scope (combinators) — not visible in this chunk.
    oec = EquivalentClass()
    test = tuple(oec.parse(graph=graph))
    ft = oc_.full_combinator(test[0][0], test[0][1])
    ftng = makeGraph('thing3', prefixes=makePrefixes('owl', 'TEMP'))
    # starred-tuple statement: forces evaluation of the serialize generator
    *ft.serialize(ftng.g),
    ftng.write()
    _roundtrip = list(test[0][1](test[0][0]))
    roundtrip = oc_(test[0][0], test[0][1])  # FIXME not quite there yet...
    for t in roundtrip:
        ecgraph.add(t)
    ecng = makeGraph('thing2', graph=ecgraph, prefixes=makePrefixes('owl', 'TEMP'))
    ecng.write()
    if __name__ == '__main__':
        breakpoint()

    # NOTE(review): everything below the early return is unreachable demo
    # code kept for reference — confirm this is intentional.
    return
    r = Restriction(rdfs.subClassOf)  #, scope=owl.allValuesFrom)#NIFRID.has_proper_part)
    l = tuple(r.parse(graph=graph))
    for t in r.triples:
        graph.remove(t)
    ng = makeGraph('thing', graph=graph)
    ng.write()
    #print(l)
    restriction = Restriction(None)  #rdf.first)
    ll = List(lift_rules={owl.Restriction: restriction})
    trips = tuple(ll.parse(graph=graph))
    #subClassOf = PredicateCombinator(rdfs.subClassOf)  # TODO should be able to do POCombinator(rdfs.subClassOf, 0bjectCombinator)
    subClassOf = POCombinator(rdfs.subClassOf, ObjectCombinator)
    superDuperClass = subClassOf(TEMP.superDuperClass)  # has to exist prior to triples
    ec = oec(TEMP.ec1,
             TEMP.ec2,
             restriction(TEMP.predicate0, TEMP.target1),
             restriction(TEMP.predicate1, TEMP.target2),)
    egraph = rdflib.Graph()
    acombinator = annotation((TEMP.testSubject, rdf.type, owl.Class), (TEMP.hoh, 'FUN'))
    ft = flattenTriples((acombinator((TEMP.annotation, 'annotation value')),
                         acombinator((TEMP.anotherAnnotation, 'annotation value again')),
                         oc_(TEMP.c1, superDuperClass),
                         oc_(TEMP.c2, superDuperClass),
                         oc_(TEMP.c3, superDuperClass),
                         oc_(TEMP.c4, superDuperClass),
                         oc_(TEMP.c5, superDuperClass),
                         oc_(TEMP.wat, subClassOf(TEMP.watParent)),
                         oc_(TEMP.testSubject),
                         ec(TEMP.testSubject),
                         oc_(TEMP.more, oec(TEMP.ec3, restriction(TEMP.predicate10, TEMP.target10))),
                         ),)
    [egraph.add(t) for t in ft]
    eng = makeGraph('thing1', graph=egraph, prefixes=makePrefixes('owl', 'TEMP'))
    eng.write()


if __name__ == '__main__':
    breakpoint()
from pyontutils.core import NIFRID, oboInOwl
from IPython import embed

# cached scigraph clients
sgg = Graph(cache=True)
sgv = Vocabulary(cache=True)

# shape of a scigraph neighbors query
Query = namedtuple('Query', ['root', 'relationshipType', 'direction', 'depth'])

# commonly used annotation predicates
CON = oboInOwl.consider
DBX = oboInOwl.hasDbXref  # FIXME also behaves as objectProperty :/
AID = oboInOwl.hasAlternativeId
IRBC = NIFRID.isReplacedByClass

PREFIXES = makePrefixes('UBERON',
                        'ro',
                        'owl',
                        'skos',
                        )
NIFPREFIXES = makePrefixes('NIFGA',
                           'oboInOwl',
                           'replacedBy',
                           )
NIFPREFIXES.update(PREFIXES)

# local paths into the ontology repo
nifga_path = devconfig.ontology_local_repo + '/ttl/NIF-GrossAnatomy.ttl'
uberon_path = devconfig.ontology_local_repo + '/ttl/external/uberon.owl'
uberon_bridge_path = 'http://purl.obolibrary.org/obo/uberon/bridge/uberon-bridge-to-nifstd.owl'
#bridge_path = os.path.expanduser('~/git/NIF-Ontology/ttl/uberon-bridge-to-nifstd.ttl')  # scigraph's got us
class FSL(LabelsBase):
    """ Ontology file containing labels from the FMRIB Software Library (FSL)
        atlases collection. All identifiers use the number of the index
        specified in the source xml file. """

    path = 'ttl/generated/parcellation/'
    filename = 'fsl'
    name = 'Terminologies from FSL atlases'
    shortname = 'fsl'
    imports = parcCore,
    prefixes = {**makePrefixes('ilxtr'),
                **ParcOnt.prefixes,
                'FSLATS': str(FSLATS),
    }
    sources = tuple()  # set by prepare()
    roots = tuple()  # set by prepare()

    class Artifacts(Collector):
        """ Artifacts for FSL """
        collects = Artifact

    def _triples(self):
        # One Label per (index, label) pair of every prepared source.
        for source in self.sources:
            for index, label in source:
                iri = source.root.namespace[str(index)]
                yield from Label(labelRoot=source.root,
                                 label=label,
                                 iri=iri)

    @classmethod
    def prepare(cls):
        # Scan the installed FSL atlas xml files and build, per atlas:
        # a Terminology artifact, a LabelRoot, a curie prefix, and a
        # dynamically created Source subclass bound to the parsed tree.
        ATLAS_PATH = '/usr/share/fsl/data/atlases/'

        # fallback short names for atlases whose xml lacks a shortname
        shortnames = {
            'JHU White-Matter Tractography Atlas': 'JHU WM',
            'Oxford-Imanova Striatal Structural Atlas': 'OISS',
            'Talairach Daemon Labels': 'Talairach',
            'Subthalamic Nucleus Atlas': 'SNA',
            'JHU ICBM-DTI-81 White-Matter Labels': 'JHU ICBM WM',
            'Juelich Histological Atlas': 'Juelich',
            'MNI Structural Atlas': 'MNI Struct',
        }

        # curie prefix overrides for long atlas names
        prefixes = {
            'Cerebellar Atlas in MNI152 space after normalization with FLIRT': 'CMNIfl',
            'Cerebellar Atlas in MNI152 space after normalization with FNIRT': 'CMNIfn',
            'Sallet Dorsal Frontal connectivity-based parcellation': 'DFCBP',
            'Neubert Ventral Frontal connectivity-based parcellation': 'VFCBP',
            'Mars Parietal connectivity-based parcellation': 'PCBP',
        }

        for xmlfile in glob.glob(ATLAS_PATH + '*.xml'):
            filename = os.path.splitext(os.path.basename(xmlfile))[0]
            tree = etree.parse(xmlfile)
            parcellation_name = tree.xpath('header//name')[0].text

            # namespace
            namespace = rdflib.Namespace(FSLATS[filename + '/labels/'])

            # shortname: prefer the xml header, fall back to the table above
            shortname = tree.xpath('header//shortname')
            if shortname:
                shortname = shortname[0].text
            else:
                shortname = shortnames[parcellation_name]

            artifact_shortname = shortname
            shortname = shortname.replace(' ', '')

            # Artifact
            artifact = Terminology(iri=FSLATS[filename],
                                   label=parcellation_name,
                                   docUri='http://fsl.fmrib.ox.ac.uk/fsl/fslwiki/Atlases',
                                   species=NCBITaxon['9606'],
                                   devstage=UBERON['0000113'],  # FIXME mature vs adult vs when they actually did it...
                                   region=UBERON['0000955'],
                                   shortname=artifact_shortname)
            setattr(cls.Artifacts, shortname, artifact)

            # LabelRoot
            root = LabelRoot(iri=nsExact(namespace),
                             label=parcellation_name + ' label root',
                             shortname=shortname,
                             definingArtifacts=(artifact.iri,))
            root.namespace = namespace
            cls.roots += root,

            # prefix
            if parcellation_name in prefixes:
                prefix = 'fsl' + prefixes[parcellation_name]
            else:
                prefix = 'fsl' + shortname

            cls.prefixes[prefix] = root.iri

            # Source
            # _tree=tree binds the current parse tree as a default so each
            # generated class keeps its own tree (avoids late binding).
            @classmethod
            def loadData(cls, _tree=tree):
                out = []
                for node in _tree.xpath('data//label'):
                    index, label = node.get('index'), node.text
                    out.append((index, label))
                return out

            source = type('FSLsource_' + shortname.replace(' ', '_'),
                          (Source,),
                          dict(iri=rdflib.URIRef('file://' + xmlfile),
                               source=xmlfile,
                               source_original=True,
                               artifact=artifact,
                               root=root,  # used locally since we have more than one root per ontology here
                               loadData=loadData))
            cls.sources += source,

        super().prepare()
def main():
    """ Cross-reference Allen Mouse Brain Atlas (MBA) terms against UBERON
        via hasDbXref, report coverage, and write a delineates/delineatedBy
        bridge ontology. """
    # load the slim MBA parcellation plus its bridge file
    abagraph = rdflib.Graph()
    abagraph.parse((gitf / 'NIF-Ontology/ttl/generated/parcellation/mbaslim.ttl').as_posix(),
                   format='turtle')
    abagraph.parse((gitf / 'NIF-Ontology/ttl/bridge/aba-bridge.ttl').as_posix(),
                   format='turtle')
    nses = {k: rdflib.Namespace(v) for k, v in abagraph.namespaces()}
    #nses['ABA'] = nses['MBA']  # enable quick check against the old xrefs
    syn_iri = nses['NIFRID']['synonym']
    acro_iri = nses['NIFRID']['acronym']
    abasyns = {}
    abalabs = {}
    abaacro = {}
    ABA_PREFIX = 'MBA:'
    #ABA_PREFIX = 'ABA:'  # all bad

    # collect label, synonyms, and acronyms for every MBA class
    for sub in abagraph.subjects(rdflib.RDF.type, rdflib.OWL.Class):
        if not sub.startswith(nses[ABA_PREFIX[:-1]]['']):
            continue
        subkey = ABA_PREFIX + sub.rsplit('/', 1)[1]
        sub = rdflib.URIRef(sub)
        abalabs[subkey] = [o for o in abagraph.objects(rdflib.URIRef(sub), rdflib.RDFS.label)][0].toPython()
        syns = []
        for s in abagraph.objects(sub, syn_iri):
            syns.append(s.toPython())

        abasyns[subkey] = syns
        abaacro[subkey] = [a.toPython() for a in abagraph.objects(sub, acro_iri)]

    # all structures under the brain root (997) from the live Allen API
    url = 'http://api.brain-map.org/api/v2/tree_search/Structure/997.json?descendants=true'
    resp = requests.get(url).json()
    ids = set([ABA_PREFIX + str(r['id']) for r in resp['msg']])
    Query = namedtuple('Query', ['id', 'relationshipType', 'direction', 'depth'])
    #uberon = Query('UBERON:0000955', 'http://purl.obolibrary.org/obo/BFO_0000050', 'INCOMING', 9)
    uberon = Query('UBERON:0001062', 'subClassOf', 'INCOMING', 10)  # anatomical entity
    # NOTE(review): g is not defined in this function — presumably a
    # module-level scigraph Graph client; confirm.
    output = g.getNeighbors(**uberon._asdict())  # TODO figure out the superclass that can actually get all the brain parts

    meta_edge = 'http://www.geneontology.org/formats/oboInOwl#hasDbXref'

    # build uberon<->aba mappings from the xrefs on each uberon node
    u_a_map = {}
    a_u_map = {}
    uberon_syns = {}
    uberon_labs = {}
    syn_types = {
        'http://www.geneontology.org/formats/oboInOwl#hasExactSynonym': 'Exact',
        'http://www.geneontology.org/formats/oboInOwl#hasNarrowSynonym': 'Narrow',
        'http://www.geneontology.org/formats/oboInOwl#hasRelatedSynonym': 'Related',
        'http://www.geneontology.org/formats/oboInOwl#hasBroadSynonym': 'Broad',
    }
    for node in output['nodes']:
        curie = node['id']
        uberon_labs[curie] = node['lbl']
        uberon_syns[curie] = {}
        if 'synonym' in node['meta']:
            for stype in syn_types:
                if stype in node['meta']:
                    uberon_syns[curie][stype] = node['meta'][stype]

        if meta_edge in node['meta']:
            xrefs = node['meta'][meta_edge]
            mba_ref = [r for r in xrefs if r.startswith(ABA_PREFIX)]
            u_a_map[curie] = mba_ref
            if mba_ref:
                for mba in mba_ref:
                    a_u_map[mba] = curie
        else:
            u_a_map[curie] = None

    def obo_output():  # oh man obo_io is a terrible interface for writing obofiles :/
        # Write the synonym review obofiles (currently disabled below).
        for aid in abalabs:  # set aids not in uberon to none
            if aid not in a_u_map:
                a_u_map[aid] = None

        e = OboFile()
        n = OboFile()
        r = OboFile()
        b = OboFile()
        name_order = 'Exact', 'Narrow', 'Related', 'Broad'
        rev = {v: k for k, v in syn_types.items()}  # sillyness
        syn_order = [rev[n] for n in name_order]
        files_ = {rev['Broad']: b, rev['Exact']: e, rev['Narrow']: n, rev['Related']: r}
        for aid, uid in sorted(a_u_map.items()):
            id_line = 'id: ' + aid
            lines = []
            lines.append(id_line)
            lines.append('name: ' + abalabs[aid])
            if uid in uberon_syns:
                syns = uberon_syns[uid]
            else:
                syns = {}

            for syn_type in syn_order:
                f = files_[syn_type]
                if syn_types[syn_type] == 'Exact' and uid is not None:
                    syn_line = 'synonym: "' + uberon_labs[uid] + '" ' + syn_types[syn_type].upper() + ' [from label]'
                    lines.append(syn_line)
                if syn_type in syns:
                    for syn in sorted(syns[syn_type]):
                        syn_line = 'synonym: "' + syn + '" ' + syn_types[syn_type].upper() + ' []'
                        lines.append(syn_line)

            block = '\n'.join(lines)
            term = Term(block, f)

        e.filename = 'e-syns.obo'
        n.filename = 'en-syns.obo'
        r.filename = 'enr-syns.obo'
        b.filename = 'enrb-syns.obo'
        for f in files_.values():
            h = Header('format-version: 1.2\nontology: %s\n' % f.filename)
            h.append_to_obofile(f)
            f.write(f.filename)
    #embed()
    #obo_output()

    def make_record(uid, aid):  # edit this to change the format
        # Human-readable side-by-side record for synonym review.
        to_format = ('{uberon_id: <20}{uberon_label:}\n'
                     '{aba_id: <20}{aba_label}\n'
                     '------ABA SYNS------\n'
                     '{aba_syns}\n'
                     '-----UBERON SYNS-----\n'
                     '{uberon_syns}\n')
        uberon_syn_rec = uberon_syns[uid]
        insert_uberon = []
        for edge, syns in sorted(uberon_syn_rec.items()):
            insert_uberon.append('--{abv}--\n{syns}'.format(abv=syn_types[edge], syns='\n'.join(sorted(syns))))

        kwargs = {'uberon_id': uid,
                  'uberon_label': uberon_labs[uid],
                  'aba_id': aid,
                  'aba_label': abalabs[aid],
                  'aba_syns': '\n'.join(sorted(abasyns[aid] + abaacro[aid])),
                  'uberon_syns': '\n'.join(insert_uberon)}
        return to_format.format(**kwargs)

    #text = '\n\n'.join([make_record(uid, aid[0]) for uid, aid in sorted(u_a_map.items()) if aid])
    #with open('aba_uberon_syn_review.txt', 'wt') as f:
        #f.write(text)

    print('total uberon terms checked:', len(uberon_labs))
    print('total aba terms: ', len(abalabs))
    print('total uberon with aba xref:', len([a for a in u_a_map.values() if a]))

    # emit the bridge ontology linking uberon regions to the MBA ids
    ubridge = createOntology('uberon-parcellation-mappings',
                             'Uberon Parcellation Mappings',
                             makePrefixes('owl', 'ilx', 'UBERON', 'MBA'))
    for u, arefs in u_a_map.items():
        if arefs:
            # TODO check for bad assumptions here
            ubridge.add_trip(u, 'ilx:delineatedBy', arefs[0])
            ubridge.add_trip(arefs[0], 'ilx:delineates', u)

    ubridge.write()


if __name__ == '__main__':
    embed()
class PaxRatLabels(PaxLabels):
    """ Compilation of all labels used to name rat brain regions
        in atlases created using Paxinos and Watson\'s methodology."""

    filename = 'paxinos-rat-labels'
    name = 'Paxinos & Watson Rat Parcellation Labels'
    shortname = 'paxrat'
    namespace = PAXRAT

    prefixes = {**makePrefixes('NIFRID', 'ilxtr', 'prov', 'dcterms'),
                'PAXRAT': str(PAXRAT),
                'paxratver': str(paxratver),
    }

    # sources need to go in the order with which we want the labels to take precedence (ie in this case 6e > 4e)
    sources = PaxFix, PaxSrAr_6, PaxSr_6, PaxSrAr_4, PaxFix6, PaxFix4  #, PaxTree_6()  # tree has been successfully used for crossreferencing, additional terms need to be left out at the moment (see in_tree_not_in_six)
    root = LabelRoot(iri=nsExact(namespace),  # PAXRAT['0'],
                     label='Paxinos rat parcellation label root',
                     shortname=shortname,
                     #definingArtifactsS=None,#Artifacts.PaxRatAt.iri,
                     definingArtifactsS=(Artifacts.PaxRatAt.iri,),
    )

    _fixes = []

    # Abbreviations that collide between editions; each DupeRecord carries
    # the alternate abbreviation(s), structure name(s), figures, and the
    # artifact iris it applies to.
    _dupes = {
        # for 4e the numbers in the index are to the cranial nerve nuclei entries
        '3N': DupeRecord(alt_abbrevs=['3'], structures=['oculomotor nucleus'], figures={}, artiris=[Artifacts.PaxRat4.iri]),
        '4N': DupeRecord(alt_abbrevs=['4'], structures=['trochlear nucleus'], figures={}, artiris=[Artifacts.PaxRat4.iri]),
        '6N': DupeRecord(alt_abbrevs=['6'], structures=['abducens nucleus'], figures={}, artiris=[Artifacts.PaxRat4.iri]),
        '7N': DupeRecord(alt_abbrevs=['7'], structures=['facial nucleus'], figures={}, artiris=[Artifacts.PaxRat4.iri]),
        '10N': DupeRecord(alt_abbrevs=['10'], structures=['dorsal motor nucleus of vagus'], figures={}, artiris=[Artifacts.PaxRat4.iri]),

        # FIXME need comments about the index entries
        '1Cb': DupeRecord(alt_abbrevs=['1'], structures=['cerebellar lobule 1'], figures={}, artiris=[Artifacts.PaxRat4.iri]),
        '2Cb': DupeRecord(alt_abbrevs=['2'], structures=['cerebellar lobule 2'], figures={}, artiris=[Artifacts.PaxRat4.iri]),
        '2/3Cb': DupeRecord(alt_abbrevs=['2&3'], structures=['cerebellar lobules 2&3'], figures={}, artiris=[Artifacts.PaxRat4.iri]),
        '3Cb': DupeRecord(alt_abbrevs=['3'], structures=['cerebellar lobule 3'], figures={}, artiris=[Artifacts.PaxRat4.iri]),
        '4Cb': DupeRecord(alt_abbrevs=['4'], structures=['cerebellar lobule 4'], figures={}, artiris=[Artifacts.PaxRat4.iri]),
        '4/5Cb': DupeRecord(alt_abbrevs=['4&5'], structures=['cerebellar lobules 4&5'], figures={}, artiris=[Artifacts.PaxRat4.iri]),
        '5Cb': DupeRecord(alt_abbrevs=['5'], structures=['cerebellar lobule 5'], figures={}, artiris=[Artifacts.PaxRat4.iri]),
        '6Cb': DupeRecord(alt_abbrevs=['6'], structures=['cerebellar lobule 6'], figures={}, artiris=[Artifacts.PaxRat4.iri]),
        '6aCb': DupeRecord(alt_abbrevs=['6a'], structures=['cerebellar lobule 6a'], figures={}, artiris=[Artifacts.PaxRat4.iri]),
        '6bCb': DupeRecord(alt_abbrevs=['6b'], structures=['cerebellar lobule 6b'], figures={}, artiris=[Artifacts.PaxRat4.iri]),
        '6cCb': DupeRecord(alt_abbrevs=['6c'], structures=['cerebellar lobule 6c'], figures={}, artiris=[Artifacts.PaxRat4.iri]),
        '7Cb': DupeRecord(alt_abbrevs=['7'], structures=['cerebellar lobule 7'], figures={}, artiris=[Artifacts.PaxRat4.iri]),
        '8Cb': DupeRecord(alt_abbrevs=['8'], structures=['cerebellar lobule 8'], figures={}, artiris=[Artifacts.PaxRat4.iri]),
        '9Cb': DupeRecord(alt_abbrevs=['9'], structures=['cerebellar lobule 9'], figures={}, artiris=[Artifacts.PaxRat4.iri]),
        '10Cb': DupeRecord(alt_abbrevs=['10'], structures=['cerebellar lobule 10'], figures={}, artiris=[Artifacts.PaxRat4.iri]),
    }

    # abbrevs that have identical structure names (key -> canonical form)
    _merge = {
        '5N': 'Mo5',
        '12N': '12',
        'ANS': 'Acc',
        'ASt': 'AStr',
        'AngT': 'Ang',
        'MnM': 'MMn',
        'MoDG': 'Mol',
        'PDPO': 'PDP',
        'PTg': 'PPTg',
        'STIA': 'BSTIA',
        'STL': 'BSTL',
        'STLD': 'BSTLD',
        'STLI': 'BSTLI',
        'STLJ': 'BSTLJ',
        'STLP': 'BSTLP',
        'STLV': 'BSTLV',
        'STM': 'BSTM',
        'STMA': 'BSTMA',
        'STMP': 'BSTMP',
        'STMPI': 'BSTMPI',
        'STMPL': 'BSTMPL',
        'STMPM': 'BSTMPM',
        'STMV': 'BSTMV',
        'hif': 'hf',
        'och': 'ox',
    }

    def curate(self):
        """ Interactive curation helper: cross-checks the 4th/6th edition
            sources and the tree against each other, reports discrepancies,
            and writes review files. Ends in breakpoint() by design. """
        fr, err4 = PaxSrAr_4()
        sx, err6 = PaxSrAr_6()
        sx2, _ = PaxSr_6()
        tr, err6t = PaxTree_6()

        # set differences between the editions / index / tree
        sfr = set(fr)
        ssx = set(sx)
        ssx2 = set(sx2)
        str_ = set(tr)
        in_four_not_in_six = sfr - ssx
        in_six_not_in_four = ssx - sfr
        in_tree_not_in_six = str_ - ssx
        in_six_not_in_tree = ssx - str_
        in_six2_not_in_six = ssx2 - ssx
        in_six_not_in_six2 = ssx - ssx2

        print(len(in_four_not_in_six), len(in_six_not_in_four),
              len(in_tree_not_in_six), len(in_six_not_in_tree),
              len(in_six2_not_in_six), len(in_six_not_in_six2),)

        # structure-name -> abbrev for the tree; report labels the tree has
        # that the 6e source does not
        tr_struct_abrv = {}
        for abrv, ((struct, *extra), _, parent) in tr.items():
            tr_struct_abrv[struct] = abrv
            if abrv in sx:
                #print(abrv, struct, parent)
                if struct and struct not in sx[abrv][0]:
                    print(f'Found new label from tr for {abrv}:\n{struct}\n{sx[abrv][0]}\n')

        # can't run these for tr yet
        #reduced = set(tr_struct_abrv.values())
        #print(sorted(_ for _ in tr if _ not in reduced))
        #assert len(tr_struct_abrv) == len(tr), 'mapping between abrvs and structs is not 1:1 for tr'

        # same check for the 6e index source
        sx2_struct_abrv = {}
        for abrv, ((struct, *extra), _) in sx2.items():
            sx2_struct_abrv[struct] = abrv
            if abrv in sx:
                if struct and struct not in sx[abrv][0]:
                    print(f'Found new label from sx2 for {abrv}:\n{struct}\n{sx[abrv][0]}\n')

        reduced = set(sx2_struct_abrv.values())
        print(sorted(_ for _ in reduced if _ not in sx2))  # ah inconsistent scoping rules in class defs...
        assert len(sx2_struct_abrv) == len(sx2), 'there is a duplicate struct'

        # and for the 6e article source
        sx_struct_abrv = {}
        for abrv, ((struct, *extra), _) in sx.items():
            sx_struct_abrv[struct] = abrv

        reduced = set(sx_struct_abrv.values())
        print(sorted(_ for _ in reduced if _ not in sx))
        assert len(sx_struct_abrv) == len(sx), 'there is a duplicate struct'

        # TODO test whether any of the tree members that were are going to exclude have children that we are going to include

        names_match_not_abbervs = {}

        # review tables written to disk below
        tree_no_name = {_: tr[_] for _ in sorted(in_tree_not_in_six) if not tr[_][0][0]}
        tree_with_name = {_: tr[_] for _ in sorted(in_tree_not_in_six) if tr[_][0][0]}
        not_in_tree_with_figures = {_: sx[_] for _ in sorted(in_six_not_in_tree) if sx[_][-1]}
        a = f'{"abv":<25} | {"structure name":<60} | parent abv\n' + '\n'.join(f'{k:<25} | {v[0][0]:<60} | {v[-1]}' for k, v in tree_with_name.items())
        b = f'{"abv":<25} | {"structure name":<15} | parent abv\n' + '\n'.join(f'{k:<25} | {"":<15} | {v[-1]}' for k, v in tree_no_name.items())
        c = f'abv | {"structure name":<60} | figures (figure ranges are tuples)\n' + '\n'.join(f'{k:<6} | {v[0][0]:<60} | {v[-1]}' for k, v in not_in_tree_with_figures.items())
        with open(os.path.expanduser('~/ni/dev/nifstd/paxinos/tree-with-name.txt'), 'wt') as f:
            f.write(a)
        with open(os.path.expanduser('~/ni/dev/nifstd/paxinos/tree-no-name.txt'), 'wt') as f:
            f.write(b)
        with open(os.path.expanduser('~/ni/dev/nifstd/paxinos/not-in-tree-with-figures.txt'), 'wt') as f:
            f.write(c)

        #match_name_not_abrev = set(v[0][0] for v in tree_with_name.values()) & set(v[0][0] for v in sx.values())
        # structures that map to more than one abbreviation in the merged records
        _match_name_not_abrev = {}
        for a, (alts, (s, *extra), f, *_) in PaxRatLabels().records()[0].items():
            if s not in _match_name_not_abrev:
                _match_name_not_abrev[s] = [a]
            elif a not in _match_name_not_abrev[s]:
                _match_name_not_abrev[s].append(a)

        match_name_not_abrev = {k: v for k, v in _match_name_not_abrev.items() if len(v) > 1}

        # abbreviations with more than one alternate abbreviation
        abrv_match_not_name = {k: v[0] for k, v in PaxRatLabels().records()[0].items() if len(v[0]) > 1}
        _ = [print(k, *v[0]) for k, v in PaxRatLabels().records()[0].items() if len(v[0]) > 1]
        breakpoint()