class SwansonLabels(ParcOnt):  # FIXME not labels...
    # Parcellation ontology for the Swanson 2014 partonomies.
    filename = 'swanson'
    name = 'Swanson 2014 partonomies'
    shortname = 'swannt'
    imports = parcCore,
    prefixes = {**makePrefixes('NIFRID', 'ilxtr', 'prov'),
                'swanr': interlex_namespace('swanson/uris/readable/'),
                'SWAN': interlex_namespace('swanson/uris/neuroanatomical-terminology/terms/'),
                'SWAA': interlex_namespace('swanson/uris/neuroanatomical-terminology/appendix/'),}
    sources = SwansonAppendix,
    namespace = prefixes['SWAN']
    root = LabelRoot(iri=nsExact(namespace),  # FIXME this is not really a label in the strict sense
                     label='Swanson label root',
                     shortname=shortname,
                     definingArtifacts=(s.artifact.iri for s in sources),)

    def _triples(self):
        # Re-emit the triples of the swanson hierarchies graph, rerooting
        # subClassOf links onto this ontology's label root and wrapping
        # rdfs:label values as Label records.
        skip_subject = rdflib.URIRef(
            'http://ontology.neuinfo.org/NIF/ttl/generated/swanson_hierarchies.ttl')
        for subj, pred, obj in swanson().g:
            #if pred != rdf.type and obj != owl.Ontology:
            if subj == skip_subject:
                # drop the ontology-header triples of the source file
                continue
            if pred == rdfs.subClassOf and obj == ilxtr.swansonBrainRegionConcept:
                yield subj, pred, self.root.iri
            elif pred == rdfs.label:
                yield subj, pred, Label(label=obj, labelRoot=self.root).rdfs_label
                yield subj, skos.prefLabel, obj
            else:
                yield subj, pred, obj
def __init__(self, input):
    """ Build the output graph for transgenic lines.

        *input* is the raw neuron data; it is stashed unmodified on
        self.neuron_data for later processing. """
    # provider-specific curie prefixes for transgenic line identifiers
    provider_prefixes = {
        'JAX': 'http://jaxmice.jax.org/strain/',
        'MMRRC': 'http://www.mmrrc.org/catalog/getSDS.jsp?mmrrc_id=',
        'AIBS': 'http://api.brain-map.org/api/v2/data/TransgenicLine/',
    }
    # merge with the standard prefixes needed by the output ontology
    prefixes = {**provider_prefixes,
                **makePrefixes('definition', 'ilxtr', 'owl')}
    self.g = makeGraph('transgenic-lines', prefixes=prefixes)
    self.neuron_data = input
class PaxRegion(RegionsBase): __pythonOnly = True # TODO path = 'ttl/generated/parcellation/' filename = 'paxinos-rat-regions' name = 'Paxinos & Watson Rat Parcellation Regions' shortname = 'paxratr' comment = ('Intersection between labels and atlases for all regions ' 'delineated using Paxinos and Watson\'s methodology.') prefixes = {**makePrefixes('NIFRID', 'ilxtr', 'prov', 'ILXREPLACE')} # sources need to go in the order with which we want the labels to take precedence (ie in this case 6e > 4e) #sources = PaxSrAr_6(), PaxSr_6(), PaxSrAr_4(), PaxTree_6() # tree has been successfully used for crossreferencing, additional terms need to be left out at the moment (see in_tree_not_in_six) root = RegionRoot( iri=TEMP[ 'FIXME'], # FIXME these should probably be EquivalentTo Parcellation Region HasLabel some label HasAtlas some atlas... label='Paxinos rat parcellation region root', shortname=shortname, ) # atlas version # label identifier # figures things = {} @classmethod def addthing(cls, thing, value): cls.things[thing] = value
class PaxMouseLabels(PaxLabels): """ Compilation of all labels used to name mouse brain regions in atlases created using Paxinos and Franklin\'s methodology.""" # TODO FIXME align indexes where possible to paxrat??? filename = 'paxinos-mus-labels' name = 'Paxinos & Franklin Mouse Parcellation Labels' shortname = 'paxmus' namespace = PAXMUS prefixes = { **makePrefixes('NIFRID', 'ilxtr', 'prov', 'dcterms'), 'PAXMUS': str(PAXMUS), 'paxmusver': str(paxmusver), } sources = PaxMFix, PaxMSrAr_2, PaxMSrAr_3 root = LabelRoot( iri=nsExact(namespace), # PAXMUS['0'], label='Paxinos mouse parcellation label root', shortname=shortname, definingArtifactsS=(Artifacts.PaxMouseAt.iri, ), ) _merge = { '4/5Cb': '4&5Cb', '5N': 'Mo5', '12N': '12', 'AngT': 'Ang', 'ANS': 'Acc', 'ASt': 'AStr', 'hif': 'hf', 'MnM': 'MMn', 'MoDG': 'Mol', 'och': 'ox', 'PHA': 'PH', # FIXME PH is reused in 3rd 'ST': 'BST', 'STIA': 'BSTIA', 'STLD': 'BSTLD', 'STLI': 'BSTLI', 'STLJ': 'BSTLJ', 'STLP': 'BSTLP', 'STLV': 'BSTLV', 'STMA': 'BSTMA', 'STMP': 'BSTMP', 'STMPI': 'BSTMPI', 'STMPL': 'BSTMPL', 'STMPM': 'BSTMPM', 'STMV': 'BSTMV', 'STS': 'BSTS', }
def main():
    """ Fetch the BlueBrain NAT modeling dictionary csv and serialize it
        as the 'measures' ontology via rowParse. """
    source = 'https://raw.githubusercontent.com/BlueBrain/nat/master/nat/data/modelingDictionary.csv'
    delimiter = ';'

    resp = requests.get(source)
    # Skip empty rows and comment rows. Guard r[0] explicitly: the old
    # test r[0][0] != '#' raised IndexError when the first cell was an
    # empty string.
    rows = [r for r in csv.reader(resp.text.split('\n'), delimiter=delimiter)
            if r and r[0] and not r[0].startswith('#')]
    header = ['Record_ID', 'parent_category', 'name', 'description', 'required_tags']

    PREFIXES = makePrefixes('owl', 'skos', 'ILX', 'definition')
    graph = makeGraph('measures', prefixes=PREFIXES)

    class nat(rowParse):
        """ rowParse driver: each method is invoked with the value of the
            column whose name matches the method, once per row. """

        def Record_ID(self, value):
            print(value)
            self.old_id = value
            self._id = TEMP[value]

        def parent_category(self, value):
            self.super_old_id = value
            self.super_id = TEMP[value]

        def name(self, value):
            # keep the raw value as a hidden label, humanize for rdfs:label
            self.hidden = value
            self.label = value.replace('_', ' ')

        def description(self, value):
            self.definition = value

        def required_tags(self, value):
            pass  # intentionally ignored

        def _row_post(self):
            # called after every row: emit the class and its annotations
            graph.add_class(self._id, self.super_id, label=self.label)
            graph.add_trip(self._id, 'skos:hiddenLabel', self.hidden)
            graph.add_trip(self._id, 'definition:', self.definition)

    nat(rows, header)  # side effect: populates graph row by row
    graph.write()


if __name__ == '__main__':
    # was breakpoint(), which made the script a no-op when run directly
    main()
def main():
    """ Round-trip demo/test of the combinator machinery (EquivalentClass,
        Restriction, annotation, flattenTriples) against the uberon and
        neuron-circuit-role bridge files. """
    import rdflib
    from pyontutils.core import makeGraph, makePrefixes, log
    from pyontutils.config import auth
    ub = auth.get_path('ontology-local-repo') / 'ttl/bridge/uberon-bridge.ttl'
    ncrb = auth.get_path('ontology-local-repo') / 'ttl/NIF-Neuron-Circuit-Role-Bridge.ttl'
    if not ub.exists() or not ncrb.exists():
        # just skip this if we can't find the files
        log.warning(f'missing file {ub} or {ncrb}')
        return

    graph = rdflib.Graph()
    graph.parse(ub.as_posix(), format='turtle')
    graph.parse(ncrb.as_posix(), format='ttl')
    ecgraph = rdflib.Graph()
    # NOTE(review): EquivalentClass, oc_, Restriction, etc. come from
    # module scope (combinators) — not visible in this chunk.
    oec = EquivalentClass()
    test = tuple(oec.parse(graph=graph))
    ft = oc_.full_combinator(test[0][0], test[0][1])
    ftng = makeGraph('thing3', prefixes=makePrefixes('owl', 'TEMP'))
    # starred-tuple statement: forces evaluation of the serialize generator
    *ft.serialize(ftng.g),
    ftng.write()
    _roundtrip = list(test[0][1](test[0][0]))
    roundtrip = oc_(test[0][0], test[0][1])  # FIXME not quite there yet...
    for t in roundtrip:
        ecgraph.add(t)
    ecng = makeGraph('thing2', graph=ecgraph, prefixes=makePrefixes('owl', 'TEMP'))
    ecng.write()
    if __name__ == '__main__':
        breakpoint()

    # NOTE(review): everything below the early return is unreachable demo
    # code kept for reference — confirm this is intentional.
    return
    r = Restriction(rdfs.subClassOf)  #, scope=owl.allValuesFrom)#NIFRID.has_proper_part)
    l = tuple(r.parse(graph=graph))
    for t in r.triples:
        graph.remove(t)
    ng = makeGraph('thing', graph=graph)
    ng.write()
    #print(l)
    restriction = Restriction(None)  #rdf.first)
    ll = List(lift_rules={owl.Restriction: restriction})
    trips = tuple(ll.parse(graph=graph))
    #subClassOf = PredicateCombinator(rdfs.subClassOf)  # TODO should be able to do POCombinator(rdfs.subClassOf, 0bjectCombinator)
    subClassOf = POCombinator(rdfs.subClassOf, ObjectCombinator)
    superDuperClass = subClassOf(TEMP.superDuperClass)  # has to exist prior to triples
    ec = oec(TEMP.ec1,
             TEMP.ec2,
             restriction(TEMP.predicate0, TEMP.target1),
             restriction(TEMP.predicate1, TEMP.target2),)
    egraph = rdflib.Graph()
    acombinator = annotation((TEMP.testSubject, rdf.type, owl.Class), (TEMP.hoh, 'FUN'))
    ft = flattenTriples((acombinator((TEMP.annotation, 'annotation value')),
                         acombinator((TEMP.anotherAnnotation, 'annotation value again')),
                         oc_(TEMP.c1, superDuperClass),
                         oc_(TEMP.c2, superDuperClass),
                         oc_(TEMP.c3, superDuperClass),
                         oc_(TEMP.c4, superDuperClass),
                         oc_(TEMP.c5, superDuperClass),
                         oc_(TEMP.wat, subClassOf(TEMP.watParent)),
                         oc_(TEMP.testSubject),
                         ec(TEMP.testSubject),
                         oc_(TEMP.more, oec(TEMP.ec3, restriction(TEMP.predicate10, TEMP.target10))),
                         ),)
    [egraph.add(t) for t in ft]
    eng = makeGraph('thing1', graph=egraph, prefixes=makePrefixes('owl', 'TEMP'))
    eng.write()


if __name__ == '__main__':
    breakpoint()
from pyontutils.core import NIFRID, oboInOwl
from IPython import embed

# cached scigraph clients
sgg = Graph(cache=True)
sgv = Vocabulary(cache=True)

# shape of a scigraph neighbors query
Query = namedtuple('Query', ['root', 'relationshipType', 'direction', 'depth'])

# commonly used annotation predicates
CON = oboInOwl.consider
DBX = oboInOwl.hasDbXref  # FIXME also behaves as objectProperty :/
AID = oboInOwl.hasAlternativeId
IRBC = NIFRID.isReplacedByClass

PREFIXES = makePrefixes('UBERON',
                        'ro',
                        'owl',
                        'skos',
                        )
NIFPREFIXES = makePrefixes('NIFGA',
                           'oboInOwl',
                           'replacedBy',
                           )
NIFPREFIXES.update(PREFIXES)

# local paths into the ontology repo
nifga_path = devconfig.ontology_local_repo + '/ttl/NIF-GrossAnatomy.ttl'
uberon_path = devconfig.ontology_local_repo + '/ttl/external/uberon.owl'
uberon_bridge_path = 'http://purl.obolibrary.org/obo/uberon/bridge/uberon-bridge-to-nifstd.owl'
#bridge_path = os.path.expanduser('~/git/NIF-Ontology/ttl/uberon-bridge-to-nifstd.ttl')  # scigraph's got us
class FSL(LabelsBase):
    """ Ontology file containing labels from the FMRIB Software Library (FSL)
        atlases collection. All identifiers use the number of the index
        specified in the source xml file. """

    path = 'ttl/generated/parcellation/'
    filename = 'fsl'
    name = 'Terminologies from FSL atlases'
    shortname = 'fsl'
    imports = parcCore,
    prefixes = {**makePrefixes('ilxtr'),
                **ParcOnt.prefixes,
                'FSLATS': str(FSLATS),
    }
    sources = tuple()  # set by prepare()
    roots = tuple()  # set by prepare()

    class Artifacts(Collector):
        """ Artifacts for FSL """
        collects = Artifact

    def _triples(self):
        # One Label per (index, label) pair of every prepared source.
        for source in self.sources:
            for index, label in source:
                iri = source.root.namespace[str(index)]
                yield from Label(labelRoot=source.root,
                                 label=label,
                                 iri=iri)

    @classmethod
    def prepare(cls):
        # Scan the installed FSL atlas xml files and build, per atlas:
        # a Terminology artifact, a LabelRoot, a curie prefix, and a
        # dynamically created Source subclass bound to the parsed tree.
        ATLAS_PATH = '/usr/share/fsl/data/atlases/'

        # fallback short names for atlases whose xml lacks a shortname
        shortnames = {
            'JHU White-Matter Tractography Atlas': 'JHU WM',
            'Oxford-Imanova Striatal Structural Atlas': 'OISS',
            'Talairach Daemon Labels': 'Talairach',
            'Subthalamic Nucleus Atlas': 'SNA',
            'JHU ICBM-DTI-81 White-Matter Labels': 'JHU ICBM WM',
            'Juelich Histological Atlas': 'Juelich',
            'MNI Structural Atlas': 'MNI Struct',
        }

        # curie prefix overrides for long atlas names
        prefixes = {
            'Cerebellar Atlas in MNI152 space after normalization with FLIRT': 'CMNIfl',
            'Cerebellar Atlas in MNI152 space after normalization with FNIRT': 'CMNIfn',
            'Sallet Dorsal Frontal connectivity-based parcellation': 'DFCBP',
            'Neubert Ventral Frontal connectivity-based parcellation': 'VFCBP',
            'Mars Parietal connectivity-based parcellation': 'PCBP',
        }

        for xmlfile in glob.glob(ATLAS_PATH + '*.xml'):
            filename = os.path.splitext(os.path.basename(xmlfile))[0]
            tree = etree.parse(xmlfile)
            parcellation_name = tree.xpath('header//name')[0].text

            # namespace
            namespace = rdflib.Namespace(FSLATS[filename + '/labels/'])

            # shortname: prefer the xml header, fall back to the table above
            shortname = tree.xpath('header//shortname')
            if shortname:
                shortname = shortname[0].text
            else:
                shortname = shortnames[parcellation_name]

            artifact_shortname = shortname
            shortname = shortname.replace(' ', '')

            # Artifact
            artifact = Terminology(iri=FSLATS[filename],
                                   label=parcellation_name,
                                   docUri='http://fsl.fmrib.ox.ac.uk/fsl/fslwiki/Atlases',
                                   species=NCBITaxon['9606'],
                                   devstage=UBERON['0000113'],  # FIXME mature vs adult vs when they actually did it...
                                   region=UBERON['0000955'],
                                   shortname=artifact_shortname)
            setattr(cls.Artifacts, shortname, artifact)

            # LabelRoot
            root = LabelRoot(iri=nsExact(namespace),
                             label=parcellation_name + ' label root',
                             shortname=shortname,
                             definingArtifacts=(artifact.iri,))
            root.namespace = namespace
            cls.roots += root,

            # prefix
            if parcellation_name in prefixes:
                prefix = 'fsl' + prefixes[parcellation_name]
            else:
                prefix = 'fsl' + shortname

            cls.prefixes[prefix] = root.iri

            # Source
            # _tree=tree binds the current parse tree as a default so each
            # generated class keeps its own tree (avoids late binding).
            @classmethod
            def loadData(cls, _tree=tree):
                out = []
                for node in _tree.xpath('data//label'):
                    index, label = node.get('index'), node.text
                    out.append((index, label))
                return out

            source = type('FSLsource_' + shortname.replace(' ', '_'),
                          (Source,),
                          dict(iri=rdflib.URIRef('file://' + xmlfile),
                               source=xmlfile,
                               source_original=True,
                               artifact=artifact,
                               root=root,  # used locally since we have more than one root per ontology here
                               loadData=loadData))
            cls.sources += source,

        super().prepare()
def main():
    """ Cross-reference Allen Mouse Brain Atlas (MBA) terms against UBERON
        via hasDbXref, report coverage, and write a delineates/delineatedBy
        bridge ontology. """
    # load the slim MBA parcellation plus its bridge file
    abagraph = rdflib.Graph()
    abagraph.parse((gitf / 'NIF-Ontology/ttl/generated/parcellation/mbaslim.ttl').as_posix(),
                   format='turtle')
    abagraph.parse((gitf / 'NIF-Ontology/ttl/bridge/aba-bridge.ttl').as_posix(),
                   format='turtle')
    nses = {k: rdflib.Namespace(v) for k, v in abagraph.namespaces()}
    #nses['ABA'] = nses['MBA']  # enable quick check against the old xrefs
    syn_iri = nses['NIFRID']['synonym']
    acro_iri = nses['NIFRID']['acronym']
    abasyns = {}
    abalabs = {}
    abaacro = {}
    ABA_PREFIX = 'MBA:'
    #ABA_PREFIX = 'ABA:'  # all bad

    # collect label, synonyms, and acronyms for every MBA class
    for sub in abagraph.subjects(rdflib.RDF.type, rdflib.OWL.Class):
        if not sub.startswith(nses[ABA_PREFIX[:-1]]['']):
            continue
        subkey = ABA_PREFIX + sub.rsplit('/', 1)[1]
        sub = rdflib.URIRef(sub)
        abalabs[subkey] = [o for o in abagraph.objects(rdflib.URIRef(sub), rdflib.RDFS.label)][0].toPython()
        syns = []
        for s in abagraph.objects(sub, syn_iri):
            syns.append(s.toPython())

        abasyns[subkey] = syns
        abaacro[subkey] = [a.toPython() for a in abagraph.objects(sub, acro_iri)]

    # all structures under the brain root (997) from the live Allen API
    url = 'http://api.brain-map.org/api/v2/tree_search/Structure/997.json?descendants=true'
    resp = requests.get(url).json()
    ids = set([ABA_PREFIX + str(r['id']) for r in resp['msg']])
    Query = namedtuple('Query', ['id', 'relationshipType', 'direction', 'depth'])
    #uberon = Query('UBERON:0000955', 'http://purl.obolibrary.org/obo/BFO_0000050', 'INCOMING', 9)
    uberon = Query('UBERON:0001062', 'subClassOf', 'INCOMING', 10)  # anatomical entity
    # NOTE(review): g is not defined in this function — presumably a
    # module-level scigraph Graph client; confirm.
    output = g.getNeighbors(**uberon._asdict())  # TODO figure out the superclass that can actually get all the brain parts

    meta_edge = 'http://www.geneontology.org/formats/oboInOwl#hasDbXref'

    # build uberon<->aba mappings from the xrefs on each uberon node
    u_a_map = {}
    a_u_map = {}
    uberon_syns = {}
    uberon_labs = {}
    syn_types = {
        'http://www.geneontology.org/formats/oboInOwl#hasExactSynonym': 'Exact',
        'http://www.geneontology.org/formats/oboInOwl#hasNarrowSynonym': 'Narrow',
        'http://www.geneontology.org/formats/oboInOwl#hasRelatedSynonym': 'Related',
        'http://www.geneontology.org/formats/oboInOwl#hasBroadSynonym': 'Broad',
    }
    for node in output['nodes']:
        curie = node['id']
        uberon_labs[curie] = node['lbl']
        uberon_syns[curie] = {}
        if 'synonym' in node['meta']:
            for stype in syn_types:
                if stype in node['meta']:
                    uberon_syns[curie][stype] = node['meta'][stype]

        if meta_edge in node['meta']:
            xrefs = node['meta'][meta_edge]
            mba_ref = [r for r in xrefs if r.startswith(ABA_PREFIX)]
            u_a_map[curie] = mba_ref
            if mba_ref:
                for mba in mba_ref:
                    a_u_map[mba] = curie
        else:
            u_a_map[curie] = None

    def obo_output():  # oh man obo_io is a terrible interface for writing obofiles :/
        # Write the synonym review obofiles (currently disabled below).
        for aid in abalabs:  # set aids not in uberon to none
            if aid not in a_u_map:
                a_u_map[aid] = None

        e = OboFile()
        n = OboFile()
        r = OboFile()
        b = OboFile()
        name_order = 'Exact', 'Narrow', 'Related', 'Broad'
        rev = {v: k for k, v in syn_types.items()}  # sillyness
        syn_order = [rev[n] for n in name_order]
        files_ = {rev['Broad']: b, rev['Exact']: e, rev['Narrow']: n, rev['Related']: r}
        for aid, uid in sorted(a_u_map.items()):
            id_line = 'id: ' + aid
            lines = []
            lines.append(id_line)
            lines.append('name: ' + abalabs[aid])
            if uid in uberon_syns:
                syns = uberon_syns[uid]
            else:
                syns = {}

            for syn_type in syn_order:
                f = files_[syn_type]
                if syn_types[syn_type] == 'Exact' and uid is not None:
                    syn_line = 'synonym: "' + uberon_labs[uid] + '" ' + syn_types[syn_type].upper() + ' [from label]'
                    lines.append(syn_line)
                if syn_type in syns:
                    for syn in sorted(syns[syn_type]):
                        syn_line = 'synonym: "' + syn + '" ' + syn_types[syn_type].upper() + ' []'
                        lines.append(syn_line)

            block = '\n'.join(lines)
            term = Term(block, f)

        e.filename = 'e-syns.obo'
        n.filename = 'en-syns.obo'
        r.filename = 'enr-syns.obo'
        b.filename = 'enrb-syns.obo'
        for f in files_.values():
            h = Header('format-version: 1.2\nontology: %s\n' % f.filename)
            h.append_to_obofile(f)
            f.write(f.filename)
    #embed()
    #obo_output()

    def make_record(uid, aid):  # edit this to change the format
        # Human-readable side-by-side record for synonym review.
        to_format = ('{uberon_id: <20}{uberon_label:}\n'
                     '{aba_id: <20}{aba_label}\n'
                     '------ABA SYNS------\n'
                     '{aba_syns}\n'
                     '-----UBERON SYNS-----\n'
                     '{uberon_syns}\n')
        uberon_syn_rec = uberon_syns[uid]
        insert_uberon = []
        for edge, syns in sorted(uberon_syn_rec.items()):
            insert_uberon.append('--{abv}--\n{syns}'.format(abv=syn_types[edge], syns='\n'.join(sorted(syns))))

        kwargs = {'uberon_id': uid,
                  'uberon_label': uberon_labs[uid],
                  'aba_id': aid,
                  'aba_label': abalabs[aid],
                  'aba_syns': '\n'.join(sorted(abasyns[aid] + abaacro[aid])),
                  'uberon_syns': '\n'.join(insert_uberon)}
        return to_format.format(**kwargs)

    #text = '\n\n'.join([make_record(uid, aid[0]) for uid, aid in sorted(u_a_map.items()) if aid])
    #with open('aba_uberon_syn_review.txt', 'wt') as f:
        #f.write(text)

    print('total uberon terms checked:', len(uberon_labs))
    print('total aba terms: ', len(abalabs))
    print('total uberon with aba xref:', len([a for a in u_a_map.values() if a]))

    # emit the bridge ontology linking uberon regions to the MBA ids
    ubridge = createOntology('uberon-parcellation-mappings',
                             'Uberon Parcellation Mappings',
                             makePrefixes('owl', 'ilx', 'UBERON', 'MBA'))
    for u, arefs in u_a_map.items():
        if arefs:
            # TODO check for bad assumptions here
            ubridge.add_trip(u, 'ilx:delineatedBy', arefs[0])
            ubridge.add_trip(arefs[0], 'ilx:delineates', u)

    ubridge.write()


if __name__ == '__main__':
    embed()
class PaxRatLabels(PaxLabels):
    """ Compilation of all labels used to name rat brain regions
        in atlases created using Paxinos and Watson\'s methodology."""

    filename = 'paxinos-rat-labels'
    name = 'Paxinos & Watson Rat Parcellation Labels'
    shortname = 'paxrat'
    namespace = PAXRAT

    prefixes = {**makePrefixes('NIFRID', 'ilxtr', 'prov', 'dcterms'),
                'PAXRAT': str(PAXRAT),
                'paxratver': str(paxratver),
    }

    # sources need to go in the order with which we want the labels to take precedence (ie in this case 6e > 4e)
    sources = PaxFix, PaxSrAr_6, PaxSr_6, PaxSrAr_4, PaxFix6, PaxFix4  #, PaxTree_6()  # tree has been successfully used for crossreferencing, additional terms need to be left out at the moment (see in_tree_not_in_six)
    root = LabelRoot(iri=nsExact(namespace),  # PAXRAT['0'],
                     label='Paxinos rat parcellation label root',
                     shortname=shortname,
                     #definingArtifactsS=None,#Artifacts.PaxRatAt.iri,
                     definingArtifactsS=(Artifacts.PaxRatAt.iri,),
    )

    _fixes = []

    # Abbreviations that collide between editions; each DupeRecord carries
    # the alternate abbreviation(s), structure name(s), figures, and the
    # artifact iris it applies to.
    _dupes = {
        # for 4e the numbers in the index are to the cranial nerve nuclei entries
        '3N': DupeRecord(alt_abbrevs=['3'], structures=['oculomotor nucleus'], figures={}, artiris=[Artifacts.PaxRat4.iri]),
        '4N': DupeRecord(alt_abbrevs=['4'], structures=['trochlear nucleus'], figures={}, artiris=[Artifacts.PaxRat4.iri]),
        '6N': DupeRecord(alt_abbrevs=['6'], structures=['abducens nucleus'], figures={}, artiris=[Artifacts.PaxRat4.iri]),
        '7N': DupeRecord(alt_abbrevs=['7'], structures=['facial nucleus'], figures={}, artiris=[Artifacts.PaxRat4.iri]),
        '10N': DupeRecord(alt_abbrevs=['10'], structures=['dorsal motor nucleus of vagus'], figures={}, artiris=[Artifacts.PaxRat4.iri]),

        # FIXME need comments about the index entries
        '1Cb': DupeRecord(alt_abbrevs=['1'], structures=['cerebellar lobule 1'], figures={}, artiris=[Artifacts.PaxRat4.iri]),
        '2Cb': DupeRecord(alt_abbrevs=['2'], structures=['cerebellar lobule 2'], figures={}, artiris=[Artifacts.PaxRat4.iri]),
        '2/3Cb': DupeRecord(alt_abbrevs=['2&3'], structures=['cerebellar lobules 2&3'], figures={}, artiris=[Artifacts.PaxRat4.iri]),
        '3Cb': DupeRecord(alt_abbrevs=['3'], structures=['cerebellar lobule 3'], figures={}, artiris=[Artifacts.PaxRat4.iri]),
        '4Cb': DupeRecord(alt_abbrevs=['4'], structures=['cerebellar lobule 4'], figures={}, artiris=[Artifacts.PaxRat4.iri]),
        '4/5Cb': DupeRecord(alt_abbrevs=['4&5'], structures=['cerebellar lobules 4&5'], figures={}, artiris=[Artifacts.PaxRat4.iri]),
        '5Cb': DupeRecord(alt_abbrevs=['5'], structures=['cerebellar lobule 5'], figures={}, artiris=[Artifacts.PaxRat4.iri]),
        '6Cb': DupeRecord(alt_abbrevs=['6'], structures=['cerebellar lobule 6'], figures={}, artiris=[Artifacts.PaxRat4.iri]),
        '6aCb': DupeRecord(alt_abbrevs=['6a'], structures=['cerebellar lobule 6a'], figures={}, artiris=[Artifacts.PaxRat4.iri]),
        '6bCb': DupeRecord(alt_abbrevs=['6b'], structures=['cerebellar lobule 6b'], figures={}, artiris=[Artifacts.PaxRat4.iri]),
        '6cCb': DupeRecord(alt_abbrevs=['6c'], structures=['cerebellar lobule 6c'], figures={}, artiris=[Artifacts.PaxRat4.iri]),
        '7Cb': DupeRecord(alt_abbrevs=['7'], structures=['cerebellar lobule 7'], figures={}, artiris=[Artifacts.PaxRat4.iri]),
        '8Cb': DupeRecord(alt_abbrevs=['8'], structures=['cerebellar lobule 8'], figures={}, artiris=[Artifacts.PaxRat4.iri]),
        '9Cb': DupeRecord(alt_abbrevs=['9'], structures=['cerebellar lobule 9'], figures={}, artiris=[Artifacts.PaxRat4.iri]),
        '10Cb': DupeRecord(alt_abbrevs=['10'], structures=['cerebellar lobule 10'], figures={}, artiris=[Artifacts.PaxRat4.iri]),
    }

    # abbrevs that have identical structure names (key -> canonical form)
    _merge = {
        '5N': 'Mo5',
        '12N': '12',
        'ANS': 'Acc',
        'ASt': 'AStr',
        'AngT': 'Ang',
        'MnM': 'MMn',
        'MoDG': 'Mol',
        'PDPO': 'PDP',
        'PTg': 'PPTg',
        'STIA': 'BSTIA',
        'STL': 'BSTL',
        'STLD': 'BSTLD',
        'STLI': 'BSTLI',
        'STLJ': 'BSTLJ',
        'STLP': 'BSTLP',
        'STLV': 'BSTLV',
        'STM': 'BSTM',
        'STMA': 'BSTMA',
        'STMP': 'BSTMP',
        'STMPI': 'BSTMPI',
        'STMPL': 'BSTMPL',
        'STMPM': 'BSTMPM',
        'STMV': 'BSTMV',
        'hif': 'hf',
        'och': 'ox',
    }

    def curate(self):
        """ Interactive curation helper: cross-checks the 4th/6th edition
            sources and the tree against each other, reports discrepancies,
            and writes review files. Ends in breakpoint() by design. """
        fr, err4 = PaxSrAr_4()
        sx, err6 = PaxSrAr_6()
        sx2, _ = PaxSr_6()
        tr, err6t = PaxTree_6()

        # set differences between the editions / index / tree
        sfr = set(fr)
        ssx = set(sx)
        ssx2 = set(sx2)
        str_ = set(tr)
        in_four_not_in_six = sfr - ssx
        in_six_not_in_four = ssx - sfr
        in_tree_not_in_six = str_ - ssx
        in_six_not_in_tree = ssx - str_
        in_six2_not_in_six = ssx2 - ssx
        in_six_not_in_six2 = ssx - ssx2

        print(len(in_four_not_in_six), len(in_six_not_in_four),
              len(in_tree_not_in_six), len(in_six_not_in_tree),
              len(in_six2_not_in_six), len(in_six_not_in_six2),)

        # structure-name -> abbrev for the tree; report labels the tree has
        # that the 6e source does not
        tr_struct_abrv = {}
        for abrv, ((struct, *extra), _, parent) in tr.items():
            tr_struct_abrv[struct] = abrv
            if abrv in sx:
                #print(abrv, struct, parent)
                if struct and struct not in sx[abrv][0]:
                    print(f'Found new label from tr for {abrv}:\n{struct}\n{sx[abrv][0]}\n')

        # can't run these for tr yet
        #reduced = set(tr_struct_abrv.values())
        #print(sorted(_ for _ in tr if _ not in reduced))
        #assert len(tr_struct_abrv) == len(tr), 'mapping between abrvs and structs is not 1:1 for tr'

        # same check for the 6e index source
        sx2_struct_abrv = {}
        for abrv, ((struct, *extra), _) in sx2.items():
            sx2_struct_abrv[struct] = abrv
            if abrv in sx:
                if struct and struct not in sx[abrv][0]:
                    print(f'Found new label from sx2 for {abrv}:\n{struct}\n{sx[abrv][0]}\n')

        reduced = set(sx2_struct_abrv.values())
        print(sorted(_ for _ in reduced if _ not in sx2))  # ah inconsistent scoping rules in class defs...
        assert len(sx2_struct_abrv) == len(sx2), 'there is a duplicate struct'

        # and for the 6e article source
        sx_struct_abrv = {}
        for abrv, ((struct, *extra), _) in sx.items():
            sx_struct_abrv[struct] = abrv

        reduced = set(sx_struct_abrv.values())
        print(sorted(_ for _ in reduced if _ not in sx))
        assert len(sx_struct_abrv) == len(sx), 'there is a duplicate struct'

        # TODO test whether any of the tree members that were are going to exclude have children that we are going to include

        names_match_not_abbervs = {}

        # review tables written to disk below
        tree_no_name = {_: tr[_] for _ in sorted(in_tree_not_in_six) if not tr[_][0][0]}
        tree_with_name = {_: tr[_] for _ in sorted(in_tree_not_in_six) if tr[_][0][0]}
        not_in_tree_with_figures = {_: sx[_] for _ in sorted(in_six_not_in_tree) if sx[_][-1]}
        a = f'{"abv":<25} | {"structure name":<60} | parent abv\n' + '\n'.join(f'{k:<25} | {v[0][0]:<60} | {v[-1]}' for k, v in tree_with_name.items())
        b = f'{"abv":<25} | {"structure name":<15} | parent abv\n' + '\n'.join(f'{k:<25} | {"":<15} | {v[-1]}' for k, v in tree_no_name.items())
        c = f'abv | {"structure name":<60} | figures (figure ranges are tuples)\n' + '\n'.join(f'{k:<6} | {v[0][0]:<60} | {v[-1]}' for k, v in not_in_tree_with_figures.items())
        with open(os.path.expanduser('~/ni/dev/nifstd/paxinos/tree-with-name.txt'), 'wt') as f:
            f.write(a)
        with open(os.path.expanduser('~/ni/dev/nifstd/paxinos/tree-no-name.txt'), 'wt') as f:
            f.write(b)
        with open(os.path.expanduser('~/ni/dev/nifstd/paxinos/not-in-tree-with-figures.txt'), 'wt') as f:
            f.write(c)

        #match_name_not_abrev = set(v[0][0] for v in tree_with_name.values()) & set(v[0][0] for v in sx.values())
        # structures that map to more than one abbreviation in the merged records
        _match_name_not_abrev = {}
        for a, (alts, (s, *extra), f, *_) in PaxRatLabels().records()[0].items():
            if s not in _match_name_not_abrev:
                _match_name_not_abrev[s] = [a]
            elif a not in _match_name_not_abrev[s]:
                _match_name_not_abrev[s].append(a)

        match_name_not_abrev = {k: v for k, v in _match_name_not_abrev.items() if len(v) > 1}

        # abbreviations with more than one alternate abbreviation
        abrv_match_not_name = {k: v[0] for k, v in PaxRatLabels().records()[0].items() if len(v[0]) > 1}
        _ = [print(k, *v[0]) for k, v in PaxRatLabels().records()[0].items() if len(v[0]) > 1]
        breakpoint()