Example #1
    def __init__(self, path):
        super().__init__()
        self.path = path
        tabular = Tabular(self.path)
        self.skip_rows = tuple(key for keys in self.verticals.values() for key in keys)
        self.t = tabular
        l = list(tabular)
        if not l:
            # FIXME bad design, this check is a workaround for bad handling of empty lists
            raise exc.NoDataError(self.path)

        self.orig_header, *rest = l
        header = Header(self.orig_header).data

        self.fail = False
        if self.to_index:
            for head in self.to_index:
                if head not in header:
                    log.error(f'\'{self.t.path}\' malformed header!')
                    self.fail = True

        if self.fail:
            self.bc = byCol(rest, header)
        else:
            self.bc = byCol(rest, header, to_index=self.to_index)
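
# --- A minimal, runnable sketch (stdlib only, hypothetical helper) of the
# --- pattern above: validate the to_index columns against the header and
# --- fall back to an unindexed table instead of letting indexing fail.
def build_table(rows, to_index=()):
    header, *rest = rows
    missing = [col for col in to_index if col not in header]
    if missing:
        print(f'malformed header! missing {missing}')  # mirror the fail path
        return header, rest, None

    positions = {col: header.index(col) for col in to_index}
    index = {col: {row[pos]: row for row in rest}
             for col, pos in positions.items()}
    return header, rest, index

# usage: build_table([['label', 'id'], ['pyramidal', '1']], to_index=('label',))
# -> the index maps 'pyramidal' back to its full row, like byCol.searchIndex.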
Example #2
def main():
    #from neurondm.models.cuts import main as cuts_main
    #cuts_config, *_ = cuts_main()
    from IPython import embed
    from neurondm.compiled.common_usage_types import config as cuts_config
    cuts_neurons = cuts_config.neurons()
    expect_pes = {n.id_:len(n.pes) for n in cuts_neurons}

    sheet = CutsV1()
    config, errors, new, release = sheet_to_neurons(sheet.values, sheet.notes_index, expect_pes)
    #sheet.show_notes()
    config.write_python()
    config.write()
    #config = Config(config.name)
    #config.load_existing()  # FIXME this is a hack to get a load_graph
    from neurondm import Config, NeuronCUT
    release_config = Config('cut-release')
    [NeuronCUT(*n, id_=n.id_, label=n.origLabel, override=True).adopt_meta(n) for n in release]
    release_config.write_python()
    release_config.write()
    from neurondm.models.cuts import export_for_review
    review_rows = export_for_review(config, [], [], [], filename='cut-rt-test.csv', with_curies=True)
    from pyontutils.utils import byCol
    valuesC = byCol(sheet.values[1:],
                    header=[v.replace(' ', '_') for v in sheet.values[0]],
                    to_index=['label'])
    reviewC = byCol(review_rows[1:],
                    header=[v.replace(' ', '_') for v in review_rows[0]],
                    to_index=['label'])
    def grow(r):
        log.debug(r)
        # TODO implement on the object to allow joining on an index?
        # man this would be easier with sql >_< probably pandas too
        # but so many dependencies ... also diffing issues etc
        return valuesC.searchIndex('label', r.label)

    def key(field_value):
        field, value = field_value
        try:
            return 0, valuesC.header._fields.index(field)  # TODO warn on field mismatch
        except ValueError as e:
            log.error(f'field not in reference header: {field} {value}')
            return 1, 0  # unknown fields sort after all known fields

    def replace(r, *cols):
        """ replace and reorder """
        # FIXME _super_ inefficient
        vrow = grow(r)
        for field, value in sorted(zip(r._fields, r), key=key):
            if field in cols:
                value = getattr(vrow, field)

            yield '' if value is None else value  # completely overwrite the sheet

    rows = [list(replace(r, 'Status', 'definition', 'synonyms', 'PMID')) for r in reviewC]
    #resp = update_sheet_values('neurons-cut', 'Roundtrip', rows)
    embed()
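
# --- A runnable sketch (hypothetical data, stdlib only) of the sort trick in
# --- key()/replace() above: order a row's fields to match a reference header,
# --- pushing fields the header does not know about to the end.
from collections import namedtuple

RefHeader = namedtuple('RefHeader', ['label', 'definition', 'synonyms'])
Row = namedtuple('Row', ['synonyms', 'label', 'extra'])

def _key(field_value):
    field, _ = field_value
    try:
        return 0, RefHeader._fields.index(field)
    except ValueError:
        return 1, 0  # unknown field: sort after all known fields

_r = Row(synonyms='syn', label='lbl', extra='x')
assert [f for f, _ in sorted(zip(_r._fields, _r), key=_key)] == ['label', 'synonyms', 'extra']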
Example #3
    def fetch(self, fetch_grid=None, filter_cell=None):
        """ update remote values (called automatically at __init__) """
        #self.__class__.fetch_count += 1
        #log.debug(f'fetch count: {self.__class__.fetch_count}')
        self._stash_uncommitted()
        if fetch_grid is None:
            fetch_grid = self.fetch_grid

        self.metadata()

        values, grid, cells_index = get_sheet_values(
            self.name,
            self.sheet_name,
            spreadsheet_service=self._spreadsheet_service,
            fetch_grid=fetch_grid,
            filter_cell=filter_cell,
            SPREADSHEET_ID=self._sheet_id())

        self.raw_values = values
        self._values = [list(r) for r in  # transpose, pad short rows with '', transpose back
                        zip(*itertools.zip_longest(*self.raw_values,
                                                   fillvalue=''))]
        try:
            self.byCol = byCol(self.values, to_index=self.index_columns)
        except ValueError as e:
            log.error(e)
            log.warning('Sheet has malformed header, not setting byCol')
        except IndexError as e:
            log.error(e)
            log.warning('Sheet has no header, not setting byCol')

        self.grid = grid
        self.cells_index = cells_index

        self._reapply_uncommitted()
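
# --- Why the double transpose above works (stdlib-only demo): zip_longest over
# --- the transposed rows pads every column to the longest row, and the outer
# --- zip(*...) transposes back, so ragged sheet rows come out rectangular.
import itertools

ragged = [['a', 'b', 'c'], ['d'], ['e', 'f']]
rect = [list(r) for r in zip(*itertools.zip_longest(*ragged, fillvalue=''))]
assert rect == [['a', 'b', 'c'], ['d', '', ''], ['e', 'f', '']]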
Example #4
    def __init__(self, path):
        super().__init__()
        self.path = path
        if self._is_json:
            with open(self.path, 'rt') as f:
                try:
                    self._data_raw = json.load(f)
                except json.decoder.JSONDecodeError as e:
                    if not f.buffer.tell():  # nothing was read, so the file was empty
                        raise exc.NoDataError(self.path)
                    else:
                        raise exc.BadDataError(self.path) from e

            if isinstance(self._data_raw, dict):
                # FIXME this breaks downstream assumptions
                self._data_cache = {self.rename_key(k):tos(self.normalize(k, v))  # FIXME FIXME
                                    for k, v in self._data_raw.items()}

            return

        tabular = Tabular(self.path)
        self.skip_rows = tuple(key for keys in self.verticals.values() for key in keys)
        self.t = tabular
        l = list(tabular)
        if not l:
            # FIXME bad design, this check is a workaround for bad handling of empty lists
            raise exc.NoDataError(self.path)

        self.orig_header, *rest = l
        header = Header(self.orig_header).data

        self.fail = False
        if self.to_index:
            for head in self.to_index:
                if head not in header:
                    log.error(f'\'{self.t.path}\' malformed header!')
                    self.fail = True

        if self.fail:
            try:
                self.bc = byCol(rest, header)
            except ValueError as e:
                raise exc.BadDataError(self.path) from e
        else:
            self.bc = byCol(rest, header, to_index=self.to_index)
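
# --- A simplified, runnable sketch of the JSON branch above (stdlib only,
# --- plain exceptions standing in for exc.NoDataError/exc.BadDataError):
# --- if the decoder fails before anything was consumed, the input was empty
# --- rather than malformed.
import io, json

def load_json(f):
    try:
        return json.load(f)
    except json.decoder.JSONDecodeError as e:
        if not f.tell():  # nothing was read -> no data
            raise ValueError('no data') from e
        raise ValueError('bad data') from e  # something was read -> bad data

# load_json(io.StringIO(''))       raises 'no data'
# load_json(io.StringIO('{"a":'))  raises 'bad data'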
Example #5
    def fetch(self, fetch_grid=None):
        """ update remote values (called automatically at __init__) """
        if fetch_grid is None:
            fetch_grid = self.fetch_grid

        values, grid, notes_index = get_sheet_values(self.name, self.sheet_name,
                                                     spreadsheet_service=self._spreadsheet_service,
                                                     fetch_grid=fetch_grid)

        self.raw_values = values
        self.values = [list(r) for r in zip(*itertools.zip_longest(*self.raw_values, fillvalue=''))]
        try:
            self.byCol = byCol(self.values, to_index=self.index_columns)
        except ValueError as e:
            log.error(e)
            log.warning('Sheet has malformed header, not setting byCol')

        self.grid = grid
        self.notes_index = notes_index
Example #6
def main():
    branch = auth.get('neurons-branch')
    remote = OntId('NIFTTL:') if branch == 'master' else OntId(f'NIFRAW:{branch}/')

    ont_config = ontneurons(remote)
    ont_neurons = ont_config.neurons()

    bn_config = Config('basic-neurons',
                       # FIXME this should probably be pulled in automatically
                       # from the import statements, and it doesn't work even as is
                       # also a chicken and an egg problem here
                       imports=[remote.iri + 'ttl/generated/swanson.ttl'])

    #RDFL = oq.plugin.get('rdflib')  # FIXME ick
    #rdfl = RDFL(bn_config.core_graph, OntId)
    #OntTerm.query.ladd(rdfl)  # FIXME ick
    bn_config.load_existing()
    bn_neurons = bn_config.neurons()
    #OntTerm.query._services = OntTerm.query._services[:-1]  # FIXME ick

    ndl_config = Config('neuron_data_lifted')
    ndl_config.load_existing()  # FIXME this is extremely slow
    ndl_neurons = sorted(ndl_config.neurons())

    resources = auth.get_path('resources')
    cutcsv = resources / 'cut-development.csv'
    with open(cutcsv.as_posix(), 'rt') as f:
        rows = list(csv.reader(f))

    bc = byCol(rows)

    (_, *labels), *_ = zip(*bc)
    labels_set0 = set(labels)
    ns = []
    skipped = []
    bamscok = (NIFSTD.BAMSC1125,)
    for n in (ont_neurons + ndl_neurons):
        if n.id_ and 'BAMSC' in n.id_:
            if n.id_ not in bamscok:
                skipped.append(n)
                continue

        l = str(n.origLabel) if n.origLabel is not None else None
        if l is not None:
            for replace, match in rename_rules.items():  # HEH
                l = l.replace(match, replace)

        if l in labels:
            n._origLabel = l
            ns.append(n)

    ns = sorted(ns)
    sns = set(n.origLabel for n in ns)

    labels_set1 = labels_set0 - sns

    agen = [c.label for c in bc if c.autogenerated]
    sagen = set(agen)
    added = [c.label for c in bc if c.added]
    sadded = set(added)
    ans = []
    sans = set()
    missed = set()
    _bl = []  # XXX NOTE THE CONTINUE BELOW
    for n in bn_neurons:
        continue  # we actually get all of these with uberon, will map between them later
        # can't use capitalize here because there are proper names that stay uppercase
        l = n.label.replace('(swannt) ',
                            '').replace('Intrinsic',
                                        'intrinsic').replace('Projection',
                                                             'projection')

        for replace, match in rename_rules.items():  # HEH
            l = l.replace(match, replace)

        if l in agen:
            n._origLabel = l
            ans.append(n)
            sans.add(l)

        else:
            missed.add(l)

        _bl.append(l)

    agen_missing = sagen - sans
    labels_set2 = labels_set1 - sans

    nlx_labels = [c.label for c in bc if c.neurolex]
    snlx_labels = set(nlx_labels)

    class SourceCUT(resSource):
        sourceFile = 'nifstd/resources/cut-development.csv'  # FIXME relative to git workingdir...
        source_original = True

    sources = SourceCUT(),
    swanr = rdflib.Namespace(interlex_namespace('swanson/uris/readable/'))
    SWAN = interlex_namespace('swanson/uris/neuroanatomical-terminology/terms/')
    SWAA = interlex_namespace('swanson/uris/neuroanatomical-terminology/appendix/')
    config = Config('cut-development-raw', sources=sources, source_file=relative_path(__file__),
                    prefixes={'swanr': swanr,
                              'SWAN': SWAN,
                              'SWAA': SWAA,})
    ins = [None if OntId(n.id_).prefix == 'TEMP' else n.id_ for n in ns]
    ians = [None] * len(ans)

    with NeuronCUT(CUT.Mammalia):
        mamns = [NeuronCUT(*zap(n.pes), id_=i, label=n._origLabel, override=bool(i)).adopt_meta(n)
                 for i, n in zip(ins + ians, ns + ans)]

    smatch, rem = get_smatch(labels_set2)

    labels_set3 = labels_set2 - smatch
    added_unmapped = sadded & labels_set3

    # TODO preserve the names from neuronlex on import ...
    Neuron.write()
    Neuron.write_python()
    raw_neurons = config.neurons()
    # do this before creating the new config
    # even though we are in theory tripling number of neurons in the current config graph
    # it won't show up in the next config (and this is why we need to reengineer)
    raw_neurons_ind_undep = [n.asUndeprecated().asIndicator() for n in raw_neurons]
    config = Config('cut-development', sources=sources, source_file=relative_path(__file__),
                    prefixes={'swanr': swanr,
                              'SWAN': SWAN,
                              'SWAA': SWAA,})
    # FIXME the call to asUndeprecated currently triggers addition
    # to the current config and output graph as a side effect (ick!)
    ids_updated_neurons = [n.asUndeprecated() for n in raw_neurons]
    assert len(ids_updated_neurons) == len(raw_neurons)
    Neuron.write()
    Neuron.write_python()
    progress = (len(labels_set0), len(sns), len(sans), len(smatch),
                len(labels_set1), len(labels_set2), len(labels_set3))
    prog_report = ('\nProgress:\n'
                   f'total:            {progress[0]}\n'
                   f'from nlx:         {progress[1]}\n'
                   f'from basic:       {progress[2]}\n'
                   f'from match:       {progress[3]}\n'
                   f'TODO after nlx:   {progress[4]}\n'
                   f'TODO after basic: {progress[5]}\n'
                   f'TODO after match: {progress[6]}\n')
    print(prog_report)
    assert progress[0] == progress[1] + progress[4], 'neurolex does not add up'
    assert progress[4] == progress[2] + progress[5], 'basic does not add up'

    lnlx = set(n.lower() for n in snlx_labels)
    sos = set(n.origLabel.lower() if n.origLabel else None for n in ndl_neurons)  # FIXME load origLabel
    nlx_review = lnlx - sos
    nlx_missing = sorted(nlx_review)
    print(f'\nNeuroLex listed as source but no mapping (n = {len(nlx_review)}):')
    _ = [print(l) for l in nlx_missing]

    partial = {k:v for k, v in rem.items() if v and v not in terminals}
    print(f'\nPartially mapped (n = {len(partial)}):')
    if partial:
        mk = max(len(k) for k in partial) + 2
        for k, v in sorted(partial.items()):
            print(f'{k:<{mk}} {v!r}')
            #print(f'{k!r:<{mk}}{v!r}')
        #pprint(partial, width=200)
    unmapped = sorted(labels_set3)
    print(f'\nUnmapped (n = {len(labels_set3)}):')
    _ = [print(l) for l in unmapped]

    no_location = [n for n in Neuron.neurons()
                   if noneMembers((ilxtr.hasSomaLocatedIn, ilxtr.hasSomaLocatedInLayer), *n.unique_predicates)]
    if __name__ == '__main__':
        review_rows = export_for_review(config, unmapped, partial, nlx_missing)
        breakpoint()

    return config, unmapped, partial, nlx_missing
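
# --- The progress report above is plain set arithmetic; a toy check of the
# --- invariants the asserts encode (hypothetical labels, stdlib only).
# --- They hold because each mapped set is a subset of the previous TODO set.
labels_set0 = {'a', 'b', 'c', 'd'}     # total
sns = {'a'}                            # mapped from nlx
labels_set1 = labels_set0 - sns        # TODO after nlx
sans = {'b'}                           # mapped from basic
labels_set2 = labels_set1 - sans       # TODO after basic
assert len(labels_set0) == len(sns) + len(labels_set1)   # 'neurolex adds up'
assert len(labels_set1) == len(sans) + len(labels_set2)  # 'basic adds up'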
Example #7
def main():
    #from neurondm.models.cuts import main as cuts_main
    #cuts_config, *_ = cuts_main()

    from neurondm.compiled.common_usage_types import config as cuts_config
    cuts_neurons = cuts_config.neurons()
    expect_pes = {n.id_:n.pes for n in cuts_neurons}

    sheet = CutsV1()
    _neurons = list(sheet.neurons(expect_pes))
    config = sheet.config
    errors = sheet.errors
    new = sheet.new
    release = sheet.release

    #sheet.show_notes()
    config.write_python()
    config.write()
    #config = Config(config.name)
    #config.load_existing()  # FIXME this is a hack to get a load_graph

    # FIXME we need this because _bagExisting doesn't deal with unionOf right now
    def trything(f):
        @wraps(f)
        def inner(*args, **kwargs):
            try:
                return f(*args, **kwargs)
            except Exception:  # swallow per-item construction failures, keep going
                pass

        return inner

    from neurondm import Config, NeuronCUT

    failed_config = Config('cut-failed')
    [trything(NeuronCUT)(*pes, id_=id_) for id_, pes in sheet.failed.items()]
    failed_config.write_python()
    failed_config.write()

    release_config = Config('cut-release')
    [NeuronCUT(*n, id_=n.id_, label=n.origLabel, override=True).adopt_meta(n) for n in release]
    release_config.write_python()
    release_config.write()

    from neurondm.models.cuts import export_for_review
    review_rows = export_for_review(config, [], [], [], filename='cut-rt-test.csv', with_curies=True)
    from pyontutils.utils import byCol
    valuesC = byCol(sheet.values[1:],
                    header=[v.replace(' ', '_') for v in sheet.values[0]],
                    to_index=['label'])
    reviewC = byCol(review_rows[1:],
                    header=[v.replace(' ', '_') for v in review_rows[0]],
                    to_index=['label'])
    def grow(r):
        log.debug(r)
        # TODO implement on the object to allow joining on an index?
        # man this would be easier with sql >_< probably pandas too
        # but so many dependencies ... also diffing issues etc
        if r.label is not None:
            return valuesC.searchIndex('label', r.label)

    def key(field_value):
        field, value = field_value
        try:
            return 0, valuesC.header._fields.index(field)  # TODO warn on field mismatch
        except ValueError as e:
            log.error(f'{field} {value}')
            return 1, 0

    def replace(r, *cols):
        """ replace and reorder """
        # FIXME _super_ inefficient
        vrow = grow(r)
        log.debug('\n'.join(r._fields))
        log.debug('\n'.join(str(_) for _ in r))
        for field, value in sorted(zip(r._fields, r), key=key):
            if field in cols:
                value = getattr(vrow, field)

            yield '' if value is None else value  # completely overwrite the sheet

    breakpoint()
    rows = [list(replace(r, 'Status', 'definition', 'synonyms', 'PMID')) for r in reviewC]
    #resp = update_sheet_values('neurons-cut', 'Roundtrip', rows)
    if __name__ == '__main__':
        breakpoint()
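
# --- A self-contained demo of the trything pattern above (stdlib only):
# --- wrap a callable so per-item failures become None instead of propagating,
# --- keeping a batch comprehension alive past the bad items.
from functools import wraps

def trything(f):
    @wraps(f)
    def inner(*args, **kwargs):
        try:
            return f(*args, **kwargs)
        except Exception:
            pass

    return inner

assert [trything(int)(v) for v in ('1', 'nope', '3')] == [1, None, 3]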
Example #8
def main():
    ndl_config = Config('neuron_data_lifted')
    ndl_config.load_existing()
    ndl_neurons = ndl_config.neurons()
    bn_config = Config('basic-neurons')
    bn_config.load_existing()
    bn_neurons = bn_config.neurons()

    resources = Path(devconfig.resources)
    cutcsv = resources / 'common-usage-types.csv'
    with open(cutcsv.as_posix(), 'rt') as f:
        rows = list(csv.reader(f))

    bc = byCol(rows)

    (_, *labels), *_ = zip(*bc)
    labels_set0 = set(labels)
    ns = []
    for n in ndl_neurons:
        l = str(n.origLabel) if n.origLabel is not None else None
        if l is not None:
            for replace, match in rename_rules.items():  # HEH
                l = l.replace(match, replace)

        if l in labels:
            n._origLabel = l
            ns.append(n)

    sns = set(n.origLabel for n in ns)

    labels_set1 = labels_set0 - sns

    agen = [c.label for c in bc if c.autogenerated]
    sagen = set(agen)
    added = [c.label for c in bc if c.added]
    sadded = set(added)
    ans = []
    sans = set()
    missed = set()
    for n in bn_neurons:
        continue  # we actually get all of these with uberon, will map between them later
        # can't use capitalize here because there are proper names that stay uppercase
        l = n.label.replace('(swannt) ',
                            '').replace('Intrinsic',
                                        'intrinsic').replace('Projection',
                                                             'projection')

        for replace, match in rename_rules.items():  # HEH
            l = l.replace(match, replace)

        if l in agen:
            n._origLabel = l
            ans.append(n)
            sans.add(l)

        else:
            missed.add(l)

    agen_missing = sagen - sans
    labels_set2 = labels_set1 - sans

    nlx_labels = [c.label for c in bc if c.neurolex]
    snlx_labels = set(nlx_labels)

    class SourceCUT(resSource):
        sourceFile = 'nifstd/resources/common-usage-types.csv'  # FIXME relative to git workingdir...
        source_original = True

    sources = SourceCUT(),
    swanr = rdflib.Namespace(interlex_namespace('swanson/uris/readable/'))
    config = Config('common-usage-types-raw', sources=sources, source_file=relative_path(__file__),
                    prefixes={'swanr':swanr,
                              'SWAN':interlex_namespace('swanson/uris/neuroanatomical-terminology/terms/'),
                              'SWAA':interlex_namespace('swanson/uris/neuroanatomical-terminology/appendix/'),})
    ins = [None if OntId(n.id_).prefix == 'TEMP' else n.id_ for n in ns]
    ians = [None] * len(ans)
    def zap(pes):
        for pe in pes:
            if pe not in (Phenotype('BIRNLEX:212', ilxtr.hasTaxonRank),
                          Phenotype('NCBITaxon:7742', ilxtr.hasTaxonRank),
                          Phenotype('BIRNLEX:252', ilxtr.hasTaxonRank),
                          Phenotype('BIRNLEX:516', ilxtr.hasTaxonRank),):
                yield pe

    with Neuron(CUT.Mammalia):
        mamns = [NeuronCUT(*zap(n.pes), id_=i, label=n._origLabel, override=bool(i)).adopt_meta(n)
                 for i, n in zip(ins + ians, ns + ans)]

    contains_rules = make_contains_rules()

    skip = set()
    smatch = set()
    rem = {}
    for l in labels_set2:
        pes = tuple()
        l_rem = l
        for match, pheno in contains_rules.items():
            t = None
            if match not in skip and pheno == OntTerm:
                try:
                    t = OntTerm(term=match)
                    print('WTF', match, t)
                    if t.validated:
                        pheno = Phenotype(t.u, ilxtr.hasSomaLocatedIn)
                    else:
                        pheno = None
                except oq.exceptions.NotFoundError:
                    skip.add(match)
                    pheno = None
            if match in skip and pheno == OntTerm:
                pheno = None

            if match in l_rem and pheno:
                l_rem = l_rem.replace(match, '').strip()
                pes += (pheno,)
            
        if l_rem in exact_rules:
            pes += (exact_rules[l_rem],)
            l_rem = ''

        if l_rem == '  neuron':
            l_rem = ''
        elif l_rem.endswith('  cell'):
            l_rem = l_rem[:-len('  cell')]
            #print('l_rem no cell:', l_rem)
        elif l_rem.endswith('  neuron'):
            l_rem = l_rem[:-len('  neuron')]
            #print('l_rem no neuron:', l_rem)

        hrm = [pe for pe in pes if pe.e == ilxtr.hasSomaLocatedIn]
        if '  ' in l_rem:
            #print('l_rem:', l_rem)
            #embed()
            maybe_region, rest = l_rem.split('  ', 1)
        elif noneMembers(l_rem, *terminals) and not hrm:
            maybe_region, rest = l_rem, ''
            #print('MR:', maybe_region)
        else:
            #print(hrm)
            maybe_region = None

        if maybe_region:
            prefix_rank = ('UBERON', 'SWAN', 'BIRNLEX', 'SAO', 'NLXANAT')
            def key(ot):
                ranked = ot.prefix in prefix_rank
                arg = ot._query_result._QueryResult__query_args['term'].lower()
                return (not ranked,
                        prefix_rank.index(ot.prefix) if ranked else 0,
                        not (arg == ot.label.lower()))

            #t = OntTerm(term=maybe_region)
            # using query avoids the NoExplicitIdError
            ots = sorted((qr.OntTerm for qr in OntTerm.query(term=maybe_region,
                                                             exclude_prefix=('FMA',))), key=key)
            if not ots:
                log.error(f'No match for {maybe_region!r}')
            else:
                t = ots[0]
                if 'oboInOwl:id' in t.predicates:  # uberon replacement
                    t = OntTerm(t.predicates['oboInOwl:id'])

                t.set_next_repr('curie', 'label')
                log.info(f'Match for {maybe_region!r} was {t!r}')
                if t.validated:
                    l_rem = rest
                    pheno = Phenotype(t.u, ilxtr.hasSomaLocatedIn)  # FIXME
                    pes += (pheno,)

        if pes:
            smatch.add(l)
            rem[l] = l_rem

            with Neuron(CUT.Mammalia):
                NeuronCUT(*zap(pes), id_=make_cut_id(l), label=l, override=True)

    labels_set3 = labels_set2 - smatch
    added_unmapped = sadded & labels_set3

    # TODO preserve the names from neuronlex on import ...
    Neuron.write()
    Neuron.write_python()
    raw_neurons = config.neurons()
    config = Config('common-usage-types', sources=sources, source_file=relative_path(__file__),
                    prefixes={'swanr':swanr,
                              'SWAN':interlex_namespace('swanson/uris/neuroanatomical-terminology/terms/'),
                              'SWAA':interlex_namespace('swanson/uris/neuroanatomical-terminology/appendix/'),})
    ids_updated_neurons = [n.asUndeprecated() for n in raw_neurons]
    assert len(ids_updated_neurons) == len(raw_neurons)
    Neuron.write()
    Neuron.write_python()
    progress = len(labels_set0), len(sns), len(sans), len(smatch), len(labels_set1), len(labels_set2), len(labels_set3)
    print('\nProgress:\n'
          f'total:            {progress[0]}\n'
          f'from nlx:         {progress[1]}\n'
          f'from basic:       {progress[2]}\n'
          f'from match:       {progress[3]}\n'
          f'TODO after nlx:   {progress[4]}\n'
          f'TODO after basic: {progress[5]}\n'
          f'TODO after match: {progress[6]}\n')
    assert progress[0] == progress[1] + progress[4], 'neurolex does not add up'
    assert progress[4] == progress[2] + progress[5], 'basic does not add up'

    lnlx = set(n.lower() for n in snlx_labels)
    sos = set(n.origLabel.lower() if n.origLabel else None for n in ndl_neurons)  # FIXME load origLabel
    nlx_review = lnlx - sos
    nlx_missing = sorted(nlx_review)
    print(f'\nNeuroLex listed as source but no mapping (n = {len(nlx_review)}):')
    _ = [print(l) for l in nlx_missing]

    partial = {k:v for k, v in rem.items() if v and v not in terminals}
    print(f'\nPartially mapped (n = {len(partial)}):')
    if partial:
        mk = max(len(k) for k in partial) + 2
        for k, v in sorted(partial.items()):
            print(f'{k:<{mk}} {v!r}')
            #print(f'{k!r:<{mk}}{v!r}')
        #pprint(partial, width=200)
    unmapped = sorted(labels_set3)
    print(f'\nUnmapped (n = {len(labels_set3)}):')
    _ = [print(l) for l in unmapped]

    if __name__ == '__main__':
        rows = export_for_review(config, unmapped, partial, nlx_missing)
        embed()

    return config, unmapped, partial, nlx_missing
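
# --- A runnable sketch (hypothetical terms, stdlib only) of the prefix_rank
# --- key above: preferred prefixes sort first in rank order, exact matches on
# --- the queried label break ties, and everything unranked sorts last.
from collections import namedtuple

Term = namedtuple('Term', ['prefix', 'label'])
prefix_rank = ('UBERON', 'SWAN', 'BIRNLEX')

def _key(ot, query='cortex'):
    ranked = ot.prefix in prefix_rank
    return (not ranked,
            prefix_rank.index(ot.prefix) if ranked else 0,
            not (query == ot.label.lower()))

terms = [Term('FMA', 'cortex'), Term('SWAN', 'cortex'), Term('UBERON', 'Cortex')]
assert [t.prefix for t in sorted(terms, key=_key)] == ['UBERON', 'SWAN', 'FMA']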