示例#1
0
def load(args):
    """Load datasets, Concepticon data and Glottolog data into the database.

    clics load /path/to/concepticon-data /path/to/glottolog

    Expects exactly two positional arguments in ``args.args``: the location
    of the concepticon repository followed by the location of the glottolog
    repository. A ``ParserError`` is raised when the argument count is wrong
    or when either location does not exist.

    When ``args.unloaded`` is truthy, datasets whose id is already listed in
    ``args.api.db.datasets`` are skipped instead of being loaded again.
    """
    if len(args.args) != 2:
        raise ParserError(
            'concepticon and glottolog repos locations must be specified!')

    # Validate the two locations in order, so the first missing one is reported.
    concepticon_dir = Path(args.args[0])
    if not concepticon_dir.exists():
        raise ParserError('concepticon repository does not exist')
    glottolog_dir = Path(args.args[1])
    if not glottolog_dir.exists():
        raise ParserError('glottolog repository does not exist')

    db = args.api.db
    log = args.log

    db.create(exists_ok=True)
    log.info('loading datasets into {0}'.format(db.fname))

    # Snapshot of the dataset ids present before any loading starts.
    already_loaded = db.datasets
    for dataset in iter_datasets():
        if not (args.unloaded and dataset.id in already_loaded):
            log.info('loading {0}'.format(dataset.id))
            db.load(dataset)
        else:
            log.info('skipping {0} - already loaded'.format(dataset.id))

    log.info('loading Concepticon data')
    db.load_concepticon_data(Concepticon(str(concepticon_dir)))
    log.info('loading Glottolog data')
    db.load_glottolog_data(Glottolog(str(glottolog_dir)))
示例#2
0
def load(args):
    """Populate the database with datasets plus Concepticon and Glottolog data.

    clics load /path/to/concepticon-data /path/to/glottolog

    ``args.args`` must hold exactly two entries, the concepticon repository
    location and the glottolog repository location, both of which must
    exist; otherwise a ``ParserError`` is raised. With ``args.unloaded`` set,
    datasets already listed in ``args.api.db.datasets`` are not reloaded.
    """
    if len(args.args) != 2:
        raise ParserError('concepticon and glottolog repos locations must be specified!')

    concepticon_repos = Path(args.args[0])
    if not concepticon_repos.exists():
        raise ParserError('concepticon repository does not exist')

    glottolog_repos = Path(args.args[1])
    if not glottolog_repos.exists():
        raise ParserError('glottolog repository does not exist')

    database = args.api.db
    database.create(exists_ok=True)
    args.log.info('loading datasets into {0}'.format(database.fname))

    # Dataset ids known to the database before this run loads anything.
    known_ids = database.datasets
    for dataset in iter_datasets():
        skip = args.unloaded and dataset.id in known_ids
        if skip:
            message = 'skipping {0} - already loaded'
        else:
            message = 'loading {0}'
        args.log.info(message.format(dataset.id))
        if not skip:
            database.load(dataset)

    args.log.info('loading Concepticon data')
    concepticon_api = Concepticon(str(concepticon_repos))
    database.load_concepticon_data(concepticon_api)

    args.log.info('loading Glottolog data')
    glottolog_api = Glottolog(str(glottolog_repos))
    database.load_glottolog_data(glottolog_api)
示例#3
0
def list_(args):
    """List datasets available for loading

    clics --lexibank-repos=PATH/TO/lexibank-data list

    With ``args.unloaded`` set, print the ``cldf_dir`` of every dataset
    yielded by ``iter_datasets()``, or a notice when there are none.

    Otherwise print a table with one row per dataset in
    ``args.api.db.datasets`` followed by a ``TOTAL`` row. If the
    ``concepts_by_dataset`` query raises ``sqlite3.OperationalError``, a
    notice is printed and the function returns early.
    """
    if args.unloaded:
        # Use an explicit flag: the last enumerate() index is 0 both when
        # nothing was yielded and when exactly one dataset was, so testing
        # the index would wrongly report "No datasets installed" for a
        # single installed dataset.
        found = False
        for ds in iter_datasets():
            found = True
            print(ds.cldf_dir)
        if not found:
            print('No datasets installed')  # pragma: no cover
    else:
        table = Table('#', 'Dataset', 'Glosses', 'Concepticon', 'Varieties',
                      'Glottocodes', 'Families')
        try:
            # Map the first column of each row to the remaining columns.
            concept_counts = {
                r[0]: r[1:]
                for r in args.api.db.fetchall('concepts_by_dataset')
            }
        except sqlite3.OperationalError:  # pragma: no cover
            print('No datasets loaded yet')
            return

        varieties = args.api.db.varieties

        # Group varieties by source with a dict rather than
        # itertools.groupby, so the counts do not depend on `varieties`
        # being ordered by source.
        by_source = {}
        for v in varieties:
            by_source.setdefault(v.source, []).append(v)
        var_counts = {
            dsid: (len(vs),
                   len({v.glottocode for v in vs}),
                   len({v.family for v in vs}))
            for dsid, vs in by_source.items()
        }

        for count, d in enumerate(args.api.db.datasets):
            table.append([
                count + 1,
                d.replace('lexibank-', ''),
                concept_counts[d][1],
                concept_counts[d][0],
                var_counts[d][0],
                var_counts[d][1],
                var_counts[d][2],
            ])
        # NOTE(review): the 'Glosses' total is a hard-coded 0 - presumably a
        # placeholder; confirm whether a real total is wanted.
        table.append([
            '', 'TOTAL', 0,
            args.api.db.fetchone("""\
select
    count(distinct p.concepticon_id) from parametertable as p, formtable as f, languagetable as l
where
    f.parameter_id = p.id and f.dataset_id = p.dataset_id
    and f.language_id = l.id and f.dataset_id = l.dataset_id
    and l.glottocode is not null
    and l.family != 'Bookkeeping'
""")[0],
            len(varieties),
            len({v.glottocode for v in varieties}),
            len({v.family for v in varieties})
        ])
        print(table.render(tablefmt='simple'))
示例#4
0
def list_(args):
    """List datasets available for loading

    clics --lexibank-repos=PATH/TO/lexibank-data list

    When ``args.unloaded`` is truthy, the ``cldf_dir`` of each dataset
    yielded by ``iter_datasets()`` is printed, or a notice if none were
    yielded.

    Otherwise a table is printed with one row per dataset listed in
    ``args.api.db.datasets`` and a final ``TOTAL`` row. If the
    ``concepts_by_dataset`` query raises ``sqlite3.OperationalError``, a
    notice is printed instead and the function returns.
    """
    if args.unloaded:
        # Count what was actually yielded. The previous check on the last
        # enumerate() index treated "one dataset" (index 0) the same as
        # "no datasets" and printed the notice after listing that dataset.
        installed = 0
        for ds in iter_datasets():
            installed += 1
            print(ds.cldf_dir)
        if installed == 0:
            print('No datasets installed')  # pragma: no cover
    else:
        table = Table(
            '#', 'Dataset', 'Glosses', 'Concepticon', 'Varieties', 'Glottocodes', 'Families')
        try:
            # First column of each row is the key; the rest are the counts.
            concept_counts = {r[0]: r[1:] for r in args.api.db.fetchall('concepts_by_dataset')}
        except sqlite3.OperationalError:  # pragma: no cover
            print('No datasets loaded yet')
            return

        varieties = args.api.db.varieties

        # Accumulate per-source lists in a dict instead of using
        # itertools.groupby: groupby only merges consecutive items, so an
        # unsorted `varieties` would yield partial, overwritten counts.
        grouped = {}
        for v in varieties:
            grouped.setdefault(v.source, []).append(v)
        var_counts = {}
        for dsid, vs in grouped.items():
            var_counts[dsid] = (
                len(vs), len({v.glottocode for v in vs}), len({v.family for v in vs}))

        for count, d in enumerate(args.api.db.datasets):
            table.append([
                count + 1,
                d.replace('lexibank-', ''),
                concept_counts[d][1],
                concept_counts[d][0],
                var_counts[d][0],
                var_counts[d][1],
                var_counts[d][2],
            ])
        # NOTE(review): the 'Glosses' cell of the TOTAL row is a literal 0 -
        # looks like a placeholder; confirm the intended value.
        table.append([
            '',
            'TOTAL',
            0,
            args.api.db.fetchone(
                """\
select
    count(distinct p.concepticon_id) from parametertable as p, formtable as f, languagetable as l
where
    f.parameter_id = p.id and f.dataset_id = p.dataset_id
    and f.language_id = l.id and f.dataset_id = l.dataset_id
    and l.glottocode is not null
    and l.family != 'Bookkeeping'
""")[0],
            len(varieties),
            len({v.glottocode for v in varieties}),
            len({v.family for v in varieties})
        ])
        print(table.render(tablefmt='simple'))
示例#5
0
 def datasets(self):
     """Return the datasets yielded by ``iter_datasets`` as a list ordered by ``id``.

     ``iter_datasets`` is called with this object's ``glottolog`` and
     ``concepticon`` attributes and with ``verbose=True``.
     """
     found = list(iter_datasets(
         glottolog=self.glottolog, concepticon=self.concepticon, verbose=True))
     # In-place stable sort; gives the same ordering as sorted() with this key.
     found.sort(key=lambda dataset: dataset.id)
     return found