def load(args):
    """
    clics load /path/to/concepticon-data /path/to/glottolog

    Expects exactly two positional arguments in ``args.args``: the location of the
    concepticon-data repository followed by the location of the glottolog repository.

    Calls ``create(exists_ok=True)`` on the database, loads every dataset yielded by
    ``iter_datasets()`` and afterwards loads the Concepticon and the Glottolog data.
    If ``args.unloaded`` is truthy, datasets whose id is already listed in the
    database are skipped.

    Raises ``ParserError`` if the number of arguments is not two or if one of the
    two locations does not exist.
    """
    if len(args.args) != 2:
        raise ParserError('concepticon and glottolog repos locations must be specified!')

    concepticon_repo = Path(args.args[0])
    if not concepticon_repo.exists():
        raise ParserError('concepticon repository does not exist')

    glottolog_repo = Path(args.args[1])
    if not glottolog_repo.exists():
        raise ParserError('glottolog repository does not exist')

    log, db = args.log, args.api.db

    db.create(exists_ok=True)
    log.info('loading datasets into {0}'.format(db.fname))

    # Looked up once, before anything is loaded in this run.
    already_loaded = db.datasets
    for dataset in iter_datasets():
        if args.unloaded and dataset.id in already_loaded:
            log.info('skipping {0} - already loaded'.format(dataset.id))
        else:
            log.info('loading {0}'.format(dataset.id))
            db.load(dataset)

    log.info('loading Concepticon data')
    db.load_concepticon_data(Concepticon(str(concepticon_repo)))

    log.info('loading Glottolog data')
    db.load_glottolog_data(Glottolog(str(glottolog_repo)))
def load(args):
    """
    clics load /path/to/concepticon-data /path/to/glottolog

    Load all datasets, then the Concepticon and the Glottolog data, into the database.

    ``args.args`` must hold exactly two items: the concepticon-data location and the
    glottolog location. A ``ParserError`` is raised if the count is wrong or if either
    location does not exist. With a truthy ``args.unloaded``, datasets whose id is
    already listed in the database are logged as skipped and not loaded again.
    """
    if len(args.args) != 2:
        raise ParserError('concepticon and glottolog repos locations must be specified!')
    concepticon_arg, glottolog_arg = args.args

    concepticon_path = Path(concepticon_arg)
    if not concepticon_path.exists():
        raise ParserError('concepticon repository does not exist')
    glottolog_path = Path(glottolog_arg)
    if not glottolog_path.exists():
        raise ParserError('glottolog repository does not exist')

    args.api.db.create(exists_ok=True)
    args.log.info('loading datasets into {0}'.format(args.api.db.fname))

    # Ids present in the database before this run starts loading.
    loaded_ids = args.api.db.datasets
    for candidate in iter_datasets():
        wanted = not (args.unloaded and candidate.id in loaded_ids)
        if wanted:
            args.log.info('loading {0}'.format(candidate.id))
            args.api.db.load(candidate)
        else:
            args.log.info('skipping {0} - already loaded'.format(candidate.id))

    args.log.info('loading Concepticon data')
    concepticon_api = Concepticon(str(concepticon_path))
    args.api.db.load_concepticon_data(concepticon_api)

    args.log.info('loading Glottolog data')
    glottolog_api = Glottolog(str(glottolog_path))
    args.api.db.load_glottolog_data(glottolog_api)
def list_(args):
    """List datasets available for loading

    clics --lexibank-repos=PATH/TO/lexibank-data list

    If ``args.unloaded`` is truthy, print the ``cldf_dir`` of each dataset yielded by
    ``iter_datasets()``, or 'No datasets installed' when it yields nothing.

    Otherwise print a table with the columns '#', 'Dataset', 'Glosses', 'Concepticon',
    'Varieties', 'Glottocodes' and 'Families': one row per dataset listed in the
    database, followed by a 'TOTAL' row. If the 'concepts_by_dataset' query raises
    ``sqlite3.OperationalError``, print 'No datasets loaded yet' and return.
    """
    if args.unloaded:
        # Number the datasets from 1. With a 0-based index the counter is still 0
        # after listing exactly one dataset, which wrongly triggered the
        # 'No datasets installed' message.
        installed = 0
        for installed, ds in enumerate(iter_datasets(), 1):
            print(ds.cldf_dir)
        if not installed:
            print('No datasets installed')  # pragma: no cover
        return

    table = Table(
        '#', 'Dataset', 'Glosses', 'Concepticon', 'Varieties', 'Glottocodes', 'Families')
    try:
        # Maps the first column of each result row to the remaining columns.
        concept_counts = {
            r[0]: r[1:] for r in args.api.db.fetchall('concepts_by_dataset')}
    except sqlite3.OperationalError:  # pragma: no cover
        print('No datasets loaded yet')
        return

    varieties = args.api.db.varieties
    # Group explicitly instead of using itertools.groupby: groupby only merges
    # *adjacent* items, so counts would be silently overwritten if the varieties
    # were not ordered by source.
    by_source = {}
    for v in varieties:
        by_source.setdefault(v.source, []).append(v)
    # dataset id -> (number of varieties, distinct glottocodes, distinct families)
    var_counts = {
        dsid: (len(vs), len({v.glottocode for v in vs}), len({v.family for v in vs}))
        for dsid, vs in by_source.items()}

    for number, d in enumerate(args.api.db.datasets, 1):
        table.append([
            number,
            d.replace('lexibank-', ''),
            # Index 1 feeds the 'Glosses' column, index 0 the 'Concepticon' column.
            concept_counts[d][1],
            concept_counts[d][0],
            var_counts[d][0],
            var_counts[d][1],
            var_counts[d][2],
        ])
    table.append([
        '',
        'TOTAL',
        0,  # no overall gloss count is computed; the cell is a literal 0
        args.api.db.fetchone(
            """\
select
    count(distinct p.concepticon_id)
from
    parametertable as p, formtable as f, languagetable as l
where
    f.parameter_id = p.id and f.dataset_id = p.dataset_id
    and f.language_id = l.id and f.dataset_id = l.dataset_id
    and l.glottocode is not null and l.family != 'Bookkeeping'
""")[0],
        len(varieties),
        len({v.glottocode for v in varieties}),
        len({v.family for v in varieties}),
    ])
    print(table.render(tablefmt='simple'))
def list_(args):
    """List datasets available for loading

    clics --lexibank-repos=PATH/TO/lexibank-data list

    With a truthy ``args.unloaded``, the ``cldf_dir`` of every dataset yielded by
    ``iter_datasets()`` is printed; 'No datasets installed' is printed only when no
    dataset was yielded at all.

    Without it, a summary table is printed ('#', 'Dataset', 'Glosses', 'Concepticon',
    'Varieties', 'Glottocodes', 'Families') with one row per dataset listed in the
    database and a closing 'TOTAL' row. If the 'concepts_by_dataset' query raises
    ``sqlite3.OperationalError``, 'No datasets loaded yet' is printed instead.
    """
    if args.unloaded:
        # Start counting at 1 so the counter stays 0 only if nothing was yielded.
        # A 0-based index is also 0 after exactly one dataset, which made the
        # message below appear even though a dataset had just been listed.
        n_listed = 0
        for n_listed, ds in enumerate(iter_datasets(), 1):
            print(ds.cldf_dir)
        if not n_listed:
            print('No datasets installed')  # pragma: no cover
    else:
        table = Table(
            '#', 'Dataset', 'Glosses', 'Concepticon', 'Varieties', 'Glottocodes', 'Families')
        try:
            # First column of each result row keys the remaining columns.
            concept_counts = {
                r[0]: r[1:] for r in args.api.db.fetchall('concepts_by_dataset')}
        except sqlite3.OperationalError:  # pragma: no cover
            print('No datasets loaded yet')
            return

        varieties = args.api.db.varieties
        # Collect varieties per source in a dict. itertools.groupby would start a
        # new group whenever the source changes, so unsorted input would overwrite
        # earlier counts for the same dataset.
        per_source = {}
        for v in varieties:
            per_source.setdefault(v.source, []).append(v)
        # dataset id -> (number of varieties, distinct glottocodes, distinct families)
        var_counts = {}
        for dsid, vs in per_source.items():
            var_counts[dsid] = (
                len(vs), len({v.glottocode for v in vs}), len({v.family for v in vs}))

        for count, d in enumerate(args.api.db.datasets):
            table.append([
                count + 1,
                d.replace('lexibank-', ''),
                # 'Glosses' is taken from index 1, 'Concepticon' from index 0.
                concept_counts[d][1],
                concept_counts[d][0],
                var_counts[d][0],
                var_counts[d][1],
                var_counts[d][2],
            ])
        table.append([
            '',
            'TOTAL',
            0,  # the total for 'Glosses' is not computed; a literal 0 is shown
            args.api.db.fetchone(
                """\
select
    count(distinct p.concepticon_id)
from
    parametertable as p, formtable as f, languagetable as l
where
    f.parameter_id = p.id and f.dataset_id = p.dataset_id
    and f.language_id = l.id and f.dataset_id = l.dataset_id
    and l.glottocode is not null and l.family != 'Bookkeeping'
""")[0],
            len(varieties),
            len({v.glottocode for v in varieties}),
            len({v.family for v in varieties}),
        ])
        print(table.render(tablefmt='simple'))
def datasets(self):
    """Return the datasets yielded by ``iter_datasets`` as a list ordered by ``id``.

    ``iter_datasets`` is called with this object's ``glottolog`` and ``concepticon``
    attributes and with ``verbose=True``.
    """
    def dataset_id(dataset):
        return dataset.id

    found = list(iter_datasets(
        glottolog=self.glottolog, concepticon=self.concepticon, verbose=True))
    found.sort(key=dataset_id)
    return found