def bibtex():
    """Regenerate ``iso6393.bib`` from the ISO 639-3 change requests.

    Any ``glottolog_ref_id`` values found in the existing file are collected
    (keyed by record id) and handed to ``change_request_as_source`` so they
    can be carried over into the regenerated records.

    All entries are rendered *before* the target file is opened for writing:
    opening with mode ``"w"`` truncates the file immediately, so an error
    while fetching or converting change requests would otherwise leave an
    empty or partial bibfile behind.
    """
    bib = references_path("bibtex", "iso6393.bib")

    glottolog_ref_ids = {}
    if bib.exists():
        with bib.open(encoding="utf8") as fp:
            # Records are split on the literal "@misc" entry marker, which
            # is then re-attached so each chunk parses as one BibTeX entry.
            for chunk in fp.read().split("@misc"):
                if not chunk.strip():
                    continue
                rec = Source.from_bibtex("@misc" + chunk)
                if "glottolog_ref_id" in rec:
                    glottolog_ref_ids[rec.id] = rec["glottolog_ref_id"]

    # NOTE(review): groupby here looks like itertools.groupby, which only
    # merges *adjacent* items - this assumes iter_change_requests() yields
    # rows already ordered by "CR Number"; confirm against its definition.
    entries = [
        change_request_as_source(id_, list(rows), glottolog_ref_ids).bibtex()
        for id_, rows in groupby(iter_change_requests(), lambda c: c["CR Number"])
    ]

    with bib.open("w", encoding="utf8") as fp:
        for entry in entries:
            fp.write(entry)
            fp.write("\n\n")
def bibtex():
    """Rewrite ``iso6393.bib`` with one record per ISO 639-3 change request.

    The existing file (if present) is read first to harvest the
    ``glottolog_ref_id`` field of each record, keyed by record id; that
    mapping is passed on to ``change_request_as_source``.

    The new records are fully rendered before the file is reopened in write
    mode. Mode ``'w'`` truncates on open, so generating while writing would
    destroy the previous content if any change request failed to convert.
    """
    bib = references_path('bibtex', 'iso6393.bib')

    glottolog_ref_ids = {}
    if bib.exists():
        with bib.open(encoding='utf8') as fp:
            content = fp.read()
        # Split on the '@misc' entry marker and put it back in front of each
        # non-blank chunk so it can be parsed as a single BibTeX entry.
        for part in content.split('@misc'):
            if part.strip():
                rec = Source.from_bibtex('@misc' + part)
                if 'glottolog_ref_id' in rec:
                    glottolog_ref_ids[rec.id] = rec['glottolog_ref_id']

    # NOTE(review): presumably itertools.groupby, which groups consecutive
    # items only - verify that iter_change_requests() is ordered by
    # 'CR Number'.
    rendered = []
    for id_, rows in groupby(iter_change_requests(), lambda c: c['CR Number']):
        rendered.append(
            change_request_as_source(id_, list(rows), glottolog_ref_ids).bibtex())

    with bib.open('w', encoding='utf8') as fp:
        for record in rendered:
            fp.write(record)
            fp.write('\n\n')
# _bibfiles.py - ordered collection of bibfiles with load/save api import datetime from six import string_types from pyglottolog.util import references_path, read_ini from pyglottolog.monsterlib import _bibtex from pyglottolog.monsterlib._bibfiles_db import Database __all__ = ['Collection', 'BibFile', 'Database'] DIR = references_path('bibtex') class Collection(list): """Directory with an INI-file with settings for BibTeX files inside.""" _encoding = 'utf-8-sig' @classmethod def _bibfiles(cls, directory): """Read the INI-file, yield bibfile instances for sections.""" cfg = read_ini(directory.parent.joinpath('BIBFILES.ini')) for s in cfg.sections(): if not s.endswith('.bib'): continue filepath = directory.joinpath(s) assert filepath.exists() sortkey = cfg.get(s, 'sortkey') if sortkey.lower() == 'none':
def main(repos=DATA_DIR, rebuild=False):
    """Compile the merged bibliography and write it out as ``monster-utf8.bib``.

    Steps, in order: open (or rebuild) the sqlite database for the bibfile
    collection, merge its entries, annotate them (macro_area, hhtype, lgcode,
    inlg), print a few counts, export the CSV and replacements files, trickle
    changes back into the bibfiles and finally save the merged bibliography.

    :param repos: repository location handed to every path helper.
    :param rebuild: forwarded to ``Collection.to_sqlite``.
    """
    def log(step):
        # Progress line of the form "<time.ctime()> <step>".
        print('%s %s' % (time.ctime(), step))

    counted_fields = ['lgcode', 'hhtype', 'macro_area']

    # Resolve all inputs/outputs up front.
    bibfiles = _bibfiles.Collection(references_path('bibtex', repos=repos))
    previous = references_path('monster.csv', repos=repos)
    replacements = build_path('monster-replacements.json', repos=repos)
    monster = _bibfiles.BibFile(
        build_path('monster-utf8.bib', repos=repos),
        encoding='utf-8',
        sortkey='bibkey')
    tree = languoids_path('tree', repos=repos)
    hht = HHTypes(repos=repos)

    log('open/rebuild bibfiles db')
    dbpath = build_path('_bibfiles.sqlite3', repos=repos).as_posix()
    db = bibfiles.to_sqlite(dbpath, rebuild=rebuild)

    log('compile_monster')
    m = dict(db.merged())

    log('load hh.bib')
    hhbib = bibfiles['hh.bib'].load()

    # Annotate with macro_area from lgcode when lgcode is assigned manually
    log('macro_area_from_lgcode')
    m = macro_area_from_lgcode(m, tree)

    # Annotate with hhtype
    log('annotate hhtype')
    hht_marks = build_path('monstermark-hht.txt', repos=repos)
    m = markconservative(
        m, hht.triggers, hhbib, hht, hht_marks, rank=lambda l: hht[l])

    ltriggers = languoids.load_triggers(tree=tree)

    # Annotate with lgcode
    log('annotate lgcode')
    lgc_marks = build_path('monstermark-lgc.txt', repos=repos)
    m = markconservative(m, ltriggers['lgcode'], hhbib, hht, lgc_marks)

    # Annotate with inlg
    log('add_inlg_e')
    m = add_inlg_e(m, ltriggers['inlg'])

    # Print some statistics: total entries and how many carry each field.
    stats = Counter()
    print(time.ctime())
    for _, fields in m.values():
        stats['entry'] += 1
        for name in counted_fields:
            if name in fields:
                stats[name] += 1
    print("# entries", stats['entry'])
    for name in counted_fields:
        print("with " + name, stats[name])

    # Update the CSV with the previous mappings for later reference
    log('update_previous')
    db.to_csvfile(previous)

    log('save_replacements')
    db.to_replacements(replacements)

    # Trickling back
    log('trickle')
    db.trickle(bibfiles)

    # Save
    log('save as utf8')
    monster.save(m, verbose=False)

    log('done.')
from pyglottolog import languoids
from pyglottolog.util import references_path, parse_conjunctions, read_ini
from pyglottolog._bibtex_undiacritic import undiacritic

__all__ = [
    'add_inlg_e',
    'keyid',
    'wrds', 'setd', 'setd3', 'indextrigs',
    'lstat', 'lstat_witness',
    'hhtype_to_n', 'expl_to_hhtype', 'lgcode',
    'read_csv_dict', 'write_csv_rows', 'load_triggers',
    'pitems',
]

HHTYPE = references_path('alt4hhtype.ini')


def read_csv_dict(filename):
    """Return the rows of a CSV file as a dict keyed by each row's first cell.

    Rows come from ``csv_iterrows``; when several rows share the same first
    cell the last one wins.
    """
    by_first_cell = {}
    for record in csv_iterrows(filename):
        by_first_cell[record[0]] = record
    return by_first_cell


def csv_iterrows(filename, fieldnames=None, dialect='excel'):
    """Yield the rows of a CSV file as ``Row`` namedtuples.

    :param filename: path of the CSV file to read.
    :param fieldnames: field names for the namedtuple; when ``None`` the
        first row of the file is consumed and used as the header.
    :param dialect: csv dialect passed to ``csv.reader``.
    """
    with open(filename) as stream:
        records = csv.reader(stream, dialect=dialect)
        header = next(records) if fieldnames is None else fieldnames
        row_type = namedtuple('Row', header)
        for values in records:
            yield row_type._make(values)
def __init__(self, repos=None):
    """Load the type definitions from ``hhtype.ini``.

    One ``HHType`` is created per INI section. ``_types`` holds them in
    reverse of ``HHType``'s own sort order and ``_type_by_id`` maps each
    type's ``id`` to the instance.

    :param repos: repository location forwarded to ``references_path``.
    """
    config = read_ini(references_path('hhtype.ini', repos=repos))

    hhtypes = [HHType(section, config) for section in config.sections()]
    hhtypes.sort(reverse=True)

    self._types = hhtypes
    self._type_by_id = {hhtype.id: hhtype for hhtype in hhtypes}
def __init__(self, repos=None):
    """Read ``hhtype.ini`` and build the type list and id lookup.

    Every section of the INI file becomes an ``HHType``; the instances are
    stored in ``_types`` sorted with ``reverse=True`` and indexed by their
    ``id`` attribute in ``_type_by_id``.

    :param repos: repository location passed through to ``references_path``.
    """
    ini_path = references_path("hhtype.ini", repos=repos)
    ini = read_ini(ini_path)

    section_names = ini.sections()
    self._types = sorted(
        [HHType(name, ini) for name in section_names],
        reverse=True,
    )

    lookup = {}
    for entry in self._types:
        lookup[entry.id] = entry
    self._type_by_id = lookup
# _bibfiles.py - ordered collection of bibfiles with load/save api import os import io import datetime from clldutils.inifile import INI from pyglottolog.util import references_path import _bibtex from _bibfiles_db import Database __all__ = ['Collection', 'BibFile', 'Database'] DIR = references_path('bibtex').as_posix() CONFIG = 'BIBFILES.ini' class Collection(list): """Directory with an INI-file with settings for BibTeX files inside.""" _encoding = 'utf-8-sig' @classmethod def _bibfiles(cls, directory, config, endwith): """Read the INI-file, yield bibfile instances for sections.""" cfg = INI(interpolation=None) cfg.read(os.path.join(directory, '..', config)) for s in cfg.sections(): if not s.endswith(endwith): continue
import json import sqlite3 import difflib import operator import itertools import contextlib import collections from pyglottolog.util import references_path, build_path import _bibtex __all__ = ['Database'] DBFILE = build_path('_bibfiles.sqlite3').as_posix() BIBFILE = build_path('monster-utf8.bib').as_posix() CSVFILE = references_path('monster.csv').as_posix() REPLACEMENTSFILE = build_path('monster-replacements.json').as_posix() UNION_FIELDS = {'fn', 'asjp_name', 'isbn'} IGNORE_FIELDS = {'crossref', 'numnote', 'glotto_id'} class Database(object): """Bibfile collection parsed into an sqlite3 file.""" @staticmethod def _get_bibfiles(bibfiles): if bibfiles is None: from _bibfiles import Collection return Collection()