Пример #1
0
    def test_add_inlg_e(self):
        """Check the INLG value ``add_inlg_e`` produces for one article entry.

        The entry is titled 'Grammar of language' and the triggers are the
        INLG triggers loaded from ``self.tree``; the expected INLG value is
        'language [abc]'.
        """
        from pyglottolog.monsterlib._libmonster import add_inlg_e, INLG

        entries = {1: ('article', {'title': 'Grammar of language'})}
        inlg_triggers = load_triggers(tree=self.tree)[INLG]

        annotated = add_inlg_e(entries, inlg_triggers, verbose=False)

        # Index 1 of the result value is the field mapping, as in the input.
        fields = annotated[1][1]
        assert_equal(fields[INLG], 'language [abc]')
Пример #2
0
    def test_add_inlg_e(self):
        """Check the INLG value ``add_inlg_e`` produces for one article entry.

        The entry is titled 'Grammar of language' and the triggers are taken
        from ``self.api.triggers[INLG]``; the expected INLG value is
        'language [abc]'.
        """
        from pyglottolog.monsterlib._libmonster import add_inlg_e, INLG

        entries = {1: ('article', {'title': 'Grammar of language'})}
        inlg_triggers = self.api.triggers[INLG]

        annotated = add_inlg_e(entries, inlg_triggers, verbose=False)

        # Index 1 of the result value is the field mapping, as in the input.
        fields = annotated[1][1]
        assert_equal(fields[INLG], 'language [abc]')
Пример #3
0
def main(repos=DATA_DIR, rebuild=False):
    """Compile the merged bibliography and save it as ``monster-utf8.bib``.

    Steps, in order: open (or rebuild) the sqlite db of the bibfiles, merge
    its entries, annotate them with macro_area, hhtype, lgcode and inlg,
    print a few counts, write the csv and replacements files, trickle back
    to the bibfiles and save the annotated entries.

    :param repos: repository location handed to every path helper and to
        ``HHTypes``.
    :param rebuild: passed through to ``to_sqlite``.
    """
    counted_fields = ['lgcode', 'hhtype', 'macro_area']

    def stamp(template):
        # Progress line: the template's single %s receives the current time.
        print(template % time.ctime())

    bibfiles = _bibfiles.Collection(references_path('bibtex', repos=repos))
    previous = references_path('monster.csv', repos=repos)
    replacements = build_path('monster-replacements.json', repos=repos)
    monster_path = build_path('monster-utf8.bib', repos=repos)
    monster = _bibfiles.BibFile(
        monster_path, encoding='utf-8', sortkey='bibkey')
    tree = languoids_path('tree', repos=repos)
    hht = HHTypes(repos=repos)

    def hhtype_rank(label):
        # Ranking key used for the hhtype annotation pass.
        return hht[label]

    stamp('%s open/rebuild bibfiles db')
    db = bibfiles.to_sqlite(
        build_path('_bibfiles.sqlite3', repos=repos).as_posix(),
        rebuild=rebuild)

    stamp('%s compile_monster')
    entries = dict(db.merged())

    stamp('%s load hh.bib')
    hhbib = bibfiles['hh.bib'].load()

    # macro_area is derived from lgcode where lgcode was assigned manually.
    stamp('%s macro_area_from_lgcode')
    entries = macro_area_from_lgcode(entries, tree)

    # hhtype annotation pass.
    stamp('%s annotate hhtype')
    entries = markconservative(
        entries, hht.triggers, hhbib, hht,
        build_path('monstermark-hht.txt', repos=repos),
        rank=hhtype_rank)

    ltriggers = languoids.load_triggers(tree=tree)

    # lgcode annotation pass.
    stamp('%s annotate lgcode')
    entries = markconservative(
        entries, ltriggers['lgcode'], hhbib, hht,
        build_path('monstermark-lgc.txt', repos=repos))

    # inlg annotation pass.
    stamp('%s add_inlg_e')
    entries = add_inlg_e(entries, ltriggers['inlg'])

    # Statistics: number of entries, and how many carry each counted field.
    stats = Counter()
    print(time.ctime())
    for _entrytype, fields in entries.values():
        stats['entry'] += 1
        stats.update(name for name in counted_fields if name in fields)
    print("# entries", stats['entry'])
    for name in counted_fields:
        print("with " + name, stats[name])

    # Keep the previous mappings in the CSV for later reference.
    stamp('%s update_previous')
    db.to_csvfile(previous)

    stamp('%s save_replacements')
    db.to_replacements(replacements)

    # Trickle back into the bibfiles.
    stamp('%s trickle')
    db.trickle(bibfiles)

    # Write the annotated entries.
    stamp('%s save as utf8')
    monster.save(entries, verbose=False)

    stamp('%s done.')
Пример #4
0
def compile(api, log=None, rebuild=False):
    """Compile the merged bibliography and save it as ``monster-utf8.bib``.

    Steps, in order: open (or rebuild) the sqlite db of the bibfiles, merge
    its entries, annotate them with macro_area, hhtype, lgcode and inlg,
    log a few counts, write the csv and replacements files, trickle back
    and save the annotated entries.

    :param api: object supplying ``references_path``, ``build_path``,
        ``bibfiles``, ``macroarea_map``, ``hhtypes`` and ``triggers``.
    :param log: logger for progress messages; when falsy, the
        'pyglottolog' logger is used.
    :param rebuild: passed through to ``to_sqlite``.
    """
    if not log:
        log = logging.getLogger('pyglottolog')
    counted_fields = ['lgcode', 'hhtype', 'macro_area']

    def hhtype_rank(label):
        # Ranking key used for the hhtype annotation pass.
        return api.hhtypes[label]

    previous = api.references_path('monster.csv')
    replacements = api.references_path('replacements.json')
    monster = BibFile(
        fname=api.build_path('monster-utf8.bib'),
        encoding='utf-8',
        sortkey='bibkey')

    log.info('%s open/rebuild bibfiles db' % time.ctime())
    db = api.bibfiles.to_sqlite(
        api.build_path('_bibfiles.sqlite3'), rebuild=rebuild)

    log.info('%s compile_monster' % time.ctime())
    entries = dict(db.merged())

    log.info('%s load hh.bib' % time.ctime())
    hhbib = api.bibfiles['hh.bib'].load()

    # macro_area is derived from lgcode where lgcode was assigned manually.
    log.info('%s macro_area_from_lgcode' % time.ctime())
    entries = macro_area_from_lgcode(entries, api.macroarea_map)

    # hhtype annotation pass.
    log.info('%s annotate hhtype' % time.ctime())
    entries = markconservative(
        entries,
        api.hhtypes.triggers,
        hhbib,
        api.hhtypes,
        api.build_path('monstermark-hht.txt'),
        rank=hhtype_rank)

    # lgcode annotation pass.
    log.info('%s annotate lgcode' % time.ctime())
    entries = markconservative(
        entries,
        api.triggers['lgcode'],
        hhbib,
        api.hhtypes,
        api.build_path('monstermark-lgc.txt'))

    # inlg annotation pass.
    log.info('%s add_inlg_e' % time.ctime())
    entries = add_inlg_e(entries, api.triggers['inlg'])

    # Statistics: number of entries, and how many carry each counted field.
    stats = Counter()
    log.info(time.ctime())
    for _entrytype, fields in entries.values():
        stats['entry'] += 1
        stats.update(name for name in counted_fields if name in fields)
    log.info("# entries {0}".format(stats['entry']))
    for name in counted_fields:
        log.info("with {0}: {1}".format(name, stats[name]))

    # Keep the previous mappings in the CSV for later reference.
    log.info('%s update_previous' % time.ctime())
    db.to_csvfile(previous)

    log.info('%s save_replacements' % time.ctime())
    db.to_replacements(replacements)

    # Trickle back into the bibfiles.
    log.info('%s trickle' % time.ctime())
    db.trickle()

    # Write the annotated entries.
    log.info('%s save as utf8' % time.ctime())
    monster.save(entries)

    log.info('%s done.' % time.ctime())
Пример #5
0
def main(repos=DATA_DIR, rebuild=False):
    """Compile the merged bibliography and save it as ``monster-utf8.bib``.

    Steps, in order: open (or rebuild) the sqlite db of the bibfiles, merge
    its entries, annotate them with macro_area, hhtype, lgcode and inlg,
    print a few counts, write the csv and replacements files, trickle back
    to the bibfiles and save the annotated entries.

    :param repos: repository location handed to every path helper and to
        ``HHTypes``.
    :param rebuild: passed through to ``to_sqlite``.
    """
    counted_fields = ['lgcode', 'hhtype', 'macro_area']

    def stamp(template):
        # Progress line: the template's single %s receives the current time.
        print(template % time.ctime())

    bibfiles = _bibfiles.Collection(references_path('bibtex', repos=repos))
    previous = references_path('monster.csv', repos=repos)
    replacements = build_path('monster-replacements.json', repos=repos)
    monster_path = build_path('monster-utf8.bib', repos=repos)
    monster = _bibfiles.BibFile(
        monster_path, encoding='utf-8', sortkey='bibkey')
    tree = languoids_path('tree', repos=repos)
    hht = HHTypes(repos=repos)

    def hhtype_rank(label):
        # Ranking key used for the hhtype annotation pass.
        return hht[label]

    stamp('%s open/rebuild bibfiles db')
    db = bibfiles.to_sqlite(
        build_path('_bibfiles.sqlite3', repos=repos).as_posix(),
        rebuild=rebuild)

    stamp('%s compile_monster')
    entries = dict(db.merged())

    stamp('%s load hh.bib')
    hhbib = bibfiles['hh.bib'].load()

    # macro_area is derived from lgcode where lgcode was assigned manually.
    stamp('%s macro_area_from_lgcode')
    entries = macro_area_from_lgcode(entries, tree)

    # hhtype annotation pass.
    stamp('%s annotate hhtype')
    entries = markconservative(
        entries, hht.triggers, hhbib, hht,
        build_path('monstermark-hht.txt', repos=repos),
        rank=hhtype_rank)

    ltriggers = languoids.load_triggers(tree=tree)

    # lgcode annotation pass.
    stamp('%s annotate lgcode')
    entries = markconservative(
        entries, ltriggers['lgcode'], hhbib, hht,
        build_path('monstermark-lgc.txt', repos=repos))

    # inlg annotation pass.
    stamp('%s add_inlg_e')
    entries = add_inlg_e(entries, ltriggers['inlg'])

    # Statistics: number of entries, and how many carry each counted field.
    stats = Counter()
    print(time.ctime())
    for _entrytype, fields in entries.values():
        stats['entry'] += 1
        stats.update(name for name in counted_fields if name in fields)
    print("# entries", stats['entry'])
    for name in counted_fields:
        print("with " + name, stats[name])

    # Keep the previous mappings in the CSV for later reference.
    stamp('%s update_previous')
    db.to_csvfile(previous)

    stamp('%s save_replacements')
    db.to_replacements(replacements)

    # Trickle back into the bibfiles.
    stamp('%s trickle')
    db.trickle(bibfiles)

    # Write the annotated entries.
    stamp('%s save as utf8')
    monster.save(entries, verbose=False)

    stamp('%s done.')