Exemplo n.º 1
0
def lucvergpop(request, lucverg_metadata):
    """Populate the ``lucvergtest`` database, yield it, then wipe it.

    Generator-style setup/teardown (presumably registered as a pytest
    fixture; the decorator is not visible here -- confirm).

    For every entry of ``lucverg_metadata`` this:

    1. decodes the entry into a ``Text`` and inserts it,
    2. tokenizes the file at ``text.path`` with ``LatinTokenizer``,
    3. inserts features not yet stored for the text's language and updates
       the ones that already exist (re-using their stored ``id``),
    4. unitizes the tokens and inserts the first value returned by
       ``Unitizer.unitize``.

    Parameters
    ----------
    request
        Unused by this function; kept so the signature stays unchanged.
    lucverg_metadata
        Iterable of objects accepted by ``Text.json_decode``.

    Yields
    ------
    The ``TessMongoConnection`` to the populated ``lucvergtest`` database.

    Notes
    -----
    ``obliterate(conn)`` is called from a ``finally`` clause, so the test
    database is also cleaned up when population fails part-way through.
    Without it a failed setup would leave partially inserted data behind.
    """
    conn = TessMongoConnection('localhost', 27017, None, None, 'lucvergtest')
    try:
        for metadata in lucverg_metadata:
            text = Text.json_decode(metadata)
            tessfile = TessFile(text.path, metadata=text)

            conn.insert(text)

            tokens, tags, features = LatinTokenizer(conn).tokenize(
                tessfile.read(), text=tessfile.metadata)

            # Index the features already stored for this language so each
            # new feature can be classified as insert vs. update.
            feature_cache = {
                (f.feature, f.token): f
                for f in conn.find(Feature.collection, language=text.language)
            }
            features_for_insert = []
            features_for_update = []

            for f in features:
                key = (f.feature, f.token)
                if key not in feature_cache:
                    features_for_insert.append(f)
                    # Remember it so a repeat within this text is treated
                    # as an update rather than inserted twice.
                    feature_cache[key] = f
                else:
                    # Re-use the id of the known feature so the update
                    # targets that record.
                    f.id = feature_cache[key].id
                    features_for_update.append(f)
            conn.insert(features_for_insert)
            conn.update(features_for_update)

            unitizer = Unitizer()
            lines, _ = unitizer.unitize(tokens, tags, tessfile.metadata)

            conn.insert_nocheck(lines)
        yield conn
    finally:
        # Always wipe the test database, including after a failed setup.
        obliterate(conn)
Exemplo n.º 2
0
def main():
    # Script entry point: parses the command-line arguments, optionally
    # prompts for a MongoDB password, loads JSON from a file handle and passes
    # the decoded ``Text`` objects to ``connection.update``.
    args = parse_args()
    if args.password:
        # NOTE(review): the next line is corrupted in this copy. A
        # password-masking filter appears to have replaced part of the source
        # with ``******``, fusing the ``getpass`` call with what looks like a
        # later ``with open(..., encoding='utf-8') as ifh:`` statement. The
        # code that defines ``connection`` is also missing. Restore this
        # section from the original script; it is not valid Python as shown.
        password = getpass.getpass(prompt='Tesserae MongoDB Password: '******'utf-8') as ifh:
        # The file content is expected to be JSON; each element of the parsed
        # value is decoded with ``Text.json_decode`` below.
        raw_updates = json.load(ifh)
    # ``connection`` is not defined in the visible code (see note above).
    connection.update([Text.json_decode(t) for t in raw_updates])