def main():
    """Build the vocabulary from parse trees read on standard input.

    Each non-blank line of ``sys.stdin`` is parsed with
    ``parsetree.read_tree``. When the module-level flag
    ``remove_quotation_marks`` is set, nodes labelled "``" or "''" are
    pruned; when ``lowercase_vocabulary`` is set, every leaf headword is
    lowercased. The tree is then refreshed and added to ``vocab``. After all
    input has been consumed the vocabulary is written out with
    ``vocab.write()``.

    Progress is reported through ``debug``: at level 1 every 1000 input
    lines and at level 2 every other 100 input lines.

    Raises:
        AssertionError: if ``parsetree.read_tree`` returns ``None``.
    """
    vocab.init()

    # The counter tracks input lines (blank ones included), starting at 1.
    for sentence, line in enumerate(sys.stdin, 1):
        # Skip blank lines.  ``str.strip`` replaces the ``string.strip``
        # module function, which no longer exists in Python 3.
        if not line.strip():
            continue

        tree = parsetree.read_tree(line)
        # Identity test: ``!= None`` would dispatch to a user-defined __ne__.
        assert tree is not None, "could not read tree on input line %d" % sentence

        if remove_quotation_marks:
            tree.prune_labels(["``", "''"])

        for leaf in tree.leaves():
            if lowercase_vocabulary:
                leaf.headword = leaf.headword.lower()

        # Refresh after the in-place edits above, before recording the tree.
        tree = parsetree.refresh(tree)
        vocab.add(tree)
        del tree

        if sentence % 1000 == 0:
            debug(1, "Sentence #%d done" % sentence)
        elif sentence % 100 == 0:
            debug(2, "Sentence #%d done" % sentence)

    vocab.write()
def init(schema, coll, pgConn, host, resourceUtil, workspacePath):
    """Store the shared collections handle and initialise each sub-module.

    ``coll`` is bound to the module-level name ``collections``, passed to
    ``ensureIndexes``, and then every sub-module's ``init`` is called in a
    fixed order with the subset of arguments it takes.

    Args:
        schema: forwarded to ``ckan``, ``vocab`` and ``mapreduce``.
        coll: collections handle, stored globally and forwarded to most
            sub-modules.
        pgConn: forwarded to ``ckan``.
        host: forwarded to ``query``.
        resourceUtil: forwarded to ``workspace``.
        workspacePath: forwarded to ``files``, ``delete`` and ``workspace``.
    """
    global collections
    collections = coll

    ensureIndexes(collections)

    # Sub-module initialisation; the order matches the original sequence.
    files.init(collections, workspacePath)
    ckan.init(pgConn, schema)
    query.init(collections, host)
    vocab.init(schema, collections)
    delete.init(collections, workspacePath)
    workspace.init(collections, resourceUtil, workspacePath)
    mapreduce.init(collections, schema)
    push.init(collections)