def main():
    """Build the vocabulary from parse trees read on standard input.

    Each non-blank line of ``sys.stdin`` is handed to
    ``parsetree.read_tree``; the resulting tree is optionally cleaned up
    according to the module-level flags ``remove_quotation_marks`` and
    ``lowercase_vocabulary``, passed through ``parsetree.refresh``, and
    added to ``vocab``.  Once the input is exhausted ``vocab.write()`` is
    called.

    Raises:
        AssertionError: if ``parsetree.read_tree`` returns ``None``.
    """
    vocab.init()

    sentence = 0
    for line in sys.stdin:
        # Blank lines are counted too, so the progress number is the input
        # line number rather than the number of trees processed.
        sentence += 1
        # str methods replace the string-module functions, which no longer
        # exist in Python 3; behaviour on Python 2 is the same.
        if not line.strip():
            continue

        tree = parsetree.read_tree(line)
        assert tree is not None, "no tree read from input line %d" % sentence
        if remove_quotation_marks:
            tree.prune_labels(["``", "''"])

        # The flag cannot change while iterating, so test it once per tree
        # instead of once per leaf.
        if lowercase_vocabulary:
            for leaf in tree.leaves():
                leaf.headword = leaf.headword.lower()
        # NOTE(review): presumably refresh() rebuilds derived state after the
        # in-place edits above -- confirm against parsetree.
        tree = parsetree.refresh(tree)

        vocab.add(tree)
        # Drop the reference now so the tree is not kept alive while the
        # next one is being read.
        del tree

        # Progress reporting: every 1000th line is reported with first
        # argument 1, every other 100th line with 2 (presumably a verbosity
        # level -- verify against debug()).
        if sentence % 1000 == 0:
            debug(1, "Sentence #%d done" % sentence)
        elif sentence % 100 == 0:
            debug(2, "Sentence #%d done" % sentence)

    vocab.write()
# Example #2
# 0
def init(schema, coll, pgConn, host, resourceUtil, workspacePath):
    """Store the collections handle and initialise every submodule.

    ``coll`` is published as the module-level ``collections``, indexes are
    ensured on it via ``ensureIndexes``, and then each submodule's ``init``
    is called in a fixed order.

    Args:
        schema: forwarded to ``ckan.init``, ``vocab.init`` and
            ``mapreduce.init``.
        coll: stored as the global ``collections`` and forwarded to every
            submodule except ``ckan``.
        pgConn: forwarded to ``ckan.init``.
        host: forwarded to ``query.init``.
        resourceUtil: forwarded to ``workspace.init``.
        workspacePath: forwarded to ``files.init``, ``delete.init`` and
            ``workspace.init``.
    """
    global collections
    collections = coll

    ensureIndexes(collections)

    # Ordered table of (initialiser, positional arguments); the sequence
    # below is the order in which the submodules are brought up.
    startup_steps = (
        (files.init, (collections, workspacePath)),
        (ckan.init, (pgConn, schema)),
        (query.init, (collections, host)),
        (vocab.init, (schema, collections)),
        (delete.init, (collections, workspacePath)),
        (workspace.init, (collections, resourceUtil, workspacePath)),
        (mapreduce.init, (collections, schema)),
        (push.init, (collections,)),
    )
    for initialise, arguments in startup_steps:
        initialise(*arguments)