def main():
    """Build a DB-backed concept dictionary from bz2-compressed inputs.

    Positional command-line arguments:
        sys.argv[1] -- name handed to StringDBDict for the output store
        sys.argv[2] -- bz2-compressed original data file
        sys.argv[3] -- bz2-compressed MRSTY file used to load semantic types

    Progress messages are written to standard output.
    """
    # Single-expression, parenthesized prints behave exactly like the bare
    # Python 2 print statement.
    store_name = sys.argv[1]
    source_path = sys.argv[2]
    mrsty_path = sys.argv[3]

    # The source file is opened before anything is reported, so a bad path
    # fails immediately.
    source_text = Text(bz2.BZ2File(source_path, 'r'))

    print("Loading semantic types from %s" % mrsty_path)
    semantic_types = SemanticTypes()
    mrsty_table = MRSTYTable(bz2.BZ2File(mrsty_path))
    semantic_types.build_from_mrsty_file(mrsty_table)
    print("Semantic types loaded.")

    print("Turning the data from %s into %s. Please wait." % (
        source_path, store_name))
    concept_store = StringDBDict(store_name,
                                 sync_every_transactions=0,
                                 write_out_every_transactions=200000,
                                 file_mode='c')
    # NOTE(review): sync_every is 0 for the bulk load and 100 afterwards --
    # presumably this suspends periodic syncing while building; confirm
    # against StringDBDict.
    concept_store.sync_every = 0
    build_concept_dictionary(source_text, concept_store, semantic_types)
    concept_store.sync_every = 100
    print("Conversion done.")
trees[term] = TreeNode(term, role, synonyms, set(position)) return trees if __name__ == "__main__": # The pickling and unpickling make this horribly slow, so we'll trade some # memory for speed in the build process and later turn the dictionary into # a DB-backed one. tree_storage = {} for treefile in sys.argv[2:]: treesfile = bz2.BZ2File(treefile, 'rU') print "Reading %s..." % treefile tree_storage = build_tree_from_descriptor_file(treesfile, tree_storage) print "Tree built. It has %d unique terms." % len(tree_storage) print "For example... arm=", tree_storage['arm'], " and eye=", \ tree_storage['eye'] print "Done generating tree." print "Storing tree in", sys.argv[1] tree_on_disk = StringDBDict(persistent_file=sys.argv[1], sync_every_transactions=0, write_out_every_transactions=0, file_mode='c') write_counter = 0 for k, v in tree_storage.iteritems(): tree_on_disk[k] = v write_counter += 1 if write_counter % 1000 == 0: print "Stored", write_counter, "terms." tree_on_disk.sync_every = 1 print "Done storing."
trees[term]=TreeNode(term, role, synonyms, set(position)) return trees if __name__=="__main__": # The pickling and unpickling make this horribly slow, so we'll trade some # memory for speed in the build process and later turn the dictionary into # a DB-backed one. tree_storage={} for treefile in sys.argv[2:]: treesfile=bz2.BZ2File(treefile, 'rU') print "Reading %s..." % treefile tree_storage=build_tree_from_descriptor_file(treesfile, tree_storage) print "Tree built. It has %d unique terms." % len(tree_storage) print "For example... arm=", tree_storage['arm'], " and eye=", \ tree_storage['eye'] print "Done generating tree." print "Storing tree in", sys.argv[1] tree_on_disk=StringDBDict(persistent_file=sys.argv[1], sync_every_transactions=0, write_out_every_transactions=0, file_mode='c') write_counter=0 for k,v in tree_storage.iteritems(): tree_on_disk[k]=v write_counter+=1 if write_counter % 1000 == 0: print "Stored", write_counter, "terms." tree_on_disk.sync_every=1 print "Done storing."