Пример #1
0
def main():
    """Build the concept data store from a bzip2-compressed source file.

    Reads three command-line arguments from ``sys.argv``:

    1. the name of the data store to write,
    2. the path of the bzip2-compressed file to convert,
    3. the path of the bzip2-compressed MRSTY file that the semantic types
       are built from.
    """
    store_name = sys.argv[1]
    source_path = sys.argv[2]
    mrsty_path = sys.argv[3]

    # The source file is opened up front, before the semantic types load.
    source = Text(bz2.BZ2File(source_path, 'r'))

    print("Loading semantic types from %s" % mrsty_path)
    semantic_types = SemanticTypes()
    mrsty_table = MRSTYTable(bz2.BZ2File(mrsty_path))
    semantic_types.build_from_mrsty_file(mrsty_table)
    print("Semantic types loaded.")

    print("Turning the data from %s into %s. Please wait."
          % (source_path, store_name))
    data_store = StringDBDict(
        store_name,
        sync_every_transactions=0,
        write_out_every_transactions=200000,
        file_mode='c',
    )

    # NOTE(review): sync_every is set to 0 for the duration of the build and
    # to 100 afterwards -- presumably this suspends periodic syncing during
    # the bulk load; confirm against StringDBDict.
    data_store.sync_every = 0
    build_concept_dictionary(source, data_store, semantic_types)
    data_store.sync_every = 100
    print("Conversion done.")
Пример #2
0
            trees[term] = TreeNode(term, role, synonyms, set(position))
    return trees


if __name__ == "__main__":
    # Command line: argv[1] is the output store, argv[2:] are the
    # bzip2-compressed descriptor files to read.
    #
    # Going through pickle for every access makes a DB-backed dictionary far
    # too slow to build against, so the whole tree is first assembled in an
    # ordinary in-memory dict (spending memory to save time) and only then
    # copied into the DB-backed dictionary.
    tree_storage = {}
    for descriptor_path in sys.argv[2:]:
        descriptor_file = bz2.BZ2File(descriptor_path, 'rU')
        print("Reading %s..." % descriptor_path)
        tree_storage = build_tree_from_descriptor_file(descriptor_file,
                                                       tree_storage)

    print("Tree built. It has %d unique terms." % len(tree_storage))
    # Show two sample entries; both terms must be present in the tree or the
    # lookup fails here.
    print("For example... arm= %s  and eye= %s"
          % (tree_storage['arm'], tree_storage['eye']))
    print("Done generating tree.")

    output_path = sys.argv[1]
    print("Storing tree in %s" % output_path)
    tree_on_disk = StringDBDict(
        persistent_file=output_path,
        sync_every_transactions=0,
        write_out_every_transactions=0,
        file_mode='c',
    )

    # Copy every entry across, reporting progress after each 1000 terms.
    for stored, (term, node) in enumerate(tree_storage.iteritems(), 1):
        tree_on_disk[term] = node
        if stored % 1000 == 0:
            print("Stored %d terms." % stored)

    # NOTE(review): presumably switching sync_every to 1 re-enables syncing
    # once the bulk copy is finished -- confirm against StringDBDict.
    tree_on_disk.sync_every = 1
    print("Done storing.")
Пример #3
0
            trees[term]=TreeNode(term, role, synonyms, set(position))
    return trees


if __name__ == "__main__":
    # Arguments: the first is the file the finished tree is stored in, every
    # following one is a bzip2-compressed descriptor file.
    #
    # A DB-backed dictionary pickles and unpickles on access, which is
    # horribly slow while the tree is still being built. The build therefore
    # runs against a regular dict held in memory, and the result is moved
    # into a DB-backed dictionary afterwards.
    tree_storage = {}
    for archive_name in sys.argv[2:]:
        archive = bz2.BZ2File(archive_name, 'rU')
        print("Reading %s..." % archive_name)
        tree_storage = build_tree_from_descriptor_file(archive, tree_storage)

    term_count = len(tree_storage)
    print("Tree built. It has %d unique terms." % term_count)
    # Two sample nodes are printed; the lookups fail if either term is
    # missing from the tree.
    print("For example... arm= %s  and eye= %s" % (
        tree_storage['arm'],
        tree_storage['eye'],
    ))
    print("Done generating tree.")

    destination = sys.argv[1]
    print("Storing tree in %s" % destination)
    tree_on_disk = StringDBDict(persistent_file=destination,
                                sync_every_transactions=0,
                                write_out_every_transactions=0,
                                file_mode='c')

    # Transfer the in-memory tree entry by entry, announcing progress every
    # thousand terms.
    for position, entry in enumerate(tree_storage.iteritems(), 1):
        key, value = entry
        tree_on_disk[key] = value
        if not position % 1000:
            print("Stored %d terms." % position)

    # NOTE(review): sync_every is raised to 1 after the copy -- presumably
    # so the store syncs again from here on; confirm against StringDBDict.
    tree_on_disk.sync_every = 1
    print("Done storing.")