Example #1
def process():
    print("Starting up...")
    offset = localnode.index_offset
    nnodes = len(localnode.nodes())
    words_index = build_master_index()
    n = -1
    new_index = dict()
    print("Processing hash by hash...")
    it = gen_files()
    outfile, outfile_name = next(it)
    for hash32 in words_index:
        n += 1
        # Round-robin partitioning: this node handles every nnodes-th hash.
        if n % nnodes != offset:
            continue

        data = load_hash32(hash32, words_index)
        if len(data) == 0:
            continue
        for word, content in data.items():
            print("writing data for: %s" % word)
            start_pos, end_pos, has_space = write_data_main(outfile, word, content)
            new_index[word] = {
                "file": outfile_name,
                "start": start_pos,
                "chunk_size": end_pos - start_pos,
            }
            print("Bytes: %d" % end_pos)
            if not has_space:
                print("%s is full" % outfile_name)
                # Hand the full file back to the generator; it returns a fresh one.
                outfile, outfile_name = it.send(outfile)
                print("moving on to %s" % outfile_name)

    # Hand the last file back so the generator can close it.
    it.send(outfile)

    return new_index
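
The file rotation in process() relies on a generator protocol: gen_files yields (file_object, file_name) pairs and receives each finished file back through send(), which lets it close the full chunk before opening the next one. The source does not show gen_files itself; the following is a minimal sketch under that assumption, with a hypothetical naming scheme:

def gen_files(prefix="chunk"):
    # Hypothetical sketch: yields (file_object, file_name) pairs; each
    # send(full_file) closes the finished file and rotates to a new one.
    n = 0
    while True:
        name = "%s_%04d.dat" % (prefix, n)
        f = open(name, "wb")
        finished = yield f, name  # resumed by it.send(full_file)
        finished.close()
        n += 1

Note that the final it.send(outfile) in process() closes the last chunk but, under this sketch, also opens one unused spare file; a cleaner variant would call it.close() and wrap the generator body in try/finally.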
Example #2
import sys


def weighted_choice(words, threshold):
    # Signature inferred; the original def line is missing from the snippet.
    index = 0
    for word, count in words.items():
        index += count
        if threshold <= index:
            return word
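
The threshold walk above is the second half of weighted random selection: draw a threshold uniformly from [1, total_count], then return the word whose cumulative count first covers it. A sketch of a driver under that assumption (pick_next and the sample dict are hypothetical, not from the source):

import random

def pick_next(words):
    total = sum(words.values())  # total weight across all candidate words
    return weighted_choice(words, random.randint(1, total))

For example, with words = {"cat": 3, "dog": 1}, pick_next returns "cat" about three times as often as "dog".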


def usage(cmd):
    print("Usage: %s word depth" % cmd)
    print(" - word: string, starting word for mumbler")
    print(" - depth: int, how many words to generate")


if __name__ == "__main__":
    if len(sys.argv) >= 3:
        start_word = sys.argv[1]
        try:
            start_count = int(sys.argv[2])
        except ValueError:
            usage(sys.argv[0])
            sys.exit(1)
        # Default to the memcached-backed index (client defined elsewhere).
        words_index = memcached
        if len(sys.argv) >= 4 and sys.argv[3] == "--slow":
            print("Loading index files from disk, this takes a while...")
            words_index = build_master_index()
    else:
        usage(sys.argv[0])
        sys.exit(1)

    print(mumbler(start_word, start_count - 1, words_index))
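
For reference, a hypothetical invocation matching the argument parsing above (the script name mumbler.py is an assumption):

python mumbler.py hello 10 --slow

This starts the chain from "hello", passes a depth of 10 (the code hands start_count - 1 to mumbler), and the optional --slow flag rebuilds the index from disk instead of using memcached.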