def process():
    """Build this node's slice of the word index.

    Walks every 32-bit hash bucket in the master index, keeps only the
    buckets assigned to this node (round-robin: bucket ordinal mod
    ``len(localnode.nodes())`` must equal ``localnode.index_offset``),
    and appends each word's content to rotating output files obtained
    from the ``gen_files()`` generator.

    Returns a dict mapping word -> {"file", "start", "chunk_size"}
    recording where each word's data was written.

    NOTE(review): relies on module-level helpers not visible in this
    chunk (``localnode``, ``build_master_index``, ``gen_files``,
    ``load_hash32``, ``write_data_main``). Indentation reconstructed
    from a whitespace-mangled source — structure below is the most
    plausible reading; confirm against the original file.
    """
    print("Starting up...")
    # This node's position and the cluster size drive the round-robin
    # partitioning of hash buckets below.
    offset = localnode.index_offset
    nnodes = len( localnode.nodes() )
    words_index = build_master_index()
    n = -1
    new_index = dict()
    print("Processing hash by hash...")
    # gen_files() is presumably a generator yielding (file_object, name)
    # pairs; .next()/.send() rotate to a fresh output file (Python 2
    # generator protocol) — TODO confirm against gen_files().
    it = gen_files()
    outfile, outfile_name = it.next()
    for hash32 in words_index:
        n += 1
        # Skip buckets that belong to other nodes.
        if n % nnodes != offset:
            continue
        data = load_hash32(hash32, words_index)
        # Nothing stored under this bucket — move on.
        if len(data) == 0:
            continue
        # hex decimal
        for word, content in data.iteritems():
            print("writing data for: %s" % word)
            start_pos, end_pos, has_space = write_data_main(outfile, word, content)
            # Record where this word's chunk lives so it can be read
            # back later: file name, byte offset, and length in bytes.
            new_index[word] = {
                "file" : outfile_name,
                "start" : start_pos,
                "chunk_size" : end_pos - start_pos
            }
            print("Bytes: %d" % end_pos)
            if not has_space:
                # Current file is full: hand it back to the generator
                # and receive the next (file, name) pair to write into.
                print("%s is full" % outfile)
                outfile, outfile_name = it.send(outfile)
                print("moving on to %s" % outfile)
    # NOTE(review): the flattened source makes the indentation of this
    # final send ambiguous; placed after the loop on the assumption it
    # lets the generator finalize/close the last open file — confirm.
    it.send(outfile)
    return new_index
# NOTE(review): the next few statements are the TAIL of a function whose
# ``def`` line is not visible in this chunk — they read ``words`` and
# ``threshold`` from the missing signature. Indentation reconstructed;
# do not treat these as top-level statements.
    index = 0
    # Accumulate counts over the (word, count) pairs until the running
    # total reaches ``threshold``; return the word at that point
    # (weighted selection over the word frequencies).
    for (word, count) in words.iteritems():
        index += count
        if threshold <= index:
            return word


def usage(cmd):
    # Print command-line usage for the mumbler script (Python 2 print
    # statements; ``cmd`` is the invoked script name, sys.argv[0]).
    print "Usage:", cmd, "word", "depth"
    print " - word: string, starting word for mumbler"
    print " - depth: int, how many words to generate"


if __name__ == "__main__":
    # Expect at least: script name, starting word, depth.
    if len(sys.argv) >= 3:
        start_word = sys.argv[1]
        try:
            start_count = int(sys.argv[2])
        except ValueError:
            # Depth argument was not an integer.
            usage(sys.argv[0])
            sys.exit(1)
        # Default index source; ``memcached`` is defined elsewhere in
        # the file — not visible in this chunk.
        words_index = memcached
        if len(sys.argv) >= 4 and sys.argv[3] == "--slow":
            # Optional slow path: rebuild the index from disk files
            # instead of using the memcached-backed index.
            print ("Loading index files from disk, this takes a while...")
            words_index = build_master_index()
    else:
        usage(sys.argv[0])
        sys.exit(1)
    # start_count - 1: presumably the starting word counts as the first
    # generated word — confirm against mumbler()'s definition.
    print mumbler(start_word, start_count - 1, words_index)