# distributed_pagerank.py import mr s = 0.85 n = 100 # number of pages cluster = mr.cluster(1) # set up a distributed hash describing the random web cluster.create_dict("web.dict",xrange(n)) #cluster.map("initialize_web","web.dict",[n]) cluster.mr("test_mr",[],[],"web.dict","web.dict","test") iteration = 1 change = 2 # initial estimate of error tolerance = 1.0/n # desired final bound on error #while change > tolerance: # print "Iteration: "+str(iteration) # Run the MapReduce job used to compute the inner product # between the vector of dangling pages and the estimated # PageRank. # cluster.mr("ip",[n,s],[n,s],"web","web") # ip = cluster.get_value("ip_out",0) # Needed in case there are no dangling pages, in which case # MapReduce returns ip as None. # if ip == None: ip = 0
# test_mapreduce.py # # Used to test the mr.py MapReduce library's mapreduce functionality. # # The program has two parameters: n, and cluster_size, defined in the first # two lines of code. The program computes the sum of j*j over the range j # from 1 to n, using a MapReduce job running on cluster_size machines. # # The result should be n*(n+1)*(2n+1)/6 n = 500 cluster_size = 1 import mr cluster = mr.cluster(cluster_size) cluster.create_dict("integers.dict",xrange(n)) cluster.create_dict("sum_of_squares.dict",[0]) cluster.mr("test_mapreduce_fn.py",[],[],"integers.dict","sum_of_squares.dict","answer") print "Computing the sum of squares of integers from 1 to "+str(n)+"." print "The correct answer should be "+str(n*(n+1)*(2*n+1)/6)+"." print "The MapReduce job returns "+str(cluster.get_dict_value("sum_of_squares.dict",0)["answer"])+"." cluster.shutdown()