def solve_puzzle(master, output, height, width, slaves):
    global HEIGHT, WIDTH, level
    HEIGHT = height
    WIDTH = width
    level = 0
    sc = SparkContext(master, "python")
    sol = Sliding.solution(WIDTH, HEIGHT)

    """ YOUR CODE HERE """
    sol = Sliding.board_to_hash(WIDTH, HEIGHT, sol)
    new_visited = [(sol, level)]
    new_visited = sc.parallelize(new_visited)
    num = 1
    # while there are still (k, v) pairs at the current level
    while num:
        # use += (RDD union) because the mapping function does not retain
        # board sets that are not at the current global level
        new_visited += new_visited.flatMap(bfs_map)
        if level % 4 == 3:  # only reduce and filter every fourth iteration, for performance
            new_visited = new_visited.reduceByKey(bfs_reduce)
            new_visited = new_visited.partitionBy(PARTITION_COUNT)  # hash-partition on the integer keys
            num = new_visited.filter(filter_func).count()  # number of elements at the current level
        level += 1
        # debugging output
        print("\n\n\nLevel " + str(level) + "\n\n\n")

    """ YOUR OUTPUT CODE HERE """
    new_visited.coalesce(slaves).saveAsTextFile(output)
    sc.stop()
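# The driver above calls two helpers that are never shown. A minimal sketch,
# assuming the (board hash, level) pair convention used throughout these
# snippets; these bodies are plausible reconstructions, not the original
# author's code.
def bfs_reduce(value1, value2):
    # When the same board is reached at two different levels, keep the
    # smaller one; BFS guarantees the smaller level is the true distance.
    return min(value1, value2)

def filter_func(pair):
    # Keep only pairs discovered on the current frontier, i.e. boards
    # whose recorded level equals the global level counter.
    return pair[1] == level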
def get_children(_sc, position, graph):
    """
    Returns the children of a position.

    Arguments:
        _sc : SparkContext
            The Spark context configuration.
        position : tuple or RDD
            A position, e.g. ('A', 'B', 'C', '-'), or an RDD of positions.
        graph : RDD
            RDD object which represents the graph.
    """
    # w and h are assumed to be the module-level board dimensions
    if not isinstance(position, tuple):
        # position is an RDD of boards: expand every board it contains
        print("not tuple")
        children = [Sliding.children(w, h, x) for x in position.collect()]
        print("start parallelizing")
        children = _sc.parallelize(sum(children, []))
        print("children parallelized")
        return children
    # obtain the children of the single position
    children = _sc.parallelize(Sliding.children(w, h, position))
    return children
def solve_puzzle(master, output, height, width, slaves):
    global HEIGHT, WIDTH, level
    HEIGHT = height
    WIDTH = width
    level = 0
    sc = SparkContext(master, "python")

    """ YOUR CODE HERE """
    sol_board = Sliding.solution(WIDTH, HEIGHT)
    sol = Sliding.board_to_hash(WIDTH, HEIGHT, sol_board)
    all_sols = sc.parallelize([(sol, level)])  # create an RDD
    before_count = 1
    k = 0  # counter for iterations of partitionBy
    c = 0  # counter for iterations of count()
    while True:
        level += 1
        all_sols = all_sols.flatMap(bfs_map)
        if k % 4 == 0:  # every 4 iterations, use partitionBy
            all_sols = all_sols.partitionBy(PARTITION_COUNT)
        all_sols = all_sols.reduceByKey(bfs_reduce)
        if c % 2 == 0:  # every 2 iterations, use count()
            after_count = all_sols.count()
            if before_count == after_count:
                break
            before_count = after_count
        k += 1
        c += 1

    """ YOUR OUTPUT CODE HERE """
    all_sols = all_sols.map(lambda a: (a[1], a[0])).sortByKey()
    all_sols.coalesce(slaves).saveAsTextFile(output)
    sc.stop()
def solve_puzzle(master, output, height, width, slaves):
    global HEIGHT, WIDTH, level
    HEIGHT = height
    WIDTH = width
    level = 0
    sc = SparkContext(master, "python")

    """ YOUR CODE HERE """
    """ YOUR MAP REDUCE PROCESSING CODE HERE """
    solution = Sliding.solution(WIDTH, HEIGHT)
    sol = Sliding.board_to_hash(WIDTH, HEIGHT, solution)
    data = sc.parallelize([(sol, level)])
    counter = 0
    curLen = 1
    while counter < curLen:
        level += 1
        data = data.flatMap(bfs_flat_map)
        if level % 12 == 0:
            data = data.partitionBy(PARTITION_COUNT)
        data = data.reduceByKey(bfs_reduce)
        if level % 6 == 0:
            counter = curLen
            curLen = data.count()

    """ YOUR OUTPUT CODE HERE """
    data.coalesce(slaves).saveAsTextFile(output)
    sc.stop()
def bfs_map(value):
    """ YOUR CODE HERE """
    # expand every board unconditionally; deduplication is left to bfs_reduce
    return_list = [value]
    children = Sliding.children(WIDTH, HEIGHT,
                                Sliding.hash_to_board(WIDTH, HEIGHT, value[0]))
    for child in children:
        return_list.append((Sliding.board_to_hash(WIDTH, HEIGHT, child), value[1] + 1))
    return return_list
def solve_puzzle(master, output, height, width, slaves):
    global HEIGHT, WIDTH, level
    HEIGHT = height
    WIDTH = width
    level = 0
    sc = SparkContext(master, "python")

    """ YOUR CODE HERE """
    sol = Sliding.board_to_hash(WIDTH, HEIGHT, Sliding.solution(WIDTH, HEIGHT))
    RDD = sc.parallelize([(sol, level)])
    count = RDD.count()
    RDD_count = 0
    search = True
    k = 1

    """ YOUR MAP REDUCE PROCESSING CODE HERE """
    while search:
        if k % 3 == 0:
            # repartition every third iteration (PARTITION_COUNT is 16)
            RDD = RDD.flatMap(bfs_map).partitionBy(PARTITION_COUNT).reduceByKey(bfs_reduce)
        else:
            RDD = RDD.flatMap(bfs_map).reduceByKey(bfs_reduce)
        if k % 2 == 0:
            RDD_count = RDD.count()
            if RDD_count == count:
                search = False
            count = RDD_count
        k = k + 1
        level = level + 1

    """ YOUR OUTPUT CODE HERE """
    RDD = RDD.map(swap_map)
    RDD.coalesce(slaves).saveAsTextFile(output)
    # outputLst = RDD.collect()
    # for elem in outputLst:
    #     output(str(elem[0]) + " " + str(elem[1]))  # output the elements
    sc.stop()
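# swap_map is not shown above. A minimal sketch, assuming it flips each
# (board hash, level) pair into (level, board hash) so the saved text file
# is keyed by BFS level; this body is an assumption.
def swap_map(value):
    return (value[1], value[0])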
def solve_puzzle(master, output, height, width, slaves):
    # Global constants that will be shared across all map and reduce instances.
    # You can also reference these in any helper functions you write.
    global HEIGHT, WIDTH, level

    # Initialize global constants
    HEIGHT = height
    WIDTH = width
    level = 0  # this "constant" will change, but it remains constant for every MapReduce job

    sc = SparkContext(master, "python")

    # The solution configuration for this sliding puzzle. You will begin exploring the tree from this node.
    sol = Sliding.solution(WIDTH, HEIGHT)

    """ YOUR MAP REDUCE PROCESSING CODE HERE """
    soln = sc.parallelize([(Sliding.board_to_hash(WIDTH, HEIGHT, sol), 0)])
    num = 1
    temp = 0
    while temp != num:
        if level % 16 == 0:
            soln = soln.partitionBy(PARTITION_COUNT, hash)
        level = level + 1
        soln = soln.flatMap(bfs_map).reduceByKey(bfs_reduce)
        temp = num
        num = soln.count()

    """ YOUR OUTPUT CODE HERE """
    soln.coalesce(slaves).saveAsTextFile(output)
    sc.stop()
def bfs_map(value):  # value is the (puzzle, level) tuple
    """ YOUR CODE HERE """
    lst = [value]
    if value[1] == level:
        children = Sliding.children(WIDTH, HEIGHT,
                                    Sliding.hash_to_board(WIDTH, HEIGHT, value[0]))
        for child in children:
            lst.append((Sliding.board_to_hash(WIDTH, HEIGHT, child), level + 1))
    return lst
def bfs_map(value):
    """ YOUR CODE HERE """
    result = []
    if value[1] == level - 1:
        result = Sliding.children(WIDTH, HEIGHT,
                                  Sliding.hash_to_board(WIDTH, HEIGHT, value[0]))
        for i in range(0, len(result)):
            result[i] = (Sliding.board_to_hash(WIDTH, HEIGHT, result[i]), level)
    result.append(value)
    return result
def bfs_map(value):
    """ YOUR CODE HERE """
    mapVal = [(value[0], value[1])]
    if value[1] == level:
        pos = Sliding.hash_to_board(WIDTH, HEIGHT, value[0])
        for cpos in Sliding.children(WIDTH, HEIGHT, pos):
            cpos2 = Sliding.board_to_hash(WIDTH, HEIGHT, cpos)
            mapVal.append((cpos2, level + 1))
    return mapVal
def bfs_map(value):
    """ YOUR CODE HERE """
    if value[1] != (level - 1):
        return [value]
    else:
        children = Sliding.children(WIDTH, HEIGHT,
                                    Sliding.hash_to_board(WIDTH, HEIGHT, value[0]))
        childList = [value]
        for child in children:
            childList.append((Sliding.board_to_hash(WIDTH, HEIGHT, child), level))
        return childList
def bfs_map(value):
    items = []
    if value[1] < level:
        items.append((value[0], value[1]))
    if value[1] == level - 1:
        children_board = Sliding.hash_to_board(WIDTH, HEIGHT, value[0])
        children = Sliding.children(WIDTH, HEIGHT, children_board)
        for child in children:
            items.append((Sliding.board_to_hash(WIDTH, HEIGHT, child), value[1] + 1))
    return items
def solve_sliding_puzzle(master, output, height, width):
    """
    Solves a sliding puzzle of the provided height and width.
     master: specifies master url for the spark context
     output: function that accepts string to write to the output file
     height: height of puzzle
     width: width of puzzle
    """
    # Set up the spark context. Use this to create your RDD
    sc = SparkContext(master, "python")

    # Global constants that will be shared across all map and reduce instances.
    # You can also reference these in any helper functions you write.
    global HEIGHT, WIDTH, level, prev_len, PARTITION_COUNT

    # Initialize global constants
    HEIGHT = height
    WIDTH = width
    level = 0  # this "constant" will change, but it remains constant for every MapReduce job

    # The solution configuration for this sliding puzzle. You will begin exploring the tree from this node.
    sol = Sliding.solution(WIDTH, HEIGHT)

    """ YOUR MAP REDUCE PROCESSING CODE HERE """
    level_nodes = sc.parallelize([(Sliding.board_to_hash(WIDTH, HEIGHT, sol), 0)])
    PARTITION_COUNT = 16
    prev_len = 0
    count = 0
    while True:
        level_nodes = level_nodes.flatMap(bfs_map).reduceByKey(bfs_reduce)
        next_len = level_nodes.count()
        if next_len == prev_len:
            break
        prev_len = next_len
        count += 1
        if count == 10:  # repartition every tenth iteration
            count = 0
            level_nodes = level_nodes.partitionBy(PARTITION_COUNT)

    """ YOUR OUTPUT CODE HERE """
    level_nodes = level_nodes.map(lambda x: (x[1], x[0]))
    output_string = ""
    for l in level_nodes.sortByKey(True).collect():
        output_string += str(l) + "\n"
    output(output_string)
    sc.stop()
def solve_puzzle(master, output, height, width, slaves):
    global HEIGHT, WIDTH, level
    HEIGHT = height
    WIDTH = width
    level = 0
    sc = SparkContext(master, "python")

    """ YOUR CODE HERE """
    sol = Sliding.solution(WIDTH, HEIGHT)
    rdd = sc.parallelize([(sol, level)])
    prev_count = 0
    count = rdd.count()
    k = 0
    i = 0
    while prev_count < count:
        rdd = rdd.flatMap(bfs_map)
        if k % 4 == 0:
            # repartition every fourth iteration, hashing on the key
            rdd = rdd.partitionBy(16, partitionHash)
        rdd = rdd.reduceByKey(bfs_reduce)
        level += 1
        if i % 4 == 0:
            prev_count = count
            count = rdd.count()
        k += 1
        i += 1

    # coalesce to the number of workers before writing the output
    rdd.coalesce(slaves).saveAsTextFile(output)
    sc.stop()
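# partitionHash is not shown above. A minimal sketch, assuming the keys are
# the hashable board tuples used in this driver, so Python's built-in hash
# is enough; this body is an assumption.
def partitionHash(key):
    return hash(key)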
def bfs_map(arg):
    """ YOUR CODE HERE """
    if arg[1] == level:
        children = Sliding.children(WIDTH, HEIGHT,
                                    Sliding.hash_to_board(WIDTH, HEIGHT, arg[0]))
        toreturn = [arg]
        for position in children:
            toreturn.append((Sliding.board_to_hash(WIDTH, HEIGHT, position), level + 1))
        return toreturn
    else:
        return [arg]
def bfs_flat_map(value):
    """ YOUR CODE HERE """
    re = [(value[0], value[1])]
    if value[1] == (level - 1):  # check if it is at the previous level
        # keys are stored as hashed ints, so convert back to a board
        # before expanding it into its children
        board = Sliding.hash_to_board(WIDTH, HEIGHT, value[0])
        for each in Sliding.children(WIDTH, HEIGHT, board):
            # instead of storing boards as keys, we store the corresponding hashed ints as keys
            re.append((Sliding.board_to_hash(WIDTH, HEIGHT, each), level))
    return re
def solve_puzzle(master, output, height, width, slaves):
    """
    Solves a sliding puzzle of the provided height and width.
     master: specifies master url for the spark context
     output: function that accepts string to write to the output file
     height: height of puzzle
     width: width of puzzle
    """
    # Set up the spark context. Use this to create your RDD
    sc = SparkContext(master, "python")

    # Global constants that will be shared across all map and reduce instances.
    # You can also reference these in any helper functions you write.
    global HEIGHT, WIDTH, level

    # Initialize global constants
    HEIGHT = height
    WIDTH = width
    level = 0  # constant within each MapReduce job; incremented between levels

    # The solution configuration for this sliding puzzle. You will begin exploring the tree from this node.
    sol = Sliding.solution(WIDTH, HEIGHT)

    """ YOUR MAP REDUCE PROCESSING CODE HERE """
    rdd = sc.parallelize([(Sliding.board_to_hash(WIDTH, HEIGHT, sol), 0)])
    prevcount = 0
    c = 1
    k = 0
    j = 0
    while c != prevcount:
        if k == 16:  # repartition every 16 levels
            rdd = rdd.partitionBy(PARTITION_COUNT, hash)
            k = 0
        rdd = rdd.flatMap(bfs_map) \
                 .reduceByKey(bfs_reduce, numPartitions=16)
        if j == 8:  # recount every 8 levels
            prevcount = c
            c = rdd.count()
            j = 0
        j += 1
        level += 1
        k += 1

    rdd.coalesce(slaves).saveAsTextFile(output)
    sc.stop()
def solve_sliding_puzzle(master, output, height, width):
    """
    Solves a sliding puzzle of the provided height and width.
     master: specifies master url for the spark context
     output: function that accepts string to write to the output file
     height: height of puzzle
     width: width of puzzle
    """
    # Set up the spark context. Use this to create your RDD
    sc = SparkContext(master, "python")

    # Global constants that will be shared across all map and reduce instances.
    # You can also reference these in any helper functions you write.
    global HEIGHT, WIDTH, level

    # Initialize global constants
    HEIGHT = height
    WIDTH = width
    level = 0  # this "constant" will change, but it remains constant for every MapReduce job

    # The solution configuration for this sliding puzzle. You will begin exploring the tree from this node.
    sol = Sliding.solution(WIDTH, HEIGHT)

    """ YOUR MAP REDUCE PROCESSING CODE HERE """

    """ YOUR OUTPUT CODE HERE """

    sc.stop()
def bfs_map(value):
    """ YOUR CODE HERE """
    # Pseudocode in the original: while the current level has nodes, flatMap
    # each node into (child, level + 1) pairs via Sliding.children.
    # Expressed as a proper map function over a single (board, level) pair:
    result = [value]
    if value[1] == level:
        for child in Sliding.children(WIDTH, HEIGHT, value[0]):
            result.append((child, level + 1))
    return result
def slidingBfsSolver(puzzle, width, height, max_level=-1):
    """ BFS-visits the whole puzzle graph and builds the structures:
     * level_to_pos
     * pos_to_level
    """
    solution = puzzle                 # the solution of the puzzle
    level = 0                         # the solution is level 0
    level_to_pos[level] = [solution]  # level 0 consists solely of the solution
    pos_to_level[solution] = level
    # While there are positions on the frontier level
    while level_to_pos[level] and (max_level == -1 or level < max_level):
        level += 1                 # advance to the next level
        level_to_pos[level] = []   # create an empty list for the new level
        # For every position on the previous level
        for position in level_to_pos[level - 1]:
            # For every child of each of those positions
            for child in Sliding.children(width, height, position):
                # If this is the first time we have seen the child
                if child not in pos_to_level:
                    # Record it in both mappings; it becomes part of the new frontier
                    pos_to_level[child] = level
                    level_to_pos[level].append(child)
    del level_to_pos[level]  # the last level is always empty, so remove it
    pprint(level_to_pos)
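# A hypothetical driver for the serial solver above; it assumes the Sliding
# module and supplies the two global dicts the solver fills in. The 2x2 board
# size is chosen only to keep the printed output short.
from pprint import pprint

level_to_pos = {}
pos_to_level = {}

if __name__ == "__main__":
    WIDTH, HEIGHT = 2, 2
    start = Sliding.solution(WIDTH, HEIGHT)
    slidingBfsSolver(start, WIDTH, HEIGHT)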
def bfs_map(value):
    """ YOUR CODE HERE """
    # Get the children, make (board, level) tuples, and return them in a list;
    # the parent is kept so it stays linked with the children emitted below.
    result = [value]
    if value[1] == level:
        for child in Sliding.children(WIDTH, HEIGHT, value[0]):
            result.append((child, level + 1))
    return result
def solve_puzzle(master, output, height, width, slaves):
    global HEIGHT, WIDTH, level
    HEIGHT = height
    WIDTH = width
    level = 0
    sc = SparkContext(master, "python")

    """ YOUR CODE HERE """
    NUM_WORKERS = slaves
    sol = Sliding.solution(WIDTH, HEIGHT)

    """ MAP REDUCE PROCESSING CODE HERE """
    level_pos = sc.parallelize((make_state(level, sol),))
    prev_size, size = 0, 1
    while prev_size != size:
        level += 1
        if level % 10 == 0:
            level_pos = level_pos.partitionBy(PARTITION_COUNT)
        level_pos = level_pos.flatMap(bfs_flat_map).reduceByKey(bfs_reduce)
        prev_size = size
        size = level_pos.count()

    """ OUTPUT CODE HERE """
    level_pos = level_pos.map(unhash_board)
    level_pos.coalesce(NUM_WORKERS).saveAsTextFile(output)
    sc.stop()
def solve_puzzle(master, output, height, width, slaves):
    global HEIGHT, WIDTH, level
    HEIGHT = height
    WIDTH = width
    level = 0
    sc = SparkContext(master, "python")
    sol = Sliding.solution(WIDTH, HEIGHT)

    """ YOUR CODE HERE """
    myRdd = sc.parallelize([(sol, level)])  # myRdd = [(('A', 'B', 'C', '-'), 0)]
    myRdd = myRdd.flatMap(bfs_flat_map).reduceByKey(bfs_reduce)
    prev_num = 0
    pos_num = myRdd.count()
    while prev_num != pos_num:
        level += 1
        prev_num = pos_num
        myRdd = myRdd.flatMap(bfs_flat_map)
        if level % 4 == 0:
            myRdd = myRdd.partitionBy(16)
        myRdd = myRdd.reduceByKey(bfs_reduce)
        pos_num = myRdd.count()

    """ YOUR OUTPUT CODE HERE """
    myRdd = myRdd.map(lambda a: (a[1], a[0])).sortByKey().collect()  # myRdd becomes a list
    for each in myRdd:
        output(str(each[0]) + " " + str(each[1]))
    sc.stop()
def solve_sliding_puzzle(master, output, height, width):
    """
    Solves a sliding puzzle of the provided height and width.
     master: specifies master url for the spark context
     output: function that accepts string to write to the output file
     height: height of puzzle
     width: width of puzzle
    """
    # Set up the spark context. Use this to create your RDD
    sc = SparkContext(master, "python")

    # Global constants that will be shared across all map and reduce instances.
    # You can also reference these in any helper functions you write.
    global HEIGHT, WIDTH, level

    # Initialize global constants
    HEIGHT = height
    WIDTH = width
    level = 0  # this "constant" will change, but it remains constant for every MapReduce job

    # The solution configuration for this sliding puzzle. You will begin exploring the tree from this node.
    sol = Sliding.solution(WIDTH, HEIGHT)

    """ YOUR MAP REDUCE PROCESSING CODE HERE """
    previous = 0  # number of elements in the previous rdd
    curr = 1      # number of elements in the current rdd
    # The growth of curr over previous is the while-loop condition: if it is
    # positive we found new (pos, level) pairs and keep looping; otherwise the
    # loop ends because there are no new (pos, level) pairs.
    pos_to_level = [(sol, 0)]
    rdd = sc.parallelize(pos_to_level).partitionBy(16)
    # initialization of the rdd, partitioned by 16, a number we found boosts our speed
    while curr - previous > 0:
        rdd = rdd.flatMap(bfs_map, True). \
            flatMap(bfs_map, True). \
            flatMap(bfs_map, True). \
            flatMap(bfs_map, True).reduceByKey(bfs_reduce, 16)
        # We do 4 flatMaps at a time to avoid calling count(), a costly method, every level.
        previous = curr
        curr = rdd.count()
        level += 4  # since we do 4 flatMaps, level increases by 4 each pass

    level_to_pos = rdd.map(exchange, True).sortByKey(True).collect()
    # Exchange the rdd's (pos, level) pairs into (level, pos) pairs, sortByKey
    # so they are ordered by increasing level starting from (0, sol), then
    # collect() the result into the list level_to_pos.

    """ YOUR OUTPUT CODE HERE """
    # Use a while loop to do the output job.
    i = 0
    while i < len(level_to_pos):
        output(str(level_to_pos[i][0]) + " " + str(level_to_pos[i][1]))
        i += 1
    sc.stop()
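# exchange is not shown above. A minimal sketch, assuming it flips each
# (pos, level) pair into (level, pos) so that sortByKey orders the output by
# increasing BFS level, matching the "level pos" lines written out above.
def exchange(pair):
    return (pair[1], pair[0])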
def bfs_flat_map(state):
    """ Expand a state into its children when it sits on the frontier. """
    self_list = [state]
    if get_level(state) == level - 1:  # expand children if state is on the current (highest) level
        children = Sliding.children(WIDTH, HEIGHT, get_board(state))
        return [make_state(level, board) for board in children] + self_list
    return self_list
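# The driver and flat map above rely on small state helpers that are not
# shown. A minimal sketch, assuming a state is a (board hash, level) pair;
# these bodies are plausible reconstructions, not the original author's code.
def make_state(level, board):
    return (Sliding.board_to_hash(WIDTH, HEIGHT, board), level)

def get_level(state):
    return state[1]

def get_board(state):
    return Sliding.hash_to_board(WIDTH, HEIGHT, state[0])

def unhash_board(state):
    # convert back to a readable (board, level) pair for the output file
    return (Sliding.hash_to_board(WIDTH, HEIGHT, state[0]), state[1])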
def bfs_map(value):
    """
    Takes in a (board hash, level) pair, creates all of the children of that
    board state if it is on the same level as the global level, and returns
    them in a list.
    """
    """ YOUR CODE HERE """
    child_list = []
    # Check that we are at the right level, so we only expand those boards
    if value[1] == level:
        temp = Sliding.hash_to_board(WIDTH, HEIGHT, value[0])
        iter_list = Sliding.children(WIDTH, HEIGHT, temp)
        for child in iter_list:
            child_list += [(Sliding.board_to_hash(WIDTH, HEIGHT, child), level + 1)]
    # Spark's flatMap expects a list when we want to emit multiple pairs,
    # unlike Hadoop, which lets a mapper emit them one at a time.
    return child_list
import math

def solve_puzzle(master, output, height, width, slaves):
    global HEIGHT, WIDTH, level
    HEIGHT = height
    WIDTH = width
    level = 0
    sc = SparkContext(master, "python")

    """ YOUR CODE HERE """
    sol = Sliding.board_to_hash(WIDTH, HEIGHT, Sliding.solution(WIDTH, HEIGHT))
    RDD = sc.parallelize([(sol, level)])
    counter = RDD.count()
    k, comp, data = 0, 0, 0
    repar = 0
    bound = (math.sqrt(WIDTH * HEIGHT) - 1) * math.log(math.factorial(WIDTH * HEIGHT), 2)

    # run mapreduce without counting while under the lower bound on the depth
    while k <= bound:
        RDD = RDD.flatMap(bfs_map)
        if repar % 8 == 0:
            RDD = RDD.partitionBy(PARTITION_COUNT, hash)
        RDD = RDD.reduceByKey(bfs_reduce)
        level += 1
        k += 1
        repar += 1

    k = 0
    repar = 0
    # run mapreduce until the number of elements in the RDD stops increasing
    while True:
        RDD = RDD.flatMap(bfs_map)
        if repar % 8 == 0:
            RDD = RDD.partitionBy(PARTITION_COUNT, hash)
        RDD = RDD.reduceByKey(bfs_reduce)
        if k % 3 == 0:
            comp = RDD.count()
            if comp == counter:
                break
            else:
                counter = comp
        level += 1
        k += 1
        repar += 1

    # output code
    RDD = RDD.map(revert_back)
    RDD.coalesce(6).saveAsTextFile(output)
    sc.stop()
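# revert_back is not shown above. A minimal sketch, assuming it turns each
# stored (board hash, level) pair back into a readable (level, board) pair
# before the RDD is saved; this body is an assumption.
def revert_back(value):
    return (value[1], Sliding.hash_to_board(WIDTH, HEIGHT, value[0]))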
def press_map(data):
    """ YOUR CODE HERE """
    # expand the pair into (child, level + 1) pairs when it is on the frontier
    if level == data[1]:
        return [data] + [(k, level + 1) for k in Sliding.children(WIDTH, HEIGHT, data[0])]
    else:
        return [data]
def bfs_flatmap(board):
    if board[1] == (level - 1):
        children_list = Sliding.children(WIDTH, HEIGHT, board[0])
        level_list = [level for _ in range(len(children_list))]
        # wrap zip in list() so append works under Python 3 as well
        children_list = list(zip(children_list, level_list))
        children_list.append(board)
        return children_list
    return [board]
def bfs_map(value):
    """ YOUR CODE HERE """
    prev = [(value[0], value[1])]
    if value[1] == level:
        # convert the hashed int key back into a board before expanding it
        currBoard = Sliding.hash_to_board(WIDTH, HEIGHT, value[0])
        children = Sliding.children(WIDTH, HEIGHT, currBoard)
        curr = []
        for i in range(0, len(children)):
            # store the children as hashed ints to match the key convention
            curr.append((Sliding.board_to_hash(WIDTH, HEIGHT, children[i]), level + 1))
        return prev + curr
    return prev
def solve_sliding_puzzle(master, output, height, width):
    """
    Solves a sliding puzzle of the provided height and width.
     master: specifies master url for the spark context
     output: function that accepts string to write to the output file
     height: height of puzzle
     width: width of puzzle
    """
    # Set up the spark context. Use this to create your RDD
    sc = SparkContext(master, "python")

    # Global constants that will be shared across all map and reduce instances.
    # You can also reference these in any helper functions you write.
    global HEIGHT, WIDTH, level

    # Initialize global constants
    HEIGHT = height
    WIDTH = width
    level = 0  # this "constant" will change, but it remains constant for every MapReduce job
    print("the value of level after initialization is: %d" % level)

    # The solution configuration for this sliding puzzle. You will begin exploring the tree from this node.
    sol = Sliding.solution(WIDTH, HEIGHT)

    """ YOUR MAP REDUCE PROCESSING CODE HERE """
    sol = (sol, level)  # create the initial (solution board, level) tuple
    temp = [sol]
    print("root after initialization is: %s" % str(temp))
    RDD = sc.parallelize(temp)
    oldSize, newSize = 0, 1
    # base case: stop when no new boards appear at a level
    while oldSize < newSize:
        level += 1
        print("the value of level inside the loop is: %d" % level)
        oldSize = RDD.count()
        RDD = RDD.flatMap(bfs_map).reduceByKey(bfs_reduce)
        newSize = RDD.count()
        print("value of newSize after mapreduce: %d" % newSize)

    """ YOUR OUTPUT CODE HERE """
    # collect() is serial, not parallel, so be careful with large puzzles
    for x in RDD.collect():
        output(str(x))
    sc.stop()
def solve_sliding_puzzle(master, output, height, width):
    """
    Solves a sliding puzzle of the provided height and width.
     master: specifies master url for the spark context
     output: function that accepts string to write to the output file
     height: height of puzzle
     width: width of puzzle
    """
    # Set up the spark context. Use this to create your RDD
    sc = SparkContext(master, "python")

    # Global constants that will be shared across all map and reduce instances.
    # You can also reference these in any helper functions you write.
    global HEIGHT, WIDTH, level

    # Initialize global constants
    HEIGHT = height
    WIDTH = width
    level = 0  # this "constant" will change, but it remains constant for every MapReduce job

    # The solution configuration for this sliding puzzle. You will begin exploring the tree from this node.
    sol = Sliding.solution(WIDTH, HEIGHT)

    """ YOUR MAP REDUCE PROCESSING CODE HERE """
    # key the initial pair by hash so it matches the keys bfs_flat_map emits
    myRdd = sc.parallelize([(Sliding.board_to_hash(WIDTH, HEIGHT, sol), level)])
    myRdd = myRdd.flatMap(bfs_flat_map).reduceByKey(bfs_reduce)
    prev_num = 0
    pos_num = myRdd.count()
    while prev_num != pos_num:
        level += 1
        prev_num = pos_num
        myRdd = myRdd.flatMap(bfs_flat_map)
        if level % 4 == 0:
            myRdd = myRdd.partitionBy(16)
        myRdd = myRdd.reduceByKey(bfs_reduce)
        pos_num = myRdd.count()

    """ YOUR OUTPUT CODE HERE """
    # myRdd = myRdd.map(lambda a: (a[1], a[0])).sortByKey().collect()  # myRdd becomes a list
    # for each in myRdd:
    #     output(str(each[0]) + " " + str(each[1]))
    myRdd = myRdd.map(lambda a: (a[1], Sliding.hash_to_board(WIDTH, HEIGHT, a[0]))).sortByKey()
    sc.stop()
def bfs_map(value):
    """
    value: an element taken from the RDD.
    bfs_map only applies children() to elements at the last level in the RDD:
     - if an element is not at the last level, it is returned alone in a list;
     - if an element is at the last level, the element and its children are
       returned together in a list.
    """
    lst = [value]
    value = (Sliding.hash_to_board(WIDTH, HEIGHT, value[0]), value[1])
    if value[1] < level:
        return lst
    children = Sliding.children(WIDTH, HEIGHT, value[0])
    for each in children:
        lst.append((Sliding.board_to_hash(WIDTH, HEIGHT, tuple(each)), value[1] + 1))
    return lst
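# A hypothetical one-level expansion showing how the map above plugs into a
# driver; sc, WIDTH, HEIGHT, and level are the globals assumed throughout
# these snippets, and min stands in for a bfs_reduce that keeps the smallest
# level per board hash.
start = (Sliding.board_to_hash(WIDTH, HEIGHT, Sliding.solution(WIDTH, HEIGHT)), 0)
frontier = sc.parallelize([start])
next_level = frontier.flatMap(bfs_map).reduceByKey(min)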
def bfs_map(value):
    """ YOUR CODE HERE """
    # Add the board's children to the position list; `positions` must be a
    # fresh local list rather than a shared global for flatMap to behave.
    positions = [value]
    if value[1] == level:
        children = Sliding.children(WIDTH, HEIGHT, value[0])
        for i in range(0, len(children)):
            positions.append((children[i], level + 1))
    return positions
def bfs_flat_map(value):
    """ YOUR CODE HERE """
    childrenLst = [(value[0], value[1])]
    if value[1] == level - 1:
        for child in Sliding.children(WIDTH, HEIGHT,
                                      Sliding.hash_to_board(WIDTH, HEIGHT, value[0])):
            childrenLst.append((Sliding.board_to_hash(WIDTH, HEIGHT, child), value[1] + 1))
    return childrenLst
def bfs_map(value):
    """ YOUR CODE HERE """
    re_list = []
    if value[1] == level - 1:
        childrenlist = Sliding.children(WIDTH, HEIGHT, value[0])
        for child in childrenlist:
            re_list.append((child, level))
    return [value] + re_list
def bfs_map(value):
    """ YOUR CODE HERE """
    mapped = [value]
    if level <= value[1]:  # expand only frontier boards
        children = Sliding.children(WIDTH, HEIGHT, value[0])
        for pos in children:
            val = value[1] + 1
            mapped.append((pos, val))
    return mapped
import math

def solve_sliding_puzzle(master, output, height, width):
    """
    Solves a sliding puzzle of the provided height and width.
     master: specifies master url for the spark context
     output: function that accepts string to write to the output file
     height: height of puzzle
     width: width of puzzle
    """
    # Set up the spark context. Use this to create your RDD
    sc = SparkContext(master, "python")

    # Global constants that will be shared across all map and reduce instances.
    # You can also reference these in any helper functions you write.
    global HEIGHT, WIDTH, level

    # Initialize global constants
    HEIGHT = height
    WIDTH = width
    level = 0  # this "constant" will change, but it remains constant for every MapReduce job

    # The solution configuration for this sliding puzzle. You will begin exploring the tree from this node.
    sol = Sliding.solution(WIDTH, HEIGHT)

    myRDD = sc.parallelize([(sol, level)])
    counter = myRDD.count()
    k = 0
    comp = 0
    repar = 0
    # run mapreduce without counting while under the lower bound on the depth
    while k <= (math.sqrt(WIDTH * HEIGHT) - 1) * math.log(math.factorial(WIDTH * HEIGHT), 2):
        myRDD = myRDD.flatMap(sol_map)
        if repar % 8 == 0:
            myRDD = myRDD.partitionBy(6)
        myRDD = myRDD.reduceByKey(bfs_reduce)
        repar += 1
        level += 1
        k += 1

    k = 0
    # keep going until the number of elements stops increasing
    while True:
        myRDD = myRDD.flatMap(sol_map)
        myRDD = myRDD.reduceByKey(bfs_reduce)
        if k % 3 == 0:
            comp = myRDD.count()
            if comp == counter:
                break
            else:
                counter = comp
        level += 1
        k += 1

    myRDD = myRDD.map(bfs_map).collect()
    result = ""
    for each in myRDD:
        result += str(each) + "\n"
    output(result)
    sc.stop()
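# sol_map is not shown above. A minimal sketch, assuming it expands a frontier
# (board, level) pair into its children, matching the other map functions in
# this collection; the name's pairing with a separate output-formatting
# bfs_map is the driver's convention, and this body is an assumption.
def sol_map(value):
    result = [value]
    if value[1] == level:  # only expand boards on the current frontier
        for child in Sliding.children(WIDTH, HEIGHT, value[0]):
            result.append((child, level + 1))
    return result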
def bfs_map(value):  # value is a (position, level) pair
    result = [value]
    if value[1] == level:  # if the node is at the frontier
        for child in Sliding.children(WIDTH, HEIGHT, value[0]):
            result.append((child, level + 1))
    return result
def bfs_map(value):
    """ YOUR CODE HERE """
    result = []
    if value[1] == level - 1:
        result = Sliding.children(WIDTH, HEIGHT, value[0])
        for i in range(0, len(result)):
            result[i] = (result[i], level)
    result.append(value)
    return result
def solve_puzzle(master, output, height, width, slaves):
    global HEIGHT, WIDTH, level
    HEIGHT = height
    WIDTH = width
    level = 0
    sc = SparkContext(master, "python")

    """ YOUR CODE HERE """
    sol = Sliding.solution(WIDTH, HEIGHT)
    hashID = Sliding.board_to_hash(WIDTH, HEIGHT, sol)  # board (obj) to hash (int)
    rdd = sc.parallelize([(hashID, level)])
    prev_count = 0
    count = rdd.count()
    k = 0
    i = 0
    while prev_count < count:
        rdd = rdd.flatMap(bfs_map)
        if k % 4 == 0:
            # repartition every fourth level, hashing on the integer key
            rdd = rdd.partitionBy(16, hash)
        rdd = rdd.reduceByKey(bfs_reduce)
        level += 1
        if i % 4 == 0:
            prev_count = count
            count = rdd.count()
        k += 1
        i += 1

    # coalesce to the number of workers (6 or 12) before writing the output
    rdd.coalesce(slaves).saveAsTextFile(output)
    sc.stop()
def bfs_map(value):
    """ YOUR CODE HERE """
    # value is the (board, level) tuple
    lst = [value]
    if value[1] == level:
        children = Sliding.children(WIDTH, HEIGHT, value[0])  # list of children
        for child in children:
            lst.append((child, level + 1))
    return lst
def bfs_map(value):
    """ YOUR CODE HERE """
    if value[1] != (level - 1):
        return [value]
    else:
        children = Sliding.children(WIDTH, HEIGHT, value[0])
        childList = [value]
        for child in children:
            childList.append((child, level))
        return childList
def bfs_map(value):
    """ YOUR CODE HERE """
    prev = [(value[0], value[1])]
    if value[1] == level:
        children = Sliding.children(WIDTH, HEIGHT, value[0])
        curr = []
        for i in range(0, len(children)):
            curr.append((children[i], level + 1))
        return prev + curr
    return prev