def postfunc_no_combine(_):
    """Send every buffered (key, value) pair to the C++ side without combining.

    The messages go out in this order: the "Functional#reduce_by_key" tag,
    ``GlobalVar.reduce_by_key_list``, the number of buffered pairs as a
    decimal string, and then each serialized key followed by its serialized
    value. Afterwards ``GlobalVar.reduce_by_key_store`` is rebound to a new
    empty list.

    The single positional argument is ignored.
    """
    to_cpp = GlobalSocket.pipe_to_cpp
    to_cpp.send("Functional#reduce_by_key")
    to_cpp.send(GlobalVar.reduce_by_key_list)

    pending = GlobalVar.reduce_by_key_store
    to_cpp.send(str(len(pending)))
    for key, value in pending:
        to_cpp.send(Serializer.dumps(key))
        to_cpp.send(Serializer.dumps(value))

    GlobalVar.reduce_by_key_store = []
def load(op):
    """Yield grouped values received from the C++ side, one key per step.

    First sends the "Functional#group_by_key_end" tag and
    ``op.op_param[OperationParam.list_str]``. It then reads messages in a
    loop: a serialized key, a count, and that many serialized values. A falsy
    key message ends the stream.

    Each step yields a single-element list ``[(key, values)]`` where
    ``values`` is the list of deserialized values for that key.
    """
    GlobalSocket.pipe_to_cpp.send("Functional#group_by_key_end")
    GlobalSocket.pipe_to_cpp.send(op.op_param[OperationParam.list_str])

    while True:
        raw_key = GlobalSocket.pipe_from_cpp.recv()
        if not raw_key:
            # An empty message marks the end of the stream.
            return
        group_key = Serializer.loads(raw_key)
        count = int(GlobalSocket.pipe_from_cpp.recv())
        values = [Serializer.loads(GlobalSocket.pipe_from_cpp.recv())
                  for _ in xrange(count)]
        yield [(group_key, values)]
def postfunc_combine_hash(op):
    """Combine buffered pairs per key in a dict, then send them to C++.

    Sends the "Functional#reduce_by_key" tag and
    ``GlobalVar.reduce_by_key_list`` first. The pairs in
    ``GlobalVar.reduce_by_key_store`` are then folded into a dict: the first
    value seen for a key is stored as is, and each later value is merged in
    with ``op.func(current, value)``. Finally the number of distinct keys is
    sent as a decimal string, followed by each serialized key and its
    serialized combined value.
    """
    GlobalSocket.pipe_to_cpp.send("Functional#reduce_by_key")
    GlobalSocket.pipe_to_cpp.send(GlobalVar.reduce_by_key_list)

    combined = {}
    for key, value in GlobalVar.reduce_by_key_store:
        if key not in combined:
            combined[key] = value
            continue
        combined[key] = op.func(combined[key], value)

    GlobalSocket.pipe_to_cpp.send(str(len(combined)))
    for key, value in combined.iteritems():
        GlobalSocket.pipe_to_cpp.send(Serializer.dumps(key))
        GlobalSocket.pipe_to_cpp.send(Serializer.dumps(value))
def load(op):
    """Yield this worker's share of the data embedded in the operation.

    The sequence is deserialized from
    ``op.op_param[OperationParam.data_str]``. Starting at index
    ``GlobalVar.global_id`` and stepping by ``GlobalVar.num_workers``, each
    selected element is yielded wrapped in a single-element list.
    """
    records = Serializer.loads(op.op_param[OperationParam.data_str])
    total = len(records)
    position = GlobalVar.global_id
    while position < total:
        yield [records[position]]
        position += GlobalVar.num_workers
def load(op):
    """Yield fully reduced (key, value) pairs received from the C++ side.

    First sends the "Functional#reduce_by_key_end" tag and
    ``op.op_param[OperationParam.list_str]``. It then reads messages in a
    loop: a serialized key, a count, and that many serialized values, which
    are folded left to right with ``op.func``. A falsy key message ends the
    stream.

    Each step yields a single-element list ``[(key, reduced)]``. ``reduced``
    is ``None`` when the count is zero.
    """
    GlobalSocket.pipe_to_cpp.send("Functional#reduce_by_key_end")
    GlobalSocket.pipe_to_cpp.send(op.op_param[OperationParam.list_str])
    combine = op.func

    while True:
        raw_key = GlobalSocket.pipe_from_cpp.recv()
        if not raw_key:
            return
        reduce_key = Serializer.loads(raw_key)
        count = int(GlobalSocket.pipe_from_cpp.recv())

        # None doubles as the "nothing accumulated yet" marker.
        accumulated = None
        for _ in xrange(count):
            item = Serializer.loads(GlobalSocket.pipe_from_cpp.recv())
            if accumulated is None:
                accumulated = item
            else:
                accumulated = combine(accumulated, item)
        yield [(reduce_key, accumulated)]
def load(op):
    """Yield the values received from the C++ side after a distinct step.

    First sends the "Functional#distinct_end" tag and
    ``op.op_param[OperationParam.list_str]``. Each non-falsy message received
    afterwards is deserialized and yielded wrapped in a single-element list.
    The first falsy message ends the stream.
    """
    GlobalSocket.pipe_to_cpp.send("Functional#distinct_end")
    GlobalSocket.pipe_to_cpp.send(op.op_param[OperationParam.list_str])

    while True:
        message = GlobalSocket.pipe_from_cpp.recv()
        if not message:
            return
        yield [Serializer.loads(message)]
def postfunc_combine_2(op):
    """Combine buffered pairs per key with sort + groupby, then send to C++.

    Sends the "Functional#reduce_by_key" tag and
    ``GlobalVar.reduce_by_key_list`` first. The pairs in
    ``GlobalVar.reduce_by_key_store`` are sorted by key, grouped by key, and
    each group is folded with ``op.func``. Finally the number of distinct
    keys is sent as a decimal string, followed by each serialized key and
    its serialized combined value.

    The store itself is not modified (``sorted`` works on a copy).
    """
    GlobalSocket.pipe_to_cpp.send("Functional#reduce_by_key")
    GlobalSocket.pipe_to_cpp.send(GlobalVar.reduce_by_key_list)

    def key_of(pair):
        return pair[0]

    def reduce_func(x, y):
        # Keep the key of the left operand and merge the two values.
        return x[0], op.func(x[1], y[1])

    # Sort on the key only. Sorting whole (key, value) tuples would compare
    # values whenever two keys are equal, which is wasted work and raises
    # TypeError for values that have no ordering (e.g. complex numbers).
    ordered = sorted(GlobalVar.reduce_by_key_store, key=key_of)

    send_buffer = []
    for _, group in groupby(ordered, key=key_of):
        k, v = reduce(reduce_func, group)
        send_buffer.append(Serializer.dumps(k))
        send_buffer.append(Serializer.dumps(v))

    # send_buffer alternates key, value, so the pair count is half its
    # length. Floor division keeps the count an integer string even when
    # true division is in effect.
    GlobalSocket.pipe_to_cpp.send(str(len(send_buffer) // 2))
    for x in send_buffer:
        GlobalSocket.pipe_to_cpp.send(x)
def end_postfunc(_):
    """Finish a reduce: exchange partial results with C++ and send the total.

    Sends the "Functional#reduce_end" tag and the serialized
    ``GlobalVar.reduce_res``. It then reads messages until a falsy one
    arrives. Each message is deserialized; ``None`` values are skipped and
    the rest are folded with ``GlobalVar.reduce_func``. The serialized total
    is sent back (``None`` if nothing was folded), and both
    ``GlobalVar.reduce_res`` and ``GlobalVar.reduce_func`` are reset to
    ``None``.

    The single positional argument is ignored.
    """
    to_cpp = GlobalSocket.pipe_to_cpp
    from_cpp = GlobalSocket.pipe_from_cpp

    to_cpp.send("Functional#reduce_end")
    to_cpp.send(Serializer.dumps(GlobalVar.reduce_res))

    total = None
    while True:
        message = from_cpp.recv()
        if not message:
            break
        partial = Serializer.loads(message)
        if partial is None:
            continue
        total = partial if total is None else GlobalVar.reduce_func(total, partial)

    to_cpp.send(Serializer.dumps(total))
    GlobalVar.reduce_res = None
    GlobalVar.reduce_func = None
def postfunc_combine_1(op):
    """Combine buffered pairs per key with a sorted sweep, then send to C++.

    Sends the "Functional#reduce_by_key" tag and
    ``GlobalVar.reduce_by_key_list`` first. ``GlobalVar.reduce_by_key_store``
    is then sorted in place by key, and runs of equal keys are merged with
    ``op.func``. Finally the number of distinct keys is sent as a decimal
    string, followed by each serialized key and its serialized combined
    value. An empty store sends a count of "0" and no pairs.
    """
    GlobalSocket.pipe_to_cpp.send("Functional#reduce_by_key")
    GlobalSocket.pipe_to_cpp.send(GlobalVar.reduce_by_key_list)

    # Sorting by key places equal keys next to each other for the sweep.
    GlobalVar.reduce_by_key_store.sort(key=lambda x: x[0])

    send_buffer = []
    if GlobalVar.reduce_by_key_store:
        prev_x, prev_y = GlobalVar.reduce_by_key_store[0]
        for x, y in islice(GlobalVar.reduce_by_key_store, 1, None):
            if x != prev_x:
                # The key changed: flush the finished run and start a new one.
                send_buffer.append(Serializer.dumps(prev_x))
                send_buffer.append(Serializer.dumps(prev_y))
                prev_x, prev_y = x, y
            else:
                prev_y = op.func(prev_y, y)
        # Flush the last run, which the loop never emits.
        send_buffer.append(Serializer.dumps(prev_x))
        send_buffer.append(Serializer.dumps(prev_y))

    # send_buffer alternates key, value, so the pair count is half its
    # length. Floor division keeps the count an integer string even when
    # true division is in effect.
    GlobalSocket.pipe_to_cpp.send(str(len(send_buffer) // 2))
    for x in send_buffer:
        GlobalSocket.pipe_to_cpp.send(x)
def load_n2n(op):
    """Merge the pair lists received over the N2N socket and yield reductions.

    Sends the "Functional#reduce_by_key_end" tag and
    ``op.op_param[OperationParam.list_str]`` to the C++ side. One serialized
    list is then received from ``GlobalN2NSocket``, followed by
    ``GlobalVar.num_workers - 1`` more, and all are concatenated. The merged
    list is sorted by key and runs of equal keys are folded with ``op.func``.

    Each distinct key yields a single-element list ``[(key, reduced)]``.
    Nothing is yielded when the merged list is empty.
    """
    GlobalSocket.pipe_to_cpp.send("Functional#reduce_by_key_end")
    GlobalSocket.pipe_to_cpp.send(op.op_param[OperationParam.list_str])

    merged = Serializer.loads(GlobalN2NSocket.recv())
    for _ in xrange(GlobalVar.num_workers - 1):
        merged.extend(Serializer.loads(GlobalN2NSocket.recv()))

    combine = op.func
    merged.sort(key=lambda pair: pair[0])
    if not merged:
        return

    current_key, current_value = merged[0]
    for key, value in islice(merged, 1, None):
        if key != current_key:
            # The key changed: emit the finished run and start a new one.
            yield [(current_key, current_value)]
            current_key, current_value = key, value
        else:
            current_value = combine(current_value, value)
    # Emit the last run, which the loop never yields.
    yield [(current_key, current_value)]
def postfunc_combine_n2n(_):
    """Partition the buffered pairs by key hash and send them worker to worker.

    The C++ side receives the "Functional#reduce_by_key" tag,
    ``GlobalVar.reduce_by_key_list`` and a pair count of "0". The items of
    ``GlobalVar.reduce_by_key_store`` (read through ``.items()``) are placed
    into one bucket per worker, chosen by ``hash(key) % num_workers``. Every
    bucket, including empty ones, is serialized and sent through
    ``GlobalN2NSocket.send(worker_index, payload)``.

    The single positional argument is ignored.
    """
    to_cpp = GlobalSocket.pipe_to_cpp
    to_cpp.send("Functional#reduce_by_key")
    to_cpp.send(GlobalVar.reduce_by_key_list)
    to_cpp.send("0")

    worker_count = GlobalVar.num_workers
    buckets = [[] for _ in xrange(worker_count)]

    store = GlobalVar.reduce_by_key_store
    # The truthiness check means .items() is never called on an empty store.
    if store:
        for key, value in store.items():
            buckets[hash(key) % worker_count].append((key, value))

    for worker_index, bucket in enumerate(buckets):
        GlobalN2NSocket.send(worker_index, Serializer.dumps(bucket))
def pythonbackend_receiver(reply):
    """Return the object deserialized from the string that ``reply`` carries.

    The string is obtained with ``reply.load_str()`` and passed to
    ``Serializer.loads``.
    """
    return Serializer.loads(reply.load_str())
def func(_, data):
    """Serialize every element of ``data`` into ``GlobalVar.difference_store``.

    Elements are appended in iteration order. The first argument is ignored.
    """
    GlobalVar.difference_store.extend(Serializer.dumps(item) for item in data)
def func(_, data):
    """Serialize every element of ``data`` into ``GlobalVar.distinct_store``.

    Elements are appended in iteration order. The first argument is ignored.
    """
    for item in data:
        encoded = Serializer.dumps(item)
        GlobalVar.distinct_store.append(encoded)
def func(_, data):
    """Buffer serialized (key, value) pairs in ``GlobalVar.group_by_key_store``.

    Every element of ``data`` must be a tuple or list of exactly two items.
    Its two items are serialized separately and appended to the store as a
    ``(serialized_key, serialized_value)`` tuple. The first argument is
    ignored.

    Raises:
        AssertionError: if an element is not a two-item tuple or list.
            Elements before the offending one have already been appended.
    """
    for x in data:
        # Compare the length with ==, not `is`: identity of int objects is an
        # interpreter detail and must not decide whether an element is valid.
        # NOTE: assert statements are removed when Python runs with -O.
        assert isinstance(x, (tuple, list)) and len(x) == 2
        GlobalVar.group_by_key_store.append(
            (Serializer.dumps(x[0]), Serializer.dumps(x[1])))
def end_postfunc(_):
    """Send the collected list to the C++ side and drop it from the chunk map.

    Three messages are sent: the "Functional#collect_end" tag, the literal
    name "collect_list", and the serialized contents of
    ``GlobalVar.data_chunk["collect_list"]``. That entry is then deleted from
    ``GlobalVar.data_chunk``.

    The single positional argument is ignored.
    """
    to_cpp = GlobalSocket.pipe_to_cpp
    to_cpp.send("Functional#collect_end")
    to_cpp.send("collect_list")

    payload = Serializer.dumps(GlobalVar.data_chunk["collect_list"])
    to_cpp.send(payload)

    del GlobalVar.data_chunk["collect_list"]