Exemplo n.º 1
0
 def postfunc_no_combine(_):
     """Send every buffered (key, value) pair without combining them.

     Writes, in order, to ``GlobalSocket.pipe_to_cpp``: the
     "Functional#reduce_by_key" header, ``GlobalVar.reduce_by_key_list``,
     the number of buffered pairs as a string, and then the serialized key
     followed by the serialized value of each pair. Afterwards
     ``GlobalVar.reduce_by_key_store`` is rebound to a fresh empty list.

     The single positional argument is ignored.
     """
     to_cpp = GlobalSocket.pipe_to_cpp
     to_cpp.send("Functional#reduce_by_key")
     to_cpp.send(GlobalVar.reduce_by_key_list)

     pairs = GlobalVar.reduce_by_key_store
     to_cpp.send(str(len(pairs)))
     for key, value in pairs:
         to_cpp.send(Serializer.dumps(key))
         to_cpp.send(Serializer.dumps(value))

     # Start the next round with an empty buffer.
     GlobalVar.reduce_by_key_store = []
Exemplo n.º 2
0
 def load(op):
     """Yield grouped records received after a group_by_key_end request.

     This is a generator: nothing is sent until the first item is requested.
     It then sends "Functional#group_by_key_end" and
     ``op.op_param[OperationParam.list_str]`` over
     ``GlobalSocket.pipe_to_cpp`` and reads records from
     ``GlobalSocket.pipe_from_cpp`` until a falsy key message arrives.

     Each record consists of a serialized key, a value count, and that many
     serialized values. It is yielded as a one-element list
     ``[(key, values)]`` where ``values`` is a list in arrival order.
     """
     GlobalSocket.pipe_to_cpp.send("Functional#group_by_key_end")
     GlobalSocket.pipe_to_cpp.send(op.op_param[OperationParam.list_str])

     raw_key = GlobalSocket.pipe_from_cpp.recv()
     while raw_key:
         group_key = Serializer.loads(raw_key)
         count = int(GlobalSocket.pipe_from_cpp.recv())
         values = [
             Serializer.loads(GlobalSocket.pipe_from_cpp.recv())
             for _ in xrange(count)
         ]
         yield [(group_key, values)]
         # A falsy message in key position terminates the stream.
         raw_key = GlobalSocket.pipe_from_cpp.recv()
Exemplo n.º 3
0
    def postfunc_combine_hash(op):
        """Combine buffered pairs per key with a dict, then send the result.

        Sends the "Functional#reduce_by_key" header and
        ``GlobalVar.reduce_by_key_list`` over ``GlobalSocket.pipe_to_cpp``,
        folds the values of ``GlobalVar.reduce_by_key_store`` that share a
        key using ``op.func`` (in buffer order), and then sends the number of
        distinct keys followed by each serialized key and combined value.

        Keys must be hashable because they are used as dict keys.
        """
        pipe = GlobalSocket.pipe_to_cpp
        pipe.send("Functional#reduce_by_key")
        pipe.send(GlobalVar.reduce_by_key_list)

        combined = {}
        for key, value in GlobalVar.reduce_by_key_store:
            if key not in combined:
                # First value seen for this key is taken as-is.
                combined[key] = value
                continue
            combined[key] = op.func(combined[key], value)

        pipe.send(str(len(combined)))
        for key, value in combined.iteritems():
            pipe.send(Serializer.dumps(key))
            pipe.send(Serializer.dumps(value))
Exemplo n.º 4
0
 def load(op):
     """Yield this worker's strided share of a serialized data sequence.

     Deserializes ``op.op_param[OperationParam.data_str]`` and yields, as
     one-element lists, the items at positions ``GlobalVar.global_id``,
     ``global_id + num_workers``, ``global_id + 2 * num_workers``, ... for as
     long as the position is below the length of the data.
     """
     records = Serializer.loads(op.op_param[OperationParam.data_str])
     position = GlobalVar.global_id
     while position < len(records):
         yield [records[position]]
         # Step by the worker count so each position is visited by one stride.
         position += GlobalVar.num_workers
Exemplo n.º 5
0
    def load(op):
        """Yield one reduced value per key after a reduce_by_key_end request.

        This is a generator: nothing is sent until the first item is
        requested. It then sends "Functional#reduce_by_key_end" and
        ``op.op_param[OperationParam.list_str]`` over
        ``GlobalSocket.pipe_to_cpp`` and reads records from
        ``GlobalSocket.pipe_from_cpp`` until a falsy key message arrives.

        Each record consists of a serialized key, a value count, and that
        many serialized values. The values are folded left to right with
        ``op.func`` and the record is yielded as ``[(key, result)]``. When
        the count is zero the result is ``None``.
        """
        GlobalSocket.pipe_to_cpp.send("Functional#reduce_by_key_end")
        GlobalSocket.pipe_to_cpp.send(op.op_param[OperationParam.list_str])
        combine = op.func

        raw_key = GlobalSocket.pipe_from_cpp.recv()
        while raw_key:
            key = Serializer.loads(raw_key)
            count = int(GlobalSocket.pipe_from_cpp.recv())

            accumulated = None
            for _ in xrange(count):
                item = Serializer.loads(GlobalSocket.pipe_from_cpp.recv())
                if accumulated is None:
                    # Nothing accumulated yet: start from this item.
                    accumulated = item
                else:
                    accumulated = combine(accumulated, item)

            yield [(key, accumulated)]
            raw_key = GlobalSocket.pipe_from_cpp.recv()
Exemplo n.º 6
0
 def load(op):
     """Yield the values received after a distinct_end request.

     This is a generator: nothing is sent until the first item is requested.
     It then sends "Functional#distinct_end" and
     ``op.op_param[OperationParam.list_str]`` over
     ``GlobalSocket.pipe_to_cpp`` and reads messages from
     ``GlobalSocket.pipe_from_cpp`` until a falsy one arrives. Every message
     is deserialized and yielded as a one-element list.
     """
     GlobalSocket.pipe_to_cpp.send("Functional#distinct_end")
     GlobalSocket.pipe_to_cpp.send(op.op_param[OperationParam.list_str])

     payload = GlobalSocket.pipe_from_cpp.recv()
     while payload:
         yield [Serializer.loads(payload)]
         payload = GlobalSocket.pipe_from_cpp.recv()
Exemplo n.º 7
0
    def postfunc_combine_2(op):
        """Combine buffered pairs per key via sort + groupby, then send them.

        Sends the "Functional#reduce_by_key" header and
        ``GlobalVar.reduce_by_key_list`` over ``GlobalSocket.pipe_to_cpp``,
        groups the pairs in ``GlobalVar.reduce_by_key_store`` by key, folds
        each group's values with ``op.func``, and then sends the number of
        combined pairs followed by each serialized key and combined value.

        ``GlobalVar.reduce_by_key_store`` itself is left unmodified (a sorted
        copy is used).
        """
        # send out reduce_by_key_store
        GlobalSocket.pipe_to_cpp.send("Functional#reduce_by_key")
        GlobalSocket.pipe_to_cpp.send(GlobalVar.reduce_by_key_list)

        def pair_key(pair):
            return pair[0]

        def reduce_func(x, y):
            # Keep the group's key and fold the two values.
            return x[0], op.func(x[1], y[1])

        # combine
        # Sort on the key only. Sorting whole (key, value) tuples would also
        # compare values, which is unnecessary work and reorders the values
        # of a key before they are folded. The sort is stable, so values of
        # the same key keep their buffer order.
        ordered = sorted(GlobalVar.reduce_by_key_store, key=pair_key)
        send_buffer = []
        for _, group in groupby(ordered, key=pair_key):
            k, v = reduce(reduce_func, group)
            send_buffer.append(Serializer.dumps(k))
            send_buffer.append(Serializer.dumps(v))

        # The buffer holds two entries per pair. Floor division keeps the
        # count an integer string even when true division is in effect.
        GlobalSocket.pipe_to_cpp.send(str(len(send_buffer) // 2))
        for x in send_buffer:
            GlobalSocket.pipe_to_cpp.send(x)
Exemplo n.º 8
0
 def end_postfunc(_):
     """Exchange partial reduce results and send back the combined result.

     Sends "Functional#reduce_end" and the serialized
     ``GlobalVar.reduce_res`` over ``GlobalSocket.pipe_to_cpp``, then reads
     messages from ``GlobalSocket.pipe_from_cpp`` until a falsy one arrives.
     Each message is deserialized; ``None`` values are skipped and the rest
     are folded left to right with ``GlobalVar.reduce_func``. The serialized
     total (``None`` if nothing was folded) is sent back, and both
     ``GlobalVar.reduce_res`` and ``GlobalVar.reduce_func`` are reset to
     ``None``.

     The single positional argument is ignored.
     """
     GlobalSocket.pipe_to_cpp.send("Functional#reduce_end")
     GlobalSocket.pipe_to_cpp.send(Serializer.dumps(GlobalVar.reduce_res))

     total = None
     message = GlobalSocket.pipe_from_cpp.recv()
     while message:
         partial = Serializer.loads(message)
         if partial is not None:
             if total is None:
                 # First non-None partial seeds the fold.
                 total = partial
             else:
                 total = GlobalVar.reduce_func(total, partial)
         message = GlobalSocket.pipe_from_cpp.recv()

     GlobalSocket.pipe_to_cpp.send(Serializer.dumps(total))

     # Clear the per-reduce state.
     GlobalVar.reduce_res = None
     GlobalVar.reduce_func = None
Exemplo n.º 9
0
    def postfunc_combine_1(op):
        """Combine buffered pairs per key via an in-place sort, then send them.

        Sends the "Functional#reduce_by_key" header and
        ``GlobalVar.reduce_by_key_list`` over ``GlobalSocket.pipe_to_cpp``,
        sorts ``GlobalVar.reduce_by_key_store`` in place by key, folds the
        values of each run of equal keys with ``op.func``, and then sends the
        number of combined pairs followed by each serialized key and combined
        value.
        """
        # send out reduce_by_key_store
        GlobalSocket.pipe_to_cpp.send("Functional#reduce_by_key")
        GlobalSocket.pipe_to_cpp.send(GlobalVar.reduce_by_key_list)

        # combine
        # Sorting by key makes equal keys adjacent so one pass can fold them.
        GlobalVar.reduce_by_key_store.sort(key=lambda x: x[0])
        send_buffer = []
        if GlobalVar.reduce_by_key_store:
            prev_x, prev_y = GlobalVar.reduce_by_key_store[0]
            for x, y in islice(GlobalVar.reduce_by_key_store, 1, None):
                if x != prev_x:
                    # Key changed: the previous run is complete, emit it.
                    send_buffer.append(Serializer.dumps(prev_x))
                    send_buffer.append(Serializer.dumps(prev_y))
                    prev_x, prev_y = x, y
                else:
                    prev_y = op.func(prev_y, y)
            # Emit the final run, which the loop never flushes.
            send_buffer.append(Serializer.dumps(prev_x))
            send_buffer.append(Serializer.dumps(prev_y))

        # The buffer holds two entries per pair. Floor division keeps the
        # count an integer string even when true division is in effect.
        GlobalSocket.pipe_to_cpp.send(str(len(send_buffer) // 2))
        for x in send_buffer:
            GlobalSocket.pipe_to_cpp.send(x)
Exemplo n.º 10
0
    def load_n2n(op):
        """Yield reduced pairs built from lists received via GlobalN2NSocket.

        This is a generator: nothing is sent until the first item is
        requested. It then sends "Functional#reduce_by_key_end" and
        ``op.op_param[OperationParam.list_str]`` over
        ``GlobalSocket.pipe_to_cpp``, receives ``GlobalVar.num_workers``
        serialized lists of (key, value) pairs from ``GlobalN2NSocket`` (at
        least one is always read), merges them, sorts the result by key and
        folds the values of each run of equal keys with ``op.func``.

        Each combined pair is yielded as a one-element list
        ``[(key, value)]``.
        """
        GlobalSocket.pipe_to_cpp.send("Functional#reduce_by_key_end")
        GlobalSocket.pipe_to_cpp.send(op.op_param[OperationParam.list_str])

        # The first message seeds the list; the remaining ones extend it.
        pairs = Serializer.loads(GlobalN2NSocket.recv())
        for _ in xrange(1, GlobalVar.num_workers):
            pairs.extend(Serializer.loads(GlobalN2NSocket.recv()))

        combine = op.func
        pairs.sort(key=lambda pair: pair[0])
        if not pairs:
            return

        current_key, current_value = pairs[0]
        for key, value in islice(pairs, 1, None):
            if key != current_key:
                # Key changed: the previous run is complete.
                yield [(current_key, current_value)]
                current_key, current_value = key, value
                continue
            current_value = combine(current_value, value)
        # Emit the final run, which the loop never flushes.
        yield [(current_key, current_value)]
Exemplo n.º 11
0
    def postfunc_combine_n2n(_):
        """Partition the buffered pairs by key hash and send one list per worker.

        Sends the "Functional#reduce_by_key" header,
        ``GlobalVar.reduce_by_key_list`` and the literal "0" over
        ``GlobalSocket.pipe_to_cpp``. Each (key, value) item of
        ``GlobalVar.reduce_by_key_store`` (accessed through ``.items()``, so a
        non-empty store must be a mapping) is placed in the bucket
        ``hash(key) % GlobalVar.num_workers``. Every bucket, including empty
        ones, is then serialized and sent to its worker index through
        ``GlobalN2NSocket``.

        The single positional argument is ignored.
        """
        GlobalSocket.pipe_to_cpp.send("Functional#reduce_by_key")
        GlobalSocket.pipe_to_cpp.send(GlobalVar.reduce_by_key_list)
        # NOTE(review): "0" sits where the sibling postfuncs send the pair
        # count - presumably no pairs travel over this pipe; confirm.
        GlobalSocket.pipe_to_cpp.send("0")

        worker_count = GlobalVar.num_workers
        buckets = [[] for _ in xrange(worker_count)]

        store = GlobalVar.reduce_by_key_store
        if store:
            for key, value in store.items():
                buckets[hash(key) % worker_count].append((key, value))

        for worker, bucket in enumerate(buckets):
            GlobalN2NSocket.send(worker, Serializer.dumps(bucket))
Exemplo n.º 12
0
 def pythonbackend_receiver(reply):
     """Return the object deserialized from the string held by ``reply``.

     Reads the payload with ``reply.load_str()`` and passes it through
     ``Serializer.loads``.
     """
     return Serializer.loads(reply.load_str())
Exemplo n.º 13
0
 def func(_, data):
     """Append the serialized form of each item to the difference store.

     Every element of ``data`` is serialized with ``Serializer.dumps`` and
     appended to ``GlobalVar.difference_store``. The first argument is
     ignored and nothing is returned.
     """
     for element in data:
         encoded = Serializer.dumps(element)
         GlobalVar.difference_store.append(encoded)
Exemplo n.º 14
0
 def func(_, data):
     """Append the serialized form of each item to the distinct store.

     Every element of ``data`` is serialized with ``Serializer.dumps`` and
     appended to ``GlobalVar.distinct_store``. The first argument is ignored
     and nothing is returned.
     """
     for element in data:
         encoded = Serializer.dumps(element)
         GlobalVar.distinct_store.append(encoded)
Exemplo n.º 15
0
 def func(_, data):
     """Buffer each (key, value) item of ``data`` for a group-by-key step.

     Every element must be a tuple or list of exactly two items. Its key and
     value are serialized separately with ``Serializer.dumps`` and appended
     to ``GlobalVar.group_by_key_store`` as a 2-tuple. The first argument is
     ignored and nothing is returned.

     Raises:
         AssertionError: if an element is not a two-item tuple or list
             (only when assertions are enabled).
     """
     for x in data:
         # Compare the length by value: ``is`` tests object identity and only
         # happens to work for small ints as an implementation detail.
         assert isinstance(x, (tuple, list)) and len(x) == 2
         GlobalVar.group_by_key_store.append(
             (Serializer.dumps(x[0]), Serializer.dumps(x[1])))
Exemplo n.º 16
0
 def end_postfunc(_):
     """Send the collected list and drop it from the data chunks.

     Sends "Functional#collect_end", the name "collect_list", and the
     serialized ``GlobalVar.data_chunk["collect_list"]`` over
     ``GlobalSocket.pipe_to_cpp``, then deletes that entry from
     ``GlobalVar.data_chunk``.

     The single positional argument is ignored.
     """
     to_cpp = GlobalSocket.pipe_to_cpp
     to_cpp.send("Functional#collect_end")
     to_cpp.send("collect_list")

     payload = Serializer.dumps(GlobalVar.data_chunk["collect_list"])
     to_cpp.send(payload)

     # The entry has been handed off; remove it from the chunk table.
     del GlobalVar.data_chunk["collect_list"]