Exemplo n.º 1
0
    def __create_unary_process(self, table: _DTable, func):
        """Assemble a ``processor_pb2.UnaryProcess`` message for ``table`` and ``func``.

        :param table: table whose storage locator becomes the operand; its
            ``get_in_place_computing()`` flag is forwarded to the task info.
        :param func: function handed to ``__create_task_info``.
        :return: a ``processor_pb2.UnaryProcess`` carrying the task info, the
            operand and a ``ProcessConf`` holding the context's naming policy name.
        """
        storage_locator = self.__create_storage_locator_from_dtable(table)
        in_place = table.get_in_place_computing()
        info = self.__create_task_info(func=func, is_in_place_computing=in_place)

        # The naming policy travels by name rather than as the policy object itself.
        policy_name = self.eggroll_context.get_naming_policy().name
        process_conf = processor_pb2.ProcessConf(namingPolicy=policy_name)

        return processor_pb2.UnaryProcess(info=info,
                                          operand=storage_locator,
                                          conf=process_conf)
Exemplo n.º 2
0
 def reduce(self, _table, func):
     """Reduce all values of ``_table`` to a single value with ``func``.

     A ``reduce`` request is sent for each partition to the processor picked
     round-robin from ``self.proc_list``; the returned values are deserialized
     and then folded locally, left to right, in partition order.

     :param _table: table to reduce; ``_table.partition`` is the partition count.
     :param func: binary function ``func(acc, value)``; it is serialized for the
         remote calls and also applied locally to combine partition results.
         NOTE(review): presumably expected to be associative -- confirm.
     :return: the combined value, or ``None`` when no partition produced a
         usable (non-empty, non-``None``) value.
     """
     func_id, func_bytes = self.serialize_and_hash_func(func)
     results = []
     for partition in range(_table.partition):
         operand = EggRoll.__get_storage_locator(_table, partition)
         proc_id = partition % len(self.proc_list)
         _, stub = self.proc_list[proc_id]
         unary_p = processor_pb2.UnaryProcess(
             operand=operand,
             info=processor_pb2.TaskInfo(task_id=self.job_id,
                                         function_id=func_id,
                                         function_bytes=func_bytes))
         # extend() appends in place instead of re-copying the accumulated list
         # for every partition.
         results.extend(stub.reduce(unary_p))

     rs = []
     for val in results:
         # Empty payloads carry no value and are skipped.
         if len(val.value) > 0:
             item = self._serdes.deserialize(val.value)
             if item is not None:
                 rs.append(item)

     # Guard on the filtered values, not on the raw responses: responses may
     # exist while every one of them is empty/None, which previously made
     # ``rs[0]`` raise IndexError.
     if not rs:
         return None

     rtn = rs[0]
     for r in rs[1:]:
         rtn = func(rtn, r)
     return rtn
Exemplo n.º 3
0
    def __create_unary_process(self, table: _DTable, func):
        """Assemble a ``processor_pb2.UnaryProcess`` message for ``table`` and ``func``.

        :param table: table whose storage locator becomes the operand; its
            ``get_in_place_computing()`` flag is forwarded to the task info.
        :param func: function handed to ``__create_task_info``.
        :return: a ``processor_pb2.UnaryProcess`` carrying the task info, the
            operand and the current session converted via ``to_protobuf()``.
        """
        storage_locator = self.__create_storage_locator_from_dtable(table)
        in_place = table.get_in_place_computing()
        info = self.__create_task_info(func=func, is_in_place_computing=in_place)
        session_pb = self.eggroll_session.to_protobuf()

        return processor_pb2.UnaryProcess(info=info,
                                          operand=storage_locator,
                                          session=session_pb)
Exemplo n.º 4
0
 def process_wrapper(req_type, func, result, req):
     """Parse a serialized request, run ``func`` on it and report the outcome.

     :param req_type: ``"UnaryProcess"`` selects ``processor_pb2.UnaryProcess``;
         any other value selects ``processor_pb2.BinaryProcess``.
     :param func: callable invoked as ``func(request_message, None)``.
     :param result: object with a ``put`` method; receives ``"ok"`` on success
         or ``"error:"`` followed by the formatted traceback on failure.
     :param req: serialized request passed to ``ParseFromString``.
     """
     try:
         if req_type == "UnaryProcess":
             message = processor_pb2.UnaryProcess()
         else:
             message = processor_pb2.BinaryProcess()
         message.ParseFromString(req)
         # TODO: should a context be serialized and passed instead of None?
         func(message, None)
         result.put("ok")
     except:
         # Deliberately catches everything so that any failure is logged and
         # reported back through ``result`` rather than propagating.
         trace = traceback.format_exc()
         LOGGER.error(trace)
         result.put("error:" + trace)
Exemplo n.º 5
0
 def glom(self, _table):
     """Issue a ``glom`` request for every partition of ``_table``.

     Requests are submitted as futures, one per partition, to the processor
     picked round-robin from ``self.proc_list``; all futures are then awaited
     in submission order.

     :param _table: source table; ``_table.partition`` is the partition count.
     :return: a ``_DTable`` built from the type, namespace and name of the
         last awaited response, with the same partition count as ``_table``.

     NOTE(review): with zero partitions ``result`` is never bound and the
     final ``return`` fails -- confirm callers never pass such a table.
     """
     task_function_id = str(uuid.uuid1())
     pending = []
     for index in range(_table.partition):
         locator = EggRoll.__get_storage_locator(_table, index)
         task = processor_pb2.TaskInfo(task_id=self.job_id,
                                       function_id=task_function_id)
         request = processor_pb2.UnaryProcess(operand=locator, info=task)
         # Only the stub of the (channel, stub) pair is needed here.
         _, stub = self.proc_list[index % len(self.proc_list)]
         pending.append(stub.glom.future(request))

     # Wait for every partition; the last response describes the output table.
     for future in pending:
         result = future.result()
     return _DTable(self, result.type, result.namespace, result.name,
                    _table.partition)
Exemplo n.º 6
0
    def mapValues(self, _table, func):
        """Issue a ``mapValues`` request with ``func`` for every partition of ``_table``.

        Requests are submitted as futures, one per partition, to the processor
        picked round-robin from ``self.proc_list``; all futures are then
        awaited in submission order.

        :param _table: source table; ``_table.partition`` is the partition count.
        :param func: function serialized via ``serialize_and_hash_func`` and
            shipped with each request.
        :return: a ``_DTable`` built from the type, namespace and name of the
            last awaited response, with the same partition count as ``_table``.

        NOTE(review): with zero partitions ``result`` is never bound and the
        final ``return`` fails -- confirm callers never pass such a table.
        """
        func_id, func_bytes = self.serialize_and_hash_func(func)
        pending = []
        for index in range(_table.partition):
            locator = EggRoll.__get_storage_locator(_table, index)
            task = processor_pb2.TaskInfo(task_id=self.job_id,
                                          function_id=func_id,
                                          function_bytes=func_bytes)
            request = processor_pb2.UnaryProcess(operand=locator, info=task)

            # Only the stub of the (channel, stub) pair is needed here.
            _, stub = self.proc_list[index % len(self.proc_list)]
            pending.append(stub.mapValues.future(request))

        # Wait for every partition; the last response describes the output table.
        for future in pending:
            result = future.result()
        return _DTable(self, result.type, result.namespace, result.name,
                       _table.partition)
Exemplo n.º 7
0
 def sample(self, _table, fraction, seed):
     """Issue a ``sample`` request for every partition of ``_table``.

     The ``(fraction, seed)`` pair is serialized with ``self._serdes`` and sent
     in the ``function_bytes`` field of each request. Requests are submitted
     as futures, one per partition, to the processor picked round-robin from
     ``self.proc_list``; all futures are then awaited in submission order.

     :param _table: source table; ``_table.partition`` is the partition count.
     :param fraction: sampling fraction; must not be below 0 or above 1.
     :param seed: seed value forwarded together with ``fraction``.
     :return: a ``_DTable`` built from the type, namespace and name of the
         last awaited response, with the same partition count as ``_table``.
     :raises ValueError: if ``fraction`` is less than 0 or greater than 1.

     NOTE(review): with zero partitions ``result`` is never bound and the
     final ``return`` fails -- confirm callers never pass such a table.
     """
     # Reject out-of-range fractions before any serialization or remote call.
     if fraction < 0 or fraction > 1:
         raise ValueError("fraction must be in [0, 1]")

     payload = self._serdes.serialize((fraction, seed))
     task_function_id = str(uuid.uuid1())
     pending = []
     for index in range(_table.partition):
         locator = EggRoll.__get_storage_locator(_table, index)
         task = processor_pb2.TaskInfo(task_id=self.job_id,
                                       function_id=task_function_id,
                                       function_bytes=payload)
         request = processor_pb2.UnaryProcess(operand=locator, info=task)
         # Only the stub of the (channel, stub) pair is needed here.
         _, stub = self.proc_list[index % len(self.proc_list)]
         pending.append(stub.sample.future(request))

     # Wait for every partition; the last response describes the output table.
     for future in pending:
         result = future.result()
     return _DTable(self, result.type, result.namespace, result.name,
                    _table.partition)