Exemplo n.º 1
0
    def map_partition(self, operator):
        """
        Applies a MapPartition transformation on a DataSet.

        The supplied MapPartitionFunction is invoked once per parallel partition of the
        DataSet and receives the whole partition through an Iterator. It may return
        an arbitrary number of results.

        How many elements a single invocation sees is non deterministic; it depends
        on the degree of parallelism of the operation.

        :param operator: A MapPartitionFunction, or a plain Python function. A plain
            function is attached to a new MapPartitionFunction as its map_partition attribute.
        :return: An OperatorSet that represents the transformed DataSet
        """
        if isinstance(operator, TYPES.FunctionType):
            # Adapt a plain function by attaching it to a fresh MapPartitionFunction.
            wrapped = MapPartitionFunction()
            wrapped.map_partition = operator
            operator = wrapped

        op_info = OperationInfo()
        # The returned set shares this set's environment and refers to the new info object.
        result_set = OperatorSet(self._env, op_info)

        op_info.identifier = _Identifier.MAPPARTITION
        op_info.parent = self._info
        op_info.operator = operator
        op_info.types = _createArrayTypeInfo()
        op_info.name = "PythonMapPartition"

        # Register the new operation with its parent and with the environment.
        self._info.children.append(op_info)
        self._env._sets.append(op_info)
        return result_set
Exemplo n.º 2
0
    def map_partition(self, operator, types):
        """
        Applies a MapPartition transformation on a DataSet.

        The supplied MapPartitionFunction is invoked once per parallel partition of the
        DataSet and receives the whole partition through an Iterator. It may return
        an arbitrary number of results.

        How many elements a single invocation sees is non deterministic; it depends
        on the degree of parallelism of the operation.

        :param operator: A MapPartitionFunction, or a plain Python function. A plain
            function is attached to a new MapPartitionFunction as its map_partition attribute.
        :param types: The type of the resulting DataSet
        :return: An OperatorSet that represents the transformed DataSet
        """
        if isinstance(operator, TYPES.FunctionType):
            # Adapt a plain function by attaching it to a fresh MapPartitionFunction.
            wrapped = MapPartitionFunction()
            wrapped.map_partition = operator
            operator = wrapped

        op_info = dict()
        # The returned set shares this set's environment and refers to the new info dict.
        result_set = OperatorSet(self._env, op_info)

        op_info[_Fields.IDENTIFIER] = _Identifier.MAPPARTITION
        op_info[_Fields.PARENT] = self._info
        op_info[_Fields.OPERATOR] = operator
        # META is "<module of the operator>|<class name of the operator>".
        op_info[_Fields.META] = "|".join(
            [str(inspect.getmodule(operator)), str(operator.__class__.__name__)])
        op_info[_Fields.TYPES] = types
        op_info[_Fields.NAME] = "PythonMapPartition"

        # Register the new operation with its parent and with the environment.
        self._info[_Fields.CHILDREN].append(op_info)
        self._env._sets.append(op_info)
        return result_set
Exemplo n.º 3
0
    def map_partition(self, operator):
        """
        Applies a MapPartition transformation on a DataSet.

        A MapPartitionFunction is called once for every parallel partition of the DataSet;
        the complete partition is handed to it through an Iterator, and it may
        return an arbitrary number of results.

        The number of elements seen by each call is non deterministic and depends
        on the degree of parallelism of the operation.

        :param operator: Either a MapPartitionFunction or a plain Python function; a plain
            function is set as the map_partition attribute of a new MapPartitionFunction.
        :return: An OperatorSet that represents the transformed DataSet
        """
        if isinstance(operator, TYPES.FunctionType):
            # A bare function is wrapped so it can be stored as a function object.
            partition_function = MapPartitionFunction()
            partition_function.map_partition = operator
            operator = partition_function

        info = OperationInfo()
        # Created before the info is filled in; it refers to the same info object.
        transformed = OperatorSet(self._env, info)

        info.identifier = _Identifier.MAPPARTITION
        info.parent = self._info
        info.operator = operator
        info.types = _createArrayTypeInfo()
        info.name = "PythonMapPartition"

        # Link the new operation under this set and record it on the environment.
        self._info.children.append(info)
        self._env._sets.append(info)
        return transformed
Exemplo n.º 4
0
    def map_partition(self, operator, types):
        """
        Applies a MapPartition transformation on a DataSet.

        A MapPartitionFunction is called once for every parallel partition of the DataSet;
        the complete partition is handed to it through an Iterator, and it may
        return an arbitrary number of results.

        The number of elements seen by each call is non deterministic and depends
        on the degree of parallelism of the operation.

        :param operator: Either a MapPartitionFunction or a plain Python function; a plain
            function is set as the map_partition attribute of a new MapPartitionFunction.
        :param types: The type of the resulting DataSet
        :return: An OperatorSet that represents the transformed DataSet
        """
        if isinstance(operator, TYPES.FunctionType):
            # A bare function is wrapped so it can be stored as a function object.
            partition_function = MapPartitionFunction()
            partition_function.map_partition = operator
            operator = partition_function

        info = dict()
        # Created before the dict is filled in; it refers to the same dict object.
        transformed = OperatorSet(self._env, info)

        info[_Fields.IDENTIFIER] = _Identifier.MAPPARTITION
        info[_Fields.PARENT] = self._info
        info[_Fields.OPERATOR] = operator
        # META joins the operator's module and class name with a "|" separator.
        module_text = str(inspect.getmodule(operator))
        class_text = str(operator.__class__.__name__)
        info[_Fields.META] = module_text + "|" + class_text
        info[_Fields.TYPES] = types
        info[_Fields.NAME] = "PythonMapPartition"

        # Link the new operation under this set and record it on the environment.
        self._info[_Fields.CHILDREN].append(info)
        self._env._sets.append(info)
        return transformed