Example #1
  def expand(self, pcoll):
    # This code path is only used in the local direct runner.  For Dataflow
    # runner execution, the GroupByKey transform is expanded on the service.
    input_type = pcoll.element_type

    if input_type is not None:
      # Initialize type-hints used below to enforce type-checking and to pass
      # downstream to further PTransforms.
      key_type, value_type = trivial_inference.key_value_types(input_type)
      typecoders.registry.verify_deterministic(
          typecoders.registry.get_coder(key_type),
          'GroupByKey operation "%s"' % self.label)

      reify_output_type = KV[key_type, typehints.WindowedValue[value_type]]
      gbk_input_type = (
          KV[key_type, Iterable[typehints.WindowedValue[value_type]]])
      gbk_output_type = KV[key_type, Iterable[value_type]]

      # pylint: disable=bad-continuation
      return (pcoll
              | 'reify_windows' >> (ParDo(self.ReifyWindows())
                 .with_output_types(reify_output_type))
              | 'group_by_key' >> (GroupByKeyOnly()
                 .with_input_types(reify_output_type)
                 .with_output_types(gbk_input_type))
              | ('group_by_window' >> ParDo(
                     self.GroupAlsoByWindow(pcoll.windowing))
                 .with_input_types(gbk_input_type)
                 .with_output_types(gbk_output_type)))
    else:
      return (pcoll
              | 'reify_windows' >> ParDo(self.ReifyWindows())
              | 'group_by_key' >> GroupByKeyOnly()
              | 'group_by_window' >> ParDo(
                    self.GroupAlsoByWindow(pcoll.windowing)))
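All of the snippets on this page lean on trivial_inference.key_value_types to split a KV element type into its key and value hints. The following is a minimal sketch, not taken from any of the examples, assuming apache_beam is installed and that the function returns a (key_type, value_type) pair as the expansions above rely on:

# Minimal sketch: call key_value_types directly on a KV type hint.
# Assumption: KV[K, V] is unpacked into (K, V), which is what the
# GroupByKey expansion in Example #1 depends on.
from apache_beam import typehints
from apache_beam.typehints import trivial_inference

key_type, value_type = trivial_inference.key_value_types(typehints.KV[str, int])
print(key_type, value_type)  # expected: the hints for str and int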
Example #2
File: opcodes.py Project: zhoufek/beam
def build_map_unpack(state, arg):
    """Joins arg count maps from the stack into a single dict."""
    key_types = []
    value_types = []
    for _ in range(arg):
        type_constraint = state.stack.pop()
        if isinstance(type_constraint, typehints.Dict.DictConstraint):
            key_types.append(type_constraint.key_type)
            value_types.append(type_constraint.value_type)
        else:
            key_type, value_type = key_value_types(
                element_type(type_constraint))
            key_types.append(key_type)
            value_types.append(value_type)
    state.stack.append(Dict[Union[key_types], Union[value_types]])
Example #3
File: opcodes.py Project: zhoufek/beam
def dict_update(state, arg):
    other = state.stack.pop()
    base = state.stack[-arg]
    if isinstance(base, typehints.Dict.DictConstraint):
        base_key_type = base.key_type
        base_value_type = base.value_type
    else:
        base_key_type = Any
        base_value_type = Any
    if isinstance(other, typehints.Dict.DictConstraint):
        other_key_type = other.key_type
        other_value_type = other.value_type
    else:
        other_key_type, other_value_type = key_value_types(element_type(other))
    state.stack[-arg] = Dict[union(base_key_type, other_key_type),
                             union(base_value_type, other_value_type)]
Example #4
  def expand(self, pcoll):
    # Imported here to avoid circular dependencies.
    # pylint: disable=wrong-import-order, wrong-import-position
    from apache_beam.coders import typecoders

    input_type = pcoll.element_type
    if input_type is not None:
      # Initialize type-hints used below to enforce type-checking and to
      # pass downstream to further PTransforms.
      key_type, value_type = trivial_inference.key_value_types(input_type)
      # Enforce the input to a GBK has a KV element type.
      pcoll.element_type = typehints.typehints.coerce_to_kv_type(
          pcoll.element_type)
      typecoders.registry.verify_deterministic(
          typecoders.registry.get_coder(key_type),
          'GroupByKey operation "%s"' % self.label)

      reify_output_type = typehints.KV[
          key_type, typehints.WindowedValue[value_type]]  # type: ignore[misc]
      gbk_input_type = (
          typehints.KV[
              key_type,
              typehints.Iterable[typehints.WindowedValue[  # type: ignore[misc]
                  value_type]]])
      gbk_output_type = typehints.KV[key_type, typehints.Iterable[value_type]]

      # pylint: disable=bad-continuation
      return (
          pcoll
          | 'ReifyWindows' >> (
              ParDo(beam.GroupByKey.ReifyWindows()).with_output_types(
                  reify_output_type))
          | 'GroupByKey' >> (
              _GroupByKeyOnly().with_input_types(
                  reify_output_type).with_output_types(gbk_input_type))
          | (
              'GroupByWindow' >>
              _GroupAlsoByWindow(pcoll.windowing).with_input_types(
                  gbk_input_type).with_output_types(gbk_output_type)))
    else:
      # The input_type is None; fall back to the untyped default expansion.
      return (
          pcoll
          | 'ReifyWindows' >> ParDo(beam.GroupByKey.ReifyWindows())
          | 'GroupByKey' >> _GroupByKeyOnly()
          | 'GroupByWindow' >> _GroupAlsoByWindow(pcoll.windowing))
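Example #4 also coerces the input element type to KV and verifies that the key coder is deterministic before grouping. A minimal sketch of that check in isolation, assuming a plain str key (whose coder is deterministic, so the call is expected not to raise):

# Minimal sketch of the deterministic-key check used in expand() above,
# applied to a str key type; assumes apache_beam is importable.
from apache_beam.coders import typecoders

key_coder = typecoders.registry.get_coder(str)
typecoders.registry.verify_deterministic(
    key_coder, 'GroupByKey operation "sketch"')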
Example #5
 def infer_output_type(self, input_type):
     key_type, windowed_value_iter_type = trivial_inference.key_value_types(
         input_type)
     value_type = windowed_value_iter_type.inner_type.inner_type
     return typehints.Iterable[typehints.KV[key_type,
                                            typehints.Iterable[value_type]]]
Example #6
 def infer_output_type(self, input_type):
     key_type, value_type = trivial_inference.key_value_types(input_type)
     return typehints.KV[key_type, typehints.Iterable[value_type]]
Example #7
 def infer_output_type(self, input_type):
   key_type, value_type = trivial_inference.key_value_types(input_type)
   return KV[key_type, Iterable[value_type]]
Example #8
 def infer_output_type(self, input_type):
   key_type, windowed_value_iter_type = trivial_inference.key_value_types(
       input_type)
   value_type = windowed_value_iter_type.inner_type.inner_type
   return Iterable[KV[key_type, Iterable[value_type]]]
Example #9
 def infer_output_type(self, input_type):
   key_type, value_type = trivial_inference.key_value_types(input_type)
   return Iterable[KV[key_type, typehints.WindowedValue[value_type]]]
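Examples #6 and #7 implement the same mapping: KV[K, V] in, KV[K, Iterable[V]] out. Below is a hypothetical standalone helper (grouped_output_type is not a Beam API, only an illustration) that mirrors those infer_output_type bodies so the inferred hint can be inspected in isolation:

# Hypothetical helper mirroring the infer_output_type bodies of Examples #6/#7.
from apache_beam import typehints
from apache_beam.typehints import trivial_inference


def grouped_output_type(input_type):
    # KV[K, V] -> KV[K, Iterable[V]]
    key_type, value_type = trivial_inference.key_value_types(input_type)
    return typehints.KV[key_type, typehints.Iterable[value_type]]


print(grouped_output_type(typehints.KV[str, int]))
# Expected: a KV/Tuple constraint over str and Iterable[int].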