def _infer_result_type(self, transform, inputs, result_pcollection): # TODO(robertwb): Multi-input, multi-output inference. type_options = self._options.view_as(TypeOptions) if (type_options is not None and type_options.pipeline_type_check and isinstance(result_pcollection, pvalue.PCollection) and (not result_pcollection.element_type # TODO(robertwb): Ideally we'd do intersection here. or result_pcollection.element_type == typehints.Any)): input_element_type = (inputs[0].element_type if len(inputs) == 1 else typehints.Any) type_hints = transform.get_type_hints() declared_output_type = type_hints.simple_output_type( transform.label) if declared_output_type: input_types = type_hints.input_types if input_types and input_types[0]: declared_input_type = input_types[0][0] result_pcollection.element_type = typehints.bind_type_variables( declared_output_type, typehints.match_type_variables(declared_input_type, input_element_type)) else: result_pcollection.element_type = declared_output_type else: result_pcollection.element_type = transform.infer_output_type( input_element_type)
def _infer_result_type(self, transform, inputs, result_pcollection): # TODO(robertwb): Multi-input, multi-output inference. # TODO(robertwb): Ideally we'd do intersection here. type_options = self._options.view_as(TypeOptions) if (type_options is not None and type_options.pipeline_type_check and isinstance(result_pcollection, pvalue.PCollection) and not result_pcollection.element_type): input_element_type = ( inputs[0].element_type if len(inputs) == 1 else typehints.Any) type_hints = transform.get_type_hints() declared_output_type = type_hints.simple_output_type(transform.label) if declared_output_type: input_types = type_hints.input_types if input_types and input_types[0]: declared_input_type = input_types[0][0] result_pcollection.element_type = typehints.bind_type_variables( declared_output_type, typehints.match_type_variables(declared_input_type, input_element_type)) else: result_pcollection.element_type = declared_output_type else: result_pcollection.element_type = transform.infer_output_type( input_element_type)
def _infer_result_type(self, transform, inputs, result_pcollection): # TODO(robertwb): Multi-input inference. type_options = self._options.view_as(TypeOptions) if type_options is None or not type_options.pipeline_type_check: return if (isinstance(result_pcollection, pvalue.PCollection) and (not result_pcollection.element_type # TODO(robertwb): Ideally we'd do intersection here. or result_pcollection.element_type == typehints.Any)): # Single-input, single-output inference. input_element_type = (inputs[0].element_type if len(inputs) == 1 else typehints.Any) type_hints = transform.get_type_hints() declared_output_type = type_hints.simple_output_type( transform.label) if declared_output_type: input_types = type_hints.input_types if input_types and input_types[0]: declared_input_type = input_types[0][0] result_pcollection.element_type = typehints.bind_type_variables( declared_output_type, typehints.match_type_variables(declared_input_type, input_element_type)) else: result_pcollection.element_type = declared_output_type else: result_pcollection.element_type = transform.infer_output_type( input_element_type) elif isinstance(result_pcollection, pvalue.DoOutputsTuple): # Single-input, multi-output inference. # TODO(BEAM-4132): Add support for tagged type hints. # https://github.com/apache/beam/pull/9810#discussion_r338765251 for pcoll in result_pcollection: if pcoll.element_type is None: pcoll.element_type = typehints.Any
def apply(self, transform, pvalueish=None, label=None): """Applies a custom transform using the pvalueish specified. Args: transform (~apache_beam.transforms.ptransform.PTransform): the :class:`~apache_beam.transforms.ptransform.PTransform` to apply. pvalueish (~apache_beam.pvalue.PCollection): the input for the :class:`~apache_beam.transforms.ptransform.PTransform` (typically a :class:`~apache_beam.pvalue.PCollection`). label (str): label of the :class:`~apache_beam.transforms.ptransform.PTransform`. Raises: ~exceptions.TypeError: if the transform object extracted from the argument list is not a :class:`~apache_beam.transforms.ptransform.PTransform`. ~exceptions.RuntimeError: if the transform object was already applied to this pipeline and needs to be cloned in order to apply again. """ if isinstance(transform, ptransform._NamedPTransform): return self.apply(transform.transform, pvalueish, label or transform.label) if not isinstance(transform, ptransform.PTransform): raise TypeError("Expected a PTransform object, got %s" % transform) if label: # Fix self.label as it is inspected by some PTransform operations # (e.g. to produce error messages for type hint violations). try: old_label, transform.label = transform.label, label return self.apply(transform, pvalueish) finally: transform.label = old_label full_label = '/'.join([self._current_transform().full_label, label or transform.label]).lstrip('/') if full_label in self.applied_labels: raise RuntimeError( 'Transform "%s" does not have a stable unique label. ' 'This will prevent updating of pipelines. ' 'To apply a transform with a specified label write ' 'pvalue | "label" >> transform' % full_label) self.applied_labels.add(full_label) pvalueish, inputs = transform._extract_input_pvalues(pvalueish) try: inputs = tuple(inputs) for leaf_input in inputs: if not isinstance(leaf_input, pvalue.PValue): raise TypeError except TypeError: raise NotImplementedError( 'Unable to extract PValue inputs from %s; either %s does not accept ' 'inputs of this format, or it does not properly override ' '_extract_input_pvalues' % (pvalueish, transform)) current = AppliedPTransform( self._current_transform(), transform, full_label, inputs) self._current_transform().add_part(current) self.transforms_stack.append(current) type_options = self._options.view_as(TypeOptions) if type_options.pipeline_type_check: transform.type_check_inputs(pvalueish) pvalueish_result = self.runner.apply(transform, pvalueish) if type_options is not None and type_options.pipeline_type_check: transform.type_check_outputs(pvalueish_result) for result in ptransform.get_nested_pvalues(pvalueish_result): assert isinstance(result, (pvalue.PValue, pvalue.DoOutputsTuple)) # Make sure we set the producer only for a leaf node in the transform DAG. # This way we preserve the last transform of a composite transform as # being the real producer of the result. if result.producer is None: result.producer = current # TODO(robertwb): Multi-input, multi-output inference. # TODO(robertwb): Ideally we'd do intersection here. if (type_options is not None and type_options.pipeline_type_check and isinstance(result, pvalue.PCollection) and not result.element_type): input_element_type = ( inputs[0].element_type if len(inputs) == 1 else typehints.Any) type_hints = transform.get_type_hints() declared_output_type = type_hints.simple_output_type(transform.label) if declared_output_type: input_types = type_hints.input_types if input_types and input_types[0]: declared_input_type = input_types[0][0] result.element_type = typehints.bind_type_variables( declared_output_type, typehints.match_type_variables(declared_input_type, input_element_type)) else: result.element_type = declared_output_type else: result.element_type = transform.infer_output_type(input_element_type) assert isinstance(result.producer.inputs, tuple) current.add_output(result) if (type_options is not None and type_options.type_check_strictness == 'ALL_REQUIRED' and transform.get_type_hints().output_types is None): ptransform_name = '%s(%s)' % (transform.__class__.__name__, full_label) raise TypeCheckError('Pipeline type checking is enabled, however no ' 'output type-hint was found for the ' 'PTransform %s' % ptransform_name) current.update_input_refcounts() self.transforms_stack.pop() return pvalueish_result
def apply(self, transform, pvalueish=None, label=None): """Applies a custom transform using the pvalueish specified. Args: transform: the PTranform to apply. pvalueish: the input for the PTransform (typically a PCollection). label: label of the PTransform. Raises: TypeError: if the transform object extracted from the argument list is not a PTransform. RuntimeError: if the transform object was already applied to this pipeline and needs to be cloned in order to apply again. """ if isinstance(transform, ptransform._NamedPTransform): return self.apply(transform.transform, pvalueish, label or transform.label) if not isinstance(transform, ptransform.PTransform): raise TypeError("Expected a PTransform object, got %s" % transform) if label: # Fix self.label as it is inspected by some PTransform operations # (e.g. to produce error messages for type hint violations). try: old_label, transform.label = transform.label, label return self.apply(transform, pvalueish) finally: transform.label = old_label full_label = '/'.join([self._current_transform().full_label, label or transform.label]).lstrip('/') if full_label in self.applied_labels: raise RuntimeError( 'Transform "%s" does not have a stable unique label. ' 'This will prevent updating of pipelines. ' 'To apply a transform with a specified label write ' 'pvalue | "label" >> transform' % full_label) self.applied_labels.add(full_label) pvalueish, inputs = transform._extract_input_pvalues(pvalueish) try: inputs = tuple(inputs) for leaf_input in inputs: if not isinstance(leaf_input, pvalue.PValue): raise TypeError except TypeError: raise NotImplementedError( 'Unable to extract PValue inputs from %s; either %s does not accept ' 'inputs of this format, or it does not properly override ' '_extract_input_pvalues' % (pvalueish, transform)) current = AppliedPTransform( self._current_transform(), transform, full_label, inputs) self._current_transform().add_part(current) self.transforms_stack.append(current) type_options = self._options.view_as(TypeOptions) if type_options.pipeline_type_check: transform.type_check_inputs(pvalueish) pvalueish_result = self.runner.apply(transform, pvalueish) if type_options is not None and type_options.pipeline_type_check: transform.type_check_outputs(pvalueish_result) for result in ptransform.GetPValues().visit(pvalueish_result): assert isinstance(result, (pvalue.PValue, pvalue.DoOutputsTuple)) # Make sure we set the producer only for a leaf node in the transform DAG. # This way we preserve the last transform of a composite transform as # being the real producer of the result. if result.producer is None: result.producer = current # TODO(robertwb): Multi-input, multi-output inference. # TODO(robertwb): Ideally we'd do intersection here. if (type_options is not None and type_options.pipeline_type_check and isinstance(result, pvalue.PCollection) and not result.element_type): input_element_type = ( inputs[0].element_type if len(inputs) == 1 else typehints.Any) type_hints = transform.get_type_hints() declared_output_type = type_hints.simple_output_type(transform.label) if declared_output_type: input_types = type_hints.input_types if input_types and input_types[0]: declared_input_type = input_types[0][0] result.element_type = typehints.bind_type_variables( declared_output_type, typehints.match_type_variables(declared_input_type, input_element_type)) else: result.element_type = declared_output_type else: result.element_type = transform.infer_output_type(input_element_type) assert isinstance(result.producer.inputs, tuple) current.add_output(result) if (type_options is not None and type_options.type_check_strictness == 'ALL_REQUIRED' and transform.get_type_hints().output_types is None): ptransform_name = '%s(%s)' % (transform.__class__.__name__, full_label) raise TypeCheckError('Pipeline type checking is enabled, however no ' 'output type-hint was found for the ' 'PTransform %s' % ptransform_name) current.update_input_refcounts() self.transforms_stack.pop() return pvalueish_result