def __init__(self,
             executor: Any,
             input_base: Optional[channel.Channel] = None,
             input_config: Optional[example_gen_pb2.Input] = None,
             output_config: Optional[example_gen_pb2.Output] = None,
             component_name: Optional[Text] = 'ExampleGen',
             unique_name: Optional[Text] = None,
             outputs: Optional[base_component.ComponentOutputs] = None):
    """Set up an ExampleGen component.

    Args:
        executor: Executor forwarded unchanged to the base component.
        input_base: Optional input. When truthy it is wrapped with
            `channel.as_channel` and registered under the 'input-base' key,
            and `driver.Driver` is used; otherwise the input dict is empty
            and `base_driver.BaseDriver` is used.
        input_config: Optional example_gen_pb2.Input instance. When falsy,
            `utils.make_default_input_config()` is used instead.
        output_config: Optional example_gen_pb2.Output instance. When falsy,
            `utils.make_default_output_config` is called with the resolved
            input config.
        component_name: Forwarded to the base component as `component_name`.
        unique_name: Forwarded to the base component as `unique_name`.
        outputs: Forwarded to the base component as `outputs`.

    Raises:
        RuntimeError: If both input_base and input_config are None.
    """
    nothing_to_read = input_base is None and input_config is None
    if nothing_to_read:
        raise RuntimeError('One of input_base and input_config must be set.')

    if input_base:
        input_dict = {'input-base': channel.as_channel(input_base)}
    else:
        input_dict = {}

    # The defaults are resolved in the component rather than in the executor
    # because the output artifacts depend on them.
    self._input_config = input_config or utils.make_default_input_config()
    self._output_config = (
        output_config
        or utils.make_default_output_config(self._input_config))

    # Both configs travel to the executor as JSON strings.
    exec_properties = {
        key: json_format.MessageToJson(message)
        for key, message in (('input', self._input_config),
                             ('output', self._output_config))
    }

    driver_class = driver.Driver if input_base else base_driver.BaseDriver
    super(ExampleGen, self).__init__(
        component_name=component_name,
        unique_name=unique_name,
        driver=driver_class,
        executor=executor,
        input_dict=input_dict,
        outputs=outputs,
        exec_properties=exec_properties)
def __init__(self,
             input_config: example_gen_pb2.Input,
             output_config: Optional[example_gen_pb2.Output] = None,
             component_name: Optional[Text] = 'ExampleGen',
             example_artifacts: Optional[channel.Channel] = None,
             name: Optional[Text] = None):
    """Construct an ExampleGen component.

    Args:
        input_config: An example_gen_pb2.Input instance describing the input
            configuration. A falsy value is replaced by
            `utils.make_default_input_config()`.
        output_config: An example_gen_pb2.Output instance describing the
            output configuration. If unset, default splits will be 'train'
            and 'eval' with size 2:1.
        component_name: Name of the component; should be unique per component
            class. Defaults to 'ExampleGen' and can be overwritten by
            sub-classes.
        example_artifacts: Optional channel of 'ExamplesPath' for output
            train and eval examples. When falsy, a channel holding one
            'ExamplesPath' artifact per generated output split name is built.
        name: Unique name for every component class instance.
    """
    # Fill in whatever the caller left out.
    if not input_config:
        input_config = utils.make_default_input_config()
    if not output_config:
        output_config = utils.make_default_output_config(input_config)
    if not example_artifacts:
        split_names = utils.generate_output_split_names(
            input_config, output_config)
        example_artifacts = channel.as_channel([
            types.TfxArtifact('ExamplesPath', split=split_name)
            for split_name in split_names
        ])

    component_spec = ExampleGenSpec(
        component_name=component_name,
        input_config=input_config,
        output_config=output_config,
        examples=example_artifacts)
    super(_ExampleGen, self).__init__(spec=component_spec, name=name)
def __init__(self,
             query: Optional[Text] = None,
             input_config: Optional[example_gen_pb2.Input] = None,
             output_config: Optional[example_gen_pb2.Output] = None,
             example_artifacts: Optional[channel.Channel] = None,
             name: Optional[Text] = None):
    """Constructs a BigQueryExampleGen component.

    Args:
        query: BigQuery sql string; the query result is treated as a single
            split. Used only when input_config is not given.
        input_config: An example_gen_pb2.Input instance with Split.pattern
            as BigQuery sql string. Allows different queries per split.
        output_config: An example_gen_pb2.Output instance, providing output
            configuration. If unset, default splits will be 'train' and
            'eval' with size 2:1.
        example_artifacts: Optional channel of 'ExamplesPath' for output
            train and eval examples.
        name: Optional unique name. Necessary if multiple BigQueryExampleGen
            components are declared in the same pipeline.

    Raises:
        RuntimeError: If query and input_config are both set or both unset.
    """
    has_query = bool(query)
    has_input_config = bool(input_config)
    if has_query == has_input_config:
        raise RuntimeError(
            'Exactly one of query and input_config should be set.')

    # Only the query was supplied: wrap it in a default input config.
    if not input_config:
        input_config = utils.make_default_input_config(query)

    super(BigQueryExampleGen, self).__init__(
        input_config=input_config,
        output_config=output_config,
        example_artifacts=example_artifacts,
        name=name)
def __init__(self,
             query: Optional[str] = None,
             input_config: Optional[example_gen_pb2.Input] = None,
             output_config: Optional[example_gen_pb2.Output] = None,
             example_artifacts: Optional[types.Channel] = None):
    """Constructs a BigQueryExampleGen component.

    Args:
        query: BigQuery sql string; the query result is treated as a single
            split. Used only when input_config is not given.
        input_config: An example_gen_pb2.Input instance with Split.pattern
            as BigQuery sql string. Allows different queries per split. If
            any field is provided as a RuntimeParameter, input_config should
            be constructed as a dict with the same field names as the Input
            proto message.
        output_config: An example_gen_pb2.Output instance, providing output
            configuration. If unset, default splits will be 'train' and
            'eval' with size 2:1. If any field is provided as a
            RuntimeParameter, output_config should be constructed as a dict
            with the same field names as the Output proto message.
        example_artifacts: Optional channel of 'ExamplesPath' for output
            train and eval examples.

    Raises:
        RuntimeError: If query and input_config are both set or both unset.
    """
    has_query = bool(query)
    has_input_config = bool(input_config)
    if has_query == has_input_config:
        raise RuntimeError(
            'Exactly one of query and input_config should be set.')

    # Only the query was supplied: wrap it in a default input config.
    if not input_config:
        input_config = utils.make_default_input_config(query)

    super(BigQueryExampleGen, self).__init__(
        input_config=input_config,
        output_config=output_config,
        example_artifacts=example_artifacts)
def __init__(
        self,
        input_base: Optional[Text] = None,
        input_config: Optional[Union[example_gen_pb2.Input,
                                     Dict[Text, Any]]] = None,
        output_config: Optional[Union[example_gen_pb2.Output,
                                      Dict[Text, Any]]] = None,
        custom_config: Optional[Union[example_gen_pb2.CustomConfig,
                                      Dict[Text, Any]]] = None,
        range_config: Optional[Union[range_config_pb2.RangeConfig,
                                     Dict[Text, Any]]] = None,
        output_data_format: Optional[int] = example_gen_pb2.FORMAT_TF_EXAMPLE,
        example_artifacts: Optional[types.Channel] = None,
        custom_executor_spec: Optional[executor_spec.ExecutorSpec] = None,
        instance_name: Optional[Text] = None):
    """Construct a FileBasedExampleGen component.

    Args:
        input_base: an external directory containing the data files.
        input_config: An
            [`example_gen_pb2.Input`](https://github.com/tensorflow/tfx/blob/master/tfx/proto/example_gen.proto)
            instance, providing input configuration. If unset, input files
            will be treated as a single split.
        output_config: An example_gen_pb2.Output instance, providing the
            output configuration. If unset, default splits will be 'train'
            and 'eval' with size 2:1.
        custom_config: An optional example_gen_pb2.CustomConfig instance,
            providing custom configuration for executor.
        range_config: An optional range_config_pb2.RangeConfig instance,
            specifying the range of span values to consider. If unset, driver
            will default to searching for latest span with no restrictions.
        output_data_format: Payload format of generated data in output
            artifact, one of example_gen_pb2.PayloadFormat enum.
        example_artifacts: Channel of 'ExamplesPath' for output train and
            eval examples. When falsy, a new
            `types.Channel(type=standard_artifacts.Examples)` is created.
        custom_executor_spec: Optional custom executor spec overriding the
            default executor spec specified in the component attribute.
        instance_name: Optional unique instance name. Required only if
            multiple ExampleGen components are declared in the same pipeline.
    """
    # Fill in whatever the caller left out.
    if not input_config:
        input_config = utils.make_default_input_config()
    if not output_config:
        output_config = utils.make_default_output_config(input_config)
    example_artifacts = example_artifacts or types.Channel(
        type=standard_artifacts.Examples)

    component_spec = FileBasedExampleGenSpec(
        input_base=input_base,
        input_config=input_config,
        output_config=output_config,
        custom_config=custom_config,
        range_config=range_config,
        output_data_format=output_data_format,
        examples=example_artifacts)
    super(FileBasedExampleGen, self).__init__(
        spec=component_spec,
        custom_executor_spec=custom_executor_spec,
        instance_name=instance_name)
def __init__(
        self,
        input: types.Channel = None,  # pylint: disable=redefined-builtin
        input_config: Optional[Union[example_gen_pb2.Input,
                                     Dict[Text, Any]]] = None,
        output_config: Optional[Union[example_gen_pb2.Output,
                                      Dict[Text, Any]]] = None,
        custom_config: Optional[Union[example_gen_pb2.CustomConfig,
                                      Dict[Text, Any]]] = None,
        example_artifacts: Optional[types.Channel] = None,
        custom_executor_spec: Optional[executor_spec.ExecutorSpec] = None,
        input_base: Optional[types.Channel] = None,
        instance_name: Optional[Text] = None):
    """Construct a FileBasedExampleGen component.

    Either `input` or `input_base` is expected to be supplied; `input` wins
    when both are truthy. This constructor itself does not validate that one
    of them was given.

    Args:
        input: A Channel of type `standard_artifacts.ExternalArtifact`, which
            includes one artifact whose uri is an external directory
            containing the data files. _required_
        input_config: An
            [`example_gen_pb2.Input`](https://github.com/tensorflow/tfx/blob/master/tfx/proto/example_gen.proto)
            instance, providing input configuration. If unset, the files
            under input_base will be treated as a single dataset.
        output_config: An example_gen_pb2.Output instance, providing the
            output configuration. If unset, default splits will be 'train'
            and 'eval' with size 2:1.
        custom_config: An optional example_gen_pb2.CustomConfig instance,
            providing custom configuration for executor.
        example_artifacts: Channel of 'ExamplesPath' for output train and
            eval examples. When falsy, a channel with one Examples artifact
            per generated output split name is built.
        custom_executor_spec: Optional custom executor spec overriding the
            default executor spec specified in the component attribute.
        input_base: Backwards compatibility alias for the 'input' argument.
        instance_name: Optional unique instance name. Required only if
            multiple ExampleGen components are declared in the same pipeline.
    """
    # Fall back to the legacy alias when `input` was not supplied.
    if not input:
        input = input_base

    # Fill in whatever the caller left out.
    if not input_config:
        input_config = utils.make_default_input_config()
    if not output_config:
        output_config = utils.make_default_output_config(input_config)
    if not example_artifacts:
        split_names = utils.generate_output_split_names(
            input_config, output_config)
        example_artifacts = channel_utils.as_channel([
            standard_artifacts.Examples(split=str(split_name))
            for split_name in split_names
        ])

    component_spec = FileBasedExampleGenSpec(
        input_base=input,
        input_config=input_config,
        output_config=output_config,
        custom_config=custom_config,
        examples=example_artifacts)
    super(FileBasedExampleGen, self).__init__(
        spec=component_spec,
        custom_executor_spec=custom_executor_spec,
        instance_name=instance_name)
def testMakeDefaultOutputConfig(self):
    """Checks how many output splits the default output config carries."""
    # A default (single-split) input yields two output splits.
    default_input = utils.make_default_input_config()
    output_config = utils.make_default_output_config(default_input)
    self.assertEqual(2, len(output_config.split_config.splits))

    # An input that already names two splits yields no output splits.
    pre_split_input = example_gen_pb2.Input(splits=[
        example_gen_pb2.Input.Split(name='train', pattern='train/*'),
        example_gen_pb2.Input.Split(name='eval', pattern='eval/*'),
    ])
    output_config = utils.make_default_output_config(pre_split_input)
    self.assertEqual(0, len(output_config.split_config.splits))
def __init__(
        self,
        input_base: Optional[str] = None,
        input_config: Optional[Union[example_gen_pb2.Input,
                                     data_types.RuntimeParameter]] = None,
        output_config: Optional[Union[example_gen_pb2.Output,
                                      data_types.RuntimeParameter]] = None,
        custom_config: Optional[Union[example_gen_pb2.CustomConfig,
                                      data_types.RuntimeParameter]] = None,
        range_config: Optional[Union[range_config_pb2.RangeConfig,
                                     data_types.RuntimeParameter]] = None,
        output_data_format: Optional[int] = example_gen_pb2.FORMAT_TF_EXAMPLE,
        output_file_format: Optional[int] = (
            example_gen_pb2.FORMAT_TFRECORDS_GZIP),
        custom_executor_spec: Optional[executor_spec.ExecutorSpec] = None):
    """Construct a FileBasedExampleGen component.

    Args:
        input_base: an external directory containing the data files.
        input_config: An
            [`example_gen_pb2.Input`](https://github.com/tensorflow/tfx/blob/master/tfx/proto/example_gen.proto)
            instance, providing input configuration. If unset, input files
            will be treated as a single split.
        output_config: An example_gen_pb2.Output instance, providing the
            output configuration. If unset, default splits will be 'train'
            and 'eval' with size 2:1.
        custom_config: An optional example_gen_pb2.CustomConfig instance,
            providing custom configuration for executor.
        range_config: An optional range_config_pb2.RangeConfig instance,
            specifying the range of span values to consider. If unset, driver
            will default to searching for latest span with no restrictions.
        output_data_format: Payload format of generated data in output
            artifact, one of example_gen_pb2.PayloadFormat enum.
        output_file_format: File format of generated data in output artifact,
            one of example_gen_pb2.FileFormat enum.
        custom_executor_spec: Optional custom executor spec overriding the
            default executor spec specified in the component attribute.
    """
    # Fill in whatever the caller left out.
    if not input_config:
        input_config = utils.make_default_input_config()
    if not output_config:
        output_config = utils.make_default_output_config(input_config)

    # The output channel is always created here; callers cannot supply one.
    examples_channel = types.Channel(type=standard_artifacts.Examples)

    component_spec = standard_component_specs.FileBasedExampleGenSpec(
        input_base=input_base,
        input_config=input_config,
        output_config=output_config,
        custom_config=custom_config,
        range_config=range_config,
        output_data_format=output_data_format,
        output_file_format=output_file_format,
        examples=examples_channel)
    super().__init__(
        spec=component_spec, custom_executor_spec=custom_executor_spec)
def __init__(self,
             query: Optional[Text] = None,
             elwc_config: Optional[elwc_config_pb2.ElwcConfig] = None,
             input_config: Optional[example_gen_pb2.Input] = None,
             output_config: Optional[example_gen_pb2.Output] = None,
             example_artifacts: Optional[types.Channel] = None,
             instance_name: Optional[Text] = None):
    """Constructs a BigQueryElwcExampleGen component.

    Args:
        query: BigQuery sql string; the query result is treated as a single
            split. Used only when input_config is not given.
        elwc_config: The elwc config contains a list of context feature
            fields. The fields are used to build context feature. Examples
            with the same context feature will be converted to an
            ELWC (ExampleListWithContext) instance. For example, when there
            are two examples with the same context field, the two examples
            will be integrated into one ELWC instance.
        input_config: An example_gen_pb2.Input instance with Split.pattern
            as BigQuery sql string. Allows different queries per split. If
            any field is provided as a RuntimeParameter, input_config should
            be constructed as a dict with the same field names as the Input
            proto message.
        output_config: An example_gen_pb2.Output instance, providing output
            configuration. If unset, default splits will be 'train' and
            'eval' with size 2:1. If any field is provided as a
            RuntimeParameter, output_config should be constructed as a dict
            with the same field names as the Output proto message.
        example_artifacts: Optional channel of 'ExamplesPath' for output
            train and eval examples.
        instance_name: Optional unique instance name. Necessary if multiple
            BigQueryExampleGen components are declared in the same pipeline.

    Raises:
        RuntimeError: If query and input_config are both set or both unset,
            or if elwc_config is falsy.
    """
    has_query = bool(query)
    has_input_config = bool(input_config)
    if has_query == has_input_config:
        raise RuntimeError(
            'Exactly one of query and input_config should be set.')
    if not elwc_config:
        raise RuntimeError(
            'elwc_config is required for BigQueryToElwcExampleGen.')

    # Only the query was supplied: wrap it in a default input config.
    if not input_config:
        input_config = utils.make_default_input_config(query)

    # The ELWC config is packed into the CustomConfig's `custom_config`
    # field before being handed to the parent constructor.
    custom_config_proto = example_gen_pb2.CustomConfig()
    custom_config_proto.custom_config.Pack(elwc_config)

    super(BigQueryToElwcExampleGen, self).__init__(
        input_config=input_config,
        output_config=output_config,
        output_data_format=example_gen_pb2.FORMAT_PROTO,
        custom_config=custom_config_proto,
        example_artifacts=example_artifacts,
        instance_name=instance_name)
def __init__(
        self,
        input_base: types.Channel = None,
        input_config: Optional[example_gen_pb2.Input] = None,
        output_config: Optional[example_gen_pb2.Output] = None,
        custom_config: Optional[example_gen_pb2.CustomConfig] = None,
        component_name: Optional[Text] = 'ExampleGen',
        example_artifacts: Optional[types.Channel] = None,
        executor_class: Optional[Type[base_executor.BaseExecutor]] = None,
        input: Optional[types.Channel] = None,  # pylint: disable=redefined-builtin
        name: Optional[Text] = None):
    """Construct a FileBasedExampleGen component.

    Args:
        input_base: A Channel of 'ExternalPath' type, which includes one
            artifact whose uri is an external directory with data files
            inside (required).
        input_config: An optional example_gen_pb2.Input instance, providing
            input configuration. If unset, the files under input_base (must
            set) will be treated as a single split.
        output_config: An optional example_gen_pb2.Output instance,
            providing output configuration. If unset, default splits will be
            'train' and 'eval' with size 2:1.
        custom_config: An optional example_gen_pb2.CustomConfig instance,
            providing custom configuration for executor.
        component_name: Name of the component; should be unique per component
            class. Defaults to 'ExampleGen'. This constructor accepts the
            argument but does not forward it anywhere.
        example_artifacts: Optional channel of 'ExamplesPath' for output
            train and eval examples. When falsy, a channel with one Examples
            artifact per generated output split name is built.
        executor_class: Optional custom executor class overriding the default
            executor specified in the component attribute.
        input: Forwards compatibility alias for the 'input_base' argument;
            used only when input_base is falsy.
        name: Unique name for every component class instance.
    """
    # Fall back to the forward-compatible alias when input_base is missing.
    if not input_base:
        input_base = input

    # Fill in whatever the caller left out.
    if not input_config:
        input_config = utils.make_default_input_config()
    if not output_config:
        output_config = utils.make_default_output_config(input_config)
    if not example_artifacts:
        split_names = utils.generate_output_split_names(
            input_config, output_config)
        example_artifacts = channel_utils.as_channel([
            standard_artifacts.Examples(split=split_name)
            for split_name in split_names
        ])

    component_spec = FileBasedExampleGenSpec(
        input_base=input_base,
        input_config=input_config,
        output_config=output_config,
        custom_config=custom_config,
        examples=example_artifacts)
    super(FileBasedExampleGen, self).__init__(
        spec=component_spec,
        custom_executor_class=executor_class,
        name=name)
def __init__(self,
             conn_config: presto_config_pb2.PrestoConnConfig,
             query: Optional[Text] = None,
             input_config: Optional[example_gen_pb2.Input] = None,
             output_config: Optional[example_gen_pb2.Output] = None,
             example_artifacts: Optional[channel.Channel] = None,
             name: Optional[Text] = None):
    """Constructs a PrestoExampleGen component.

    Args:
        conn_config: Parameters for Presto connection client.
        query: Presto sql string; the query result is treated as a single
            split. Used only when input_config is not given.
        input_config: An example_gen_pb2.Input instance with Split.pattern
            as Presto sql string. Allows different queries per split.
        output_config: An example_gen_pb2.Output instance, providing output
            configuration. If unset, default splits will be 'train' and
            'eval' with size 2:1.
        example_artifacts: Optional channel of 'ExamplesPath' for output
            train and eval examples.
        name: Optional unique name. Necessary if multiple PrestoExampleGen
            components are declared in the same pipeline.

    Raises:
        RuntimeError: If query and input_config are both set or both unset,
            or if the host field of conn_config is empty.
    """
    has_query = bool(query)
    has_input_config = bool(input_config)
    if has_query == has_input_config:
        raise RuntimeError(
            'Exactly one of query and input_config should be set.')
    if not conn_config.host:
        raise RuntimeError(
            'Required host field in connection config should be set.')

    # Only the query was supplied: wrap it in a default input config.
    if not input_config:
        input_config = utils.make_default_input_config(query)

    # The connection parameters are packed into the CustomConfig's
    # `custom_config` field before being handed to the parent constructor.
    custom_config_proto = example_gen_pb2.CustomConfig()
    custom_config_proto.custom_config.Pack(conn_config)

    if not output_config:
        output_config = utils.make_default_output_config(input_config)

    super(PrestoExampleGen, self).__init__(
        input_config=input_config,
        output_config=output_config,
        custom_config=custom_config_proto,
        component_name='PrestoExampleGen',
        example_artifacts=example_artifacts,
        name=name)
def __init__(
        self,
        input_example: channel.Channel,
        string_execution_parameter: Text,
        integer_execution_parameter: int,
        output_example: Optional[channel.Channel] = None,
        # The three parameters below must be left as they are.
        input_config: Optional[example_gen_pb2.Input] = None,
        output_config: Optional[example_gen_pb2.Output] = None,
        name: Optional[Text] = None):
    """Constructs a Head Component.

    Args:
        input_example: A Channel of 'RandomTypeNameForInput' type (the type
            can be any string, as long as it is consistent across the
            channel, spec and artifacts).
        string_execution_parameter: A string execution parameter (only used
            in the executor, not persistent or shared upstream).
        integer_execution_parameter: An integer execution parameter (only
            used in the executor, not persistent or shared upstream).
        output_example: Optional output channel of 'RandomTypeNameForOutput'
            (the type can be any string, as long as it is consistent across
            the channel, spec and artifacts); created here if not specified.
        input_config: An optional example_gen_pb2.Input instance, providing
            input configuration. A falsy value is replaced by
            `utils.make_default_input_config()`.
        output_config: An optional example_gen_pb2.Output instance,
            providing output configuration. If unset, default splits will be
            'train' and 'eval' with size 2:1.
        name: Optional unique name. Necessary if multiple instances of this
            component are declared in the same pipeline.
    """
    # Fill in the input/output configuration (leave this part unchanged).
    if not input_config:
        input_config = utils.make_default_input_config()
    if not output_config:
        output_config = utils.make_default_output_config(input_config)

    if not output_example:
        output_example = channel.Channel(
            type_name='RandomTypeNameForOutput',
            artifacts=[types.TfxArtifact('RandomTypeNameForOutput')])

    component_spec = CustomHeadComponentSpec(
        input_example=input_example,
        integer_execution_parameter=integer_execution_parameter,
        string_execution_parameter=string_execution_parameter,
        input_config=input_config,
        output_config=output_config,
        output_example=output_example)
    super(CustomHeadComponent, self).__init__(spec=component_spec, name=name)
def __init__(self,
             query: Optional[Text] = None,
             input_config: Optional[example_gen_pb2.Input] = None,
             output_config: Optional[example_gen_pb2.Output] = None,
             name: Optional[Text] = None,
             outputs: Optional[base_component.ComponentOutputs] = None):
    """Constructs a BigQueryExampleGen component.

    Args:
        query: Optional query string. Used only when input_config is not
            given, in which case it is wrapped via
            `utils.make_default_input_config(query)`.
        input_config: Optional example_gen_pb2.Input instance.
        output_config: Optional example_gen_pb2.Output instance. When falsy,
            `utils.make_default_output_config` is called with the resolved
            input config.
        name: Forwarded to the parent constructor as `unique_name`.
        outputs: Forwarded to the parent constructor as `outputs`.

    Raises:
        RuntimeError: If query and input_config are both set or both unset.
    """
    # The guard rejects "both" and "neither" alike, so the message says
    # "exactly one" rather than "only one".
    if bool(query) == bool(input_config):
        raise RuntimeError(
            'Exactly one of query and input_config should be set.')

    input_config = input_config or utils.make_default_input_config(query)
    output_config = output_config or utils.make_default_output_config(
        input_config)

    # There is no file-based input for this component, hence input_base=None.
    super(BigQueryExampleGen, self).__init__(
        executor=executor.Executor,
        input_base=None,
        input_config=input_config,
        output_config=output_config,
        component_name='BigQueryExampleGen',
        unique_name=name,
        outputs=outputs)
def __init__(
        self,
        input_base: channel.Channel,
        input_config: Optional[example_gen_pb2.Input] = None,
        output_config: Optional[example_gen_pb2.Output] = None,
        example_artifacts: Optional[channel.Channel] = None,
        executor_class: Optional[Type[base_executor.BaseExecutor]] = None,
        name: Optional[Text] = None):
    """Construct a FileBasedExampleGen component.

    Args:
        input_base: A Channel of 'ExternalPath' type, which includes one
            artifact whose uri is an external directory with data files
            inside.
        input_config: An optional example_gen_pb2.Input instance, providing
            input configuration. If unset, the files under input_base (must
            set) will be treated as a single split.
        output_config: An optional example_gen_pb2.Output instance,
            providing output configuration. If unset, default splits will be
            'train' and 'eval' with size 2:1.
        example_artifacts: Optional channel of 'ExamplesPath' for output
            train and eval examples. When falsy, a channel holding one
            'ExamplesPath' artifact per generated output split name is built.
        executor_class: Optional custom executor class overriding the default
            executor specified in the component attribute.
        name: Unique name for every component class instance.
    """
    # Fill in whatever the caller left out.
    if not input_config:
        input_config = utils.make_default_input_config()
    if not output_config:
        output_config = utils.make_default_output_config(input_config)
    if not example_artifacts:
        split_names = utils.generate_output_split_names(
            input_config, output_config)
        example_artifacts = channel.as_channel([
            types.TfxArtifact('ExamplesPath', split=split_name)
            for split_name in split_names
        ])

    component_spec = FileBasedExampleGenSpec(
        input_base=input_base,
        input_config=input_config,
        output_config=output_config,
        examples=example_artifacts)
    super(FileBasedExampleGen, self).__init__(
        spec=component_spec,
        custom_executor_class=executor_class,
        name=name)
def __init__(self,
             conn_config: presto_config_pb2.PrestoConnConfig,
             query: Optional[str] = None,
             input_config: Optional[example_gen_pb2.Input] = None,
             output_config: Optional[example_gen_pb2.Output] = None):
    """Constructs a PrestoExampleGen component.

    Args:
        conn_config: Parameters for Presto connection client.
        query: Presto sql string; the query result is treated as a single
            split. Used only when input_config is not given.
        input_config: An example_gen_pb2.Input instance with Split.pattern
            as Presto sql string. Allows different queries per split.
        output_config: An example_gen_pb2.Output instance, providing output
            configuration. If unset, default splits will be 'train' and
            'eval' with size 2:1.

    Raises:
        RuntimeError: If query and input_config are both set or both unset,
            or if the host field of conn_config is empty.
    """
    has_query = bool(query)
    has_input_config = bool(input_config)
    if has_query == has_input_config:
        raise RuntimeError(
            'Exactly one of query and input_config should be set.')
    if not conn_config.host:
        raise RuntimeError(
            'Required host field in connection config should be set.')

    # Only the query was supplied: wrap it in a default input config.
    if not input_config:
        input_config = utils.make_default_input_config(query)

    # The connection parameters are packed into the CustomConfig's
    # `custom_config` field before being handed to the parent constructor.
    custom_config_proto = example_gen_pb2.CustomConfig()
    custom_config_proto.custom_config.Pack(conn_config)

    if not output_config:
        output_config = utils.make_default_output_config(input_config)

    super().__init__(
        input_config=input_config,
        output_config=output_config,
        custom_config=custom_config_proto)
def __init__(self,
             input_base: channel.Channel,
             input_config: Optional[example_gen_pb2.Input] = None,
             output_config: Optional[example_gen_pb2.Output] = None,
             component_name: Optional[Text] = 'ExampleGen',
             example_artifacts: Optional[channel.Channel] = None,
             name: Optional[Text] = None):
    """Construct a FileBasedExampleGen component.

    Args:
        input_base: A Channel of 'ExternalPath' type, which includes one
            artifact whose uri is an external directory with data files
            inside.
        input_config: An optional example_gen_pb2.Input instance, providing
            input configuration. If unset, the files under input_base (must
            set) will be treated as a single split.
        output_config: An optional example_gen_pb2.Output instance,
            providing output configuration. If unset, default splits will be
            'train' and 'eval' with size 2:1.
        component_name: Name of the component; should be unique per component
            class. Defaults to 'ExampleGen' and can be overwritten by
            sub-classes.
        example_artifacts: Optional channel of 'ExamplesPath' for output
            train and eval examples. When falsy, a channel holding one
            'ExamplesPath' artifact per generated output split name is built.
        name: Unique name for every component class instance.
    """
    # Fill in whatever the caller left out.
    if not input_config:
        input_config = utils.make_default_input_config()
    if not output_config:
        output_config = utils.make_default_output_config(input_config)
    if not example_artifacts:
        split_names = utils.generate_output_split_names(
            input_config, output_config)
        example_artifacts = channel.as_channel([
            types.TfxArtifact('ExamplesPath', split=split_name)
            for split_name in split_names
        ])

    component_spec = FileBasedExampleGenSpec(
        component_name=component_name,
        input_base=input_base,
        input_config=input_config,
        output_config=output_config,
        examples=example_artifacts)
    super(_FileBasedExampleGen, self).__init__(
        spec=component_spec, name=name)
def __init__(
        self,
        query: Optional[str] = None,
        input_config: Optional[Union[example_gen_pb2.Input,
                                     data_types.RuntimeParameter]] = None,
        output_config: Optional[Union[example_gen_pb2.Output,
                                      data_types.RuntimeParameter]] = None,
        range_config: Optional[Union[range_config_pb2.RangeConfig,
                                     data_types.RuntimeParameter]] = None):
    """Constructs a BigQueryExampleGen component.

    Args:
        query: BigQuery sql string; the query result is treated as a single
            split. Used only when input_config is not given.
        input_config: An example_gen_pb2.Input instance with Split.pattern
            as BigQuery sql string. Allows different queries per split. If
            any field is provided as a RuntimeParameter, input_config should
            be constructed as a dict with the same field names as the Input
            proto message.
        output_config: An example_gen_pb2.Output instance, providing output
            configuration. If unset, default splits will be 'train' and
            'eval' with size 2:1. If any field is provided as a
            RuntimeParameter, output_config should be constructed as a dict
            with the same field names as the Output proto message.
        range_config: An optional range_config_pb2.RangeConfig instance,
            specifying the range of span values to consider.

    Raises:
        RuntimeError: If query and input_config are both set or both unset.
    """
    has_query = bool(query)
    has_input_config = bool(input_config)
    if has_query == has_input_config:
        raise RuntimeError(
            'Exactly one of query and input_config should be set.')

    # Only the query was supplied: wrap it in a default input config.
    if not input_config:
        input_config = utils.make_default_input_config(query)

    super().__init__(
        input_config=input_config,
        output_config=output_config,
        range_config=range_config)
def __init__(
        self,
        # TODO(b/159467778): deprecate this, use input_base instead.
        input: Optional[types.Channel] = None,  # pylint: disable=redefined-builtin
        input_base: Optional[Text] = None,
        input_config: Optional[Union[example_gen_pb2.Input,
                                     Dict[Text, Any]]] = None,
        output_config: Optional[Union[example_gen_pb2.Output,
                                      Dict[Text, Any]]] = None,
        custom_config: Optional[Union[example_gen_pb2.CustomConfig,
                                      Dict[Text, Any]]] = None,
        output_data_format: Optional[int] = example_gen_pb2.FORMAT_TF_EXAMPLE,
        example_artifacts: Optional[types.Channel] = None,
        custom_executor_spec: Optional[executor_spec.ExecutorSpec] = None,
        instance_name: Optional[Text] = None):
    """Construct a FileBasedExampleGen component.

    Args:
        input: A Channel of type `standard_artifacts.ExternalArtifact`, which
            includes one artifact whose uri is an external directory
            containing the data files. (Deprecated by input_base.) When
            truthy, a deprecation warning is logged and input_base is
            overwritten with the result of `artifact_utils.get_single_uri`
            applied to the channel's artifacts.
        input_base: an external directory containing the data files.
        input_config: An
            [`example_gen_pb2.Input`](https://github.com/tensorflow/tfx/blob/master/tfx/proto/example_gen.proto)
            instance, providing input configuration. If unset, input files
            will be treated as a single split.
        output_config: An example_gen_pb2.Output instance, providing the
            output configuration. If unset, default splits will be 'train'
            and 'eval' with size 2:1.
        custom_config: An optional example_gen_pb2.CustomConfig instance,
            providing custom configuration for executor.
        output_data_format: Payload format of generated data in output
            artifact, one of example_gen_pb2.PayloadFormat enum.
        example_artifacts: Channel of 'ExamplesPath' for output train and
            eval examples. When falsy, a new
            `types.Channel(type=standard_artifacts.Examples)` is created.
        custom_executor_spec: Optional custom executor spec overriding the
            default executor spec specified in the component attribute.
        instance_name: Optional unique instance name. Required only if
            multiple ExampleGen components are declared in the same pipeline.
    """
    if input:
        deprecation_notice = (
            'The "input" argument to the ExampleGen component has been '
            'deprecated by "input_base". Please update your usage as support '
            'for this argument will be removed soon.')
        logging.warning(deprecation_notice)
        # The legacy channel takes precedence over any input_base passed in.
        legacy_artifacts = list(input.get())
        input_base = artifact_utils.get_single_uri(legacy_artifacts)

    # Fill in whatever the caller left out.
    if not input_config:
        input_config = utils.make_default_input_config()
    if not output_config:
        output_config = utils.make_default_output_config(input_config)
    example_artifacts = example_artifacts or types.Channel(
        type=standard_artifacts.Examples)

    component_spec = FileBasedExampleGenSpec(
        input_base=input_base,
        input_config=input_config,
        output_config=output_config,
        custom_config=custom_config,
        output_data_format=output_data_format,
        examples=example_artifacts)
    super(FileBasedExampleGen, self).__init__(
        spec=component_spec,
        custom_executor_spec=custom_executor_spec,
        instance_name=instance_name)
def __init__(self,
             query: Optional[Text] = None,
             beam_transform: beam.PTransform = None,
             bucket_name: Optional[Text] = None,
             output_schema: Optional[Text] = None,
             table_name: Optional[Text] = None,
             use_bigquery_source: Optional[Any] = False,
             input_config: Optional[example_gen_pb2.Input] = None,
             output_config: Optional[example_gen_pb2.Output] = None,
             example_artifacts: Optional[types.Channel] = None,
             instance_name: Optional[Text] = None):
    """Construct a TCGAPreprocessing component.

    The Beam transform is uploaded to GCS via `upload_beam_to_gcs` and the
    value returned by that call is handed to the spec as `input_base`.

    Args:
      query: BigQuery sql string. The query result is treated as a single
        split; can be overwritten by input_config.
      beam_transform: beam.PTransform pipeline used to process the data
        ingested by the BigQuery query.
      bucket_name: Name of a GCS bucket, used as temporary storage space to
        read the query and pickle file.
      output_schema: Forwarded unchanged to the component spec.
        NOTE(review): presumably the schema of the BigQuery output table;
        confirm against the spec/executor.
      table_name: Name of the BigQuery output table.
      use_bigquery_source: Whether to use BigQuerySource instead of the
        experimental `ReadFromBigQuery` PTransform (required by the
        BigQueryExampleGen executor).
      input_config: An example_gen_pb2.Input instance with Split.pattern as
        BigQuery sql string. If set, it overwrites the 'query' arg and allows
        different queries per split. If any field is provided as a
        RuntimeParameter, input_config should be constructed as a dict with
        the same field names as the Input proto message. Defaults to
        `utils.make_default_input_config()` when unset.
      output_config: An example_gen_pb2.Output instance describing the
        output. When unset, the default splits are 'train' and 'eval' with
        size 2:1. If any field is provided as a RuntimeParameter,
        output_config should be constructed as a dict with the same field
        names as the Output proto message.
      example_artifacts: Optional channel of 'ExamplesPath' for the output
        train and eval examples. A channel wrapping a single
        `standard_artifacts.Examples` artifact is created when omitted.
      instance_name: Optional unique instance name. Necessary if multiple
        TCGAPreprocessing components are declared in the same pipeline.

    NOTE(review): this constructor does not itself check that only one of
    `query` and `input_config` is set; both are forwarded to the spec.
    """
    # Resolve defaults. The output default depends on the resolved input
    # configuration, so the input is settled first.
    if not input_config:
        input_config = utils.make_default_input_config()
    if not output_config:
        output_config = utils.make_default_output_config(input_config)
    examples_channel = example_artifacts or channel_utils.as_channel(
        [standard_artifacts.Examples()])

    # Upload the Beam transform to the GCS bucket; the returned location is
    # what the spec receives as its input base.
    transform_location = upload_beam_to_gcs(beam_transform, bucket_name)

    spec = TCGAPreprocessingSpec(
        # custom parameters
        query=query,
        output_schema=output_schema,
        table_name=table_name,
        use_bigquery_source=use_bigquery_source,
        # default parameters
        input_config=input_config,
        output_config=output_config,
        input_base=transform_location,
        # outputs
        examples=examples_channel)
    super(TCGAPreprocessing, self).__init__(
        spec=spec, instance_name=instance_name)
def __init__(
        self,
        input: types.Channel = None,  # pylint: disable=redefined-builtin
        input_config: Optional[Union[example_gen_pb2.Input,
                                     Dict[Text, Any]]] = None,
        output_config: Optional[Union[example_gen_pb2.Output,
                                      Dict[Text, Any]]] = None,
        custom_config: Optional[Union[example_gen_pb2.CustomConfig,
                                      Dict[Text, Any]]] = None,
        example_artifacts: Optional[types.Channel] = None,
        custom_executor_spec: Optional[executor_spec.ExecutorSpec] = None,
        input_base: Optional[types.Channel] = None,
        instance_name: Optional[Text] = None,
        enable_cache: Optional[bool] = None):
    """Build a FileBasedExampleGen component.

    Either `input_base` or `input` must be present in the input arguments.

    Args:
      input: A Channel of type `standard_artifacts.ExternalArtifact` holding
        one artifact whose uri is an external directory containing the data
        files. _required_
      input_config: An
        [`example_gen_pb2.Input`](https://github.com/tensorflow/tfx/blob/master/tfx/proto/example_gen.proto)
        instance describing the input. When unset, the files under
        input_base are treated as a single dataset.
      output_config: An example_gen_pb2.Output instance describing the
        output. When unset, the default splits are 'train' and 'eval' with
        size 2:1.
      custom_config: Optional example_gen_pb2.CustomConfig instance carrying
        custom configuration for the executor.
      example_artifacts: Channel of 'ExamplesPath' for the output train and
        eval examples. When omitted, a channel wrapping one
        `standard_artifacts.Examples` artifact is created, with its
        split names derived from the input and output configuration.
      custom_executor_spec: Optional executor spec that overrides the default
        one declared on the component.
      input_base: Backwards compatibility alias for the 'input' argument.
        When given, it replaces `input` and a deprecation warning is logged.
      instance_name: Optional unique instance name. Only required when
        several ExampleGen components live in the same pipeline.
      enable_cache: Optional boolean indicating whether cache is enabled for
        the FileBasedExampleGen component. If not specified, defaults to the
        value specified for the pipeline's enable_cache parameter.
    """
    source_channel = input
    if input_base:
        absl.logging.warning(
            'The "input_base" argument to the ExampleGen component has '
            'been renamed to "input" and is deprecated. Please update your '
            'usage as support for this argument will be removed soon.')
        # The deprecated alias wins over `input` when both are supplied.
        source_channel = input_base

    # Resolve defaults. The output default depends on the resolved input
    # configuration, so the input is settled first.
    if not input_config:
        input_config = utils.make_default_input_config()
    if not output_config:
        output_config = utils.make_default_output_config(input_config)

    if not example_artifacts:
        # Record the split names on the default output artifact.
        split_names = utils.generate_output_split_names(
            input_config, output_config)
        examples_artifact = standard_artifacts.Examples()
        examples_artifact.split_names = artifact_utils.encode_split_names(
            split_names)
        example_artifacts = channel_utils.as_channel([examples_artifact])

    spec = FileBasedExampleGenSpec(
        input=source_channel,
        input_config=input_config,
        output_config=output_config,
        custom_config=custom_config,
        examples=example_artifacts)
    super(FileBasedExampleGen, self).__init__(
        spec=spec,
        custom_executor_spec=custom_executor_spec,
        instance_name=instance_name,
        enable_cache=enable_cache)