Exemplo n.º 1
0
 def _write_text(self, path, write_mode):
     """
     Registers a text sink for this data set.

     A new OperationInfo tagged as SINK_TEXT is created with this set's
     info as its parent, and is recorded both on the parent and on the
     environment.

     :param path: Value stored as the sink's ``path``.
     :param write_mode: Value stored as the sink's ``write_mode``.
     """
     text_sink = OperationInfo()
     text_sink.identifier = _Identifier.SINK_TEXT
     text_sink.parent = self._info
     text_sink.path = path
     text_sink.write_mode = write_mode
     # The sink is tracked by the parent operation and by the environment.
     self._info.sinks.append(text_sink)
     self._env._sinks.append(text_sink)
Exemplo n.º 2
0
 def _write_text(self, path, write_mode):
     """
     Attaches a text-file sink to this data set.

     :param path: Stored on the new sink operation as ``path``.
     :param write_mode: Stored on the new sink operation as ``write_mode``.
     """
     sink_info = OperationInfo()
     sink_info.identifier = _Identifier.SINK_TEXT
     sink_info.parent = self._info
     sink_info.path = path
     sink_info.write_mode = write_mode
     # Register with the parent operation first, then with the environment.
     self._info.sinks.append(sink_info)
     self._env._sinks.append(sink_info)
Exemplo n.º 3
0
 def _write_csv(self, path, line_delimiter, field_delimiter, write_mode):
     """
     Registers a CSV sink for this data set.

     A new OperationInfo tagged as SINK_CSV is created with this set's info
     as its parent, and is recorded both on the parent and on the
     environment.

     :param path: Value stored as the sink's ``path``.
     :param line_delimiter: Value stored as the sink's ``delimiter_line``.
     :param field_delimiter: Value stored as the sink's ``delimiter_field``.
     :param write_mode: Value stored as the sink's ``write_mode``.
     """
     csv_sink = OperationInfo()
     csv_sink.identifier = _Identifier.SINK_CSV
     csv_sink.path = path
     csv_sink.parent = self._info
     csv_sink.delimiter_field = field_delimiter
     csv_sink.delimiter_line = line_delimiter
     csv_sink.write_mode = write_mode
     # The sink is tracked by the parent operation and by the environment.
     self._info.sinks.append(csv_sink)
     self._env._sinks.append(csv_sink)
Exemplo n.º 4
0
 def _write_csv(self, path, line_delimiter, field_delimiter, write_mode):
     """
     Attaches a CSV-file sink to this data set.

     :param path: Stored on the new sink operation as ``path``.
     :param line_delimiter: Stored on the new sink operation as ``delimiter_line``.
     :param field_delimiter: Stored on the new sink operation as ``delimiter_field``.
     :param write_mode: Stored on the new sink operation as ``write_mode``.
     """
     sink_info = OperationInfo()
     sink_info.identifier = _Identifier.SINK_CSV
     sink_info.path = path
     sink_info.parent = self._info
     sink_info.delimiter_field = field_delimiter
     sink_info.delimiter_line = line_delimiter
     sink_info.write_mode = write_mode
     # Register with the parent operation first, then with the environment.
     self._info.sinks.append(sink_info)
     self._env._sinks.append(sink_info)
Exemplo n.º 5
0
 def _write_text(self, path, write_mode):
     """
     Registers a text sink for this data set and returns its DataSink handle.

     :param path: Value stored as the sink's ``path``.
     :param write_mode: Value stored as the sink's ``write_mode``.
     :return: The DataSink constructed from the environment and the new sink operation.
     """
     sink_info = OperationInfo()
     sink_handle = DataSink(self._env, sink_info)
     sink_info.identifier = _Identifier.SINK_TEXT
     sink_info.parent = self._info
     sink_info.path = path
     sink_info.write_mode = write_mode
     # NOTE(review): this overwrites the parent's parallelism with the new
     # sink's value rather than the other way round -- confirm this is intended.
     self._info.parallelism = sink_info.parallelism
     # The sink is tracked by the parent operation and by the environment.
     self._info.sinks.append(sink_info)
     self._env._sinks.append(sink_info)
     return sink_handle
Exemplo n.º 6
0
 def _write_text(self, path, write_mode):
     """
     Attaches a text-file sink to this data set.

     :param path: Stored on the new sink operation as ``path``.
     :param write_mode: Stored on the new sink operation as ``write_mode``.
     :return: A DataSink wrapping the environment and the new sink operation.
     """
     text_sink = OperationInfo()
     result = DataSink(self._env, text_sink)
     text_sink.identifier = _Identifier.SINK_TEXT
     text_sink.parent = self._info
     text_sink.path = path
     text_sink.write_mode = write_mode
     # NOTE(review): the parent's parallelism is replaced by the sink's
     # parallelism here -- verify the direction of this assignment.
     self._info.parallelism = text_sink.parallelism
     # Register with the parent operation first, then with the environment.
     self._info.sinks.append(text_sink)
     self._env._sinks.append(text_sink)
     return result
Exemplo n.º 7
0
    def write_text(self, path, write_mode=WriteMode.NO_OVERWRITE):
        """
        Writes a DataSet as a text file to the specified location.

        The sink is only registered here (on this set's operation info and on
        the environment); nothing is returned.

        :param path: The path pointing to the location the text file is written to.
        :param write_mode: OutputFormat.WriteMode value, indicating whether files should be overwritten.
        """
        text_sink = OperationInfo()
        text_sink.identifier = _Identifier.SINK_TEXT
        text_sink.parent = self._info
        text_sink.path = path
        text_sink.write_mode = write_mode
        # The sink is tracked by the parent operation and by the environment.
        self._info.sinks.append(text_sink)
        self._env._sinks.append(text_sink)
Exemplo n.º 8
0
    def write_text(self, path, write_mode=WriteMode.NO_OVERWRITE):
        """
        Writes a DataSet as a text file to the specified location.

        :param path: The path pointing to the location the text file is written to.
        :param write_mode: OutputFormat.WriteMode value, indicating whether files should be overwritten.
        """
        sink_info = OperationInfo()
        sink_info.identifier = _Identifier.SINK_TEXT
        sink_info.parent = self._info
        sink_info.path = path
        sink_info.write_mode = write_mode
        # Register with the parent operation first, then with the environment.
        self._info.sinks.append(sink_info)
        self._env._sinks.append(sink_info)
Exemplo n.º 9
0
    def read_text(self, path):
        """
        Creates a DataSet that represents the Strings produced by reading the given file line by line.

        The file will be read with the system's default character set.

        :param path: The path of the file, as a URI (e.g., "file:///some/local/file" or "hdfs://host:port/file/path").
        :return: A DataSet that represents the data read from the given file as text lines.
        """
        source_info = OperationInfo()
        text_lines = DataSet(self, source_info)
        source_info.identifier = _Identifier.SOURCE_TEXT
        source_info.path = path
        # Record the source on this environment.
        self._sources.append(source_info)
        return text_lines
Exemplo n.º 10
0
    def read_text(self, path):
        """
        Creates a DataSet of the Strings obtained by reading the given file line by line.

        The file will be read with the system's default character set.

        :param path: The path of the file, as a URI (e.g., "file:///some/local/file" or "hdfs://host:port/file/path").
        :return: A DataSet that represents the data read from the given file as text lines.
        """
        text_source = OperationInfo()
        result = DataSet(self, text_source)
        text_source.identifier = _Identifier.SOURCE_TEXT
        text_source.path = path
        # The new source is tracked in this object's list of sources.
        self._sources.append(text_source)
        return result
Exemplo n.º 11
0
 def read_custom(self, path, filter, splits, format):
     """
     Creates a DataSet using a custom input format that is executed directly in the Python process.

     :param path: Value stored as the source's ``path``.
     :param filter: Value stored as the source's ``filter``.
     :param splits: Value stored as the source's ``computeSplits``.
     :param format: The input format; a deep copy of it is stored as the source's ``operator``.
     :return: The DataSet wrapping the newly registered source.
     """
     source_info = OperationInfo()
     custom_set = DataSet(self, source_info)
     source_info.identifier = _Identifier.SOURCE_CUSTOM
     source_info.name = "PythonInputFormat"
     source_info.path = path
     source_info.filter = filter
     source_info.computeSplits = splits
     # Deep-copied so the stored operator is independent of the caller's object.
     source_info.operator = copy.deepcopy(format)
     source_info.types = _createArrayTypeInfo()
     self._sources.append(source_info)
     return custom_set
Exemplo n.º 12
0
 def read_custom(self, path, filter, splits, format):
     """
     Creates a DataSet backed by a custom input format that is executed directly in the Python process.

     :param path: Stored on the source operation as ``path``.
     :param filter: Stored on the source operation as ``filter``.
     :param splits: Stored on the source operation as ``computeSplits``.
     :param format: Deep-copied and stored on the source operation as ``operator``.
     :return: A DataSet bound to this object and the new source operation.
     """
     custom_source = OperationInfo()
     result = DataSet(self, custom_source)
     custom_source.identifier = _Identifier.SOURCE_CUSTOM
     custom_source.name = "PythonInputFormat"
     custom_source.path = path
     custom_source.filter = filter
     custom_source.computeSplits = splits
     # The format object is copied rather than shared with the caller.
     custom_source.operator = copy.deepcopy(format)
     custom_source.types = _createArrayTypeInfo()
     self._sources.append(custom_source)
     return result
Exemplo n.º 13
0
    def read_csv(self, path, types, line_delimiter="\n", field_delimiter=','):
        """
        Creates a DataSet that represents the tuples produced by reading the given CSV file.

        :param path: The path of the CSV file.
        :param types: Specifies the types for the CSV fields.
        :param line_delimiter: Stored as the source's line delimiter; defaults to a newline.
        :param field_delimiter: Stored as the source's field delimiter; defaults to a comma.
        :return: A DataSet wrapping the newly registered CSV source.
        """
        csv_source = OperationInfo()
        csv_set = DataSet(self, csv_source)
        csv_source.identifier = _Identifier.SOURCE_CSV
        csv_source.delimiter_line = line_delimiter
        csv_source.delimiter_field = field_delimiter
        csv_source.path = path
        csv_source.types = types
        # Record the source on this environment.
        self._sources.append(csv_source)
        return csv_set
Exemplo n.º 14
0
    def read_csv(self, path, types, line_delimiter="\n", field_delimiter=','):
        """
        Creates a DataSet of the tuples produced by reading the given CSV file.

        :param path: The path of the CSV file.
        :param types: Specifies the types for the CSV fields.
        :param line_delimiter: Line delimiter recorded on the source operation (default: newline).
        :param field_delimiter: Field delimiter recorded on the source operation (default: comma).
        :return: A DataSet bound to this object and the new CSV source operation.
        """
        source_info = OperationInfo()
        result = DataSet(self, source_info)
        source_info.identifier = _Identifier.SOURCE_CSV
        source_info.delimiter_line = line_delimiter
        source_info.delimiter_field = field_delimiter
        source_info.path = path
        source_info.types = types
        # The new source is tracked in this object's list of sources.
        self._sources.append(source_info)
        return result
Exemplo n.º 15
0
    def write_csv(self, path, line_delimiter="\n", field_delimiter=',', write_mode=WriteMode.NO_OVERWRITE):
        """
        Writes a Tuple DataSet as a CSV file to the specified location.

        Note: Only a Tuple DataSet can be written as a CSV file.

        :param path: The path pointing to the location the CSV file is written to.
        :param line_delimiter: Stored as the sink's line delimiter; defaults to a newline.
        :param field_delimiter: Stored as the sink's field delimiter; defaults to a comma.
        :param write_mode: OutputFormat.WriteMode value, indicating whether files should be overwritten.
        """
        csv_sink = OperationInfo()
        csv_sink.identifier = _Identifier.SINK_CSV
        csv_sink.path = path
        csv_sink.parent = self._info
        csv_sink.delimiter_field = field_delimiter
        csv_sink.delimiter_line = line_delimiter
        csv_sink.write_mode = write_mode
        # The sink is tracked by the parent operation and by the environment.
        self._info.sinks.append(csv_sink)
        self._env._sinks.append(csv_sink)
Exemplo n.º 16
0
    def write_csv(self,
                  path,
                  line_delimiter="\n",
                  field_delimiter=',',
                  write_mode=WriteMode.NO_OVERWRITE):
        """
        Writes a Tuple DataSet as a CSV file to the specified location.

        Note: Only a Tuple DataSet can be written as a CSV file.

        :param path: The path pointing to the location the CSV file is written to.
        :param line_delimiter: Line delimiter recorded on the sink operation (default: newline).
        :param field_delimiter: Field delimiter recorded on the sink operation (default: comma).
        :param write_mode: OutputFormat.WriteMode value, indicating whether files should be overwritten.
        """
        sink_info = OperationInfo()
        sink_info.identifier = _Identifier.SINK_CSV
        sink_info.path = path
        sink_info.parent = self._info
        sink_info.delimiter_field = field_delimiter
        sink_info.delimiter_line = line_delimiter
        sink_info.write_mode = write_mode
        # Register with the parent operation first, then with the environment.
        self._info.sinks.append(sink_info)
        self._env._sinks.append(sink_info)