Exemplo n.º 1
0
    def execute(self, *args):
        """
        Convert the single Excel file matched by src_dir/src_pattern to csv.

        The csv file is written to dest_dir/dest_pattern with self._encoding.

        Args
            *args: not used by this method

        Raises
            InvalidCount: no input file matched, or more than one matched
            InvalidFormat: the extension of dest_pattern is not .csv
        """
        # essential parameters check
        # NOTE(review): EssentialParameters is defined elsewhere; presumably it
        # rejects unset values when called - confirm against its definition.
        valid = EssentialParameters(
            self.__class__.__name__,
            [
                self._src_dir, self._src_pattern, self._dest_dir,
                self._dest_pattern
            ],
        )
        valid()

        # get a target file (exactly one input file is allowed)
        target_files = super().get_target_files(self._src_dir,
                                                self._src_pattern)
        if not target_files:
            raise InvalidCount("An input file %s does not exist." %
                               os.path.join(self._src_dir, self._src_pattern))
        if len(target_files) > 1:
            self._logger.error("Hit target files %s" % target_files)
            raise InvalidCount("Input files must be only one.")
        src_path = target_files[0]
        self._logger.info("A target file to be converted: %s" % src_path)

        # convert (only .csv is accepted as the output format)
        _, dest_ext = os.path.splitext(self._dest_pattern)
        if dest_ext != ".csv":
            raise InvalidFormat(
                "%s is not supported format in %s. The supported format is .csv"
                % (dest_ext, self._dest_pattern))

        # Excel workbooks are binary, so read_excel takes no text encoding.
        # pandas 1.0+ raises TypeError on an unknown 'encoding' keyword, so the
        # encoding is applied only when the csv is written.
        df = pandas.read_excel(src_path)
        dest_path = os.path.join(self._dest_dir, self._dest_pattern)
        self._logger.info("Convert %s to %s" % (src_path, dest_path))
        # NOTE(review): to_csv also writes the DataFrame index as the first
        # column by default - confirm that this is the intended output.
        df.to_csv(dest_path, encoding=self._encoding)
Exemplo n.º 2
0
    def execute(self, *args):
        """
        Convert the single Excel file matched by src_dir/src_pattern to csv.

        The csv file is written to dest_dir/dest_pattern with self._encoding.
        A deprecation warning about 'dest_pattern' is logged whenever
        dest_pattern is set.

        Args
            *args: not used by this method

        Raises
            InvalidCount: no input file matched, or more than one matched
            InvalidFormat: the extension of dest_pattern is not .csv
        """
        # essential parameters check
        # NOTE(review): EssentialParameters is defined elsewhere; presumably it
        # rejects unset values when called - confirm against its definition.
        valid = EssentialParameters(
            self.__class__.__name__,
            [
                self._src_dir, self._src_pattern, self._dest_dir,
                self._dest_pattern
            ],
        )
        valid()

        # announce the planned removal of 'dest_pattern'
        if self._dest_pattern:
            self._logger.warning(
                "'dest_pattern' will be unavailable in the near future." +
                "Basically every classes which extends FileBaseTransform will be allowed"
                +
                " plural input files, and output files will be the same name with input"
                + " file names.\n"
                "At that time, if 'dest_dir' is given, transformed files will be created in the given directory.\n"  # noqa
                +
                "If not, original files will be updated by transformed files.")

        # get a target file (exactly one input file is allowed)
        target_files = super().get_target_files(self._src_dir,
                                                self._src_pattern)
        if not target_files:
            raise InvalidCount("An input file %s does not exist." %
                               os.path.join(self._src_dir, self._src_pattern))
        if len(target_files) > 1:
            self._logger.error("Hit target files %s" % target_files)
            raise InvalidCount("Input files must be only one.")
        src_path = target_files[0]
        self._logger.info("A target file to be converted: %s" % src_path)

        # convert (only .csv is accepted as the output format)
        _, dest_ext = os.path.splitext(self._dest_pattern)
        if dest_ext != ".csv":
            raise InvalidFormat(
                "%s is not supported format in %s. The supported format is .csv"
                % (dest_ext, self._dest_pattern))

        # Excel workbooks are binary, so read_excel takes no text encoding.
        # pandas 1.0+ raises TypeError on an unknown 'encoding' keyword, so the
        # encoding is applied only when the csv is written.
        df = pandas.read_excel(src_path)
        dest_path = os.path.join(self._dest_dir, self._dest_pattern)
        self._logger.info("Convert %s to %s" % (src_path, dest_path))
        # NOTE(review): to_csv also writes the DataFrame index as the first
        # column by default - confirm that this is the intended output.
        df.to_csv(dest_path, encoding=self._encoding)
Exemplo n.º 3
0
    def __format_insert_data(self, insert_rows):
        """
        Pivot row dictionaries into a column-oriented dictionary.

        The result maps every configured column name to the list of values
        taken from each row, in row order, e.g.

        {
            "column1": [1, 2],
            "column2": ["spam", "spam"],
            ...
        }

        A row that lacks a column contributes None for that column.

        Args
            insert_rows: dictionary list of input cache

        Raises
            InvalidFormat: a column produced no values at all, which happens
                when insert_rows is empty
        """
        formatted = {}
        for column in self.__columns:
            values = []
            for row in insert_rows:
                # dict.get yields None when the row has no such key
                values.append(row.get(column))
            if len(values) == 0:
                raise InvalidFormat(
                    "Specified column %s does not exist in an input file." %
                    column)
            formatted[column] = values
        return formatted
Exemplo n.º 4
0
    def __create_insert_data(self, cache_list):
        """
        Pivot cached row dictionaries into a column-oriented dictionary.

        Column names are taken from the "name" entry of each item in
        self._table_schema. The result maps every column name to the list of
        values taken from each row, in row order, e.g.

        {
            "column1": [1, 2],
            "column2": ["spam", "spam"],
            ...
        }

        A row that lacks a column contributes None for that column.

        Args
            cache_list: dictionary list of input cache

        Raises
            InvalidFormat: a column produced no values at all, which happens
                when cache_list is empty
        """
        # collect every column name first, before any row is read
        column_names = []
        for field in self._table_schema:
            column_names.append(field["name"])

        result = {}
        for column in column_names:
            values = []
            for row in cache_list:
                # dict.get yields None when the row has no such key
                values.append(row.get(column))
            if len(values) == 0:
                raise InvalidFormat(
                    "Specified column %s does not exist in an input file." %
                    column)
            result[column] = values
        return result