def execute(self, *args):
    valid = EssentialParameters(
        self.__class__.__name__,
        [self._src_dir, self._src_pattern, self._dest_dir],
    )
    valid()

    if not self._columns and not self._column_numbers:
        raise InvalidParameter(
            "Specifying either 'columns' or 'column_numbers' is essential.")
    if self._columns and self._column_numbers:
        raise InvalidParameter(
            "Cannot specify both 'columns' and 'column_numbers'.")

    files = super().get_target_files(self._src_dir, self._src_pattern)
    if len(files) == 0:
        raise FileNotFound("The specified csv file was not found.")

    for f in files:
        _, filename = os.path.split(f)
        dest_path = os.path.join(self._dest_dir, filename)
        if self._columns:
            Csv.extract_columns_with_names(f, dest_path, self._columns)
        elif self._column_numbers:
            if isinstance(self._column_numbers, int):
                remain_column_numbers = [self._column_numbers]
            else:
                remain_column_numbers = [
                    int(n) for n in self._column_numbers.split(",")
                ]
            Csv.extract_columns_with_numbers(f, dest_path, remain_column_numbers)
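# Hedged sketch (not part of the class above): how the `column_numbers`
# value is normalized before calling Csv.extract_columns_with_numbers --
# an int selects a single column, a comma-separated string selects several.
# The helper name is illustrative only.
def _normalize_column_numbers(value):
    if isinstance(value, int):
        return [value]
    return [int(n) for n in value.split(",")]

assert _normalize_column_numbers(3) == [3]
assert _normalize_column_numbers("1,2,5") == [1, 2, 5]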
def execute(self, *args):
    # essential parameters check
    valid = EssentialParameters(
        self.__class__.__name__,
        [self._host, self._user, self._src_dir, self._src_pattern, self._dest_dir],
    )
    valid()

    sftp = Sftp(
        self._host,
        self._user,
        self._password,
        self._key,
        self._timeout,
        self._retry_count,
        self._port,
    )

    files = super().get_target_files(self._src_dir, self._src_pattern)
    if len(files) > 0:
        for file in files:
            sftp.put_file(
                file, os.path.join(self._dest_dir, os.path.basename(file)))
            self._logger.info("%s was successfully uploaded." % file)
    else:
        self._logger.info(
            "Files to upload do not exist. File pattern: {}".format(
                os.path.join(self._src_dir, self._src_pattern)))
        if self._quit is True:
            return StepStatus.SUCCESSFUL_TERMINATION
def execute(self, *args):
    # essential parameters check
    valid = EssentialParameters(
        self.__class__.__name__,
        [self._host, self._user, self._src_dir, self._src_pattern, self._dest_dir],
    )
    valid()

    sftp = Sftp(
        self._host,
        self._user,
        self._password,
        self._key,
        self._timeout,
        self._retry_count,
        self._port,
    )

    files = super().get_target_files(self._src_dir, self._src_pattern)
    for file in files:
        sftp.put_file(file, os.path.join(self._dest_dir, os.path.basename(file)))
def execute(self, *args):
    # essential parameters check
    valid = EssentialParameters(
        self.__class__.__name__,
        [self._src_dir, self._src_pattern, self._format],
    )
    valid()

    files = super().get_target_files(self._src_dir, self._src_pattern)
    self._logger.info("Files found %s" % files)

    out_dir = self._dest_dir if self._dest_dir is not None else self._src_dir
    for f in files:
        if self._format == "zip":
            self._logger.info("Compress file %s to zip." % f)
            with zipfile.ZipFile(
                os.path.join(out_dir, (os.path.basename(f) + ".zip")),
                "w",
                zipfile.ZIP_DEFLATED,
            ) as o:
                o.write(f, arcname=os.path.basename(f))
        elif self._format in ("gz", "gzip"):
            self._logger.info("Compress file %s to gzip." % f)
            with open(f, "rb") as i:
                with gzip.open(
                    os.path.join(out_dir, (os.path.basename(f) + ".gz")), "wb"
                ) as o:
                    shutil.copyfileobj(i, o)
        elif self._format in ("bz2", "bzip2"):
            self._logger.info("Compress file %s to bzip2." % f)
            with open(f, "rb") as i:
                with open(
                    os.path.join(out_dir, (os.path.basename(f) + ".bz2")), "wb"
                ) as o:
                    o.write(bz2.compress(i.read()))
def execute(self, *args):
    # essential parameters check
    valid = EssentialParameters(
        self.__class__.__name__,
        [self._src_dir, self._src_pattern, self._encoding_from, self._encoding_to],
    )
    valid()

    files = super().get_target_files(self._src_dir, self._src_pattern)
    if len(files) == 0:
        self._logger.info("No files are found. Nothing to do.")
        return

    for file in files:
        basename = os.path.basename(file)
        if self._dest_dir:
            File().convert_encoding(
                file,
                os.path.join(self._dest_dir, basename),
                self._encoding_from,
                self._encoding_to,
            )
        else:
            # Convert in place: write to a hidden temporary file in the
            # same directory, then replace the original with it.
            tmpfile = os.path.join(
                os.path.dirname(file),
                "." + StringUtil().random_str(10) + "." + basename,
            )
            File().convert_encoding(
                file, tmpfile, self._encoding_from, self._encoding_to
            )
            os.remove(file)
            os.rename(tmpfile, file)

        self._logger.info("Encoded file %s" % basename)
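# Hedged sketch of the in-place pattern above, with plain open() standing
# in for File().convert_encoding (an assumption for illustration only):
# the original is only replaced once the converted copy is complete.
def _convert_in_place(path, enc_from, enc_to):
    tmp = os.path.join(
        os.path.dirname(path), "." + os.path.basename(path) + ".tmp")
    with open(path, encoding=enc_from) as i, \
            open(tmp, mode="w", encoding=enc_to) as o:
        o.write(i.read())
    os.remove(path)
    os.rename(tmp, path)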
def execute(self, *args):
    # essential parameters check
    valid = EssentialParameters(
        self.__class__.__name__,
        [self._src_dir, self._src_pattern, self._dest_dir],
    )
    valid()

    files = super().get_target_files(self._src_dir, self._src_pattern)
    self._logger.info("Files found %s" % files)

    for file in files:
        root, _ = os.path.splitext(os.path.split(file)[1])
        dest_file = os.path.join(self._dest_dir, (root + ".jsonl"))
        with open(
            file, mode="r", encoding=self._encoding, newline=""
        ) as i, jsonlines.open(dest_file, mode="w") as writer:
            reader = csv.DictReader(i)
            for row in reader:
                writer.write(row)
def execute(self, *args):
    # essential parameters check
    valid = EssentialParameters(
        self.__class__.__name__,
        [self._host, self._user, self._src_dir, self._src_pattern, self._dest_dir],
    )
    valid()

    if isinstance(self._key, str):
        self._logger.warning(
            ("DeprecationWarning: "
             "In the near future, "
             "the `key` will be changed to accept only dictionary types."))
        key_filepath = self._key
    else:
        key_filepath = self._source_path_reader(self._key)

    sftp = Sftp(
        self._host,
        self._user,
        self._password,
        key_filepath,
        self._passphrase,
        self._timeout,
        self._retry_count,
        self._port,
    )

    files = super().get_target_files(self._src_dir, self._src_pattern)
    for file in files:
        sftp.put_file(file, os.path.join(self._dest_dir, os.path.basename(file)))
def execute(self, *args):
    for k, v in self.__dict__.items():
        self._logger.info("%s : %s" % (k, v))

    # essential parameters check
    valid = EssentialParameters(
        self.__class__.__name__,
        [self._host, self._user, self._src_dir, self._src_pattern, self._dest_dir],
    )
    valid()

    sftp = Sftp(
        self._host,
        self._user,
        self._password,
        self._key,
        self._timeout,
        self._retry_count,
        self._port,
    )

    files = super().get_target_files(self._src_dir, self._src_pattern)
    if len(files) > 0:
        for file in files:
            sftp.put_file(
                file, os.path.join(self._dest_dir, os.path.basename(file)))
    else:
        self._logger.info(
            "Files to upload do not exist. File pattern: {}".format(
                os.path.join(self._src_dir, self._src_pattern)))
        return 0
def execute(self, *args):
    file = super().execute()

    valid = EssentialParameters(
        self.__class__.__name__, [self._columns, self._formatter])
    valid()

    _, ext = os.path.splitext(file)
    if ext == ".csv":
        delimiter = ","
    elif ext == ".tsv":
        delimiter = "\t"
    else:
        # Guard: without this, `delimiter` would be unbound for any other
        # extension. InvalidFormat is the exception used by sibling steps.
        raise InvalidFormat(
            "%s is not supported. Only .csv and .tsv are supported." % ext)

    with codecs.open(file, mode="r", encoding=self._encoding) as fi, codecs.open(
        self._dest_path, mode="w", encoding=self._encoding
    ) as fo:
        reader = csv.DictReader(fi, delimiter=delimiter)
        writer = csv.DictWriter(fo, reader.fieldnames)
        writer.writeheader()

        date_util = DateUtil()
        for row in reader:
            for column in self._columns:
                r = row.get(column)
                if not r:
                    continue
                row[column] = date_util.convert_date_format(r, self._formatter)
            writer.writerow(row)
        fo.flush()

    self._logger.info("Finish %s" % self.__class__.__name__)
def execute(self, *args):
    super().execute()

    valid = EssentialParameters(
        self.__class__.__name__, [self._query, self._dest_path])
    valid()

    with self.get_adaptor() as ps:
        with open(
            self._dest_path, mode="w", encoding=self._encoding, newline=""
        ) as f:
            cur = ps.select(super()._property_path_reader(self._query))
            writer = None
            for i, row in enumerate(cur):
                if i == 0:
                    # Pick a writer from the shape of the first row: tuples
                    # get csv.writer with a header built from the cursor
                    # description, dicts get csv.DictWriter.
                    if type(row) is tuple:
                        writer = csv.writer(f, quoting=csv.QUOTE_ALL)
                        columns = [c[0] for c in cur.description]
                        writer.writerow(columns)
                    elif type(row) is dict:
                        writer = csv.DictWriter(
                            f, list(row.keys()), quoting=csv.QUOTE_ALL)
                        writer.writeheader()
                if writer:
                    writer.writerow(self.callback_handler(row))
                else:
                    f.write(self.callback_handler(row))
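# Hedged, self-contained sketch of the writer selection above (helper name
# illustrative): the first row's shape decides between csv.writer and
# csv.DictWriter; `columns` stands in for the cursor description.
import csv
import io

def _rows_to_csv(rows, columns=None):
    buf = io.StringIO()
    writer = None
    for i, row in enumerate(rows):
        if i == 0:
            if isinstance(row, tuple):
                writer = csv.writer(buf, quoting=csv.QUOTE_ALL)
                writer.writerow(columns)
            elif isinstance(row, dict):
                writer = csv.DictWriter(
                    buf, list(row.keys()), quoting=csv.QUOTE_ALL)
                writer.writeheader()
        writer.writerow(row)
    return buf.getvalue()

assert _rows_to_csv([("a", 1)], columns=["k", "v"]).startswith('"k","v"')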
def execute(self, *args):
    # essential parameters check
    valid = EssentialParameters(
        self.__class__.__name__,
        [self._src_dir, self._src_pattern, self._dest_dir, self._dest_pattern],
    )
    valid()

    # get a target file
    target_files = super().get_target_files(self._src_dir, self._src_pattern)
    if len(target_files) == 0:
        raise InvalidCount(
            "An input file %s does not exist."
            % os.path.join(self._src_dir, self._src_pattern))
    elif len(target_files) > 1:
        self._logger.error("Hit target files %s" % target_files)
        raise InvalidCount("Input files must be only one.")
    self._logger.info("A target file to be converted: %s" % target_files[0])

    # convert
    _, dest_ext = os.path.splitext(self._dest_pattern)
    if dest_ext != ".csv":
        raise InvalidFormat(
            "%s is not a supported format in %s. The supported format is .csv"
            % (dest_ext, self._dest_pattern))

    df = pandas.read_excel(target_files[0], encoding=self._encoding)
    dest_path = os.path.join(self._dest_dir, self._dest_pattern)
    self._logger.info("Convert %s to %s" % (target_files[0], dest_path))
    df.to_csv(dest_path, encoding=self._encoding)
def execute(self, *args):
    # essential parameters check
    valid = EssentialParameters(
        self.__class__.__name__, [self._src_dir, self._src_pattern])
    valid()

    files = super().get_target_files(self._src_dir, self._src_pattern)
    if len(files) == 0:
        self._logger.info("No files are found. Nothing to do.")
        return

    for file in files:
        dirname = os.path.dirname(file)
        basename = os.path.basename(file)
        # Split on the first dot so multi-part extensions such as
        # ".tar.gz" stay attached to the file.
        if "." in basename:
            nameonly, ext = basename.split(".", 1)
            ext = "." + ext
        else:
            nameonly = basename
            ext = ""
        newfilename = self._prefix + nameonly + self._suffix + ext
        newfilepath = os.path.join(dirname, newfilename)
        os.rename(file, newfilepath)
        self._logger.info("File name changed %s -> %s" % (file, newfilepath))
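# Hedged sketch of the rename rule above (helper name illustrative): the
# extension is everything after the first dot, so multi-part extensions
# such as ".tar.gz" survive the prefix/suffix insertion.
def _renamed(basename, prefix="", suffix=""):
    if "." in basename:
        nameonly, ext = basename.split(".", 1)
        ext = "." + ext
    else:
        nameonly, ext = basename, ""
    return prefix + nameonly + suffix + ext

assert _renamed("data.tar.gz", prefix="bk_") == "bk_data.tar.gz"
assert _renamed("report", suffix="_v2") == "report_v2"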
def execute(self, *args):
    super().execute()

    valid = EssentialParameters(self.__class__.__name__, [self._recipients])
    valid()

    files = super().get_target_files(self._src_dir, self._src_pattern)
    if len(files) == 0:
        self._logger.info("No files are found. Nothing to do.")
        return

    gpg = Gpg(self._gnupghome)
    if self._key_dir and self._key_pattern:
        key_files = super().get_target_files(self._key_dir, self._key_pattern)
        self._logger.info("Keys found %s" % key_files)
        self.key_import(gpg, key_files, self._trust_level)

    for file in files:
        dest_path = (
            os.path.join(self._dest_dir, os.path.basename(file))
            if self._dest_dir is not None
            else os.path.join(self._src_dir, os.path.basename(file))
        )
        gpg.encrypt(
            file,
            dest_path,
            recipients=self._recipients,
            passphrase=self._passphrase,
            always_trust=self._always_trust,
        )
def execute(self, *args):
    file = super().execute()

    # essential parameters check
    valid = EssentialParameters(
        self.__class__.__name__,
        [
            self._before_format,
            self._before_enc,
            self._after_format,
            self._after_enc,
            self._dest_dir,
            self._dest_pattern,
        ],
    )
    valid()

    with open(file, mode="rt", encoding=self._before_enc) as i:
        reader = csv.reader(
            i, delimiter=self._csv_delimiter(self._before_format))
        with open(
            os.path.join(self._dest_dir, self._dest_pattern),
            mode="wt",
            newline="",
            encoding=self._after_enc,
        ) as o:
            writer = csv.writer(
                o,
                delimiter=self._csv_delimiter(self._after_format),
                quoting=self._csv_quote(),
                lineterminator=self._csv_newline(),
            )
            for line in reader:
                writer.writerow(line)
def execute(self, *args):
    super().execute()

    valid = EssentialParameters(
        self.__class__.__name__,
        [self._collection, self._src_dir, self._src_pattern],
    )
    valid()

    files = super().get_target_files(self._src_dir, self._src_pattern)
    if len(files) == 0:
        raise FileNotFound("No files are found.")

    if isinstance(self._credentials, str):
        self._logger.warning((
            "DeprecationWarning: "
            "In the near future, "
            "the `credentials` will be changed to accept only dictionary types. "
            "Please see more information "
            "https://github.com/BrainPad/cliboa/blob/master/docs/modules/firestore_document_create.md"  # noqa
        ))
        key_filepath = self._credentials
    else:
        key_filepath = self._source_path_reader(self._credentials)

    firestore_client = Firestore.get_firestore_client(key_filepath)

    for file in files:
        with open(file) as f:
            # Use the file name (without extension) as the document id.
            fname = os.path.splitext(os.path.basename(file))[0]
            doc = firestore_client.collection(self._collection).document(fname)
            doc.set(json.load(f))
def execute(self, *args):
    super().execute()

    valid = EssentialParameters(
        self.__class__.__name__, [self._src_dir, self._src_pattern])
    valid()

    if isinstance(self._credentials, str):
        self._logger.warning((
            "DeprecationWarning: "
            "In the near future, "
            "the `credentials` will be changed to accept only dictionary types. "
            "Please see more information "
            "https://github.com/BrainPad/cliboa/blob/master/docs/modules/gcs_upload.md"
        ))
        key_filepath = self._credentials
    else:
        key_filepath = self._source_path_reader(self._credentials)

    gcs_client = Gcs.get_gcs_client(key_filepath)
    bucket = gcs_client.bucket(self._bucket)

    files = super().get_target_files(self._src_dir, self._src_pattern)
    self._logger.info("Upload files %s" % files)
    for file in files:
        self._logger.info("Start upload %s" % file)
        blob = bucket.blob(os.path.join(self._dest_dir, os.path.basename(file)))
        blob.upload_from_filename(file)
        self._logger.info("Finish upload %s" % file)
def execute(self, *args):
    # essential parameters check
    valid = EssentialParameters(
        self.__class__.__name__,
        [
            self._src_dir,
            self._src1_pattern,
            self._src2_pattern,
            self._dest_dir,
            self._dest_pattern,
        ],
    )
    valid()

    if self._dest_pattern:
        self._logger.warning(
            "'dest_pattern' will be unavailable in the near future. "
            "'dest_pattern' will change to 'dest_name'."
        )

    target1_files = File().get_target_files(self._src_dir, self._src1_pattern)
    target2_files = File().get_target_files(self._src_dir, self._src2_pattern)
    if len(target1_files) == 0:
        raise InvalidCount(
            "An input file %s does not exist."
            % os.path.join(self._src_dir, self._src1_pattern)
        )
    elif len(target2_files) == 0:
        raise InvalidCount(
            "An input file %s does not exist."
            % os.path.join(self._src_dir, self._src2_pattern)
        )
    elif len(target1_files) > 1:
        self._logger.error("Hit target files %s" % target1_files)
        raise InvalidCount("Input files must be only one.")
    elif len(target2_files) > 1:
        self._logger.error("Hit target files %s" % target2_files)
        raise InvalidCount("Input files must be only one.")

    self._logger.info("Merge %s and %s." % (target1_files[0], target2_files[0]))
    df1 = pandas.read_csv(
        os.path.join(self._src_dir, target1_files[0]),
        dtype=str,
        encoding=self._encoding,
    )
    df2 = pandas.read_csv(
        os.path.join(self._src_dir, target2_files[0]),
        dtype=str,
        encoding=self._encoding,
    )
    df = pandas.merge(df1, df2)
    # Drop the stray index column if one survived an earlier to_csv round
    # trip. It is a column, so check df.columns (not df.index).
    if "Unnamed: 0" in df.columns:
        del df["Unnamed: 0"]
    df.to_csv(
        os.path.join(self._dest_dir, self._dest_pattern),
        encoding=self._encoding,
        index=False,
    )
def execute(self, *args):
    for k, v in self.__dict__.items():
        self._logger.debug("%s : %s" % (k, v))

    # essential parameters check
    valid = EssentialParameters(
        self.__class__.__name__,
        [
            self._src_dir,
            self.__src1_pattern,
            self.__src2_pattern,
            self._dest_dir,
            self._dest_pattern,
        ],
    )
    valid()

    target1_files = File().get_target_files(self._src_dir, self.__src1_pattern)
    target2_files = File().get_target_files(self._src_dir, self.__src2_pattern)
    if len(target1_files) == 0:
        raise InvalidCount(
            "An input file %s does not exist."
            % os.path.join(self._src_dir, self.__src1_pattern))
    elif len(target2_files) == 0:
        raise InvalidCount(
            "An input file %s does not exist."
            % os.path.join(self._src_dir, self.__src2_pattern))
    elif len(target1_files) > 1:
        self._logger.error("Hit target files %s" % target1_files)
        raise InvalidCount("Input files must be only one.")
    elif len(target2_files) > 1:
        self._logger.error("Hit target files %s" % target2_files)
        raise InvalidCount("Input files must be only one.")

    self._logger.info("Merge %s and %s." % (target1_files[0], target2_files[0]))
    df1 = pandas.read_csv(
        os.path.join(self._src_dir, target1_files[0]),
        dtype=str,
        encoding=self._encoding,
    )
    df2 = pandas.read_csv(
        os.path.join(self._src_dir, target2_files[0]),
        dtype=str,
        encoding=self._encoding,
    )
    df = pandas.merge(df1, df2)
    # Drop the stray index column if one survived an earlier to_csv round
    # trip. It is a column, so check df.columns (not df.index).
    if "Unnamed: 0" in df.columns:
        del df["Unnamed: 0"]
    df.to_csv(
        os.path.join(self._dest_dir, self._dest_pattern),
        encoding=self._encoding,
        index=False,
    )
def execute(self, *args):
    # essential parameters check
    valid = EssentialParameters(
        self.__class__.__name__, [self._src_dir, self._src_pattern])
    valid()

    files = super().get_target_files(self._src_dir, self._src_pattern)
    if len(files) != 1:
        raise Exception("Input file must be only one.")
    return files[0]
def execute(self, *args):
    # essential parameters check
    valid = EssentialParameters(
        self.__class__.__name__,
        [self._src_dir, self._dest_dir, self._dest_pattern],
    )
    valid()

    if self._dest_pattern:
        self._logger.warning(
            "'dest_pattern' will be unavailable in the near future. "
            "'dest_pattern' will change to 'dest_name'."
        )

    if not self._src_pattern and not self._src_filenames:
        raise InvalidParameter(
            "Specifying either 'src_pattern' or 'src_filenames' is essential."
        )
    if self._src_pattern and self._src_filenames:
        raise InvalidParameter(
            "Cannot specify both 'src_pattern' and 'src_filenames'."
        )

    if self._src_pattern:
        files = File().get_target_files(self._src_dir, self._src_pattern)
    else:
        files = [os.path.join(self._src_dir, file) for file in self._src_filenames]

    if len(files) == 0:
        raise FileNotFound("No files are found.")
    elif len(files) == 1:
        self._logger.warning("Two or more input files are required.")

    file = files.pop(0)
    df1 = pandas.read_csv(
        file,
        dtype=str,
        encoding=self._encoding,
    )
    for file in files:
        df2 = pandas.read_csv(
            file,
            dtype=str,
            encoding=self._encoding,
        )
        df1 = pandas.concat([df1, df2])

    df1.to_csv(
        os.path.join(self._dest_dir, self._dest_pattern),
        encoding=self._encoding,
        index=False,
    )
def execute(self, *args):
    for k, v in self.__dict__.items():
        self._logger.debug("%s : %s" % (k, v))
    super().execute()

    param_valid = EssentialParameters(
        self.__class__.__name__, [self.__table_schema])
    param_valid()

    cache_list = []
    inserts = False
    # initial if_exists
    if_exists = self.REPLACE if self.__replace is True else self.APPEND
    with open(self._s.cache_file, "r", encoding="utf-8") as f:
        for l_str in f:
            l_dict = ast.literal_eval(l_str)
            cache_list.append(l_dict)
            if len(cache_list) == self.BULK_LINE_CNT:
                df = pandas.DataFrame(self.__create_insert_data(cache_list))
                if inserts is True:
                    # if_exists after the first insert execution
                    if_exists = self.APPEND
                dest_tbl = self._dataset + "." + self._tblname
                self._logger.info(
                    "Start insert %s rows to %s" % (len(cache_list), dest_tbl))
                df.to_gbq(
                    dest_tbl,
                    project_id=self._project_id,
                    if_exists=if_exists,
                    table_schema=self.__table_schema,
                    location=self._location,
                    credentials=self._auth(),
                )
                cache_list.clear()
                inserts = True

    if len(cache_list) > 0:
        df = pandas.DataFrame(self.__create_insert_data(cache_list))
        if inserts is True:
            # if_exists after the first insert execution
            if_exists = self.APPEND
        dest_tbl = self._dataset + "." + self._tblname
        self._logger.info(
            "Start insert %s rows to %s" % (len(cache_list), dest_tbl))
        df.to_gbq(
            dest_tbl,
            project_id=self._project_id,
            if_exists=if_exists,
            table_schema=self.__table_schema,
            location=self._location,
            credentials=self._auth(),
        )

    self._s.remove()
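# Hedged sketch of the if_exists transition above (helper name
# illustrative): when `replace` is requested it applies only to the first
# bulk insert; every later chunk must append, or it would wipe out the
# chunks already written.
REPLACE, APPEND = "replace", "append"

def _if_exists_for_chunk(replace_requested, already_inserted):
    if already_inserted:
        return APPEND
    return REPLACE if replace_requested else APPEND

assert _if_exists_for_chunk(True, False) == REPLACE
assert _if_exists_for_chunk(True, True) == APPEND
assert _if_exists_for_chunk(False, False) == APPEND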
def execute(self, *args):
    # essential parameters check
    valid = EssentialParameters(
        self.__class__.__name__,
        [
            self._src_dir,
            self._src_pattern,
            self._dest_dir,
            self._dest_pattern,
            self._divide_rows,
        ],
    )
    valid()

    if self._dest_pattern:
        self._logger.warning(
            "'dest_pattern' will be unavailable in the near future. "
            "Basically every class which extends FileBaseTransform will accept"
            " plural input files, and output files will have the same names as"
            " the input files.\n"
            "At that time, if 'dest_dir' is given, transformed files will be created in the given directory.\n"  # noqa
            "If not, original files will be overwritten by transformed files.")

    files = super().get_target_files(self._src_dir, self._src_pattern)
    self._logger.info("Files found %s" % files)

    file = files[0]
    if self._dest_pattern is None:
        fname = os.path.basename(file)
    else:
        fname = self._dest_pattern

    if "." in fname:
        nameonly, ext = fname.split(".", 1)
        ext = "." + ext
    else:
        nameonly = fname
        ext = ""

    if self._header:
        with open(file, encoding=self._encoding) as i:
            self._header_row = i.readline()

    row = self._ifile_reader(file)
    # Write numbered output files until the reader is exhausted.
    newfilename = nameonly + ".%s" + ext
    has_left = True
    index = 1
    while has_left:
        ofile_path = os.path.join(self._dest_dir, newfilename % str(index))
        has_left = self._ofile_generator(ofile_path, row)
        index += 1
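# Hedged sketch of the chunk naming above (helper name illustrative): an
# index is injected between the base name (split at the first dot) and
# the extension.
def _chunk_names(fname, n):
    if "." in fname:
        nameonly, ext = fname.split(".", 1)
        ext = "." + ext
    else:
        nameonly, ext = fname, ""
    template = nameonly + ".%s" + ext
    return [template % i for i in range(1, n + 1)]

assert _chunk_names("data.csv", 2) == ["data.1.csv", "data.2.csv"]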
def execute(self, *args):
    # essential parameters check
    valid = EssentialParameters(
        self.__class__.__name__,
        [self._host, self._user, self._src_dir, self._src_pattern, self._dest_dir],
    )
    valid()

    if isinstance(self._key, str):
        self._logger.warning((
            "DeprecationWarning: "
            "In the near future, "
            "the `key` will be changed to accept only dictionary types. "
            "Please see more information "
            "https://github.com/BrainPad/cliboa/blob/master/docs/modules/sftp_upload.md"
        ))
        key_filepath = self._key
    else:
        key_filepath = self._source_path_reader(self._key)

    sftp = Sftp(
        self._host,
        self._user,
        self._password,
        key_filepath,
        self._passphrase,
        self._timeout,
        self._retry_count,
        self._port,
    )

    files = super().get_target_files(self._src_dir, self._src_pattern)
    if len(files) > 0:
        for file in files:
            if self._ignore_empty_file and os.path.getsize(file) == 0:
                self._logger.info("0 byte file will not be uploaded %s." % file)
                continue
            sftp.put_file(
                file,
                os.path.join(self._dest_dir, os.path.basename(file)),
                self._endfile_suffix,
            )
            self._logger.info("%s was successfully uploaded." % file)
    else:
        self._logger.info(
            "Files to upload do not exist. File pattern: {}".format(
                os.path.join(self._src_dir, self._src_pattern)))
        if self._quit is True:
            return StepStatus.SUCCESSFUL_TERMINATION
def execute(self, *args):
    super().execute()

    valid = EssentialParameters(
        self.__class__.__name__, [self._dest_dir, self._name_email])
    valid()

    Gpg(self._gnupghome).generate_key(
        self._dest_dir,
        name_real=self._name_real,
        name_email=self._name_email,
        passphrase=self._passphrase,
    )
def execute(self, *args):
    # essential parameters check
    valid = EssentialParameters(
        self.__class__.__name__, [self._src_dir, self._src_pattern])
    valid()

    # TODO This implementation will be removed in the near future.
    # The parent class will not return any values;
    # it will only check the required parameters.
    files = super().get_target_files(self._src_dir, self._src_pattern)
    if len(files) != 1:
        raise Exception("Input file must be only one.")
    return files[0]
def execute(self, *args):
    # essential parameters check
    valid = EssentialParameters(
        self.__class__.__name__,
        [
            self._src_dir,
            self._src_pattern,
            self._dest_dir,
            self._dest_pattern,
            self._headers,
        ],
    )
    valid()

    if self._dest_pattern:
        self._logger.warning(
            "'dest_pattern' will be unavailable in the near future. "
            "Basically every class which extends FileBaseTransform will accept"
            " plural input files, and output files will have the same names as"
            " the input files.\n"
            "At that time, if 'dest_dir' is given, transformed files will be created in the given directory.\n"  # noqa
            "If not, original files will be overwritten by transformed files."
        )

    target_files = super().get_target_files(self._src_dir, self._src_pattern)
    if len(target_files) == 0:
        raise InvalidCount(
            "An input file %s does not exist."
            % os.path.join(self._src_dir, self._src_pattern)
        )
    elif len(target_files) > 1:
        self._logger.error("Hit target files %s" % target_files)
        raise InvalidCount("Input files must be only one.")
    self._logger.info("A target file to be converted: %s" % target_files[0])

    dest_path = os.path.join(self._dest_dir, self._dest_pattern)
    self._logger.info(
        "Convert header of %s. An output file is %s." % (target_files[0], dest_path)
    )
    with open(target_files[0], "r", encoding=self._encoding) as s, open(
        dest_path, "w", encoding=self._encoding
    ) as d:
        reader = csv.reader(s)
        writer = csv.writer(d, quoting=csv.QUOTE_ALL)
        headers = next(reader, None)
        new_headers = self.__replace_headers(headers)
        writer.writerow(new_headers)
        for r in reader:
            writer.writerow(r)
        d.flush()
def execute(self, *args):
    super().execute()

    valid = EssentialParameters(
        self.__class__.__name__, [self._src_dir, self._src_pattern])
    valid()

    gcs_client = storage.Client.from_service_account_json(self._credentials)
    bucket = gcs_client.get_bucket(self._bucket)
    files = super().get_target_files(self._src_dir, self._src_pattern)
    for file in files:
        blob = bucket.blob(os.path.join(self._dest_dir, os.path.basename(file)))
        blob.upload_from_filename(file)
def execute(self, *args):
    super().execute()

    valid = EssentialParameters(
        self.__class__.__name__, [self._src_dir, self._src_pattern])
    valid()

    gcs_client = Gcs.get_gcs_client(self._credentials)
    bucket = gcs_client.bucket(self._bucket)
    files = super().get_target_files(self._src_dir, self._src_pattern)
    self._logger.info("Upload files %s" % files)
    for file in files:
        self._logger.info("Start upload %s" % file)
        blob = bucket.blob(os.path.join(self._dest_dir, os.path.basename(file)))
        blob.upload_from_filename(file)
        self._logger.info("Finish upload %s" % file)
def execute(self, *args):
    # essential parameters check
    valid = EssentialParameters(
        self.__class__.__name__,
        [self._src_dir, self._dest_dir, self._dest_pattern],
    )
    valid()

    if not self._src_pattern and not self._src_filenames:
        raise InvalidParameter(
            "Specifying either 'src_pattern' or 'src_filenames' is essential."
        )
    if self._src_pattern and self._src_filenames:
        raise InvalidParameter(
            "Cannot specify both 'src_pattern' and 'src_filenames'.")

    if self._src_pattern:
        files = File().get_target_files(self._src_dir, self._src_pattern)
    else:
        files = [os.path.join(self._src_dir, file) for file in self._src_filenames]

    if len(files) < 2:
        raise InvalidCount("Two or more input files are required.")

    file = files.pop(0)
    df1 = pandas.read_csv(
        file,
        dtype=str,
        encoding=self._encoding,
    )
    for file in files:
        df2 = pandas.read_csv(
            file,
            dtype=str,
            encoding=self._encoding,
        )
        df1 = pandas.concat([df1, df2])

    df1.to_csv(
        os.path.join(self._dest_dir, self._dest_pattern),
        encoding=self._encoding,
        index=False,
    )
def execute(self, *args):
    files = super().get_target_files(self._src_dir, self._src_pattern)
    if len(files) != 1:
        raise Exception("Input file must be only one.")
    self._logger.info("Files found %s" % files)

    # essential parameters check
    valid = EssentialParameters(
        self.__class__.__name__,
        [
            self._before_format,
            self._before_enc,
            self._after_format,
            self._after_enc,
            self._dest_dir,
            self._dest_pattern,
        ],
    )
    valid()

    if self._dest_pattern:
        self._logger.warning(
            "'dest_pattern' will be unavailable in the near future. "
            "Basically every class which extends FileBaseTransform will accept"
            " plural input files, and output files will have the same names as"
            " the input files.\n"
            "At that time, if 'dest_dir' is given, transformed files will be created in the given directory.\n"  # noqa
            "If not, original files will be overwritten by transformed files."
        )

    with open(files[0], mode="rt", encoding=self._before_enc) as i:
        reader = csv.reader(
            i, delimiter=Csv.delimiter_convert(self._before_format))
        with open(
            os.path.join(self._dest_dir, self._dest_pattern),
            mode="wt",
            newline="",
            encoding=self._after_enc,
        ) as o:
            writer = csv.writer(
                o,
                delimiter=Csv.delimiter_convert(self._after_format),
                quoting=Csv.quote_convert(self._quote),
                lineterminator=Csv.newline_convert(self._after_nl),
            )
            for line in reader:
                writer.writerow(line)