def execute(self, *args):
    super().execute()

    valid = EssentialParameters(self.__class__.__name__, [self._src_pattern])
    valid()

    if isinstance(self._credentials, str):
        self._logger.warning(
            (
                "DeprecationWarning: "
                "In the near future, "
                "the `credentials` will be changed to accept only dictionary types. "
                "Please see more information "
                "https://github.com/BrainPad/cliboa/blob/master/docs/modules/gcs_download.md"
            )
        )
        key_filepath = self._credentials
    else:
        key_filepath = self._source_path_reader(self._credentials)

    client = Gcs.get_gcs_client(key_filepath)
    bucket = client.bucket(self._bucket)

    # download objects whose names fully match src_pattern
    dl_files = []
    r = re.compile(self._src_pattern)
    for blob in client.list_blobs(
        bucket, prefix=self._prefix, delimiter=self._delimiter
    ):
        if not r.fullmatch(blob.name):
            continue
        dl_files.append(blob.name)
        blob.download_to_filename(
            os.path.join(self._dest_dir, os.path.basename(blob.name))
        )

    # cache downloaded file names
    ObjectStore.put(self._step, dl_files)
def execute(self, *args):
    # essential parameters check
    valid = EssentialParameters(
        self.__class__.__name__,
        [self._host, self._user, self._src_dir, self._src_pattern],
    )
    valid()

    os.makedirs(self._dest_dir, exist_ok=True)

    # fetch src
    sftp = Sftp(
        self._host,
        self._user,
        self._password,
        self._key,
        self._timeout,
        self._retry_count,
        self._port,
    )
    files = sftp.list_files(
        self._src_dir, self._dest_dir, re.compile(self._src_pattern)
    )

    if self._quit is True and len(files) == 0:
        self._logger.info("No file was found. After process will not be processed")
        return StepStatus.SUCCESSFUL_TERMINATION

    self._logger.info("Files downloaded %s" % files)

    # cache downloaded file names
    ObjectStore.put(self._step, files)
def _save_to_cache(self): self._logger.info("Save data to on memory") if isinstance(self._credentials, str): self._logger.warning( ( "DeprecationWarning: " "In the near future, " "the `credentials` will be changed to accept only dictionary types. " "Please see more information " "https://github.com/BrainPad/cliboa/blob/master/docs/modules/bigquery_read.md" ) ) key_filepath = self._credentials else: key_filepath = self._source_path_reader(self._credentials) df = pandas.read_gbq( query="SELECT * FROM %s.%s" % (self._dataset, self._tblname) if self._query is None else self._query, dialect="standard", location=self._location, project_id=self._project_id, credentials=ServiceAccount.auth(key_filepath), ) ObjectStore.put(self._key, df)
def execute(self, *args): for k, v in self.__dict__.items(): self._logger.info("%s : %s" % (k, v)) # essential parameters check valid = EssentialParameters( self.__class__.__name__, [self._host, self._user, self._src_dir, self._src_pattern], ) valid() os.makedirs(self._dest_dir, exist_ok=True) # fetch src sftp = Sftp( self._host, self._user, self._password, self._key, self._timeout, self._retry_count, self._port, ) files = sftp.list_files(self._src_dir, self._dest_dir, re.compile(self._src_pattern)) if self.__quit is True and len(files) == 0: self._logger.info( "No file was found. After process will not be processed") return 0 # cache downloaded file names ObjectStore.put(self._step, files)
def execute(self, *args):
    input_valid = IOInput(self._io)
    input_valid()

    files = glob(self._src_path)
    if len(files) > 1:
        raise CliboaException("Input file must be only one.")
    if len(files) == 0:
        raise FileNotFound("The specified csv file not found.")

    with open(files[0], "r", encoding=self._encoding) as f:
        if self._columns:
            # save only the specified columns, row by row
            reader = csv.DictReader(f, delimiter=",")
            for row in reader:
                row_dict = {}
                for c in self._columns:
                    if not row.get(c):
                        continue
                    row_dict[c] = row.get(c)
                self._s.save(row_dict)
        else:
            # save every column, keyed by the header row
            reader = csv.reader(f)
            header = next(reader, None)
            for row in reader:
                row_dict = dict(zip(header, row))
                self._s.save(row_dict)

    # cache the processed file names
    ObjectStore.put(self._step, files)
def _save_to_cache(self): self._logger.info("Save data to on memory") df = pandas.read_gbq( query="SELECT * FROM %s.%s" % (self._dataset, self._tblname) if self._query is None else self._query, dialect="standard", location=self._location, project_id=self._project_id, credentials=ServiceAccount.auth(self._credentials), ) ObjectStore.put(self._key, df)
def execute(self, *args): super().execute() valid = EssentialParameters(self.__class__.__name__, [self._key]) valid() df = pandas.read_gbq( query=self._get_query(), dialect="standard", location=self._location, project_id=self._project_id, credentials=self._auth(), ) ObjectStore.put(self._key, df)
def execute(self, *args):
    # essential parameters check
    valid = EssentialParameters(
        self.__class__.__name__,
        [self._host, self._user, self._src_dir, self._src_pattern],
    )
    valid()

    os.makedirs(self._dest_dir, exist_ok=True)

    if isinstance(self._key, str):
        self._logger.warning(
            (
                "DeprecationWarning: "
                "In the near future, "
                "the `key` will be changed to accept only dictionary types. "
                "Please see more information "
                "https://github.com/BrainPad/cliboa/blob/master/docs/modules/sftp_download.md"
            )
        )
        key_filepath = self._key
    else:
        key_filepath = self._source_path_reader(self._key)

    # fetch src
    sftp = Sftp(
        self._host,
        self._user,
        self._password,
        key_filepath,
        self._passphrase,
        self._timeout,
        self._retry_count,
        self._port,
    )
    files = sftp.list_files(
        self._src_dir,
        self._dest_dir,
        re.compile(self._src_pattern),
        self._endfile_suffix,
        self._ignore_empty_file,
    )

    if self._quit is True and len(files) == 0:
        self._logger.info("No file was found. After process will not be processed")
        return StepStatus.SUCCESSFUL_TERMINATION

    self._logger.info("Files downloaded %s" % files)

    # cache downloaded file names
    ObjectStore.put(self._step, files)
def execute(self, *args):
    super().execute()

    valid = EssentialParameters(self.__class__.__name__, [self._src_pattern])
    valid()

    client = self._gcs_client()
    bucket = client.get_bucket(self._bucket)

    # download objects whose names fully match src_pattern
    dl_files = []
    r = re.compile(self._src_pattern)
    for blob in bucket.list_blobs(prefix=self._prefix, delimiter=self._delimiter):
        if not r.fullmatch(blob.name):
            continue
        dl_files.append(blob.name)
        blob.download_to_filename(
            os.path.join(self._dest_dir, os.path.basename(blob.name))
        )

    # cache downloaded file names
    ObjectStore.put(self._step, dl_files)
def execute(self, *args): for k, v in self.__dict__.items(): self._logger.info("%s : %s" % (k, v)) super().execute() valid = EssentialParameters(self.__class__.__name__, [self._src_pattern]) valid() c = storage.Client(self._project_id, credentials=ServiceAccount.auth(self._credentials)) bucket = c.get_bucket(self._bucket) dl_files = [] for blob in bucket.list_blobs(prefix=self._prefix, delimiter=self._delimiter): r = re.compile(self._src_pattern) if not r.fullmatch(blob.name): continue dl_files.append(blob.name) blob.download_to_filename( os.path.join(self._dest_dir, os.path.basename(blob.name))) ObjectStore.put(self._step, dl_files)
def execute(self, *args):
    super().execute()

    valid = EssentialParameters(self.__class__.__name__, [self._key])
    valid()

    if isinstance(self._credentials, str):
        self._logger.warning(
            (
                "DeprecationWarning: "
                "In the near future, "
                "the `credentials` will be changed to accept only dictionary types. "
            )
        )
        key_filepath = self._credentials
    else:
        key_filepath = self._source_path_reader(self._credentials)

    df = pandas.read_gbq(
        query=self._get_query(),
        dialect="standard",
        location=self._location,
        project_id=self._project_id,
        credentials=ServiceAccount.auth(key_filepath),
    )
    ObjectStore.put(self._key, df)