Exemplo n.º 1
0
    def execute(self, *args):
        """Download every GCS blob whose full name matches ``self._src_pattern``.

        Blobs are listed from ``self._bucket`` with ``self._prefix`` and
        ``self._delimiter``. Each blob whose name fully matches the pattern is
        written to ``self._dest_dir`` under its base name. The matched blob
        names are stored in ``ObjectStore`` under ``self._step``.

        ``self._credentials`` may be a string (used directly as the key file
        path, deprecated) or any other value, which is resolved through
        ``self._source_path_reader``.

        Raises:
            re.error: if ``self._src_pattern`` is not a valid regular
                expression (raised before any blob is listed or downloaded).
        """
        super().execute()

        valid = EssentialParameters(self.__class__.__name__, [self._src_pattern])
        valid()

        if isinstance(self._credentials, str):
            self._logger.warning(
                (
                    "DeprecationWarning: "
                    "In the near future, "
                    "the `credentials` will be changed to accept only dictionary types. "
                    "Please see more information "
                    "https://github.com/BrainPad/cliboa/blob/master/docs/modules/gcs_download.md"
                )
            )
            key_filepath = self._credentials
        else:
            key_filepath = self._source_path_reader(self._credentials)
        client = Gcs.get_gcs_client(key_filepath)
        bucket = client.bucket(self._bucket)

        # The pattern is the same for every blob: compile it once, which also
        # surfaces an invalid pattern before anything is downloaded.
        pattern = re.compile(self._src_pattern)

        dl_files = []
        for blob in client.list_blobs(
            bucket, prefix=self._prefix, delimiter=self._delimiter
        ):
            if not pattern.fullmatch(blob.name):
                continue
            dl_files.append(blob.name)
            # Only the base name is kept, so the remote "directory" part of the
            # blob name is not recreated under the destination directory.
            blob.download_to_filename(
                os.path.join(self._dest_dir, os.path.basename(blob.name))
            )

        ObjectStore.put(self._step, dl_files)
Exemplo n.º 2
0
    def execute(self, *args):
        """Fetch files over SFTP and record their names for later steps.

        Validates the mandatory connection/source settings, makes sure
        ``self._dest_dir`` exists, then asks ``Sftp.list_files`` for the
        entries of ``self._src_dir`` matching ``self._src_pattern``.

        Returns:
            ``StepStatus.SUCCESSFUL_TERMINATION`` when ``self._quit`` is True
            and no file came back; otherwise ``None`` after the file list has
            been stored in ``ObjectStore`` under ``self._step``.
        """
        # Mandatory settings are validated before anything touches the disk
        # or the network.
        required = [self._host, self._user, self._src_dir, self._src_pattern]
        checker = EssentialParameters(self.__class__.__name__, required)
        checker()

        os.makedirs(self._dest_dir, exist_ok=True)

        connection_settings = (
            self._host,
            self._user,
            self._password,
            self._key,
            self._timeout,
            self._retry_count,
            self._port,
        )
        client = Sftp(*connection_settings)
        downloaded = client.list_files(
            self._src_dir, self._dest_dir, re.compile(self._src_pattern)
        )

        nothing_found = self._quit is True and len(downloaded) == 0
        if nothing_found:
            self._logger.info("No file was found. After process will not be processed")
            return StepStatus.SUCCESSFUL_TERMINATION

        self._logger.info("Files downloaded %s" % downloaded)

        # Keep the file names available to the following steps.
        ObjectStore.put(self._step, downloaded)
Exemplo n.º 3
0
 def _save_to_cache(self):
     """Run the BigQuery read and keep the resulting DataFrame in ``ObjectStore``.

     The query is ``self._query`` when one is set; otherwise the whole table
     ``<self._dataset>.<self._tblname>`` is selected. The DataFrame is stored
     under ``self._key``.

     A string ``self._credentials`` is used directly as the key file path
     (deprecated); any other value is resolved via ``self._source_path_reader``.
     """
     self._logger.info("Save data to on memory")

     creds = self._credentials
     if not isinstance(creds, str):
         key_filepath = self._source_path_reader(creds)
     else:
         deprecation_notice = (
             "DeprecationWarning: "
             "In the near future, "
             "the `credentials` will be changed to accept only dictionary types. "
             "Please see more information "
             "https://github.com/BrainPad/cliboa/blob/master/docs/modules/bigquery_read.md"
         )
         self._logger.warning(deprecation_notice)
         key_filepath = creds

     # Fall back to a full-table select when no explicit query was configured.
     if self._query is None:
         sql = "SELECT * FROM %s.%s" % (self._dataset, self._tblname)
     else:
         sql = self._query

     frame = pandas.read_gbq(
         query=sql,
         dialect="standard",
         location=self._location,
         project_id=self._project_id,
         credentials=ServiceAccount.auth(key_filepath),
     )
     ObjectStore.put(self._key, frame)
Exemplo n.º 4
0
    def execute(self, *args):
        """Fetch files over SFTP and record their names for later steps.

        Logs the instance attributes (with secrets masked), validates the
        mandatory settings, makes sure ``self._dest_dir`` exists, then asks
        ``Sftp.list_files`` for the entries of ``self._src_dir`` matching
        ``self._src_pattern``.

        Returns:
            ``0`` when the quit flag is True and no file came back; otherwise
            ``None`` after the file list has been stored in ``ObjectStore``
            under ``self._step``.
        """
        # The attribute dump includes the SFTP password; mask anything that
        # looks like a secret so it never reaches the log in clear text.
        secret_markers = ("password", "passphrase")
        for k, v in self.__dict__.items():
            if v is not None and any(m in str(k).lower() for m in secret_markers):
                v = "********"
            self._logger.info("%s : %s" % (k, v))

        # essential parameters check
        valid = EssentialParameters(
            self.__class__.__name__,
            [self._host, self._user, self._src_dir, self._src_pattern],
        )
        valid()

        os.makedirs(self._dest_dir, exist_ok=True)

        # fetch src
        sftp = Sftp(
            self._host,
            self._user,
            self._password,
            self._key,
            self._timeout,
            self._retry_count,
            self._port,
        )
        files = sftp.list_files(self._src_dir, self._dest_dir,
                                re.compile(self._src_pattern))

        # NOTE(review): `self.__quit` is name-mangled by Python to
        # `_<ClassName>__quit`, so it only resolves if it is assigned inside
        # this same class - confirm against the class's `__init__`.
        if self.__quit is True and len(files) == 0:
            self._logger.info(
                "No file was found. After process will not be processed")
            return 0

        # cache downloaded file names
        ObjectStore.put(self._step, files)
Exemplo n.º 5
0
    def execute(self, *args):
        """Read exactly one CSV file and hand each row to ``self._s.save``.

        ``self._src_path`` is expanded with ``glob`` and must resolve to exactly
        one file. When ``self._columns`` is set, each saved dict contains only
        those columns, and columns whose value is missing or empty are left out
        of that row. Otherwise every row is saved as a dict keyed by the first
        (header) line. Finally the matched file list is stored in
        ``ObjectStore`` under ``self._step``.

        Raises:
            CliboaException: if more than one file matches ``self._src_path``.
            FileNotFound: if no file matches ``self._src_path``.
        """
        input_valid = IOInput(self._io)
        input_valid()

        files = glob(self._src_path)
        if len(files) > 1:
            raise CliboaException("Input file must be only one.")

        if len(files) == 0:
            raise FileNotFound("The specified csv file not found.")

        # newline="" hands newline handling to the csv module, which is what it
        # needs to parse quoted fields containing line breaks correctly.
        with open(files[0], "r", encoding=self._encoding, newline="") as f:

            # save per one column
            if self._columns:
                reader = csv.DictReader(f, delimiter=",")
                for row in reader:
                    # extract only the specified columns, dropping the ones
                    # that are absent or empty in this row
                    row_dict = {c: row[c] for c in self._columns if row.get(c)}
                    self._s.save(row_dict)
            else:
                reader = csv.reader(f)
                # An empty file yields no header and no rows, so the loop below
                # simply does not run.
                header = next(reader, None)
                for row in reader:
                    self._s.save(dict(zip(header, row)))

        # cache the name of the file that was read
        ObjectStore.put(self._step, files)
Exemplo n.º 6
0
 def _save_to_cache(self):
     """Run the BigQuery read and keep the resulting DataFrame in ``ObjectStore``.

     The query is ``self._query`` when one is set; otherwise the whole table
     ``<self._dataset>.<self._tblname>`` is selected. The DataFrame is stored
     under ``self._key``.
     """
     self._logger.info("Save data to on memory")

     # Fall back to a full-table select when no explicit query was configured.
     if self._query is None:
         sql = "SELECT * FROM %s.%s" % (self._dataset, self._tblname)
     else:
         sql = self._query

     frame = pandas.read_gbq(
         query=sql,
         dialect="standard",
         location=self._location,
         project_id=self._project_id,
         credentials=ServiceAccount.auth(self._credentials),
     )
     ObjectStore.put(self._key, frame)
Exemplo n.º 7
0
    def execute(self, *args):
        """Read from BigQuery and store the DataFrame under ``self._key``.

        ``self._key`` is mandatory. The query text comes from
        ``self._get_query()`` and the credentials from ``self._auth()``.
        """
        super().execute()

        checker = EssentialParameters(self.__class__.__name__, [self._key])
        checker()

        # Built in the same order the arguments are passed, so the query is
        # resolved before the credentials.
        read_options = {
            "query": self._get_query(),
            "dialect": "standard",
            "location": self._location,
            "project_id": self._project_id,
            "credentials": self._auth(),
        }
        frame = pandas.read_gbq(**read_options)
        ObjectStore.put(self._key, frame)
Exemplo n.º 8
0
    def execute(self, *args):
        """Fetch files over SFTP and record their names for later steps.

        Validates the mandatory settings, makes sure ``self._dest_dir`` exists,
        resolves the key file path, then asks ``Sftp.list_files`` for the
        entries of ``self._src_dir`` matching ``self._src_pattern``, forwarding
        ``self._endfile_suffix`` and ``self._ignore_empty_file``.

        A string ``self._key`` is used directly as the key file path
        (deprecated); any other value is resolved via ``self._source_path_reader``.

        Returns:
            ``StepStatus.SUCCESSFUL_TERMINATION`` when ``self._quit`` is True
            and no file came back; otherwise ``None`` after the file list has
            been stored in ``ObjectStore`` under ``self._step``.
        """
        # Mandatory settings are validated before anything touches the disk
        # or the network.
        required = [self._host, self._user, self._src_dir, self._src_pattern]
        checker = EssentialParameters(self.__class__.__name__, required)
        checker()

        os.makedirs(self._dest_dir, exist_ok=True)

        key_setting = self._key
        if not isinstance(key_setting, str):
            key_filepath = self._source_path_reader(key_setting)
        else:
            deprecation_notice = (
                "DeprecationWarning: "
                "In the near future, "
                "the `key` will be changed to accept only dictionary types. "
                "Please see more information "
                "https://github.com/BrainPad/cliboa/blob/master/docs/modules/sftp_download.md"
            )
            self._logger.warning(deprecation_notice)
            key_filepath = key_setting

        connection_settings = (
            self._host,
            self._user,
            self._password,
            key_filepath,
            self._passphrase,
            self._timeout,
            self._retry_count,
            self._port,
        )
        client = Sftp(*connection_settings)
        downloaded = client.list_files(
            self._src_dir,
            self._dest_dir,
            re.compile(self._src_pattern),
            self._endfile_suffix,
            self._ignore_empty_file,
        )

        nothing_found = self._quit is True and len(downloaded) == 0
        if nothing_found:
            self._logger.info(
                "No file was found. After process will not be processed")
            return StepStatus.SUCCESSFUL_TERMINATION

        self._logger.info("Files downloaded %s" % downloaded)

        # Keep the file names available to the following steps.
        ObjectStore.put(self._step, downloaded)
Exemplo n.º 9
0
    def execute(self, *args):
        """Download every GCS blob whose full name matches ``self._src_pattern``.

        Blobs are listed from the bucket named ``self._bucket`` with
        ``self._prefix`` and ``self._delimiter``. Each blob whose name fully
        matches the pattern is written to ``self._dest_dir`` under its base
        name. The matched blob names are stored in ``ObjectStore`` under
        ``self._step``.

        Raises:
            re.error: if ``self._src_pattern`` is not a valid regular
                expression (raised before any blob is listed or downloaded).
        """
        super().execute()

        valid = EssentialParameters(self.__class__.__name__,
                                    [self._src_pattern])
        valid()

        client = self._gcs_client()
        bucket = client.get_bucket(self._bucket)

        # The pattern is the same for every blob: compile it once, which also
        # surfaces an invalid pattern before anything is downloaded.
        pattern = re.compile(self._src_pattern)

        dl_files = []
        for blob in bucket.list_blobs(prefix=self._prefix,
                                      delimiter=self._delimiter):
            if not pattern.fullmatch(blob.name):
                continue
            dl_files.append(blob.name)
            # Only the base name is kept, so the remote "directory" part of the
            # blob name is not recreated under the destination directory.
            blob.download_to_filename(
                os.path.join(self._dest_dir, os.path.basename(blob.name)))

        ObjectStore.put(self._step, dl_files)
Exemplo n.º 10
0
    def execute(self, *args):
        """Download every GCS blob whose full name matches ``self._src_pattern``.

        Logs the instance attributes, then lists the blobs of the bucket named
        ``self._bucket`` with ``self._prefix`` and ``self._delimiter``. Each
        blob whose name fully matches the pattern is written to
        ``self._dest_dir`` under its base name. The matched blob names are
        stored in ``ObjectStore`` under ``self._step``.

        Raises:
            re.error: if ``self._src_pattern`` is not a valid regular
                expression (raised before any blob is listed or downloaded).
        """
        # NOTE(review): this dumps every instance attribute, `_credentials`
        # included - confirm that nothing sensitive ends up in the log.
        for k, v in self.__dict__.items():
            self._logger.info("%s : %s" % (k, v))
        super().execute()

        valid = EssentialParameters(self.__class__.__name__,
                                    [self._src_pattern])
        valid()

        c = storage.Client(self._project_id,
                           credentials=ServiceAccount.auth(self._credentials))
        bucket = c.get_bucket(self._bucket)

        # The pattern is the same for every blob: compile it once, which also
        # surfaces an invalid pattern before anything is downloaded.
        pattern = re.compile(self._src_pattern)

        dl_files = []
        for blob in bucket.list_blobs(prefix=self._prefix,
                                      delimiter=self._delimiter):
            if not pattern.fullmatch(blob.name):
                continue
            dl_files.append(blob.name)
            # Only the base name is kept, so the remote "directory" part of the
            # blob name is not recreated under the destination directory.
            blob.download_to_filename(
                os.path.join(self._dest_dir, os.path.basename(blob.name)))

        ObjectStore.put(self._step, dl_files)
Exemplo n.º 11
0
    def execute(self, *args):
        """Read from BigQuery and store the DataFrame under ``self._key``.

        ``self._key`` is mandatory. The query text comes from
        ``self._get_query()``.

        A string ``self._credentials`` is used directly as the key file path
        (deprecated); any other value is resolved via ``self._source_path_reader``.
        """
        super().execute()

        checker = EssentialParameters(self.__class__.__name__, [self._key])
        checker()

        creds = self._credentials
        if not isinstance(creds, str):
            key_filepath = self._source_path_reader(creds)
        else:
            deprecation_notice = (
                "DeprecationWarning: "
                "In the near future, "
                "the `credentials` will be changed to accept only dictionary types. "
            )
            self._logger.warning(deprecation_notice)
            key_filepath = creds

        # Built in the same order the arguments are passed, so the query is
        # resolved before the credentials are loaded.
        read_options = {
            "query": self._get_query(),
            "dialect": "standard",
            "location": self._location,
            "project_id": self._project_id,
            "credentials": ServiceAccount.auth(key_filepath),
        }
        frame = pandas.read_gbq(**read_options)
        ObjectStore.put(self._key, frame)