Exemplo n.º 1
0
    def _copy_to_storage(self, path: str, normalized_file_name: str) -> None:
        """Copy the file at `path` into this run's dated storage folder.

        The destination is
        gs://<storage_bucket>/<date>/<normalized_file_name>; the copy itself is
        delegated to gsutil_cp.
        """
        destination_dir = f'gs://{self.storage_bucket}/{self.date}'

        # The log line names the destination folder, not the full file path.
        logging.info("Copying [%s] to [%s]", path, destination_dir)

        gsutil_cp(path, f'{destination_dir}/{normalized_file_name}')
    def _copy_files_for_date(self, date_str: str):

        from_path = f'gs://{self.prod_storage_bucket}/{date_str}/*'
        to_path = f'gs://{self.staging_storage_bucket}/{date_str}/'

        if not self.dry_run:
            gsutil_cp(from_path=from_path, to_path=to_path)
        with self.mutex:
            self.copy_list.append((from_path, to_path))
            if self.copy_progress:
                self.copy_progress.next()
    def _copy_files_for_date(self, subdir_path_str: str):
        """Copy one storage sub-directory from the prod bucket to the staging bucket.

        `subdir_path_str` is parsed (after stripping trailing slashes) into a
        GcsfsDirectoryPath; its relative path is reused under both bucket
        names. The gsutil copy is skipped when self.dry_run is set, but the
        (source, destination) pair is always recorded and the progress
        indicator, when one is set, is advanced under self.mutex.
        """
        trimmed_subdir = subdir_path_str.rstrip('/')
        relative_dir = GcsfsDirectoryPath.from_absolute_path(trimmed_subdir).relative_path

        prod_bucket = self.prod_region_storage_dir_path.bucket_name
        source_glob = f'gs://{prod_bucket}/{relative_dir}*'
        staging_bucket = self.staging_region_storage_dir_path.bucket_name
        destination_dir = f'gs://{staging_bucket}/{relative_dir}'

        perform_copy = not self.dry_run
        if perform_copy:
            gsutil_cp(from_path=source_glob, to_path=destination_dir)

        with self.mutex:
            self.copy_list.append((source_glob, destination_dir))
            progress = self.copy_progress
            if progress:
                progress.next()
Exemplo n.º 4
0
    def _copy_to_ingest_bucket(self, path: str,
                               normalized_file_name: str) -> None:
        """Copy the file at `path` into the ingest bucket under its normalized name.

        The upload is skipped when self.dry_run is set. In either case the
        (source, destination URI) pair is appended to self.copies_list and the
        progress indicator, when one is set, is advanced under self.mutex.
        """
        upload_path = GcsfsFilePath.from_directory_and_file_name(
            self.ingest_bucket, normalized_file_name)
        destination_uri = upload_path.uri()

        perform_copy = not self.dry_run
        if perform_copy:
            gsutil_cp(path, destination_uri)

        copy_record = (path, destination_uri)
        with self.mutex:
            self.copies_list.append(copy_record)
            progress = self.move_progress
            if progress:
                progress.next()
Exemplo n.º 5
0
    def _copy_files_for_date(self, subdir_path_str: str) -> None:
        """Copy one storage sub-directory from the source to the destination bucket.

        `subdir_path_str` is parsed (after stripping trailing slashes) into a
        GcsfsDirectoryPath; its relative path is reused under both bucket
        names. The gsutil copy is skipped when self.dry_run is set, but the
        (source, destination) pair is always recorded and the progress
        indicator, when one is set, is advanced under self.mutex.
        """
        trimmed_subdir = subdir_path_str.rstrip("/")
        relative_dir = GcsfsDirectoryPath.from_absolute_path(trimmed_subdir).relative_path

        source_bucket = self.source_region_storage_dir_path.bucket_name
        source_glob = f"gs://{source_bucket}/{relative_dir}*"
        destination_bucket = self.destination_region_storage_dir_path.bucket_name
        destination_dir = f"gs://{destination_bucket}/{relative_dir}"

        perform_copy = not self.dry_run
        if perform_copy:
            gsutil_cp(from_path=source_glob, to_path=destination_dir)

        with self.mutex:
            self.copy_list.append((source_glob, destination_dir))
            progress = self.copy_progress
            if progress:
                progress.next()
    def _copy_to_storage(self, path: str, normalized_file_name: str) -> None:
        """Copy the file at `path` into the raw-data storage folder for self.datetime.

        The destination is
        gs://<storage_bucket>/<RAW_DATA value>/<YYYY>/<MM>/<DD>/<normalized_file_name>.
        In dry-run mode the intended copy is only logged and nothing is copied.

        Args:
            path: Source path, handed to gsutil_cp unchanged.
            normalized_file_name: File name to use inside the storage folder.
        """
        # GCS URIs always use '/' as the separator. os.path.join uses the host
        # platform's separator (a backslash on Windows), which would produce an
        # invalid URI there, so join with posixpath instead. On POSIX hosts the
        # result is identical to the previous os.path.join call.
        import posixpath

        storage_dir_path = posixpath.join(
            'gs://', self.storage_bucket,
            GcsfsDirectIngestFileType.RAW_DATA.value,
            f'{self.datetime.year:04}', f'{self.datetime.month:02}',
            f'{self.datetime.day:02}')

        full_file_storage_path = f'{storage_dir_path}/{normalized_file_name}'

        if self.dry_run:
            logging.info("[DRY RUN] Would copy [%s] to [%s]", path,
                         full_file_storage_path)
            return

        logging.info("Copying [%s] to [%s]", path, full_file_storage_path)
        gsutil_cp(path, full_file_storage_path)
    def _copy_to_ingest_bucket(
        self,
        path: str,
        full_file_upload_path: GcsfsFilePath,
    ) -> None:
        """Upload the file at `path` to `full_file_upload_path` and record the outcome.

        In dry-run mode nothing is uploaded and the (source, destination URI)
        pair is simply appended to self.copies_list. Otherwise the upload is
        attempted: on success the path is added to self.uploaded_files and the
        pair to self.copies_list; a ValueError adds the path to
        self.unable_to_upload_files instead. The progress indicator, when one
        is set, is advanced under self.mutex in every case.
        """
        if self.dry_run:
            self.copies_list.append((path, full_file_upload_path.uri()))
        else:
            try:
                destination_uri = full_file_upload_path.uri()
                gsutil_cp(path, destination_uri)
                self.uploaded_files.append(path)
                self.copies_list.append((path, destination_uri))
            except ValueError:
                # A failed upload is recorded rather than raised.
                self.unable_to_upload_files.append(path)

        with self.mutex:
            if self.move_progress:
                # pylint: disable=not-callable
                self.move_progress.next()