Python GCSHook.upload示例，airflow.providers.google.cloud.hooks.gcs.GCSHook.upload Python示例

示例#1

0

显示文件

    def _upload_data(
        self,
        events: List[Any],
    ) -> str:
        """Serialize ``events`` to JSON and upload the file to GCS.

        The object is named ``<calendar_id>.json`` with spaces replaced by
        underscores, and is placed under ``self.destination_path`` when that
        attribute is set.

        :param events: Items passed to ``json.dump``.
        :return: Name of the uploaded object within ``self.destination_bucket``.
        """
        storage_hook = GCSHook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
            impersonation_chain=self.impersonation_chain,
        )

        # Build the object name, prefixing the destination path only if one is set.
        object_name = f"{self.calendar_id}.json".replace(" ", "_")
        if self.destination_path:
            object_name = f"{self.destination_path.strip('/')}/{object_name}"

        with NamedTemporaryFile("w+") as json_file:
            json.dump(events, json_file)
            # The upload is given the file's path, so push buffered text to disk first.
            json_file.flush()

            storage_hook.upload(
                bucket_name=self.destination_bucket,
                object_name=object_name,
                filename=json_file.name,
            )
        return object_name

示例#2

0

显示文件

    def execute(self, context: 'Context') -> str:
        """Write the accessible Google Ads customers to CSV and upload it to GCS.

        :param context: Task context; not used by this method.
        :return: ``gs://`` URI of the uploaded object.
        """
        destination_uri = f"gs://{self.bucket}/{self.object_name}"

        google_ads = GoogleAdsHook(
            gcp_conn_id=self.gcp_conn_id,
            google_ads_conn_id=self.google_ads_conn_id,
            api_version=self.api_version,
        )
        storage = GCSHook(
            gcp_conn_id=self.gcp_conn_id,
            impersonation_chain=self.impersonation_chain,
        )

        with NamedTemporaryFile("w+") as csv_file:
            # Fetch the account list and write it out as CSV rows.
            customers = google_ads.list_accessible_customers()
            csv.writer(csv_file).writerows(customers)
            # The upload is given the file's path, so flush buffered rows first.
            csv_file.flush()

            storage.upload(
                bucket_name=self.bucket,
                object_name=self.object_name,
                gzip=self.gzip,
                filename=csv_file.name,
            )
            self.log.info("Uploaded %s to %s", len(customers), destination_uri)

        return destination_uri

示例#3

0

显示文件

    def execute(self, context: Dict):
        """Run the Salesforce query, write the records to a file and upload it to GCS.

        :param context: Task context; not used by this method.
        :return: ``gs://`` URI of the uploaded object.
        """
        sf_hook = SalesforceHook(conn_id=self.salesforce_conn_id)
        query_result = sf_hook.make_query(
            query=self.query,
            include_deleted=self.include_deleted,
            query_params=self.query_params,
        )

        with tempfile.TemporaryDirectory() as work_dir:
            # The export lives in a throwaway directory removed when the block exits.
            local_path = os.path.join(work_dir, "salesforce_temp_file")
            sf_hook.write_object_to_file(
                query_results=query_result["records"],
                filename=local_path,
                fmt=self.export_format,
                coerce_to_timestamp=self.coerce_to_timestamp,
                record_time_added=self.record_time_added,
            )

            GCSHook(gcp_conn_id=self.gcp_conn_id).upload(
                bucket_name=self.bucket_name,
                object_name=self.object_name,
                filename=local_path,
                gzip=self.gzip,
            )

            uploaded_uri = f"gs://{self.bucket_name}/{self.object_name}"
            self.log.info("%s uploaded to GCS", uploaded_uri)
            return uploaded_uri

示例#4

0

显示文件

    def execute(self, context: Dict):
        """Query Google Ads, extract the configured attributes and upload them as CSV.

        :param context: Task context; not used by this method.
        :raises Exception: Any error raised while extracting attributes from the
            rows is logged and re-raised unchanged.
        """
        ads_hook = GoogleAdsHook(
            gcp_conn_id=self.gcp_conn_id,
            google_ads_conn_id=self.google_ads_conn_id,
        )
        result_rows = ads_hook.search(
            client_ids=self.client_ids,
            query=self.query,
            page_size=self.page_size,
        )

        try:
            # One attrgetter pulls every attribute named in self.attributes per row.
            extract = attrgetter(*self.attributes)
            csv_rows = list(map(extract, result_rows))
        except Exception as e:
            self.log.error(
                "An error occurred in converting the Google Ad Rows. \n Error %s",
                e)
            raise

        with NamedTemporaryFile("w", suffix=".csv") as csv_out:
            csv.writer(csv_out).writerows(csv_rows)
            # The upload is given the file's path, so flush buffered rows first.
            csv_out.flush()

            GCSHook(gcp_conn_id=self.gcp_conn_id).upload(
                bucket_name=self.bucket,
                object_name=self.obj,
                filename=csv_out.name,
                gzip=self.gzip,
            )
            self.log.info("%s uploaded to GCS", self.obj)

示例#5

0

显示文件

    def execute(self, context: dict) -> str:
        """Download Display & Video 360 line items and upload them to GCS as CSV.

        :param context: Task context; not used by this method.
        :return: ``<bucket_name>/<object_name>`` of the uploaded object.
        """
        storage = GCSHook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
            impersonation_chain=self.impersonation_chain,
        )
        dv360 = GoogleDisplayVideo360Hook(
            gcp_conn_id=self.gcp_conn_id,
            api_version=self.api_version,
            delegate_to=self.delegate_to,
            impersonation_chain=self.impersonation_chain,
        )

        self.log.info("Retrieving report...")
        line_items: List[str] = dv360.download_line_items(
            request_body=self.request_body)

        with tempfile.NamedTemporaryFile("w+") as csv_file:
            csv.writer(csv_file).writerows(line_items)
            # The upload is given the file's path, so flush buffered rows first.
            csv_file.flush()
            storage.upload(
                bucket_name=self.bucket_name,
                object_name=self.object_name,
                filename=csv_file.name,
                mime_type="text/csv",
                gzip=self.gzip,
            )
        return f"{self.bucket_name}/{self.object_name}"

示例#6

0

显示文件

    def execute(self, context: Dict):
        """Download an object, run the transform script on it and upload the output.

        The script is invoked as ``<transform_script...> <source> <destination>``
        where both paths are temporary files; its combined stdout/stderr is
        logged line by line.

        :param context: Task context; not used by this method.
        :raises AirflowException: If the transform script exits with a non-zero code.
        """
        hook = GCSHook(gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain)

        with NamedTemporaryFile() as source_file, NamedTemporaryFile() as destination_file:
            self.log.info("Downloading file from %s", self.source_bucket)
            hook.download(
                bucket_name=self.source_bucket, object_name=self.source_object, filename=source_file.name
            )

            self.log.info("Starting the transformation")
            # Always build a fresh argv list: appending to self.transform_script
            # itself would make the arguments accumulate across calls.
            if isinstance(self.transform_script, str):
                cmd = [self.transform_script]
            else:
                cmd = list(self.transform_script)
            cmd += [source_file.name, destination_file.name]

            # The context manager closes the stdout pipe and reaps the child
            # even if logging its output raises.
            with subprocess.Popen(
                args=cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, close_fds=True
            ) as process:
                self.log.info("Process output:")
                if process.stdout:
                    for line in iter(process.stdout.readline, b''):
                        self.log.info(line.decode(self.output_encoding).rstrip())

                process.wait()

            if process.returncode:
                raise AirflowException(f"Transform script failed: {process.returncode}")

            self.log.info("Transformation succeeded. Output temporarily located at %s", destination_file.name)

            self.log.info("Uploading file to %s as %s", self.destination_bucket, self.destination_object)
            hook.upload(
                bucket_name=self.destination_bucket,
                object_name=self.destination_object,
                filename=destination_file.name,
            )

示例#7

0

显示文件

    def _copy_single_object(
        self,
        gcs_hook: GCSHook,
        sftp_hook: SFTPHook,
        source_path: str,
        destination_object: str,
    ) -> None:
        """
        Copy one SFTP file to GCS, deleting the source afterwards when
        ``self.move_object`` is truthy.

        :param gcs_hook: Hook used for the upload.
        :param sftp_hook: Hook used to retrieve (and optionally delete) the file.
        :param source_path: Path of the file on the SFTP side.
        :param destination_object: Object name inside ``self.destination_bucket``.
        """
        self.log.info(
            "Executing copy of %s to gs://%s/%s",
            source_path,
            self.destination_bucket,
            destination_object,
        )

        with NamedTemporaryFile("w") as staging_file:
            # Only the temp file's path is used; the hooks do the reading and writing.
            local_path = staging_file.name
            sftp_hook.retrieve_file(source_path, local_path)

            gcs_hook.upload(
                bucket_name=self.destination_bucket,
                object_name=destination_object,
                filename=local_path,
                mime_type=self.mime_type,
            )

        if not self.move_object:
            return

        self.log.info("Executing delete of %s", source_path)
        sftp_hook.delete_file(source_path)

示例#8

0

显示文件

    def execute(self, context):
        """Download an object, rewrite its column headers and upload it back.

        The object is written back to the same bucket and name it was read from.

        :param context: Task context; not used by this method.
        """
        storage = GCSHook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
            impersonation_chain=self.impersonation_chain,
        )
        bucket = self.storage_bucket
        object_name = self.storage_name_object

        with NamedTemporaryFile("w+") as work_file:
            local_path = work_file.name

            # Step 1: fetch the current object into the temp file.
            self.log.info("Downloading file from GCS: %s/%s ", bucket, object_name)
            storage.download(bucket_name=bucket, object_name=object_name, filename=local_path)

            # Step 2: rewrite the headers in place.
            self.log.info("Modifying temporary file %s", local_path)
            self._modify_column_headers(
                tmp_file_location=local_path,
                custom_dimension_header_mapping=self.custom_dimension_header_mapping,
            )

            # Step 3: overwrite the original object with the modified file.
            self.log.info("Uploading file to GCS: %s/%s ", bucket, object_name)
            storage.upload(bucket_name=bucket, object_name=object_name, filename=local_path)

示例#9

0

显示文件

文件： local_to_gcs.py 项目： ysktir/airflow-1

    def execute(self, context):
        """Upload a file or list of files to Google Cloud Storage.

        ``self.src`` is either a list of paths or a glob pattern. When
        ``self.dst`` ends with a path separator it is treated as a directory and
        each file keeps its base name; otherwise it names a single object.

        :param context: Task context; not used by this method.
        :raises ValueError: If ``self.dst`` names a single object but more than
            one source file was matched.
        """
        hook = GCSHook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
            impersonation_chain=self.impersonation_chain,
        )

        filepaths = self.src if isinstance(self.src, list) else glob(self.src)
        if os.path.basename(self.dst):  # path to a file
            if len(filepaths) > 1:  # multiple file upload
                # The message says "slash": the example and the basename check
                # above both rely on a trailing "/" rather than a backslash.
                raise ValueError(
                    "'dst' parameter references filepath. Please specify "
                    "directory (with trailing slash) to upload multiple "
                    "files. e.g. /path/to/directory/"
                )
            object_paths = [self.dst]
        else:  # directory is provided
            object_paths = [os.path.join(self.dst, os.path.basename(filepath)) for filepath in filepaths]

        for filepath, object_path in zip(filepaths, object_paths):
            hook.upload(
                bucket_name=self.bucket,
                object_name=object_path,
                mime_type=self.mime_type,
                filename=filepath,
                gzip=self.gzip,
            )

示例#10

0

显示文件

文件： azure_fileshare_to_gcs.py 项目： kushsharma/airflow

    def execute(self, context: 'Context'):
        """Copy files from an Azure file share directory into GCS.

        When ``self.replace`` is falsy, names already listed under the
        destination prefix are excluded from the copy.

        :param context: Task context; not used by this method.
        :return: List of the file names selected for upload (may be empty).
        :raises RuntimeError: If there are files to copy but
            ``self.directory_name`` is ``None``.
        """
        self._check_inputs()
        fileshare_hook = AzureFileShareHook(self.azure_fileshare_conn_id)
        files = fileshare_hook.list_files(
            share_name=self.share_name, directory_name=self.directory_name
        )

        gcs_hook = GCSHook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
            impersonation_chain=self.google_impersonation_chain,
        )

        bucket, prefix = _parse_gcs_url(self.dest_gcs)

        if not self.replace:
            # Not replacing: keep only the source files that are not already
            # listed under the destination prefix.
            listed = gcs_hook.list(bucket, prefix=prefix)

            # An entry equal to the prefix itself is not a file; drop it.
            if prefix in listed:
                listed.remove(prefix)

            # Compare on names relative to the prefix.
            already_there = [
                name[len(prefix):] if name.startswith(prefix) else name
                for name in listed
            ]

            files = list(set(files) - set(already_there))

        if not files:
            self.log.info('There are no new files to sync. Have a nice day!')
            self.log.info('In sync, no files needed to be uploaded to Google Cloud Storage')
            return files

        self.log.info('%s files are going to be synced.', len(files))
        if self.directory_name is None:
            raise RuntimeError("The directory_name must be set!.")

        for file_name in files:
            with NamedTemporaryFile() as temp_file:
                fileshare_hook.get_file_to_stream(
                    stream=temp_file,
                    share_name=self.share_name,
                    directory_name=self.directory_name,
                    file_name=file_name,
                )
                # The upload is given the file's path, so flush buffered bytes first.
                temp_file.flush()

                # The object name is the plain concatenation of prefix and
                # file name; no separator is inserted here.
                gcs_hook.upload(bucket, prefix + file_name, temp_file.name, gzip=self.gzip)
        self.log.info("All done, uploaded %d files to Google Cloud Storage.", len(files))

        return files

示例#11

0

显示文件

文件： SocrataToGCSOperator.py 项目： simonbreton/Capstone-project

    def execute(self, context):
        """Page through the HTTP query and upload every page to GCS as CSV.

        Each page is requested by appending ``&$offset=<n>`` to ``self.query``.
        The response is loaded into a DataFrame, given a ``surrogate_keys``
        column filled with ``'null'`` and a ``partitioned_key`` column copied
        from ``self.partitioned_key``, then uploaded as
        ``<object_name>/<offset>``.

        :param context: Task context; not used by this method.
        """
        # Number of rows advanced per request.
        page_size = 5000

        http_hook = HttpHook(http_conn_id=self.http_conn_id,
                             method=self.method)

        retry_args = dict(wait=tenacity.wait_fixed(10),
                          stop=tenacity.stop_after_attempt(10))

        gcp_conn = GCSHook(gcp_conn_id=self.google_cloud_storage_conn_id)

        total_rows = int(self.max_rows)

        for offset in range(0, total_rows, page_size):

            q = self.query + f'&$offset={offset}'
            response = http_hook.run_with_advanced_retry(
                endpoint=self.resource, data=q, _retry_args=retry_args)

            records = response.json()
            if not records:
                # An empty page has no columns, so the key lookup below would
                # fail; treat it as the end of the data set.
                self.log.info("No rows returned at offset %s; stopping.", offset)
                break

            df = pd.DataFrame(records)
            df.insert(0, "surrogate_keys", 'null', True)
            # Index with the column label itself: a set is not a valid indexer
            # (pandas >= 2.0 raises TypeError for it).
            df['partitioned_key'] = df[self.partitioned_key]
            # Move the new column from the end to position 1.
            first_col = df.pop('partitioned_key')
            df.insert(1, 'partitioned_key', first_col)
            csv_data = df.to_csv(index=False)

            name = self.object_name + '/' + str(offset)

            gcp_conn.upload(self.bucket_name, name, data=csv_data)

示例#12

0

显示文件

文件： text_to_speech.py 项目： vipadm/airflow

 def execute(self, context: 'Context') -> None:
     """Synthesize speech and upload the resulting audio to GCS.

     :param context: Task context, passed on to ``FileDetailsLink.persist``.
     """
     hook = CloudTextToSpeechHook(
         gcp_conn_id=self.gcp_conn_id,
         impersonation_chain=self.impersonation_chain,
     )
     result = hook.synthesize_speech(
         input_data=self.input_data,
         voice=self.voice,
         audio_config=self.audio_config,
         retry=self.retry,
         timeout=self.timeout,
     )
     with NamedTemporaryFile() as temp_file:
         temp_file.write(result.audio_content)
         # The upload is given the file's path, so the buffered audio must be
         # flushed to disk first or the object may be empty or truncated.
         temp_file.flush()
         cloud_storage_hook = GCSHook(
             gcp_conn_id=self.gcp_conn_id,
             impersonation_chain=self.impersonation_chain,
         )
         cloud_storage_hook.upload(bucket_name=self.target_bucket_name,
                                   object_name=self.target_filename,
                                   filename=temp_file.name)
         FileDetailsLink.persist(
             context=context,
             task_instance=self,
             uri=f"{self.target_bucket_name}/{self.target_filename}",
             project_id=cloud_storage_hook.project_id,
         )

示例#13

0

显示文件

    def _upload_data(
        self,
        gcs_hook: GCSHook,
        hook: GSheetsHook,
        sheet_range: str,
        sheet_values: List[Any],
    ) -> str:
        """Write ``sheet_values`` to a CSV file and upload it to GCS.

        The object is named ``<spreadsheet title>_<sheet_range>.csv`` with spaces
        replaced by underscores, and is placed under ``self.destination_path``
        when that attribute is set.

        :param gcs_hook: Hook used for the upload.
        :param hook: Hook used to look up the spreadsheet.
        :param sheet_range: Range label that becomes part of the object name.
        :param sheet_values: Rows passed to ``csv.writer.writerows``.
        :return: Name of the uploaded object within ``self.destination_bucket``.
        """
        spreadsheet = hook.get_spreadsheet(self.spreadsheet_id)
        title = spreadsheet['properties']['title']

        # Build the object name, prefixing the destination path only if one is set.
        object_name = f"{title}_{sheet_range}.csv".replace(" ", "_")
        if self.destination_path:
            object_name = f"{self.destination_path.strip('/')}/{object_name}"

        with NamedTemporaryFile("w+") as csv_file:
            csv.writer(csv_file).writerows(sheet_values)
            # The upload is given the file's path, so flush buffered rows first.
            csv_file.flush()

            gcs_hook.upload(
                bucket_name=self.destination_bucket,
                object_name=object_name,
                filename=csv_file.name,
            )
        return object_name

示例#14

0

显示文件

    def execute(self, context: Dict):
        """Fetch a Facebook Ads report and upload it to GCS as CSV.

        Nothing is uploaded when the report contains no rows.

        :param context: Task context; not used by this method.
        """
        facebook = FacebookAdsReportingHook(
            facebook_conn_id=self.facebook_conn_id,
            api_version=self.api_version,
        )
        report_rows = facebook.bulk_facebook_report(params=self.params, fields=self.fields)

        records = [dict(row) for row in report_rows]
        self.log.info("Facebook Returned %s data points", len(records))

        if not records:
            return

        # The CSV header comes from the keys of the first record.
        column_names = records[0].keys()
        with tempfile.NamedTemporaryFile("w", suffix=".csv") as csv_out:
            dict_writer = csv.DictWriter(csv_out, fieldnames=column_names)
            dict_writer.writeheader()
            dict_writer.writerows(records)
            # The upload is given the file's path, so flush buffered rows first.
            csv_out.flush()

            storage = GCSHook(
                gcp_conn_id=self.gcp_conn_id,
                impersonation_chain=self.impersonation_chain,
            )
            storage.upload(
                bucket_name=self.bucket_name,
                object_name=self.object_name,
                filename=csv_out.name,
                gzip=self.gzip,
            )
            self.log.info("%s uploaded to GCS", csv_out.name)

示例#15

0

显示文件

文件： glacier_to_gcs.py 项目： kushsharma/airflow

    def execute(self, context: 'Context') -> str:
        """Retrieve a Glacier vault inventory and upload it to GCS.

        :param context: Task context; not used by this method.
        :return: ``gs://`` URI of the uploaded object.
        """
        glacier_hook = GlacierHook(aws_conn_id=self.aws_conn_id)
        gcs_hook = GCSHook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
            impersonation_chain=self.impersonation_chain,
        )
        job_id = glacier_hook.retrieve_inventory(vault_name=self.vault_name)

        with tempfile.NamedTemporaryFile() as temp_file:
            glacier_data = glacier_hook.retrieve_inventory_results(
                vault_name=self.vault_name, job_id=job_id["jobId"])
            # Read the file content in chunks using StreamingBody
            # https://botocore.amazonaws.com/v1/documentation/api/latest/reference/response.html
            # NOTE: the StreamingBody method is ``iter_chunks`` (plural);
            # there is no ``iter_chunk``.
            stream = glacier_data["body"]
            for chunk in stream.iter_chunks(chunk_size=self.chunk_size):
                temp_file.write(chunk)
            # The upload is given the file's path, so flush buffered bytes first.
            temp_file.flush()
            gcs_hook.upload(
                bucket_name=self.bucket_name,
                object_name=self.object_name,
                filename=temp_file.name,
                gzip=self.gzip,
            )
        return f"gs://{self.bucket_name}/{self.object_name}"

示例#16

0

显示文件

    def execute(self, context: dict) -> str:
        """Download the media of an SDF download operation and upload it to GCS.

        :param context: Task context; not used by this method.
        :return: ``<bucket_name>/<object_name>`` of the uploaded object.
        """
        dv360 = GoogleDisplayVideo360Hook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
            api_version=self.api_version,
            impersonation_chain=self.impersonation_chain,
        )
        storage = GCSHook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
            impersonation_chain=self.impersonation_chain,
        )

        self.log.info("Retrieving operation...")
        sdf_operation = dv360.get_sdf_download_operation(
            operation_name=self.operation_name)

        self.log.info("Creating file for upload...")
        media_request = dv360.download_media(resource_name=sdf_operation)

        self.log.info("Sending file to the Google Cloud Storage...")
        with tempfile.NamedTemporaryFile() as download_file:
            # Content is streamed into the temp file in 1 MiB chunks.
            dv360.download_content_from_request(
                download_file, media_request, chunk_size=1024 * 1024
            )
            # The upload is given the file's path, so flush buffered bytes first.
            download_file.flush()
            storage.upload(
                bucket_name=self.bucket_name,
                object_name=self.object_name,
                filename=download_file.name,
                gzip=self.gzip,
            )

        return f"{self.bucket_name}/{self.object_name}"

示例#17

0

显示文件

def _fetch_ratings(api_conn_id, gcp_conn_id, gcs_bucket, **context):
    """Fetch one month of ratings from the API and upload them to GCS as CSV.

    The month is taken from ``context["execution_date"]`` and the object is
    written to ``ratings/<year>/<month>.csv`` in ``gcs_bucket``.

    :param api_conn_id: Connection id passed to ``MovielensHook``.
    :param gcp_conn_id: Connection id passed to ``GCSHook``.
    :param gcs_bucket: Destination bucket name.
    :param context: Keyword context; only ``execution_date`` is read.
    """
    year = context["execution_date"].year
    month = context["execution_date"].month

    # Pull the month's ratings from the API into a DataFrame.
    logging.info(f"Fetching ratings for {year}/{month:02d}")

    movielens = MovielensHook(conn_id=api_conn_id)
    records = movielens.get_ratings_for_month(year=year, month=month)
    ratings_df = pd.DataFrame.from_records(
        records,
        columns=["userId", "movieId", "rating", "timestamp"],
    )

    logging.info(f"Fetched {ratings_df.shape[0]} rows")

    # Stage the CSV in a throwaway directory, then upload it.
    with tempfile.TemporaryDirectory() as staging_dir:
        csv_path = path.join(staging_dir, "ratings.csv")
        ratings_df.to_csv(csv_path, index=False)

        logging.info(f"Writing results to ratings/{year}/{month:02d}.csv")
        GCSHook(gcp_conn_id).upload(
            bucket_name=gcs_bucket,
            object_name=f"ratings/{year}/{month:02d}.csv",
            filename=csv_path,
        )

示例#18

0

显示文件

文件： azure_blob_to_gcs.py 项目： zjffdu/airflow

    def execute(self, context):
        """Download an Azure blob to a temp file and upload it to GCS.

        :param context: Task context; not used by this method.
        :return: ``gs://`` URI of the uploaded object.
        """
        wasb = WasbHook(wasb_conn_id=self.wasb_conn_id)
        storage = GCSHook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
            impersonation_chain=self.impersonation_chain,
        )

        with tempfile.NamedTemporaryFile() as staging_file:
            # Only the temp file's path is used; the hooks do the reading and writing.
            local_path = staging_file.name

            self.log.info("Downloading data from blob: %s", self.blob_name)
            wasb.get_file(
                file_path=local_path,
                container_name=self.container_name,
                blob_name=self.blob_name,
            )

            self.log.info(
                "Uploading data from blob's: %s into GCP bucket: %s",
                self.object_name,
                self.bucket_name,
            )
            storage.upload(
                bucket_name=self.bucket_name,
                object_name=self.object_name,
                filename=local_path,
                gzip=self.gzip,
            )
            self.log.info(
                "Resources have been uploaded from blob: %s to GCS bucket:%s",
                self.blob_name,
                self.bucket_name,
            )
        return f"gs://{self.bucket_name}/{self.object_name}"

示例#19

0

显示文件

 def _upload_to_gcs(self, files_to_upload: Dict[str, Any]):
     """Upload every file in ``files_to_upload`` to GCS as JSON.

     :param files_to_upload: Mapping of object name to an open file handle;
         each handle's ``name`` is used as the local path to upload.
     """
     # ``gcp_conn_id`` replaces the deprecated ``google_cloud_storage_conn_id``
     # keyword of GCSHook.
     hook = GCSHook(gcp_conn_id=self.gcp_conn_id,
                    delegate_to=self.delegate_to)
     for obj, tmp_file_handle in files_to_upload.items():
         hook.upload(bucket_name=self.bucket,
                     object_name=obj,
                     filename=tmp_file_handle.name,
                     mime_type='application/json',
                     gzip=self.gzip)

示例#20

0

显示文件

 def _upload_to_gcs(self, files_to_upload):
     """
     Upload each entry of ``files_to_upload`` to Google Cloud Storage.

     Every entry is a mapping with ``file_name``, ``file_handle`` and
     ``file_mime_type`` keys. The entry whose name equals
     ``self.schema_filename`` is always uploaded without gzip.
     """
     storage = GCSHook(
         gcp_conn_id=self.gcp_conn_id,
         delegate_to=self.delegate_to)
     for entry in files_to_upload:
         object_name = entry.get('file_name')
         local_path = entry.get('file_handle').name
         content_type = entry.get('file_mime_type')
         # The schema file is never compressed; data files follow self.gzip.
         if object_name != self.schema_filename:
             compress = self.gzip
         else:
             compress = False
         storage.upload(self.bucket, object_name, local_path,
                        mime_type=content_type,
                        gzip=compress)

示例#21

0

显示文件

    def execute(self, context):
        """
        Upload the local file ``self.src`` to Google Cloud Storage as ``self.dst``.

        :param context: Task context; not used by this method.
        """
        # ``gcp_conn_id`` replaces the deprecated ``google_cloud_storage_conn_id``
        # keyword of GCSHook.
        hook = GCSHook(gcp_conn_id=self.gcp_conn_id,
                       delegate_to=self.delegate_to)

        hook.upload(
            bucket_name=self.bucket,
            object_name=self.dst,
            mime_type=self.mime_type,
            filename=self.src,
            gzip=self.gzip,
        )

示例#22

0

显示文件

 def _upload_to_gcs(self, file_to_upload):
     """Upload one file to Google Cloud Storage with the JSON mime type.

     :param file_to_upload: Mapping with ``file_name`` (object name) and
         ``file_handle`` (open file whose ``name`` is the local path).
     """
     storage = GCSHook(
         gcp_conn_id=self.gcp_conn_id,
         delegate_to=self.delegate_to,
         impersonation_chain=self.impersonation_chain,
     )
     object_name = file_to_upload.get('file_name')
     local_path = file_to_upload.get('file_handle').name
     storage.upload(
         bucket_name=self.bucket,
         object_name=object_name,
         filename=local_path,
         mime_type='application/json',
         gzip=self.gzip,
     )

示例#23

0

显示文件

文件： sql_to_gcs.py 项目： dskoda1/airflow

 def _upload_to_gcs(self, file_to_upload):
     """Upload one file to Google Cloud Storage.

     The file named ``self.schema_filename`` is always uploaded without gzip;
     any other file follows ``self.gzip``.

     :param file_to_upload: Mapping with ``file_name``, ``file_handle`` and
         ``file_mime_type`` keys.
     """
     storage = GCSHook(
         gcp_conn_id=self.gcp_conn_id,
         delegate_to=self.delegate_to,
         impersonation_chain=self.impersonation_chain,
     )
     object_name = file_to_upload.get('file_name')
     local_path = file_to_upload.get('file_handle').name
     content_type = file_to_upload.get('file_mime_type')
     # The schema file is never compressed.
     if object_name != self.schema_filename:
         compress = self.gzip
     else:
         compress = False
     storage.upload(
         self.bucket,
         object_name,
         local_path,
         mime_type=content_type,
         gzip=compress,
     )

示例#24

0

显示文件

文件： text_to_speech.py 项目： PreethamMadupuri91/AirflowWorkflow

 def execute(self, context):
     """Synthesize speech and upload the resulting audio to GCS.

     :param context: Task context; not used by this method.
     """
     hook = CloudTextToSpeechHook(gcp_conn_id=self.gcp_conn_id)
     result = hook.synthesize_speech(
         input_data=self.input_data,
         voice=self.voice,
         audio_config=self.audio_config,
         retry=self.retry,
         timeout=self.timeout,
     )
     with NamedTemporaryFile() as temp_file:
         temp_file.write(result.audio_content)
         # The upload is given the file's path, so the buffered audio must be
         # flushed to disk first or the object may be empty or truncated.
         temp_file.flush()
         # ``gcp_conn_id`` replaces the deprecated
         # ``google_cloud_storage_conn_id`` keyword of GCSHook.
         cloud_storage_hook = GCSHook(gcp_conn_id=self.gcp_conn_id)
         cloud_storage_hook.upload(
             bucket_name=self.target_bucket_name, object_name=self.target_filename, filename=temp_file.name
         )

示例#25

0

显示文件

    def execute(self, context: 'Context'):
        """Download a finished DV360 report and upload it to GCS as CSV.

        The resolved report name is pushed to XCom under ``report_name``.

        :param context: Task context, used for the XCom push.
        :raises AirflowException: If the report is still running.
        """
        hook = GoogleDisplayVideo360Hook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
            api_version=self.api_version,
            impersonation_chain=self.impersonation_chain,
        )
        gcs_hook = GCSHook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
            impersonation_chain=self.impersonation_chain,
        )

        resource = hook.get_query(query_id=self.report_id)
        # Check if report is ready
        if resource["metadata"]["running"]:
            raise AirflowException(f"Report {self.report_id} is still running")

        # If no custom report_name provided, use DV360 name
        file_url = resource["metadata"][
            "googleCloudStoragePathForLatestReport"]
        report_name = self.report_name or urlparse(file_url).path.split(
            "/")[-1]
        report_name = self._resolve_file_name(report_name)

        # Download the report
        self.log.info("Starting downloading report %s", self.report_id)
        # The temp file is used only inside this block, so it is left to be
        # deleted on exit; ``delete=False`` would leave one file behind per run.
        with tempfile.NamedTemporaryFile() as temp_file:
            with urllib.request.urlopen(file_url) as response:
                shutil.copyfileobj(response, temp_file, length=self.chunk_size)

            # The upload is given the file's path, so flush buffered bytes first.
            temp_file.flush()
            # Upload the local file to bucket
            bucket_name = self._set_bucket_name(self.bucket_name)
            gcs_hook.upload(
                bucket_name=bucket_name,
                object_name=report_name,
                gzip=self.gzip,
                filename=temp_file.name,
                mime_type="text/csv",
            )
        self.log.info(
            "Report %s was saved in bucket %s as %s.",
            self.report_id,
            self.bucket_name,
            report_name,
        )
        self.xcom_push(context, key="report_name", value=report_name)

示例#26

0

显示文件

文件： gdrive_to_gcs.py 项目： ysktir/airflow-1

 def _upload_data(self, gcs_hook: GCSHook,
                  gdrive_hook: GoogleDriveHook) -> str:
     """Download a Google Drive file into memory and upload it to GCS.

     NOTE(review): the signature is annotated ``-> str`` but the body has no
     ``return`` statement, so callers receive ``None`` -- confirm which one is
     intended.

     :param gcs_hook: Hook used for the upload.
     :param gdrive_hook: Hook used to locate and download the Drive file.
     """
     buffer = BytesIO()
     self._set_file_metadata(gdrive_hook=gdrive_hook)
     metadata = self.file_metadata
     drive_file_id = metadata["id"]
     content_type = metadata["mime_type"]

     media_request = gdrive_hook.get_media_request(file_id=drive_file_id)
     # The whole file is buffered in memory, downloaded in 100 MiB chunks.
     gdrive_hook.download_content_from_request(
         file_handle=buffer,
         request=media_request,
         chunk_size=104857600,
     )
     gcs_hook.upload(
         bucket_name=self.destination_bucket,
         object_name=self.destination_object,
         data=buffer.getvalue(),
         mime_type=content_type,
     )

示例#27

0

显示文件

    def execute(self, context):
        """
        Upload a newly created temporary file to Google Cloud Storage.

        NOTE(review): nothing is written to the temporary file before the
        upload, and ``self.src`` is used only as the file-name suffix -- the
        content of ``self.src`` itself is not read here. Confirm this is the
        intended behaviour.

        :param context: Task context; not used by this method.
        """
        storage = GoogleCloudStorageHook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
        )

        with tempfile.NamedTemporaryFile('w', suffix=self.src) as placeholder:
            storage.upload(
                bucket_name=self.bucket,
                object_name=self.dst,
                mime_type=self.mime_type,
                filename=placeholder.name,
                gzip=self.gzip,
            )
            placeholder.flush()

示例#28

0

显示文件

    def execute(self, context: dict):
        """Download all fragments of a Search Ads report and upload them to GCS.

        The first fragment is written as-is; every later fragment is passed
        through ``self._handle_report_fragment`` before being appended. The
        resolved file name is pushed to XCom under ``file_name``.

        :param context: Task context, used for the XCom push.
        :raises AirflowException: If the report is not ready yet.
        """
        search_ads = GoogleSearchAdsHook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
            api_version=self.api_version,
            impersonation_chain=self.impersonation_chain,
        )
        storage = GCSHook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
            impersonation_chain=self.impersonation_chain,
        )

        # Fall back to the report id when no explicit name was configured.
        report_name = self._resolve_file_name(self.report_name or self.report_id)

        report = search_ads.get(report_id=self.report_id)
        if not report['isReportReady']:
            raise AirflowException(f'Report {self.report_id} is not ready yet')

        # One entry in "files" per report fragment.
        fragment_total = len(report["files"])

        self.log.info("Downloading Search Ads report %s", self.report_id)
        with NamedTemporaryFile() as report_file:
            for index in range(fragment_total):
                raw_fragment = search_ads.get_file(
                    report_fragment=index, report_id=self.report_id
                )
                if index == 0:
                    report_file.write(raw_fragment)
                else:
                    report_file.write(self._handle_report_fragment(raw_fragment))

            # The upload is given the file's path, so flush buffered bytes first.
            report_file.flush()

            storage.upload(
                bucket_name=self.bucket_name,
                object_name=report_name,
                gzip=self.gzip,
                filename=report_file.name,
            )
        self.xcom_push(context, key="file_name", value=report_name)

示例#29

0

显示文件

    def execute(self, context: dict) -> None:
        """Download a Campaign Manager report file and upload it to GCS as CSV.

        The resolved report name is pushed to XCom under ``report_name``.

        :param context: Task context, used for the XCom push.
        """
        hook = GoogleCampaignManagerHook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
            api_version=self.api_version,
            impersonation_chain=self.impersonation_chain,
        )
        # ``gcp_conn_id`` replaces the deprecated ``google_cloud_storage_conn_id``
        # keyword of GCSHook.
        gcs_hook = GCSHook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
            impersonation_chain=self.impersonation_chain,
        )
        # Get name of the report; a random UUID is the fallback when the report
        # carries no "fileName".
        report = hook.get_report(file_id=self.file_id,
                                 profile_id=self.profile_id,
                                 report_id=self.report_id)
        report_name = self.report_name or report.get("fileName",
                                                     str(uuid.uuid4()))
        report_name = self._resolve_file_name(report_name)

        # Download the report
        self.log.info("Starting downloading report %s", self.report_id)
        request = hook.get_report_file(profile_id=self.profile_id,
                                       report_id=self.report_id,
                                       file_id=self.file_id)
        with tempfile.NamedTemporaryFile() as temp_file:
            downloader = http.MediaIoBaseDownload(fd=temp_file,
                                                  request=request,
                                                  chunksize=self.chunk_size)
            # next_chunk() returns (status, done); loop until done is truthy.
            download_finished = False
            while not download_finished:
                _, download_finished = downloader.next_chunk()

            # The upload is given the file's path, so flush buffered bytes first.
            temp_file.flush()
            # Upload the local file to bucket
            gcs_hook.upload(
                bucket_name=self.bucket_name,
                object_name=report_name,
                gzip=self.gzip,
                filename=temp_file.name,
                mime_type="text/csv",
            )

        self.xcom_push(context, key="report_name", value=report_name)

示例#30

0

显示文件

文件： facebook_ads_to_gcs.py 项目： kushsharma/airflow

 def _flush_rows(self, converted_rows: Optional[List[Any]], object_name: str):
     """Write ``converted_rows`` to a CSV file and upload it to GCS.

     Does nothing when ``converted_rows`` is ``None`` or empty.

     :param converted_rows: Row mappings; the keys of the first row become the
         CSV header.
     :param object_name: Destination object name inside ``self.bucket_name``.
     """
     if not converted_rows:
         return

     column_names = converted_rows[0].keys()
     with tempfile.NamedTemporaryFile("w", suffix=".csv") as csv_out:
         dict_writer = csv.DictWriter(csv_out, fieldnames=column_names)
         dict_writer.writeheader()
         dict_writer.writerows(converted_rows)
         # The upload is given the file's path, so flush buffered rows first.
         csv_out.flush()

         storage = GCSHook(
             gcp_conn_id=self.gcp_conn_id,
             impersonation_chain=self.impersonation_chain,
         )
         storage.upload(
             bucket_name=self.bucket_name,
             object_name=object_name,
             filename=csv_out.name,
             gzip=self.gzip,
         )
         self.log.info("%s uploaded to GCS", csv_out.name)