Exemplo n.º 1
0
    def finish_process(self):
        """Place the table's data file in the datasets bucket and return its URL.

        Exactly one of three things happens:

        * ``self.should_upload`` is truthy: ``self.output_file`` is closed and
          uploaded to the bucket;
        * otherwise, when the source object is already the destination
          object, nothing is transferred;
        * otherwise the source object is copied into the bucket, and the
          source is removed afterwards when ``self.delete_source`` is truthy.

        In every case the local file ``self.output_file.name`` is deleted
        before returning.

        Returns:
            The destination URL, built from ``settings.AWS_S3_ENDPOINT_URL``,
            the bucket name and the destination object name.
        """
        # The URL path has the shape /BUCKET_NAME/OBJ_PATH.
        origin = self.file_url_info.path
        extensions = "".join(Path(origin).suffixes)
        object_key = f"{self.table.dataset.slug}/{self.table.name}{extensions}"
        target_bucket = settings.MINIO_STORAGE_DATASETS_BUCKET_NAME
        already_in_place = origin == f"/{target_bucket}/{object_key}"

        if self.should_upload:
            self.output_file.close()
            tracker = MinioProgress()
            self.log(f"Uploading file to bucket: {target_bucket}")

            mime_type, compression = mimetypes.guess_type(object_key)
            if compression is None:
                mime_type = "text/plain"
            elif compression == "gzip":
                # For '.csv.gz', guess_type returns ('text/csv', 'gzip').
                mime_type = "application/gzip"

            self.minio.fput_object(
                target_bucket,
                object_key,
                self.output_file.name,
                progress=tracker,
                content_type=mime_type,
            )
        elif already_in_place:
            self.log(f"Using {origin} as the dataset file.", end="")
        else:
            self.log(f"Copying {origin} to bucket {target_bucket}")
            self.minio.copy_object(target_bucket, object_key, origin)
            if self.delete_source:
                self.log(f"Deleting {origin}")
                # origin starts with "/", so index 0 is empty, index 1 is the
                # bucket and the remainder is the object path.
                pieces = origin.split("/")
                self.minio.remove_object(pieces[1], "/".join(pieces[2:]))

        os.remove(self.output_file.name)
        return f"{settings.AWS_S3_ENDPOINT_URL}{target_bucket}/{object_key}"
Exemplo n.º 2
0
    def update_list_html(self, files_list):
        """Upload an HTML page that redirects to the dataset's files page.

        The page consists of a single meta-refresh tag pointing at
        ``https://{settings.APP_HOST}{self.dataset.files_url}``. It is stored
        in ``self.bucket`` under the dataset slug, using the file name given
        by ``settings.MINIO_DATASET_TABLES_FILES_LIST_FILENAME``.

        Args:
            files_list: Not used by this method; kept so existing callers
                keep working.

        Returns:
            The URL of the uploaded page, built from
            ``settings.AWS_S3_ENDPOINT_URL``, the bucket and the object name.
        """
        files_url = f"https://{settings.APP_HOST}{self.dataset.files_url}"
        content = f'<html><head><meta http-equiv="Refresh" content="0; url=\'{files_url}\'" /></head></html>'
        dest_name = f"{self.dataset.slug}/{settings.MINIO_DATASET_TABLES_FILES_LIST_FILENAME}"

        # delete=False: the file has to survive close() so that fput_object
        # can read it by path. It is written as UTF-8 to match the charset
        # declared in the content type below.
        temp_file = NamedTemporaryFile(delete=False, mode="w", encoding="utf-8")
        try:
            with temp_file:
                temp_file.write(content)

            self.log("\nUploading list HTML...")
            progress = MinioProgress()
            self.minio.fput_object(
                self.bucket, dest_name, temp_file.name, progress=progress, content_type="text/html; charset=utf-8"
            )
        finally:
            # Remove the temporary file even when writing or uploading fails.
            os.remove(temp_file.name)

        return f"{settings.AWS_S3_ENDPOINT_URL}{self.bucket}/{dest_name}"
Exemplo n.º 3
0
    def update_sha512_sums_file(self):
        """Upload the dataset's SHA512 sums file to ``self.bucket``.

        The content comes from ``self.dataset.sha512sums.content``. The object
        name is the path of ``sha512sums.file_url`` with the leading
        ``/<settings.MINIO_STORAGE_DATASETS_BUCKET_NAME>/`` removed.

        Returns:
            The ``self.dataset.sha512sums`` object whose content was uploaded.
        """
        sha_sums = self.dataset.sha512sums

        # Strip only the first "/<bucket name>/" occurrence, so an object
        # path that happens to repeat that segment further on is left intact.
        bucket_prefix = f"/{settings.MINIO_STORAGE_DATASETS_BUCKET_NAME}/"
        dest_name = urlparse(sha_sums.file_url).path.replace(bucket_prefix, "", 1)

        # delete=False: the file has to survive close() so that fput_object
        # can read it by path.
        temp_file = NamedTemporaryFile(delete=False, mode="w")
        try:
            with temp_file:
                temp_file.write(sha_sums.content)

            self.log(f"Uploading {sha_sums.filename}...")
            progress = MinioProgress()
            self.minio.fput_object(
                self.bucket,
                dest_name,
                temp_file.name,
                progress=progress,
                content_type="text/plain",
            )
        finally:
            # Remove the temporary file even when writing or uploading fails.
            os.remove(temp_file.name)

        return sha_sums
Exemplo n.º 4
0
def upload_file(input_filename, bucket, remote_filename, progress=False):
    """Upload a local file to a bucket using a newly created Minio client.

    The content type is derived from ``remote_filename``: gzip-encoded names
    are sent as ``application/gzip``, names with no encoding as
    ``text/plain``, and any other encoding keeps the type guessed by
    ``mimetypes``.

    Args:
        input_filename: Path of the local file to upload.
        bucket: Name of the destination bucket.
        remote_filename: Object name to store the file under.
        progress: When truthy, a ``MinioProgress`` instance is passed to the
            upload call; otherwise ``None`` is passed.

    Returns:
        Whatever the client's ``fput_object`` call returns.
    """
    mime_type, compression = mimetypes.guess_type(remote_filename)
    if compression is None:
        mime_type = "text/plain"
    elif compression == "gzip":
        # For '.csv.gz', guess_type returns ('text/csv', 'gzip').
        mime_type = "application/gzip"

    # The client takes "host[:port]", not a full URL.
    endpoint = urlparse(settings.AWS_S3_ENDPOINT_URL).netloc
    client = Minio(
        endpoint,
        access_key=settings.AWS_ACCESS_KEY_ID,
        secret_key=settings.AWS_SECRET_ACCESS_KEY,
    )

    reporter = MinioProgress() if progress else None
    return client.fput_object(
        bucket,
        remote_filename,
        input_filename,
        content_type=mime_type,
        progress=reporter,
    )