def finish_process(self):
    """Place the table's data file in the datasets bucket and return its URL.

    Exactly one of three paths is taken:

    * ``self.should_upload`` is true: the local output file is closed and
      uploaded to the bucket.
    * the source object already sits at the destination key: nothing is
      transferred, only a log line is emitted.
    * otherwise: the source object is copied to the destination key and,
      when ``self.delete_source`` is true, the source object is removed.

    The local output file is removed from disk in every case.

    Returns:
        The destination URL, built from ``settings.AWS_S3_ENDPOINT_URL``,
        the datasets bucket name and the destination object key.
    """
    source = self.file_url_info.path  # shaped like /BUCKET_NAME/OBJ_PATH
    extension = "".join(Path(source).suffixes)
    dest_name = f"{self.table.dataset.slug}/{self.table.name}{extension}"
    bucket = settings.MINIO_STORAGE_DATASETS_BUCKET_NAME
    already_in_place = source == f"/{bucket}/{dest_name}"

    if self.should_upload:
        self.output_file.close()
        progress = MinioProgress()
        self.log(f"Uploading file to bucket: {bucket}")

        guessed_type, encoding = mimetypes.guess_type(dest_name)
        if encoding is None:
            content_type = "text/plain"
        elif encoding == "gzip":
            # For '.csv.gz' guess_type returns ('text/csv', 'gzip'), so the
            # type is replaced by the one describing the compressed payload.
            content_type = "application/gzip"
        else:
            content_type = guessed_type

        self.minio.fput_object(
            bucket,
            dest_name,
            self.output_file.name,
            progress=progress,
            content_type=content_type,
        )
    elif already_in_place:
        self.log(f"Using {source} as the dataset file.", end="")
    else:
        self.log(f"Copying {source} to bucket {bucket}")
        self.minio.copy_object(bucket, dest_name, source)
        if self.delete_source:
            self.log(f"Deleting {source}")
            # Index 0 is whatever precedes the first "/"; index 1 is the
            # bucket name; everything after the second "/" is the object key.
            pieces = source.split("/", 2)
            source_bucket = pieces[1]
            source_obj = "".join(pieces[2:])
            self.minio.remove_object(source_bucket, source_obj)

    os.remove(self.output_file.name)
    return f"{settings.AWS_S3_ENDPOINT_URL}{bucket}/{dest_name}"
def update_list_html(self, files_list):
    """Upload a redirect page pointing at the dataset's files URL.

    The uploaded HTML contains only a ``<meta http-equiv="Refresh">`` tag
    that sends the browser to ``https://{settings.APP_HOST}`` followed by
    ``self.dataset.files_url``.

    Args:
        files_list: Accepted for interface compatibility; this method does
            not read it.

    Returns:
        The URL of the uploaded page, built from
        ``settings.AWS_S3_ENDPOINT_URL``, ``self.bucket`` and the object key.
    """
    files_url = f"https://{settings.APP_HOST}{self.dataset.files_url}"
    content = f'<html><head><meta http-equiv="Refresh" content="0; url=\'{files_url}\'" /></head></html>'

    # The upload below declares charset=utf-8, so write the file with that
    # encoding explicitly instead of relying on the locale default.
    temp_file = NamedTemporaryFile(delete=False, mode="w", encoding="utf-8")
    try:
        with temp_file:
            temp_file.write(content)

        self.log("\nUploading list HTML...")
        dest_name = f"{self.dataset.slug}/{settings.MINIO_DATASET_TABLES_FILES_LIST_FILENAME}"
        progress = MinioProgress()
        self.minio.fput_object(
            self.bucket,
            dest_name,
            temp_file.name,
            progress=progress,
            content_type="text/html; charset=utf-8",
        )
    finally:
        # delete=False means nothing else cleans this file up; remove it
        # even when the write or the upload raises.
        os.remove(temp_file.name)

    return f"{settings.AWS_S3_ENDPOINT_URL}{self.bucket}/{dest_name}"
def update_sha512_sums_file(self):
    """Upload the dataset's SHA-512 sums file to ``self.bucket``.

    The content of ``self.dataset.sha512sums`` is written to a temporary
    file and uploaded as ``text/plain``. The object key is the path of
    ``sha_sums.file_url`` with the
    ``/{settings.MINIO_STORAGE_DATASETS_BUCKET_NAME}/`` segment removed.

    Returns:
        The ``self.dataset.sha512sums`` object that was uploaded.
    """
    sha_sums = self.dataset.sha512sums

    temp_file = NamedTemporaryFile(delete=False, mode="w")
    try:
        with temp_file:
            temp_file.write(sha_sums.content)

        self.log(f"Uploading {sha_sums.filename}...")
        progress = MinioProgress()
        bucket_prefix = f"/{settings.MINIO_STORAGE_DATASETS_BUCKET_NAME}/"
        object_name = urlparse(sha_sums.file_url).path.replace(bucket_prefix, "")
        self.minio.fput_object(
            self.bucket,
            object_name,
            temp_file.name,
            progress=progress,
            content_type="text/plain",
        )
    finally:
        # delete=False means nothing else cleans this file up; remove it
        # even when the write or the upload raises.
        os.remove(temp_file.name)

    return sha_sums
def upload_file(input_filename, bucket, remote_filename, progress=False):
    """Upload a local file to ``bucket`` under the key ``remote_filename``.

    The content type is derived from ``remote_filename``: names with no
    detected encoding are sent as ``text/plain``, gzip-encoded names as
    ``application/gzip``, and any other encoding keeps the type guessed by
    ``mimetypes``.

    Args:
        input_filename: Path of the local file to send.
        bucket: Name of the destination bucket.
        remote_filename: Object key to store the file under.
        progress: When truthy, a ``MinioProgress`` instance is passed to the
            upload call; otherwise ``None`` is passed.

    Returns:
        Whatever the client's ``fput_object`` call returns.
    """
    guessed_type, encoding = mimetypes.guess_type(remote_filename)
    if encoding is None:
        content_type = "text/plain"
    elif encoding == "gzip":
        # For '.csv.gz' guess_type returns ('text/csv', 'gzip'), so the type
        # is replaced by the one describing the compressed payload.
        content_type = "application/gzip"
    else:
        content_type = guessed_type

    endpoint = urlparse(settings.AWS_S3_ENDPOINT_URL).netloc
    client = Minio(
        endpoint,
        access_key=settings.AWS_ACCESS_KEY_ID,
        secret_key=settings.AWS_SECRET_ACCESS_KEY,
    )
    progress_hook = MinioProgress() if progress else None
    return client.fput_object(
        bucket,
        remote_filename,
        input_filename,
        content_type=content_type,
        progress=progress_hook,
    )