def show(self, firestore: Firestore, report: str, _print: bool = False, **unused):
    """Return the stored definition of a named SA360 dynamic report.

    The definition is looked up by name inside the '_reports' document of the
    SA360 report collection.

    Arguments:
        firestore (Firestore): Firestore wrapper used to load the document.
        report (str): name of the report definition to look up.
        _print (bool): when truthy, also pretty-print the definition to stdout.
        **unused: extra keyword arguments; accepted and ignored.

    Returns:
        Whatever the '_reports' document holds under `report`.
    """
    all_definitions = firestore.get_document(Type.SA360_RPT, '_reports')
    definition = all_definitions.get(report)

    if not _print:
        return definition

    print(f'SA360 Dynamic Report "{report}"')
    print()
    pprint.pprint(definition, indent=2, compact=False)
    return definition
class SA360Dynamic(ReportFetcher):
    """Fetcher for SA360 dynamic reports.

    Checks whether a requested SA360 report is ready, records its schema and
    file list in Firestore, and streams the first report file into the
    '<project>-report2bq-upload' GCS bucket.
    """
    report_type = Type.SA360_RPT
    email = None
    project = None
    profile = None

    def __init__(self,
                 email: str,
                 project: str,
                 append: bool = False,
                 infer_schema: bool = False):
        """Store the run settings and create the credential/Firestore helpers.

        Arguments:
            email (str): user email the credentials are created for.
            project (str): project id; also used to derive the bucket name.
            append (bool): stored on the instance as `append`.
            infer_schema (bool): stored on the instance as `infer_schema`.
        """
        self.email = email
        self.project = project
        self.creds = Credentials(email=email, project=project)
        self.credentials = storage.Client()._credentials
        self.transport = AuthorizedSession(credentials=self.credentials)
        self.append = append
        self.infer_schema = infer_schema
        self.firestore = Firestore(email=email, project=project)
        # Number of MiB per chunk; overridable through the environment.
        self.chunk_multiplier = int(os.environ.get('CHUNK_MULTIPLIER', 64))
        self.bucket = f'{self.project}-report2bq-upload'

    def service(self) -> Resource:
        """Return an SA360 API service built with this instance's credentials."""
        return discovery.get_service(service=Service.SA360,
                                     credentials=self.creds)

    def handle_report(self, run_config: Dict[str, Any]) -> bool:
        """Process one requested report run if the report is ready.

        When the report is ready, its schema and file list are written back to
        the report's Firestore document (together with any destination or
        notification overrides present in `run_config`) and the file is
        streamed to GCS.

        Arguments:
            run_config (Dict[str, Any]): run details; 'file_id' and
                'report_id' are required, 'dest_project', 'dest_dataset' and
                'notify_message' are optional overrides.

        Returns:
            bool: the report's 'isReportReady' value, or False if anything
            raised while fetching or processing the report.
        """
        sa360_service = self.service()
        request = sa360_service.reports().get(reportId=run_config['file_id'])

        try:
            report = request.execute()

            if report['isReportReady']:
                report_config = self.firestore.get_document(
                    type=Type.SA360_RPT, id=run_config['report_id'])

                csv_header, _ = self.read_header(report)
                schema = csv_helpers.create_table_schema(csv_header, None)
                report_config['schema'] = schema
                report_config['files'] = report['files']

                if 'dest_project' in run_config:
                    report_config['dest_project'] = run_config['dest_project']
                if 'dest_dataset' in run_config:
                    report_config['dest_dataset'] = run_config['dest_dataset']
                if 'notify_message' in run_config:
                    report_config['notifier']['message'] = run_config[
                        'notify_message']

                # update the report details please...
                self.firestore.update_document(Type.SA360_RPT,
                                               run_config['report_id'],
                                               report_config)

                # ... then stream the file to GCS a la DV360/CM
                self.stream_to_gcs(report_details=report_config,
                                   run_config=run_config)

            return report['isReportReady']

        except Exception:
            # Log the traceback as well: without it the cause of a failed
            # fetch cannot be diagnosed from the logs.
            logging.exception(
                'Report fetch error: Run %s for report %s',
                run_config.get('file_id'), run_config.get('report_id'))
            return False

    def _authorized_request(self, url: str) -> urllib.request.Request:
        """Build a request for `url` carrying this instance's auth headers."""
        request = urllib.request.Request(url)
        auth_headers = self.creds.auth_headers
        for name in auth_headers:
            request.add_header(name, auth_headers[name])
        return request

    def read_header(self, report_config: dict) -> list:
        """Download the first chunk of a report file and derive column types.

        Arguments:
            report_config (dict): must contain 'files', whose first entry has
                the download 'url'.

        Returns:
            The result of `csv_helpers.get_column_types` for the downloaded
            chunk; `handle_report` unpacks it as two values.
        """
        request = self._authorized_request(report_config['files'][0]['url'])
        with closing(urlopen(request)) as report:
            data = report.read(self.chunk_multiplier * 1024 * 1024)
        return csv_helpers.get_column_types(BytesIO(data))

    @measure_memory
    def stream_to_gcs(self, report_details: Dict[str, Any],
                      run_config: Dict[str, Any]) -> None:
        """Multi-threaded stream to GCS

        Downloads the first report file chunk by chunk and hands each chunk to
        a ThreadedGCSObjectStreamUpload through a queue. The blob is named
        '<report_id>.csv' in `self.bucket`.

        Arguments:
            report_details (dict): Report definition; 'files'[0]['url'] is the
                download location.
            run_config (dict): run details; 'report_id' names the blob.

        Raises:
            IOError: if the download ends before the number of bytes announced
                in the 'content-length' header has been received.
        """
        queue = Queue()

        report_id = run_config['report_id']

        # chunk_multiplier is set in the environment, but defaults to 64 - this leads to a
        # 64M chunk size we can throw around. Given the memory constraints of a cloud function
        # this seems like a good, safe number.
        chunk_size = self.chunk_multiplier * 1024 * 1024

        streamer = ThreadedGCSObjectStreamUpload(
            client=Cloud_Storage.client(),
            creds=credentials.Credentials(
                email=self.email, project=self.project).credentials,
            bucket_name=self.bucket,
            blob_name=f'{report_id}.csv',
            chunk_size=chunk_size,
            streamer_queue=queue)
        streamer.start()

        request = self._authorized_request(report_details['files'][0]['url'])
        with closing(urlopen(request)) as _report:
            # The header may be absent; in that case read until EOF.
            announced = _report.headers['content-length']
            expected = int(announced) if announced is not None else None
            downloaded = 0

            while expected is None or downloaded < expected:
                chunk = _report.read(chunk_size)
                if not chunk:
                    # EOF. Without this guard a short download would loop
                    # forever, queueing empty chunks.
                    break
                downloaded += len(chunk)
                queue.put(chunk)

            if expected is not None and downloaded < expected:
                raise IOError(
                    f'Report {report_id} download truncated: received '
                    f'{downloaded} of {expected} bytes')

        queue.join()
        streamer.stop()
class SA360ReportRunner(ReportRunner):
    """Runner that requests an SA360 report from a stored report config.

    The report config and its template are read from Firestore, a report
    request is sent to SA360, and the resulting run is stored through
    `Firestore.store_report_runner`.
    """
    report_type = Type.SA360_RPT

    def __init__(self,
                 report_id: str,
                 email: str,
                 project: str = None,
                 timezone: str = None):
        """Store the run parameters and create a Firestore helper.

        Arguments:
            report_id (str): id of the report config document.
            email (str): user email; used for credentials and error mail.
            project (str): project id recorded on the runner.
            timezone (str): fallback timezone name used when the report
                config does not provide one.
        """
        self.email = email
        self.report_id = report_id
        self.project = project
        self.timezone = timezone
        self.firestore = Firestore()

    def run(self, unattended: bool = True) -> Dict[str, Any]:
        """Run the report.

        Arguments:
            unattended (bool): when True run unattended; the attended mode is
                not implemented and raises NotImplementedError.

        Returns:
            Dict[str, Any]: the stored runner details, or None if the
            unattended run failed before they were created.
        """
        # TODO: Make SA360 object here
        sa360 = SA360(self.email, self.project)

        if unattended:
            return self._unattended_run(sa360=sa360)
        return self._attended_run(sa360=sa360)

    def _unattended_run(self, sa360: SA360) -> Dict[str, Any]:
        """Request the report from SA360 and store the runner details.

        Any Exception is logged and reported by email instead of being raised.

        Arguments:
            sa360 (SA360): object whose `creds` are used to build the service.

        Returns:
            Dict[str, Any]: the runner details (type, project, report_id,
            email, file_id), or None if the failure happened before they were
            built.
        """
        runner = None
        report_config = None
        try:
            report_config = self.firestore.get_document(type=Type.SA360_RPT,
                                                        id=self.report_id)
            if not report_config:
                raise NotImplementedError(f'No such runner: {self.report_id}')

            _tz = pytz.timezone(
                report_config.get('timezone') or self.timezone
                or 'America/Toronto')
            _today = datetime.now(_tz)

            # The dates are derived from the config's 'offset' and 'lookback'
            # day counts, each defaulting to 0 when missing.
            report_config['StartDate'] = (
                _today - timedelta(days=(report_config.get('offset') or 0))
            ).strftime('%Y-%m-%d')
            report_config['EndDate'] = (
                _today - timedelta(days=(report_config.get('lookback') or 0))
            ).strftime('%Y-%m-%d')

            template = self.firestore.get_document(
                Type.SA360_RPT, '_reports').get(report_config['report'])
            request_body = SA360ReportTemplate().prepare(template=template,
                                                         values=report_config)
            sa360_service = DiscoverService.get_service(Service.SA360,
                                                        sa360.creds)
            request = sa360_service.reports().request(body=request_body)
            response = request.execute()
            logging.info(response)

            runner = {
                'type': Type.SA360_RPT.value,
                'project': self.project,
                'report_id': self.report_id,
                'email': self.email,
                'file_id': response['id']
            }
            self.firestore.store_report_runner(runner)

        except Exception as e:
            logging.exception('Error in SA360 Report Runner for report %s',
                              self.report_id)
            # Notification is best-effort: a failure to send the mail must
            # not replace the failure being reported, so log it and carry on.
            try:
                self._email_error(
                    email=self.email,
                    error=e,
                    report_config=report_config,
                    message=('Error in SA360 Report Runner for report '
                             f'{self.report_id}'))
            except Exception:
                logging.exception(
                    'Unable to send error notification for SA360 report %s',
                    self.report_id)

        # Returned here, not from a `finally`, so that KeyboardInterrupt and
        # SystemExit are no longer silently discarded.
        return runner

    def _email_error(self,
                     message: str,
                     email: str = None,
                     error: Exception = None,
                     report_config: Dict[str, Any] = None) -> None:
        """Email an error report to the user and the administrator.

        The administrator address comes from the ADMINISTRATOR_EMAIL
        environment variable, falling back to the 'email' field of the
        'admin' document. Nothing is sent when there is no recipient.

        Arguments:
            message (str): used as the subject and first line of the body.
            email (str): user address for the 'to' field and credentials.
            error (Exception): the error being reported, if any.
            report_config (Dict[str, Any]): the report config, if known.
        """
        _to = [email] if email else []

        _administrator = os.environ.get('ADMINISTRATOR_EMAIL')
        if not _administrator:
            admin_doc = self.firestore.get_document(Type._ADMIN, 'admin')
            _administrator = admin_doc.get('email') if admin_doc else None
        _cc = [_administrator] if _administrator else []

        if not (_to or _cc):
            return

        config_text = report_config if report_config else 'Config unknown.'
        error_text = error if error else 'No exception.'
        body = (f'\n{message}\n\n'
                f'Config: {config_text}\n\n'
                f'Error: {error_text}\n')

        mail = GMailMessage(to=_to,
                            cc=_cc,
                            subject=message,
                            body=body,
                            project=os.environ.get('GCP_PROJECT'))

        GMail().send_message(
            message=mail,
            credentials=Credentials(email=email,
                                    project=os.environ.get('GCP_PROJECT')))

    def _attended_run(self, sa360: SA360) -> None:
        """Attended runs are not supported for SA360; always raises."""
        raise NotImplementedError()