Exemplo n.º 1
0
    def show(self,
             firestore: Firestore,
             report: str,
             _print: bool = False,
             **unused):
        """Look up the stored definition of a named SA360 dynamic report.

        The definitions are read from the '_reports' document of the
        SA360 report type, and the entry keyed by `report` is returned.

        Arguments:
            firestore (Firestore):  datastore the definitions are read from
            report (str):  name of the report definition to fetch
            _print (bool):  when truthy, also pretty-print the definition
                to stdout under a short heading
            **unused:  extra keyword arguments; accepted and ignored

        Returns:
            whatever `.get(report)` yields on the '_reports' document.
        """
        all_reports = firestore.get_document(Type.SA360_RPT, '_reports')
        report_definition = all_reports.get(report)

        # Nothing to display: hand the definition straight back.
        if not _print:
            return report_definition

        print(f'SA360 Dynamic Report "{report}"')
        print()
        pprint.pprint(report_definition, indent=2, compact=False)
        return report_definition
Exemplo n.º 2
0
class SA360Dynamic(ReportFetcher):
    """Fetcher for SA360 dynamic reports.

    Checks whether a requested SA360 report run is ready and, if so,
    records its schema and file list in Firestore and streams the first
    report file into the project's upload bucket in GCS.
    """
    report_type = Type.SA360_RPT
    email = None
    project = None
    profile = None

    def __init__(self,
                 email: str,
                 project: str,
                 append: bool = False,
                 infer_schema: bool = False):
        """Set up credentials, Firestore access and upload settings.

        Arguments:
            email (str):  user the credentials are created for
            project (str):  project id; also used to derive the bucket name
            append (bool):  stored on the instance as `self.append`
            infer_schema (bool):  stored on the instance as
                `self.infer_schema`
        """
        self.email = email
        self.project = project
        self.creds = Credentials(email=email, project=project)
        self.credentials = storage.Client()._credentials
        self.transport = AuthorizedSession(credentials=self.credentials)
        self.append = append
        self.infer_schema = infer_schema

        self.firestore = Firestore(email=email, project=project)

        # Chunk size (in MiB) for downloads; overridable via the environment.
        self.chunk_multiplier = int(os.environ.get('CHUNK_MULTIPLIER', 64))
        self.bucket = f'{self.project}-report2bq-upload'

    def service(self) -> Resource:
        """Return the SA360 service built with this instance's credentials."""
        return discovery.get_service(service=Service.SA360,
                                     credentials=self.creds)

    def handle_report(self, run_config: Dict[str, Any]) -> bool:
        """Process one report run, streaming it to GCS if it is ready.

        Arguments:
            run_config (dict):  run details. 'file_id' and 'report_id' are
                required; 'dest_project', 'dest_dataset' and
                'notify_message' are optional overrides copied onto the
                stored report configuration.

        Returns:
            the report's 'isReportReady' flag, or False if anything failed
            while fetching or processing the report.
        """
        sa360_service = self.service()
        request = sa360_service.reports().get(reportId=run_config['file_id'])

        try:
            report = request.execute()

            if report['isReportReady']:
                report_config = self.firestore.get_document(
                    type=Type.SA360_RPT, id=run_config['report_id'])

                csv_header, _ = self.read_header(report)
                schema = csv_helpers.create_table_schema(csv_header, None)
                report_config['schema'] = schema
                report_config['files'] = report['files']

                # Per-run destination overrides replace the stored values.
                for key in ('dest_project', 'dest_dataset'):
                    if key in run_config:
                        report_config[key] = run_config[key]
                if 'notify_message' in run_config:
                    report_config['notifier']['message'] = run_config[
                        'notify_message']

                # update the report details please...
                self.firestore.update_document(Type.SA360_RPT,
                                               run_config['report_id'],
                                               report_config)

                # ... then stream the file to GCS a la DV360/CM
                self.stream_to_gcs(report_details=report_config,
                                   run_config=run_config)

            return report['isReportReady']

        except Exception:
            # logging.exception records the traceback, so the cause of the
            # failure is not lost. `.get` keeps the handler itself from
            # raising if 'report_id' is what was missing.
            logging.exception(
                f'Report fetch error: Run {run_config["file_id"]} for report {run_config.get("report_id")}'
            )
            return False

    def _authorized_request(self, url: str):
        """Build a `urllib.request.Request` for `url` with auth headers set."""
        request = urllib.request.Request(url)
        auth_headers = self.creds.auth_headers
        for name in auth_headers:
            request.add_header(name, auth_headers[name])
        return request

    def read_header(self, report_config: dict) -> list:
        """Read the start of the first report file to determine its columns.

        At most one chunk (`chunk_multiplier` MiB) is downloaded.

        Arguments:
            report_config (dict):  must contain 'files', a list whose first
                entry has a 'url' key.

        Returns:
            the result of `csv_helpers.get_column_types` on the downloaded
            data (the caller unpacks it as two values).
        """
        request = self._authorized_request(report_config['files'][0]['url'])

        with closing(urlopen(request)) as report:
            data = report.read(self.chunk_multiplier * 1024 * 1024)
            bytes_io = BytesIO(data)

        return csv_helpers.get_column_types(bytes_io)

    @measure_memory
    def stream_to_gcs(self, report_details: Dict[str, Any],
                      run_config: Dict[str, Any]) -> None:
        """Multi-threaded stream to GCS

        Downloads the first report file in chunks and hands each chunk to
        a ThreadedGCSObjectStreamUpload via a queue. The object is written
        to `self.bucket` as '<report_id>.csv'.

        Arguments:
            report_details (dict):  Report definition; 'files'[0]['url'] is
                the file that is downloaded
            run_config (dict):  run details; 'report_id' names the blob
        """
        queue = Queue()

        report_id = run_config['report_id']

        # chunk_multiplier is set in the environment, but defaults to 64 - this leads to a
        # 64M chunk size we can throw around. Given the memory constraints of a cloud function
        # this seems like a good, safe number.
        chunk_size = self.chunk_multiplier * 1024 * 1024

        streamer = \
          ThreadedGCSObjectStreamUpload(
            client=Cloud_Storage.client(),
            creds=credentials.Credentials(
              email=self.email, project=self.project).credentials,
            bucket_name=self.bucket,
            blob_name=f'{report_id}.csv',
            chunk_size=chunk_size,
            streamer_queue=queue)
        streamer.start()

        request = self._authorized_request(report_details['files'][0]['url'])

        with closing(urlopen(request)) as _report:
            # content-length may be absent; in that case read until EOF.
            declared_size = _report.headers['content-length']
            expected = int(declared_size) if declared_size is not None else None
            downloaded = 0

            while expected is None or downloaded < expected:
                chunk = _report.read(chunk_size)
                if not chunk:
                    # EOF. Without this check a connection that ends early
                    # would enqueue empty chunks forever.
                    break
                downloaded += len(chunk)
                queue.put(chunk)

            if expected is not None and downloaded < expected:
                logging.error(
                    'Report %s download ended early: %d of %d bytes received',
                    report_id, downloaded, expected)

        queue.join()
        streamer.stop()
Exemplo n.º 3
0
class SA360ReportRunner(ReportRunner):
  """Runner that requests an SA360 dynamic report and records the run.

  The report configuration is read from Firestore, turned into a request
  body via SA360ReportTemplate, submitted to the SA360 reports API, and
  the resulting run is stored so it can be picked up later.
  """
  report_type = Type.SA360_RPT

  def __init__(self, report_id: str, email: str, project: str=None, timezone: str=None):
    """Store the run parameters and create the Firestore client.

    Arguments:
        report_id (str):  id of the report configuration in Firestore
        email (str):  user the report is run for; also the error recipient
        project (str):  project id recorded on the runner
        timezone (str):  fallback timezone name, used when the report
            configuration does not specify one
    """
    self.email = email
    self.report_id = report_id
    self.project = project
    self.timezone = timezone

    self.firestore = Firestore()


  def run(self, unattended: bool = True) -> Dict[str, Any]:
    """Run the report.

    Arguments:
        unattended (bool):  True runs unattended; False selects the
            attended path, which currently raises NotImplementedError.

    Returns:
        the runner details from the unattended run (None if it failed
        before the runner was built).
    """
    # TODO: Make SA360 object here
    sa360 = SA360(self.email, self.project)

    if unattended:
      return self._unattended_run(sa360=sa360)
    else:
      return self._attended_run(sa360=sa360)


  def _unattended_run(self, sa360: SA360) -> Dict[str, Any]:
    """Request the report and store the runner describing the request.

    Any Exception raised along the way is reported by email via
    `_email_error` rather than propagated.

    Arguments:
        sa360 (SA360):  supplies the credentials for the SA360 service

    Returns:
        the runner dict, or None if the run failed before it was built.
    """
    runner = None
    report_config = None
    try:
      report_config = self.firestore.get_document(type=Type.SA360_RPT, id=self.report_id)
      if not report_config:
        raise NotImplementedError(f'No such runner: {self.report_id}')

      # Timezone precedence: report config, then constructor value, then default.
      _tz = pytz.timezone(report_config.get('timezone') or self.timezone or 'America/Toronto')
      _today = datetime.now(_tz)

      report_config['StartDate'] = (_today - timedelta(days=(report_config.get('offset') or 0))).strftime('%Y-%m-%d')
      report_config['EndDate'] = (_today - timedelta(days=(report_config.get('lookback') or 0))).strftime('%Y-%m-%d')

      template = self.firestore.get_document(Type.SA360_RPT, '_reports').get(report_config['report'])
      request_body = SA360ReportTemplate().prepare(template=template, values=report_config)
      sa360_service = DiscoverService.get_service(Service.SA360, sa360.creds)
      request = sa360_service.reports().request(body=request_body)
      response = request.execute()
      logging.info(response)

      runner = {
        'type': Type.SA360_RPT.value,
        'project': self.project,
        'report_id': self.report_id,
        'email': self.email,
        'file_id': response['id']
      }
      self.firestore.store_report_runner(runner)

    except Exception as e:
      # Error reporting is best effort: a failure to send the email must not
      # escape from here, matching the method's contract of never raising
      # for ordinary errors.
      try:
        self._email_error(email=self.email, error=e, report_config=report_config,
          message=f'Error in SA360 Report Runner for report {self.report_id}')
      except Exception:
        logging.exception(
          'Unable to send error notification for SA360 report %s', self.report_id)

    # Returned after the try block rather than from a `finally`, so that
    # KeyboardInterrupt / SystemExit are no longer silently swallowed.
    return runner

  def _email_error(self, message: str, email: str=None,
    error: Exception=None, report_config: Dict[str, Any]=None) -> None:
    """Email an error report to the user and/or the administrator.

    The administrator address comes from the ADMINISTRATOR_EMAIL
    environment variable or, failing that, from the 'admin' document in
    Firestore. Nothing is sent if there is no recipient at all.

    Arguments:
        message (str):  used as the subject and the first line of the body
        email (str):  primary recipient, if any
        error (Exception):  the error being reported, if any
        report_config (dict):  report configuration to include, if known
    """
    _to = [email] if email else []

    _administrator = os.environ.get('ADMINISTRATOR_EMAIL')
    if not _administrator:
      # The instance attribute set in __init__ is `firestore`; tolerate a
      # missing admin document instead of failing on `.get`.
      admin_document = self.firestore.get_document(Type._ADMIN, 'admin') or {}
      _administrator = admin_document.get('email')
    _cc = [_administrator] if _administrator else []

    if _to or _cc:
      gmail_message = GMailMessage(
        to=_to,
        cc=_cc,
        subject=message,
        body=f'''
{message}

Config: {report_config if report_config else 'Config unknown.'}

Error: {error if error else 'No exception.'}
''',
        project=os.environ.get('GCP_PROJECT'))

      GMail().send_message(
        message=gmail_message,
        credentials=Credentials(email=email, project=os.environ.get('GCP_PROJECT'))
      )

  def _attended_run(self, sa360: SA360) -> None:
    """Attended runs are not supported; always raises NotImplementedError."""
    raise NotImplementedError()