Пример #1
0
    def _aggregate_operation_aspect_events(
        self,
        events: List[RedshiftJoinedAccessEvent],
        operation_type: Union[str, "OperationTypeClass"],
    ) -> Iterable[MetadataWorkUnit]:
        """Yield an ``operation`` aspect work unit for each complete event.

        Events where any of ``database``, ``usename``, ``schema_``, ``table``
        or ``endtime`` is falsy are skipped without being reported.

        Args:
            events: Joined access events to convert.
            operation_type: Operation type recorded on every emitted aspect.

        Yields:
            One ``MetadataWorkUnit`` per complete event, wrapping an UPSERT
            proposal for the lower-cased ``database.schema.table`` dataset
            on the ``redshift`` platform.
        """
        for access_event in events:
            if not (access_event.database and access_event.usename
                    and access_event.schema_ and access_event.table
                    and access_event.endtime):
                continue

            dataset_name = (f"{access_event.database}."
                            f"{access_event.schema_}.{access_event.table}")
            # The event end time, scaled by 1000, serves as both the aspect
            # timestamp and the last-updated timestamp.
            end_millis: int = int(access_event.endtime.timestamp() * 1000)
            # Only the part of the user name before the first "@" is used
            # to build the actor URN.
            actor_urn = builder.make_user_urn(
                access_event.usename.split("@")[0])

            aspect = OperationClass(
                timestampMillis=end_millis,
                lastUpdatedTimestamp=end_millis,
                actor=actor_urn,
                operationType=operation_type,
            )
            target_urn = builder.make_dataset_urn(
                "redshift",
                dataset_name.lower(),
                self.config.env,
            )
            proposal = MetadataChangeProposalWrapper(
                entityType="dataset",
                aspectName="operation",
                changeType=ChangeTypeClass.UPSERT,
                entityUrn=target_urn,
                aspect=aspect,
            )
            unit_id = (f"operation-aspect-{access_event.table}-"
                       f"{access_event.endtime.isoformat()}")
            yield MetadataWorkUnit(id=unit_id, mcp=proposal)
Пример #2
0
 def _get_operation_aspect_work_units(
     self, events: Iterable[SnowflakeJoinedAccessEvent]
 ) -> Iterable[MetadataWorkUnit]:
     """Yield ``operation`` aspect work units for recognised statements.

     An event is processed only when its ``query_start_time`` is truthy and
     its ``query_type`` is a key of ``OPERATION_STATEMENT_TYPES``; all other
     events are skipped. For each processed event, one work unit is produced
     per entry in ``base_objects_accessed``.

     Args:
         events: Joined access events to convert.

     Yields:
         A ``MetadataWorkUnit`` wrapping an UPSERT proposal for the
         lower-cased object name on the ``snowflake`` platform.
     """
     for access_event in events:
         if not (access_event.query_start_time
                 and access_event.query_type in OPERATION_STATEMENT_TYPES):
             continue

         started_at = access_event.query_start_time
         statement_type = access_event.query_type
         email = access_event.email
         mapped_operation = OPERATION_STATEMENT_TYPES[statement_type]
         # The query start time, scaled by 1000, serves as both the aspect
         # timestamp and the last-updated timestamp.
         start_millis: int = int(started_at.timestamp() * 1000)
         # Only the part of the email before the first "@" is used to build
         # the actor URN; it is shared by every object of this event.
         actor_urn = builder.make_user_urn(email.split("@")[0])

         for accessed in access_event.base_objects_accessed:
             object_name = accessed.objectName
             target_urn = builder.make_dataset_urn(
                 "snowflake",
                 object_name.lower(),
                 self.config.env,
             )
             aspect = OperationClass(
                 timestampMillis=start_millis,
                 lastUpdatedTimestamp=start_millis,
                 actor=actor_urn,
                 operationType=mapped_operation,
             )
             proposal = MetadataChangeProposalWrapper(
                 entityType="dataset",
                 aspectName="operation",
                 changeType=ChangeTypeClass.UPSERT,
                 entityUrn=target_urn,
                 aspect=aspect,
             )
             # The id keeps the object name in its original case, unlike
             # the URN above.
             unit_id = (f"operation-aspect-{object_name}-"
                        f"{started_at.isoformat()}")
             yield MetadataWorkUnit(id=unit_id, mcp=proposal)
Пример #3
0
 def _create_operation_aspect_work_unit(
         self, event: QueryEvent) -> Optional[MetadataWorkUnit]:
     """Build the ``operation`` aspect work unit for a query event.

     Args:
         event: The query event to convert.

     Returns:
         A ``MetadataWorkUnit`` targeting the event's destination table, or
         ``None`` when the statement type is not a key of
         ``OPERATION_STATEMENT_TYPES``, when the event has no destination
         table, or when cleaning up the destination table raises (in which
         case a warning is reported).
     """
     if not (event.statementType in OPERATION_STATEMENT_TYPES
             and event.destinationTable):
         return None

     try:
         target_table: BigQueryTableRef = (
             event.destinationTable.remove_extras())
     except Exception as e:
         self.report.report_warning(
             str(event.destinationTable),
             f"Failed to clean up destination table, {e}",
         )
         return None

     # timestampMillis is the wall-clock time of this call, while
     # lastUpdatedTimestamp comes from the event itself.
     now_millis: int = int(time.time() * 1000)
     event_millis: int = int(event.timestamp.timestamp() * 1000)

     # Referenced tables that fail clean-up are reported and left out; they
     # do not prevent the work unit from being produced.
     referenced_urns = []
     for referenced in event.referencedTables or ():
         try:
             referenced_urn = _table_ref_to_urn(
                 referenced.remove_extras(),
                 self.config.env,
             )
         except Exception as e:
             self.report.report_warning(
                 str(referenced),
                 f"Failed to clean up table, {e}",
             )
         else:
             referenced_urns.append(referenced_urn)

     # Only the part of the email before the first "@" is used to build the
     # actor URN.
     actor_urn = builder.make_user_urn(event.actor_email.split("@")[0])
     aspect = OperationClass(
         timestampMillis=now_millis,
         lastUpdatedTimestamp=event_millis,
         actor=actor_urn,
         operationType=OPERATION_STATEMENT_TYPES[event.statementType],
         affectedDatasets=referenced_urns,
     )
     target_urn = _table_ref_to_urn(
         target_table,
         env=self.config.env,
     )
     proposal = MetadataChangeProposalWrapper(
         entityType="dataset",
         aspectName="operation",
         changeType=ChangeTypeClass.UPSERT,
         entityUrn=target_urn,
         aspect=aspect,
     )
     unit_id = f"{event.timestamp.isoformat()}-operation-aspect-{target_table}"
     return MetadataWorkUnit(id=unit_id, mcp=proposal)
Пример #4
0
    def _gen_operation_aspect_workunits_from_access_events(
        self,
        events_iterable: Iterable[RedshiftAccessEvent],
    ) -> Iterable[MetadataWorkUnit]:
        """Yield ``operation`` aspect work units for insert/delete events.

        Events where any of ``database``, ``username``, ``schema_``,
        ``table``, ``endtime`` or ``operation_type`` is falsy are skipped.
        Each emitted work unit is reported via ``self.report.report_workunit``
        and counted in ``self.report.num_operational_stats_workunits_emitted``.
        Because this is a generator, that counter is reset to zero when
        iteration starts, not when the method is called.

        Args:
            events_iterable: Access events to convert.

        Yields:
            One ``MetadataWorkUnit`` per complete event, wrapping an UPSERT
            proposal for the lower-cased ``database.schema.table`` dataset
            on the ``redshift`` platform.

        Raises:
            AssertionError: If a complete event carries an operation type
                other than ``"insert"`` or ``"delete"``.
        """
        self.report.num_operational_stats_workunits_emitted = 0
        for event in events_iterable:
            if not (
                event.database
                and event.username
                and event.schema_
                and event.table
                and event.endtime
                and event.operation_type
            ):
                continue

            # Raised explicitly rather than via `assert`, which is removed
            # under `python -O`; without the check any unexpected value
            # would silently be recorded as DELETE below.
            if event.operation_type not in ("insert", "delete"):
                raise AssertionError(
                    f"Unexpected operation type: {event.operation_type!r}"
                )

            resource: str = f"{event.database}.{event.schema_}.{event.table}"
            # timestampMillis is the wall-clock time of processing, while
            # lastUpdatedTimestamp comes from the event's end time.
            reported_time: int = int(time.time() * 1000)
            last_updated_timestamp: int = int(event.endtime.timestamp() * 1000)
            user_email: str = event.username
            operation_aspect = OperationClass(
                timestampMillis=reported_time,
                lastUpdatedTimestamp=last_updated_timestamp,
                # Only the part of the user name before the first "@" is
                # used to build the actor URN.
                actor=builder.make_user_urn(user_email.split("@")[0]),
                operationType=(
                    OperationTypeClass.INSERT
                    if event.operation_type == "insert"
                    else OperationTypeClass.DELETE
                ),
            )
            mcp = MetadataChangeProposalWrapper(
                entityType="dataset",
                aspectName="operation",
                changeType=ChangeTypeClass.UPSERT,
                entityUrn=builder.make_dataset_urn_with_platform_instance(
                    "redshift",
                    resource.lower(),
                    self.config.platform_instance,
                    self.config.env,
                ),
                aspect=operation_aspect,
            )
            wu = MetadataWorkUnit(
                id=f"operation-aspect-{event.table}-{event.endtime.isoformat()}",
                mcp=mcp,
            )
            self.report.report_workunit(wu)
            self.report.num_operational_stats_workunits_emitted += 1
            yield wu
Пример #5
0
    def _gen_operation_aspect_workunits_by_type_from_access_events(
        self,
        events_iterable: Iterable[RedshiftAccessEvent],
        operation_type: Union[str, "OperationTypeClass"],
    ) -> Iterable[MetadataWorkUnit]:
        """Yield ``operation`` aspect work units of one given operation type.

        Events where any of ``database``, ``username``, ``schema_``,
        ``table`` or ``endtime`` is falsy are skipped. Each emitted work unit
        is passed to ``self.report.report_workunit`` before being yielded.

        Args:
            events_iterable: Access events to convert.
            operation_type: Operation type recorded on every emitted aspect.

        Yields:
            One ``MetadataWorkUnit`` per complete event, wrapping an UPSERT
            proposal for the lower-cased ``database.schema.table`` dataset
            on the ``redshift`` platform.
        """
        for access_event in events_iterable:
            if (not access_event.database or not access_event.username
                    or not access_event.schema_ or not access_event.table
                    or not access_event.endtime):
                continue

            dataset_name: str = (f"{access_event.database}."
                                 f"{access_event.schema_}.{access_event.table}")
            # The event end time, scaled by 1000, serves as both the aspect
            # timestamp and the last-updated timestamp.
            end_millis: int = int(access_event.endtime.timestamp() * 1000)
            # Only the part of the user name before the first "@" is used
            # to build the actor URN.
            actor_urn = builder.make_user_urn(
                access_event.username.split("@")[0])

            aspect = OperationClass(
                timestampMillis=end_millis,
                lastUpdatedTimestamp=end_millis,
                actor=actor_urn,
                operationType=operation_type,
            )
            target_urn = builder.make_dataset_urn_with_platform_instance(
                "redshift",
                dataset_name.lower(),
                self.config.platform_instance,
                self.config.env,
            )
            proposal = MetadataChangeProposalWrapper(
                entityType="dataset",
                aspectName="operation",
                changeType=ChangeTypeClass.UPSERT,
                entityUrn=target_urn,
                aspect=aspect,
            )
            unit_id = (f"operation-aspect-{access_event.table}-"
                       f"{access_event.endtime.isoformat()}")
            work_unit = MetadataWorkUnit(id=unit_id, mcp=proposal)
            self.report.report_workunit(work_unit)
            yield work_unit