def _save_campaign_id_mappings(
        *,
        aws_conn_id: str,
        mysql_conn_id: str,
        bucket_name: str,
        bucket_data_prefix: str,
        partner: str,
        notifications_topic_arn: str,
        ds_nodash: str,
        task: BaseOperator,
        task_instance: TaskInstance,
        **_
):
    """Store extracted campaign id mappings in the firewall DB (operator callable).

    The extracted mappings are loaded through `_load_extracted_mappings`; when
    there are none the callable returns early.  Otherwise the firewall database
    is updated through `_update_firewall_db` and the transaction is committed.
    If the update reports unknown IAS adv entity ids or attempts to override
    existing mappings, a summary is published to `notifications_topic_arn`.

    Returns a short human-readable status string.
    """
    log = task.log

    # fetch the freshly extracted mappings
    mappings = _load_extracted_mappings(
        aws_conn_id=aws_conn_id,
        bucket_name=bucket_name,
        bucket_data_prefix=bucket_data_prefix,
        partner=partner,
        ds_nodash=ds_nodash,
        log=log,
    )
    if not mappings:
        return "no mappings have been extracted, bailing out"

    # apply the mappings to the firewall database and commit
    log.info("connecting to firewall database")
    db_hook = MySqlHook(mysql_conn_id=mysql_conn_id)
    with closing(db_hook.get_conn()) as connection:
        db_hook.set_autocommit(connection, False)
        with closing(connection.cursor()) as cursor:
            update_result = _update_firewall_db(
                cur=cursor,
                log=log,
                extracted_mappings=mappings,
            )
            (invalid_ias_adv_entity_ids,
             overriding_campaign_id_mappings) = update_result
        log.info("committing transaction")
        connection.commit()

    # nothing to report: we are done
    if not (invalid_ias_adv_entity_ids or overriding_campaign_id_mappings):
        return "campaign id mappings have been updated"

    # report the exceptions collected during the update
    log.info("sending mapping exceptions notification")

    if invalid_ias_adv_entity_ids:
        invalid_ias_adv_entity_ids_msg = ', '.join(
            str(eid) for eid in invalid_ias_adv_entity_ids
        )
    else:
        invalid_ias_adv_entity_ids_msg = 'None'

    if overriding_campaign_id_mappings:
        overriding_campaign_id_mappings_msg = '\n'.join(
            '{:<19} | {:<24} | {:<19}'.format(*row)
            for row in overriding_campaign_id_mappings
        )
    else:
        overriding_campaign_id_mappings_msg = 'None'

    notifier = AwsSnsHook(aws_conn_id=aws_conn_id)
    notifier.publish_to_target(
        target_arn=notifications_topic_arn,
        subject=f'Campaign ID mapping exceptions ({partner})',
        message=(
            f"Encountered campaign ID mapping exceptions:\n"
            f"\n"
            f"\nDAG: {task_instance.dag_id}"
            f"\nTask: {task_instance.task_id}"
            f"\nExecution Date: {task_instance.execution_date}"
            f"\nHost: {task_instance.hostname}"
            f"\n"
            f"\nUnknown IAS adv entity IDs:"
            f"\n"
            f"\n{invalid_ias_adv_entity_ids_msg}"
            f"\n"
            f"\nAttempts to change existing mappings:"
            f"\n"
            f"\npartner campaign ID | existing IAS campaign ID | new IAS campaign ID"
            f"\n{overriding_campaign_id_mappings_msg}"
        ),
    )

    return "campaign id mappings have been updated"
def _update_firewall_name_mappings(*,
                                   aws_conn_id: str,
                                   mysql_conn_id: str,
                                   state_table_name: str,
                                   mappings_bucket_name: str,
                                   mappings_prefix: str,
                                   mappings_timestamp_pattern: str,
                                   firewall_table: str,
                                   msource_id: int,
                                   task: BaseOperator,
                                   **_) -> str:
    """Python callable for the `UpdateFirewallNameMappingsOperator`.

    Finds the mappings file(s) with the newest timestamp under
    `mappings_prefix` in `mappings_bucket_name`, skips the ones whose ETag is
    already recorded in the pipeline state for that timestamp, loads the
    remaining ones into the firewall database in a single transaction and then
    saves the new pipeline state.

    Args:
        aws_conn_id: connection id used for the pipeline state and S3 hooks.
        mysql_conn_id: connection id of the firewall database.
        state_table_name: table name handed to `PipelineStateHook`.
        mappings_bucket_name: bucket that holds the mappings files.
        mappings_prefix: key prefix used to list candidate files.
        mappings_timestamp_pattern: regular expression searched in every object
            key; its first capture group is taken as the file timestamp.
            Timestamps are compared as strings (NOTE(review): assumes the
            captured values sort lexicographically in chronological order,
            confirm against the actual key format).
        firewall_table: firewall table the mappings belong to; also part of
            the state key.
        msource_id: source id passed to the loader; also part of the state key.
        task: the running operator, used for its logger.

    Returns:
        A short human-readable status string.

    Raises:
        AirflowSkipException: when there is no unprocessed mappings file.
    """
    # pylint: disable=too-many-locals
    log = task.log
    pipeline_state = PipelineStateHook(state_table_name,
                                       aws_conn_id=aws_conn_id)
    mappings_bucket = S3Hook(aws_conn_id).get_bucket(mappings_bucket_name)

    # get latest processed mappings file timestamp
    state_key = f'name_mappings.{firewall_table}.{msource_id}.latest_processed'
    state_value = pipeline_state.get_state(state_key)
    if state_value is not None:
        latest_processed_ts = _get_latest_processed_ts(state_value)
        latest_processed_etags = _get_latest_processed_etags(state_value)
    else:
        latest_processed_ts = '0'
        latest_processed_etags = set()

    # list files in the bucket and keep those carrying the newest timestamp,
    # keyed by ETag
    mappings_timestamp_re = re.compile(mappings_timestamp_pattern)
    latest_available_ts = '0'
    latest_available_files = {}
    for mappings_file in mappings_bucket.objects.filter(
            Prefix=mappings_prefix):
        match = mappings_timestamp_re.search(mappings_file.key)
        if match is None:
            continue
        available_ts = match.group(1)
        if available_ts > latest_available_ts:
            # strictly newer timestamp: start a fresh set of candidates
            latest_available_ts = available_ts
            latest_available_files = {mappings_file.e_tag: mappings_file}
        elif available_ts == latest_available_ts:
            latest_available_files[mappings_file.e_tag] = mappings_file

    # if the files are for the same timestamp, we must skip processed files
    if latest_available_ts == latest_processed_ts:
        for latest_processed_etag in latest_processed_etags:
            # A recorded ETag may be absent from the current listing (e.g. the
            # object was overwritten or removed since it was processed), so
            # tolerate a missing key instead of failing with KeyError.
            processed_file = latest_available_files.pop(
                latest_processed_etag, None)
            if processed_file is None:
                continue
            log.info("skipping the file=%s, etag=%s",
                     _get_filename(processed_file.key),
                     processed_file.e_tag)

    # check if no newer file
    if (not latest_available_files
            or latest_available_ts < latest_processed_ts):
        raise AirflowSkipException("no newer mappings file found")

    # connect to the firewall database and process the mappings
    log.info("connecting to firewall database")
    mysql = MySqlHook(mysql_conn_id=mysql_conn_id)
    with closing(mysql.get_conn()) as conn:
        mysql.set_autocommit(conn, False)

        for latest_available_file in latest_available_files.values():
            log.info("processing filename=%s, etag=%s, ts=%s",
                     _get_filename(latest_available_file.key),
                     latest_available_file.e_tag,
                     latest_available_ts)
            with closing(conn.cursor()) as cur:
                _load_and_process_mappings(cur=cur,
                                           mappings_file=latest_available_file,
                                           firewall_table=firewall_table,
                                           msource_id=msource_id,
                                           log=log)

        log.info("committing transaction")
        conn.commit()

    # update state table
    new_state_value = _create_state(state_value,
                                    latest_available_ts,
                                    latest_available_files.values())
    log.info('saving new state: key=%s, value=%s', state_key, new_state_value)
    pipeline_state.save_state(state_key, json.dumps(new_state_value))

    # done
    return "new mappings have been processed"