def __init__(self, context):
    self.context = context
    self.meta = self.context["meta"]
    self.params = self.context["params"]
    self.logger = self.context["logger"]
    self.dw_conn = self.context["dw_conn"]
    self.app_conn = self.context["app_conn"]
    self.alert_url = self.meta["api_alerts_str"]
    self.capacity = Capacity(meta=self.meta)
def __init__(self, meta={}, to_physical_table=False, no_delete_key=False, logger=None):
    logger = logger if logger else Logger(log_level="info", vendor_key=-1, retailer_key=-1,
                                          module_name="Persist_Feedback")
    vertica_conn = DWOperation(meta=meta, logger=logger)
    redis_conn = RedisOperation(meta=meta, logger=logger)
    c = redis_conn.get_connection()
    column_mapping = {
        'from_key': [
            # Save a value from the Redis key into the dataset.
            # ['key name in dataset', 'index of key elements split by :']
            ['UPDATE_TIME', 4]  # FEEDBACK:342:6:38726000097879:20190219013621 --> get key.split(':')[4]
        ],
        'hardcode': [
            # ['key name in dataset', 'hardcoded value']
            ['EVENT_KEY', '-1'],
        ],
        'from_redis': [
            # ['table column', 'key name in dataset', 'table column type', 'value updatable', 'target']
            # Note: for the table column type, use VARCHAR if the value needs to be quoted, otherwise NUMBER.
            #   int, float, number          -> NUMBER
            #   varchar, date, timestamp    -> VARCHAR
            # "UPDATABLE" means the column value is updated if the alert already exists.
            ['ALERT_ID', 'ALERT_ID', 'NUMBER', 'NON-UPDATABLE', 'TABLE|MOBILE'],
            ['VENDOR_KEY', 'VENDOR_KEY', 'NUMBER', 'NON-UPDATABLE', 'TABLE|MOBILE'],
            ['RETAILER_KEY', 'RETAILER_KEY', 'NUMBER', 'NON-UPDATABLE', 'TABLE|MOBILE'],
            ['ITEM_KEY', 'ITEM_KEY', 'NUMBER', 'NON-UPDATABLE', 'TABLE'],
            ['STORE_KEY', 'STORE_KEY', 'NUMBER', 'NON-UPDATABLE', 'TABLE'],
            ['PERIOD_KEY', 'PERIOD_KEY', 'NUMBER', 'NON-UPDATABLE', 'TABLE'],
            ['STORE_REP', 'STORE_REP', 'VARCHAR', 'NON-UPDATABLE', 'TABLE'],
            ['STORE_VISITED_PERIOD_KEY', 'STORE_VISITED_PERIOD_KEY', 'NUMBER', 'NON-UPDATABLE', 'TABLE'],
            ['FEEDBACK_DESCRIPTION', 'FEEDBACK_DESCRIPTION', 'VARCHAR', 'UPDATABLE', 'TABLE|MOBILE'],
            ['ON_HAND_PHYSICAL_COUNT', 'ON_HAND_PHYSICAL_COUNT', 'NUMBER', 'UPDATABLE', 'TABLE|MOBILE'],
            ['ON_HAND_CAO_COUNT', 'ON_HAND_CAO_COUNT', 'NUMBER', 'UPDATABLE', 'TABLE|MOBILE'],
            ['SOURCE', 'SOURCE', 'VARCHAR', 'NON-UPDATABLE', 'TABLE'],
            ['CYCLE_KEY', 'CYCLE_KEY', 'NUMBER', 'NON-UPDATABLE', 'MOBILE'],
            ['UPDATE_TIME', 'UPDATE_TIME', 'VARCHAR', 'NON-UPDATABLE', 'MOBILE'],
            ['EVENT_KEY', 'EVENT_KEY', 'NUMBER', 'NON-UPDATABLE', 'TABLE'],
        ]
    }
    self.vars = {
        "meta": meta,
        "logger": logger,
        "column_mapping": column_mapping,
        "vertica_conn": vertica_conn,
        "redis_conn": c,
        "to_physical_table": to_physical_table,
        "no_delete_key": no_delete_key
    }
    capacity = Capacity(meta)
    self.vars['capacity'] = capacity
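# Illustrative sketch (not part of the original module): how the 'from_key'
# rule above is applied. The key layout FEEDBACK:<alert_id>:<vendor>:<store>:<timestamp>
# is taken from the example comment; the helper name is hypothetical.
def _apply_from_key_rule(redis_key, mapping):
    """For the sample key in the comment above, returns {'UPDATE_TIME': '20190219013621'}."""
    parts = str(redis_key).split(':')
    return {name: parts[index] for name, index in mapping['from_key']}

# _apply_from_key_rule("FEEDBACK:342:6:38726000097879:20190219013621", column_mapping)
# -> {'UPDATE_TIME': '20190219013621'}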
class DumpMain(object): def __init__(self, meta, params, logger=None): self.meta = meta self.params = params self.file_ext = self.params.get("fileExt") if self.params.get( "fileExt") else "txt" self.zip_flag = True if str(self.params.get( "zipFlag", "N")).upper() in ["Y", "YES", "T", "TRUE"] else False self._debug = "Y" if str(self.params.get( 'debug', 'N')).upper() in ["Y", "YES", "T", "TRUE"] else 'N' __log_level = 'DEBUG' if self._debug == "Y" else 'INFO' self.logger = logger if logger else Logger(log_level="info", target="console", vendor_key=-1, retailer_key=-1, sql_conn=None) self.logger.set_level(log_level=__log_level) self.logger.set_keys(log_id="{}".format(self.params["jobId"])) self.provision_url = self.meta[ "api_provision_str"] + "/filesDelivery/layout/attributes?cycle_key=" self.capacity = Capacity(meta=self.meta) self.rsi_username = self.meta.get("fileshare_username") self.rsi_folder = self.meta.get("fileshare_folder") self.cmn_schema = self.meta.get("db_conn_vertica_common_schema", "common") self.app_conn = MSOperation(meta=self.meta) self.vertica_conn = DWOperation(meta=self.meta) self.dumper_context = self.get_context() self._layout_payload = {} def main_process(self): """ Getting AFM related parameters. 2 cases here. 1, called by AFM directly: Alerts Delivery is coupled with AFM. So it is better to call by AFM. Then we can get parameters directly from AFM via self.params 2, called via REST API. in case failure when called by AFM, we also support the REST API while we can manually trigger this. And getting params from REST API Body. :return: """ try: # Checking the cancel status from previous step. _cancel_flag = self.params.get("cancel_flag", False) if _cancel_flag: return self.params.get("message") self.logger.debug("The context is: %s" % str(self.dumper_context)) init = Initialize(self.dumper_context) afm_params = init.init_process() _cycle_key = afm_params.get("cycleKey") _retailer_name = self.capacity.get_retailer_schema_name( retailer_key=afm_params["retailerKey"]) self.dumper_context["retailer_name"] = _retailer_name self.dumper_context["schema"] = _retailer_name self.dumper_context["cycle_key"] = _cycle_key self._layout_payload = self._get_layout() self._process(afm_params) except Warning as e: return str(e) except Exception as e: self.logger.warning(str(e)) raise def get_context(self): context = { "meta": self.meta, "params": self.params, "dw_conn": self.vertica_conn, "app_conn": self.app_conn, "logger": self.logger } return context def _get_layout(self): # Getting layout fields for alert delivery. _cycle_key = self.dumper_context["cycle_key"] _delivery_layout_api = "{0}{1}".format(self.provision_url, _cycle_key) self.logger.info( "Alert delivery related columns(payload param) is not given, " "So just calling provision API: %s to retrieve them." % _delivery_layout_api) # _header = {"tokenid": "eyJhbGciOiJIUzI1NiJ9.eyJjb29raWVOYW1lIjoicnNpU3NvTmV4dEdlbiIsImNvb2tpZVZhbHVlIjoiQVFJQzV3TTJMWTRTZmN6ZmRuLVVQazA2b2NnRzVWaTlZRFc1cHZZQzF6b3djbXMuKkFBSlRTUUFDTURFQUFsTkxBQkkyTVRBNU1EUTBPRFUwTXpZeU1UWTBORFlBQWxNeEFBQS4qIiwic3RhdHVzIjoic3VjY2VzcyIsInVzZXJJZCI6ImJlbi53dUByc2ljb3JwLmxvY2FsIiwiaWF0IjoxNTQ0Njg5OTY3fQ.AzCgFFHXHo3J1M4fk-17T8fBLwReDQDb4p-DXUcBm_M"} _header = {} resp = requests.get(_delivery_layout_api, _header, verify=False) if resp.text == "invalid token": self.logger.error( "ERROR: invalid tokenid, Please update the tokenid manually. 
" "Then rerun this script again!!!") if resp.status_code != requests.codes.ok or str( resp.json().get("status")).lower() != "success": self.logger.warning("The response result is: %s" % resp.json()) self.logger.error("Calling API failed. Refer to API: %s" % _delivery_layout_api) # _payload = resp.json()["data"]["payload"] # e.g. list({"key1":"value1"}, {}, {}) _payload = resp.json()["data"] self.logger.debug("The layout payload is: %s" % _payload) return _payload def _gen_query(self, delivery_key): # getting the payload for given delivery key. _payload = self._layout_payload.get(str(delivery_key))["payload"] self.logger.debug("The payload is: %s" % str(_payload)) # only getting data where field preselect is True. _payload = list(filter(lambda x: x["preselect"] is True, _payload)) self.logger.debug("The filtered payload is: %s" % str(_payload)) if not _payload: self.logger.warning( "There is no layout configed for this delivery.") return None, None # It is required to order by seq, so need to convert list(dict) to list(tuple) to sort the data. # Otherwise, the following code will not be working correctly. # _payload_list should be like: e.g. [(1, 'Alert', 'ALERT_ID', 'ALERT ID', None), (), ...] # Noted: Please don't change below fields order for generating _payload_list. _payload_list = [(ele["sequence"], ele["dimension_type"], ele["Field"], ele["display_name"], ele["sort_by"]) for ele in _payload] self.logger.debug("Converted payload is: %s" % str(_payload_list)) # Fields should be listed based on sequence. In case seq is None, then putting those columns to the last. # if many elements with None value, then they will be sorted randomly. _payload_list.sort( key=lambda x: x[0] if x[0] is not None else 999) # Assuming less than 999 columns. self.logger.debug("Sorted payload is: %s" % str(_payload_list)) display_fields_lst = [ ] # getting display name like: ["alert type", ...] for displaying on delivery file. columns_with_alias = [ ] # getting fields with format like: [alert."alert_type" as "alert type", ...] table_list = set( ) # getting required table list. ("Alert", "Store", ...) for columns in _payload_list: self.logger.debug( "Current display name is: \"%s\" for column: %s" % (columns[3], columns[2])) table_list.add(str(columns[1]).lower()) if columns[3] is None: # display name could be None display_fields_lst.append(columns[2]) columns_with_alias.append(columns[1] + '."' + columns[2] + '" AS "' + columns[2] + '"') else: display_fields_lst.append(columns[3]) columns_with_alias.append(columns[1] + '."' + columns[2] + '" AS "' + columns[3] + '"') # if there is no "Alert Date" enabled, then manually added this field to the beginning. if "Alert Date".lower() not in list( map(lambda x: str(x).lower(), display_fields_lst)): display_fields_lst.insert(0, "Alert Date") columns_with_alias.insert(0, 'ALERT.period_key AS "Alert Date"') _display_fields_str = ",".join([ '"' + column + '"' for column in display_fields_lst ]) # combine them with comma(,) _required_columns_prefix_tmp = ",".join( [column for column in columns_with_alias]) # combine them # replace one mandatory fields alert_type to intervention_name. Since it is not the column name in fact table. _required_columns_prefix = re.sub( '(?i)' + re.escape('ALERT."ALERT_TYPE"'), 'type.intervention_name', _required_columns_prefix_tmp) # Getting columns which sort_by is True(which is enabled sorting). And sort it. 
sort_by_list = sorted(list( filter(lambda x: x[4] is True, _payload_list)), key=lambda x: x[4]) if sort_by_list: _sort_columns = ",".join([ ele[1] + '."' + ele[2] + '" DESC NULLS LAST' for ele in sort_by_list ]) else: _sort_columns = '1' self.logger.debug(_sort_columns) _sort_columns = re.sub('(?i)' + re.escape('ALERT."ALERT_TYPE"'), 'type.intervention_name', _sort_columns) _fdbk_required = True if "feedback" in table_list else False _item_required = True if "product" in table_list else False _store_required = True # mandatory # Generating the initial query. _init_sql = """ SELECT {columns}, ROW_NUMBER() OVER(ORDER BY store.STORE_ID, {sortedFields} ) AS rn FROM {schema}.FACT_PROCESSED_ALERT alert INNER JOIN {cmnSchema}.alt_meta_intervention type ON alert.InterventionKey = type.Intervention_Key """\ .format(columns=_required_columns_prefix, sortedFields=_sort_columns, schema=self.dumper_context["schema"], cmnSchema=self.cmn_schema) if _fdbk_required: _init_sql += """ LEFT JOIN {schema}.FACT_FEEDBACK Feedback ON alert.alert_id = Feedback.alert_id"""\ .format(schema=self.dumper_context["schema"]) if _item_required: _init_sql += """ INNER JOIN {cmnSchema}.dim_product Product ON alert.item_key = Product.item_key AND alert.vendor_key = Product.vendor_key AND alert.retailer_key = Product.retailer_key"""\ .format(cmnSchema=self.cmn_schema) if _store_required: _init_sql += """ INNER JOIN {cmnSchema}.dim_store Store ON alert.store_key = store.store_key AND alert.vendor_key = store.vendor_key AND alert.retailer_key = store.retailer_key"""\ .format(cmnSchema=self.cmn_schema) # always apply the period_key filter for given vendor/retailer _init_sql += """ WHERE 1=1 AND alert.IssuanceId = 0 AND (alert.vendor_key, alert.retailer_key, alert.period_key) IN ( SELECT vendor_key, retailer_key, alert_day FROM TMP_RAW_ALERTS_INFO ) """ return _display_fields_str, _init_sql def _process(self, afm_params): try: _cycle_key = afm_params.get("cycleKey") # Reading configuration from meta table under IRIS MSSQL. # Getting all owners(includes both SVR & RETAILER rule) according to given cycle_key. # the delivery file will be dumped by owner. sql = """ SELECT d.ID AS DELIVERY_KEY, d.CYCLE_KEY, d.RETAILER_KEY, d.DELIVERY_NAME, d.FILTERS, d.DELIMITER, d.OWNER, ep.SERVER, ep.EXTRACTION_FOLDER, ep.USERNAME, ep.PASSWORD, ep.MAIL_SUBJECT, ep.MAIL_BODY, ep.MAIL_RECPSCC, ep.MAIL_RECPSTO, ep.DELIVERY_TYPE FROM AP_META_DELIVERIES d INNER JOIN AP_META_ENDPOINTS ep ON d.ENDPOINT_ID = ep.ID WHERE d.cycle_key = {0} AND d.ENABLED = 'T' AND ep.ENABLED = 'T' """.format(_cycle_key) self.logger.info(sql) meta_rows = self.app_conn.query(sql) self.logger.debug("The meta data is: %s" % str(meta_rows)) if not meta_rows: raise Warning( "There is no endpoint or delivery configed. Please check meta table!" ) # There could be multi owners for the given cycle but with different filters. # This is required by PM. And we need to generate separate files for every single row. 
for meta_data in meta_rows: # 1, Getting the initial source query _delivery_key = meta_data.DELIVERY_KEY required_columns, _init_src_query = self._gen_query( delivery_key=_delivery_key) if required_columns is None and _init_src_query is None: self.logger.warning( "Seems no layout configed for delivery key: %s" % _delivery_key) continue self.logger.info("The initial source query is: %s" % _init_src_query) delivery_type = meta_data.DELIVERY_TYPE if str.lower(delivery_type) == 'customer': meta_data = meta_data._replace( EXTRACTION_FOLDER=self.rsi_folder, USERNAME=self.rsi_username) if meta_data.USERNAME is None: self.logger.warning( "There is no username configed for delivery key: %s" % _delivery_key) continue _pmp_pwd = get_password(username=meta_data.USERNAME, meta=self.meta) if _pmp_pwd: meta_data = meta_data._replace(PASSWORD=_pmp_pwd) self.logger.info("Start to dump & delivery for meta: %s" % str(meta_data)) _src_query = _init_src_query # 2, checking if any filters applied. (e.g alert_type, category etc.) # User might wants to dump only given alert types of data. This should be configurable. # So far, we support 2 types of filters: alert_type & category # TODO: confirm the filter format with UI team. Currently filters are configed with json format. # e.g. {"alert_type": "d-void,phantom", "category":"cat1,cat2"} _filters_raw = meta_data.FILTERS if not _filters_raw or _filters_raw == "": self.logger.info("No filters applied.") else: self.logger.info("The filters are: %s" % _filters_raw) _filters = json.loads(str(_filters_raw).lower().strip()) alert_type_str = _filters.get( "alert_type", None) # e.g. phantom,d-void,shelf oos if alert_type_str is not None and str( alert_type_str).strip() != '': alert_type = ','.join( "'" + str(ele).strip() + "'" for ele in str(alert_type_str).split(',')) _src_query += " AND type.intervention_name IN ({type})".format( type=alert_type) category_str = _filters.get("category", None) if category_str is not None and str( category_str).strip() != '': category_type = ','.join( "'" + str(ele).strip() + "'" for ele in str(category_str).split(',')) _src_query += " AND Product.OSM_CATEGORY IN ({cat_type})".format( cat_type=category_type) # The owner format should be like: owner1 or owner1,owner2,... _owners = str(meta_data.OWNER) if not _owners: # owner is the mandatory filter for every delivery. raise ValueError( "There is no owner configed in delivery meta table") _owner_in_str = ",".join("'" + ele.strip() + "'" for ele in _owners.split(",")) _src_query += " AND alert.owner IN ({owner}) ".format( owner=_owner_in_str) _final_src_query = """ SELECT {columns} FROM ({query}) x ORDER BY rn """.format(columns=required_columns, query=_src_query) self.logger.info("The final source sql is: %s" % _final_src_query) # delivery file name should be: <delivery_name>_<YYYYMMDD>.<fileExt>. e.g. <delivery_name>_20180101.txt curr_folder = os.path.dirname(os.path.realpath(__file__)) target_filename = meta_data.DELIVERY_NAME + "_" + datetime.datetime.now( ).strftime('%Y%m%d') # delivery file will be dumped to "<curr_dir>/data" folder temporarily. abs_target_filename = curr_folder + os.sep + "data" + os.sep + target_filename + '.' + self.file_ext zip_filename = curr_folder + os.sep + "data" + os.sep + target_filename + '.zip' # Getting data delimiter. e.g. 
',' delimiter = str(meta_data.DELIMITER).strip() if len(delimiter) != 1: raise ValueError("delimiter should be 1 char") # start to dump data self.dumper = dd.DumpData(context=self.dumper_context) # dump data from source db self.logger.info("Dumping data into file: %s" % abs_target_filename) _dump_flag = self.dumper.dump_data( src_sql=_final_src_query, output_file=abs_target_filename, delimiter=delimiter) self.logger.debug("The dump flag is: %s" % _dump_flag) # dump alerts succeeded. if _dump_flag is True: self.logger.info("Dumping data is done!") # check the zip flag if self.zip_flag: _flat_file_size = round( os.path.getsize(abs_target_filename) / 1024 / 1024) self.logger.debug("The flat file size is: %s" % _flat_file_size) self.logger.info("zipping file: %s" % abs_target_filename) with zipfile.ZipFile(zip_filename, 'w') as z: z.write(abs_target_filename, os.path.basename(abs_target_filename)) abs_target_filename = zip_filename self.logger.info("The zip file name is: %s" % abs_target_filename) # start to send data file self.logger.info( "Starting uploading delivery file to dest folder!") self.sender = sd.SendData(context=self.dumper_context) self.sender.delivery_file(meta_data=meta_data, src_file=abs_target_filename) else: self.logger.warning( "There is no data returned or dump data failed. " "Please refer to previous log to get the related source query." ) self.logger.info("Alert delivery process is done") except Warning as e: raise except Exception: raise finally: if self.vertica_conn: self.vertica_conn.close_connection() if self.app_conn: self.app_conn.close_connection()
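# --- Illustrative usage sketch for DumpMain (not part of the original module) ---
# `meta` normally comes from the shared config.properties loader and `params`
# from the AFM job / REST call; only the keys read by __init__/main_process()
# above are shown, with placeholder values.
sample_params = {
    "jobId": 12345,
    "fileExt": "txt",    # default when omitted
    "zipFlag": "Y",      # any of Y/YES/T/TRUE enables zipping of the dump file
    "debug": "N",
}
# dumper = DumpMain(meta=meta, params=sample_params)   # meta: fully populated config dict
# print(dumper.main_process())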
class PacificNotification: """ Check alert generation completeness status for all cycles. """ def __init__(self, context): self.context = context self.meta = self.context["meta"] self._logger = self.context["logger"] self._db = MSOperation(meta=self.meta) self._dw = DWOperation(meta=self.meta) self.bundle_base_url = self.meta["api_osa_bundle_str"] self.alert_base_url = self.meta["api_alerts_str"] self._common_schema = self.meta["db_conn_vertica_common_schema"] self._dim_calendar = "DIM_CALENDAR" self._dim_product = "DIM_PRODUCT" self._dim_store = "DIM_STORE" self._meta_intvn = "ALT_META_INTERVENTION" self._dim_base_dir = self.meta["azure_storage_dim_root"] self._azure_storage_account_name = self.meta[ "azure_storage_account_name"] self._azure_storage_blob_container = self.meta[ "azure_storage_blob_container"] self._prefix_name = "IRIS_" self.azure_uploader = UploadToBlobStorage(meta=self.meta, logger=self._logger) self.body = { "job_id": self.context["jobId"], "step_id": self.context["stepId"], "status": StepStatus.RUNNING.name } self.capacity = Capacity(meta=self.meta) self.cycle_key = self._get_cycle_key() def _get_cycle_key(self): """ Getting cycle_key from groupName. groupName format should be like: jobDefId:cycleKey :return: cycleKey """ _group_name = str(self.context["groupName"]) _cycle_key = _group_name.split(":")[1] return _cycle_key def get_url_response(self, url, method="POST", **kwargs): """ Getting response from url :return: resp """ self._logger.info(url) if method.upper() not in ["GET", "POST", "PUT"]: method = "GET" resp = requests.request(method=method, url=url, verify=False, **kwargs) self._logger.info(resp.text) return resp def _dump_dim_data(self, retailer_key): """ Dump dim data into Azure Blob Storage This is cycle_key(retailer) level :return: """ self._logger.info("Syncing dim tables...") # for retailer_key in retailer_list: _retailer_name = self.capacity.get_data_by_retailer_key( retailer_key=retailer_key)['retailerName'] self._logger.info( "Uploading 4 tables into Azure blob storage for retailer: %s." 
% _retailer_name) sql_for_item = """ SELECT VENDOR_KEY, ITEM_KEY, UPC, ITEM_GROUP, item_description AS ITEM_DESC, OSM_BRAND AS BRAND, OSM_CATEGORY AS CATEGORY, OSM_SUB_CATEGORY AS SUBCATEGORY, VENDOR_NAME FROM (SELECT *, ROW_NUMBER() OVER(PARTITION BY VENDOR_KEY, ITEM_KEY) rn FROM {schema}.{table} ) x WHERE retailer_key = {retailerKey} AND rn = 1""" \ .format(schema=self._common_schema, table=self._dim_product, retailerKey=retailer_key) sql_for_store = """ SELECT RETAILER_KEY, STORE_KEY, STORE_ID, RETAILER_NAME, OSM_REGION AS REGION, RSI_BANNER AS BANNER, PRIME_DC AS DISTRIBUTE_CENTER FROM (SELECT *, ROW_NUMBER() OVER(PARTITION BY RETAILER_KEY, STORE_KEY) rn FROM {schema}.{table} ) x WHERE retailer_key = {retailerKey} AND rn = 1""" \ .format(schema=self._common_schema, table=self._dim_store, retailerKey=retailer_key) sql_for_cal = """ SELECT PERIOD_KEY, CALENDAR_KEY, CALENDARNAME AS CALENDAR_NAME, YEAR, YEARNAME AS YEAR_NAME, QUARTER, QUARTERNAME AS QUARTER_NAME, MONTH, MONTHNAME AS MONTH_NAME, PERIOD, PERIODNAME AS PERIOD_NAME, WEEKENDED AS WEEK_ENDED, WEEKENDEDNAME AS WEEK_ENDED_NAME, WEEKBEGIN AS WEEK_BEGIN, WEEKBEGINNAME AS WEEK_BEGIN_NAME, YEARWEEKNAME AS YEAR_WEEK_NAME, YEARWEEK AS YEAR_WEEK, YEARMONTHWEEKNAME AS YEAR_MONTH_WEEK_NAME, LY_PERIOD_KEY, NY_PERIOD_KEY, DATE_NAME, TO_CHAR(date_value) AS DATE_VALUE, CAL_PERIOD_KEY, "2ya_period_key" AS _2YA_PERIOD_KEY, "3ya_period_key" AS _3YA_PERIOD_KEY, "4ya_period_key" AS _4YA_PERIOD_KEY FROM {schema}.{table}""".format(schema=self._common_schema, table=self._dim_calendar) sql_for_alert = """ SELECT INTERVENTION_KEY as OSA_TYPE_KEY, INTERVENTION_ENABLED as OSA_TYPE_STATUS, INTERVENTION_NAME as OSA_TYPE_NAME, INTERVENTION_DESC as OSA_TYPE_DESC, CASE WHEN application = 'oos' THEN 1 ELSE 0 END AS OSA_INDICATOR FROM {schema}.{table}""".format(schema=self._common_schema, table=self._meta_intvn) self.azure_uploader.upload_azure_main( parq_filename=self._prefix_name + self._dim_product, sql=sql_for_item, azure_base_dir=_retailer_name + '/' + self._dim_base_dir, azure_sub_dir="ITEM") self.azure_uploader.upload_azure_main( parq_filename=self._prefix_name + self._dim_store, sql=sql_for_store, azure_base_dir=_retailer_name + '/' + self._dim_base_dir, azure_sub_dir="STORE") self.azure_uploader.upload_azure_main( parq_filename=self._prefix_name + self._dim_calendar, sql=sql_for_cal, azure_base_dir=_retailer_name + '/' + self._dim_base_dir, azure_sub_dir="CALENDAR") self.azure_uploader.upload_azure_main( parq_filename=self._prefix_name + self._meta_intvn, sql=sql_for_alert, azure_base_dir=_retailer_name + '/' + self._dim_base_dir, azure_sub_dir="OSA_CLASSIFICATION") def _notify_pacific(self, msg): """ Send message to pacific :return: """ self._logger.info("Sending kafka message to Pacific") p = Producer(meta=self.meta) p.produce_data(msg=msg) self._logger.info("Sending kafka message to Pacific completes") def process(self): """ main process :return: """ try: self._logger.info("Data complete Blob getting start") self.body["status"] = StepStatus.RUNNING.name self.body["message"] = "Dumping dim tables and notify pacific." 
_vendors = self.context["vendors"] # list _retailer_key = self.context["retailerKey"] _retailer_name = self.capacity.get_data_by_retailer_key( retailer_key=_retailer_key)['retailerName'] _data_range = self.context["dateRange"] # list _min_period = min(_data_range) _max_period = max(_data_range) # dump dim tables to blob storage and then notify pacific self._dump_dim_data(_retailer_key) _base_azure_url = "wasbs://{}@{}.blob.core.windows.net/".format( self._azure_storage_blob_container, self._azure_storage_account_name) _spd_status = { "jobId": self.context["jobId"], "aggregationType": "OSA", "aggregationLevel": "RETAILER", "dimBaseUri": _base_azure_url, "osaFactBaseUri": _base_azure_url, "calendarId": ["2"], "retailer": _retailer_name, "vendors": _vendors, "dateRange": { "startPeriodKey": _min_period, "endPeriodKey": _max_period } } self._logger.info("The message to send to Pacific is: %s" % _spd_status) self._notify_pacific(msg=_spd_status) # update notify status once sync dim is done. _notify_status_api = "{}/availablecycle/rc/status".format( self.alert_base_url) rc_ids = self.context["rcId"] # list _request_body = {"payload": []} for _rc_id in rc_ids: _notified_rc = {"id": _rc_id, "notified": True} _request_body["payload"].append(_notified_rc) self._logger.info("update rc status with request body: %s" % str(_request_body)) resp = self.get_url_response(url=_notify_status_api, method="PUT", json=_request_body) if resp.status_code != requests.codes.ok: self._logger.error( "Updating notify status failed, Refer to API: %s" % _notify_status_api) self.body["status"] = StepStatus.SUCCESS.name self.body[ "message"] = "Done dumped dim tables and notified pacific." except Exception as msg: self.body["message"] = str(msg) self.body["status"] = StepStatus.ERROR.name self._logger.error(self.body)
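# --- Illustrative context sketch for PacificNotification (not part of the original module) ---
# The class expects a scheduler-style context; the values below are placeholders.
# Note that "groupName" must follow the "jobDefId:cycleKey" format parsed by _get_cycle_key().
sample_context = {
    "meta": {},                       # fully populated config.properties dict in practice
    "logger": None,                   # a Logger instance in practice
    "jobId": 100,
    "stepId": 1,
    "groupName": "42:7",              # jobDefId=42, cycleKey=7
    "vendors": [5],
    "retailerKey": 6,
    "dateRange": [20190101, 20190107],
    "rcId": [1, 2],
}
# notifier = PacificNotification(context=sample_context)
# notifier.process()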
class AFMConfigToTable(object): """ :input: afm_config.json under same folder. Note: this config file name is hardcoded. :output: below 4 tables. AFM_RULE_SET AFM_RULES AFM_RETAILER_RULE AFM_SVR_RULE :usage: python AFMConfigToTable.py :description: This is a temp solution for loading AFM configuration data from json format into AFM rule tables. Since the frontend UI is not yet working. Refer to above json string for the format template. When the frontend finished, this module will be also retired. There are only 4 tables involved here. See output tables list. """ def __init__(self, meta, params, logger=None, filename="afm_config.json"): """ :param meta: common config :param params: used by calling REST API :param filename: only used for local testing """ self.meta = meta self.params = params self.filename = filename self._cap = Capacity(meta=self.meta) self.app_conn = MSOperation(meta=self.meta) self._logger = logger if logger else Logger(log_level="debug", target="console", vendor_key=-1, retailer_key=-1, sql_conn=None) self.scd_url = "{0}/scd/process".format( self.meta["api_osa_bundle_str"]) def get_data(self): # json_data = json.loads(afm_config_data) with open(self.filename, 'r') as fp: json_data = json.load(fp=fp) return json_data def insert_data(self): # _json_config_data = self.get_data() # for local testing _json_config_data = self.params # getting data from params directly instead of reading from config file. _rule_type = _json_config_data["ruleType"].upper() _cycle_key = _json_config_data["cycleKey"] _vendor_key = _json_config_data["vendorKey"] _retailer_key = _json_config_data["retailerKey"] _owner = _json_config_data["owner"] _rule_set_name = None try: if _rule_type not in ("RETAILER", "SVR"): self._logger.warning( "ruleType should be only either RETAILER or SVR! 
" "Please check config file: afm_config.json.") exit(1) if _rule_type == "RETAILER": _rule_set_name = _owner + '_' + self._cap.get_retailer_name_by_key( retailer_key=_retailer_key) elif _rule_type == "SVR": _rule_set_name = _owner + '_' + self._cap.get_retailer_name_by_key( retailer_key=_retailer_key ) + '_' + self._cap.get_vendor_name_by_key( vendor_key=_vendor_key) rule_set_data = { "RULE_SET_NAME": _rule_set_name, "ENGINE_PROVIDER_NAME": "AFM", "DATA_PROVIDER_NAME": "AHOLD", "OWNER": _owner, "ITEM_SCOPE": _json_config_data["itemScope"], "STORE_SCOPE": _json_config_data["storeScope"], "TYPES_LIST": _json_config_data["typeList"], "ENABLED": "T", "CREATED_BY": _json_config_data["createdBy"], "UPDATED_BY": _json_config_data["createdBy"] } self._logger.info(rule_set_data) _rule_set_sql = """ INSERT INTO AFM_RULE_SET ( [RULE_SET_NAME], [ENGINE_PROVIDER_NAME], [DATA_PROVIDER_NAME], [OWNER], [ITEM_SCOPE], [STORE_SCOPE], [TYPES_LIST], [ENABLED], [CREATED_BY], [CREATED_DATE], [UPDATED_BY], [UPDATED_DATE] ) OUTPUT inserted.RULE_SET_ID VALUES( '{RULE_SET_NAME}', '{ENGINE_PROVIDER_NAME}', '{DATA_PROVIDER_NAME}', '{OWNER}', '{ITEM_SCOPE}', '{STORE_SCOPE}', '{TYPES_LIST}', '{ENABLED}', '{CREATED_BY}', GETDATE(), '{UPDATED_BY}', GETDATE() ) """.format(**rule_set_data) self._logger.info("SQL for AFM_RULE_SET table is: %s" % _rule_set_sql) _rule_set_id = self.app_conn.query_scalar(_rule_set_sql) self._logger.info("Generated rule set id is: %s" % _rule_set_id) rules_data = _json_config_data["rules"] for rule in rules_data: rule_data = { "RULE_ID": rule["ruleId"], "RULE_SET_ID": _rule_set_id, "SUB_LEVEL_METRICS": rule["subLevelMetrics"], "PARAMETER1": rule["parameter1"], "PARAMETER2": rule["parameter2"], "PARAMETER3": rule["parameter3"], "ENABLED": "T", "CREATED_BY": _json_config_data["createdBy"], "CREATED_DATE": "", "UPDATED_BY": _json_config_data["createdBy"], "UPDATED_DATE": "" } self._logger.debug(rule_data) _rules_sql = """ INSERT INTO AFM_RULES ( [RULE_ID], [RULE_SET_ID], [SUB_LEVEL_METRICS], [PARAMETER1], [PARAMETER2], [PARAMETER3], [ENABLED], [CREATED_BY], [CREATED_DATE], [UPDATED_BY], [UPDATED_DATE] ) VALUES ( {RULE_ID}, {RULE_SET_ID}, '{SUB_LEVEL_METRICS}', '{PARAMETER1}', '{PARAMETER2}', '{PARAMETER3}', '{ENABLED}', '{CREATED_BY}', getdate(), '{UPDATED_BY}', getdate() )""".format(**rule_data) self._logger.info("SQL for AFM_RULES table is: %s" % _rules_sql) self.app_conn.execute(_rules_sql) if _rule_type == "RETAILER": _sql = """SELECT COUNT(*) FROM AFM_RETAILER_RULE WHERE cycle_key = {cycleKey}""".format(cycleKey=_cycle_key) _exist = self.app_conn.query_scalar(_sql) if _exist != 0: _update_sql = "UPDATE AFM_RETAILER_RULE SET owner='{owner}', rule_set_id = {rule_set_id} " \ "WHERE cycle_key = {cycleKey}; "\ .format(cycleKey=_cycle_key, owner=_owner, rule_set_id=_rule_set_id) self._logger.info( "Sql for updating table AFM_RETAILER_RULE is: %s" % _update_sql) # calling scd api to execute update/delete statements _body = { "sql": _update_sql, "actioner": "ben.wu", "log_detail": True, "batch_size": 100, "db_type": "MSSQL", "table_schema": "(COMMON)" } # resp = requests.post(self.scd_url, data=json.dumps(_body)) resp = requests.post(self.scd_url, json=_body) if resp.status_code != requests.codes.ok: self._logger.warning("The response result is: %s" % resp.json()) self._logger.error( "Calling API failed with body: %s. 
Refer to API: %s" % (_body, self.scd_url)) else: _insert_sql = """ INSERT INTO AFM_RETAILER_RULE(cycle_key, owner, rule_set_id) VALUES({0}, '{1}', {2});""".format(_cycle_key, _owner, _rule_set_id) self._logger.info( "SQL for inserting table AFM_RETAILER_RULE is: %s" % _insert_sql) self.app_conn.execute(_insert_sql) if _rule_type == "SVR": _sql = """ SELECT COUNT(*) FROM AFM_SVR_RULE WHERE vendor_key = {vendorKey} AND retailer_key = {retailerKey} """.format(vendorKey=_vendor_key, retailerKey=_retailer_key) _exist = self.app_conn.query_scalar(_sql) if _exist != 0: _update_sql = """ UPDATE AFM_SVR_RULE SET owner = '{owner}', rule_set_id = {rule_set_id} WHERE vendor_key = {vendorKey} AND retailer_key = {retailerKey} AND cycle_key = {cycleKey} """.format(owner=_owner, rule_set_id=_rule_set_id, vendorKey=_vendor_key, retailerKey=_retailer_key, cycleKey=_cycle_key) self._logger.info( "SQL for updating table AFM_SVR_RULE is: %s" % _update_sql) # calling scd api to execute update/delete statements _body = { "sql": _update_sql, "actioner": "ben.wu", "log_detail": True, "batch_size": 100, "db_type": "MSSQL", "table_schema": "(COMMON)" } # resp = requests.post(self.scd_url, data=json.dumps(_body)) resp = requests.post(self.scd_url, json=_body) if resp.status_code != requests.codes.ok: self._logger.warning("The response result is: %s" % resp.text) self._logger.error( "Calling API failed with body: %s. Refer to API: %s" % (_body, self.scd_url)) else: _insert_sql = """ INSERT INTO AFM_SVR_RULE(vendor_key, retailer_key, owner, rule_set_id, cycle_key) VALUES({0}, {1}, '{2}', {3}, {4}); """.format(_vendor_key, _retailer_key, _owner, _rule_set_id, _cycle_key) self._logger.info( "SQL for inserting table AFM_SVR_RULE is: %s" % _insert_sql) self.app_conn.execute(_insert_sql) # insert related schedule. self.gen_schedule() self._logger.info( "Insert AFM config data completed. Please check rule_set_id: %s in related tables." % _rule_set_id) except Exception as e: self._logger.warning( "WARNING: Error found. Please fix it and re-run this script.") self._logger.warning(e) raise finally: if self.app_conn: self.app_conn.close_connection() def get_job_def_id(self, job_name): _job_name = job_name _job_url = self.meta[ "api_schedule_str"] # http://engv3dstr2.eng.rsicorp.local/common _headers = { #"tokenid": "eyJhbGciOiJIUzI1NiJ9.eyJjb29raWVWYWx1ZSI6IkFRSUM1d00yTFk0U2Zjd1ZidkJILXZOWFhEYS1HQm1ETlVpd240dWtMSzBsNEJjLipBQUpUU1FBQ01ERUFBbE5MQUJNek16azRPVEkyTXpFNU5EZzJNemMwTnpZeUFBSlRNUUFBKiIsInVzZXJJZCI6ImJlbi53dUByc2ljb3JwLmxvY2FsIiwiY29va2llTmFtZSI6InJzaVNzb05leHRHZW4iLCJzdGF0dXMiOiJzdWNjZXNzIiwiaWF0IjoxNTM0ODE1NTAxfQ.Hbv_wcsEqmUBFTy64BTf15nWC94fsFTfmt3LZMq24Ag", "content-type": "application/json" } _job_def_url = _job_url + '/schedule/jobdefinitions' self._logger.info("URL to find the job definition is: %s " % _job_def_url) res = requests.get(url=_job_def_url, headers=_headers, verify=False) if res.text == "invalid token": self._logger.warning( "WARNING: Please update the tokenid manually. " "Then rerun this script again!!!") exit(1) x = [dct["id"] for dct in res.json() if dct["jobDefName"] == _job_name] if not x: self._logger.info( "There is no job id found for job: %s. You can refer to API: %s" % (_job_name, _job_def_url)) exit(1) # returning job definition id return x[0] def gen_schedule(self): self._logger.info( "Inserting related schedule schedule for OSARetailerAFM.") _rule_type = self.params["ruleType"].upper() # only creating schedule for RETAILER rule. 
if _rule_type == "RETAILER": _job_name = "OSARetailerAFM" _job_schedule_id = self.get_job_def_id(_job_name) _cycle_key = self.params["cycleKey"] _group_name = "{0}:{1}".format(_job_schedule_id, _cycle_key) _schedule_name = "{0}:{1}".format(_group_name, _job_name) sch_params = dict( creater="*****@*****.**", groupName=_group_name, jobDefinitionId=_job_schedule_id, # parametersContext="", # priority=1, # scheduleExpression="", scheduleName=_schedule_name, scheduleType="EVENT") loader = scheduler.ScheduleParamsToTable(self.meta, sch_params) loader.load_data() # OSMAlerting schedule will take care of SVR rule. if _rule_type == "SVR": pass
_meta_intvn = "ALT_META_INTERVENTION" _dim_base_dir = meta.get("azure_storage_dim_root") _prefix_name = "IRIS_" _dw_conn = DWOperation(meta=meta) # Getting retailer list from dim table retailer_list_in_table = [] _retailer_sql = "SELECT DISTINCT retailer_key FROM {schema}.{table} where retailer_key in (6, 5240)"\ .format(schema=_common_schema, table=_dim_store) res = _dw_conn.query(_retailer_sql) for ele in res: retailer_list_in_table.append(ele.retailer_key) print(retailer_list_in_table) capacity = Capacity(meta=meta) azure_uploader = UploadToBlobStorage(meta=meta) # sql_for_item = "SELECT * FROM {schema}.{table}".format(schema=_common_schema, table=_dim_product) # sql_for_store = "SELECT * FROM {schema}.{table}".format(schema=_common_schema, table=_dim_store) # sql_for_cal = "SELECT * FROM {schema}.{table}".format(schema=_common_schema, table=_dim_calendar) # sql_for_alert = "SELECT * FROM {schema}.{table}".format(schema=_common_schema, table=_meta_intvn) # retailer_list = [6, 5240] retailer_list = retailer_list_in_table for retailer_key in retailer_list: _retailer_name = capacity.get_data_by_retailer_key( retailer_key=retailer_key)['retailerName'] print("Uploading 4 tables into Azure blob storage for retailer: %s." % _retailer_name)
class Feedback(object): def __init__(self, meta, params=None, init_flag=False, logger=None): """ # sync feedback data from RDP side to IRIS(OSA) side by incremental via event_key. # 1, only sync those vendor&retailer which applied OSA Service. # 2, for Those new vendor&retailer, copy all historical data when initialization. :param meta: [mandatory] config data from config.properties file :param params: 2 cases here. depends on whether sync rdp feedback for whole RDP or new customer. see below. 1, rdp_id: if rdp_id was given, then sync all data for this given RDP. otherwise, sync data from all related RDPs. Noted: rdp_id will be passed when calling this service via REST API. 2, vendor_key: mandatory only when init_flag is True. retailer_key: mandatory only when init_flag is True Noted: These 2 parameters will not be passed from REST API but called directly by deploy scripts. :param init_flag: if init_flag is True: then only sync feedback data for given vendor & retailer. This is used when introducing new customer. if init_flat is False: sync all customers' data from RDP periodically(e.g. sync daily). :param logger: """ self.meta = meta self._params = {} if params is None else params self._rdp_id = self._params.get("rdpId", None) self._fact_type = 'fdbk' self._init_flag = init_flag self._vendor_key = self._params.get("vendor_key", None) self._retailer_key = self._params.get("retailer_key", None) self._debug = self._params.get('debug', 'N') self._default_rdp = "RDP_AUX" self._log_file = './log/sync_fdbk_%s_%s.log' % ( self._rdp_id, datetime.datetime.now().strftime('%Y%m%d')) self.logger = logger if logger else Logger(log_level="debug", target="console|file", vendor_key=-1, retailer_key=-1, log_file=self._log_file, sql_conn=None) self.osa_app_conn = MSOperation(meta=self.meta, logger=self.logger) self.osa_dw_conn = DWOperation(meta=self.meta, logger=self.logger) self.max_event_key = None # we already know feedback table name of RDP self.source_table_rdp = "DS_FACT_FEEDBACK" # source table in RDP side. self.staging_import_table_osa = "STAGE_FACT_FEEDBACK_RDP" # used to store sync data from RDP table (same structure as table DS_FACT_FEEDBACK) self.target_table_osa = "FACT_FEEDBACK" # final table in OSA side self.capacity = Capacity(meta=meta) self.dct_sync_data = copy.deepcopy( self.meta) # required for calling sync_data module self.dct_sync_data[ "meta"] = self.meta # required for calling sync_data module self.dct_sync_data["target_osa_conn"] = self.osa_dw_conn self.dct_sync_data["target_dw_schema"] = self.meta[ 'db_conn_vertica_common_schema'] self.dct_sync_data["target_dw_table"] = self.staging_import_table_osa self.dct_sync_data["logger"] = self.logger # [True|False(default)] True: direct connection between Vertica clusters. False: using vsql. self.dct_sync_data["dw_conn_vertica"] = False # self.dct_sync_data["dw_conn_vertica"] = True self.transfer = TransferData(dct_sync_data=self.dct_sync_data) def _populate_source_config(self, source_config): self.logger.debug("The source config is: %s" % source_config) _src_config = {} if os.name == 'nt': _src_config["temp_file_path"] = "d:" elif os.name == 'posix': _src_config["temp_file_path"] = "/tmp" # Getting user account from config.properties file first. 
if self.meta.get("db_conn_vertica_rdp_username"): _src_config["dw.etluser.id"] = self.meta.get( "db_conn_vertica_rdp_username") if self.meta.get("db_conn_vertica_rdp_password"): _src_config["dw.etluser.password"] = self.meta.get( "db_conn_vertica_rdp_password") else: _pmp_pwd = get_password( username=self.meta.get("db_conn_vertica_rdp_username"), meta=self.meta) # The pwd should be encrypted in order to: 1, align with else part, 2, pass it to db.sync_data module _src_config["dw.etluser.password"] = Crypto().encrypt(_pmp_pwd) # if not configed then get them directly from RDP config. else: _src_config["dw.etluser.id"] = source_config.get("dw.etluser.id") # the pwd is encrypted _src_config["dw.etluser.password"] = source_config.get( "dw.etluser.password") # required info for calling sync_data module. _src_config["dw.server.name"] = source_config.get("dw.server.name") _src_config["dw.db.name"] = source_config.get("dw.db.name") _src_config["dw.db.portno"] = source_config.get("dw.db.portno", 5433) _src_config["dw.schema.name"] = source_config.get("dw.schema.name") self.logger.debug("srouce config is: %s" % _src_config) self.dct_sync_data["source_config"] = _src_config # Create the connection to RDP Vertica Cluster. which is the source Vertica cluster rdp_meta = copy.deepcopy(self.meta) tmp_rdp_meta = { 'db_conn_vertica_servername': _src_config["dw.server.name"], 'db_conn_vertica_port': _src_config["dw.db.portno"], 'db_conn_vertica_dbname': _src_config["dw.db.name"], 'db_conn_vertica_username': _src_config["dw.etluser.id"], 'db_conn_vertica_password': _src_config["dw.etluser.password"], 'db_conn_vertica_password_encrypted': "true" } rdp_meta.update(tmp_rdp_meta) self.logger.debug("rdp config is: %s" % rdp_meta) rdp_connection = DWOperation(meta=rdp_meta) self.dct_sync_data["source_dw"] = rdp_connection def main_process(self): try: # if not introducing new customer and _rdp_id was given, # then we will sync all feedback data from given RDP for registered users. if self._init_flag is False and self._rdp_id: try: rdp_config = Config(meta=self.meta, hub_id=self._rdp_id).json_data if not rdp_config['configs']: raise Warning( "There is no configs returned for RDP: %s." "Please check if this RDP registered in CP with below URL." "%s/properties/rdps?factType=fdbk" % (self._rdp_id, self.meta["api_config_str"])) # exit(StepStatus.SUCCESS.value) _rdp_schema = rdp_config['configs'].get('dw.schema.name') self.logger.info("Started to sync data from rdp: %s" % _rdp_schema) # self.dct_sync_data["source_config"] = rdp_config['configs'] self._populate_source_config(rdp_config['configs']) self.initialize() _flag = self.load_data() if _flag: # if no data, then no need to process & update variables table. self.process_data() sql = """ IF NOT EXISTS(SELECT * FROM VARIABLES WHERE VARIABLE_NAME = '{eventType}') INSERT INTO VARIABLES (VARIABLE_NAME, VARIABLE_VALUE, PREVIOUS_VALUE, INSERT_TIME, UPDATE_TIME) VALUES ('{eventType}', '{value}', '', getdate(), getdate()) ELSE UPDATE VARIABLES SET PREVIOUS_VALUE = VARIABLE_VALUE, VARIABLE_VALUE = '{value}',UPDATE_TIME = getdate() WHERE VARIABLE_NAME = '{eventType}' """.format(eventType=_rdp_schema, value=self.max_event_key) self.logger.info(sql) self.osa_app_conn.execute(sql) self.logger.info("Data sync done for RDP: %s" % _rdp_schema) except Exception as e: self.logger.warning(e) raise # exit(StepStatus.SUCCESS.value) # exit(0) otherwise Docker container will fail. 
# Else we will get all RDPs from REST API: http://10.172.36.75/config/properties/rdps?factType=fdbk # There could be multi RDPs(e.g. for SVR & WM). if so, loop all RDPs elif self._init_flag is False and self._rdp_id is None: try: rdp_configs = Config( meta=self.meta, rdp_info=True, rdp_fact_type=self._fact_type).json_data if not rdp_configs: raise Warning( "No feedback related RDP found." "Please check if any data returned from below URL." "%s/properties/rdps?factType=fdbk" % (self.meta["api_config_str"])) # exit(StepStatus.SUCCESS.value) for rdp_config in rdp_configs: _rdp_schema = rdp_config['configs'].get( 'dw.schema.name') self.logger.info("Started to sync data from rdp: %s" % _rdp_schema) # self.dct_sync_data["source_config"] = rdp_config['configs'] self._populate_source_config(rdp_config['configs']) self.initialize() _flag = self.load_data() if _flag: # if no data, then no need to process & update variables table. self.process_data() sql = """ IF NOT EXISTS(SELECT * FROM VARIABLES WHERE VARIABLE_NAME = '{eventType}') INSERT INTO VARIABLES (VARIABLE_NAME, VARIABLE_VALUE, PREVIOUS_VALUE, INSERT_TIME, UPDATE_TIME) VALUES ('{eventType}', '{value}', '', getdate(), getdate()) ELSE UPDATE VARIABLES SET PREVIOUS_VALUE = VARIABLE_VALUE, VARIABLE_VALUE = '{value}',UPDATE_TIME = getdate() WHERE VARIABLE_NAME = '{eventType}' """.format(eventType=_rdp_schema, value=self.max_event_key) self.logger.info(sql) self.osa_app_conn.execute(sql) self.logger.info("Data sync done for RDP: %s" % _rdp_schema) except Exception as e: self.logger.warning(e) raise elif self._init_flag is True: if self._vendor_key is None or self._retailer_key is None: self.logger.warning( "vendor_key and retailer_key are required when initilize feedback for new customer" ) raise ValueError # getting fdbk related rdps. try: rdp_configs = Config( meta=self.meta, rdp_info=True, rdp_fact_type=self._fact_type).json_data if not rdp_configs: self.logger.warning( "No feedback related RDP found." "Please check if any data returned from below URL." "%s/properties/rdps?factType=fdbk" % (self.meta["api_config_str"])) exit(StepStatus.SUCCESS.value) fdbk_rdps = [ str(rdp_config["rdpId"]).upper() for rdp_config in rdp_configs ] # change table name in case conflict with normal sync process self.dct_sync_data[ "target_dw_table"] = "{0}_{1}_{2}".format( self.staging_import_table_osa, self._vendor_key, self._retailer_key) _silo_config = Config( meta=self.meta, vendor_key=self._vendor_key, retailer_key=self._retailer_key).json_data _silo_type = _silo_config['configs'].get( 'etl.silo.type', 'SVR') _rdp_id = _silo_config['configs'].get('rdp.db.name') # RDP_AUX is default rdp id for feedback etl on PRODUCTION. # 1, if there is no RDP for given silo. then exit. if not _rdp_id or str(_rdp_id).strip() == '': self.logger.warning( "There is no RDP silo configed for the given vendor:%s " "and retailer:%s. So no need to sync feedback." % (self._vendor_key, self._retailer_key)) exit(StepStatus.SUCCESS.value) # 2, Getting configed RDP list, and check if there are feedback related RDPs. _tmp_rdp_lst = str(_rdp_id).upper().split(sep=",") _rdp_lst = [_tmp.strip() for _tmp in _tmp_rdp_lst] # common_rdps is RDP silo configed for syncing feedback data for given silo(vendor&retailer) common_rdps = list(set(_rdp_lst).intersection(fdbk_rdps)) if common_rdps is None: self.logger.warning( "There is no RDP silo configed for the given vendor:%s " "and retailer:%s. So no need to sync feedback." 
% (self._vendor_key, self._retailer_key)) exit(StepStatus.SUCCESS.value) # If there is 1 or more than 1 feedback related rdps configed, then loop them to sync feedback data, # Normally, there should be only 1. or no feedback rdp configed. for common_rdp in common_rdps: _rdp_id = common_rdp self.logger.info( "Started to sync data from rdp: %s for given vendor:%s and retailer:%s. " % (_rdp_id, self._vendor_key, self._retailer_key)) # if RDP is not RDP_AUX, Won't exit but log a warning. if _rdp_id != self._default_rdp: self.logger.warning( "Please be noted: The RDP is:%s. It is not RDP_AUX." % _rdp_id) # WM silos are also following above logic. # all hosted silo are ultilizing RDP_AUX to transfer feedback data. not sure about Walmart. # if str(_silo_type).upper() in ["WMSSC", "WMCAT", "SASSC", "WMINTL"]: # _rdp_id = self._default_rdp # WM rdp is RDP_AUX as well? rdp_config = Config(meta=self.meta, hub_id=_rdp_id).json_data if not rdp_config['configs']: self.logger.warning( "There is no configs for RDP: %s. Please check following URL:" "%s/properties/%s/%s" % (_rdp_id, self.meta["api_config_str"], _rdp_id, _rdp_id)) exit(StepStatus.SUCCESS.value) _rdp_schema = rdp_config['configs'].get( 'dw.schema.name') self.logger.info( "Started to init feedback data from rdp: %s for " "given vendor:%s and retailer:%s " % (_rdp_schema, self._vendor_key, self._retailer_key)) # self.dct_sync_data["source_config"] = rdp_config['configs'] self._populate_source_config(rdp_config['configs']) self.initialize() _flag = self.load_data() if _flag: # if no data, then no need to process. self.process_data() self.logger.info("Data sync done for RDP: %s" % _rdp_id) except Exception as e: self.logger.warning(e) self.logger.warning( "Please check if any warning or error messages when doing the initialization!" ) finally: if self.osa_app_conn: self.osa_app_conn.close_connection() if self.osa_dw_conn: self.osa_dw_conn.close_connection() def initialize(self): """ Create local temp tables , and DDLs required to process this fact type :return: """ self.logger.info("Initialize...") # recreate this table for every RDP. no need to truncate any longer. # sql = "TRUNCATE TABLE {cmnSchema}.{targetTable}"\ # .format(cmnSchema=self.dct_sync_data["target_dw_schema"], targetTable=self.staging_import_table_osa) # self.logger.info(sql) # self.osa_dw.execute(sql) sql = """ --Store data from RDP table. DROP TABLE IF EXISTS {cmnSchema}.{importTable}; CREATE TABLE {cmnSchema}.{importTable} ( EVENT_KEY int NOT NULL, RETAILER_KEY int, VENDOR_KEY int, STORE_VISIT_DATE date, PERIOD_KEY int NOT NULL, TYPE varchar(1), TYPE_DATE varchar(10), ALERT_ID int, ALERT_TYPE varchar(64), MERCHANDISER_STORE_NUMBER varchar(512), STORE_ID varchar(512), MERCHANDISER_UPC varchar(512), INNER_UPC varchar(512), MERCHANDISER varchar(100), STORE_REP varchar(1000), SOURCE varchar(1000), BEGIN_STATUS varchar(255), ACTION varchar(255), FEEDBACK_DESCRIPTION varchar(255), FEEDBACK_HOTLINEREPORTDATE date, FEEDBACK_ISININVENTORY varchar(5), ZIP_CODE varchar(64), ARTS_CHAIN_NAME varchar(255), UPC_STATUS varchar(255), MSI varchar(255) ) UNSEGMENTED ALL NODES; """.format(cmnSchema=self.dct_sync_data["target_dw_schema"], importTable=self.dct_sync_data["target_dw_table"]) self.logger.info(sql) self.osa_dw_conn.execute(sql) def load_data(self): """ # Load data from RDP table ds_fact_feedback to local temp tables. There is an column event_key which is incremental for all customers in ds_fact_feedback table. 
we can save the snapshot of this column to variable table, and do the incremental every time based on this column. There are few cases here: 1, Routinely, There will be a scheduled job to sync the whole feedback data for valid customers from related RDP silo. And save the snapshot of the event_key from previous loading for next incremental loading. 2, if on-boarding a new vendor & retailer customer. Getting rdp_event_key from variable for related RDP silo. (rdp_event_key is from previous loading) and then sync feedback data from related RDP silo only for this given customer when event_key < rdp_event_key. Then case1 will take care the rest of feedback data. :return: """ rdp_schema = self.dct_sync_data["source_config"].get('dw.schema.name') # rdp_aux.ds_fact_feedback source_table = "{rdpSchema}.{rdptableName}"\ .format(rdpSchema=rdp_schema, rdptableName=self.source_table_rdp) # common.stage_fact_feedback_rdp target_table = "{dwSchema}.{importTable}"\ .format(dwSchema=self.dct_sync_data["target_dw_schema"], importTable=self.dct_sync_data["target_dw_table"]) self.logger.info( "Ready to load Data from {srouceTable} to {targetTable}".format( targetTable=target_table, srouceTable=source_table)) insert_columns = " EVENT_KEY, RETAILER_KEY, VENDOR_KEY, STORE_VISIT_DATE, PERIOD_KEY, TYPE, TYPE_DATE," \ " ALERT_ID, ALERT_TYPE, MERCHANDISER_STORE_NUMBER, STORE_ID, MERCHANDISER_UPC, INNER_UPC," \ " MERCHANDISER, STORE_REP, SOURCE, BEGIN_STATUS, ACTION, FEEDBACK_DESCRIPTION," \ " FEEDBACK_HOTLINEREPORTDATE, FEEDBACK_ISININVENTORY, ZIP_CODE, ARTS_CHAIN_NAME, UPC_STATUS, MSI " try: self.logger.info( "Getting the previous Event_key from last run for incremental load." ) _event_sql = "SELECT VARIABLE_VALUE FROM variables " \ "WHERE VARIABLE_NAME = '{rdpName}'".format(rdpName=rdp_schema) self.logger.info(_event_sql) event_key = self.osa_app_conn.query_scalar(_event_sql) self.logger.info( "Getting customer info which only applied OSA services as filter" ) sql = "SELECT DISTINCT retailer_key, vendor_key FROM AP_ALERT_CYCLE_MAPPING " \ "UNION " \ "SELECT DISTINCT retailer_key, vendor_key FROM AP_ALERT_CYCLE_RC_MAPPING" self.logger.info(sql) results = self.osa_app_conn.query(sql) if not results: raise Warning( "There is no data in table AP_ALERT_CYCLE_MAPPING. Please check sql: %s" % sql) # exit(StepStatus.SUCCESS.value) user_filters = [ 'SELECT ' + str(result.retailer_key) + ',' + str(result.vendor_key) for result in results ] user_filter_str = ' UNION ALL '.join(user_filters) self.logger.info("Customer filters are: %s" % user_filter_str) # incremental filter from RDP table where_sql = "EVENT_KEY > {eventKey} AND SOURCE != 'ARIA' " \ "AND (RETAILER_KEY, VENDOR_KEY) in ({userFilter})"\ .format(eventKey=event_key, userFilter=user_filter_str) # TODO2DONE: how to set default value? use -1 # copy all if there is no value in variables table. if not event_key: self.logger.warning( "There is no value set in variables table for RDP:{name}, " "So copy the whole table".format(name=rdp_schema)) where_sql = " SOURCE != 'ARIA' AND (RETAILER_KEY, VENDOR_KEY) in ({userFilter})"\ .format(eventKey=event_key, userFilter=user_filter_str) event_key = -1 # check if this is the first run. if self._init_flag is True: if event_key == -1: # event_key is None self.logger.warning( "There is no event_key logged in variables table for the given RDP: %s." 
"So Let's wait for the routine job to sync the whole rdp feedback data" % rdp_schema) return False self.logger.info("Generating init feedback filters") where_sql = "EVENT_KEY <= {eventKey} AND SOURCE != 'ARIA' " \ "AND (RETAILER_KEY, VENDOR_KEY) in ({userFilter}) " \ "AND RETAILER_KEY={retailerKey} AND VENDOR_KEY={vendorKey} "\ .format(eventKey=event_key, userFilter=user_filter_str, retailerKey=self._retailer_key, vendorKey=self._vendor_key) self.logger.debug("The filters are: %s" % where_sql) # form the fetch query from RDP and then Insert into the target table fetch_query = """ SELECT /*+ label(GX_IRIS_SYNCFEEDBACK)*/ {insertQuery} FROM {sourceTable} WHERE {whereSql} """.format(insertQuery=insert_columns, sourceTable=source_table, whereSql=where_sql) self.logger.info("fetch_query is : %s" % fetch_query) self.logger.info( ">>Loading {factType} Data from event_key:{eventKey} start at: {timestamp}<<" .format(factType=self._fact_type, eventKey=event_key, timestamp=datetime.datetime.now())) self.dct_sync_data["target_column"] = insert_columns self.dct_sync_data["source_sql"] = fetch_query row_count = self.transfer.transfer_data( dct_sync_data=self.dct_sync_data) self.logger.info( ">>Done loaded {cnt} rows from event_key:{eventKey} completed at: {timestamp}<<" .format(cnt=row_count, factType=self._fact_type, eventKey=event_key, timestamp=datetime.datetime.now())) # if no data transfered, then update variables with previous value. sql = "SELECT /*+ label(GX_IRIS_SYNCFEEDBACK)*/ nvl(max(event_key), {oldEventKey}) " \ "FROM {schemaName}.{importTable} "\ .format(schemaName=self.dct_sync_data["target_dw_schema"], importTable=self.dct_sync_data["target_dw_table"], oldEventKey=event_key) self.logger.info(sql) self.max_event_key = self.osa_dw_conn.query_scalar(sql) # max_event_key = -1 # testing purpose if self.max_event_key == -1: self.logger.warning( "There is no feedback data in RDP table: {0}".format( source_table)) return False return True except Exception as e: self.logger.warning(e) raise finally: pass def process_data(self): """ after load_data part completes. sync data from temp table to related schemas. :return: """ try: self.logger.info("Processing feedback start...") # loop retailer to insert feedback data sql = "SELECT DISTINCT retailer_key " \ "FROM {cmnSchema}.{importTable}"\ .format(cmnSchema=self.dct_sync_data["target_dw_schema"], importTable=self.dct_sync_data["target_dw_table"]) self.logger.info(sql) retailers = self.osa_dw_conn.query(sql) if retailers.rowcount == 0: self.logger.warning( "There is no data in table {cmnSchema}.{importTable}." "It could be no incremental data. Please check fetch_query against RDP database" .format(cmnSchema=self.dct_sync_data["target_dw_schema"], importTable=self.dct_sync_data["target_dw_table"])) for retailer in retailers: retailer_key = retailer.retailer_key osa_schema = self.capacity.get_retailer_schema_name( retailer_key) # Finally, run the sql msg = "Processing fdbk data within retailer {retailerKey}:{retailerName}"\ .format(retailerKey=retailer_key, retailerName=osa_schema) self.logger.info(msg) # Normally, There should NOT be duplicated alert_id transfered by incremental. # But should consider this case here. Delete existing alertid from target table # TODO: delete could have performance issue. 
consider using switch partition delete_sql = "DELETE FROM {osaSchema}.{targetTable} " \ "WHERE alert_id IN (SELECT alert_id FROM {cmnSchema}.{importTable} )"\ .format(targetTable=self.target_table_osa, osaSchema=osa_schema, cmnSchema=self.dct_sync_data["target_dw_schema"], importTable=self.dct_sync_data["target_dw_table"]) self.logger.info(delete_sql) self.osa_dw_conn.execute(delete_sql) # inserting feedback data into final table fact_feedback from processed table. sql = """ INSERT INTO {osaSchema}.{targetTable} (EVENT_KEY, RETAILER_KEY, VENDOR_KEY, STORE_VISITED_PERIOD_KEY, PERIOD_KEY, ALERT_ID, STORE_KEY, MERCHANDISER_STORE_NUMBER, STORE_ID, ITEM_KEY, MERCHANDISER_UPC, UPC, MERCHANDISER, STORE_REP, SOURCE, BEGIN_STATUS, ACTION, FEEDBACK_DESCRIPTION, ON_HAND_PHYSICAL_COUNT, ON_HAND_CAO_COUNT ) SELECT stage.EVENT_KEY, stage.RETAILER_KEY, stage.VENDOR_KEY, TO_CHAR(stage.STORE_VISIT_DATE, 'YYYYMMDD')::int AS STORE_VISITED_PERIOD_KEY, stage.PERIOD_KEY, stage.ALERT_ID, store.STORE_KEY AS STORE_KEY, stage.MERCHANDISER_STORE_NUMBER, COALESCE(store.STORE_ID , alert.STOREID, stage.STORE_ID) AS STORE_ID, item.ITEM_KEY AS ITEM_KEY, stage.MERCHANDISER_UPC, COALESCE(item.UPC, alert.UPC, stage.INNER_UPC, stage.MERCHANDISER_UPC) AS UPC, stage.MERCHANDISER, stage.STORE_REP, stage.SOURCE, stage.BEGIN_STATUS, stage.ACTION, stage.FEEDBACK_DESCRIPTION, 0 AS ON_HAND_PHYSICAL_COUNT, 0 AS ON_HAND_CAO_COUNT FROM {cmnSchema}.{importTable} stage LEFT JOIN {osaSchema}.FACT_PROCESSED_ALERT alert ON stage.alert_id = alert.alert_id AND alert.issuanceid = 0 AND alert.retailer_key = {retailerKey} AND stage.vendor_key = alert.vendor_key INNER JOIN {cmnSchema}.DIM_PRODUCT item ON item.retailer_key = {retailerKey} AND alert.vendor_key = item.vendor_key AND item.item_key = alert.item_key INNER JOIN {cmnSchema}.DIM_STORE store ON store.retailer_key = {retailerKey} AND alert.vendor_key = store.vendor_key AND store.store_key = alert.store_key WHERE stage.retailer_key = {retailerKey} """.format(osaSchema=osa_schema, targetTable=self.target_table_osa, cmnSchema=self.dct_sync_data["target_dw_schema"], importTable=self.dct_sync_data["target_dw_table"], retailerKey=retailer_key) self.logger.info( "SQL used to load data to related schema. %s" % sql) self.osa_dw_conn.execute(sql) self.logger.info("Processing feedback ended...") except Exception as e: self.logger.warning("Process data for RDP {0} failed: {1}".format( self._rdp_id, e)) raise finally: if self._debug.upper() == 'N': _drop_sql = "DROP TABLE IF EXISTS {schemaName}.{importTable};" \ .format(schemaName=self.dct_sync_data["target_dw_schema"], importTable=self.dct_sync_data["target_dw_table"]) self.logger.info(_drop_sql) self.osa_dw_conn.execute(_drop_sql)