def __init__(self, data_provider, output, kpi_static_data, geometric_kpi_flag=False, **data):
    """Session toolbox: cache the data-provider frames used by the KPI calculations.

    :param data_provider: session data provider (indexed by ``Data`` keys).
    :param output: calculation output sink, passed to the calculations engine.
    :param kpi_static_data: pre-fetched KPI static metadata (stored as-is).
    :param geometric_kpi_flag: when True, build position graphs for the session
        and take product matches from them instead of the raw MATCHES frame.
    :param data: extra inputs; only 'match_display_in_scene' is read here.
    """
    self.k_engine = BaseCalculationsGroup(data_provider, output)
    self.data_provider = data_provider
    self.project_name = self.data_provider.project_name
    self.session_uid = self.data_provider.session_uid
    self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
    self.rds_conn = PSProjectConnector(self.project_name, DbUsers.CalcAdmin)
    # Enrich scene-item facts with store attributes; left join keeps every scif row.
    self.scif = self.scif.merge(self.data_provider[Data.STORE_INFO], how='left',
                                left_on='store_id', right_on='store_fk')
    # NOTE(review): assumed to be provided by the caller via **data; if absent
    # this is None and the merge below would raise — confirm against callers.
    self.match_display_in_scene = data.get('match_display_in_scene')
    self.all_products = self.data_provider[Data.ALL_PRODUCTS]
    self.survey_response = self.data_provider[Data.SURVEY_RESPONSES]
    self.kpi_static_data = kpi_static_data
    # self.get_atts()
    if geometric_kpi_flag:
        self.position_graph_data = CCUS_SANDPositionGraphs(self.data_provider)
        self.matches = self.position_graph_data.match_product_in_scene
        self.position_graph = self.position_graph_data.position_graphs
    else:
        self.position_graph_data = None
        self.matches = self.data_provider[Data.MATCHES]
        # Attach display information to the raw matches by scene and bay.
        # NOTE(review): source was whitespace-mangled; this merge is placed in
        # the else-branch — confirm it should not also run for the graph path.
        self.matches = self.matches.merge(self.match_display_in_scene, how='left',
                                          on=['scene_fk', 'bay_number'])
def __init__(self, data_provider, output, set_name=None):
    """CCRU FIFA KPI toolbox: cache provider frames and prepare the KPI fetcher.

    :param data_provider: session data provider (indexed by ``Data`` keys).
    :param output: calculation output sink.
    :param set_name: KPI set to calculate; resolved via get_set() when None.
    """
    self.data_provider = data_provider
    self.output = output
    self.products = self.data_provider[Data.ALL_PRODUCTS]
    self.k_engine = BaseCalculationsGroup(data_provider, output)
    self.project_name = data_provider.project_name
    self.session_uid = self.data_provider.session_uid
    # NOTE(review): re-assigns self.products with the same frame as above.
    self.products = self.data_provider[Data.ALL_PRODUCTS]
    self.match_product_in_scene = self.data_provider[Data.MATCHES]
    self.templates = self.data_provider[Data.ALL_TEMPLATES]
    self.visit_date = self.data_provider[Data.VISIT_DATE]
    self.scenes_info = self.data_provider[Data.SCENES_INFO]
    self.rds_conn = PSProjectConnector(self.project_name, DbUsers.CalculationEng)
    self.session_info = SessionInfo(data_provider)
    self.store_id = self.data_provider[Data.STORE_FK]
    self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
    if set_name is None:
        self.set_name = self.get_set()
    else:
        self.set_name = set_name
    # Fetcher is bound to the resolved set name, so it must come after it.
    self.kpi_fetcher = CCRUFIFAQueries(self.project_name, self.scif,
                                       self.match_product_in_scene, self.set_name)
    self.survey_response = self.data_provider[Data.SURVEY_RESPONSES]
    self.sales_rep_fk = self.data_provider[
        Data.SESSION_INFO]['s_sales_rep_fk'].iloc[0]
    self.session_fk = self.data_provider[Data.SESSION_INFO]['pk'].iloc[0]
    # Accumulators filled during calculation / written to the DB at the end.
    self.thresholds_and_results = {}
    self.result_df = []
    self.kpi_results_queries = []
def __init__(self, data_provider, output, geometric_kpi_flag=False, **kwargs):
    """General toolbox: cache provider frames, normalize scif column names,
    and optionally build position graphs for geometric KPIs.

    :param data_provider: session data provider (indexed by ``Data`` keys).
    :param output: calculation output sink.
    :param geometric_kpi_flag: when True, matches come from position graphs.
    :param kwargs: attached verbatim as instance attributes.
    """
    self.k_engine = BaseCalculationsGroup(data_provider, output)
    self.data_provider = data_provider
    self.project_name = self.data_provider.project_name
    self.session_uid = self.data_provider.session_uid
    self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
    self.store_info = self.data_provider[Data.STORE_INFO]
    # Enrich scene-item facts with store attributes; left join keeps every scif row.
    self.scif = self.scif.merge(self.data_provider[Data.STORE_INFO], how='left',
                                left_on='store_id', right_on='store_fk')
    # Normalize column names. The original guarded with `if sub in SUB.keys()`,
    # which is always true when iterating SUB — the check was dropped.
    # Renames stay sequential (one mapping per pass) to preserve any chaining.
    for old_name, new_name in SUB.items():
        self.scif = self.scif.rename(columns={old_name: new_name})
    for old_name, new_name in TITLE.items():
        if old_name in self.scif.columns:
            self.scif = self.scif.rename(columns={old_name: new_name})
    self.all_products = self.data_provider[Data.ALL_PRODUCTS]
    self.survey_response = self.data_provider[Data.SURVEY_RESPONSES]
    # Attach any extra keyword arguments verbatim.
    for name, value in kwargs.items():
        setattr(self, name, value)
    if geometric_kpi_flag:
        self.position_graph_data = KCUS_SANDPositionGraphs(self.data_provider)
        self.matches = self.position_graph_data.match_product_in_scene
        self.position_graph = self.position_graph_data.position_graphs
    else:
        self.position_graph_data = None
        self.matches = self.data_provider[Data.MATCHES]
def __init__(self, data_provider, output, rds_conn=None, ignore_stacking=False, front_facing=False, **kwargs):
    """General toolbox with stacking / front-facing options.

    :param data_provider: session data provider (indexed by ``Data`` keys).
    :param output: calculation output sink.
    :param rds_conn: optional pre-built DB connection (stored as-is, may be None).
    :param ignore_stacking: when True, facings are read from 'facings_ign_stack'.
    :param front_facing: when True, scif is filtered to front-face rows only.
    :param kwargs: attached verbatim as instance attributes.
    """
    self.k_engine = BaseCalculationsGroup(data_provider, output)
    self.rds_conn = rds_conn
    self.data_provider = data_provider
    self.project_name = self.data_provider.project_name
    self.session_uid = self.data_provider.session_uid
    self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
    self.all_products = self.data_provider[Data.ALL_PRODUCTS]
    self.survey_response = self.data_provider[Data.SURVEY_RESPONSES]
    # Scenes enriched with their template attributes; '_y' avoids column clashes.
    self.scenes_info = self.data_provider[Data.SCENES_INFO].merge(
        self.data_provider[Data.ALL_TEMPLATES], how='left', on='template_fk',
        suffixes=['', '_y'])
    self.ignore_stacking = ignore_stacking
    # Column name downstream facings calculations read from.
    self.facings_field = 'facings' if not self.ignore_stacking else 'facings_ign_stack'
    self.front_facing = front_facing
    for data in kwargs.keys():
        setattr(self, data, kwargs[data])
    if self.front_facing:
        # Keep only rows flagged as front-facing.
        self.scif = self.scif[self.scif['front_face_count'] == 1]
def __init__(self, data_provider, output, **kwargs):
    """Minimal toolbox: cache the common session frames.

    :param data_provider: session data provider (indexed by ``Data`` keys).
    :param output: calculation output sink.
    :param kwargs: attached verbatim as instance attributes.
    """
    self.k_engine = BaseCalculationsGroup(data_provider, output)
    self.data_provider = data_provider
    self.project_name = self.data_provider.project_name
    self.session_uid = self.data_provider.session_uid
    self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
    self.all_products = self.data_provider[Data.ALL_PRODUCTS]
    # Idiom fix: iterate items() instead of keys() plus a second lookup.
    for name, value in kwargs.items():
        setattr(self, name, value)
def __init__(self, data_provider, output, **kwargs):
    """Toolbox with cloud/local template storage paths.

    :param data_provider: session data provider (indexed by ``Data`` keys).
    :param output: calculation output sink.
    :param kwargs: attached verbatim as instance attributes.
    """
    self.k_engine = BaseCalculationsGroup(data_provider, output)
    self.data_provider = data_provider
    self.project_name = self.data_provider.project_name
    self.session_uid = self.data_provider.session_uid
    self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
    self.all_products = self.data_provider[Data.ALL_PRODUCTS]
    self.survey_response = self.data_provider[Data.SURVEY_RESPONSES]
    self.amz_conn = StorageFactory.get_connector(BUCKET)
    self.templates_path = self.TEMPLATES_PATH + self.project_name + '/'
    self.local_templates_path = os.path.join(CACHE_PATH, 'templates')
    # Clarity fix: the original passed an empty dict ``{}`` as the third format
    # argument, relying on str({}) == '{}'. Passing the literal string '{}'
    # produces the same value and makes the intent explicit: the path keeps a
    # trailing '{}' placeholder to be filled by a later .format() call.
    self.cloud_templates_path = '{}{}/{}'.format(self.TEMPLATES_PATH,
                                                 self.project_name, '{}')
    # Idiom fix: iterate items() instead of keys() plus a second lookup.
    for name, value in kwargs.items():
        setattr(self, name, value)
def __init__(self, data_provider, output):
    """KPI generator: cache provider frames, enrich scif with store and display
    data, and build the calculation ToolBox for this session.
    """
    self.k_engine = BaseCalculationsGroup(data_provider, output)
    self.data_provider = data_provider
    self.project_name = self.data_provider.project_name
    self.session_uid = self.data_provider.session_uid
    self.products = self.data_provider[Data.PRODUCTS]
    self.all_products = self.data_provider[Data.ALL_PRODUCTS]
    self.match_product_in_scene = self.data_provider[Data.MATCHES]
    self.visit_date = self.data_provider[Data.VISIT_DATE]
    self.session_info = self.data_provider[Data.SESSION_INFO]
    self.rds_conn = PSProjectConnector(self.project_name, DbUsers.CalcAdmin)
    self.store_info = self.data_provider[Data.STORE_INFO]
    self.store_type = self.store_info['store_type'].values[0]
    self.scene_info = self.data_provider[Data.SCENES_INFO]
    self.store_id = self.data_provider[Data.STORE_FK]
    self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
    # Enrich scene-item facts with store attributes, then with display info per scene.
    self.scif = self.scif.merge(self.data_provider[Data.STORE_INFO], how='left',
                                left_on='store_id', right_on='store_fk')
    self.match_display_in_scene = self.get_match_display()
    self.scif = self.scif.merge(self.match_display_in_scene, how='left',
                                left_on='scene_id', right_on='scene_fk')
    # Strip a stray carriage-return/newline variant of the 'Display' value.
    self.scif = self.scif.replace(['Display\r\n'], ['Display'])
    # self.get_atts()
    self.set_templates_data = {}
    self.kpi_static_data = self.get_kpi_static_data()
    # ToolBox reuses the static data and display matches computed above.
    self.tools = ToolBox(self.data_provider, output,
                         kpi_static_data=self.kpi_static_data,
                         match_display_in_scene=self.match_display_in_scene)
    self.download_time = timedelta(0)
    self.kpi_results_queries = []
    self.session_fk = self.data_provider[Data.SESSION_INFO]['pk'].iloc[0]
    self.survey_response = self.data_provider[Data.SURVEY_RESPONSES]
def __init__(self, data_provider, output):
    """KPI generator: cache provider frames and build the calculation helpers
    (general toolbox, Common, SOS) for this session.
    """
    self.output = output
    self.data_provider = data_provider
    self.project_name = self.data_provider.project_name
    self.session_uid = self.data_provider.session_uid
    self.products = self.data_provider[Data.PRODUCTS]
    self.all_products = self.data_provider[Data.ALL_PRODUCTS]
    self.match_product_in_scene = self.data_provider[Data.MATCHES]
    self.visit_date = self.data_provider[Data.VISIT_DATE]
    self.session_info = self.data_provider[Data.SESSION_INFO]
    self.scene_info = self.data_provider[Data.SCENES_INFO]
    self.store_id = self.data_provider[Data.STORE_FK]
    self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
    self.rds_conn = PSProjectConnector(self.project_name, DbUsers.CalculationEng)
    # General toolbox shares this session's DB connection.
    self.tools = CUBAUCUBAUGENERALToolBox(self.data_provider, self.output,
                                          rds_conn=self.rds_conn)
    self.common = Common(self.data_provider)
    self.common_sos = SOS(self.data_provider, self.output)
    self.kpi_results_queries = []
    self.k_engine = BaseCalculationsGroup(data_provider, output)
    self.store_type = data_provider.store_type
    # Matches enriched with facing-direction data (helper defined on this class).
    self.matches_with_direction = self.get_match_product_in_scene_with_direction()
def __init__(self, data_provider, output):
    """KPI generator (CommonV2 based): cache provider frames, load KPI static
    data and the score/KPI templates for this session.
    """
    self.output = output
    self.data_provider = data_provider
    self.common = Common(self.data_provider)
    self.commonV2 = CommonV2(self.data_provider)
    self.project_name = self.data_provider.project_name
    self.session_uid = self.data_provider.session_uid
    self.k_engine = BaseCalculationsGroup(data_provider, output)
    self.products = self.data_provider[Data.PRODUCTS]
    # self.all_products = self.data_provider[Data.ALL_PRODUCTS]
    self.match_product_in_scene = self.data_provider[Data.MATCHES]
    self.visit_date = self.data_provider[Data.VISIT_DATE]
    self.session_info = self.data_provider[Data.SESSION_INFO]
    self.scene_info = self.data_provider[Data.SCENES_INFO]
    self.store_id = self.data_provider[Data.STORE_FK]
    self.store_info = self.data_provider[Data.STORE_INFO]
    self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
    self.rds_conn = PSProjectConnector(self.project_name, DbUsers.CalculationEng)
    self.kpi_static_data = self.commonV2.get_kpi_static_data()
    self.kpi_results_queries = []
    self.templates = {}
    self.all_products = self.commonV2.data_provider[Data.ALL_PRODUCTS]
    self.session_id = self.data_provider.session_id
    self.score_templates = {}
    # Template loaders populate self.templates / self.score_templates.
    self.get_templates()
    self.get_score_template()
    # NOTE(review): iloc[0] on a filtered DataFrame yields the whole first
    # 'Coca Cola' product ROW (a Series), not a manufacturer fk — confirm
    # downstream consumers expect a row here, not a scalar key.
    self.manufacturer_fk = self.all_products[
        self.all_products['manufacturer_name'] == 'Coca Cola'].iloc[0]
    self.sos = SOS(self.data_provider, self.output)
    self.total_score = 0
    self.session_fk = self.data_provider[Data.SESSION_INFO]['pk'].iloc[0]
    self.toolbox = GENERALToolBox(self.data_provider)
    self.scenes_info = self.data_provider[Data.SCENES_INFO]
    self.kpi_results_new_tables_queries = []
def __init__(self, data_provider, output):
    """KPI generator (Pepsico): cache provider frames, resolve the Pepsico
    manufacturer fk and the categories / main shelves relevant to this session.
    """
    self.output = output
    self.data_provider = data_provider
    self.common = Common(self.data_provider)
    self.project_name = self.data_provider.project_name
    self.session_uid = self.data_provider.session_uid
    self.products = self.data_provider[Data.PRODUCTS]
    self.all_products = self.data_provider[Data.ALL_PRODUCTS]
    self.match_product_in_scene = self.data_provider[Data.MATCHES]
    self.visit_date = self.data_provider[Data.VISIT_DATE]
    self.session_info = self.data_provider[Data.SESSION_INFO]
    self.scene_info = self.data_provider[Data.SCENES_INFO]
    self.store_id = self.data_provider[Data.STORE_FK]
    self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
    self.rds_conn = PSProjectConnector(self.project_name, DbUsers.CalculationEng)
    self.kpi_static_data = self.common.get_kpi_static_data()
    self.kpi_results_queries = []
    self.pepsico_fk = self.get_relevant_pk_by_name(Const.MANUFACTURER, Const.PEPSICO)
    self.k_engine = BaseCalculationsGroup(data_provider, output)
    self.categories_to_calculate = self.get_relevant_categories_for_session()
    self.toolbox = GENERALToolBox(data_provider)
    # Scene types whose template name contains the main-shelf marker.
    self.main_shelves = [
        scene_type
        for scene_type in self.scif[Const.TEMPLATE_NAME].unique().tolist()
        if Const.MAIN_SHELF in scene_type
    ]
def __init__(self, data_provider, output, set_name=None):
    """KCUS KPI toolbox: cache provider frames, normalize scif column names and
    prepare the KPI fetcher and the per-session calculation accumulators.

    :param data_provider: session data provider (indexed by ``Data`` keys).
    :param output: calculation output sink.
    :param set_name: KPI set name; stored as-is (may be None).
    """
    self.k_engine = BaseCalculationsGroup(data_provider, output)
    self.data_provider = data_provider
    self.output = output
    # NOTE(review): self.products is assigned ALL_PRODUCTS twice (see below).
    self.products = self.data_provider[Data.ALL_PRODUCTS]
    self.all_products = self.data_provider[Data.ALL_PRODUCTS]
    self.project_name = data_provider.project_name
    self.session_uid = self.data_provider.session_uid
    self.products = self.data_provider[Data.ALL_PRODUCTS]
    self.match_product_in_scene = self.data_provider[Data.MATCHES]
    self.templates = self.data_provider[Data.ALL_TEMPLATES]
    self.visit_date = self.data_provider[Data.VISIT_DATE]
    self.scenes_info = self.data_provider[Data.SCENES_INFO]
    self.rds_conn = PSProjectConnector(self.project_name, DbUsers.CalculationEng)
    self.session_info = SessionInfo(data_provider)
    self.store_id = self.data_provider[Data.STORE_FK]
    self.store_data = self.data_provider[Data.STORE_INFO]
    self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
    # Enrich scene-item facts with store attributes; left join keeps every scif row.
    self.scif = self.scif.merge(self.data_provider[Data.STORE_INFO], how='left',
                                left_on='store_id', right_on='store_fk')
    # Normalize column names; the SUB membership check is always true when
    # iterating SUB, the TITLE check skips columns absent from scif.
    for sub in SUB:
        if sub in (SUB.keys()):
            self.scif = self.scif.rename(columns={sub: SUB.get(sub)})
    for title in TITLE:
        if title in (self.scif.columns.unique().tolist()):
            self.scif = self.scif.rename(columns={title: TITLE.get(title)})
    # self.generaltoolbox = KCUSGENERALToolBox(data_provider, output, geometric_kpi_flag=True)
    # self.scif = self.scif.replace(' ', '', regex=True)
    self.set_name = set_name
    self.kpi_fetcher = KCUSFetcher(self.project_name, self.scif,
                                   self.match_product_in_scene, self.set_name,
                                   self.products, self.session_uid)
    self.all_template_data = parse_template(TEMPLATE_PATH, "Simple KPI's")
    self.survey_response = self.data_provider[Data.SURVEY_RESPONSES]
    self.sales_rep_fk = self.data_provider[
        Data.SESSION_INFO]['s_sales_rep_fk'].iloc[0]
    self.session_fk = self.data_provider[Data.SESSION_INFO]['pk'].iloc[0]
    self.store_type = self.data_provider[
        Data.STORE_INFO]['store_type'].iloc[0]
    self.region = self.data_provider[
        Data.STORE_INFO]['region_name'].iloc[0]
    # Accumulators and tuning constants used during calculation.
    self.thresholds_and_results = {}
    self.result_df = []
    self.writing_to_db_time = datetime.timedelta(0)
    self.kpi_results_queries = []
    self.potential_products = {}
    self.shelf_square_boundaries = {}
    self.average_shelf_values = {}
    self.kpi_static_data = self.get_kpi_static_data()
    self.ignore_stacking = False
    self.facings_field = 'facings' if not self.ignore_stacking else 'facings_ign_stack'
    self.max_shelf_of_bay = []
    self.INCLUDE_FILTER = 1
    self.MM_TO_FEET_CONVERSION = MM_TO_FEET_CONVERSION
    self.EXCLUDE_EMPTY = True
def __init__(self, data_provider, output):
    """Cache the session handles this calculator needs: the data provider, the
    output sink, the calculations engine, product/scene frames and a DB
    connection for the project.
    """
    self.data_provider = data_provider
    self.output = output
    self.k_engine = BaseCalculationsGroup(data_provider, output)
    self.products = data_provider[Data.ALL_PRODUCTS]
    self.scenes_info = data_provider[Data.SCENES_INFO]
    self.rds_conn = PSProjectConnector(data_provider.project_name,
                                       DbUsers.CalculationEng)
def __init__(self, data_provider, output, kpi_static_data, geometric_kpi_flag=False):
    """Cache the session frames used by the KPI calculations.

    When geometric_kpi_flag is set, product matches come from freshly built
    position graphs; otherwise the provider's raw MATCHES frame is used.
    """
    self.k_engine = BaseCalculationsGroup(data_provider, output)
    self.data_provider = data_provider
    self.project_name = data_provider.project_name
    self.session_uid = data_provider.session_uid
    self.kpi_static_data = kpi_static_data
    self.scif = data_provider[Data.SCENE_ITEM_FACTS]
    self.all_products = data_provider[Data.ALL_PRODUCTS]
    self.survey_response = data_provider[Data.SURVEY_RESPONSES]
    if not geometric_kpi_flag:
        self.position_graphs = None
        self.matches = data_provider[Data.MATCHES]
    else:
        self.position_graphs = PositionGraphs(self.data_provider)
        self.matches = self.position_graphs.match_product_in_scene
def __init__(self, data_provider, output, **data):
    """Toolbox with cloud/local template storage paths.

    :param data_provider: session data provider (indexed by ``Data`` keys).
    :param output: calculation output sink.
    :param data: accepted for interface compatibility; not read here.
    """
    self.k_engine = BaseCalculationsGroup(data_provider, output)
    self.data_provider = data_provider
    self.project_name = self.data_provider.project_name
    self.session_uid = self.data_provider.session_uid
    self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
    self.all_products = self.data_provider[Data.ALL_PRODUCTS]
    self.survey_response = self.data_provider[Data.SURVEY_RESPONSES]
    # Clarity fix: the original passed an empty dict ``{}`` as the third format
    # argument, relying on str({}) == '{}'. Passing the literal string '{}'
    # produces the same value and makes the intent explicit: the path keeps a
    # trailing '{}' placeholder to be filled by a later .format() call.
    self.cloud_templates_path = '{}{}/{}'.format(self.TEMPLATES_PATH,
                                                 self.project_name, '{}')
    self.local_templates_path = os.path.join(CACHE_PATH, 'templates')
def __init__(self, data_provider, output, set_name=None):
    """KCUS_SAND KPI toolbox: cache provider frames, normalize scif column
    names, build the general toolbox (with position graphs) and the KPI fetcher.

    :param data_provider: session data provider (indexed by ``Data`` keys).
    :param output: calculation output sink.
    :param set_name: KPI set name; stored as-is (may be None).
    """
    self.k_engine = BaseCalculationsGroup(data_provider, output)
    self.data_provider = data_provider
    self.output = output
    # NOTE(review): self.products is assigned ALL_PRODUCTS twice (see below).
    self.products = self.data_provider[Data.ALL_PRODUCTS]
    self.all_products = self.data_provider[Data.ALL_PRODUCTS]
    self.project_name = data_provider.project_name
    self.session_uid = self.data_provider.session_uid
    self.products = self.data_provider[Data.ALL_PRODUCTS]
    self.match_product_in_scene = self.data_provider[Data.MATCHES]
    self.templates = self.data_provider[Data.ALL_TEMPLATES]
    self.visit_date = self.data_provider[Data.VISIT_DATE]
    self.scenes_info = self.data_provider[Data.SCENES_INFO]
    self.rds_conn = PSProjectConnector(self.project_name, DbUsers.CalculationEng)
    self.session_info = SessionInfo(data_provider)
    self.store_id = self.data_provider[Data.STORE_FK]
    self.store_data = self.data_provider[Data.STORE_INFO]
    self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
    # Enrich scene-item facts with store attributes; left join keeps every scif row.
    self.scif = self.scif.merge(self.data_provider[Data.STORE_INFO], how='left',
                                left_on='store_id', right_on='store_fk')
    # Normalize column names; the SUB membership check is always true when
    # iterating SUB, the TITLE check skips columns absent from scif.
    for sub in SUB:
        if sub in (SUB.keys()):
            self.scif = self.scif.rename(columns={sub: SUB.get(sub)})
    for title in TITLE:
        if title in (self.scif.columns.unique().tolist()):
            self.scif = self.scif.rename(columns={title: TITLE.get(title)})
    self.generaltoolbox = KCUS_SANDGENERALToolBox(data_provider, output,
                                                  geometric_kpi_flag=True)
    # self.scif = self.scif.replace(' ', '', regex=True)
    self.set_name = set_name
    self.kpi_fetcher = KCUS_SANDFetcher(self.project_name, self.scif,
                                        self.match_product_in_scene,
                                        self.set_name, self.products,
                                        self.session_uid)
    self.survey_response = self.data_provider[Data.SURVEY_RESPONSES]
    self.sales_rep_fk = self.data_provider[
        Data.SESSION_INFO]['s_sales_rep_fk'].iloc[0]
    self.session_fk = self.data_provider[Data.SESSION_INFO]['pk'].iloc[0]
    self.store_type = self.data_provider[
        Data.STORE_INFO]['store_type'].iloc[0]
    self.region = self.data_provider[
        Data.STORE_INFO]['region_name'].iloc[0]
    # Accumulators used during calculation.
    self.thresholds_and_results = {}
    self.result_df = []
    self.writing_to_db_time = datetime.timedelta(0)
    # self.match_product_in_probe_details = self.kpi_fetcher.get_match_product_in_probe_details(self.session_uid)
    self.kpi_results_queries = []
    # self.position_graphs = MARSRU_PRODPositionGraphs(self.data_provider)
    self.potential_products = {}
    self.shelf_square_boundaries = {}
def __init__(self, data_provider, output):
    """Cache session handles and build the DIAGEO-GR toolbox that performs the
    actual KPI calculations for this session.
    """
    self.data_provider = data_provider
    self.output = output
    self.k_engine = BaseCalculationsGroup(data_provider, output)
    self.project_name = data_provider.project_name
    self.session_uid = data_provider.session_uid
    self.visit_date = data_provider[Data.VISIT_DATE]
    self.store_id = data_provider[Data.STORE_FK]
    self.session_info = SessionInfo(data_provider)
    self.rds_conn = PSProjectConnector(self.project_name, DbUsers.CalculationEng)
    self.tool_box = DIAGEOGRSANDToolBox(self.data_provider, self.output)
def __init__(self, data_provider, output):
    """Gather all relevant session data; KPI static info triggers the KPI
    calculation, carried out by the INTEG4 toolbox built here.
    """
    self.data_provider = data_provider
    self.output = output
    self.k_engine = BaseCalculationsGroup(data_provider, output)
    self.project_name = data_provider.project_name
    self.session_uid = data_provider.session_uid
    self.visit_date = data_provider[Data.VISIT_DATE]
    self.store_id = data_provider[Data.STORE_FK]
    self.session_info = SessionInfo(data_provider)
    self.rds_conn = PSProjectConnector(self.project_name, DbUsers.CalculationEng)
    self.tool_box = INTEG4KPIToolBox(self.data_provider, self.output, FT)
    # Per-session results accumulator.
    self.results = {}
def __init__(self, data_provider, output, **data):
    """BCI toolbox: cache provider frames, build the general toolbox (with
    position graphs) and the cloud/local template storage paths.

    :param data_provider: session data provider (indexed by ``Data`` keys).
    :param output: calculation output sink.
    :param data: reads 'kpi_static_data' and 'match_display_in_scene'.
    """
    self.k_engine = BaseCalculationsGroup(data_provider, output)
    self.data_provider = data_provider
    self.project_name = self.data_provider.project_name
    self.session_uid = self.data_provider.session_uid
    self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
    self.all_products = self.data_provider[Data.ALL_PRODUCTS]
    self.survey_response = self.data_provider[Data.SURVEY_RESPONSES]
    self.kpi_static_data = data.get('kpi_static_data')
    self.match_display_in_scene = data.get('match_display_in_scene')
    self.general_tools = CCBOTTLERSUS_SANDBCIGENERALToolBox(
        data_provider, output, self.kpi_static_data, geometric_kpi_flag=True)
    self.amz_conn = StorageFactory.get_connector(BUCKET)
    self.templates_path = self.TEMPLATES_PATH + self.project_name + '/'
    # Clarity fix: the original passed an empty dict ``{}`` as the third format
    # argument, relying on str({}) == '{}'. Passing the literal string '{}'
    # produces the same value and makes the intent explicit: the path keeps a
    # trailing '{}' placeholder to be filled by a later .format() call.
    self.cloud_templates_path = '{}{}/{}'.format(self.TEMPLATES_PATH,
                                                 self.project_name, '{}')
    self.local_templates_path = os.path.join(CACHE_PATH, 'templates')
def __init__(self, data_provider, output, set_name=None):
    """Carrefour OOS toolbox: cache provider frames and prepare the empty
    results frame written during calculation.

    :param data_provider: session data provider (indexed by ``Data`` keys).
    :param output: calculation output sink.
    :param set_name: KPI set name; stored as-is (may be None).
    """
    self.data_provider = data_provider
    self.output = output
    self.products = self.data_provider[Data.ALL_PRODUCTS]
    self.k_engine = BaseCalculationsGroup(data_provider, output)
    self.project_name = data_provider.project_name
    self.session_uid = self.data_provider.session_uid
    self.products = self.data_provider[Data.ALL_PRODUCTS]
    self.match_product_in_scene = self.data_provider[Data.MATCHES]
    self.visit_date = self.data_provider[Data.VISIT_DATE]
    self.session_info = SessionInfo(data_provider)
    self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
    # The fetcher needs scene-item facts, so it is only built for non-empty
    # sessions. NOTE(review): source was whitespace-mangled; the guard is
    # assumed to cover the fetcher and store_number only — confirm.
    if not self.scif.empty:
        self.kpi_fetcher = CARREFOUR_ARKPIFetcher(self.project_name, self.scif,
                                                  self.match_product_in_scene)
        self.store_number = self.kpi_fetcher.get_store_number()
    self.session_fk = self.data_provider[Data.SESSION_INFO]['pk'].iloc[0]
    self.set_name = set_name
    # Schema of the per-product availability results written by this toolbox.
    self.results_columns = ['session_id', 'product_fk', 'qty_in_store_stock',
                            'qty_in_dc', 'on_shelf', 'oos_but_in_store_stock',
                            'oos_but_in_dc', 'full_oos', 'sales_price']
    self.results = pd.DataFrame([], columns=self.results_columns)
    self.results_list = []
def __init__(self, data_provider, output):
    """KPI generator (Pepsico, Common + CommonV1): cache provider frames,
    filter irrelevant products out of scif and resolve session-scoped data.
    """
    self.output = output
    self.data_provider = data_provider
    self.common = Common(self.data_provider)
    self.common_v1 = CommonV1(self.data_provider)
    self.project_name = self.data_provider.project_name
    self.session_uid = self.data_provider.session_uid
    self.products = self.data_provider[Data.PRODUCTS]
    self.all_products = self.data_provider[Data.ALL_PRODUCTS]
    self.match_product_in_scene = self.data_provider[Data.MATCHES]
    self.visit_date = self.data_provider[Data.VISIT_DATE]
    self.session_info = self.data_provider[Data.SESSION_INFO]
    self.scene_info = self.data_provider[Data.SCENES_INFO]
    self.store_id = self.data_provider[Data.STORE_FK]
    self.store_info = self.data_provider[Data.STORE_INFO]
    # Visit type comes from the store's additional attribute 2.
    self.visit_type = self.store_info[
        Const.ADDITIONAL_ATTRIBUTE_2].values[0]
    self.all_templates = self.data_provider[Data.ALL_TEMPLATES]
    self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
    # Drop irrelevant products from scif.
    self.scif = self.scif.loc[~(self.scif[Const.PRODUCT_TYPE] == Const.IRRELEVANT)]  # Vitaly's request
    self.rds_conn = PSProjectConnector(self.project_name, DbUsers.CalculationEng)
    self.kpi_static_data = self.common.get_kpi_static_data()
    self.kpi_results_queries = []
    self.k_engine = BaseCalculationsGroup(data_provider, output)
    self.toolbox = GENERALToolBox(data_provider)
    self.assortment = Assortment(self.data_provider, self.output,
                                 common=self.common_v1)
    # Session-derived lookups require a non-empty scif.
    # NOTE(review): source was whitespace-mangled; all three assignments are
    # assumed to sit inside this guard — confirm.
    if not self.scif.empty:
        self.pepsico_fk = self.get_relevant_pk_by_name(
            Const.MANUFACTURER, Const.PEPSICO)
        self.categories_to_calculate = self.get_relevant_categories_for_session()
        self.main_shelves = self.get_main_shelves()
class CCRUFIFAKPIToolBox: def __init__(self, data_provider, output, set_name=None): self.data_provider = data_provider self.output = output self.products = self.data_provider[Data.ALL_PRODUCTS] self.k_engine = BaseCalculationsGroup(data_provider, output) self.project_name = data_provider.project_name self.session_uid = self.data_provider.session_uid self.products = self.data_provider[Data.ALL_PRODUCTS] self.match_product_in_scene = self.data_provider[Data.MATCHES] self.templates = self.data_provider[Data.ALL_TEMPLATES] self.visit_date = self.data_provider[Data.VISIT_DATE] self.scenes_info = self.data_provider[Data.SCENES_INFO] self.rds_conn = PSProjectConnector(self.project_name, DbUsers.CalculationEng) self.session_info = SessionInfo(data_provider) self.store_id = self.data_provider[Data.STORE_FK] self.scif = self.data_provider[Data.SCENE_ITEM_FACTS] if set_name is None: self.set_name = self.get_set() else: self.set_name = set_name self.kpi_fetcher = CCRUFIFAQueries(self.project_name, self.scif, self.match_product_in_scene, self.set_name) self.survey_response = self.data_provider[Data.SURVEY_RESPONSES] self.sales_rep_fk = self.data_provider[ Data.SESSION_INFO]['s_sales_rep_fk'].iloc[0] self.session_fk = self.data_provider[Data.SESSION_INFO]['pk'].iloc[0] self.thresholds_and_results = {} self.result_df = [] self.kpi_results_queries = [] def change_set(self, set_name): self.set_name = set_name self.kpi_fetcher = CCRUFIFAQueries(self.project_name, self.scif, self.match_product_in_scene, self.set_name) def get_static_list(self, type): object_static_list = [] if type == 'SKUs': object_static_list = self.products[ 'product_ean_code'].values.tolist() elif type == 'CAT' or type == 'MAN in CAT': object_static_list = self.products['category'].values.tolist() elif type == 'BRAND': object_static_list = self.products['brand_name'].values.tolist() elif type == 'MAN': object_static_list = self.products[ 'manufacturer_name'].values.tolist() else: Log.warning( 'The type {} does 
not exist in the data base'.format(type)) return object_static_list def insert_new_kpis_old(self, project, kpi_list=None): """ This function inserts KPI metadata to static tables """ session = OrmSession(project, writable=True) try: voting_process_pk_dic = {} with session.begin(subtransactions=True): for kpi in kpi_list.values()[0]: if kpi.get('To include in first calculation?') == 4: Log.info('Trying to write KPI {}'.format( kpi.get('KPI name Eng'))) # # kpi_level_1_hierarchy = pd.DataFrame(data=[('Canteen', None, None, 'WEIGHTED_AVERAGE', # # 1, '2016-11-28', None, None)], # # columns=['name', 'short_name', 'eng_name', 'operator', # # 'version', 'valid_from', 'valid_until', 'delete_date']) # # self.output.add_kpi_hierarchy(Keys.KPI_LEVEL_1, kpi_level_1_hierarchy) # if kpi.get('level') == 2: # kpi_level_2_hierarchy = pd.DataFrame(data=[ # (1, kpi.get('KPI Name ENG'), None, None, None, None, kpi.get('weight'), 1, '2016-12-25', None, None)], # columns=['kpi_level_1_fk', 'name', 'short_name', 'eng_name', 'operator', # 'score_func', 'original_weight', 'version', 'valid_from', 'valid_until', # 'delete_date']) # self.output.add_kpi_hierarchy(Keys.KPI_LEVEL_2, kpi_level_2_hierarchy) # elif kpi.get('level') == 3: # kpi_level_3_hierarchy = pd.DataFrame(data=[(1, kpi.get('KPI Name ENG'), None, None, None, # None, kpi.get('weight'), 1, '2016-12-25', None, None)], # columns=['kpi_level_2_fk', 'name', 'short_name', 'eng_name', 'operator', # 'score_func', 'original_weight', 'version', 'valid_from', # 'valid_until', 'delete_date']) # self.output.add_kpi_hierarchy(Keys.KPI_LEVEL_3, kpi_level_3_hierarchy) # else: # Log.info('No KPIs to insert') # self.data_provider.export_kpis_hierarchy(self.output) # insert_trans = """ # INSERT INTO static.kpi_level_1 (name, # operator, version, valid_from) # VALUES ('{0}', '{1}', '{2}', '{3}');""".format('test', 'WEIGHTED_AVERAGE', 1, # '2016-11-28') # insert_trans_level1 = """ # INSERT INTO static.kpi_set (name, # missing_kpi_score, enable, 
normalize_weight, expose_to_api, is_in_weekly_report) # VALUES ('{0}', '{1}', '{2}', '{3}', '{4}', '{5}');""".format('Hypermarket', 'Bad', # 'Y', 'N', 'N', 'N') # Log.get_logger().debug(insert_trans_level1) # result = session.execute(insert_trans_level1) insert_trans_level2 = """ INSERT INTO static.kpi (kpi_set_fk, logical_operator, weight, display_text) VALUES ('{0}', '{1}', '{2}', '{3}');""".format( 34, kpi.get('Logical Operator'), kpi.get('KPI Weight'), kpi.get('KPI name Eng')) # # # # # # # # insert_trans = """ # # # # # UPDATE static.kpi_level_1 SET short_name=null, eng_name=null, valid_until=null, delete_date=null # # # # # WHERE pk=1;""" # Log.get_logger().debug(insert_trans_level2) result = session.execute(insert_trans_level2) kpi_fk = result.lastrowid insert_trans_level3 = """ INSERT INTO static.atomic_kpi (kpi_fk, name, description, display_text, presentation_order, display) VALUES ('{0}', '{1}', '{2}', '{3}', '{4}', '{5}');""".format( kpi_fk, kpi.get('KPI name Eng'), kpi.get('KPI name Eng'), kpi.get('KPI name Eng'), 1, 'Y') Log.get_logger().debug(insert_trans_level3) result = session.execute(insert_trans_level3) # voting_process_pk = result.lastrowid # voting_process_pk_dic[kpi] = voting_process_pk # Log.info('KPI level 1 was inserted to the DB') # Log.info('Inserted voting process {} in project {} SQL DB'.format(voting_process_pk, project)) # voting_session_fk = self.insert_production_session(voting_process_pk, kpi, session) # self.insert_production_tag(voting_process_pk, voting_session_fk, kpi, session) session.close() # return voting_process_pk_dic return except Exception as e: Log.error( 'Caught exception while inserting new voting process to SQL: {}' .format(str(e))) return -1 def insert_new_kpis(self, project, kpi_list): """ This function is used to insert KPI metadata to the new tables, and currently not used """ for kpi in kpi_list.values()[0]: if kpi.get('To include in first calculation?') == 7: # kpi_level_1_hierarchy = 
pd.DataFrame(data=[('Canteen', None, None, 'WEIGHTED_AVERAGE', # 1, '2016-11-28', None, None)], # columns=['name', 'short_name', 'eng_name', 'operator', # 'version', 'valid_from', 'valid_until', 'delete_date']) # self.output.add_kpi_hierarchy(Keys.KPI_LEVEL_1, kpi_level_1_hierarchy) if kpi.get('level') == 2: kpi_level_2_hierarchy = pd.DataFrame( data=[(3, kpi.get('KPI name Eng'), None, None, None, kpi.get('score_func'), kpi.get('KPI Weight'), 1, '2016-12-01', None, None)], columns=[ 'kpi_level_1_fk', 'name', 'short_name', 'eng_name', 'operator', 'score_func', 'original_weight', 'version', 'valid_from', 'valid_until', 'delete_date' ]) self.output.add_kpi_hierarchy(Keys.KPI_LEVEL_2, kpi_level_2_hierarchy) elif kpi.get('level') == 3: kpi_level_3_hierarchy = pd.DataFrame( data=[(82, kpi.get('KPI Name'), None, None, 'PRODUCT AVAILABILITY', None, kpi.get('KPI Weight'), 1, '2016-12-25', None, None)], columns=[ 'kpi_level_2_fk', 'name', 'short_name', 'eng_name', 'operator', 'score_func', 'original_weight', 'version', 'valid_from', 'valid_until', 'delete_date' ]) self.output.add_kpi_hierarchy(Keys.KPI_LEVEL_3, kpi_level_3_hierarchy) self.data_provider.export_kpis_hierarchy(self.output) else: Log.info('No KPIs to insert') def check_number_of_facings_given_answer_to_survey(self, params): set_total_res = 0 for p in params.values()[0]: if p.get( 'Formula' ) != "number of facings given answer to survey" or not p.get( "children"): continue kpi_fk = self.kpi_fetcher.get_kpi_fk(p.get('KPI name Eng')) first_atomic_score = 0 children = map(int, p.get("children").split(", ")) for c in params.values()[0]: if c.get("KPI ID") in children and c.get( "Formula") == "atomic answer to survey": first_atomic_score = self.check_answer_to_survey_level3(c) # saving to DB attributes_for_level3 = self.create_attributes_for_level3_df( c, first_atomic_score, kpi_fk) self.write_to_db_result(attributes_for_level3, 'level3') second_atomic_res = 0 for c in params.values()[0]: if c.get("KPI ID") in 
children and c.get( "Formula") == "atomic number of facings": second_atomic_res = self.calculate_availability(c) second_atomic_score = self.calculate_score( second_atomic_res, c) # write to DB attributes_for_level3 = self.create_attributes_for_level3_df( c, second_atomic_score, kpi_fk) self.write_to_db_result(attributes_for_level3, 'level3') if first_atomic_score > 0: kpi_total_res = second_atomic_res else: kpi_total_res = 0 score = self.calculate_score(kpi_total_res, p) set_total_res += score * p.get('KPI Weight') # saving to DB attributes_for_level2 = self.create_attributes_for_level2_df( p, score, kpi_fk) self.write_to_db_result(attributes_for_level2, 'level2') return set_total_res def check_answer_to_survey_level3(self, params): d = {'Yes': u'Да', 'No': u'Нет'} score = 0 survey_data = self.survey_response.loc[ self.survey_response['question_text'] == params.get('Values')] if not survey_data['selected_option_text'].empty: result = survey_data['selected_option_text'].values[0] targets = [ d.get(target) if target in d.keys() else target for target in unicode(params.get('Target')).split(", ") ] if result in targets: score = 100 else: score = 0 elif not survey_data['number_value'].empty: result = survey_data['number_value'].values[0] if result == params.get('Target'): score = 100 else: score = 0 else: Log.warning('No survey data for this session') return score def check_availability(self, params): """ This function is used to calculate availability given a set pf parameters """ set_total_res = 0 availability_types = ['SKUs', 'BRAND', 'MAN', 'CAT', 'MAN in CAT'] formula_types = ['number of SKUs', 'number of facings'] for p in params.values()[0]: if p.get('Type') not in availability_types or p.get( 'Formula') not in formula_types: continue if p.get('level') != 2: continue is_atomic = False kpi_total_res = 0 kpi_fk = self.kpi_fetcher.get_kpi_fk(p.get('KPI name Eng')) if p.get('children') is not None: is_atomic = True children = [ int(child) for child in 
str(p.get('children')).split(', ') ] atomic_scores = [] for child in params.values()[0]: if child.get('KPI ID') in children: if child.get( 'children') is not None: # atomic of atomic atomic_score = 0 atomic_children = [ int(a_child) for a_child in str( child.get('children')).split(', ') ] for atomic_child in params.values()[0]: if atomic_child.get( 'KPI ID') in atomic_children: atomic_child_res = self.calculate_availability( atomic_child) atomic_child_score = self.calculate_score( atomic_child_res, atomic_child) atomic_score += atomic_child.get( 'additional_weight', 1.0 / len(atomic_children) ) * atomic_child_score else: atomic_res = self.calculate_availability(child) atomic_score = self.calculate_score( atomic_res, child) # write to DB attributes_for_table3 = self.create_attributes_for_level3_df( child, atomic_score, kpi_fk) self.write_to_db_result(attributes_for_table3, 'level3', kpi_fk) if p.get('Logical Operator') in ('OR', 'AND', 'MAX'): atomic_scores.append(atomic_score) elif p.get('Logical Operator') == 'SUM': kpi_total_res += child.get( 'additional_weight', 1 / len(children)) * atomic_score if p.get('Logical Operator') == 'OR': if len([sc for sc in atomic_scores if sc > 0]) > 0: score = 100 else: score = 0 elif p.get('Logical Operator') == 'AND': if 0 not in atomic_scores: score = 100 else: score = 0 elif p.get('Logical Operator') == 'SUM': score = kpi_total_res / 100.0 if score < p.get('score_min', 0): score = 0 elif score > p.get('score_max', 1): score = p.get('score_max', 1) score *= 100 elif p.get('Logical Operator') == 'MAX': if atomic_scores: score = max(atomic_scores) if not ((score > p.get('score_min', 0) * 100) and (score < p.get('score_max', 1) * 100)): score = 0 else: score = 0 else: kpi_total_res = self.calculate_availability(p) score = self.calculate_score(kpi_total_res, p) # Saving to old tables attributes_for_table2 = self.create_attributes_for_level2_df( p, score, kpi_fk) self.write_to_db_result(attributes_for_table2, 'level2', kpi_fk) if 
not is_atomic: # saving also to level3 in case this KPI has only one level attributes_for_table3 = self.create_attributes_for_level3_df( p, score, kpi_fk) self.write_to_db_result(attributes_for_table3, 'level3', kpi_fk) set_total_res += score * p.get('KPI Weight') return set_total_res def calculate_availability(self, params, scenes=[]): values_list = str(params.get('Values')).split(', ') # object_static_list = self.get_static_list(params.get('Type')) if not scenes: scenes = self.get_relevant_scenes(params) if params.get("Form Factor"): form_factors = [ str(form_factor) for form_factor in params.get("Form Factor").split(", ") ] else: form_factors = [] if params.get("Size"): sizes = [ float(size) for size in str(params.get('Size')).split(", ") ] sizes = [ int(size) if int(size) == size else size for size in sizes ] else: sizes = [] if params.get("Products to exclude"): products_to_exclude = [int(float(product)) for product in \ str(params.get("Products to exclude")).split(", ")] else: products_to_exclude = [] if params.get("Form factors to exclude"): form_factors_to_exclude = str( params.get("Form factors to exclude")).split(", ") else: form_factors_to_exclude = [] object_facings = self.kpi_fetcher.get_object_facings( scenes, values_list, params.get('Type'), formula=params.get('Formula'), shelves=params.get("shelf_number", None), size=sizes, form_factor=form_factors, products_to_exclude=products_to_exclude, form_factors_to_exclude=form_factors_to_exclude) return object_facings def get_relevant_scenes(self, params): all_scenes = self.scenes_info['scene_fk'].unique().tolist() filtered_scenes = [] scenes_data = {} location_data = {} sub_location_data = {} for scene in all_scenes: scene_type = list(self.scif.loc[self.scif['scene_id'] == scene] ['template_name'].values) if scene_type: scene_type = scene_type[0] if scene_type not in scenes_data.keys(): scenes_data[scene_type] = [] scenes_data[scene_type].append(scene) filtered_scenes.append(scene) else: Log.warning( 'Scene 
{} is not defined in reporting.scene_item_facts table' .format(scene)) continue location = list(self.scif.loc[self.scif['scene_id'] == scene] ['location_type'].values) if location: location = location[0] if location not in location_data.keys(): location_data[location] = [] location_data[location].append(scene) sub_location = list( self.scif.loc[self.scif['template_name'] == scene_type]['additional_attribute_2'].values) if sub_location: sub_location = sub_location[0] if sub_location not in sub_location_data.keys(): sub_location_data[sub_location] = [] sub_location_data[sub_location].append(scene) include_list = [] if not params.get('Scenes to include') and not params.get('Locations to include') and \ not params.get('Sub locations to include'): include_list.extend(filtered_scenes) else: if params.get('Scenes to include'): scenes_to_include = params.get('Scenes to include').split(', ') for scene in scenes_to_include: if scene in scenes_data.keys(): include_list.extend(scenes_data[scene]) if params.get('Locations to include'): locations_to_include = params.get( 'Locations to include').split(', ') for location in locations_to_include: if location in location_data.keys(): include_list.extend(location_data[location]) if params.get('Sub locations to include'): sub_locations_to_include = str( params.get('Sub locations to include')).split(', ') for sub_location in sub_locations_to_include: if sub_location in sub_location_data.keys(): include_list.extend(sub_location_data[sub_location]) include_list = list(set(include_list)) exclude_list = [] if params.get('Scenes to exclude'): scenes_to_exclude = params.get('Scenes to exclude').split(', ') for scene in scenes_to_exclude: if scene in scenes_data.keys(): exclude_list.extend(scenes_data[scene]) if params.get('Locations to exclude'): locations_to_exclude = params.get('Locations to exclude').split( ', ') for location in locations_to_exclude: if location in location_data.keys(): exclude_list.extend(location_data[location]) if 
params.get('Sub locations to exclude'): sub_locations_to_exclude = str( params.get('Sub locations to exclude')).split(', ') for sub_location in sub_locations_to_exclude: if sub_location in sub_location_data.keys(): exclude_list.extend(sub_location_data[sub_location]) exclude_list = list(set(exclude_list)) relevant_scenes = [] for scene in include_list: if scene not in exclude_list: relevant_scenes.append(scene) return relevant_scenes def check_number_of_scenes(self, params): """ This function is used to calculate number of scenes """ set_total_res = 0 for p in params.values()[0]: if p.get('Formula') != 'number of scenes': continue kpi_total_res = 0 scenes = self.get_relevant_scenes(p) if p.get('Type') == 'SCENES': values_list = [str(s) for s in p.get('Values').split(', ')] for scene in scenes: try: scene_type = self.scif.loc[ self.scif['scene_id'] == scene]['template_name'].values[0] if scene_type in values_list: res = 1 else: res = 0 kpi_total_res += res except IndexError as e: continue else: # checking for number of scenes with a complex condition (only certain products/brands/etc) p_copy = p.copy() p_copy["Formula"] = "number of facings" for scene in scenes: if self.calculate_availability(p_copy, scenes=[scene]) > 0: res = 1 else: res = 0 kpi_total_res += res score = self.calculate_score(kpi_total_res, p) set_total_res += score * p.get('KPI Weight') kpi_fk = self.kpi_fetcher.get_kpi_fk(p.get('KPI name Eng')) if p.get('level') == 2: attributes_for_level2 = self.create_attributes_for_level2_df( p, score, kpi_fk) self.write_to_db_result(attributes_for_level2, 'level2', kpi_fk) attributes_for_level3 = self.create_attributes_for_level3_df( p, score, kpi_fk) self.write_to_db_result(attributes_for_level3, 'level3', kpi_fk) return set_total_res def check_number_of_doors(self, params): set_total_res = 0 for p in params.values()[0]: if p.get('Type') != 'DOORS' or p.get( 'Formula') != 'number of doors': continue kpi_total_res = self.calculate_number_of_doors(p) score = 
self.calculate_score(kpi_total_res, p) set_total_res += score * p.get('KPI Weight') # writing to DB kpi_fk = self.kpi_fetcher.get_kpi_fk(p.get('KPI name Eng')) attributes_for_level3 = self.create_attributes_for_level3_df( p, score, kpi_fk) self.write_to_db_result(attributes_for_level3, 'level3', kpi_fk) if p.get('level') == 2: attributes_for_level2 = self.create_attributes_for_level2_df( p, score, kpi_fk) self.write_to_db_result(attributes_for_level2, 'level2', kpi_fk) return set_total_res def calculate_number_of_doors(self, params): total_res = 0 relevant_scenes = self.get_relevant_scenes(params) for scene in relevant_scenes: res = 0 scene_type = self.scif.loc[self.scif['scene_id'] == scene]['template_name'].values[0] num_of_doors = self.templates[ self.templates['template_name'] == scene_type]['additional_attribute_1'].values[0] if num_of_doors is not None: res = float(num_of_doors) total_res += res return total_res def check_survey_answer(self, params): """ This function is used to calculate survey answer according to given target """ set_total_res = 0 d = {'Yes': u'Да', 'No': u'Нет'} for p in params.values()[0]: kpi_total_res = 0 score = 0 # default score if p.get('Type') != 'SURVEY' or p.get( 'Formula') != 'answer for survey': continue survey_data = self.survey_response.loc[ self.survey_response['question_text'] == p.get('Values')] if not survey_data['selected_option_text'].empty: result = survey_data['selected_option_text'].values[0] targets = [ d.get(target) if target in d.keys() else target for target in unicode(p.get('Target')).split(", ") ] if result in targets: score = 100 else: score = 0 elif not survey_data['number_value'].empty: result = survey_data['number_value'].values[0] if result == p.get('Target'): score = 100 else: score = 0 else: Log.warning('No survey data for this session') set_total_res += score * p.get('KPI Weight') # score = self.calculate_score(kpi_total_res, p) if p.get('level' ) == 3: # todo should be a separate generic function # 
level3_output = {'result': d.get(result), 'score': score, # 'target': p.get('Target'), 'weight': p.get('KPI Weight'), # 'kpi_name': p.get('KPI name Eng')} # self.output.add_kpi_results(Keys.KPI_LEVEL_3_RESULTS, self.convert_kpi_level_3(level3_output)) kpi_fk = self.kpi_fetcher.get_kpi_fk(p.get('KPI name Eng')) attributes_for_level3 = self.create_attributes_for_level3_df( p, score, kpi_fk) self.write_to_db_result(attributes_for_level3, 'level3') elif p.get('level') == 2: # level2_output = {'result': d.get(result), 'score': score, # 'target': p.get('Target'), 'weight': p.get('KPI Weight'), # 'kpi_name': p.get('KPI name Eng')} # self.output.add_kpi_results(Keys.KPI_LEVEL_2_RESULTS, self.convert_kpi_level_2(level2_output)) kpi_fk = self.kpi_fetcher.get_kpi_fk(p.get('KPI name Eng')) attributes_for_level3 = self.create_attributes_for_level3_df( p, score, kpi_fk) self.write_to_db_result(attributes_for_level3, 'level3') attributes_for_level2 = self.create_attributes_for_level2_df( p, score, kpi_fk) self.write_to_db_result(attributes_for_level2, 'level2') else: Log.warning('No level indicated for this KPI') return set_total_res def facings_sos(self, params): """ This function is used to calculate facing share of shelf """ set_total_res = 0 for p in params.values()[0]: if (p.get('Type') == 'MAN in CAT' or p.get('Type') == 'MAN') and \ p.get('Formula') in ['sos', 'sos with empty']: ratio = self.calculate_facings_sos(p) else: continue score = self.calculate_score(ratio, p) set_total_res += score * p.get('KPI Weight') # saving to DB kpi_fk = self.kpi_fetcher.get_kpi_fk(p.get('KPI name Eng')) attributes_for_level3 = self.create_attributes_for_level3_df( p, score, kpi_fk) self.write_to_db_result(attributes_for_level3, 'level3') if p.get('level') == 2: attributes_for_level2 = self.create_attributes_for_level2_df( p, score, kpi_fk) self.write_to_db_result(attributes_for_level2, 'level2') return set_total_res def calculate_facings_sos(self, params): relevant_scenes = 
self.get_relevant_scenes(params) if params.get('Formula') == 'sos with empty': if params.get('Type') == 'MAN': pop_filter = (self.scif['scene_id'].isin(relevant_scenes)) subset_filter = (self.scif[Fd.M_NAME].isin( self.kpi_fetcher.TCCC)) elif params.get('Type') == 'MAN in CAT': pop_filter = ((self.scif[Fd.CAT].isin(params.get('Values'))) & (self.scif['scene_id'].isin(relevant_scenes))) subset_filter = (self.scif[self.scif[Fd.M_NAME].isin( self.kpi_fetcher.TCCC)]) else: return 0 else: if params.get('Type') == 'MAN': pop_filter = ((self.scif['scene_id'].isin(relevant_scenes)) & (~self.scif['product_type'].isin(['Empty']))) subset_filter = (self.scif[Fd.M_NAME].isin( self.kpi_fetcher.TCCC)) elif params.get('Type') == 'MAN in CAT': pop_filter = ((self.scif[Fd.CAT].isin(params.get('Values'))) & (self.scif['scene_id'].isin(relevant_scenes)) & (~self.scif['product_type'].isin(['Empty']))) subset_filter = (self.scif[self.scif[Fd.M_NAME].isin( self.kpi_fetcher.TCCC)]) else: return 0 try: ratio = self.k_engine.calculate_sos_by_facings( pop_filter, subset_filter) except Exception as e: ratio = 0 if ratio is None: ratio = 0 return ratio def calculate_score(self, kpi_total_res, params): """ This function calculates score according to predefined score functions """ kpi_name = params.get('KPI name Eng') self.thresholds_and_results[kpi_name] = {'result': kpi_total_res} if params.get('Target') == 'range of targets': if not (params.get('target_min', 0) < kpi_total_res <= params.get( 'target_max', 100)): score = 0 if kpi_total_res < params.get('target_min', 0): self.thresholds_and_results[kpi_name][ 'threshold'] = params.get('target_min') else: self.thresholds_and_results[kpi_name][ 'threshold'] = params.get('target_max') else: self.thresholds_and_results[kpi_name][ 'threshold'] = params.get('target_min') numerator = kpi_total_res - params.get('target_min', 0) denominator = params.get('target_max', 1) - params.get( 'target_min', 0) score = (numerator / float(denominator)) * 100 
return score elif params.get('Target') == 'targets by guide': target = self.kpi_fetcher.get_category_target_by_region( params.get('Values'), self.store_id) else: target = params.get('Target') self.thresholds_and_results[kpi_name]['threshold'] = target target = float(target) if not target: score = 0 else: if params.get('score_func') == PROPORTIONAL: score = (kpi_total_res / target) * 100 if score > 100: score = 100 elif params.get('score_func') == CONDITIONAL_PROPORTIONAL: score = kpi_total_res / target if score > params.get('score_max', 1): score = params.get('score_max', 1) elif score < params.get('score_min', 0): score = 0 score *= 100 elif params.get('score_func') == 'Customer_CCRU_1': if kpi_total_res < target: score = 0 else: score = ((kpi_total_res - target) + 1) * 100 else: if kpi_total_res >= target: score = 100 else: score = 0 return score def check_number_of_skus_in_single_scene_type(self, params): """ This function calculates number of SKUs per single scene type """ set_total_res = 0 for p in params.values()[0]: if p.get('Formula') != 'number of SKUs in one scene type' or p.get( 'level') == 3: continue score = self.calculate_number_of_skus_in_single_scene_type( params, p) set_total_res += score * p.get('KPI Weight') return set_total_res def calculate_number_of_skus_in_single_scene_type(self, params, p, kpi_fk=None): if kpi_fk is None: kpi_fk = self.kpi_fetcher.get_kpi_fk(p.get('KPI name Eng')) results_dict = {} relevant_scenes = self.get_relevant_scenes(params) for scene in relevant_scenes: scene_type = self.scif.loc[self.scif['scene_id'] == scene]['template_name'].values[0] location = list(self.scif.loc[self.scif['scene_id'] == scene] ['location_type'].values) if location: location = location[0] sub_location = list( self.scif.loc[self.scif['template_name'] == scene_type]['additional_attribute_2'].values) if sub_location: sub_location = sub_location[0] if p.get('children') is not None: children_scores = [] for child in params.values()[0]: if 
child.get('KPI ID') in [ int(kpi) for kpi in p.get('children').split(', ') ]: res = self.calculate_number_of_skus_in_single_scene_type( params, child, kpi_fk) children_scores.append(res) score = max(children_scores) # saving to level2 attributes_for_table2 = self.create_attributes_for_level2_df( p, score, kpi_fk) self.write_to_db_result(attributes_for_table2, 'level2') return score else: res = self.calculate_availability(p, scenes=[scene]) type_to_split_by = scene_type if p.get('Scenes to include'): # by scene type_to_split_by = scene_type elif sub_location and p.get('Sub locations to include'): type_to_split_by = sub_location elif location and p.get('Locations to include'): type_to_split_by = location if type_to_split_by not in results_dict: results_dict[type_to_split_by] = 0 results_dict[type_to_split_by] += res results_list = [ self.calculate_score(res, p) for res in results_dict.values() ] results_list = filter(bool, results_list) # filtering the score=0 if len(results_list) == 1: score = 100 else: score = 0 # Saving to old tables if p.get( 'level' ) == 2: # saving also to level3 in case this KPI has only one level attributes_for_table2 = self.create_attributes_for_level2_df( p, score, kpi_fk) self.write_to_db_result(attributes_for_table2, 'level2') attributes_for_table3 = self.create_attributes_for_level3_df( p, score, kpi_fk) self.write_to_db_result(attributes_for_table3, 'level3') return score def write_to_db_result(self, df=None, level=None, kps_name_temp=None): """ This function writes KPI results to old tables """ if level == 'level3': df['atomic_kpi_fk'] = self.kpi_fetcher.get_atomic_kpi_fk( df['name'][0]) df['kpi_fk'] = df['kpi_fk'][0] df_dict = df.to_dict() df_dict.pop('name', None) query = insert(df_dict, KPI_RESULT) self.kpi_results_queries.append(query) elif level == 'level2': kpi_name = df['kpk_name'][0].encode('utf-8') df['kpi_fk'] = self.kpi_fetcher.get_kpi_fk(kpi_name) df_dict = df.to_dict() # df_dict.pop("kpk_name", None) query = 
insert(df_dict, KPK_RESULT) self.kpi_results_queries.append(query) elif level == 'level1': df['kpi_set_fk'] = self.kpi_fetcher.get_kpi_set_fk() df_dict = df.to_dict() query = insert(df_dict, KPS_RESULT) self.kpi_results_queries.append(query) def commit_results_data(self): cur = self.rds_conn.db.cursor() delete_queries = self.kpi_fetcher.get_delete_session_results( self.session_uid) for query in delete_queries: cur.execute(query) for query in self.kpi_results_queries: cur.execute(query) self.rds_conn.db.commit() return def create_attributes_for_level2_df(self, params, score, kpi_fk): """ This function creates a data frame with all attributes needed for saving in level 2 tables """ score = round(score) attributes_for_table2 = pd.DataFrame( [(self.session_uid, self.store_id, self.visit_date.isoformat(), kpi_fk, params.get('KPI name Eng').replace("'", "\\'"), score)], columns=[ 'session_uid', 'store_fk', 'visit_date', 'kpi_fk', 'kpk_name', 'score' ]) return attributes_for_table2 def create_attributes_for_level3_df(self, params, score, kpi_fk): """ This function creates a data frame with all attributes needed for saving in level 3 tables """ score = round(score) if self.thresholds_and_results.get(params.get("KPI name Eng")): result = self.thresholds_and_results[params.get( "KPI name Eng")]['result'] threshold = self.thresholds_and_results[params.get( "KPI name Eng")]['threshold'] else: result = threshold = 0 attributes_for_table3 = pd.DataFrame( [(params.get('KPI name Rus').encode('utf-8').replace( "'", "\\'"), self.session_uid, self.set_name, self.store_id, self.visit_date.isoformat(), datetime.datetime.utcnow().isoformat(), score, kpi_fk, None, threshold, result, params.get('KPI name Eng').replace( "'", "\\'"))], columns=[ 'display_text', 'session_uid', 'kps_name', 'store_fk', 'visit_date', 'calculation_time', 'score', 'kpi_fk', 'atomic_kpi_fk', 'threshold', 'result', 'name' ]) return attributes_for_table3 def check_number_of_doors_given_sos(self, params): 
set_total_res = 0 for p in params.values()[0]: if p.get('Formula') != "number of doors given sos" or not p.get( "children"): continue kpi_fk = self.kpi_fetcher.get_kpi_fk(p.get('KPI name Eng')) children = [ int(child) for child in str(p.get("children")).split(", ") ] first_atomic_score = second_atomic_res = 0 for c in params.values()[0]: if c.get("KPI ID") in children and c.get( "Formula") == "atomic sos": first_atomic_res = self.calculate_facings_sos(c) first_atomic_score = self.calculate_score( first_atomic_res, c) # write to DB attributes_for_level3 = self.create_attributes_for_level3_df( c, first_atomic_score, kpi_fk) self.write_to_db_result(attributes_for_level3, 'level3') for c in params.values()[0]: if c.get("KPI ID") in children and c.get( "Formula") == "atomic number of doors": second_atomic_res = self.calculate_number_of_doors(c) second_atomic_score = self.calculate_score( second_atomic_res, c) # write to DB attributes_for_level3 = self.create_attributes_for_level3_df( c, second_atomic_score, kpi_fk) self.write_to_db_result(attributes_for_level3, 'level3') if first_atomic_score > 0: kpi_total_res = second_atomic_res else: kpi_total_res = 0 score = self.calculate_score(kpi_total_res, p) set_total_res += score * p.get('KPI Weight') # saving to DB attributes_for_level2 = self.create_attributes_for_level2_df( p, score, kpi_fk) self.write_to_db_result(attributes_for_level2, 'level2') return set_total_res def check_number_of_doors_given_number_of_sku(self, params): set_total_res = 0 for p in params.values()[0]: if p.get('Formula' ) != "number of doors given number of SKUs" or not p.get( "children"): continue kpi_fk = self.kpi_fetcher.get_kpi_fk(p.get('KPI name Eng')) children = [ int(child) for child in str(p.get("children")).split(", ") ] first_atomic_scores = [] for c in params.values()[0]: if c.get("KPI ID") in children and c.get( "Formula") == "atomic number of SKUs": first_atomic_res = self.calculate_availability(c) first_atomic_score = 
self.calculate_score( first_atomic_res, c) first_atomic_scores.append(first_atomic_score) # write to DB attributes_for_level3 = self.create_attributes_for_level3_df( c, first_atomic_score, kpi_fk) self.write_to_db_result(attributes_for_level3, 'level3') second_atomic_res = 0 for c in params.values()[0]: if c.get("KPI ID") in children and c.get( "Formula") == "atomic number of doors": second_atomic_res = self.calculate_number_of_doors(c) second_atomic_score = self.calculate_score( second_atomic_res, c) # write to DB attributes_for_level3 = self.create_attributes_for_level3_df( c, second_atomic_score, kpi_fk) self.write_to_db_result(attributes_for_level3, 'level3') if 0 not in first_atomic_scores: # if all assortment atomics have score > 0 kpi_total_res = second_atomic_res else: kpi_total_res = 0 score = self.calculate_score(kpi_total_res, p) set_total_res += score * p.get('KPI Weight') # saving to DB attributes_for_level2 = self.create_attributes_for_level2_df( p, score, kpi_fk) self.write_to_db_result(attributes_for_level2, 'level2') return set_total_res def get_set(self): query = """ select ss.pk , ss.additional_attribute_12 from static.stores ss join probedata.session ps on ps.store_fk=ss.pk where ss.delete_date is null and ps.session_uid = '{}'; """.format(self.session_uid) cur = self.rds_conn.db.cursor() cur.execute(query) res = cur.fetchall() df = pd.DataFrame(list(res), columns=['store_fk', 'additional_attribute_12']) return df['additional_attribute_12'][0]
class RNBDEGENERALToolBox: EXCLUDE_FILTER = 0 INCLUDE_FILTER = 1 EXCLUDE_EMPTY = 0 INCLUDE_EMPTY = 1 STRICT_MODE = ALL = 1000 EMPTY = 'Empty' DEFAULT = 'Default' TEMPLATES_PATH = 'RNBDE_templates/' DIAGEO = 'Diageo' ASSORTMENT = 'assortment' AVAILABILITY = 'availability' RELEVANT_FOR_STORE = 'Y' IRRELEVANT_FOR_STORE = 'N' OR_OTHER_PRODUCTS = 'Or' UNLIMITED_DISTANCE = 'General' # Templates fields # FORMULA = 'Formula' # Availability KPIs PRODUCT_NAME = PRODUCT_NAME PRODUCT_EAN_CODE = 'Leading Product EAN' PRODUCT_EAN_CODE2 = 'Product EAN' ADDITIONAL_SKUS = '1st Follower Product EAN' ENTITY_TYPE = 'Entity Type' TARGET = 'Target' # POSM KPIs DISPLAY_NAME = 'Product Name' # Relative Position CHANNEL = 'Channel' LOCATION = 'Primary "In store location"' TESTED = 'Tested EAN' ANCHOR = 'Anchor EAN' TOP_DISTANCE = 'Up to (above) distance (by shelves)' BOTTOM_DISTANCE = 'Up to (below) distance (by shelves)' LEFT_DISTANCE = 'Up to (Left) Distance (by SKU facings)' RIGHT_DISTANCE = 'Up to (right) distance (by SKU facings)' # Block Together BRAND_NAME = 'Brand Name' SUB_BRAND_NAME = 'Brand Variant' VISIBILITY_PRODUCTS_FIELD = 'additional_attribute_2' BRAND_POURING_FIELD = 'additional_attribute_1' ENTITY_TYPE_CONVERTER = { 'SKUs': 'product_ean_code', 'Brand': 'brand_name', 'Sub brand': 'sub_brand_name', 'Category': 'category', 'display': 'display_name' } KPI_SETS = [ 'MPA', 'New Products', 'POSM', 'Secondary', 'Relative Position', 'Brand Blocking', 'Brand Pouring', 'Visible to Customer' ] KPI_SETS_WITH_PRODUCT_AS_NAME = ['MPA', 'New Products', 'POSM'] KPI_SETS_WITH_PERCENT_AS_SCORE = [ 'Share of Assortment', 'Linear Share of Shelf vs. 
Target', 'Blocked Together', 'Shelf Level', 'OSA' ] KPI_SETS_WITHOUT_A_TEMPLATE = ['Secondary', 'Visible to Customer'] KPI_NAME = KPI_NAME SHELF_TARGET = 'Target Shelf' def __init__(self, data_provider, output, **kwargs): self.k_engine = BaseCalculationsGroup(data_provider, output) self.data_provider = data_provider self.project_name = self.data_provider.project_name self.session_uid = self.data_provider.session_uid self.scif = self.data_provider[Data.SCENE_ITEM_FACTS] self.all_products = self.data_provider[Data.ALL_PRODUCTS] self.survey_response = self.data_provider[Data.SURVEY_RESPONSES] self.amz_conn = StorageFactory.get_connector(BUCKET) self.templates_path = self.TEMPLATES_PATH + self.project_name + '/' self.local_templates_path = os.path.join(CACHE_PATH, 'templates') self.cloud_templates_path = '{}{}/{}'.format(self.TEMPLATES_PATH, self.project_name, {}) for data in kwargs.keys(): setattr(self, data, kwargs[data]) @property def position_graphs(self): if not hasattr(self, '_position_graphs'): self._position_graphs = RNBDEPositionGraphs(self.data_provider) return self._position_graphs @property def match_product_in_scene(self): if not hasattr(self, '_match_product_in_scene'): self._match_product_in_scene = self.position_graphs.match_product_in_scene return self._match_product_in_scene def check_survey_answer(self, survey_text, target_answer): """ :param survey_text: The name of the survey in the DB. :param target_answer: The required answer/s for the KPI to pass. :return: True if the answer matches the target; otherwise - False. 
""" if not isinstance(survey_text, (list, tuple)): entity = 'question_text' value = survey_text else: entity, value = survey_text survey_data = self.survey_response[self.survey_response[entity].isin( value)] if survey_data.empty: Log.warning('Survey with {} = {} doesn\'t exist'.format( entity, value)) return False answer_field = 'selected_option_text' if not survey_data[ 'selected_option_text'].empty else 'number_value' if target_answer in survey_data[answer_field].values.tolist(): return True else: return False def calculate_number_of_scenes(self, **filters): """ :param filters: These are the parameters which the data frame is filtered by. :return: The number of scenes matching the filtered Scene Item Facts data frame. """ filtered_scif = self.scif[self.get_filter_condition( self.scif, **filters)] number_of_scenes = len(filtered_scif['scene_id'].unique()) return number_of_scenes def calculate_availability(self, **filters): """ :param filters: These are the parameters which the data frame is filtered by. :return: Total number of SKUs facings appeared in the filtered Scene Item Facts data frame. """ filtered_scif = self.scif[self.get_filter_condition( self.scif, **filters)] availability = filtered_scif['facings'].sum() return availability def calculate_assortment(self, assortment_entity='product_ean_code', **filters): """ :param filters: These are the parameters which the data frame is filtered by. :param assortment_entity: This is the entity on which the assortment is calculated. :return: Number of unique SKUs appeared in the filtered Scene Item Facts data frame. """ filtered_scif = self.scif[self.get_filter_condition( self.scif, **filters)] assortment = len(filtered_scif[assortment_entity].unique()) return assortment def calculate_share_of_shelf(self, sos_filters=None, include_empty=EXCLUDE_EMPTY, **general_filters): """ :param sos_filters: These are the parameters on which ths SOS is calculated (out of the general DF). 
:param include_empty: This dictates whether Empty-typed SKUs are included in the calculation. :param general_filters: These are the parameters which the general data frame is filtered by. :return: The ratio of the SOS. """ if include_empty == self.EXCLUDE_EMPTY: general_filters['product_type'] = (self.EMPTY, self.EXCLUDE_FILTER) pop_filter = self.get_filter_condition(self.scif, **general_filters) subset_filter = self.get_filter_condition(self.scif, **sos_filters) try: ratio = self.k_engine.calculate_sos_by_facings( pop_filter=pop_filter, subset_filter=subset_filter) except: ratio = 0 if not isinstance(ratio, (float, int)): ratio = 0 return ratio def calculate_products_on_edge(self, min_number_of_facings=1, min_number_of_shleves=1, **filters): """ :param min_number_of_facings: Minimum number of edge facings for KPI to pass. :param min_number_of_shleves: Minimum number of different shelves with edge facings for KPI to pass. :param filters: This are the parameters which dictate the relevant SKUs for the edge calculation. 
:return: A tuple: (Number of scenes which pass, Total number of relevant scenes) """ filters, relevant_scenes = self.separate_location_filters_from_product_filters( **filters) if len(relevant_scenes) == 0: return 0, 0 number_of_edge_scenes = 0 for scene in relevant_scenes: edge_facings = pd.DataFrame( columns=self.match_product_in_scene.columns) matches = self.match_product_in_scene[ self.match_product_in_scene['scene_fk'] == scene] for shelf in matches['shelf_number'].unique(): shelf_matches = matches[matches['shelf_number'] == shelf] if not shelf_matches.empty: shelf_matches = shelf_matches.sort_values( by=['bay_number', 'facing_sequence_number']) edge_facings = edge_facings.append(shelf_matches.iloc[0]) if len(edge_facings) > 1: edge_facings = edge_facings.append( shelf_matches.iloc[-1]) edge_facings = edge_facings[self.get_filter_condition( edge_facings, **filters)] if len(edge_facings) >= min_number_of_facings \ and len(edge_facings['shelf_number'].unique()) >= min_number_of_shleves: number_of_edge_scenes += 1 return number_of_edge_scenes, len(relevant_scenes) def calculate_eye_level_assortment(self, eye_level_configurations=DEFAULT, min_number_of_products=ALL, **filters): """ :param eye_level_configurations: A data frame containing information about shelves to ignore (==not eye level) for every number of shelves in each bay. :param min_number_of_products: Minimum number of eye level unique SKUs for KPI to pass. :param filters: This are the parameters which dictate the relevant SKUs for the eye-level calculation. 
        :return: A tuple: (Number of scenes which pass, Total number of relevant scenes)
        """
        # Split incoming filters into product filters and the scenes they restrict to.
        filters, relevant_scenes = self.separate_location_filters_from_product_filters(
            **filters)
        if len(relevant_scenes) == 0:
            return 0, 0
        if eye_level_configurations == self.DEFAULT:
            if hasattr(self, 'eye_level_configurations'):
                eye_level_configurations = self.eye_level_configurations
            else:
                Log.error('Eye-level configurations are not set up')
                return False
        number_of_products = len(self.all_products[self.get_filter_condition(
            self.all_products, **filters)]['product_ean_code'])
        # Configuration columns are, in order: min/max total-shelf bounds and the
        # number of shelves to ignore from each end of the bay.
        min_shelf, max_shelf, min_ignore, max_ignore = eye_level_configurations.columns
        number_of_eye_level_scenes = 0
        for scene in relevant_scenes:
            eye_level_facings = pd.DataFrame(
                columns=self.match_product_in_scene.columns)
            matches = self.match_product_in_scene[
                self.match_product_in_scene['scene_fk'] == scene]
            for bay in matches['bay_number'].unique():
                bay_matches = matches[matches['bay_number'] == bay]
                number_of_shelves = bay_matches['shelf_number'].max()
                # Pick the configuration row whose shelf-count range covers this bay.
                configuration = eye_level_configurations[
                    (eye_level_configurations[min_shelf] <= number_of_shelves) &
                    (eye_level_configurations[max_shelf] >= number_of_shelves)]
                if not configuration.empty:
                    configuration = configuration.iloc[0]
                else:
                    # No matching configuration: do not ignore any shelves.
                    configuration = {min_ignore: 0, max_ignore: 0}
                min_include = configuration[min_ignore] + 1
                max_include = number_of_shelves - configuration[max_ignore]
                eye_level_shelves = bay_matches[
                    bay_matches['shelf_number'].between(
                        min_include, max_include)]
                eye_level_facings = eye_level_facings.append(eye_level_shelves)
            eye_level_assortment = len(
                eye_level_facings[self.get_filter_condition(
                    eye_level_facings, **filters)]['product_ean_code'])
            if min_number_of_products == self.ALL:
                min_number_of_products = number_of_products
            if eye_level_assortment >= min_number_of_products:
                number_of_eye_level_scenes += 1
        return number_of_eye_level_scenes, len(relevant_scenes)

    def calculate_product_sequence(self, sequence_filters, direction, empties_allowed=True,
                                   irrelevant_allowed=False, min_required_to_pass=STRICT_MODE,
                                   **general_filters):
        """
        :param sequence_filters: One of the following:
                1- a list of dictionaries, each containing the filters values
                   of an organ in the sequence.
                2- a tuple of (entity_type, [value1, value2, value3...]) in case every
                   organ in the sequence is defined by only one filter (and of the same
                   entity, such as brand_name, etc).
        :param direction: left/right/top/bottom - the direction of the sequence.
        :param empties_allowed: This dictates whether or not the sequence can be
                                interrupted by Empty facings.
        :param irrelevant_allowed: This dictates whether or not the sequence can be
                                   interrupted by facings which are not in the sequence.
        :param min_required_to_pass: The number of sequences needed to exist in order
                                     for KPI to pass. If STRICT_MODE is activated, the
                                     KPI passes only if it has NO rejects.
        :param general_filters: These are the parameters which the general data frame
                                is filtered by.
        :return: True if the KPI passes; otherwise False.
        """
        # NOTE(review): `unicode` below is Python 2 only - this module will not run
        # unmodified on Python 3.
        if isinstance(sequence_filters, (list, tuple)) and isinstance(sequence_filters[0], (str, unicode)):
            # Shorthand form: a single entity with a list of values.
            entity, sequence_filters = sequence_filters
        else:
            entity = None
        filtered_scif = self.scif[self.get_filter_condition(
            self.scif, **general_filters)]
        # Keep only scenes that contain every organ of the sequence.
        scenes = set(filtered_scif['scene_id'].unique())
        for filters in sequence_filters:
            if isinstance(filters, dict):
                scene_for_filters = filtered_scif[self.get_filter_condition(
                    filtered_scif, **filters)]['scene_id'].unique()
            else:
                scene_for_filters = filtered_scif[
                    filtered_scif[entity] == filters]['scene_id'].unique()
            scenes = scenes.intersection(scene_for_filters)
        if not scenes:
            Log.debug(
                'None of the scenes include products from all types relevant for sequence'
            )
            return True
        pass_counter = 0
        reject_counter = 0
        for scene in scenes:
            scene_graph = self.position_graphs.get(scene)
            # removing unnecessary edges
            # Keep only edges pointing in the tested direction...
            filtered_scene_graph = scene_graph.copy()
            edges_to_remove = filtered_scene_graph.es.select(
                direction_ne=direction)
            filtered_scene_graph.delete_edges(
                [edge.index for edge in edges_to_remove])
            # ...and a second copy with only the opposite direction, used to detect
            # sequence organs that appear BEFORE the tested vertex.
            reversed_scene_graph = scene_graph.copy()
            edges_to_remove = reversed_scene_graph.es.select(
                direction_ne=self._reverse_direction(direction))
            reversed_scene_graph.delete_edges(
                [edge.index for edge in edges_to_remove])
            vertices_list = []
            for filters in sequence_filters:
                if not isinstance(filters, dict):
                    filters = {entity: filters}
                vertices_list.append(
                    self.filter_vertices_from_graph(scene_graph, **filters))
            # First organ is the anchor being tested; the rest form the sequence.
            tested_vertices, sequence_vertices = vertices_list[
                0], vertices_list[1:]
            vertices_list = reduce(lambda x, y: x + y, sequence_vertices)
            sequences = []
            for vertex in tested_vertices:
                previous_sequences = self.get_positions_by_direction(
                    reversed_scene_graph, vertex)
                # A sequence organ found on the wrong side of the anchor is a reject.
                if previous_sequences and set(vertices_list).intersection(
                        reduce(lambda x, y: x + y, previous_sequences)):
                    reject_counter += 1
                    if min_required_to_pass == self.STRICT_MODE:
                        return False
                    continue
                next_sequences = self.get_positions_by_direction(
                    filtered_scene_graph, vertex)
                sequences.extend(next_sequences)
            sequences = self._filter_sequences(sequences)
            for sequence in sequences:
                all_products_appeared = True
                empties_found = False
                irrelevant_found = False
                full_sequence = False
                broken_sequence = False
                current_index = 0
                previous_vertices = list(tested_vertices)
                for vertices in sequence_vertices:
                    if not set(sequence).intersection(vertices):
                        all_products_appeared = False
                        break
                # Walk the sequence and classify every vertex encountered.
                for vindex in sequence:
                    vertex = scene_graph.vs[vindex]
                    if vindex not in vertices_list and vindex not in tested_vertices:
                        if current_index < len(sequence_vertices):
                            if vertex['product_type'] == self.EMPTY:
                                empties_found = True
                            else:
                                irrelevant_found = True
                    elif vindex in previous_vertices:
                        pass
                    elif vindex in sequence_vertices[current_index]:
                        previous_vertices = list(
                            sequence_vertices[current_index])
                        current_index += 1
                    else:
                        broken_sequence = True
                if current_index == len(sequence_vertices):
                    full_sequence = True
                if broken_sequence:
                    reject_counter += 1
                elif full_sequence:
                    if not empties_allowed and empties_found:
                        reject_counter += 1
                    elif not irrelevant_allowed and irrelevant_found:
                        reject_counter += 1
                    elif all_products_appeared:
                        pass_counter += 1
                # NOTE(review): indentation reconstructed from a collapsed source -
                # this pass/fail check is taken to be per-sequence; confirm against
                # the canonical Trax GeneralToolBox implementation.
                if pass_counter >= min_required_to_pass:
                    return True
                elif min_required_to_pass == self.STRICT_MODE and reject_counter > 0:
                    return False
        if reject_counter == 0:
            return True
        else:
            return False

    def update_templates(self):
        """
        This function checks whether the recent templates are updated.
        If they're not, it downloads them from the Cloud and saves them in a local path.
        """
        if not os.path.exists(self.local_templates_path):
            os.makedirs(self.local_templates_path)
            self.save_latest_templates()
        else:
            files_list = os.listdir(self.local_templates_path)
            if files_list and UPDATED_DATE_FILE in files_list:
                with open(
                        os.path.join(self.local_templates_path,
                                     UPDATED_DATE_FILE), 'rb') as f:
                    date = datetime.strptime(f.read(), UPDATED_DATE_FORMAT)
                # Templates already refreshed today - nothing to do.
                if date.date() == datetime.utcnow().date():
                    return
                else:
                    self.save_latest_templates()
            else:
                self.save_latest_templates()

    def save_latest_templates(self):
        """
        This function reads the latest templates from the Cloud, and saves them
        in a local path.
        """
        if not os.path.exists(self.local_templates_path):
            os.makedirs(self.local_templates_path)
        dir_name = self.get_latest_directory_date_from_cloud(
            self.cloud_templates_path.format(''), self.amz_conn)
        files = [
            f.key for f in self.amz_conn.bucket.list(
                self.cloud_templates_path.format(dir_name))
        ]
        for file_path in files:
            file_name = file_path.split('/')[-1]
            with open(os.path.join(self.local_templates_path, file_name),
                      'wb') as f:
                self.amz_conn.download_file(file_path, f)
        # Stamp the cache with today's date so update_templates can skip re-downloads.
        with open(os.path.join(self.local_templates_path, UPDATED_DATE_FILE),
                  'wb') as f:
            f.write(datetime.utcnow().strftime(UPDATED_DATE_FORMAT))
        Log.info('Latest version of templates has been saved to cache')

    @staticmethod
    def get_latest_directory_date_from_cloud(cloud_path, amz_conn):
        """
        This function reads all files from a given path (in the Cloud), and extracts
        the dates of their mother dirs by their name. Later it returns the latest
        date (up to today).
        """
        files = amz_conn.bucket.list(cloud_path)
        files = [f.key.replace(cloud_path, '') for f in files]
        files = [f for f in files if len(f.split('/')) > 1]
        files = [f.split('/')[0] for f in files]
        # Directory names are expected to be yymmdd digit strings.
        files = [f for f in files if f.isdigit()]
        if not files:
            return
        dates = [datetime.strptime(f, '%y%m%d') for f in files]
        # Newest directory that is not dated in the future.
        for date in sorted(dates, reverse=True):
            if date.date() <= datetime.utcnow().date():
                return date.strftime("%y%m%d")
        return

    @staticmethod
    def _reverse_direction(direction):
        """
        This function returns the opposite of a given direction.
        """
        if direction == 'top':
            new_direction = 'bottom'
        elif direction == 'bottom':
            new_direction = 'top'
        elif direction == 'left':
            new_direction = 'right'
        elif direction == 'right':
            new_direction = 'left'
        else:
            # Unknown directions are returned unchanged.
            new_direction = direction
        return new_direction

    def get_positions_by_direction(self, graph, vertex_index):
        """
        This function gets a filtered graph (contains only edges of a relevant
        direction) and a Vertex index, and returns all sequences starting in it
        (until it gets to a dead end).
        """
        sequences = []
        edges = [graph.es[e] for e in graph.incident(vertex_index)]
        next_vertices = [edge.target for edge in edges]
        # Recursively extend every outgoing path until a dead end.
        for vertex in next_vertices:
            next_sequences = self.get_positions_by_direction(graph, vertex)
            if not next_sequences:
                sequences.append([vertex])
            else:
                for sequence in next_sequences:
                    sequences.append([vertex] + sequence)
        return sequences

    @staticmethod
    def _filter_sequences(sequences):
        """
        This function receives a list of sequences (lists of indexes), and removes
        sequences which can be represented by a shorter sequence (which is also
        in the list).
        """
        if not sequences:
            return sequences
        # Sort by end-vertex then length; keep only the shortest per end-vertex.
        sequences = sorted(sequences, key=lambda x: (x[-1], len(x)))
        filtered_sequences = [sequences[0]]
        for sequence in sequences[1:]:
            if sequence[-1] != filtered_sequences[-1][-1]:
                filtered_sequences.append(sequence)
        return filtered_sequences

    def calculate_non_proximity(self,
                                tested_filters,
                                anchor_filters,
                                allowed_diagonal=False,
                                **general_filters):
        """
        :param tested_filters: The tested SKUs' filters.
        :param anchor_filters: The anchor SKUs' filters.
        :param allowed_diagonal: True - a tested SKU can be in a direct diagonal from
                                 an anchor SKU in order for the KPI to pass;
                                 False - a diagonal proximity is NOT allowed.
        :param general_filters: These are the parameters which the general data frame
                                is filtered by.
        :return:
        """
        direction_data = []
        if allowed_diagonal:
            # Adjacency on one axis at a time only (diagonal neighbours pass).
            direction_data.append({'top': (0, 1), 'bottom': (0, 1)})
            direction_data.append({'right': (0, 1), 'left': (0, 1)})
        else:
            direction_data.append({
                'top': (0, 1),
                'bottom': (0, 1),
                'right': (0, 1),
                'left': (0, 1)
            })
        is_proximity = self.calculate_relative_position(tested_filters,
                                                        anchor_filters,
                                                        direction_data,
                                                        min_required_to_pass=1,
                                                        **general_filters)
        return not is_proximity

    def calculate_relative_position(self,
                                    tested_filters,
                                    anchor_filters,
                                    direction_data,
                                    min_required_to_pass=1,
                                    **general_filters):
        """
        :param tested_filters: The tested SKUs' filters.
        :param anchor_filters: The anchor SKUs' filters.
        :param direction_data: The allowed distance between the tested and anchor SKUs.
                               In form: {'top': 4, 'bottom: 0, 'left': 100, 'right': 0}
                               Alternative form: {'top': (0, 1), 'bottom': (1, 1000), ...} - As range.
        :param min_required_to_pass: The number of appearances needed to be True for
                                     relative position in order for KPI to pass.
                                     If all appearances are required: ==a string or a big number.
        :param general_filters: These are the parameters which the general data frame
                                is filtered by.
        :return: True if (at least) one pair of relevant SKUs fits the distance
                 requirements; otherwise - returns False.
        """
        filtered_scif = self.scif[self.get_filter_condition(
            self.scif, **general_filters)]
        tested_scenes = filtered_scif[self.get_filter_condition(
            filtered_scif, **tested_filters)]['scene_id'].unique()
        anchor_scenes = filtered_scif[self.get_filter_condition(
            filtered_scif, **anchor_filters)]['scene_id'].unique()
        relevant_scenes = set(tested_scenes).intersection(anchor_scenes)
        if relevant_scenes:
            pass_counter = 0
            reject_counter = 0
            for scene in relevant_scenes:
                scene_graph = self.position_graphs.get(scene)
                # NOTE(review): `pass` here looks like it was meant to be `continue`
                # (an empty graph is not skipped) - confirm intended behavior.
                if not len(scene_graph.vs):
                    pass
                tested_vertices = self.filter_vertices_from_graph(
                    scene_graph, **tested_filters)
                anchor_vertices = self.filter_vertices_from_graph(
                    scene_graph, **anchor_filters)
                for tested_vertex in tested_vertices:
                    for anchor_vertex in anchor_vertices:
                        moves = {'top': 0, 'bottom': 0, 'left': 0, 'right': 0}
                        path = scene_graph.get_shortest_paths(anchor_vertex,
                                                              tested_vertex,
                                                              output='epath')
                        if path:
                            path = path[0]
                            # Count how many steps in each direction separate the pair.
                            for edge in path:
                                moves[scene_graph.es[edge]['direction']] += 1
                            if self.validate_moves(moves, direction_data):
                                pass_counter += 1
                                if isinstance(
                                        min_required_to_pass, int
                                ) and pass_counter >= min_required_to_pass:
                                    return True
                            else:
                                reject_counter += 1
                        else:
                            Log.debug('Tested and Anchor have no direct path')
            if pass_counter > 0 and reject_counter == 0:
                return True
            else:
                return False
        else:
            Log.debug('None of the scenes contain both anchor and tested SKUs')
            return False

    @staticmethod
    def filter_vertices_from_graph(graph, **filters):
        """
        This function is given a graph and returns a set of vertices calculated
        by a given set of filters.
        """
        vertices_indexes = None
        for field in filters.keys():
            field_vertices = set()
            values = filters[field] if isinstance(
                filters[field], (list, tuple)) else [filters[field]]
            for value in values:
                vertices = [v.index for v in graph.vs.select(**{field: value})]
                field_vertices = field_vertices.union(vertices)
            # Vertices must satisfy every filter field (intersection across fields).
            if vertices_indexes is None:
                vertices_indexes = field_vertices
            else:
                vertices_indexes = vertices_indexes.intersection(
                    field_vertices)
        # No filters at all means every vertex matches.
        vertices_indexes = vertices_indexes if vertices_indexes is not None else [
            v.index for v in graph.vs
        ]
        return list(vertices_indexes)

    @staticmethod
    def validate_moves(moves, direction_data):
        """
        This function checks whether the distance between the anchor and the tested
        SKUs fits the requirements.
        """
        direction_data = direction_data if isinstance(
            direction_data, (list, tuple)) else [direction_data]
        validated = False
        # A pair passes if it satisfies at least one direction-data alternative.
        for data in direction_data:
            data_validated = True
            for direction in moves.keys():
                allowed_moves = data.get(direction, (0, 0))
                # Scalar allowance means 'between 0 and that many moves'.
                min_move, max_move = allowed_moves if isinstance(
                    allowed_moves, tuple) else (0, allowed_moves)
                if not min_move <= moves[direction] <= max_move:
                    data_validated = False
                    break
            if data_validated:
                validated = True
                break
        return validated

    def calculate_block_together(self,
                                 allowed_products_filters=None,
                                 include_empty=EXCLUDE_EMPTY,
                                 minimum_block_ratio=1,
                                 result_by_scene=False,
                                 **filters):
        """
        :param allowed_products_filters: These are the parameters which are allowed to
                                         corrupt the block without failing it.
        :param include_empty: This parameter dictates whether or not to discard
                              Empty-typed products.
        :param minimum_block_ratio: The minimum (block number of facings / total number
                                    of relevant facings) ratio in order for KPI to pass
                                    (if ratio=1, then only one block is allowed).
        :param result_by_scene: True - The result is a tuple of (number of passed scenes,
                                total relevant scenes); False - The result is True if at
                                least one scene has a block, False - otherwise.
        :param filters: These are the parameters which the blocks are checked for.
        :return: see 'result_by_scene' above.
        """
        filters, relevant_scenes = self.separate_location_filters_from_product_filters(
            **filters)
        if len(relevant_scenes) == 0:
            if result_by_scene:
                return 0, 0
            else:
                Log.debug(
                    'Block Together: No relevant SKUs were found for these filters {}'
                    .format(filters))
                return True
        number_of_blocked_scenes = 0
        cluster_ratios = []
        for scene in relevant_scenes:
            scene_graph = self.position_graphs.get(scene).copy()
            relevant_vertices = set(
                self.filter_vertices_from_graph(scene_graph, **filters))
            if allowed_products_filters:
                allowed_vertices = self.filter_vertices_from_graph(
                    scene_graph, **allowed_products_filters)
            else:
                allowed_vertices = set()
            if include_empty == self.EXCLUDE_EMPTY:
                empty_vertices = {
                    v.index
                    for v in scene_graph.vs.select(product_type='Empty')
                }
                allowed_vertices = set(allowed_vertices).union(empty_vertices)
            # Drop everything that is neither relevant nor allowed, so remaining
            # connected components are candidate blocks.
            all_vertices = {v.index for v in scene_graph.vs}
            vertices_to_remove = all_vertices.difference(
                relevant_vertices.union(allowed_vertices))
            scene_graph.delete_vertices(vertices_to_remove)
            # removing clusters including 'allowed' SKUs only
            clusters = [
                cluster for cluster in scene_graph.clusters()
                if set(cluster).difference(allowed_vertices)
            ]
            # Vertex indexes were renumbered by delete_vertices - re-filter.
            new_relevant_vertices = self.filter_vertices_from_graph(
                scene_graph, **filters)
            for cluster in clusters:
                relevant_vertices_in_cluster = set(cluster).intersection(
                    new_relevant_vertices)
                if len(new_relevant_vertices) > 0:
                    cluster_ratio = len(relevant_vertices_in_cluster) / float(
                        len(new_relevant_vertices))
                else:
                    cluster_ratio = 0
                cluster_ratios.append(cluster_ratio)
                if cluster_ratio >= minimum_block_ratio:
                    if result_by_scene:
                        number_of_blocked_scenes += 1
                        break
                    else:
                        if minimum_block_ratio == 1:
                            return True
                        else:
                            # Return the winning block's subgraph along with its ratio.
                            all_vertices = {v.index for v in scene_graph.vs}
                            non_cluster_vertices = all_vertices.difference(
                                cluster)
                            scene_graph.delete_vertices(non_cluster_vertices)
                            return cluster_ratio, scene_graph
        if result_by_scene:
            return number_of_blocked_scenes, len(relevant_scenes)
        else:
            if minimum_block_ratio == 1:
                return False
            elif cluster_ratios:
                return max(cluster_ratios)
            else:
                return None

    def get_filter_condition(self, df, **filters):
        """
        :param df: The data frame to be filters.
        :param filters: These are the parameters which the data frame is filtered by.
                       Every parameter would be a tuple of the value and an
                       include/exclude flag.
                       INPUT EXAMPLE (1): manufacturer_name = ('Diageo', DIAGEOAUGENERALToolBox.INCLUDE_FILTER)
                       INPUT EXAMPLE (2): manufacturer_name = 'Diageo'
        :return: a filtered Scene Item Facts data frame.
        """
        # Rows without facings are never relevant (when the column exists).
        if 'facings' in df.keys():
            filter_condition = (df['facings'] > 0)
        else:
            filter_condition = None
        for field in filters.keys():
            if field in df.keys():
                if isinstance(filters[field], tuple):
                    value, exclude_or_include = filters[field]
                else:
                    # Bare values default to an include filter.
                    value, exclude_or_include = filters[
                        field], self.INCLUDE_FILTER
                if not isinstance(value, list):
                    value = [value]
                if exclude_or_include == self.INCLUDE_FILTER:
                    condition = (df[field].isin(value))
                elif exclude_or_include == self.EXCLUDE_FILTER:
                    condition = (~df[field].isin(value))
                else:
                    continue
                if filter_condition is None:
                    filter_condition = condition
                else:
                    filter_condition &= condition
            else:
                Log.warning('field {} is not in the Data Frame'.format(field))
        return filter_condition

    def separate_location_filters_from_product_filters(self, **filters):
        """
        This function gets scene-item-facts filters of all kinds, extracts the relevant
        scenes by the location filters, and returns them along with the product filters only.
        """
        location_filters = {}
        # NOTE(review): popping from `filters` while iterating `filters.keys()` is only
        # safe on Python 2, where keys() returns a list snapshot.
        for field in filters.keys():
            if field not in self.all_products.columns:
                location_filters[field] = filters.pop(field)
        relevant_scenes = self.scif[self.get_filter_condition(
            self.scif, **location_filters)]['scene_id'].unique()
        return filters, relevant_scenes

    @staticmethod
    def get_json_data(file_path, sheet_name=None, skiprows=0):
        """
        This function gets a file's path and extract its content into a JSON.
        """
        data = {}
        if sheet_name:
            sheet_names = [sheet_name]
        else:
            sheet_names = xlrd.open_workbook(file_path).sheet_names()
        for sheet_name in sheet_names:
            try:
                output = pd.read_excel(file_path,
                                       sheetname=sheet_name,
                                       skiprows=skiprows)
            except xlrd.biffh.XLRDError:
                Log.warning('Sheet name {} doesn\'t exist'.format(sheet_name))
                return None
            output = output.to_json(orient='records')
            output = json.loads(output)
            data[sheet_name] = output
        # A single requested/found sheet is unwrapped from the outer dict.
        if sheet_name:
            data = data[sheet_name]
        elif len(data.keys()) == 1:
            # NOTE(review): `data.keys()[0]` is Python 2 only.
            data = data[data.keys()[0]]
        return data

    def download_template(self, set_name):
        """
        This function receives a KPI set name and return its relevant template as a JSON.
        """
        temp_file_path = '{}/{}_temp.xlsx'.format(os.getcwd(), set_name)
        f = open(temp_file_path, 'wb')
        self.amz_conn.download_file(
            '{}{}.xlsx'.format(self.templates_path, set_name), f)
        f.close()
        json_data = self.get_json_data(temp_file_path)
        # The template is only needed transiently - remove the local copy.
        os.remove(temp_file_path)
        return json_data

    def calculate_linear_share_of_shelf(self,
                                        sos_filters=None,
                                        include_empty=EXCLUDE_EMPTY,
                                        **general_filters):
        """
        :param sos_filters: These are the parameters on which ths SOS is calculated
                            (out of the general DF).
        :param include_empty: This dictates whether Empty-typed SKUs are included
                              in the calculation.
        :param general_filters: These are the parameters which the general data frame
                                is filtered by.
        :return: The ratio of the SOS.
        """
        if include_empty == self.EXCLUDE_EMPTY:
            general_filters['product_type'] = (self.EMPTY, self.EXCLUDE_FILTER)
        pop_filter = self.get_filter_condition(self.scif, **general_filters)
        subset_filter = self.get_filter_condition(self.scif, **sos_filters)
        try:
            ratio = self.calculate_sos_by_linear(pop_filter=pop_filter,
                                                 subset_filter=subset_filter)
        # NOTE(review): bare except silently maps ANY failure (including bugs) to 0.
        except:
            ratio = 0
        if not isinstance(ratio, (float, int)):
            ratio = 0
        return ratio

    def calculate_sos_by_linear(self, pop_filter, subset_filter, population=None):
        """
        Returns data frame containing result, target and score.
        :param pop_filter: how to filter the population
        :param subset_filter: how to create the subset population
        :param population: optional :class:`pandas.DataFrame` to be used in this calculation
        :return: A newly created :class:`Core.DataProvider.Fact` object
        """
        pop = self.get_population(population)
        filtered_population = pop[pop_filter]
        if filtered_population.empty:
            return None
        else:
            subset_population = filtered_population[subset_filter]
            # Linear SOS: subset gross length (ignoring stacking) / population's.
            ratio = TBox.calculate_ratio_sum_field_in_rows(
                filtered_population, subset_population, Fd.GROSS_LEN_IGN_STACK)
            return ratio

    def get_population(self, population):
        """
        Returns a reference to the population to work on in next steps.
        If population accepted, return it, otherwise, return self.scif
        (scene_item_facts) as default.
        :param population: optional :class:`pandas.DataFrame`
        :return: :class:`pandas.DataFrame` to use in next steps of the calculation
        """
        if population is None:
            pop = self.scif
        else:
            Validation.is_df(population)
            pop = population
        return pop
class PNGMCCN_SANDGENERALToolBox:
    """
    Generic KPI calculations (availability, assortment, share of shelf,
    relative position, block-together) over the session's scene item facts.
    MOVED TO Trax.Data.ProfessionalServices.KPIUtils.GeneralToolBox
    """
    # Include/exclude flags consumed by get_filter_condition.
    EXCLUDE_FILTER = 0
    INCLUDE_FILTER = 1
    # Whether Empty-typed products take part in a calculation.
    EXCLUDE_EMPTY = 0
    INCLUDE_EMPTY = 1
    EMPTY = 'Empty'
    ASSORTMENT = 'assortment'
    AVAILABILITY = 'availability'

    def __init__(self, data_provider, output, kpi_static_data, geometric_kpi_flag=False):
        """
        :param data_provider: project data provider (supplies SCIF, matches, etc).
        :param output: calculation output sink passed to the base engine.
        :param kpi_static_data: pre-fetched static KPI definitions.
        :param geometric_kpi_flag: when True, position graphs are built (expensive)
                                   to support geometric KPIs.
        """
        self.k_engine = BaseCalculationsGroup(data_provider, output)
        self.data_provider = data_provider
        self.project_name = self.data_provider.project_name
        self.session_uid = self.data_provider.session_uid
        self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
        self.all_products = self.data_provider[Data.ALL_PRODUCTS]
        self.survey_response = self.data_provider[Data.SURVEY_RESPONSES]
        self.kpi_static_data = kpi_static_data
        if geometric_kpi_flag:
            # Build the per-scene position graphs only when actually needed.
            self.position_graphs = PNGMCCN_SANDPositionGraphs(
                self.data_provider)
            self.matches = self.position_graphs.match_product_in_scene
        else:
            self.position_graphs = None
            self.matches = self.data_provider[Data.MATCHES]

    def check_survey_answer(self, survey_text, target_answer):
        """
        :param survey_text: The name of the survey in the DB.
        :param target_answer: The required answer/s for the KPI to pass.
        :return: True if the answer matches the target; otherwise - False.
        """
        if not isinstance(target_answer, list):
            target_answer = [target_answer]
        survey_data = self.survey_response.loc[
            self.survey_response['question_text'] == survey_text]
        answer_field = 'selected_option_text' if not survey_data[
            'selected_option_text'].empty else 'number_value'
        # Fix: testing a raw ndarray in a boolean context raises ValueError when
        # more than one answer row exists; compare through a plain list instead.
        answers = survey_data[answer_field].values.tolist()
        return bool(answers) and answers[0] in target_answer

    def calculate_number_of_scenes(self, **filters):
        """
        :param filters: These are the parameters which the data frame is filtered by.
        :return: The number of scenes matching the filtered Scene Item Facts data frame.
        """
        filtered_scif = self.scif[self.get_filter_condition(
            self.scif, **filters)]
        return len(filtered_scif['scene_id'].unique())

    def calculate_availability(self, **filters):
        """
        :param filters: These are the parameters which the data frame is filtered by.
        :return: Total number of SKUs facings appeared in the filtered
                 Scene Item Facts data frame.
        """
        filtered_scif = self.scif[self.get_filter_condition(
            self.scif, **filters)]
        return filtered_scif['facings'].sum()

    def calculate_assortment(self, **filters):
        """
        :param filters: These are the parameters which the data frame is filtered by.
        :return: Number of unique SKUs appeared in the filtered
                 Scene Item Facts data frame.
        """
        filtered_scif = self.scif[self.get_filter_condition(
            self.scif, **filters)]
        return len(filtered_scif['product_ean_code'].unique())

    def calculate_share_of_shelf(self, sos_filters=None, include_empty=EXCLUDE_EMPTY,
                                 **general_filters):
        """
        :param sos_filters: These are the parameters on which ths SOS is calculated
                            (out of the general DF).
        :param include_empty: This dictates whether Empty-typed SKUs are included
                              in the calculation.
        :param general_filters: These are the parameters which the general data frame
                                is filtered by.
        :return: The ratio of the SOS.
        """
        if include_empty == self.EXCLUDE_EMPTY:
            general_filters['product_type'] = (self.EMPTY, self.EXCLUDE_FILTER)
        pop_filter = self.get_filter_condition(self.scif, **general_filters)
        subset_filter = self.get_filter_condition(self.scif, **sos_filters)
        try:
            ratio = self.k_engine.calculate_sos_by_facings(
                pop_filter=pop_filter, subset_filter=subset_filter)
        except Exception:  # Fix: was a bare except; any failure is treated as SOS=0.
            ratio = 0
        if not isinstance(ratio, (float, int)):
            ratio = 0
        return ratio

    def calculate_relative_position(self, tested_filters, anchor_filters,
                                    direction_data, **general_filters):
        """
        :param tested_filters: The tested SKUs' filters.
        :param anchor_filters: The anchor SKUs' filters.
        :param direction_data: The allowed distance between the tested and anchor SKUs.
                               In form: {'top': 4, 'bottom: 0, 'left': 100, 'right': 0}
        :param general_filters: These are the parameters which the general data frame
                                is filtered by.
        :return: True if (at least) one pair of relevant SKUs fits the distance
                 requirements; otherwise - returns False.
        """
        filtered_scif = self.scif[self.get_filter_condition(
            self.scif, **general_filters)]
        tested_scenes = filtered_scif[self.get_filter_condition(
            filtered_scif, **tested_filters)]['scene_id'].unique()
        anchor_scenes = filtered_scif[self.get_filter_condition(
            filtered_scif, **anchor_filters)]['scene_id'].unique()
        relevant_scenes = set(tested_scenes).intersection(anchor_scenes)
        if relevant_scenes:
            for scene in relevant_scenes:
                scene_graph = self.position_graphs.position_graphs.get(scene)
                tested_vertices = self.filter_vertices_from_graph(
                    scene_graph, **tested_filters)
                anchor_vertices = self.filter_vertices_from_graph(
                    scene_graph, **anchor_filters)
                for tested_vertex in tested_vertices:
                    for anchor_vertex in anchor_vertices:
                        moves = {'top': 0, 'bottom': 0, 'left': 0, 'right': 0}
                        path = scene_graph.get_shortest_paths(
                            anchor_vertex, tested_vertex, output='epath')
                        if path:
                            path = path[0]
                            # Count steps per direction along the shortest path.
                            for edge in path:
                                moves[scene_graph.es[edge]['direction']] += 1
                            if self.validate_moves(moves, direction_data):
                                return True
        else:
            Log.debug('None of the scenes contain both anchor and tested SKUs')
        return False

    @staticmethod
    def filter_vertices_from_graph(graph, **filters):
        """
        This function is given a graph and returns a set of vertices calculated
        by a given set of filters.
        """
        vertices_indexes = None
        for field in filters.keys():
            field_vertices = set()
            values = filters[field] if isinstance(
                filters[field], (list, tuple)) else [filters[field]]
            for value in values:
                vertices = [v.index for v in graph.vs.select(**{field: value})]
                field_vertices = field_vertices.union(vertices)
            # A vertex must satisfy every filter field (intersection across fields).
            if vertices_indexes is None:
                vertices_indexes = field_vertices
            else:
                vertices_indexes = vertices_indexes.intersection(
                    field_vertices)
        # No filters at all means every vertex matches.
        vertices_indexes = vertices_indexes if vertices_indexes is not None else [
            v.index for v in graph.vs
        ]
        return vertices_indexes

    @staticmethod
    def validate_moves(moves, direction_data):
        """
        This function checks whether the distance between the anchor and the tested
        SKUs fits the requirements.
        :param moves: steps taken per direction, e.g. {'top': 1, 'bottom': 0, ...}.
        :param direction_data: maximum allowed steps per direction.
        :return: True if no direction exceeds its allowance.
        """
        for direction in moves.keys():
            if moves[direction] > 0:
                # Fix: a direction missing from direction_data used to raise KeyError;
                # treat it as 'no moves allowed' instead.
                if moves[direction] > direction_data.get(direction, 0):
                    return False
        return True

    def calculate_block_together(self, allowed_products_filters=None,
                                 include_empty=EXCLUDE_EMPTY, **filters):
        """
        :param allowed_products_filters: These are the parameters which are allowed to
                                         corrupt the block without failing it.
        :param include_empty: This parameter dictates whether or not to discard
                              Empty-typed products.
        :param filters: These are the parameters which the blocks are checked for.
        :return: True - if in (at least) one of the scenes all the relevant SKUs are
                 grouped together in one block; otherwise - returns False.
        """
        relevant_scenes = self.scif[self.get_filter_condition(
            self.scif, **filters)]['scene_id'].unique()
        # Fix: `if relevant_scenes:` on an ndarray raises ValueError for >1 scenes.
        if len(relevant_scenes):
            for scene in relevant_scenes:
                scene_graph = self.position_graphs.position_graphs[scene].copy()
                relevant_vertices = None
                for field in filters.keys():
                    # Fix: was isinstance(..., (list, float)) - tuple values were
                    # silently treated as a single scalar filter value.
                    values = filters[field] if isinstance(
                        filters[field], (list, tuple)) else [filters[field]]
                    vertices_for_field = set()
                    for value in values:
                        condition = {field: value}
                        vertices = {
                            v.index
                            for v in scene_graph.vs.select(**condition)
                        }
                        vertices_for_field = vertices_for_field.union(vertices)
                    if relevant_vertices is None:
                        relevant_vertices = vertices_for_field
                    else:
                        relevant_vertices = relevant_vertices.intersection(
                            vertices_for_field)
                if allowed_products_filters:
                    allowed_vertices = None
                    for field in allowed_products_filters.keys():
                        # Fix: same (list, float) -> (list, tuple) correction.
                        values = allowed_products_filters[field] \
                            if isinstance(allowed_products_filters[field], (list, tuple)) \
                            else [allowed_products_filters[field]]
                        vertices_for_field = set()
                        for value in values:
                            condition = {field: value}
                            vertices = {
                                v.index
                                for v in scene_graph.vs.select(**condition)
                            }
                            vertices_for_field = vertices_for_field.union(
                                vertices)
                        if allowed_vertices is None:
                            allowed_vertices = vertices_for_field
                        else:
                            allowed_vertices = allowed_vertices.intersection(
                                vertices_for_field)
                    if include_empty == self.EXCLUDE_EMPTY:
                        empty_vertices = {
                            v.index
                            for v in scene_graph.vs.select(
                                product_type='Empty')
                        }
                        allowed_vertices = allowed_vertices.union(
                            empty_vertices)
                    relevant_vertices = relevant_vertices if relevant_vertices is not None else set()
                    allowed_vertices = allowed_vertices if allowed_vertices is not None else set()
                else:
                    allowed_vertices = []
                # Keep only relevant/allowed vertices; remaining connected
                # components are the candidate blocks.
                all_vertices = {v.index for v in scene_graph.vs}
                vertices_to_remove = all_vertices.difference(
                    relevant_vertices.union(allowed_vertices))
                scene_graph.delete_vertices(vertices_to_remove)
                # removing clusters that include un-bothered SKUs only
                clusters = [
                    cluster for cluster in scene_graph.clusters()
                    if set(cluster).difference(allowed_vertices)
                ]
                if len(clusters) == 1:
                    return True
        else:
            Log.debug('None of the scenes contain relevant SKUs')
        return False

    def get_filter_condition(self, df, **filters):
        """
        :param df: The data frame to be filters.
        :param filters: These are the parameters which the data frame is filtered by.
                       Every parameter would be a tuple of the value and an
                       include/exclude flag.
                       INPUT EXAMPLE (1): manufacturer_name = ('Diageo', PNGMCCN_SANDGENERALToolBox.INCLUDE_FILTER)
                       INPUT EXAMPLE (2): manufacturer_name = 'Diageo'
        :return: a filtered Scene Item Facts data frame.
        """
        # Rows without facings are never relevant (when the column exists).
        if 'facings' in df.keys():
            filter_condition = (df['facings'] > 0)
        else:
            filter_condition = None
        for field in filters.keys():
            if field in df.keys():
                if isinstance(filters[field], tuple):
                    value, exclude_or_include = filters[field]
                else:
                    # Bare values default to an include filter.
                    value, exclude_or_include = filters[
                        field], self.INCLUDE_FILTER
                if not isinstance(value, list):
                    value = [value]
                if exclude_or_include == self.INCLUDE_FILTER:
                    condition = (df[field].isin(value))
                elif exclude_or_include == self.EXCLUDE_FILTER:
                    condition = (~df[field].isin(value))
                else:
                    continue
                if filter_condition is None:
                    filter_condition = condition
                else:
                    filter_condition &= condition
            else:
                Log.warning('field {} not in DF'.format(field))
        return filter_condition

    @staticmethod
    def get_json_data(file_path, skiprows=0):
        """
        This function gets a file's path and extract its content into a JSON.
        """
        output = pd.read_excel(file_path, skiprows=skiprows)
        output = output.to_json(orient='records')
        return json.loads(output)

    def add_new_kpi_to_static_tables(self, set_fk, new_kpi_list):
        """
        :param set_fk: The relevant KPI set FK.
        :param new_kpi_list: a list of all new KPI's parameters.
        This function adds new KPIs to the DB ('Static' table) - both to level2 (KPI)
        and level3 (Atomic KPI).
        """
        # NOTE(review): the SQL below is string-built; values come from internal
        # templates, but parameterized queries would be safer.
        session = OrmSession(self.project_name, writable=True)
        with session.begin(subtransactions=True):
            for kpi in new_kpi_list:
                level2_query = """
                       INSERT INTO static.kpi (kpi_set_fk, display_text)
                       VALUES ('{0}', '{1}');""".format(
                    set_fk, kpi.get(KPI_NAME))
                result = session.execute(level2_query)
                kpi_fk = result.lastrowid
                level3_query = """
                       INSERT INTO static.atomic_kpi (kpi_fk, name, description, display_text,
                                                      presentation_order, display)
                       VALUES ('{0}', '{1}', '{2}', '{3}', '{4}', '{5}');""".format(
                    kpi_fk, kpi.get(KPI_NAME), kpi.get(KPI_NAME),
                    kpi.get(KPI_NAME), 1, 'Y')
                session.execute(level3_query)
        session.close()
        return

    def add_kpi_sets_to_static(self, set_names):
        """
        This function is to be ran at a beginning of a projects - and adds the
        constant KPI sets data to the DB.
        """
        session = OrmSession(self.project_name, writable=True)
        with session.begin(subtransactions=True):
            for set_name in set_names:
                level1_query = """
                    INSERT INTO static.kpi_set (name, missing_kpi_score, enable, normalize_weight,
                                                expose_to_api, is_in_weekly_report)
                    VALUES ('{0}', '{1}', '{2}', '{3}', '{4}', '{5}');""".format(
                    set_name, 'Bad', 'Y', 'N', 'N', 'N')
                session.execute(level1_query)
        session.close()
        return
class GILLETTEUSGENERALToolBox:
    """
    Generic KPI calculation helpers (availability, assortment, SOS, shelf
    position, blocking and sequencing) over the session's Scene Item Facts
    and position graphs.

    NOTE(review): this is legacy Python-2-era code (`unicode`, builtin
    `reduce` are referenced in method bodies) — confirm target runtime
    before further modernization.
    """

    EXCLUDE_FILTER = 0
    INCLUDE_FILTER = 1
    EXCLUDE_EMPTY = 0
    INCLUDE_EMPTY = 1
    STRICT_MODE = ALL = 1000
    EMPTY = 'Empty'
    DEFAULT = 'Default'

    def __init__(self, data_provider, output, **kwargs):
        self.k_engine = BaseCalculationsGroup(data_provider, output)
        self.data_provider = data_provider
        self.project_name = self.data_provider.project_name
        self.session_uid = self.data_provider.session_uid
        self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
        self.all_products = self.data_provider[Data.ALL_PRODUCTS]
        self.survey_response = self.data_provider[Data.SURVEY_RESPONSES]
        # Any extra keyword argument becomes an instance attribute
        # (e.g. eye_level_configurations used below).
        for data in kwargs.keys():
            setattr(self, data, kwargs[data])

    @property
    def position_graphs(self):
        # Lazily built: graph construction is expensive, so it only happens
        # the first time a position-based KPI is calculated.
        if not hasattr(self, '_position_graphs'):
            self._position_graphs = GILLETTEUSPositionGraphs(
                self.data_provider)
        return self._position_graphs

    @property
    def match_product_in_scene(self):
        # Lazily taken from the position graphs (triggers their creation).
        if not hasattr(self, '_match_product_in_scene'):
            self._match_product_in_scene = self.position_graphs.match_product_in_scene
        return self._match_product_in_scene

    def check_survey_answer(self, survey_text, target_answer):
        """
        :param survey_text: The name of the survey in the DB, or a tuple of
                            (entity field name, value/s to match).
        :param target_answer: The required answer/s for the KPI to pass.
        :return: True if the answer matches the target; otherwise - False.
        """
        if not isinstance(survey_text, (list, tuple)):
            entity = 'question_text'
            value = survey_text
        else:
            entity, value = survey_text
        # Fix: wrap a scalar in a list — Series.isin over a bare string
        # iterates it character-by-character (matches the sibling toolbox
        # implementations elsewhere in this file).
        if not isinstance(value, list):
            value = [value]
        survey_data = self.survey_response[self.survey_response[entity].isin(
            value)]
        if survey_data.empty:
            Log.warning('Survey with {} = {} doesn\'t exist'.format(
                entity, value))
            return False
        answer_field = 'selected_option_text' if not survey_data[
            'selected_option_text'].empty else 'number_value'
        if target_answer in survey_data[answer_field].values.tolist():
            return True
        else:
            return False

    def calculate_number_of_scenes(self, **filters):
        """
        :param filters: These are the parameters which the data frame is filtered by.
        :return: The number of scenes matching the filtered Scene Item Facts data frame.
        """
        filtered_scif = self.scif[self.get_filter_condition(
            self.scif, **filters)]
        number_of_scenes = len(filtered_scif['scene_id'].unique())
        return number_of_scenes

    def calculate_availability(self, **filters):
        """
        :param filters: These are the parameters which the data frame is filtered by.
        :return: Total number of SKUs facings appeared in the filtered Scene Item Facts data frame.
        """
        filtered_scif = self.scif[self.get_filter_condition(
            self.scif, **filters)]
        availability = filtered_scif['facings'].sum()
        return availability

    def calculate_assortment(self, assortment_entity='product_ean_code', **filters):
        """
        :param filters: These are the parameters which the data frame is filtered by.
        :param assortment_entity: This is the entity on which the assortment is calculated.
        :return: Number of unique SKUs appeared in the filtered Scene Item Facts data frame.
        """
        filtered_scif = self.scif[self.get_filter_condition(
            self.scif, **filters)]
        assortment = len(filtered_scif[assortment_entity].unique())
        return assortment

    def calculate_share_of_shelf(self, sos_filters=None, include_empty=EXCLUDE_EMPTY, **general_filters):
        """
        :param sos_filters: These are the parameters on which the SOS is calculated (out of the general DF).
        :param include_empty: This dictates whether Empty-typed SKUs are included in the calculation.
        :param general_filters: These are the parameters which the general data frame is filtered by.
        :return: The ratio of the SOS.
        """
        if include_empty == self.EXCLUDE_EMPTY:
            general_filters['product_type'] = (self.EMPTY, self.EXCLUDE_FILTER)
        pop_filter = self.get_filter_condition(self.scif, **general_filters)
        subset_filter = self.get_filter_condition(self.scif, **sos_filters)
        try:
            ratio = self.k_engine.calculate_sos_by_facings(
                pop_filter=pop_filter, subset_filter=subset_filter)
        except Exception:
            # Fix: narrowed from a bare except (which would also swallow
            # KeyboardInterrupt/SystemExit). Any calculation failure is
            # deliberately treated as a 0 ratio.
            ratio = 0
        if not isinstance(ratio, (float, int)):
            ratio = 0
        return ratio

    def calculate_products_on_edge(self, min_number_of_facings=1, min_number_of_shelves=1, **filters):
        """
        :param min_number_of_facings: Minimum number of edge facings for KPI to pass.
        :param min_number_of_shelves: Minimum number of different shelves with edge facings for KPI to pass.
        :param filters: These are the parameters which dictate the relevant SKUs for the edge calculation.
        :return: A tuple: (Number of scenes which pass, Total number of relevant scenes)
        """
        filters, relevant_scenes = self.separate_location_filters_from_product_filters(
            **filters)
        if len(relevant_scenes) == 0:
            return 0, 0
        number_of_edge_scenes = 0
        for scene in relevant_scenes:
            edge_facings = pd.DataFrame(
                columns=self.match_product_in_scene.columns)
            matches = self.match_product_in_scene[
                self.match_product_in_scene['scene_fk'] == scene]
            for shelf in matches['shelf_number'].unique():
                shelf_matches = matches[matches['shelf_number'] == shelf]
                if not shelf_matches.empty:
                    shelf_matches = shelf_matches.sort_values(
                        by=['bay_number', 'facing_sequence_number'])
                    edge_facings = edge_facings.append(shelf_matches.iloc[0])
                    # NOTE(review): this checks the accumulated edge_facings,
                    # not shelf_matches, so the FIRST shelf never gets its
                    # right-most facing added — looks like a latent bug but
                    # preserved to keep existing KPI results unchanged.
                    if len(edge_facings) > 1:
                        edge_facings = edge_facings.append(
                            shelf_matches.iloc[-1])
            edge_facings = edge_facings[self.get_filter_condition(
                edge_facings, **filters)]
            if len(edge_facings) >= min_number_of_facings \
                    and len(edge_facings['shelf_number'].unique()) >= min_number_of_shelves:
                number_of_edge_scenes += 1
        return number_of_edge_scenes, len(relevant_scenes)

    def calculate_eye_level_assortment(self, eye_level_configurations=DEFAULT, min_number_of_products=ALL,
                                       **filters):
        """
        :param eye_level_configurations: A data frame containing information about shelves to ignore
                                         (==not eye level) for every number of shelves in each bay.
        :param min_number_of_products: Minimum number of eye level unique SKUs for KPI to pass.
        :param filters: These are the parameters which dictate the relevant SKUs for the eye-level calculation.
        :return: A tuple: (Number of scenes which pass, Total number of relevant scenes)
        """
        filters, relevant_scenes = self.separate_location_filters_from_product_filters(
            **filters)
        if len(relevant_scenes) == 0:
            return 0, 0
        if eye_level_configurations == self.DEFAULT:
            if hasattr(self, 'eye_level_configurations'):
                eye_level_configurations = self.eye_level_configurations
            else:
                Log.error('Eye-level configurations are not set up')
                return False
        number_of_products = len(self.all_products[self.get_filter_condition(
            self.all_products, **filters)]['product_ean_code'])
        # Configuration columns, in order: min/max shelf count this row
        # applies to, and the number of shelves to ignore from bottom/top.
        min_shelf, max_shelf, min_ignore, max_ignore = eye_level_configurations.columns
        number_of_eye_level_scenes = 0
        for scene in relevant_scenes:
            eye_level_facings = pd.DataFrame(
                columns=self.match_product_in_scene.columns)
            matches = self.match_product_in_scene[
                self.match_product_in_scene['scene_fk'] == scene]
            for bay in matches['bay_number'].unique():
                bay_matches = matches[matches['bay_number'] == bay]
                number_of_shelves = bay_matches['shelf_number'].max()
                configuration = eye_level_configurations[
                    (eye_level_configurations[min_shelf] <= number_of_shelves) &
                    (eye_level_configurations[max_shelf] >= number_of_shelves)]
                if not configuration.empty:
                    configuration = configuration.iloc[0]
                else:
                    # No configured row for this shelf count: ignore nothing.
                    configuration = {min_ignore: 0, max_ignore: 0}
                min_include = configuration[min_ignore] + 1
                max_include = number_of_shelves - configuration[max_ignore]
                eye_level_shelves = bay_matches[
                    bay_matches['shelf_number'].between(
                        min_include, max_include)]
                eye_level_facings = eye_level_facings.append(eye_level_shelves)
            eye_level_assortment = len(
                eye_level_facings[self.get_filter_condition(
                    eye_level_facings, **filters)]['product_ean_code'])
            if min_number_of_products == self.ALL:
                min_number_of_products = number_of_products
            if eye_level_assortment >= min_number_of_products:
                number_of_eye_level_scenes += 1
        return number_of_eye_level_scenes, len(relevant_scenes)

    def calculate_product_sequence(self, sequence_filters, direction, empties_allowed=True,
                                   irrelevant_allowed=False, min_required_to_pass=STRICT_MODE,
                                   **general_filters):
        """
        :param sequence_filters: One of the following:
                        1- a list of dictionaries, each containing the filters values of an organ in the sequence.
                        2- a tuple of (entity_type, [value1, value2, value3...]) in case every organ in the
                           sequence is defined by only one filter (and of the same entity, such as brand_name, etc).
        :param direction: left/right/top/bottom - the direction of the sequence.
        :param empties_allowed: This dictates whether or not the sequence can be interrupted by Empty facings.
        :param irrelevant_allowed: This dictates whether or not the sequence can be interrupted by facings
                                   which are not in the sequence.
        :param min_required_to_pass: The number of sequences needed to exist in order for KPI to pass.
                                     If STRICT_MODE is activated, the KPI passes only if it has NO rejects.
        :param general_filters: These are the parameters which the general data frame is filtered by.
        :return: True if the KPI passes; otherwise False.
        """
        if isinstance(sequence_filters, (list, tuple)) and isinstance(sequence_filters[0], (str, unicode)):
            entity, sequence_filters = sequence_filters
        else:
            entity = None
        filtered_scif = self.scif[self.get_filter_condition(
            self.scif, **general_filters)]
        scenes = set(filtered_scif['scene_id'].unique())
        # Keep only scenes containing every organ of the sequence.
        for filters in sequence_filters:
            if isinstance(filters, dict):
                scene_for_filters = filtered_scif[self.get_filter_condition(
                    filtered_scif, **filters)]['scene_id'].unique()
            else:
                scene_for_filters = filtered_scif[
                    filtered_scif[entity] == filters]['scene_id'].unique()
            scenes = scenes.intersection(scene_for_filters)
            if not scenes:
                Log.debug(
                    'None of the scenes include products from all types relevant for sequence'
                )
                return True
        pass_counter = 0
        reject_counter = 0
        for scene in scenes:
            scene_graph = self.position_graphs.get(scene)
            # removing unnecessary edges: keep only the requested direction
            # in one copy, and only the reverse direction in the other.
            filtered_scene_graph = scene_graph.copy()
            edges_to_remove = filtered_scene_graph.es.select(
                direction_ne=direction)
            filtered_scene_graph.delete_edges(
                [edge.index for edge in edges_to_remove])
            reversed_scene_graph = scene_graph.copy()
            edges_to_remove = reversed_scene_graph.es.select(
                direction_ne=self._reverse_direction(direction))
            reversed_scene_graph.delete_edges(
                [edge.index for edge in edges_to_remove])
            vertices_list = []
            for filters in sequence_filters:
                if not isinstance(filters, dict):
                    filters = {entity: filters}
                vertices_list.append(
                    self.filter_vertices_from_graph(scene_graph, **filters))
            # The first organ's vertices are the sequence starting points.
            tested_vertices, sequence_vertices = vertices_list[
                0], vertices_list[1:]
            vertices_list = reduce(lambda x, y: x + y, sequence_vertices)
            sequences = []
            for vertex in tested_vertices:
                # A sequence organ appearing BEFORE the starting point means
                # the tested vertex is not really the start -> reject.
                previous_sequences = self.get_positions_by_direction(
                    reversed_scene_graph, vertex)
                if previous_sequences and set(vertices_list).intersection(
                        reduce(lambda x, y: x + y, previous_sequences)):
                    reject_counter += 1
                    if min_required_to_pass == self.STRICT_MODE:
                        return False
                    continue
                next_sequences = self.get_positions_by_direction(
                    filtered_scene_graph, vertex)
                sequences.extend(next_sequences)
            sequences = self._filter_sequences(sequences)
            for sequence in sequences:
                all_products_appeared = True
                empties_found = False
                irrelevant_found = False
                full_sequence = False
                broken_sequence = False
                current_index = 0
                previous_vertices = list(tested_vertices)
                for vertices in sequence_vertices:
                    if not set(sequence).intersection(vertices):
                        all_products_appeared = False
                        break
                # Walk the sequence and classify every vertex: part of the
                # expected organ order, an allowed repeat, an interruption
                # (Empty / irrelevant), or out of order (broken).
                for vindex in sequence:
                    vertex = scene_graph.vs[vindex]
                    if vindex not in vertices_list and vindex not in tested_vertices:
                        if current_index < len(sequence_vertices):
                            if vertex['product_type'] == self.EMPTY:
                                empties_found = True
                            else:
                                irrelevant_found = True
                    elif vindex in previous_vertices:
                        pass
                    elif vindex in sequence_vertices[current_index]:
                        previous_vertices = list(
                            sequence_vertices[current_index])
                        current_index += 1
                    else:
                        broken_sequence = True
                if current_index == len(sequence_vertices):
                    full_sequence = True
                if broken_sequence:
                    reject_counter += 1
                elif full_sequence:
                    if not empties_allowed and empties_found:
                        reject_counter += 1
                    elif not irrelevant_allowed and irrelevant_found:
                        reject_counter += 1
                    elif all_products_appeared:
                        pass_counter += 1
            if pass_counter >= min_required_to_pass:
                return True
            elif min_required_to_pass == self.STRICT_MODE and reject_counter > 0:
                return False
        if reject_counter == 0:
            return True
        else:
            return False

    @staticmethod
    def _reverse_direction(direction):
        """
        This function returns the opposite of a given direction.
        """
        if direction == 'top':
            new_direction = 'bottom'
        elif direction == 'bottom':
            new_direction = 'top'
        elif direction == 'left':
            new_direction = 'right'
        elif direction == 'right':
            new_direction = 'left'
        else:
            new_direction = direction
        return new_direction

    def get_positions_by_direction(self, graph, vertex_index):
        """
        This function gets a filtered graph (contains only edges of a relevant direction) and a Vertex index,
        and returns all sequences starting in it (until it gets to a dead end).
        """
        sequences = []
        edges = [graph.es[e] for e in graph.incident(vertex_index)]
        next_vertices = [edge.target for edge in edges]
        for vertex in next_vertices:
            # Recursive walk; the starting vertex itself is not included.
            next_sequences = self.get_positions_by_direction(graph, vertex)
            if not next_sequences:
                sequences.append([vertex])
            else:
                for sequence in next_sequences:
                    sequences.append([vertex] + sequence)
        return sequences

    @staticmethod
    def _filter_sequences(sequences):
        """
        This function receives a list of sequences (lists of indexes), and removes sequences
        which can be represented by a shorter sequence (which is also in the list).
        """
        if not sequences:
            return sequences
        # Sort by (end vertex, length) so the shortest sequence per end
        # vertex comes first and is the one kept.
        sequences = sorted(sequences, key=lambda x: (x[-1], len(x)))
        filtered_sequences = [sequences[0]]
        for sequence in sequences[1:]:
            if sequence[-1] != filtered_sequences[-1][-1]:
                filtered_sequences.append(sequence)
        return filtered_sequences

    def calculate_non_proximity(self, tested_filters, anchor_filters, allowed_diagonal=False, **general_filters):
        """
        :param tested_filters: The tested SKUs' filters.
        :param anchor_filters: The anchor SKUs' filters.
        :param allowed_diagonal: True - a tested SKU can be in a direct diagonal from an anchor SKU
                                 in order for the KPI to pass; False - a diagonal proximity is NOT allowed.
        :param general_filters: These are the parameters which the general data frame is filtered by.
        :return: True if the tested SKUs are NOT in proximity to the anchor SKUs.
        """
        direction_data = []
        if allowed_diagonal:
            # Two alternative restrictions: adjacent vertically OR
            # horizontally (but not both) counts as proximity.
            direction_data.append({'top': (0, 1), 'bottom': (0, 1)})
            direction_data.append({'right': (0, 1), 'left': (0, 1)})
        else:
            direction_data.append({
                'top': (0, 1),
                'bottom': (0, 1),
                'right': (0, 1),
                'left': (0, 1)
            })
        is_proximity = self.calculate_relative_position(tested_filters,
                                                        anchor_filters,
                                                        direction_data,
                                                        min_required_to_pass=1,
                                                        **general_filters)
        return not is_proximity

    def calculate_relative_position(self, tested_filters, anchor_filters, direction_data, min_required_to_pass=1,
                                    **general_filters):
        """
        :param tested_filters: The tested SKUs' filters.
        :param anchor_filters: The anchor SKUs' filters.
        :param direction_data: The allowed distance between the tested and anchor SKUs.
                               In form: {'top': 4, 'bottom: 0, 'left': 100, 'right': 0}
                               Alternative form: {'top': (0, 1), 'bottom': (1, 1000), ...} - As range.
        :param min_required_to_pass: The number of appearances needed to be True for relative position
                                     in order for KPI to pass. If all appearances are required: ==a string or a big number.
        :param general_filters: These are the parameters which the general data frame is filtered by.
        :return: True if (at least) one pair of relevant SKUs fits the distance requirements; otherwise - returns False.
        """
        filtered_scif = self.scif[self.get_filter_condition(
            self.scif, **general_filters)]
        tested_scenes = filtered_scif[self.get_filter_condition(
            filtered_scif, **tested_filters)]['scene_id'].unique()
        anchor_scenes = filtered_scif[self.get_filter_condition(
            filtered_scif, **anchor_filters)]['scene_id'].unique()
        relevant_scenes = set(tested_scenes).intersection(anchor_scenes)
        if relevant_scenes:
            pass_counter = 0
            reject_counter = 0
            for scene in relevant_scenes:
                scene_graph = self.position_graphs.get(scene)
                tested_vertices = self.filter_vertices_from_graph(
                    scene_graph, **tested_filters)
                anchor_vertices = self.filter_vertices_from_graph(
                    scene_graph, **anchor_filters)
                for tested_vertex in tested_vertices:
                    for anchor_vertex in anchor_vertices:
                        moves = {'top': 0, 'bottom': 0, 'left': 0, 'right': 0}
                        path = scene_graph.get_shortest_paths(anchor_vertex,
                                                              tested_vertex,
                                                              output='epath')
                        if path:
                            path = path[0]
                            # Count how many steps the shortest path takes in
                            # each direction, then validate against the target.
                            for edge in path:
                                moves[scene_graph.es[edge]['direction']] += 1
                            if self.validate_moves(moves, direction_data):
                                pass_counter += 1
                                if isinstance(
                                        min_required_to_pass, int
                                ) and pass_counter >= min_required_to_pass:
                                    return True
                            else:
                                reject_counter += 1
                        else:
                            Log.debug('Tested and Anchor have no direct path')
            if pass_counter > 0 and reject_counter == 0:
                return True
            else:
                return False
        else:
            Log.debug('None of the scenes contain both anchor and tested SKUs')
            return False

    @staticmethod
    def filter_vertices_from_graph(graph, **filters):
        """
        This function is given a graph and returns a set of vertices calculated by a given set of filters.
        """
        vertices_indexes = None
        for field in filters.keys():
            field_vertices = set()
            values = filters[field] if isinstance(
                filters[field], (list, tuple)) else [filters[field]]
            for value in values:
                vertices = [v.index for v in graph.vs.select(**{field: value})]
                field_vertices = field_vertices.union(vertices)
            if vertices_indexes is None:
                vertices_indexes = field_vertices
            else:
                vertices_indexes = vertices_indexes.intersection(
                    field_vertices)
        # No filters at all -> every vertex in the graph qualifies.
        vertices_indexes = vertices_indexes if vertices_indexes is not None else [
            v.index for v in graph.vs
        ]
        return list(vertices_indexes)

    @staticmethod
    def validate_moves(moves, direction_data):
        """
        This function checks whether the distance between the anchor and the tested SKUs
        fits the requirements.
        """
        direction_data = direction_data if isinstance(
            direction_data, (list, tuple)) else [direction_data]
        validated = False
        # direction_data is a list of alternative requirements; passing any
        # one of them is enough.
        for data in direction_data:
            data_validated = True
            for direction in moves.keys():
                allowed_moves = data.get(direction, (0, 0))
                # A scalar bound means "between 0 and that many moves".
                min_move, max_move = allowed_moves if isinstance(
                    allowed_moves, tuple) else (0, allowed_moves)
                if not min_move <= moves[direction] <= max_move:
                    data_validated = False
                    break
            if data_validated:
                validated = True
                break
        return validated

    def calculate_block_together(self, allowed_products_filters=None, include_empty=EXCLUDE_EMPTY, **filters):
        """
        :param allowed_products_filters: These are the parameters which are allowed to corrupt the block
                                         without failing it.
        :param include_empty: This parameter dictates whether or not to discard Empty-typed products.
        :param filters: These are the parameters which the blocks are checked for.
        :return: True - if in (at least) one of the scenes all the relevant SKUs are grouped together in
                 one block; otherwise - returns False.
        """
        relevant_scenes = self.scif[self.get_filter_condition(
            self.scif, **filters)]['scene_id'].unique().tolist()
        # Fix: iterate over a snapshot of the keys — popping from a dict
        # while iterating its live key view raises RuntimeError on Python 3.
        for field in list(filters.keys()):
            if field not in self.all_products.columns:
                filters.pop(field, None)
        if relevant_scenes:
            for scene in relevant_scenes:
                scene_graph = self.position_graphs.get(scene).copy()
                relevant_vertices = None
                for field in filters.keys():
                    values = filters[field] if isinstance(
                        filters[field], (list, float)) else [filters[field]]
                    vertices_for_field = set()
                    for value in values:
                        condition = {field: value}
                        vertices = {
                            v.index
                            for v in scene_graph.vs.select(**condition)
                        }
                        vertices_for_field = vertices_for_field.union(vertices)
                    if relevant_vertices is None:
                        relevant_vertices = vertices_for_field
                    else:
                        relevant_vertices = relevant_vertices.intersection(
                            vertices_for_field)
                if allowed_products_filters:
                    allowed_vertices = None
                    for field in allowed_products_filters.keys():
                        values = allowed_products_filters[field] \
                            if isinstance(allowed_products_filters[field], (list, float)) \
                            else [allowed_products_filters[field]]
                        vertices_for_field = set()
                        for value in values:
                            condition = {field: value}
                            vertices = {
                                v.index
                                for v in scene_graph.vs.select(**condition)
                            }
                            vertices_for_field = vertices_for_field.union(
                                vertices)
                        if allowed_vertices is None:
                            allowed_vertices = vertices_for_field
                        else:
                            allowed_vertices = allowed_vertices.intersection(
                                vertices_for_field)
                    # NOTE(review): Empties are only tolerated when
                    # allowed_products_filters is given — preserved as-is.
                    if include_empty == self.EXCLUDE_EMPTY:
                        try:
                            empty_vertices = {
                                v.index
                                for v in scene_graph.vs.select(
                                    product_type='Empty')
                            }
                            allowed_vertices = allowed_vertices.union(
                                empty_vertices)
                        except KeyError:
                            Log.warning(
                                "Entity 'product_type' doesn't appear in the vertex attributes"
                            )
                    relevant_vertices = relevant_vertices if relevant_vertices is not None else set(
                    )
                    allowed_vertices = allowed_vertices if allowed_vertices is not None else set(
                    )
                else:
                    allowed_vertices = []
                # Drop everything that is neither relevant nor allowed, then
                # count the connected components made of relevant SKUs.
                all_vertices = {v.index for v in scene_graph.vs}
                vertices_to_remove = all_vertices.difference(
                    relevant_vertices.union(allowed_vertices))
                scene_graph.delete_vertices(vertices_to_remove)
                # removing clusters including 'allowed' SKUs only
                clusters = [
                    cluster for cluster in scene_graph.clusters()
                    if set(cluster).difference(allowed_vertices)
                ]
                if len(clusters) == 1:
                    return True
        else:
            Log.debug('None of the scenes contain relevant SKUs')
        return False

    def get_filter_condition(self, df, **filters):
        """
        :param df: The data frame to be filtered.
        :param filters: These are the parameters which the data frame is filtered by.
                       Every parameter would be a tuple of the value and an include/exclude flag.
                       INPUT EXAMPLE (1): manufacturer_name = ('Diageo', INCLUDE_FILTER)
                       INPUT EXAMPLE (2): manufacturer_name = 'Diageo'
        :return: a filtered Scene Item Facts data frame.
        """
        # Baseline condition: ignore rows without actual facings, when the
        # data frame has that column.
        if 'facings' in df.keys():
            filter_condition = (df['facings'] > 0)
        else:
            filter_condition = None
        for field in filters.keys():
            if field in df.keys():
                if isinstance(filters[field], tuple):
                    value, exclude_or_include = filters[field]
                else:
                    value, exclude_or_include = filters[
                        field], self.INCLUDE_FILTER
                if not isinstance(value, list):
                    value = [value]
                if exclude_or_include == self.INCLUDE_FILTER:
                    condition = (df[field].isin(value))
                elif exclude_or_include == self.EXCLUDE_FILTER:
                    condition = (~df[field].isin(value))
                else:
                    continue
                if filter_condition is None:
                    filter_condition = condition
                else:
                    filter_condition &= condition
            else:
                Log.warning('field {} is not in the Data Frame'.format(field))
        return filter_condition

    def separate_location_filters_from_product_filters(self, **filters):
        """
        This function gets scene-item-facts filters of all kinds, extracts the relevant scenes
        by the location filters, and returns them along with the product filters only.
        """
        location_filters = {}
        # Fix: iterate over a snapshot of the keys — popping from a dict
        # while iterating its live key view raises RuntimeError on Python 3.
        for field in list(filters.keys()):
            if field not in self.all_products.columns:
                location_filters[field] = filters.pop(field)
        relevant_scenes = self.scif[self.get_filter_condition(
            self.scif, **location_filters)]['scene_id'].unique()
        return filters, relevant_scenes

    @staticmethod
    def get_json_data(file_path, sheet_name=None, skiprows=0):
        """
        This function gets a file's path and extracts its content into a JSON.
        """
        data = {}
        if sheet_name:
            sheet_names = [sheet_name]
        else:
            sheet_names = xlrd.open_workbook(file_path).sheet_names()
        for sheet_name in sheet_names:
            try:
                output = pd.read_excel(file_path,
                                       sheetname=sheet_name,
                                       skiprows=skiprows)
            except xlrd.biffh.XLRDError:
                Log.warning('Sheet name {} doesn\'t exist'.format(sheet_name))
                return None
            output = output.to_json(orient='records')
            output = json.loads(output)
            data[sheet_name] = output
        if sheet_name:
            data = data[sheet_name]
        elif len(data.keys()) == 1:
            # Fix: dict.keys() is not subscriptable on Python 3; wrapping in
            # list() behaves identically on Python 2.
            data = data[list(data.keys())[0]]
        return data
class MarsUkGENERALToolBox:
    """
    Generic KPI calculation helpers (availability, assortment, facings/linear
    share-of-shelf, shelf-level assortment) over the session's Scene Item
    Facts and match-product-in-scene data.
    """

    EXCLUDE_FILTER = 0
    INCLUDE_FILTER = 1
    CONTAIN_FILTER = 2
    EXCLUDE_EMPTY = False
    INCLUDE_EMPTY = True
    STRICT_MODE = ALL = 1000
    EMPTY = 'Empty'
    DEFAULT = 'Default'
    TOP = 'Top'
    BOTTOM = 'Bottom'

    def __init__(self, data_provider, output, **kwargs):
        self.k_engine = BaseCalculationsGroup(data_provider, output)
        self.data_provider = data_provider
        self.project_name = self.data_provider.project_name
        self.session_uid = self.data_provider.session_uid
        self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
        self.all_products = self.data_provider[Data.ALL_PRODUCTS]
        # Any extra keyword argument becomes an instance attribute.
        for data in kwargs.keys():
            setattr(self, data, kwargs[data])

    @property
    def position_graphs(self):
        # Lazily built: graph construction is expensive, so it only happens
        # the first time a position-based KPI is calculated.
        if not hasattr(self, '_position_graphs'):
            self._position_graphs = MarsUkPositionGraphs(self.data_provider)
        return self._position_graphs

    @property
    def match_product_in_scene(self):
        # Lazily taken from the position graphs (triggers their creation).
        if not hasattr(self, '_match_product_in_scene'):
            self._match_product_in_scene = self.position_graphs.match_product_in_scene
        return self._match_product_in_scene

    def calculate_availability(self, **filters):
        """
        :param filters: These are the parameters which the data frame is filtered by.
        :return: Total number of SKUs facings appeared in the filtered Scene Item Facts data frame.
        """
        filtered_scif = self.scif[self.get_filter_condition(
            self.scif, **filters)]
        availability = filtered_scif['facings'].sum()
        return availability

    def calculate_assortment(self, assortment_entity='product_ean_code', minimum_assortment_for_entity=1, **filters):
        """
        :param assortment_entity: This is the entity on which the assortment is calculated.
        :param minimum_assortment_for_entity: This is the number of assortment per each unique entity in order
                                              for it to be counted in the final assortment result (default is 1).
        :param filters: These are the parameters which the data frame is filtered by.
        :return: Number of unique SKUs appeared in the filtered Scene Item Facts data frame.
        """
        # Fall back to match_product_in_scene when a filter field does not
        # exist in Scene Item Facts (e.g. shelf_number).
        if set(filters.keys()).difference(self.scif.keys()):
            filtered_df = self.match_product_in_scene[
                self.get_filter_condition(self.match_product_in_scene,
                                          **filters)]
        else:
            filtered_df = self.scif[self.get_filter_condition(
                self.scif, **filters)]
        if minimum_assortment_for_entity == 1:
            assortment = len(filtered_df[assortment_entity].unique())
        else:
            assortment = 0
            for entity_id in filtered_df[assortment_entity].unique():
                assortment_for_entity = filtered_df[
                    filtered_df[assortment_entity] == entity_id]
                if 'facings' in filtered_df.columns:
                    assortment_for_entity = assortment_for_entity[
                        'facings'].sum()
                else:
                    assortment_for_entity = len(assortment_for_entity)
                if assortment_for_entity >= minimum_assortment_for_entity:
                    assortment += 1
        return assortment

    def calculate_share_of_shelf(self, sos_filters=None, include_empty=EXCLUDE_EMPTY, **general_filters):
        """
        :param sos_filters: These are the parameters on which the SOS is calculated (out of the general DF).
        :param include_empty: This dictates whether Empty-typed SKUs are included in the calculation.
        :param general_filters: These are the parameters which the general data frame is filtered by.
        :return: The ratio of the SOS.
        """
        if include_empty == self.EXCLUDE_EMPTY:
            general_filters['product_type'] = (self.EMPTY, self.EXCLUDE_FILTER)
        pop_filter = self.get_filter_condition(self.scif, **general_filters)
        subset_filter = self.get_filter_condition(self.scif, **sos_filters)
        try:
            ratio = self.k_engine.calculate_sos_by_facings(
                pop_filter=pop_filter, subset_filter=subset_filter)
        except Exception:
            # Fix: narrowed from a bare except (which would also swallow
            # KeyboardInterrupt/SystemExit). Any calculation failure is
            # deliberately treated as a 0 ratio.
            ratio = 0
        if not isinstance(ratio, (float, int)):
            ratio = 0
        return ratio

    def calculate_linear_share_of_shelf(self, sos_filters=None, include_empty=EXCLUDE_EMPTY, **general_filters):
        """
        :param sos_filters: These are the parameters on which the SOS is calculated (out of the general DF).
        :param include_empty: This dictates whether Empty-typed SKUs are included in the calculation.
        :param general_filters: These are the parameters which the general data frame is filtered by.
        :return: The ratio of the SOS, by linear measure rather than facings.
        """
        if include_empty == self.EXCLUDE_EMPTY:
            general_filters['product_type'] = (self.EMPTY, self.EXCLUDE_FILTER)
        pop_filter = self.get_filter_condition(self.scif, **general_filters)
        subset_filter = self.get_filter_condition(self.scif, **sos_filters)
        try:
            ratio = self.k_engine.calculate_sos_by_linear(
                pop_filter=pop_filter, subset_filter=subset_filter)
        except Exception:
            # Fix: narrowed from a bare except; see calculate_share_of_shelf.
            ratio = 0
        if not isinstance(ratio, (float, int)):
            ratio = 0
        return ratio

    def calculate_shelf_level_assortment(self, shelves, from_top_or_bottom=TOP, **filters):
        """
        :param shelves: A shelf number (of type int or string), or a list of shelves (of type int or string).
        :param from_top_or_bottom: TOP for default shelf number (counted from top)
                                   or BOTTOM for shelf number counted from bottom.
        :param filters: These are the parameters which the data frame is filtered by.
        :return: Number of unique SKUs appeared in the filtered condition.
        """
        shelves = shelves if isinstance(shelves, list) else [shelves]
        shelves = [int(shelf) for shelf in shelves]
        if from_top_or_bottom == self.TOP:
            assortment = self.calculate_assortment(shelf_number=shelves,
                                                   **filters)
        else:
            assortment = self.calculate_assortment(
                shelf_number_from_bottom=shelves, **filters)
        return assortment

    def get_filter_condition(self, df, **filters):
        """
        :param df: The data frame to be filtered.
        :param filters: These are the parameters which the data frame is filtered by.
                       Every parameter would be a tuple of the value and an include/exclude flag.
                       INPUT EXAMPLE (1): manufacturer_name = ('Diageo', INCLUDE_FILTER)
                       INPUT EXAMPLE (2): manufacturer_name = 'Diageo'
        :return: a filtered Scene Item Facts data frame.
        """
        # Baseline condition: ignore rows without actual facings, when the
        # data frame has that column.
        if 'facings' in df.keys():
            filter_condition = (df['facings'] > 0)
        else:
            filter_condition = None
        for field in filters.keys():
            if field in df.keys():
                if isinstance(filters[field], tuple):
                    value, exclude_or_include = filters[field]
                else:
                    value, exclude_or_include = filters[
                        field], self.INCLUDE_FILTER
                # Empty/None filter values are silently skipped.
                if not value:
                    continue
                if not isinstance(value, list):
                    value = [value]
                if exclude_or_include == self.INCLUDE_FILTER:
                    condition = (df[field].isin(value))
                elif exclude_or_include == self.EXCLUDE_FILTER:
                    condition = (~df[field].isin(value))
                elif exclude_or_include == self.CONTAIN_FILTER:
                    # Substring match (OR over all the requested values).
                    condition = (df[field].str.contains(value[0], regex=False))
                    for v in value[1:]:
                        condition |= df[field].str.contains(v, regex=False)
                else:
                    continue
                if filter_condition is None:
                    filter_condition = condition
                else:
                    filter_condition &= condition
            else:
                Log.warning('field {} is not in the Data Frame'.format(field))
        return filter_condition

    def separate_location_filters_from_product_filters(self, **filters):
        """
        This function gets scene-item-facts filters of all kinds, extracts the relevant scenes
        by the location filters, and returns them along with the product filters only.
        """
        location_filters = {}
        # Fix: iterate over a snapshot of the keys — popping from a dict
        # while iterating its live key view raises RuntimeError on Python 3.
        for field in list(filters.keys()):
            if field not in self.all_products.columns:
                location_filters[field] = filters.pop(field)
        relevant_scenes = self.scif[self.get_filter_condition(
            self.scif, **location_filters)]['scene_id'].unique()
        return filters, relevant_scenes
class PEPSICOBR_SANDGENERALToolBox:
    """
    General-purpose KPI calculation helpers (availability, assortment, SOS,
    blocking, sequencing, relative position) over the session's scene item
    facts and position graphs.
    """

    # Filter mode flags used by get_filter_condition().
    EXCLUDE_FILTER = 0
    INCLUDE_FILTER = 1
    CONTAIN_FILTER = 2
    EXCLUDE_EMPTY = False
    INCLUDE_EMPTY = True
    # Sentinel: "all products must pass" / strict sequence mode.
    STRICT_MODE = ALL = 1000

    EMPTY = 'Empty'
    DEFAULT = 'Default'
    TOP = 'Top'
    BOTTOM = 'Bottom'

    def __init__(self, data_provider, output, rds_conn=None, ignore_stacking=False, front_facing=False, **kwargs):
        """
        :param data_provider: Trax data provider for the current session.
        :param output: Calculation engine output handle.
        :param rds_conn: Optional DB connection, forwarded to the position-graphs helper.
        :param ignore_stacking: When True, use the 'facings_ign_stack' field instead of 'facings'.
        :param front_facing: When True, restrict scene item facts to front-facing tags only.
        :param kwargs: Extra attributes set on the instance as-is (e.g. eye_level_configurations).
        """
        self.k_engine = BaseCalculationsGroup(data_provider, output)
        self.rds_conn = rds_conn
        self.data_provider = data_provider
        self.project_name = self.data_provider.project_name
        self.session_uid = self.data_provider.session_uid
        self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
        self.all_products = self.data_provider[Data.ALL_PRODUCTS]
        self.survey_response = self.data_provider[Data.SURVEY_RESPONSES]
        # Enrich scenes info with template data for location-based filtering.
        self.scenes_info = self.data_provider[Data.SCENES_INFO].merge(
            self.data_provider[Data.ALL_TEMPLATES], how='left', on='template_fk', suffixes=['', '_y'])
        self.ignore_stacking = ignore_stacking
        self.facings_field = 'facings' if not self.ignore_stacking else 'facings_ign_stack'
        self.front_facing = front_facing
        for data in kwargs.keys():
            setattr(self, data, kwargs[data])
        if self.front_facing:
            self.scif = self.scif[self.scif['front_face_count'] == 1]

    @property
    def position_graphs(self):
        # Lazily built: position graphs are expensive and not needed by every KPI.
        if not hasattr(self, '_position_graphs'):
            self._position_graphs = PEPSICOBR_SANDPositionGraphs(
                self.data_provider, rds_conn=self.rds_conn)
        return self._position_graphs

    @property
    def match_product_in_scene(self):
        # Lazily derived from the position graphs, filtered per the instance flags.
        if not hasattr(self, '_match_product_in_scene'):
            self._match_product_in_scene = self.position_graphs.match_product_in_scene
            if self.front_facing:
                self._match_product_in_scene = self._match_product_in_scene[
                    self._match_product_in_scene['front_facing'] == 'Y']
            if self.ignore_stacking:
                self._match_product_in_scene = self._match_product_in_scene[
                    self._match_product_in_scene['stacking_layer'] == 1]
        return self._match_product_in_scene

    def get_survey_answer(self, survey_data, answer_field=None):
        """
        :param survey_data: 1) str - The name of the survey in the DB.
                            2) tuple - (The field name, the field value). For example: ('question_fk', 13)
        :param answer_field: The DB field from which the answer is extracted. Default is the usual hierarchy.
        :return: The required survey response, or None if the survey was not answered.
        """
        if not isinstance(survey_data, (list, tuple)):
            entity = 'question_text'
            value = survey_data
        else:
            entity, value = survey_data
        survey = self.survey_response[self.survey_response[entity] == value]
        if survey.empty:
            return None
        survey = survey.iloc[0]
        if answer_field is None or answer_field not in survey.keys():
            # Prefer the textual answer; fall back to the numeric one.
            answer_field = 'selected_option_text' if survey[
                'selected_option_text'] else 'number_value'
        survey_answer = survey[answer_field]
        return survey_answer

    def check_survey_answer(self, survey_text, target_answer):
        """
        :param survey_text: 1) str - The name of the survey in the DB.
                            2) tuple - (The field name, the field value). For example: ('question_fk', 13)
        :param target_answer: The required answer/s for the KPI to pass.
        :return: True if the answer matches the target; otherwise - False. None if the survey doesn't exist.
        """
        if not isinstance(survey_text, (list, tuple)):
            entity = 'question_text'
            value = survey_text
        else:
            entity, value = survey_text
        value = [value] if not isinstance(value, list) else value
        survey_data = self.survey_response[self.survey_response[entity].isin(value)]
        if survey_data.empty:
            Log.warning('Survey with {} = {} doesn\'t exist'.format(entity, value))
            return None
        answer_field = 'selected_option_text' if not survey_data[
            'selected_option_text'].empty else 'number_value'
        target_answers = [target_answer] if not isinstance(
            target_answer, (list, tuple)) else target_answer
        survey_answers = survey_data[answer_field].values.tolist()
        for answer in target_answers:
            if answer in survey_answers:
                return True
        return False

    def calculate_number_of_scenes(self, **filters):
        """
        :param filters: These are the parameters which the data frame is filtered by.
        :return: The number of scenes matching the filtered Scene Item Facts data frame.
        """
        if filters:
            scene_data = self.scenes_info[self.get_filter_condition(
                self.scenes_info, **filters)]
        else:
            scene_data = self.scenes_info
        number_of_scenes = len(scene_data['scene_fk'].unique().tolist())
        return number_of_scenes

    def calculate_availability(self, **filters):
        """
        :param filters: These are the parameters which the data frame is filtered by.
        :return: Total number of SKUs facings appeared in the filtered Scene Item Facts data frame.
        """
        # If any filter field is not a scif column, fall back to match_product_in_scene.
        if set(filters.keys()).difference(self.scif.keys()):
            filtered_df = self.match_product_in_scene[
                self.get_filter_condition(self.match_product_in_scene, **filters)]
        else:
            filtered_df = self.scif[self.get_filter_condition(
                self.scif, **filters)]
        if self.facings_field in filtered_df.columns:
            availability = filtered_df[self.facings_field].sum()
        else:
            # Tag-level data: every row is one facing.
            availability = len(filtered_df)
        return availability

    def calculate_assortment(self, assortment_entity='product_ean_code', minimum_assortment_for_entity=1, **filters):
        """
        :param assortment_entity: This is the entity on which the assortment is calculated.
        :param minimum_assortment_for_entity: This is the number of assortment per each unique entity
                                              in order for it to be counted in the final assortment
                                              result (default is 1).
        :param filters: These are the parameters which the data frame is filtered by.
        :return: Number of unique SKUs appeared in the filtered Scene Item Facts data frame.
        """
        if set(filters.keys()).difference(self.scif.keys()):
            filtered_df = self.match_product_in_scene[
                self.get_filter_condition(self.match_product_in_scene, **filters)]
        else:
            filtered_df = self.scif[self.get_filter_condition(
                self.scif, **filters)]
        if minimum_assortment_for_entity == 1:
            assortment = len(filtered_df[assortment_entity].unique())
        else:
            # Count only entities whose total facings reach the minimum.
            assortment = 0
            for entity_id in filtered_df[assortment_entity].unique():
                assortment_for_entity = filtered_df[
                    filtered_df[assortment_entity] == entity_id]
                if self.facings_field in filtered_df.columns:
                    assortment_for_entity = assortment_for_entity[
                        self.facings_field].sum()
                else:
                    assortment_for_entity = len(assortment_for_entity)
                if assortment_for_entity >= minimum_assortment_for_entity:
                    assortment += 1
        return assortment

    def calculate_share_of_shelf(self, sos_filters=None, include_empty=EXCLUDE_EMPTY, **general_filters):
        """
        :param sos_filters: These are the parameters on which ths SOS is calculated (out of the general DF).
        :param include_empty: This dictates whether Empty-typed SKUs are included in the calculation.
        :param general_filters: These are the parameters which the general data frame is filtered by.
        :return: The ratio of the Facings SOS.
        """
        if include_empty == self.EXCLUDE_EMPTY:
            general_filters['product_type'] = (self.EMPTY, self.EXCLUDE_FILTER)
        pop_filter = self.get_filter_condition(self.scif, **general_filters)
        subset_filter = self.get_filter_condition(self.scif, **sos_filters)
        try:
            ratio = self.k_engine.calculate_sos_by_facings(
                pop_filter=pop_filter, subset_filter=subset_filter)
        except Exception:  # was a bare except: keep best-effort semantics, but stop trapping SystemExit etc.
            ratio = 0
        if not isinstance(ratio, (float, int)):
            ratio = 0
        return ratio

    def calculate_linear_share_of_shelf(self, sos_filters, include_empty=EXCLUDE_EMPTY, **general_filters):
        """
        :param sos_filters: These are the parameters on which ths SOS is calculated (out of the general DF).
        :param include_empty: This dictates whether Empty-typed SKUs are included in the calculation.
        :param general_filters: These are the parameters which the general data frame is filtered by.
        :return: The Linear SOS ratio.
        """
        if include_empty == self.EXCLUDE_EMPTY:
            general_filters['product_type'] = (self.EMPTY, self.EXCLUDE_FILTER)
        numerator_width = self.calculate_share_space_length(
            **dict(sos_filters, **general_filters))
        denominator_width = self.calculate_share_space_length(**general_filters)
        if denominator_width == 0:
            ratio = 0
        else:
            ratio = numerator_width / float(denominator_width)
        return ratio

    def calculate_share_space_length(self, **filters):
        """
        :param filters: These are the parameters which the data frame is filtered by.
        :return: The total shelf width (in mm) the relevant facings occupy.
        """
        filtered_matches = self.match_product_in_scene[
            self.get_filter_condition(self.match_product_in_scene, **filters)]
        space_length = filtered_matches['width_mm_advance'].sum()
        return space_length

    def calculate_products_on_edge(self, min_number_of_facings=1, min_number_of_shelves=1, **filters):
        """
        :param min_number_of_facings: Minimum number of edge facings for KPI to pass.
        :param min_number_of_shelves: Minimum number of different shelves with edge facings for KPI to pass.
        :param filters: This are the parameters which dictate the relevant SKUs for the edge calculation.
        :return: A tuple: (Number of scenes which pass, Total number of relevant scenes)
        """
        filters, relevant_scenes = self.separate_location_filters_from_product_filters(**filters)
        if len(relevant_scenes) == 0:
            return 0, 0
        number_of_edge_scenes = 0
        for scene in relevant_scenes:
            edge_facings = pd.DataFrame(columns=self.match_product_in_scene.columns)
            matches = self.match_product_in_scene[
                self.match_product_in_scene['scene_fk'] == scene]
            for shelf in matches['shelf_number'].unique():
                shelf_matches = matches[matches['shelf_number'] == shelf]
                if not shelf_matches.empty:
                    shelf_matches = shelf_matches.sort_values(
                        by=['bay_number', 'facing_sequence_number'])
                    # Take the left-most facing, and the right-most one when the shelf has more
                    # than a single facing.
                    edge_facings = edge_facings.append(shelf_matches.iloc[0])
                    # BUGFIX: was `len(edge_facings) > 1`, which skipped the far edge of the
                    # first shelf and unconditionally appended it for every later shelf.
                    if len(shelf_matches) > 1:
                        edge_facings = edge_facings.append(shelf_matches.iloc[-1])
            edge_facings = edge_facings[self.get_filter_condition(edge_facings, **filters)]
            if len(edge_facings) >= min_number_of_facings \
                    and len(edge_facings['shelf_number'].unique()) >= min_number_of_shelves:
                number_of_edge_scenes += 1
        return number_of_edge_scenes, len(relevant_scenes)

    def calculate_shelf_level_assortment(self, shelves, from_top_or_bottom=TOP, **filters):
        """
        :param shelves: A shelf number (of type int or string), or a list of shelves (of type int or string).
        :param from_top_or_bottom: TOP for default shelf number (counted from top) or
                                   BOTTOM for shelf number counted from bottom.
        :param filters: These are the parameters which the data frame is filtered by.
        :return: Number of unique SKUs appeared in the filtered condition.
        """
        shelves = shelves if isinstance(shelves, list) else [shelves]
        shelves = [int(shelf) for shelf in shelves]
        if from_top_or_bottom == self.TOP:
            assortment = self.calculate_assortment(shelf_number=shelves, **filters)
        else:
            assortment = self.calculate_assortment(
                shelf_number_from_bottom=shelves, **filters)
        return assortment

    def calculate_eye_level_assortment(self, eye_level_configurations=DEFAULT, min_number_of_products=ALL, **filters):
        """
        :param eye_level_configurations: A data frame containing information about shelves to ignore
                                         (==not eye level) for every number of shelves in each bay.
        :param min_number_of_products: Minimum number of eye level unique SKUs for KPI to pass.
        :param filters: This are the parameters which dictate the relevant SKUs for the eye-level calculation.
        :return: A tuple: (Number of scenes which pass, Total number of relevant scenes)
        """
        filters, relevant_scenes = self.separate_location_filters_from_product_filters(**filters)
        if len(relevant_scenes) == 0:
            return 0, 0
        if eye_level_configurations == self.DEFAULT:
            if hasattr(self, 'eye_level_configurations'):
                eye_level_configurations = self.eye_level_configurations
            else:
                Log.error('Eye-level configurations are not set up')
                return False
        number_of_products = len(self.all_products[self.get_filter_condition(
            self.all_products, **filters)]['product_ean_code'])
        min_shelf, max_shelf, min_ignore, max_ignore = eye_level_configurations.columns
        number_of_eye_level_scenes = 0
        for scene in relevant_scenes:
            eye_level_facings = pd.DataFrame(columns=self.match_product_in_scene.columns)
            matches = self.match_product_in_scene[
                self.match_product_in_scene['scene_fk'] == scene]
            for bay in matches['bay_number'].unique():
                bay_matches = matches[matches['bay_number'] == bay]
                number_of_shelves = bay_matches['shelf_number'].max()
                # Pick the ignore-configuration matching this bay's shelf count.
                configuration = eye_level_configurations[
                    (eye_level_configurations[min_shelf] <= number_of_shelves) &
                    (eye_level_configurations[max_shelf] >= number_of_shelves)]
                if not configuration.empty:
                    configuration = configuration.iloc[0]
                else:
                    configuration = {min_ignore: 0, max_ignore: 0}
                min_include = configuration[min_ignore] + 1
                max_include = number_of_shelves - configuration[max_ignore]
                eye_level_shelves = bay_matches[bay_matches['shelf_number'].between(
                    min_include, max_include)]
                eye_level_facings = eye_level_facings.append(eye_level_shelves)
            eye_level_assortment = len(eye_level_facings[self.get_filter_condition(
                eye_level_facings, **filters)]['product_ean_code'])
            if min_number_of_products == self.ALL:
                min_number_of_products = number_of_products
            if eye_level_assortment >= min_number_of_products:
                number_of_eye_level_scenes += 1
        return number_of_eye_level_scenes, len(relevant_scenes)

    def shelf_level_assortment(self, min_number_of_products, shelf_target, strict=True, **filters):
        """
        Checks whether enough of the filtered products appear on the target shelves.

        :param min_number_of_products: Minimum number of products on the target shelves for a pass.
        :param shelf_target: Collection of shelf numbers counted as the target.
        :param strict: Unused; kept for interface compatibility.
        :param filters: Product filters dictating the relevant SKUs.
        :return: 1 if at least one scene passes, otherwise 0.
        """
        filters, relevant_scenes = self.separate_location_filters_from_product_filters(**filters)
        if len(relevant_scenes) == 0:
            relevant_scenes = self.scif['scene_fk'].unique().tolist()
        # Relevant SKUs (EAN codes) for the target-shelf membership test below.
        relevant_skus = self.all_products[self.get_filter_condition(
            self.all_products, **filters)]['product_ean_code'].unique().tolist()
        result = 0  # Default score is FALSE
        for scene in relevant_scenes:
            eye_level_facings = pd.DataFrame(columns=self.match_product_in_scene.columns)
            matches = pd.merge(
                self.match_product_in_scene[self.match_product_in_scene['scene_fk'] == scene],
                self.all_products, on=['product_fk'])
            for bay in matches['bay_number'].unique():
                bay_matches = matches[matches['bay_number'] == bay]
                # BUGFIX: the original passed an int (a len()) to .isin(), which raises
                # TypeError; the intent is membership in the relevant EAN codes.
                products_in_target_shelf = bay_matches[
                    (bay_matches['shelf_number'].isin(shelf_target)) &
                    (bay_matches['product_ean_code'].isin(relevant_skus))]
                eye_level_facings = eye_level_facings.append(products_in_target_shelf)
            eye_level_assortment = len(eye_level_facings[self.get_filter_condition(
                eye_level_facings, **filters)]['product_ean_code'])
            if eye_level_assortment >= min_number_of_products:
                result = 1
        return result

    def calculate_product_sequence(self, sequence_filters, direction, empties_allowed=True,
                                   irrelevant_allowed=False, min_required_to_pass=STRICT_MODE,
                                   custom_graph=None, **general_filters):
        """
        :param sequence_filters: One of the following:
                        1- a list of dictionaries, each containing the filters values of an organ in the sequence.
                        2- a tuple of (entity_type, [value1, value2, value3...]) in case every organ in the
                           sequence is defined by only one filter (and of the same entity, such as brand_name, etc).
        :param direction: left/right/top/bottom - the direction of the sequence.
        :param empties_allowed: This dictates whether or not the sequence can be interrupted by Empty facings.
        :param irrelevant_allowed: This dictates whether or not the sequence can be interrupted by facings
                                   which are not in the sequence.
        :param min_required_to_pass: The number of sequences needed to exist in order for KPI to pass.
                                     If STRICT_MODE is activated, the KPI passes only if it has NO rejects.
        :param custom_graph: A filtered Positions graph - given in case only certain vertices need to be checked.
        :param general_filters: These are the parameters which the general data frame is filtered by.
        :return: True if the KPI passes; otherwise False.
        """
        if isinstance(sequence_filters, (list, tuple)) and isinstance(sequence_filters[0], (str, unicode)):
            # Expand the (entity, [values...]) shorthand to a list of single-filter dicts.
            sequence_filters = [{sequence_filters[0]: values}
                                for values in sequence_filters[1]]
        pass_counter = 0
        reject_counter = 0
        if not custom_graph:
            filtered_scif = self.scif[self.get_filter_condition(
                self.scif, **general_filters)]
            scenes = set(filtered_scif['scene_id'].unique())
            # Only scenes containing every organ of the sequence are candidates.
            for filters in sequence_filters:
                scene_for_filters = filtered_scif[self.get_filter_condition(
                    filtered_scif, **filters)]['scene_id'].unique()
                scenes = scenes.intersection(scene_for_filters)
            if not scenes:
                Log.debug('None of the scenes include products from all types relevant for sequence')
                return True
            for scene in scenes:
                scene_graph = self.position_graphs.get(scene)
                scene_passes, scene_rejects = self.calculate_sequence_for_graph(
                    scene_graph, sequence_filters, direction, empties_allowed, irrelevant_allowed)
                pass_counter += scene_passes
                reject_counter += scene_rejects
                if pass_counter >= min_required_to_pass:
                    return True
                elif min_required_to_pass == self.STRICT_MODE and reject_counter > 0:
                    return False
            # BUGFIX: the original fell off the end here and returned an implicit None;
            # make the "not enough passing sequences" outcome an explicit False.
            return False
        else:
            scene_passes, scene_rejects = self.calculate_sequence_for_graph(
                custom_graph, sequence_filters, direction, empties_allowed, irrelevant_allowed)
            pass_counter += scene_passes
            reject_counter += scene_rejects
            if pass_counter >= min_required_to_pass or reject_counter == 0:
                return True
            else:
                return False

    def calculate_sequence_for_graph(self, graph, sequence_filters, direction,
                                     empties_allowed, irrelevant_allowed):
        """
        This function checks for a sequence given a position graph (either a full scene graph
        or a customized one).

        :return: A tuple of (number of passing sequences, number of rejected sequences).
        """
        pass_counter = 0
        reject_counter = 0

        # Keep only edges pointing in the requested direction (and, separately, the
        # reverse direction, used to verify the tested organ is really first).
        filtered_scene_graph = graph.copy()
        edges_to_remove = filtered_scene_graph.es.select(direction_ne=direction)
        filtered_scene_graph.delete_edges([edge.index for edge in edges_to_remove])

        reversed_scene_graph = graph.copy()
        edges_to_remove = reversed_scene_graph.es.select(
            direction_ne=self._reverse_direction(direction))
        reversed_scene_graph.delete_edges([edge.index for edge in edges_to_remove])

        vertices_list = []
        for filters in sequence_filters:
            vertices_list.append(self.filter_vertices_from_graph(graph, **filters))
        # The first organ is the "tested" one; the rest must follow it in order.
        tested_vertices, sequence_vertices = vertices_list[0], vertices_list[1:]
        vertices_list = reduce(lambda x, y: x + y, sequence_vertices)

        sequences = []
        for vertex in tested_vertices:
            # If a later organ appears BEFORE the tested vertex, the sequence is broken.
            previous_sequences = self.get_positions_by_direction(reversed_scene_graph, vertex)
            if previous_sequences and set(vertices_list).intersection(
                    reduce(lambda x, y: x + y, previous_sequences)):
                reject_counter += 1
                continue
            next_sequences = self.get_positions_by_direction(filtered_scene_graph, vertex)
            sequences.extend(next_sequences)

        sequences = self._filter_sequences(sequences)
        for sequence in sequences:
            all_products_appeared = True
            empties_found = False
            irrelevant_found = False
            full_sequence = False
            broken_sequence = False
            current_index = 0
            previous_vertices = list(tested_vertices)

            for vertices in sequence_vertices:
                if not set(sequence).intersection(vertices):
                    all_products_appeared = False
                    break

            for vindex in sequence:
                vertex = graph.vs[vindex]
                if vindex not in vertices_list and vindex not in tested_vertices:
                    # An interrupting facing: classify it as empty or irrelevant.
                    if current_index < len(sequence_vertices):
                        if vertex['product_type'] == self.EMPTY:
                            empties_found = True
                        else:
                            irrelevant_found = True
                elif vindex in previous_vertices:
                    pass  # still inside the current organ
                elif vindex in sequence_vertices[current_index]:
                    # Advanced to the next organ in order.
                    previous_vertices = list(sequence_vertices[current_index])
                    current_index += 1
                else:
                    broken_sequence = True

            if current_index == len(sequence_vertices):
                full_sequence = True

            if broken_sequence:
                reject_counter += 1
            elif full_sequence:
                if not empties_allowed and empties_found:
                    reject_counter += 1
                elif not irrelevant_allowed and irrelevant_found:
                    reject_counter += 1
                elif all_products_appeared:
                    pass_counter += 1
        return pass_counter, reject_counter

    @staticmethod
    def _reverse_direction(direction):
        """
        This function returns the opposite of a given direction.
        Unknown directions are returned unchanged.
        """
        if direction == 'top':
            new_direction = 'bottom'
        elif direction == 'bottom':
            new_direction = 'top'
        elif direction == 'left':
            new_direction = 'right'
        elif direction == 'right':
            new_direction = 'left'
        else:
            new_direction = direction
        return new_direction

    def get_positions_by_direction(self, graph, vertex_index):
        """
        This function gets a filtered graph (contains only edges of a relevant direction)
        and a Vertex index, and returns all sequences starting in it (until it gets to a dead end).
        """
        sequences = []
        edges = [graph.es[e] for e in graph.incident(vertex_index)]
        next_vertices = [edge.target for edge in edges]
        for vertex in next_vertices:
            # Recurse until a vertex with no outgoing edges is reached.
            next_sequences = self.get_positions_by_direction(graph, vertex)
            if not next_sequences:
                sequences.append([vertex])
            else:
                for sequence in next_sequences:
                    sequences.append([vertex] + sequence)
        return sequences

    @staticmethod
    def _filter_sequences(sequences):
        """
        This function receives a list of sequences (lists of indexes), and removes sequences
        which can be represented by a shorter sequence (which is also in the list).
        """
        if not sequences:
            return sequences
        # Sorting by (last vertex, length) puts the shortest sequence per endpoint first.
        sequences = sorted(sequences, key=lambda x: (x[-1], len(x)))
        filtered_sequences = [sequences[0]]
        for sequence in sequences[1:]:
            if sequence[-1] != filtered_sequences[-1][-1]:
                filtered_sequences.append(sequence)
        return filtered_sequences

    def calculate_non_proximity(self, tested_filters, anchor_filters, allowed_diagonal=False, **general_filters):
        """
        :param tested_filters: The tested SKUs' filters.
        :param anchor_filters: The anchor SKUs' filters.
        :param allowed_diagonal: True - a tested SKU can be in a direct diagonal from an anchor SKU
                                 in order for the KPI to pass; False - a diagonal proximity is NOT allowed.
        :param general_filters: These are the parameters which the general data frame is filtered by.
        :return: True if NO tested SKU is adjacent to an anchor SKU; otherwise False.
        """
        direction_data = []
        if allowed_diagonal:
            # Two separate checks: vertical-only and horizontal-only adjacency.
            direction_data.append({'top': (0, 1), 'bottom': (0, 1)})
            direction_data.append({'right': (0, 1), 'left': (0, 1)})
        else:
            direction_data.append({'top': (0, 1), 'bottom': (0, 1),
                                   'right': (0, 1), 'left': (0, 1)})
        is_proximity = self.calculate_relative_position(
            tested_filters, anchor_filters, direction_data, min_required_to_pass=1, **general_filters)
        return not is_proximity

    def calculate_relative_position(self, tested_filters, anchor_filters, direction_data,
                                    min_required_to_pass=1, **general_filters):
        """
        :param tested_filters: The tested SKUs' filters.
        :param anchor_filters: The anchor SKUs' filters.
        :param direction_data: The allowed distance between the tested and anchor SKUs.
                               In form: {'top': 4, 'bottom: 0, 'left': 100, 'right': 0}
                               Alternative form: {'top': (0, 1), 'bottom': (1, 1000), ...} - As range.
        :param min_required_to_pass: The number of appearances needed to be True for relative position
                                     in order for KPI to pass. If all appearances are required:
                                     ==a string or a big number.
        :param general_filters: These are the parameters which the general data frame is filtered by.
        :return: True if (at least) one pair of relevant SKUs fits the distance requirements;
                 otherwise - returns False.
        """
        filtered_scif = self.scif[self.get_filter_condition(self.scif, **general_filters)]
        tested_scenes = filtered_scif[self.get_filter_condition(
            filtered_scif, **tested_filters)]['scene_id'].unique()
        anchor_scenes = filtered_scif[self.get_filter_condition(
            filtered_scif, **anchor_filters)]['scene_id'].unique()
        relevant_scenes = set(tested_scenes).intersection(anchor_scenes)

        if relevant_scenes:
            pass_counter = 0
            reject_counter = 0
            for scene in relevant_scenes:
                scene_graph = self.position_graphs.get(scene)
                tested_vertices = self.filter_vertices_from_graph(scene_graph, **tested_filters)
                anchor_vertices = self.filter_vertices_from_graph(scene_graph, **anchor_filters)
                for tested_vertex in tested_vertices:
                    for anchor_vertex in anchor_vertices:
                        moves = {'top': 0, 'bottom': 0, 'left': 0, 'right': 0}
                        path = scene_graph.get_shortest_paths(
                            anchor_vertex, tested_vertex, output='epath')
                        if path:
                            path = path[0]
                            # Tally the moves per direction along the shortest path.
                            for edge in path:
                                moves[scene_graph.es[edge]['direction']] += 1
                            if self.validate_moves(moves, direction_data):
                                pass_counter += 1
                                if isinstance(min_required_to_pass, int) \
                                        and pass_counter >= min_required_to_pass:
                                    return True
                            else:
                                reject_counter += 1
                        else:
                            Log.debug('Tested and Anchor have no direct path')
            if pass_counter > 0 and reject_counter == 0:
                return True
            else:
                return False
        else:
            Log.debug('None of the scenes contain both anchor and tested SKUs')
            return False

    def filter_vertices_from_graph(self, graph, **filters):
        """
        This function is given a graph and returns a set of vertices calculated by a given
        set of filters. With no filters, all the graph's vertices are returned.
        """
        vertices_indexes = None
        for field in filters.keys():
            field_vertices = set()
            values = filters[field] if isinstance(
                filters[field], (list, tuple)) else [filters[field]]
            for value in values:
                vertices = [v.index for v in graph.vs.select(**{field: value})]
                field_vertices = field_vertices.union(vertices)
            if vertices_indexes is None:
                vertices_indexes = field_vertices
            else:
                # Fields are AND-ed; values within a field are OR-ed.
                vertices_indexes = vertices_indexes.intersection(field_vertices)
        vertices_indexes = vertices_indexes if vertices_indexes is not None else [
            v.index for v in graph.vs]
        if self.front_facing:
            front_facing_vertices = [v.index for v in graph.vs.select(front_facing='Y')]
            vertices_indexes = set(vertices_indexes).intersection(front_facing_vertices)
        return list(vertices_indexes)

    @staticmethod
    def validate_moves(moves, direction_data):
        """
        This function checks whether the distance between the anchor and the tested SKUs
        fits the requirements. Directions absent from a requirement default to (0, 0),
        i.e. no move allowed; a bare number means "0 up to that number".
        """
        direction_data = direction_data if isinstance(
            direction_data, (list, tuple)) else [direction_data]
        validated = False
        for data in direction_data:
            data_validated = True
            for direction in moves.keys():
                allowed_moves = data.get(direction, (0, 0))
                min_move, max_move = allowed_moves if isinstance(
                    allowed_moves, tuple) else (0, allowed_moves)
                if not min_move <= moves[direction] <= max_move:
                    data_validated = False
                    break
            if data_validated:
                validated = True
                break
        return validated

    def calculate_block_together(self, allowed_products_filters=None, include_empty=EXCLUDE_EMPTY,
                                 minimum_block_ratio=1, result_by_scene=False, **filters):
        """
        :param allowed_products_filters: These are the parameters which are allowed to corrupt
                                         the block without failing it.
        :param include_empty: This parameter dictates whether or not to discard Empty-typed products.
        :param minimum_block_ratio: The minimum (block number of facings / total number of relevant facings)
                                    ratio in order for KPI to pass (if ratio=1, then only one block is allowed).
        :param result_by_scene: True - The result is a tuple of (number of passed scenes, total relevant scenes);
                                False - The result is True if at least one scene has a block, False - otherwise.
        :param filters: These are the parameters which the blocks are checked for.
        :return: see 'result_by_scene' above.
        """
        filters, relevant_scenes = self.separate_location_filters_from_product_filters(**filters)
        if len(relevant_scenes) == 0:
            if result_by_scene:
                return 0, 0
            else:
                Log.debug('Block Together: No relevant SKUs were found for these filters {}'.format(filters))
                return True
        number_of_blocked_scenes = 0
        cluster_ratios = []
        for scene in relevant_scenes:
            scene_graph = self.position_graphs.get(scene).copy()
            relevant_vertices = set(self.filter_vertices_from_graph(scene_graph, **filters))
            if allowed_products_filters:
                allowed_vertices = self.filter_vertices_from_graph(
                    scene_graph, **allowed_products_filters)
            else:
                allowed_vertices = set()
            if include_empty == self.EXCLUDE_EMPTY:
                empty_vertices = {v.index for v in scene_graph.vs.select(product_type='Empty')}
                allowed_vertices = set(allowed_vertices).union(empty_vertices)
            all_vertices = {v.index for v in scene_graph.vs}
            vertices_to_remove = all_vertices.difference(
                relevant_vertices.union(allowed_vertices))
            scene_graph.delete_vertices(vertices_to_remove)
            # removing clusters including 'allowed' SKUs only
            clusters = [cluster for cluster in scene_graph.clusters()
                        if set(cluster).difference(allowed_vertices)]
            new_relevant_vertices = self.filter_vertices_from_graph(scene_graph, **filters)
            for cluster in clusters:
                relevant_vertices_in_cluster = set(cluster).intersection(new_relevant_vertices)
                if len(new_relevant_vertices) > 0:
                    cluster_ratio = len(relevant_vertices_in_cluster) / float(
                        len(new_relevant_vertices))
                else:
                    cluster_ratio = 0
                cluster_ratios.append(cluster_ratio)
                if cluster_ratio >= minimum_block_ratio:
                    if result_by_scene:
                        number_of_blocked_scenes += 1
                        break
                    else:
                        if minimum_block_ratio == 1:
                            return True
                        else:
                            # Return the winning cluster as a trimmed sub-graph.
                            all_vertices = {v.index for v in scene_graph.vs}
                            non_cluster_vertices = all_vertices.difference(cluster)
                            scene_graph.delete_vertices(non_cluster_vertices)
                            return cluster_ratio, scene_graph
        if result_by_scene:
            return number_of_blocked_scenes, len(relevant_scenes)
        else:
            return False if minimum_block_ratio == 1 else max(cluster_ratios)

    def get_product_unique_position_on_shelf(self, scene_id, shelf_number, include_empty=False, **filters):
        """
        :param scene_id: The scene ID.
        :param shelf_number: The number of shelf in question (from top).
        :param include_empty: This dictates whether or not to include empties as valid positions.
        :param filters: These are the parameters which the unique position is checked for.
        :return: The position of the first SKU (from the given filters) to appear in the specific shelf.
        """
        shelf_matches = self.match_product_in_scene[
            (self.match_product_in_scene['scene_fk'] == scene_id) &
            (self.match_product_in_scene['shelf_number'] == shelf_number)]
        if not include_empty:
            filters['product_type'] = ('Empty', self.EXCLUDE_FILTER)
        if filters and shelf_matches[self.get_filter_condition(shelf_matches, **filters)].empty:
            Log.info("Products of '{}' are not tagged in shelf number {}".format(filters, shelf_number))
            return None
        shelf_matches = shelf_matches.sort_values(by=['bay_number', 'facing_sequence_number'])
        shelf_matches = shelf_matches.drop_duplicates(subset=['product_ean_code'])
        positions = []
        for m in xrange(len(shelf_matches)):
            match = shelf_matches.iloc[m]
            match_name = 'Empty' if match['product_type'] == 'Empty' else match['product_ean_code']
            # Collapse consecutive identical entries into one position.
            if positions and positions[-1] == match_name:
                continue
            positions.append(match_name)
        return positions

    def get_filter_condition(self, df, **filters):
        """
        :param df: The data frame to be filters.
        :param filters: These are the parameters which the data frame is filtered by.
                        Every parameter would be a tuple of the value and an include/exclude flag.
                        INPUT EXAMPLE (1): manufacturer_name = ('Diageo', DIAGEOAUPEPSICOGENERALToolBox.INCLUDE_FILTER)
                        INPUT EXAMPLE (2): manufacturer_name = 'Diageo'
        :return: a filtered Scene Item Facts data frame.
        """
        # Start from "has facings" when the column exists, so zero-facing rows are dropped.
        if self.facings_field in df.keys():
            filter_condition = (df[self.facings_field] > 0)
        else:
            filter_condition = None
        for field in filters.keys():
            if field in df.keys():
                if isinstance(filters[field], tuple):
                    value, exclude_or_include = filters[field]
                else:
                    value, exclude_or_include = filters[field], self.INCLUDE_FILTER
                if not value:
                    continue
                if not isinstance(value, list):
                    value = [value]
                if exclude_or_include == self.INCLUDE_FILTER:
                    condition = (df[field].isin(value))
                elif exclude_or_include == self.EXCLUDE_FILTER:
                    condition = (~df[field].isin(value))
                elif exclude_or_include == self.CONTAIN_FILTER:
                    # OR together a substring match per requested value.
                    condition = (df[field].str.contains(value[0], regex=False))
                    for v in value[1:]:
                        condition |= df[field].str.contains(v, regex=False)
                else:
                    continue
                if filter_condition is None:
                    filter_condition = condition
                else:
                    # Field filters are AND-ed together.
                    filter_condition &= condition
            else:
                Log.warning('field {} is not in the Data Frame'.format(field))
        return filter_condition

    def separate_location_filters_from_product_filters(self, **filters):
        """
        This function gets scene-item-facts filters of all kinds, extracts the relevant scenes
        by the location filters, and returns them along with the product filters only.
        """
        location_filters = {}
        # Safe on Python 2 only: keys() returns a list snapshot, so pop() doesn't break iteration.
        for field in filters.keys():
            if field not in self.all_products.columns and field in self.scif.columns:
                location_filters[field] = filters.pop(field)
        relevant_scenes = self.scif[self.get_filter_condition(
            self.scif, **location_filters)]['scene_id'].unique()
        return filters, relevant_scenes

    @staticmethod
    def get_json_data(file_path, sheet_name=None, skiprows=0):
        """
        This function gets a file's path and extract its content into a JSON.

        :param file_path: Path to the Excel workbook.
        :param sheet_name: Optional single sheet to read; when None, all sheets are read.
        :param skiprows: Number of leading rows to skip in each sheet.
        :return: For a single sheet - a list of row dicts; for multiple sheets - a dict of them.
                 None if a requested sheet does not exist.
        """
        data = {}
        if sheet_name:
            sheet_names = [sheet_name]
        else:
            sheet_names = xlrd.open_workbook(file_path).sheet_names()
        # BUGFIX: the loop variable used to shadow the `sheet_name` parameter, so reading a
        # multi-sheet workbook with sheet_name=None returned only the LAST sheet.
        for current_sheet in sheet_names:
            try:
                output = pd.read_excel(file_path, sheetname=current_sheet, skiprows=skiprows)
            except xlrd.biffh.XLRDError:
                Log.warning('Sheet name {} doesn\'t exist'.format(current_sheet))
                return None
            output = output.to_json(orient='records')
            output = json.loads(output)
            # Normalize cells: stringify, strip, and drop empty values.
            for x in xrange(len(output)):
                for y in output[x].keys():
                    output[x][y] = unicode(
                        '' if output[x][y] is None else output[x][y]).strip()
                    if not output[x][y]:
                        output[x].pop(y, None)
            data[current_sheet] = output
        if sheet_name:
            data = data[sheet_name]
        elif len(data.keys()) == 1:
            data = data[data.keys()[0]]
        return data
class MSCGENERALToolBox:
    """
    Generic KPI calculation helpers for the MSC project: availability, assortment,
    share-of-shelf, shelf/eye-level checks and position-graph utilities, all driven
    by the session's data provider (scene item facts, matches, products, surveys).
    """

    # Flag values used by get_filter_condition() to decide how a filter value is applied.
    EXCLUDE_FILTER = 0
    INCLUDE_FILTER = 1
    CONTAIN_FILTER = 2
    # Whether 'Empty'-typed products take part in SOS denominators.
    EXCLUDE_EMPTY = False
    INCLUDE_EMPTY = True
    # Sentinel meaning "require every matching product" (see calculate_eye_level_assortment).
    STRICT_MODE = ALL = 1000
    EMPTY = 'Empty'
    DEFAULT = 'Default'
    TOP = 'Top'
    BOTTOM = 'Bottom'

    def __init__(self, data_provider, output, rds_conn=None, ignore_stacking=False, front_facing=False, **kwargs):
        """
        :param data_provider: session data provider (scene item facts, matches, products, ...).
        :param output: calculation output handle passed to the base calculations engine.
        :param rds_conn: optional DB connection; required by get_atts(), which is called below.
        :param ignore_stacking: when True, facings are counted via 'facings_ign_stack'.
        :param front_facing: when True, scif is restricted to front-face tags only.
        :param kwargs: every extra keyword is set as an attribute on the instance
                       (e.g. eye_level_configurations used by calculate_eye_level_assortment).
        """
        self.k_engine = BaseCalculationsGroup(data_provider, output)
        self.rds_conn = rds_conn
        self.data_provider = data_provider
        self.project_name = self.data_provider.project_name
        self.session_uid = self.data_provider.session_uid
        self.scif = self.data_provider[Data.SCENE_ITEM_FACTS]
        self.match_product_in_scene = self.data_provider[Data.MATCHES]
        self.all_products = self.data_provider[Data.ALL_PRODUCTS]
        self.survey_response = self.data_provider[Data.SURVEY_RESPONSES]
        # Attach template data to scenes info; '_y' suffix avoids clobbering scene columns.
        self.scenes_info = self.data_provider[Data.SCENES_INFO].merge(self.data_provider[Data.ALL_TEMPLATES],
                                                                      how='left', on='template_fk', suffixes=['', '_y'])
        # NOTE(review): duplicate assignment — survey_response was already set above.
        self.survey_response = self.data_provider[Data.SURVEY_RESPONSES]
        # Enriches scif with project-specific product attributes (needs rds_conn).
        self.get_atts()
        self.ignore_stacking = ignore_stacking
        self.facings_field = 'facings' if not self.ignore_stacking else 'facings_ign_stack'
        self.front_facing = front_facing
        for data in kwargs.keys():
            setattr(self, data, kwargs[data])
        if self.front_facing:
            self.scif = self.scif[self.scif['front_face_count'] == 1]
        self._merge_matches_and_all_product()

    def _merge_matches_and_all_product(self):
        """
        This method merges the all product data with the match product in scene DataFrame,
        so product-level filters can be applied directly to the matches.
        """
        self.match_product_in_scene = self.match_product_in_scene.merge(self.all_products, on='product_fk', how='left')

    def get_atts(self):
        """
        Enrich scif with extra product attributes fetched from the DB by EAN code.
        Requires a live rds_conn; called unconditionally from __init__.
        """
        query = MSCQueries.get_product_atts()
        product_att3 = pd.read_sql_query(query, self.rds_conn.db)
        self.scif = self.scif.merge(product_att3, how='left', left_on='product_ean_code',
                                    right_on='product_ean_code')

    def get_survey_answer(self, survey_data, answer_field=None):
        """
        :param survey_data: 1) str - The name of the survey in the DB.
                            2) tuple - (The field name, the field value). For example: ('question_fk', 13)
        :param answer_field: The DB field from which the answer is extracted. Default is the usual hierarchy.
        :return: The required survey response, or None if the survey was not answered.
        """
        if not isinstance(survey_data, (list, tuple)):
            entity = 'question_text'
            value = survey_data
        else:
            entity, value = survey_data
        survey = self.survey_response[self.survey_response[entity] == value]
        if survey.empty:
            return None
        # Only the first matching response row is considered.
        survey = survey.iloc[0]
        if answer_field is None or answer_field not in survey.keys():
            # NOTE(review): a NaN in 'selected_option_text' is truthy for float('nan'),
            # so this may pick the text field even when it holds NaN — confirm intended.
            answer_field = 'selected_option_text' if survey['selected_option_text'] else 'number_value'
        survey_answer = survey[answer_field]
        return survey_answer

    def check_survey_answer(self, survey_text, target_answer):
        """
        :param survey_text: 1) str - The name of the survey in the DB.
                            2) tuple - (The field name, the field value). For example: ('question_fk', 13)
        :param target_answer: The required answer/s for the KPI to pass.
        :return: True if the answer matches the target; False otherwise;
                 None (not False) when the survey does not exist at all.
        """
        if not isinstance(survey_text, (list, tuple)):
            entity = 'question_text'
            value = survey_text
        else:
            entity, value = survey_text
            value = [value] if not isinstance(value, list) else value
        survey_data = self.survey_response[self.survey_response[entity].isin(value)]
        if survey_data.empty:
            Log.warning('Survey with {} = {} doesn\'t exist'.format(entity, value))
            return None
        answer_field = 'selected_option_text' if not survey_data['selected_option_text'].empty else 'number_value'
        target_answers = [target_answer] if not isinstance(target_answer, (list, tuple)) else target_answer
        survey_answers = survey_data[answer_field].values.tolist()
        for answer in target_answers:
            if answer in survey_answers:
                return True
        return False

    def calculate_number_of_scenes(self, **filters):
        """
        :param filters: These are the parameters which the data frame is filtered by.
        :return: The number of scenes matching the filtered Scene Item Facts data frame.
        """
        if filters:
            # If any filter key is not a scenes_info column, fall back to filtering scif.
            if set(filters.keys()).difference(self.scenes_info.keys()):
                scene_data = self.scif[self.get_filter_condition(self.scif, **filters)]
            else:
                scene_data = self.scenes_info[self.get_filter_condition(self.scenes_info, **filters)]
        else:
            scene_data = self.scenes_info
        number_of_scenes = len(scene_data['scene_fk'].unique().tolist())
        return number_of_scenes

    def calculate_availability(self, **filters):
        """
        :param filters: These are the parameters which the data frame is filtered by.
        :return: Total number of SKUs facings appeared in the filtered Scene Item Facts data frame.
        """
        # Filters that are not scif columns are resolved against the (product-enriched) matches.
        if set(filters.keys()).difference(self.scif.keys()):
            filtered_df = self.match_product_in_scene[self.get_filter_condition(self.match_product_in_scene,
                                                                                **filters)]
        else:
            filtered_df = self.scif[self.get_filter_condition(self.scif, **filters)]
        if self.facings_field in filtered_df.columns:
            availability = filtered_df[self.facings_field].sum()
        else:
            # Matches have one row per facing, so the row count is the facings count.
            availability = len(filtered_df)
        return availability

    def calculate_assortment(self, assortment_entity='product_ean_code', minimum_assortment_for_entity=1, **filters):
        """
        :param assortment_entity: This is the entity on which the assortment is calculated.
        :param minimum_assortment_for_entity: This is the number of assortment per each unique entity in order for it
                                              to be counted in the final assortment result (default is 1).
        :param filters: These are the parameters which the data frame is filtered by.
        :return: Number of unique SKUs appeared in the filtered Scene Item Facts data frame.
        """
        if set(filters.keys()).difference(self.scif.keys()):
            filtered_df = self.match_product_in_scene[self.get_filter_condition(self.match_product_in_scene,
                                                                                **filters)]
        else:
            filtered_df = self.scif[self.get_filter_condition(self.scif, **filters)]
        if minimum_assortment_for_entity == 1:
            assortment = len(filtered_df[assortment_entity].unique())
        else:
            # Count only entities whose total facings reach the per-entity minimum.
            assortment = 0
            for entity_id in filtered_df[assortment_entity].unique():
                assortment_for_entity = filtered_df[filtered_df[assortment_entity] == entity_id]
                if self.facings_field in filtered_df.columns:
                    assortment_for_entity = assortment_for_entity[self.facings_field].sum()
                else:
                    assortment_for_entity = len(assortment_for_entity)
                if assortment_for_entity >= minimum_assortment_for_entity:
                    assortment += 1
        return assortment

    def calculate_share_of_shelf(self, sos_filters=None, include_empty=EXCLUDE_EMPTY, **general_filters):
        """
        :param sos_filters: These are the parameters on which ths SOS is calculated (out of the general DF).
        :param include_empty: This dictates whether Empty-typed SKUs are included in the calculation.
        :param general_filters: These are the parameters which the general data frame is filtered by.
        :return: The ratio of the Facings SOS (0 on failure or a non-numeric engine result).
        """
        # Python 2 list concatenation of keys(); only add the Empty-exclusion when the
        # caller did not already filter on product_type.
        if include_empty == self.EXCLUDE_EMPTY and 'product_type' not in sos_filters.keys() + general_filters.keys():
            general_filters['product_type'] = (self.EMPTY, self.EXCLUDE_FILTER)
        pop_filter = self.get_filter_condition(self.scif, **general_filters)
        subset_filter = self.get_filter_condition(self.scif, **sos_filters)
        try:
            ratio = self.k_engine.calculate_sos_by_facings(pop_filter=pop_filter, subset_filter=subset_filter)
        except:
            # NOTE(review): bare except silently maps any engine failure to 0 — confirm intended.
            ratio = 0
        if not isinstance(ratio, (float, int)):
            ratio = 0
        return ratio

    def calculate_linear_share_of_shelf(self, sos_filters, include_empty=EXCLUDE_EMPTY, **general_filters):
        """
        :param sos_filters: These are the parameters on which ths SOS is calculated (out of the general DF).
        :param include_empty: This dictates whether Empty-typed SKUs are included in the calculation.
        :param general_filters: These are the parameters which the general data frame is filtered by.
        :return: The Linear SOS ratio (0 when the denominator width is 0).
        """
        if include_empty == self.EXCLUDE_EMPTY:
            general_filters['product_type'] = (self.EMPTY, self.EXCLUDE_FILTER)
        # Numerator applies both the SOS filters and the general filters.
        numerator_width = self.calculate_share_space_length(**dict(sos_filters, **general_filters))
        denominator_width = self.calculate_share_space_length(**general_filters)
        if denominator_width == 0:
            ratio = 0
        else:
            ratio = numerator_width / float(denominator_width)
        return ratio

    def calculate_share_space_length(self, **filters):
        """
        :param filters: These are the parameters which the data frame is filtered by.
        :return: The total shelf width (in mm) the relevant facings occupy.
        """
        filtered_matches = self.match_product_in_scene[self.get_filter_condition(self.match_product_in_scene,
                                                                                 **filters)]
        space_length = filtered_matches['width_mm_advance'].sum()
        return space_length

    def calculate_products_on_edge(self, min_number_of_facings=1, min_number_of_shelves=1, **filters):
        """
        :param min_number_of_facings: Minimum number of edge facings for KPI to pass.
        :param min_number_of_shelves: Minimum number of different shelves with edge facings for KPI to pass.
        :param filters: This are the parameters which dictate the relevant SKUs for the edge calculation.
        :return: A tuple: (Number of scenes which pass, Total number of relevant scenes)
        """
        filters, relevant_scenes = self.separate_location_filters_from_product_filters(**filters)
        if len(relevant_scenes) == 0:
            return 0, 0
        number_of_edge_scenes = 0
        for scene in relevant_scenes:
            edge_facings = pd.DataFrame(columns=self.match_product_in_scene.columns)
            matches = self.match_product_in_scene[self.match_product_in_scene['scene_fk'] == scene]
            for shelf in matches['shelf_number'].unique():
                shelf_matches = matches[matches['shelf_number'] == shelf]
                if not shelf_matches.empty:
                    # First and last tags of the shelf (sorted by bay then sequence) are the edges.
                    shelf_matches = shelf_matches.sort_values(by=['bay_number', 'facing_sequence_number'])
                    edge_facings = edge_facings.append(shelf_matches.iloc[0])
                    # NOTE(review): this guard tests the accumulated edge_facings, not
                    # shelf_matches — likely meant len(shelf_matches) > 1; confirm.
                    if len(edge_facings) > 1:
                        edge_facings = edge_facings.append(shelf_matches.iloc[-1])
            edge_facings = edge_facings[self.get_filter_condition(edge_facings, **filters)]
            if len(edge_facings) >= min_number_of_facings \
                    and len(edge_facings['shelf_number'].unique()) >= min_number_of_shelves:
                number_of_edge_scenes += 1
        return number_of_edge_scenes, len(relevant_scenes)

    def calculate_shelf_level_assortment(self, shelves, from_top_or_bottom=TOP, **filters):
        """
        :param shelves: A shelf number (of type int or string), or a list of shelves (of type int or string).
        :param from_top_or_bottom: TOP for default shelf number (counted from top) or BOTTOM for shelf number
                                   counted from bottom.
        :param filters: These are the parameters which the data frame is filtered by.
        :return: Number of unique SKUs appeared in the filtered condition.
        """
        shelves = shelves if isinstance(shelves, list) else [shelves]
        shelves = [int(shelf) for shelf in shelves]
        if from_top_or_bottom == self.TOP:
            assortment = self.calculate_assortment(shelf_number=shelves, **filters)
        else:
            assortment = self.calculate_assortment(shelf_number_from_bottom=shelves, **filters)
        return assortment

    def calculate_eye_level_assortment(self, eye_level_configurations=DEFAULT, min_number_of_products=ALL, **filters):
        """
        :param eye_level_configurations: A data frame containing information about shelves to ignore (==not eye level)
                                         for every number of shelves in each bay.
        :param min_number_of_products: Minimum number of eye level unique SKUs for KPI to pass.
        :param filters: This are the parameters which dictate the relevant SKUs for the eye-level calculation.
        :return: A tuple: (Number of scenes which pass, Total number of relevant scenes);
                 False when no configurations are available.
        """
        filters, relevant_scenes = self.separate_location_filters_from_product_filters(**filters)
        if len(relevant_scenes) == 0:
            return 0, 0
        if eye_level_configurations == self.DEFAULT:
            # Fall back to configurations injected through __init__ kwargs.
            if hasattr(self, 'eye_level_configurations'):
                eye_level_configurations = self.eye_level_configurations
            else:
                Log.error('Eye-level configurations are not set up')
                return False
        number_of_products = len(self.all_products[self.get_filter_condition(self.all_products,
                                                                             **filters)]['product_ean_code'])
        # Configuration columns, in order: min/max shelf count, shelves ignored from top/bottom.
        min_shelf, max_shelf, min_ignore, max_ignore = eye_level_configurations.columns
        number_of_eye_level_scenes = 0
        for scene in relevant_scenes:
            eye_level_facings = pd.DataFrame(columns=self.match_product_in_scene.columns)
            matches = self.match_product_in_scene[self.match_product_in_scene['scene_fk'] == scene]
            for bay in matches['bay_number'].unique():
                bay_matches = matches[matches['bay_number'] == bay]
                number_of_shelves = bay_matches['shelf_number'].max()
                configuration = eye_level_configurations[(eye_level_configurations[min_shelf] <= number_of_shelves) &
                                                         (eye_level_configurations[max_shelf] >= number_of_shelves)]
                if not configuration.empty:
                    configuration = configuration.iloc[0]
                else:
                    # No configuration for this shelf count: treat every shelf as eye level.
                    configuration = {min_ignore: 0, max_ignore: 0}
                min_include = configuration[min_ignore] + 1
                max_include = number_of_shelves - configuration[max_ignore]
                eye_level_shelves = bay_matches[bay_matches['shelf_number'].between(min_include, max_include)]
                eye_level_facings = eye_level_facings.append(eye_level_shelves)
            eye_level_assortment = len(eye_level_facings[
                self.get_filter_condition(eye_level_facings, **filters)]['product_ean_code'])
            if min_number_of_products == self.ALL:
                min_number_of_products = number_of_products
            if eye_level_assortment >= min_number_of_products:
                number_of_eye_level_scenes += 1
        return number_of_eye_level_scenes, len(relevant_scenes)

    def shelf_level_assortment(self, min_number_of_products, shelf_target, strict=True, **filters):
        """
        Binary check: do at least min_number_of_products filtered SKUs appear on the
        target shelves in any relevant scene?
        :param min_number_of_products: Minimum number of SKUs on the target shelves for the KPI to pass.
        :param shelf_target: Shelf numbers (iterable) that count as the target level.
        :param strict: unused here — TODO confirm whether it was meant to alter the pass rule.
        :param filters: Product/location filters for the check.
        :return: 1 if any scene passes, otherwise 0.
        """
        filters, relevant_scenes = self.separate_location_filters_from_product_filters(**filters)
        if len(relevant_scenes) == 0:
            relevant_scenes = self.scif['scene_fk'].unique().tolist()
        number_of_products = len(self.all_products[self.get_filter_condition(self.all_products, **filters)]
                                 ['product_ean_code'])
        result = 0  # Default score is FALSE
        for scene in relevant_scenes:
            eye_level_facings = pd.DataFrame(columns=self.match_product_in_scene.columns)
            matches = pd.merge(self.match_product_in_scene[self.match_product_in_scene['scene_fk'] == scene],
                               self.all_products, on=['product_fk'])
            for bay in matches['bay_number'].unique():
                bay_matches = matches[matches['bay_number'] == bay]
                # NOTE(review): number_of_products is an int (a len), so .isin(number_of_products)
                # will raise in pandas — this very likely meant the list of EAN codes; confirm.
                products_in_target_shelf = bay_matches[(bay_matches['shelf_number'].isin(shelf_target)) & (
                    bay_matches['product_ean_code'].isin(number_of_products))]
                eye_level_facings = eye_level_facings.append(products_in_target_shelf)
            eye_level_assortment = len(eye_level_facings[
                self.get_filter_condition(eye_level_facings, **filters)][
                'product_ean_code'])
            if eye_level_assortment >= min_number_of_products:
                result = 1
        return result

    @staticmethod
    def _reverse_direction(direction):
        """
        This function returns the opposite of a given direction
        (top<->bottom, left<->right; anything else is returned unchanged).
        """
        if direction == 'top':
            new_direction = 'bottom'
        elif direction == 'bottom':
            new_direction = 'top'
        elif direction == 'left':
            new_direction = 'right'
        elif direction == 'right':
            new_direction = 'left'
        else:
            new_direction = direction
        return new_direction

    def get_positions_by_direction(self, graph, vertex_index):
        """
        This function gets a filtered graph (contains only edges of a relevant direction) and a
        Vertex index, and returns all sequences starting in it (until it gets to a dead end).
        Recursive; assumes the directional graph is acyclic.
        """
        sequences = []
        edges = [graph.es[e] for e in graph.incident(vertex_index)]
        next_vertices = [edge.target for edge in edges]
        for vertex in next_vertices:
            next_sequences = self.get_positions_by_direction(graph, vertex)
            if not next_sequences:
                sequences.append([vertex])
            else:
                for sequence in next_sequences:
                    sequences.append([vertex] + sequence)
        return sequences

    @staticmethod
    def _filter_sequences(sequences):
        """
        This function receives a list of sequences (lists of indexes), and removes sequences
        which can be represented by a shorter sequence (which is also in the list).
        Sequences are grouped by their final vertex; the shortest per group is kept.
        """
        if not sequences:
            return sequences
        # Sort by (last vertex, length) so the first entry per last-vertex is the shortest.
        sequences = sorted(sequences, key=lambda x: (x[-1], len(x)))
        filtered_sequences = [sequences[0]]
        for sequence in sequences[1:]:
            if sequence[-1] != filtered_sequences[-1][-1]:
                filtered_sequences.append(sequence)
        return filtered_sequences

    def filter_vertices_from_graph(self, graph, **filters):
        """
        This function is given a graph and returns a set of vertices calculated by a given
        set of filters (intersection across filter fields, union across each field's values).
        When front_facing mode is on, only front-facing vertices are returned.
        """
        vertices_indexes = None
        for field in filters.keys():
            field_vertices = set()
            values = filters[field] if isinstance(filters[field], (list, tuple)) else [filters[field]]
            for value in values:
                vertices = [v.index for v in graph.vs.select(**{field: value})]
                field_vertices = field_vertices.union(vertices)
            if vertices_indexes is None:
                vertices_indexes = field_vertices
            else:
                vertices_indexes = vertices_indexes.intersection(field_vertices)
        # No filters at all means every vertex qualifies.
        vertices_indexes = vertices_indexes if vertices_indexes is not None else [v.index for v in graph.vs]
        if self.front_facing:
            front_facing_vertices = [v.index for v in graph.vs.select(front_facing='Y')]
            vertices_indexes = set(vertices_indexes).intersection(front_facing_vertices)
        return list(vertices_indexes)

    @staticmethod
    def validate_moves(moves, direction_data):
        """
        This function checks whether the distance between the anchor and the tested SKUs
        fits the requirements. direction_data may hold several alternative requirement
        sets; passing any one of them is enough.
        """
        direction_data = direction_data if isinstance(direction_data, (list, tuple)) else [direction_data]
        validated = False
        for data in direction_data:
            data_validated = True
            for direction in moves.keys():
                allowed_moves = data.get(direction, (0, 0))
                # A scalar allowance means "0 up to that many moves".
                min_move, max_move = allowed_moves if isinstance(allowed_moves, tuple) else (0, allowed_moves)
                if not min_move <= moves[direction] <= max_move:
                    data_validated = False
                    break
            if data_validated:
                validated = True
                break
        return validated

    def get_product_unique_position_on_shelf(self, scene_id, shelf_number, include_empty=False, **filters):
        """
        :param scene_id: The scene ID.
        :param shelf_number: The number of shelf in question (from top).
        :param include_empty: This dictates whether or not to include empties as valid positions.
        :param filters: These are the parameters which the unique position is checked for.
        :return: The ordered positions (EAN codes / 'Empty', consecutive duplicates collapsed)
                 on the shelf, or None when the filtered products are not tagged there.
        """
        shelf_matches = self.match_product_in_scene[(self.match_product_in_scene['scene_fk'] == scene_id) &
                                                    (self.match_product_in_scene['shelf_number'] == shelf_number)]
        if not include_empty:
            filters['product_type'] = ('Empty', self.EXCLUDE_FILTER)
        if filters and shelf_matches[self.get_filter_condition(shelf_matches, **filters)].empty:
            Log.info("Products of '{}' are not tagged in shelf number {}".format(filters, shelf_number))
            return None
        shelf_matches = shelf_matches.sort_values(by=['bay_number', 'facing_sequence_number'])
        shelf_matches = shelf_matches.drop_duplicates(subset=['product_ean_code'])
        positions = []
        for m in xrange(len(shelf_matches)):
            match = shelf_matches.iloc[m]
            match_name = 'Empty' if match['product_type'] == 'Empty' else match['product_ean_code']
            # Collapse consecutive identical entries into one position.
            if positions and positions[-1] == match_name:
                continue
            positions.append(match_name)
        return positions

    def get_filter_condition(self, df, **filters):
        """
        :param df: The data frame to be filters.
        :param filters: These are the parameters which the data frame is filtered by.
                        Every parameter would be a tuple of the value and an include/exclude flag.
                        INPUT EXAMPLE (1): manufacturer_name = ('Diageo', MSCGENERALToolBox.INCLUDE_FILTER)
                        INPUT EXAMPLE (2): manufacturer_name = 'Diageo'
        :return: a boolean mask for filtering the Scene Item Facts data frame.
        """
        if not filters:
            # 'pk' is always populated, so this yields an all-True mask.
            return df['pk'].apply(bool)
        if self.facings_field in df.keys():
            # Baseline: only rows with actual facings.
            filter_condition = (df[self.facings_field] > 0)
        else:
            filter_condition = None
        for field in filters.keys():
            if field in df.keys():
                if isinstance(filters[field], tuple):
                    value, exclude_or_include = filters[field]
                else:
                    value, exclude_or_include = filters[field], self.INCLUDE_FILTER
                if not value:
                    continue
                if not isinstance(value, list):
                    value = [value]
                if exclude_or_include == self.INCLUDE_FILTER:
                    condition = (df[field].isin(value))
                elif exclude_or_include == self.EXCLUDE_FILTER:
                    condition = (~df[field].isin(value))
                elif exclude_or_include == self.CONTAIN_FILTER:
                    # Substring match; multiple values are OR-ed together.
                    condition = (df[field].str.contains(value[0], regex=False))
                    for v in value[1:]:
                        condition |= df[field].str.contains(v, regex=False)
                else:
                    continue
                if filter_condition is None:
                    filter_condition = condition
                else:
                    filter_condition &= condition
            else:
                Log.warning('field {} is not in the Data Frame'.format(field))
        # NOTE(review): if every filter is skipped and df has no facings column this
        # returns None, which callers use as df[None] — confirm unreachable in practice.
        return filter_condition

    def separate_location_filters_from_product_filters(self, **filters):
        """
        This function gets scene-item-facts filters of all kinds, extracts the relevant scenes
        by the location filters, and returns them along with the product filters only.
        """
        location_filters = {}
        # Python 2: filters.keys() is a list copy, so popping inside the loop is safe here.
        for field in filters.keys():
            if field not in self.all_products.columns and field in self.scif.columns:
                location_filters[field] = filters.pop(field)
        relevant_scenes = self.scif[self.get_filter_condition(self.scif, **location_filters)]['scene_id'].unique()
        return filters, relevant_scenes

    @staticmethod
    def get_json_data(file_path, sheet_name=None, skiprows=0):
        """
        This function gets a file's path and extract its content into a JSON.
        :param file_path: Path to the Excel workbook.
        :param sheet_name: A single sheet to read; when None, every sheet is read.
        :param skiprows: Number of leading rows to skip in each sheet.
        :return: list of row-dicts for a single sheet, or {sheet_name: rows} for several;
                 None when a requested sheet does not exist.
        """
        data = {}
        if sheet_name:
            sheet_names = [sheet_name]
        else:
            sheet_names = xlrd.open_workbook(file_path).sheet_names()
        for sheet_name in sheet_names:
            try:
                # 'sheetname' is the legacy pandas parameter (renamed 'sheet_name' later).
                output = pd.read_excel(file_path, sheetname=sheet_name, skiprows=skiprows)
            except xlrd.biffh.XLRDError:
                Log.warning('Sheet name {} doesn\'t exist'.format(sheet_name))
                return None
            output = output.to_json(orient='records')
            output = json.loads(output)
            # Normalize cells to stripped unicode strings; drop empty cells entirely.
            for x in xrange(len(output)):
                for y in output[x].keys():
                    output[x][y] = unicode('' if output[x][y] is None else output[x][y]).strip()
                    if not output[x][y]:
                        output[x].pop(y, None)
            data[sheet_name] = output
        if sheet_name:
            data = data[sheet_name]
        elif len(data.keys()) == 1:
            data = data[data.keys()[0]]
        return data
class CCUS_SANDGENERALCCUS_SANDToolBox: """ MOVED TO Trax.Data.ProfessionalServices.KPIUtils.GeneralCCUS_SANDToolBox """ EXCLUDE_FILTER = 0 INCLUDE_FILTER = 1 EXCLUDE_EMPTY = 0 INCLUDE_EMPTY = 1 EMPTY = 'Empty' ASSORTMENT = 'assortment' AVAILABILITY = 'availability' def __init__(self, data_provider, output, kpi_static_data, geometric_kpi_flag=False, **data): self.k_engine = BaseCalculationsGroup(data_provider, output) self.data_provider = data_provider self.project_name = self.data_provider.project_name self.session_uid = self.data_provider.session_uid self.scif = self.data_provider[Data.SCENE_ITEM_FACTS] self.rds_conn = PSProjectConnector(self.project_name, DbUsers.CalcAdmin) self.scif = self.scif.merge(self.data_provider[Data.STORE_INFO], how='left', left_on='store_id', right_on='store_fk') self.match_display_in_scene = data.get('match_display_in_scene') self.all_products = self.data_provider[Data.ALL_PRODUCTS] self.survey_response = self.data_provider[Data.SURVEY_RESPONSES] self.kpi_static_data = kpi_static_data # self.get_atts() if geometric_kpi_flag: self.position_graph_data = CCUS_SANDPositionGraphs( self.data_provider) self.matches = self.position_graph_data.match_product_in_scene self.position_graph = self.position_graph_data.position_graphs else: self.position_graph_data = None self.matches = self.data_provider[Data.MATCHES] self.matches = self.matches.merge(self.match_display_in_scene, how='left', on=['scene_fk', 'bay_number']) @property def position_graphs(self): if not hasattr(self, '_position_graphs'): self._position_graphs = CCUS_SANDPositionGraphs(self.data_provider) return self._position_graphs @property def match_product_in_scene(self): if not hasattr(self, '_match_product_in_scene'): self._match_product_in_scene = self.position_graph_data.match_product_in_scene return self._match_product_in_scene # @property # def match_product_in_scene(self): # if not hasattr(self, '_match_product_in_scene'): # self._match_product_in_scene = 
self.position_graph_data.match_product_in_scene # return self._match_product_in_scene # # def get_atts(self): # """ # This function extracts the static KPI data and saves it into one global data frame. # The data is taken from static.kpi / static.atomic_kpi / static.kpi_set. # """ # query = CCUS_SANDQueries.get_product_atts() # product_att4 = pd.read_sql_query(query, self.rds_conn.db) # self.scif = self.scif.merge(product_att4, how='left', left_on='product_ean_code', # right_on='product_ean_code') def check_survey_answer(self, survey_text, target_answer): """ :param survey_text: The name of the survey in the DB. :param target_answer: The required answer/s for the KPI to pass. :return: True if the answer matches the target; otherwise - False. """ survey_data = self.survey_response.loc[ self.survey_response['question_text'] == survey_text] answer_field = 'selected_option_text' if not survey_data[ 'selected_option_text'].empty else 'number_value' if target_answer in survey_data[answer_field].values.tolist(): return True else: return False def calculate_products_on_bay_edge(self, min_number_of_facings=1, min_number_of_shelves=1, position='Right', **filters): """ :param Position: select to check edge on left or right :param min_number_of_facings: Minimum number of edge facings for KPI to pass. :param min_number_of_shelves: Minimum number of different shelves with edge facings for KPI to pass. :param filters: This are the parameters which dictate the relevant SKUs for the edge calculation. 
:return: A tuple: (Number of scenes which pass, Total number of relevant scenes) """ filters, relevant_scenes = self.separate_location_filters_from_product_filters( **filters) if len(relevant_scenes) == 0: return 0, 0 number_of_edge_scenes = 0 for scene in relevant_scenes: edge_facings = pd.DataFrame(columns=self.matches.columns) matches = self.matches[self.matches['scene_fk'] == scene] for bay in matches['bay_number'].unique(): bay_matches = matches[matches['bay_number'] == bay] for shelf in matches['shelf_number'].unique(): shelf_matches = bay_matches[bay_matches['shelf_number'] == shelf] if not shelf_matches.empty: shelf_matches = shelf_matches.sort_values( by=['facing_sequence_number']) if position == 'left': edge_facings = edge_facings.append( shelf_matches.iloc[0]) elif position == 'Right': edge_facings = edge_facings.append( shelf_matches.iloc[-1]) edge_facings = edge_facings[self.get_filter_condition( edge_facings, **filters)] if len(edge_facings) >= min_number_of_facings \ and len(edge_facings['shelf_number'].unique()) >= min_number_of_shelves: number_of_edge_scenes += 1 return number_of_edge_scenes, len(relevant_scenes) def calculate_number_of_scenes(self, **filters): """ :param filters: These are the parameters which the data frame is filtered by. :return: The number of scenes matching the filtered Scene Item Facts data frame. """ filtered_scif = self.scif[self.get_filter_condition( self.scif, **filters)] number_of_scenes = len(filtered_scif['scene_id'].unique()) return number_of_scenes def calculate_availability(self, **filters): """ :param filters: These are the parameters which the data frame is filtered by. :return: Total number of SKUs facings appeared in the filtered Scene Item Facts data frame. 
""" if set(filters.keys()).difference(self.scif.keys()): for key in filters.keys(): if key == 'brand_name': filters['brand_name_y'] = filters.pop('brand_name') if key == 'product_ean_code': filters['product_ean_code_x'] = filters.pop( 'product_ean_code') match = self.matches.merge(self.all_products, how='left', on=['product_fk']) filtered_df = match[self.get_filter_condition(match, **filters)] else: filtered_df = self.scif[self.get_filter_condition( self.scif, **filters)] if 'facings' in filtered_df.columns: availability = filtered_df['facings'].sum() else: availability = len(filtered_df) return availability def calculate_assortment(self, **filters): """ :param filters: These are the parameters which the data frame is filtered by. :return: Number of unique SKUs appeared in the filtered Scene Item Facts data frame. """ filtered_scif = self.scif[self.get_filter_condition( self.scif, **filters)] assortment = len(filtered_scif['product_ean_code'].unique()) return assortment def calculate_share_of_shelf(self, sos_filters=None, include_empty=EXCLUDE_EMPTY, **general_filters): """ :param sos_filters: These are the parameters on which ths SOS is calculated (out of the general DF). :param include_empty: This dictates whether Empty-typed SKUs are included in the calculation. :param general_filters: These are the parameters which the general data frame is filtered by. :return: The ratio of the SOS. 
""" if include_empty == self.EXCLUDE_EMPTY: general_filters['product_type'] = (self.EMPTY, self.EXCLUDE_FILTER) pop_filter = self.get_filter_condition(self.scif, **general_filters) subset_filter = self.get_filter_condition(self.scif, **sos_filters) try: ratio = self.k_engine.calculate_sos_by_facings( pop_filter=pop_filter, subset_filter=subset_filter) except: ratio = 0 if not isinstance(ratio, (float, int)): ratio = 0 return ratio def calculate_relative_position(self, tested_filters, anchor_filters, direction_data, min_required_to_pass=1, **general_filters): """ :param tested_filters: The tested SKUs' filters. :param anchor_filters: The anchor SKUs' filters. :param direction_data: The allowed distance between the tested and anchor SKUs. In form: {'top': 4, 'bottom: 0, 'left': 100, 'right': 0} Alternative form: {'top': (0, 1), 'bottom': (1, 1000), ...} - As range. :param min_required_to_pass: Number of appearances needed to be True for relative position in order for KPI to pass. If all appearances are required: ==a string or a big number. :param general_filters: These are the parameters which the general data frame is filtered by. :return: True if (at least) one pair of relevant SKUs fits the distance requirements; otherwise - returns False. 
""" filtered_scif = self.scif[self.get_filter_condition( self.scif, **general_filters)] tested_scenes = filtered_scif[self.get_filter_condition( filtered_scif, **tested_filters)]['scene_id'].unique() anchor_scenes = filtered_scif[self.get_filter_condition( filtered_scif, **anchor_filters)]['scene_id'].unique() relevant_scenes = set(tested_scenes).intersection(anchor_scenes) if relevant_scenes: pass_counter = 0 reject_counter = 0 for scene in relevant_scenes: scene_graph = self.position_graphs.position_graphs.get(scene) tested_vertices = self.filter_vertices_from_graph( scene_graph, **tested_filters) anchor_vertices = self.filter_vertices_from_graph( scene_graph, **anchor_filters) for tested_vertex in tested_vertices: for anchor_vertex in anchor_vertices: moves = {'top': 0, 'bottom': 0, 'left': 0, 'right': 0} path = scene_graph.get_shortest_paths(anchor_vertex, tested_vertex, output='epath') if path: path = path[0] for edge in path: moves[scene_graph.es[edge]['direction']] += 1 if self.validate_moves(moves, direction_data): pass_counter += 1 if isinstance( min_required_to_pass, int ) and pass_counter >= min_required_to_pass: return True else: reject_counter += 1 else: Log.debug('Tested and Anchor have no direct path') if pass_counter > 0 and reject_counter == 0: return True else: return False else: Log.debug('None of the scenes contain both anchor and tested SKUs') return False @staticmethod def filter_vertices_from_graph(graph, **filters): """ This function is given a graph and returns a set of vertices calculated by a given set of filters. 
""" vertices_indexes = None for field in filters.keys(): field_vertices = set() values = filters[field] if isinstance( filters[field], (list, tuple)) else [filters[field]] for value in values: vertices = [v.index for v in graph.vs.select(**{field: value})] field_vertices = field_vertices.union(vertices) if vertices_indexes is None: vertices_indexes = field_vertices else: vertices_indexes = vertices_indexes.intersection( field_vertices) vertices_indexes = vertices_indexes if vertices_indexes is not None else [ v.index for v in graph.vs ] return vertices_indexes @staticmethod def validate_moves(moves, direction_data): """ This function checks whether the distance between the anchor and the tested SKUs fits the requirements. """ for direction in moves.keys(): allowed_moves = direction_data[direction] min_move, max_move = allowed_moves if isinstance( allowed_moves, tuple) else (0, allowed_moves) if not min_move <= moves[direction] <= max_move: return False return True def calculate_block_together(self, allowed_products_filters=None, include_empty=EXCLUDE_EMPTY, **filters): """ :param allowed_products_filters: These are the parameters which are allowed to corrupt the block without failing it. :param include_empty: This parameter dictates whether or not to discard Empty-typed products. :param filters: These are the parameters which the blocks are checked for. :return: True - if in (at least) one of the scenes all the relevant SKUs are grouped together in one block; otherwise - returns False. 
""" relevant_scenes = self.scif[self.get_filter_condition( self.scif, **filters)]['scene_id'].unique().tolist() for field in ['location_type', 'template_name']: filters.pop(field, None) if relevant_scenes: for scene in relevant_scenes: if scene not in self.position_graphs.position_graphs.keys(): Log.debug('Scene {} has not position graph'.format(scene)) continue scene_graph = self.position_graphs.position_graphs[scene].copy( ) relevant_vertices = None for field in filters.keys(): values = filters[field] if isinstance( filters[field], (list, float)) else [filters[field]] vertices_for_field = set() for value in values: condition = {field: value} vertices = { v.index for v in scene_graph.vs.select(**condition) } vertices_for_field = vertices_for_field.union(vertices) if relevant_vertices is None: relevant_vertices = vertices_for_field else: relevant_vertices = relevant_vertices.intersection( vertices_for_field) if allowed_products_filters: allowed_vertices = None for field in allowed_products_filters.keys(): values = allowed_products_filters[field] \ if isinstance(allowed_products_filters[field], (list, float)) \ else [allowed_products_filters[field]] vertices_for_field = set() for value in values: condition = {field: value} vertices = { v.index for v in scene_graph.vs.select(**condition) } vertices_for_field = vertices_for_field.union( vertices) if allowed_vertices is None: allowed_vertices = vertices_for_field else: allowed_vertices = allowed_vertices.intersection( vertices_for_field) if include_empty == self.EXCLUDE_EMPTY: empty_vertices = { v.index for v in scene_graph.vs.select( product_type='Empty') } allowed_vertices = allowed_vertices.union( empty_vertices) relevant_vertices = relevant_vertices if relevant_vertices is not None else set( ) allowed_vertices = allowed_vertices if allowed_vertices is not None else set( ) else: allowed_vertices = [] all_vertices = {v.index for v in scene_graph.vs} vertices_to_remove = all_vertices.difference( 
relevant_vertices.union(allowed_vertices)) scene_graph.delete_vertices(vertices_to_remove) # removing clusters including 'allowed' SKUs only clusters = [ cluster for cluster in scene_graph.clusters() if set(cluster).difference(allowed_vertices) ] if len(clusters) == 1: return True else: Log.debug('None of the scenes contain relevant SKUs') return False def separate_location_filters_from_product_filters(self, **filters): """ This function gets scene-item-facts filters of all kinds, extracts the relevant scenes by the location filters, and returns them along with the product filters only. """ location_filters = {} for field in filters.keys(): if field not in self.all_products.columns: location_filters[field] = filters.pop(field) relevant_scenes = self.scif[self.get_filter_condition( self.scif, **location_filters)]['scene_id'].unique() return filters, relevant_scenes def get_filter_condition(self, df, **filters): """ :param df: The data frame to be filters. :param filters: These are the parameters which the data frame is filtered by. Every parameter would be a tuple of the value and an include/exclude flag. INPUT EXAMPLE (1): manufacturer_name = ('Diageo', DIAGEOAUGENERALCCUS_SANDToolBox.INCLUDE_FILTER) INPUT EXAMPLE (2): manufacturer_name = 'Diageo' :return: a filtered Scene Item Facts data frame. 
""" if 'facings' in df.keys(): filter_condition = (df['facings'] > 0) else: filter_condition = None for field in filters.keys(): if field in df.keys(): if isinstance(filters[field], tuple): value, exclude_or_include = filters[field] else: value, exclude_or_include = filters[ field], self.INCLUDE_FILTER if not isinstance(value, list): value = [value] if exclude_or_include == self.INCLUDE_FILTER: condition = (df[field].isin(value)) elif exclude_or_include == self.EXCLUDE_FILTER: condition = (~df[field].isin(value)) else: continue if filter_condition is None: filter_condition = condition else: filter_condition &= condition else: Log.warning('field {} is not in the Data Frame'.format(field)) return filter_condition @staticmethod def get_json_data(file_path, skiprows=0): """ This function gets a file's path and extract its content into a JSON. """ output = pd.read_excel(file_path, skiprows=skiprows) output = output.to_json(orient='records') output = json.loads(output) return output def add_new_kpi_to_static_tables(self, set_fk, new_kpi_list): """ :param set_fk: The relevant KPI set FK. :param new_kpi_list: a list of all new KPI's parameters. This function adds new KPIs to the DB ('Static' table) - both to level2 (KPI) and level3 (Atomic KPI). 
""" session = OrmSession(self.project_name, writable=True) with session.begin(subtransactions=True): for kpi in new_kpi_list: level2_query = """ INSERT INTO static.kpi (kpi_set_fk, display_text) VALUES ('{0}', '{1}');""".format( set_fk, kpi.get(KPI_NAME)) result = session.execute(level2_query) kpi_fk = result.lastrowid level3_query = """ INSERT INTO static.atomic_kpi (kpi_fk, name, description, display_text, presentation_order, display) VALUES ('{0}', '{1}', '{2}', '{3}', '{4}', '{5}');""".format( kpi_fk, kpi.get(KPI_NAME), kpi.get(KPI_NAME), kpi.get(KPI_NAME), 1, 'Y') session.execute(level3_query) session.close() return def add_kpi_sets_to_static(self, set_names): """ This function is to be ran at a beginning of a projects - and adds the constant KPI sets data to the DB. """ session = OrmSession(self.project_name, writable=True) with session.begin(subtransactions=True): for set_name in set_names: level1_query = """ INSERT INTO static.kpi_set (name, missing_kpi_score, enable, normalize_weight, expose_to_api, is_in_weekly_report) VALUES ('{0}', '{1}', '{2}', '{3}', '{4}', '{5}');""".format( set_name, 'Bad', 'Y', 'N', 'N', 'N') session.execute(level1_query) session.close() return