def Group_and_save_atLeastOneMonth_SKU( unchanged_SKU: pyspark.sql.dataframe.DataFrame, changed_SKU: pyspark.sql.dataframe.DataFrame): """ Separate unadjusted SKU to three sheets within same excel file: Capacity_to_avg_qty<3, Capacity_to_avg_qty<9 and Capacity_to_avg_qty>=3, Capacity_to_avg_qty>=9 """ # Separate SKU and save to excel files. changed_SKU.toPandas().to_csv( '../data/Output/atLeastOneMonth/adjusted_SKU.csv', index=False, encoding='utf-8') print( "Save adjusted SKU(atLeastOneMonth) to Output/atLeastOneMonth/adjusted_SKU.csv" ) unchanged_SKU = unchanged_SKU.toPandas() unchanged_SKU1 = unchanged_SKU.query('Capacity_to_avg_qty<3') unchanged_SKU2 = unchanged_SKU.query( 'Capacity_to_avg_qty<9 and Capacity_to_avg_qty>=3') unchanged_SKU3 = unchanged_SKU.query('Capacity_to_avg_qty>=9') writer = ExcelWriter('../data/Output/atLeastOneMonth/unadjusted_SKU.xlsx') unchanged_SKU1.to_excel(writer, 'lessThan3', index=False) unchanged_SKU2.to_excel(writer, 'between3And9', index=False) unchanged_SKU3.to_excel(writer, 'moreThan9', index=False) writer.save() print( "Save unadjusted SKU(atLeastOneMonth) to Output/atLeastOneMonth/unadjusted_SKU.xlsx" )
def find_and_analysis_fullMonth_SKU( df_atLeastOneMonth: pyspark.sql.dataframe.DataFrame, split_month: int, spark) -> pyspark.sql.dataframe.DataFrame: """ Find SKU, which "soldQty < capacity" in every month """ full_month_items = select_full_month_item( df_atLeastOneMonth.toPandas(), month_list=[split_month, split_month + 1, split_month + 2]) # three month data since split_month full_month_SKU_info = get_full_month_SKU_info( full_month_items, df_atLeastOneMonth.select(selected_column_fullMonth), spark).dropDuplicates() return full_month_SKU_info
def check_OOS_by_rules( df: pyspark.sql.dataframe.DataFrame, date_generated: list, spark: pyspark.sql.session.SparkSession ) -> pandas.core.frame.DataFrame: """ Set rules for OOS items: Rule 1. if 0 sales all the time, no OOS. (did in clean data) Rule 2. if 0 stocks all the time, no OOS. (did in clean data) Rule 3. OOS occurs when both stock and QTY sold are 0, and have sales before and after Rule 4. OOS happens when we have sales before. i.e. not new product case Rule 5. OOS days consider only the last 7 days param df: dataframe with columns: 'date', 'item_name', 'store_name', 'POS Margin on Net Sales (INV)', 'POS Net Sales', 'POS Qty Sold', 'Stock Balance Qty' return output_data: dataframe with header: 'item_name', 'store_name', 'category', 'OOS_days', 'date_list', 'OOS_lastDay','avg_loss_sale_quantity', 'avg_loss_net_sale','avg_loss_INV', 'total_loss_sale_quantity','total_loss_net_sale','total_loss_INV' """ # create dataclass dataset = Dataset(df=df, store_item_list=get_store_item_list(spark)) # subtract the last 7 days from current date one_weeks_ago = (date_generated[-1] + timedelta(days=1) - timedelta(weeks=1)).date() # convert to panda dataframe for checking line by line df = df.toPandas() # define output directory OOS_result = {} # begin to check one by one for SKU_store in dataset.store_item_list: # Establish new status to check given a item in a given store. check = 0 # how many days OOS last_day_OOS = 0 # will be 1 if OOS in the last day of given dataset check_not_new = False # check if it is the new product in this month check_not_removed = False # check if the producted has been removed OOS_date_list = [] # the list to store the date if OOS item = SKU_store[0] store = SKU_store[1] # get subdataset, which contain only info inside #sub_data = get_sub_dataset(SKU = item, Store = store, spark = spark) sub_data = create_sub_time_series_one_item(df, item, store) # Convert to panda for further claculation category = sub_data.Category.unique()[-1] for index, row in sub_data.iterrows(): # Rule 4: OS happens when we have sales before ## i.e. remove new product case ## or the item has only stock, but not sold if row['StockQty'] != 0: check_not_new = True # Rule 3: OOS occurs when both stock and QTY sold are 0 and check_not_new is True # OOS occurs when both stock and QTY sold are 0 if row['StockQty'] == 0 and row['QtySold'] == 0 \ and check_not_new == True and row['Date'] >= one_weeks_ago: check += 1 OOS_date_list.append(row['Date']) #print('Item {} has 0 stock at store {} , Date: {}'.format(item, row['Store'],row['Date'])) ## check OOS in last day last_day_OOS = check_OOS_last_day( df, row['Date']) # return 1 if OOS in the last days # as long as we have stock in this month, we believe this item is not removed from store if row['StockQty'] > 0: check_not_removed = True # When this (item, store) contains the OOS days, # and confirmed that this product is not been removed if check > 0 and check_not_removed == True: key = (item, store) loss_INV, loss_NS, loss_QTY = calculate_possible_loss(sub_data) OOS_result[key] = (check, loss_INV, loss_NS, loss_QTY, last_day_OOS, OOS_date_list) # returned value ## Output to dataframe output_data = out_put_data(OOS_result, category) return output_data