def data_out(df, batch=''):
    """Write the Delta Attributes workbook for *batch*.

    Produces two sheets: "STEP ONLY Attributes" (rows whose GWS_Node_ID is
    missing, i.e. attributes present only in STEP) and "ALL Attributes".
    Column widths are auto-sized from the data, clamped to [10, 40].

    Silently does nothing when *df* is empty (matches historical behavior).
    """
    if df.empty:
        return

    outfile = ('C:/Users/xcxg109/NonDriveFiles/Delta Attributes_'
               + str(batch) + '.xlsx')

    df = df[['Segment_ID', 'Segment_Name', 'Family_ID', 'Family_Name',
             'Category_ID', 'Category_Name', 'Grainger_Attr_ID',
             'Grainger_Attribute_Name', 'GWS_Attribute_Name', 'GWS_Attr_ID',
             'GWS_PIM_Path', 'GWS_Category_ID', 'GWS_Category_Name',
             'GWS_Node_ID', 'GWS_Node_Name', 'STEP_Category_ID',
             'STEP_Attr_ID']]
    df = df.sort_values(
        ['Segment_Name', 'Category_Name', 'Grainger_Attribute_Name'],
        ascending=[True, True, True])

    # Attributes with no GWS node exist only on the STEP side.
    df_filter = df[df['GWS_Node_ID'].isna()]

    writer = pd.ExcelWriter(outfile, engine='xlsxwriter')
    df_filter.to_excel(writer, sheet_name="STEP ONLY Attributes",
                       startrow=0, startcol=0, index=False)
    df.to_excel(writer, sheet_name="ALL Attributes",
                startrow=0, startcol=0, index=False)

    # Same width logic for both sheets: drop the index column's width
    # (first entry) and clamp each data column to [10, 40].
    for sheet_name, frame in (("STEP ONLY Attributes", df_filter),
                              ("ALL Attributes", df)):
        worksheet = writer.sheets[sheet_name]
        for i, width in enumerate(fd.get_col_widths(frame)[1:]):
            worksheet.set_column(i, i, max(10, min(40, width)))

    writer.save()
def data_out(df, atts_df, batch=''):
    """Write the supplier report workbook for *batch*.

    Sheet "SKU Data": one row per Grainger SKU (renamed to Material_No),
    intended as input for a SKU-based pivot table.
    Sheet "Attribute Fill Rates": one row per (Category_ID, Attr_ID) with
    fill-rate metrics, sorted by hierarchy then Endeca ranking.

    Column widths are auto-sized from the data, clamped to [10, 40].
    """
    # SKU-level rows for the pivot-table sheet.
    sku_data = df[['Supplier_Parent_Group', 'Supplier_ID', 'Supplier_Name',
                   'Segment_ID', 'Segment_Name', 'Family_ID', 'Family_Name',
                   'Category_ID', 'Category_Name', 'Grainger_SKU', 'PM_Code',
                   'Sales_Status', 'Relationship_Mgr_Code', '2019_COGS']]
    sku_data = sku_data.drop_duplicates(subset=['Grainger_SKU'])
    sku_data = sku_data.rename(columns={'Grainger_SKU': 'Material_No'})

    # Attribute-level fill rates, de-duplicated per category/attribute.
    fill = atts_df[['Supplier_Parent_Group', 'Supplier_ID', 'Supplier_Name',
                    'Segment_ID', 'Segment_Name', 'Family_ID', 'Family_Name',
                    'Category_ID', 'Category_Name', 'Attr_ID',
                    'Attribute_Name', 'Endeca_Ranking',
                    'Supplier_Fill_Rate_%', 'Category_Fill_Rate_%']]
    fill = fill.drop_duplicates(subset=['Category_ID', 'Attr_ID'])
    fill = fill.sort_values(by=['Segment_Name', 'Family_Name',
                                'Category_Name', 'Endeca_Ranking'])

    outfile = ('C:/Users/xcxg109/NonDriveFiles/SUPPLIER_REPORT_'
               + str(batch) + '_.xlsx')
    writer = pd.ExcelWriter(outfile, engine='xlsxwriter')

    sku_data.to_excel(writer, sheet_name="SKU Data",
                      startrow=0, startcol=0, index=False)
    fill.to_excel(writer, sheet_name='Attribute Fill Rates',
                  startrow=0, startcol=0, index=False)

    # NOTE: the original created an unused text-wrap Format here; it was
    # never passed to set_column, so it has been removed as dead code.

    # Drop the index column's width (first entry) and clamp to [10, 40].
    for sheet_name, frame in (('Attribute Fill Rates', fill),
                              ("SKU Data", sku_data)):
        worksheet = writer.sheets[sheet_name]
        for i, width in enumerate(fd.get_col_widths(frame)[1:]):
            worksheet.set_column(i, i, max(10, min(40, width)))

    writer.save()
def data_out(df, quer, batch=''):
    """Write the STEP-PIM hierarchy workbook for *batch*.

    *quer* is unused here but kept for call-site compatibility.
    Prints 'EMPTY DATAFRAME' and writes nothing when *df* is empty.
    Column widths are auto-sized from the data, clamped to [10, 40].
    """
    if df.empty:
        print('EMPTY DATAFRAME')
        return

    outfile = ('C:/Users/xcxg109/NonDriveFiles/STEP-PIM_'
               + str(batch) + '_HIER.xlsx')
    writer = pd.ExcelWriter(outfile, engine='xlsxwriter')

    df.to_excel(writer, sheet_name="DATA", startrow=0, startcol=0,
                index=False)
    worksheet = writer.sheets['DATA']

    # Drop the index column's width (first entry) and clamp to [10, 40].
    for i, width in enumerate(fd.get_col_widths(df)[1:]):
        worksheet.set_column(i, i, max(10, min(40, width)))

    writer.save()
def data_out(df):
    """Write the STEP/WS category workbook (single "Category" sheet).

    Prints 'EMPTY DATAFRAME' and writes nothing when *df* is empty.
    Column widths are auto-sized from the data, clamped to [10, 40].
    """
    if df.empty:
        print('EMPTY DATAFRAME')
        return

    outfile = 'C:/Users/xcxg109/NonDriveFiles/STEP_WS_Cats.xlsx'
    writer = pd.ExcelWriter(outfile, engine='xlsxwriter')

    df.to_excel(writer, sheet_name="Category", startrow=0, startcol=0,
                index=False)
    worksheet1 = writer.sheets['Category']

    # Drop the index column's width (first entry) and clamp to [10, 40].
    for i, width in enumerate(fd.get_col_widths(df)[1:]):
        worksheet1.set_column(i, i, max(10, min(40, width)))

    writer.save()
def data_out(df):
    """Write the final attribute-rankings workbook ("STEP Attributes" sheet).

    Rows are sorted by leaf node, new rank, then attribute name.
    Column widths are auto-sized from the data, clamped to [10, 40].
    """
    outfile = ('C:/Users/xcxg109/NonDriveFiles/'
               'ALLTIMEHIGH_FINAL_Rankings_WED.xlsx')

    df = df.sort_values(['GWS_Leaf_Node_ID', 'New_Rank',
                         'GWS_Attribute_Name'],
                        ascending=[True, True, True])

    writer = pd.ExcelWriter(outfile, engine='xlsxwriter')
    df.to_excel(writer, sheet_name="STEP Attributes", startrow=0,
                startcol=0, index=False)
    worksheet1 = writer.sheets['STEP Attributes']

    # Drop the index column's width (first entry) and clamp to [10, 40].
    for i, width in enumerate(fd.get_col_widths(df)[1:]):
        worksheet1.set_column(i, i, max(10, min(40, width)))

    # NOTE: the original built an unused text-wrap Format after the writes;
    # it was never applied to any column, so it has been removed.
    writer.save()
def data_out(df, batch=''):
    """Write the Delta-project STEP values workbook for *batch*.

    Prints 'EMPTY DATAFRAME' and writes nothing when *df* is empty.
    Column widths are auto-sized from the data, clamped to [10, 40].
    """
    if df.empty:
        print('EMPTY DATAFRAME')
        return

    outfile = ('C:/Users/xcxg109/NonDriveFiles/Delta_Project_STEP_Values_'
               + str(batch) + '_.xlsx')
    writer = pd.ExcelWriter(outfile, engine='xlsxwriter')

    df.to_excel(writer, sheet_name="ALL STEP Att_Values", startrow=0,
                startcol=0, index=False)
    worksheet1 = writer.sheets['ALL STEP Att_Values']

    # Drop the index column's width (first entry) and clamp to [10, 40].
    for i, width in enumerate(fd.get_col_widths(df)[1:]):
        worksheet1.set_column(i, i, max(10, min(40, width)))

    writer.save()
def data_out(final_df, node):
    """Write the multi-UOM workbook for WS node *node* ("UOMs" sheet).

    De-duplicates on (WS_Node_ID, WS_Attr_ID, Normalized_Unit) so each
    node/attribute/unit combination appears once; WS_SKU is kept as an
    'Example SKU'. Column widths are clamped to [10, 40].
    """
    final_df = final_df.sort_values(
        ['WS_Category_Name', 'WS_Node_Name', 'WS_Attribute_Name'],
        ascending=[True, True, True])

    final_no_dupes = final_df.drop_duplicates(
        subset=['WS_Node_ID', 'WS_Attr_ID', 'Normalized_Unit'])
    final_no_dupes = final_no_dupes[['WS_Category_ID', 'WS_Category_Name',
                                     'WS_Node_ID', 'WS_Node_Name', 'WS_SKU',
                                     'WS_Attr_ID', 'WS_Attribute_Name',
                                     'Attribute_Definition',
                                     'Numeric_Display_Type', 'Unit_Group_ID',
                                     'Normalized_Unit', 'Attribute_Values',
                                     'UOMs in Attribute']]
    final_no_dupes = final_no_dupes.rename(columns={'WS_SKU': 'Example SKU'})

    outfile = ('C:/Users/xcxg109/NonDriveFiles/' + str(node)
               + '_multi-UOMs.xlsx')
    writer = pd.ExcelWriter(outfile, engine='xlsxwriter')

    final_no_dupes.to_excel(writer, sheet_name="UOMs", startrow=0,
                            startcol=0, index=False)
    worksheet1 = writer.sheets['UOMs']

    # NOTE: the original created an unused text-wrap Format here; it was
    # never passed to set_column, so it has been removed as dead code.

    # Drop the index column's width (first entry) and clamp to [10, 40].
    for i, width in enumerate(fd.get_col_widths(final_no_dupes)[1:]):
        worksheet1.set_column(i, i, max(10, min(40, width)))

    writer.save()
def data_out(final_df, node, node_name, batch=''):
    """Write the STEP-vs-WS analysis workbook for *node*/*node_name*.

    Assigns a Group_ID to each unique (attribute name, attribute value)
    pair, then writes two sheets: "Uniques" (one row per distinct
    name/value/data-type) and "All Text UOMs" (every row). Widths are
    clamped to [10, 40], with wider wrapped columns for the long text
    fields.
    """
    # Key each row by attribute name + value; ngroup() is the public
    # replacement for the removed internal .grouper.group_info[0] and
    # yields the same sorted-key numbering (1-based here).
    final_df['concat'] = (final_df['Grainger_Attribute_Name'].map(str)
                          + final_df['Grainger_Attribute_Value'].map(str))
    final_df['Group_ID'] = final_df.groupby('concat').ngroup() + 1

    final_df = final_df[['Group_ID', 'Segment_ID', 'Segment_Name',
                         'Family_ID', 'Family_Name', 'Category_ID',
                         'Category_Name', 'PM_Code', 'Sales_Status',
                         'Relationship_MGR_Code', 'WS_Category_ID',
                         'WS_Category_Name', 'WS_Node_ID', 'WS_Node_Name',
                         'Grainger_SKU', 'WS_SKU', 'Grainger_Attr_ID',
                         'WS_Attr_ID', 'WS_Attr_Value_ID', 'Multivalue?',
                         'Data_Type', 'Numeric_Display_Type',
                         'WS_Attribute_Name', 'WS_Attribute_Definition',
                         'Normalized_Value', 'Normalized_Unit', 'Numerator',
                         'Denominator', 'Grainger_Attribute_Name',
                         'Grainger_Attribute_Definition',
                         'Grainger_Category_Specific_Definition',
                         'Grainger_Attribute_Value', 'WS_Value',
                         'STEP-WS_Match?', 'Potential_Replaced_Values',
                         'Revised_Value']]

    final_no_dupes = final_df.drop_duplicates(
        subset=['Grainger_Attribute_Name', 'Grainger_Attribute_Value',
                'Data_Type'])
    final_no_dupes = final_no_dupes[['Group_ID', 'Category_ID',
                                     'Category_Name', 'Grainger_SKU',
                                     'Data_Type', 'Numeric_Display_Type',
                                     'WS_Attribute_Name',
                                     'WS_Attribute_Definition',
                                     'Grainger_Attribute_Name',
                                     'Grainger_Attribute_Definition',
                                     'Grainger_Category_Specific_Definition',
                                     'Grainger_Attribute_Value', 'WS_Value',
                                     'STEP-WS_Match?',
                                     'Potential_Replaced_Values',
                                     'Revised_Value']]
    final_no_dupes = final_no_dupes.rename(
        columns={'Grainger_SKU': 'Example SKU'})

    outfile = ('C:/Users/xcxg109/NonDriveFiles/' + str(node) + '_'
               + str(node_name) + '_' + str(batch)
               + '_STEP-WS_Analysis.xlsx')
    writer = pd.ExcelWriter(outfile, engine='xlsxwriter')
    workbook = writer.book

    final_no_dupes.to_excel(writer, sheet_name="Uniques", startrow=0,
                            startcol=0, index=False)
    final_df.to_excel(writer, sheet_name="All Text UOMs", startrow=0,
                      startcol=0, index=False)
    worksheet1 = writer.sheets['Uniques']
    worksheet2 = writer.sheets['All Text UOMs']

    # Wrapped, left-aligned format for long text columns.
    # set_text_wrap takes a bool (the original passed a truthy string).
    layout = workbook.add_format()
    layout.set_text_wrap(True)
    layout.set_align('left')

    # Drop the index column's width (first entry) and clamp to [10, 40].
    for i, width in enumerate(fd.get_col_widths(final_no_dupes)[1:]):
        worksheet1.set_column(i, i, max(10, min(40, width)))
    worksheet1.set_column('G:G', 50, layout)
    worksheet1.set_column('H:H', 30, layout)
    worksheet1.set_column('J:J', 50, layout)

    for i, width in enumerate(fd.get_col_widths(final_df)[1:]):
        worksheet2.set_column(i, i, max(10, min(40, width)))
    worksheet2.set_column('V:V', 50, layout)
    worksheet2.set_column('Y:Y', 50, layout)
    worksheet2.set_column('AA:AA', 50, layout)

    writer.save()
count += 1 # if original df < 30K rows, process the entire thing at once else: data_out(final_df, quer) outfile = 'C:/Users/xcxg109/NonDriveFiles/STEP_only_SKUs.xlsx' writer = pd.ExcelWriter(outfile, engine='xlsxwriter') no_match_df.to_excel(writer, sheet_name="DATA", startrow=0, startcol=0, index=False) worksheet = writer.sheets['DATA'] col_widths = fd.get_col_widths(no_match_df) col_widths = col_widths[1:] for i, width in enumerate(col_widths): if width > 40: width = 40 elif width < 10: width = 10 worksheet.set_column(i, i, width) writer.save() print("--- {} minutes ---".format(round((time.time() - start_time) / 60, 2)))
def data_out(final_df, node, node_name):
    """Write the multivalue-issues workbook for *node*/*node_name*.

    NOTE: nonconforming rows of the df were dropped and the df was sorted
    before being passed here.

    Assigns a Group_ID per unique (WS attribute name, normalized value)
    pair and writes two sheets: "Uniques" (one row per distinct
    name/value/data-type) and "All MultiValue Issues" (every row). Widths
    are clamped to [10, 40], with wider wrapped columns for long text
    fields.
    """
    # Key each row by attribute name + normalized value; ngroup() is the
    # public replacement for the removed internal .grouper.group_info[0]
    # and yields the same sorted-key numbering (1-based here).
    final_df['concat'] = (final_df['WS_Attribute_Name'].map(str)
                          + final_df['Normalized_Value'].map(str))
    final_df['Group_ID'] = final_df.groupby('concat').ngroup() + 1

    final_df = final_df[['Group_ID', 'PIM_Path', 'WS_Category_ID',
                         'WS_Category_Name', 'WS_Node_ID', 'WS_Node_Name',
                         'STEP_Category_ID', 'WS_SKU', 'STEP_Attr_ID',
                         'WS_Attr_ID', 'WS_Attr_Value_ID', 'Multivalue',
                         'Data_Type', 'Numeric_Display_Type',
                         'WS_Attribute_Name', 'Original_Value',
                         'Original_Unit', 'Grainger_Attribute_Value',
                         'Normalized_Value', 'Normalized_Unit',
                         'Potential_Issue']]

    final_no_dupes = final_df.drop_duplicates(
        subset=['WS_Attribute_Name', 'Normalized_Value', 'Data_Type'])
    final_no_dupes = final_no_dupes[['Group_ID', 'PIM_Path',
                                     'WS_Category_ID', 'WS_Category_Name',
                                     'WS_Node_ID', 'WS_Node_Name', 'WS_SKU',
                                     'Data_Type', 'WS_Attribute_Name',
                                     'Grainger_Attribute_Value',
                                     'Normalized_Value', 'Normalized_Unit',
                                     'Potential_Issue']]

    outfile = ('C:/Users/xcxg109/NonDriveFiles/' + str(node) + '_'
               + str(node_name) + '_MULTIVALUE_ISSUES.xlsx')
    writer = pd.ExcelWriter(outfile, engine='xlsxwriter')
    workbook = writer.book

    final_no_dupes.to_excel(writer, sheet_name="Uniques", startrow=0,
                            startcol=0, index=False)
    final_df.to_excel(writer, sheet_name="All MultiValue Issues",
                      startrow=0, startcol=0, index=False)
    worksheet1 = writer.sheets['Uniques']
    worksheet2 = writer.sheets['All MultiValue Issues']

    # Wrapped, left-aligned format for long text columns.
    # set_text_wrap takes a bool (the original passed a truthy string).
    layout = workbook.add_format()
    layout.set_text_wrap(True)
    layout.set_align('left')

    # Drop the index column's width (first entry) and clamp to [10, 40].
    for i, width in enumerate(fd.get_col_widths(final_no_dupes)[1:]):
        worksheet1.set_column(i, i, max(10, min(40, width)))
    worksheet1.set_column('J:J', 50, layout)
    worksheet1.set_column('K:K', 50, layout)
    worksheet1.set_column('M:M', 30, layout)

    for i, width in enumerate(fd.get_col_widths(final_df)[1:]):
        worksheet2.set_column(i, i, max(10, min(40, width)))
    worksheet2.set_column('P:P', 50, layout)
    worksheet2.set_column('R:R', 50, layout)
    worksheet2.set_column('S:S', 50, layout)
    worksheet2.set_column('U:U', 50, layout)

    writer.save()
def data_out(final_df, node, batch=''):
    """Write the text-UOMs workbook for *node*/*batch*.

    Drops rows for the 'Item' attribute, assigns a Group_ID per unique
    (attribute name, attribute value) pair, and writes two sheets:
    "Uniques" (one row per distinct name/value, Grainger_SKU kept as
    'Example SKU') and "All Text UOMs" (every row). Widths are clamped to
    [10, 40], with wider wrapped columns for long text fields.
    """
    final_df = final_df[final_df.Grainger_Attribute_Name != 'Item']
    final_df = final_df.sort_values(['Potential_Replaced_Values'],
                                    ascending=[True])

    # Key each row by attribute name + value; ngroup() is the public
    # replacement for the removed internal .grouper.group_info[0] and
    # yields the same sorted-key numbering (1-based here).
    final_df['concat'] = (final_df['Grainger_Attribute_Name'].map(str)
                          + final_df['Grainger_Attribute_Value'].map(str))
    final_df['Group_ID'] = final_df.groupby('concat').ngroup() + 1

    final_df = final_df[['Group_ID', 'Segment_ID', 'Segment_Name',
                         'Family_ID', 'Family_Name', 'Category_ID',
                         'Category_Name', 'WS_Category_ID',
                         'WS_Category_Name', 'WS_Node_ID', 'WS_Node_Name',
                         'PM_Code', 'Sales_Status',
                         'RELATIONSHIP_MANAGER_CODE', 'Grainger_SKU',
                         'WS_SKU', 'WS_Attr_ID', 'WS_Attr_Value_ID',
                         'WS_Attribute_Name', 'WS_Original_Value',
                         'Grainger_Attr_ID', 'Grainger_Attribute_Name',
                         'Grainger_Attribute_Value']]

    final_no_dupes = final_df.drop_duplicates(
        subset=['Grainger_Attribute_Name', 'Grainger_Attribute_Value'])
    final_no_dupes = final_no_dupes[['Group_ID', 'Category_ID',
                                     'Category_Name', 'Grainger_SKU',
                                     'Grainger_Attr_ID',
                                     'Grainger_Attribute_Name',
                                     'Grainger_Attribute_Value']]
    final_no_dupes = final_no_dupes.rename(
        columns={'Grainger_SKU': 'Example SKU'})

    outfile = ('C:/Users/xcxg109/NonDriveFiles/' + str(node) + '_'
               + str(batch) + '_text_UOMs.xlsx')
    writer = pd.ExcelWriter(outfile, engine='xlsxwriter')
    workbook = writer.book

    final_no_dupes.to_excel(writer, sheet_name="Uniques", startrow=0,
                            startcol=0, index=False)
    final_df.to_excel(writer, sheet_name="All Text UOMs", startrow=0,
                      startcol=0, index=False)
    worksheet1 = writer.sheets['Uniques']
    worksheet2 = writer.sheets['All Text UOMs']

    # Wrapped, left-aligned format for long text columns.
    # set_text_wrap takes a bool (the original passed a truthy string).
    layout = workbook.add_format()
    layout.set_text_wrap(True)
    layout.set_align('left')

    # Drop the index column's width (first entry) and clamp to [10, 40].
    for i, width in enumerate(fd.get_col_widths(final_no_dupes)[1:]):
        worksheet1.set_column(i, i, max(10, min(40, width)))
    worksheet1.set_column('G:G', 50, layout)
    worksheet1.set_column('H:H', 30, layout)
    worksheet1.set_column('J:J', 50, layout)

    for i, width in enumerate(fd.get_col_widths(final_df)[1:]):
        worksheet2.set_column(i, i, max(10, min(40, width)))
    worksheet2.set_column('V:V', 50, layout)
    worksheet2.set_column('Y:Y', 50, layout)
    worksheet2.set_column('AA:AA', 50, layout)

    writer.save()
# NOTE(review): top-level script fragment — `no_match_df`, `gws_atts`,
# `fd` and `pd` are defined elsewhere, and the workbook is presumably
# finished (widths for sheet 2, writer.save()) beyond this view.
# Drop the GWS-side columns before reporting STEP-only attributes.
no_match_df = no_match_df.drop(
    ['GWS_Category_ID', 'GWS_Attr_ID', 'GWS_Attribute_Name'], axis=1)
outfile = 'C:/Users/xcxg109/NonDriveFiles/Delta_Project_Attribute_Breakdown.xlsx'
writer = pd.ExcelWriter(outfile, engine='xlsxwriter')
if no_match_df.empty == False:
    no_match_df.to_excel(writer, sheet_name="Delta Atts not in WS", startrow=0, startcol=0, index=False)
    worksheet1 = writer.sheets['Delta Atts not in WS']
    col_widths = fd.get_col_widths(no_match_df)
    # First entry is the index column's width — skip it.
    col_widths = col_widths[1:]
    for i, width in enumerate(col_widths):
        # Clamp each column width to the range [10, 40].
        if width > 40:
            width = 40
        elif width < 10:
            width = 10
        worksheet1.set_column(i, i, width)
# NOTE(review): placement of the two statements below relative to the
# `if` above is ambiguous in the collapsed source; writing the ALL-atts
# sheet unconditionally is assumed — confirm against the full script.
gws_atts.to_excel(writer, sheet_name="Delta ALL Atts", startrow=0, startcol=0, index=False)
worksheet2 = writer.sheets['Delta ALL Atts']