def using_item_id():
    """Print the rows of ``train.xls`` grouped by item id, pausing per group.

    The sheet is loaded with ``dr.get_data``. For every id returned by
    ``dr.get_unqiue_list`` the id, weight, fat content and type of each
    matching row are printed, then ``input()`` blocks until Enter is pressed.

    NOTE(review): the original indentation was lost in this file; the pause
    is assumed to happen once per item id, after its rows are printed —
    confirm against the intended behaviour.
    """
    df = dr.get_data('train.xls')
    distinct_ids = dr.get_unqiue_list(list(df['Item_Id']))
    row_ids = list(dr.get_req_att(df, 'Item_Id'))
    weights = list(dr.get_req_att(df, 'Item_Weight'))
    fat_contents = list(dr.get_req_att(df, 'Item_Fat_Content'))
    types = list(dr.get_req_att(df, 'Item_Type'))

    for wanted_id in distinct_ids:
        for pos, row_id in enumerate(row_ids):
            if row_id != wanted_id:
                continue
            print(row_id, weights[pos], fat_contents[pos], types[pos])
        # Wait for the user before moving on to the next item id.
        input()
import dretrive as dr

# Load the training sheet and narrow it to the two columns inspected below.
# NOTE(review): get_req_att presumably returns the requested columns of df;
# the helper lives in dretrive and is not visible here.
wanted_columns = ['Item_Type', 'Item_Fat_Content']
df = dr.get_req_att(dr.get_data('train.xls'), wanted_columns)

# Values reported by get_unqiue_list for the item type column, and their count.
item_list = dr.get_unqiue_list(df['Item_Type'])
print(item_list)
print("Length : ",len(item_list))

# Same report for the fat content labels.
item_fat_list = dr.get_unqiue_list(df['Item_Fat_Content'])
print(item_fat_list)
print("Fat types : ", len(item_fat_list))
# Copy the columns used below out of the frame as plain Python lists.
# NOTE(review): `df`, `dr` and `pd` are expected to be defined earlier in the
# script; that part is not visible here.
iweight = list(df['Item_Weight'])
ifat = list(df['Item_Fat_Content'])
itype = list(df['Item_Type'])
shopid = list(df['Outlet_Id'])
sales = list(df['Item_Outlet_Sales'])
mrp = list(df['Item_MRP'])

print('sales error count : ', dr.count_nan(sales))

# Disabled exploratory code (previously parked inside a string literal):
# count_out019 = 0
# for itr in range(len(iweight)):
#     # if (ifat[itr] == 'Low Fat' or ifat[itr] == 'LF' or ifat[itr] == 'low fat') and itype[itr] == 'Baking Goods' and shopid[itr] == 'OUT027' :
#     #     print(iweight[itr], " ", sales[itr], " ", mrp[itr])
#     if shopid[itr] == 'OUT019':
#         count_out019 += 1
# print(count_out019)

# For every outlet id, report how many of its rows have a missing weight.
for outlet in dr.get_unqiue_list(shopid):
    missing_weights = sum(
        1
        for row_outlet, weight in zip(shopid, iweight)
        if row_outlet == outlet and pd.isna(weight)
    )
    print(outlet, ': Check nan :', missing_weights)
# Plot the total of Item_Outlet_Sales for every outlet as a bar graph.
# NOTE(review): `dr` and `dv` are expected to be imported earlier in the
# script; those imports are not visible here.
df = dr.get_data('train.xls')
lis = ['Outlet_Id', 'Item_Outlet_Sales']
df = dr.get_req_att(df, lis)

# no empty values
# Disabled check (previously parked inside a string literal):
# lis = df['Item_Outlet_Sales']
# count = 0
# for itr in lis:
#     if pd.isna(itr):
#         count += 1
# print(count)
# no cleaning required

shop_list = df['Outlet_Id']
std_shop_list = dr.get_unqiue_list(shop_list)
sale_list = df['Item_Outlet_Sales']

# Walk the two columns in lockstep instead of indexing them with
# `series[i]`: integer `[]` on a Series is a label lookup, so the old loop
# was only correct for a default 0..n-1 index and paid a lookup per row.
sales = []
for shop in std_shop_list:
    shop_total = sum(
        amount
        for outlet, amount in zip(shop_list, sale_list)
        if outlet == shop
    )
    sales.append(shop_total)

dv.bargraph(std_shop_list, sales)
import dretrive as dr
import pandas as pd


def generate_excel(df):
    """Write *df* to sheet ``Sheet1`` of ``text.xls`` without the index column.

    The writer is used as a context manager so the workbook is saved and
    closed even if ``to_excel`` raises. This also avoids
    ``ExcelWriter.save()``, which no longer exists in pandas 2.0+.

    NOTE(review): writing the legacy ``.xls`` format relies on the ``xlwt``
    engine, which pandas dropped in 2.0 — confirm the pandas version in use
    before relying on this output path.
    """
    with pd.ExcelWriter('text.xls') as writer:
        df.to_excel(writer, sheet_name='Sheet1', index=False)


if __name__ == '__main__':
    df = dr.get_data('ctrain.xls')
    std_shop_list = dr.get_unqiue_list(list(df['Outlet_Id']))

    # Per-row columns as plain lists.
    shop_list = list(df['Outlet_Id'])
    item_visible = list(df['Item_Visibility'])
    outlet_size = list(df['Outlet_Size'])
    item_weight = list(df['Item_Weight'])
    loc_type = list(df['Outlet_Loc_Type'])
    outing = list(df['Outlet_Type'])

    # Accumulators; nothing fills them in the code visible here.
    tot_visi_list = []
    type_list = []
    count_list = []
    weight_list = []
    final_list = []
    loc_list = []
    out_type = []

    # NOTE(review): only the counter resets of this loop are visible; the
    # rest of the loop body appears to be cut off in this view.
    for itr in std_shop_list:
        tot = 0
        count = 0
        weight = 0
for itr in range(len(item_fat_content)): if item_fat_content[itr] == "Low Fat" or item_fat_content[ itr] == "low fat" or item_fat_content[itr] == "LF": df.loc[itr, 'Item_Fat_Content'] = "Low Fat" else: df.loc[itr, 'Item_Fat_Content'] = "Regular" return df #stub if __name__ == '__main__': df = dr.get_data('train.xls') item_id = dr.get_unqiue_list(list(df['Item_Id'])) item_id_list = list(dr.get_req_att(df, 'Item_Id')) item_weight = list(dr.get_req_att(df, 'Item_Weight')) print(dr.count_nan(item_weight)) item_fat_content = list(dr.get_req_att(df, 'Item_Fat_Content')) item_type = list(dr.get_req_att(df, 'Item_Type')) for outer_itr in item_id: temp_list = [] for itr in range(len(item_id_list)): if item_id_list[itr] == outer_itr: #print(item_id_list[itr], item_weight[itr], item_fat_content[itr], item_type[itr]) temp_list.append(itr) df = clean_nan(outer_itr, temp_list, item_weight, df) item_weight = list(df['Item_Weight']) print(dr.count_nan(item_weight))