示例#1
0
def using_item_id():
    """Print the rows of 'train.xls' grouped by item id, one group at a time.

    The sheet is loaded through the ``dr`` helper module. For every id
    returned by ``dr.get_unqiue_list`` the id, weight, fat content and type
    of each matching row are printed, after which the function waits on
    ``input()`` before continuing with the next id.
    """
    frame = dr.get_data('train.xls')
    distinct_ids = dr.get_unqiue_list(list(frame['Item_Id']))

    # Snapshot each column as a plain list so rows can be addressed by position.
    row_ids = list(dr.get_req_att(frame, 'Item_Id'))
    weights = list(dr.get_req_att(frame, 'Item_Weight'))
    fat_contents = list(dr.get_req_att(frame, 'Item_Fat_Content'))
    kinds = list(dr.get_req_att(frame, 'Item_Type'))

    for wanted_id in distinct_ids:
        for row, row_id in enumerate(row_ids):
            if row_id == wanted_id:
                print(row_id, weights[row], fat_contents[row], kinds[row])
        # Wait for a line on stdin before moving on to the next id.
        input()
示例#2
0
import dretrive as dr

# Load the sheet and keep only the two columns inspected below.
wanted_columns = ['Item_Type', 'Item_Fat_Content']
df = dr.get_req_att(dr.get_data('train.xls'), wanted_columns)

# Values reported by dr.get_unqiue_list for the item-type column.
item_list = dr.get_unqiue_list(df['Item_Type'])
type_total = len(item_list)
print(item_list)
print("Length : ", type_total)

# Same report for the fat-content column.
item_fat_list = dr.get_unqiue_list(df['Item_Fat_Content'])
fat_total = len(item_fat_list)
print(item_fat_list)
print("Fat types : ", fat_total)
示例#3
0
# Copy the columns used below out of the frame as plain Python lists.
iweight, ifat, itype, shopid, sales, mrp = (
    list(df[column])
    for column in (
        'Item_Weight',
        'Item_Fat_Content',
        'Item_Type',
        'Outlet_Id',
        'Item_Outlet_Sales',
        'Item_MRP',
    )
)

print('sales error count : ', dr.count_nan(sales))

# Disabled exploration code, kept as an inert string literal.
'''
count_out019 = 0
for itr in range(len(iweight)):
    #' ' '
    #if (ifat[itr] == 'Low Fat' or ifat[itr] == 'LF' or ifat[itr] == 'low fat') and itype[itr] == 'Baking Goods' and shopid[itr] == 'OUT027' :
    #   print(iweight[itr], " ", sales[itr], " ", mrp[itr])
    #' ' '
    if shopid[itr] == 'OUT019':
        count_out019 += 1
'''
#print(count_out019)


# For each outlet id, report how many of its rows have a missing weight.
for outlet in list(dr.get_unqiue_list(shopid)):
    missing_weights = sum(
        1
        for row_outlet, row_weight in zip(shopid, iweight)
        if row_outlet == outlet and pd.isna(row_weight)
    )

    print(outlet, ': Check nan :', missing_weights)
示例#4
0
# Load the sheet and keep only the outlet id and sales columns.
lis = ['Outlet_Id', 'Item_Outlet_Sales']
df = dr.get_req_att(dr.get_data('train.xls'), lis)

# no empty values
'''
lis = df['Item_Outlet_Sales']
count = 0
for itr in lis:
    if pd.isna(itr):
        count += 1
print(count)
'''

#no cleaning required
shop_list = df['Outlet_Id']
sale_list = df['Item_Outlet_Sales']
std_shop_list = dr.get_unqiue_list(shop_list)


def _total_for(shop):
    """Add up, in row order, the sales of every row whose outlet equals *shop*."""
    matching_rows = (
        row for row in range(len(shop_list)) if shop == shop_list[row]
    )
    total = 0
    for row in matching_rows:
        total += sale_list[row]
    return total


# One total per outlet, in the same order as std_shop_list.
sales = [_total_for(shop) for shop in std_shop_list]

dv.bargraph(std_shop_list, sales)
示例#5
0
import dretrive as dr
import pandas as pd


def generate_excel(df):
    """Write *df* to sheet 'Sheet1' of 'text.xls', omitting the index column.

    Args:
        df: Object exposing a pandas-style ``to_excel`` method.

    Returns:
        None. The workbook is written to 'text.xls' in the working directory.
    """
    # The context manager saves and closes the workbook on exit, including
    # when to_excel raises. ExcelWriter.save() was removed in pandas 2.0, so
    # the explicit save() call is no longer portable.
    with pd.ExcelWriter('text.xls') as writer:
        # sheet_name is passed by keyword; positional use is deprecated.
        df.to_excel(writer, sheet_name='Sheet1', index=False)


if __name__ == '__main__':

    df = dr.get_data('ctrain.xls')
    std_shop_list = dr.get_unqiue_list(list(df['Outlet_Id']))
    shop_list = list(df['Outlet_Id'])
    item_visible = list(df['Item_Visibility'])
    outlet_size = list(df['Outlet_Size'])
    item_weight = list(df['Item_Weight'])
    loc_type = list(df['Outlet_Loc_Type'])
    outing = list(df['Outlet_Type'])
    tot_visi_list = []
    type_list = []
    count_list = []
    weight_list = []
    final_list = []
    loc_list = []
    out_type = []
    for itr in std_shop_list:
        tot = 0
        count = 0
        weight = 0
示例#6
0
    for itr in range(len(item_fat_content)):

        if item_fat_content[itr] == "Low Fat" or item_fat_content[
                itr] == "low fat" or item_fat_content[itr] == "LF":
            df.loc[itr, 'Item_Fat_Content'] = "Low Fat"
        else:
            df.loc[itr, 'Item_Fat_Content'] = "Regular"

    return df


#stub
if __name__ == '__main__':
    # Load the sheet and snapshot the columns of interest as plain lists.
    df = dr.get_data('train.xls')
    item_id = dr.get_unqiue_list(list(df['Item_Id']))
    item_id_list = list(dr.get_req_att(df, 'Item_Id'))
    item_weight = list(dr.get_req_att(df, 'Item_Weight'))

    # Value reported by dr.count_nan before the clean_nan pass.
    print(dr.count_nan(item_weight))

    item_fat_content = list(dr.get_req_att(df, 'Item_Fat_Content'))
    item_type = list(dr.get_req_att(df, 'Item_Type'))

    for current_id in item_id:
        # Row positions whose item id equals the id being processed.
        matching_rows = [
            row
            for row, row_id in enumerate(item_id_list)
            if row_id == current_id
        ]
        df = clean_nan(current_id, matching_rows, item_weight, df)

    # Re-read the weight column from the updated frame and report again.
    item_weight = list(df['Item_Weight'])
    print(dr.count_nan(item_weight))