LOV_val = LOV_val.replace('wk.', 'wk')

        if 'mo.' in str(uom):
            sample_val = sample_val.replace('mo.', 'mo')
            LOV_val = LOV_val.replace('mo.', 'mo')

        if 'yr.' in str(uom):
            sample_val = sample_val.replace('yr.', 'yr')
            LOV_val = LOV_val.replace('yr.', 'yr')

        if 'µ' in str(uom):
            sample_val = sample_val.replace('µ', 'u')
            LOV_val = LOV_val.replace('µ', 'u')

    df.at[row.Index, 'Sample_Values'] = sample_val
    df.at[row.Index, 'Restricted_Attribute_Value_Domain'] = LOV_val

    return df


df_upload = pd.DataFrame()
search_level = 'cat.CATEGORY_ID'
quer = 'ATTR'

start_time = time.time()
print('working...')

gws_df = gws.gws_q(gws_attr_values, 'tax_att."unitGroupId"', 266)
fd.data_out(settings.directory_name, gws_df, quer, search_level)

print("--- {} minutes ---".format(round((time.time() - start_time) / 60, 2)))
if len(sku_list)>4000:
    num_lists = round(len(sku_list)/4000, 0)
    num_lists = int(num_lists)

    if num_lists == 1:
        num_lists = 2
    print('running SKUs in {} batches'.format(num_lists))

    size = round(len(sku_list)/num_lists, 0)
    size = int(size)

    div_lists = [sku_list[i * size:(i + 1) * size] for i in range((len(sku_list) + size - 1) // size)]
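    # A quick worked illustration of the slicing above (demo values only, not part of
    # the original script): with 10 SKUs and a batch size of 4, the range expression
    # (10 + 4 - 1) // 4 evaluates to 3, giving slices of 4, 4 and 2 items, so every
    # SKU lands in exactly one batch.
    demo_skus = list(range(10))
    demo_size = 4
    demo_batches = [demo_skus[i * demo_size:(i + 1) * demo_size]
                    for i in range((len(demo_skus) + demo_size - 1) // demo_size)]
    assert demo_batches == [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]]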

    for k  in range(0, len(div_lists)):
        gr_skus = ", ".join("'" + str(i) + "'" for i in div_lists[k])
        temp_df = gws.gws_q(prod_ids, 'taxonomy_product',  gr_skus)
        prod_id_list = pd.concat([prod_id_list, temp_df], axis=0, sort=False)
        
        prod_ID = prod_id_list['id'].unique().tolist()
        products = ", ".join(str(i) for i in prod_ID)
    
        temp_gov_num = gws.gws_q(numeric_vals, 'tax_att."dataType"', products)
        temp_gov_allow_text = gws.gws_q(allowed_vals, "'text'", products)
        temp_gov_allow_num = gws.gws_q(allowed_vals, "'number'", products)
        
        temp_gov_num = temp_gov_num['count'][0]
        temp_gov_allow_text = temp_gov_allow_text['count'][0]
        temp_gov_allow_num = temp_gov_allow_num['count'][0]

        print ('Round {} - {} SKUs: \nTEMPgov_num = {}\nTEMPgov_allow_text = {}\nTEMPgov_allow_num = {}'.format(k+1, len( div_lists[k]), temp_gov_num, temp_gov_allow_text, temp_gov_allow_num))        
        gov_num = gov_num + temp_gov_num
        num_lists = 2
    print('running SKUs in {} batches'.format(num_lists))

    size = round(len(search_data) / num_lists, 0)
    size = int(size)

    div_lists = [
        search_data[i * size:(i + 1) * size]
        for i in range((len(search_data) + size - 1) // size)
    ]

    for k in range(0, len(div_lists)):
        print('batch {} of {}'.format(k + 1, num_lists))
        sku_str = ", ".join("'" + str(i) + "'" for i in div_lists[k])

        temp_init_ws_df = gws.gws_q(gws_values, 'tprod."gtPartNumber"',
                                    sku_str)
        init_ws_df = pd.concat([init_ws_df, temp_init_ws_df],
                               axis=0,
                               sort=False)

    init_ws_df = pd.merge(data, init_ws_df, how='inner', indicator=True)

else:
    sku_str = ", ".join("'" + str(i) + "'" for i in search_data)
    init_ws_df = gws.gws_q(gws_values, 'tprod."gtPartNumber"', sku_str)
    init_ws_df = pd.merge(data, init_ws_df, how='inner', indicator=True)
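# Aside on the merges above (demo frames, not the real SKU data): indicator=True adds a
# categorical "_merge" column. With how='inner' every surviving row reads 'both', so the
# flag mostly matters if the join is rerun as 'left' or 'outer', e.g.:
demo_left = pd.DataFrame({'sku': ['A', 'B']})
demo_right = pd.DataFrame({'sku': ['B', 'C']})
demo_merged = pd.merge(demo_left, demo_right, how='outer', indicator=True)
# demo_merged['_merge'] -> ['left_only', 'both', 'right_only']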

node_names = init_ws_df['WS_Node_Name'].unique().tolist()

for n in node_names:
    temp_df = init_ws_df.loc[init_ws_df['WS_Node_Name'] == n]
    temp_df.to_excel(outfile, index=None, header=True, encoding='utf-8')


ws_df = pd.DataFrame()

start_time = time.time()
print('working...')

# read in attribute names from file -- change this to a choice menu at some point
att_df = pd.read_csv('C:/Users/xcxg109/NonDriveFiles/reference/att_test.csv')

attributes = att_df['Attribute'].unique().tolist()

for att in attributes:
    att = "'%" + att + "%'"
    temp_df = gws.gws_q(attr_values, 'tax_att.name', att)

    if temp_df.empty == False:
        ws_df = pd.concat([ws_df, temp_df], axis=0, sort=False)
    else:
        print('EMPTY DATAFRAME')
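# Note on the "'%" + att + "%'" wrapping at the top of the loop (assumption: gws_q drops
# this string straight into a SQL LIKE clause): the concatenation only adds the quotes
# and wildcards around the attribute name.
demo_att = 'Voltage'
assert "'%" + demo_att + "%'" == "'%Voltage%'"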

ws_df = ws_df.drop_duplicates()

if len(ws_df) > 900000:
    count = 1
    # split into multiple dfs, creating at least 2
    num_lists = round(len(ws_df) / 900000, 0)
    num_lists = int(num_lists)
    if num_lists == 1:
        num_lists = 2

        print('running GWS SKUs in {} batches'.format(num_lists))

        size = round(len(search_data)/num_lists, 0)
        size = int(size)

        div_lists = [search_data[i * size:(i + 1) * size] for i in range((len(search_data) + size - 1) // size)]

        for k  in range(0, len(div_lists)):
            print('batch {} of {}'.format(k+1, num_lists))
            sku_str  = ", ".join("'" + str(i) + "'" for i in div_lists[k])

            temp_df = gws.gws_q(ws_basic_query, 'tprod."gtPartNumber"', sku_str)
            ws_df = pd.concat([ws_df, temp_df], axis=0, sort=False)

    else:
        sku_str  = ", ".join("'" + str(i) + "'" for i in search_data)
        
        ws_df = gws.gws_q(ws_basic_query, 'tprod."gtPartNumber"', sku_str)

    if ws_df.empty == False:
        # pull all L3s for the supplier and get attribute data on each node
        suppliers = ws_df['Supplier_ID'].unique().tolist()
        print('# suppliers = ', len(suppliers))
        loop_count = 1
        
        for sup in suppliers:
            start_time = time.time()
search_data = fd.data_in(data_type, settings.directory_name)

print('working...')
start_time = time.time()

# read in grainger data
allCATS_df = q.get_att_values()

if data_type == 'gws_query':
    start_time = time.time()

    if search_level == 'single':
        for node in search_data:
            print('\n\nWS node ', node)
            init_ws_df = gws.gws_q(gws_values_single, 'tprod."categoryId"',
                                   node)

            if init_ws_df.empty == False:
                # clean up pi_mappings data
                init_ws_df['STEP_Attr_ID'] = init_ws_df[
                    'STEP_Attr_ID'].str.replace('_ATTR', '')
                init_ws_df['STEP_Attr_ID'] = init_ws_df[
                    'STEP_Attr_ID'].str.replace('_GATTR', '')
                init_ws_df['STEP_Attr_ID'] = init_ws_df[
                    'STEP_Attr_ID'].str.strip()
                init_ws_df['STEP_Attr_ID'] = init_ws_df['STEP_Attr_ID'].astype(
                    int)

                init_ws_df['STEP_Category_ID'] = init_ws_df[
                    'STEP_Category_ID'].str.replace('_DIV1', '')
                init_ws_df['STEP_Category_ID'] = init_ws_df[
# ===== Example 7 =====
search_data = fd.data_in(data_type, settings.directory_name)

start_time = time.time()

grainger_df = pd.DataFrame()

if data_type == 'grainger_query':
    count = 1
    num = len(search_data)

    for k in search_data:
        print('{} : {}'.format(count, num))
        div = "'" + str(k) + "_DIV1'"

        # query WS to look for Grainger cat matches in pi_mappings table
        temp_df = gws.gws_q(GWS_cats, div, 'pi_mappings.step_category_ids')

        if temp_df.empty == False:
            # strip string elements from columns and change type to ints
            temp_df['STEP_Attr_ID'] = temp_df['STEP_Attr_ID'].str.replace(
                '_ATTR', '')
            temp_df['STEP_Category_ID'] = temp_df[
                'STEP_Category_ID'].str.replace('_DIV1', '')
            temp_df[['STEP_Category_ID', 'STEP_Attr_ID'
                     ]] = temp_df[['STEP_Category_ID',
                                   'STEP_Attr_ID']].apply(pd.to_numeric)

            grainger_atts = attr_data(temp_df, k)

            if grainger_atts.empty == False:
                grainger_atts = grainger_atts.merge(temp_df, how="left", left_on=['Category_ID', 'Grainger_Attr_ID'],\
# ===== Example 8 =====
    gws_df = pd.DataFrame()
    
    for k in search_data:
        grainger_df = gcom.grainger_q(grainger_attr_query, search_level, k)
 
        if grainger_df.empty == False:
            df_stats = get_stats(grainger_df)
            df_fill = get_fill_rate(grainger_df)
            
            nodes = grainger_df['Category_ID'].unique()
            
            for n in nodes:
                gws_node = "'" + str(n) + "_DIV1'"
                print(gws_node)
 
                temp_df = gws.gws_q(gws_attr_values, 'pi_mappings.step_category_ids[1]', gws_node)
                gws_df = pd.concat([gws_df, temp_df], axis=0, sort=False) 
 
            gws_df['STEP_Attr_ID'] = gws_df['STEP_Attr_ID'].str.replace('_ATTR', '')
            gws_df['STEP_Attr_ID'] = gws_df['STEP_Attr_ID'].astype(int)
            
            gws_df = gws_values(gws_df)
            
            grainger_df = pd.merge(grainger_df, gws_df, how='left', left_on=['Grainger_SKU', 'Grainger_Attr_ID'], \
                                                                   right_on=['WS_SKU', 'STEP_Attr_ID'])
                               
            grainger_df = compare_values(grainger_df)
            
            grainger_df.dropna(subset=['Segment_ID'], inplace=True)
            
#            fd.attr_data_out(settings.directory_name, grainger_df, df_stats, df_fill, search_level)
                val_type = 'exact'
                break
            elif val_type in ['2', '%']:
                val_type = 'approx'
                break
        except ValueError:
            print('Invalid search type')
    
start_time = time.time()
print('working...')
        
if data_type == 'gws_query':
    grainger_df = pd.DataFrame()
    
    for k in search_data:
        gws_df = gws.gws_q(ws_attr_query, 'tprod."categoryId"', k)
        
        if gws_df.empty == False:
            df_stats = get_ws_stats(gws_df)
            df_fill = get_ws_fill_rate(gws_df)
            
            nodes = gws_df['Category_ID'].unique()
            
            for n in nodes:
                gws_node = "'" + str(n) + "_DIV1'"
                print(gws_node)
 
                temp_df = gws.gws_q(gws_attr_values, 'pi_mappings.step_category_ids[1]', gws_node)
                gws_df = pd.concat([gws_df, temp_df], axis=0, sort=False) 
 
            gws_df['STEP_Attr_ID'] = gws_df['STEP_Attr_ID'].str.replace('_ATTR', '')
# ===== Example 10 =====
                search_level = 'sku'
                break
        except ValueError:
            print('Invalid search type')

search_data = fd.data_in(data_type, settings.directory_name)

start_time = time.time()
print('working...')

if data_type == 'gws_query':
    gws_stat = 'yes'

    if search_level == 'single':
        for k in search_data:
            gws_df = gws.gws_q(gws_hier_query, 'tprod."categoryId"', k)

            if gws_df.empty == False:
                fd.hier_data_out(settings.directory_name, gws_df, quer,
                                 gws_stat, search_level)

            else:
                print('{} No SKUs in node'.format(k))

    elif search_level == 'group':
        for node in search_data:
            df = gws.gws_q(gws_basic_query, 'tprod."categoryId"', node)

            print('k = ', node)

            if df.empty == False:
    
start_time = time.time()

search_level = 'cat.CATEGORY_ID'    # l3 is default search level

# read in uom and LOV files
uom_df = pd.DataFrame()
# get uom list
filename = 'C:/Users/xcxg109/NonDriveFiles/reference/UOM_data_sheet.csv'
uom_df = pd.read_csv(filename)
# create df of the lovs and their concat values

category_list = pd.read_csv('C:/Users/xcxg109/NonDriveFiles/reference/ws_nodes.csv')

for level_1 in category_list:
    grainger_df = gws.gws_q(grainger_attr_ALL_query, search_level, level_1)

    print('k = ', level_1)
    if grainger_df.empty == False:
        temp_df_upload, gamut_dict = attribute_process(grainger_df, uom_df, lov_df, lov_list, gamut_dict, level_1)
    else:
        print('{} No attribute data'.format(level_1))

#    if df_upload.empty == False:
#        temp_df_upload = ws_gather_data(df_upload)
    df_upload = pd.concat([df_upload, temp_df_upload], axis=0)

    fd.GWS_upload_data_out(settings.directory_name, df_upload, search_level)
               
#        else:
#            print('EMPTY DATAFRAME')
# ===== Example 12 =====
    if num_lists == 1:
        num_lists = 2
    print('running SKUs in {} batches'.format(num_lists))

    size = round(len(sku_list) / num_lists, 0)
    size = int(size)

    div_lists = [
        sku_list[i * size:(i + 1) * size]
        for i in range((len(sku_list) + size - 1) // size)
    ]

    for k in range(0, len(div_lists)):
        gr_skus = ", ".join("'" + str(i) + "'" for i in div_lists[k])
        temp_df = gws.gws_q(prod_ids, 'taxonomy_product', gr_skus)
        prod_id_list = pd.concat([prod_id_list, temp_df], axis=0, sort=False)

        prod_ID = prod_id_list['id'].unique().tolist()
        products = ", ".join(str(i) for i in prod_ID)

        temp_num = gws.gws_q(all_vals, '"attributeId"', products)

        temp_num = temp_num['count'][0]

        print('Round {} - {} SKUs: \nTEMP_num = {}'.format(
            k + 1, len(div_lists[k]), temp_num))
        all_num = all_num + temp_num
        print('Round {}: \nall_num = {}'.format(k + 1, all_num))

#else:
    gr_skus = ", ".join("'" + str(i) + "'" for i in sku_list)
    gr_sku_df = gcom.grainger_q(grainger_value_query, 'item.MATERIAL_NO',
                                gr_skus)

cats = gr_sku_df['Category_ID'].unique().tolist()

for cat in cats:
    temp_df = gr_sku_df.loc[gr_sku_df['Category_ID'] == cat]
    cat = "'" + str(cat) + "_DIV1'"

    atts = temp_df['STEP_Attr_ID'].unique().tolist()

    for att in atts:
        att = "'" + str(att) + "_ATTR'"

        temp_att_df = gws.gws_q(ws_map, cat, att)

        if temp_att_df.empty == False:
            att_df = pd.concat([att_df, temp_att_df], axis=0, sort=False)

    if att_df.empty == False:
        category_df = pd.concat([category_df, att_df], axis=0, sort=False)

gr_sku_df = gr_sku_df.drop_duplicates()
final_df = gr_sku_df

category_df = category_df.drop_duplicates()

lst_col = 'step_category_ids'
x = category_df.assign(**{lst_col: category_df[lst_col].str.split(',')})
category_df = pd.DataFrame({col:np.repeat(x[col].values, x[lst_col].str.len()) \
                for col in x.columns.difference([lst_col])}).assign(**{lst_col:np.concatenate(x[lst_col].values)})[x.columns.tolist()]
                search_level = 'sku'
                break
        except ValueError:
            print('Invalid search type')

search_data = fd.data_in(data_type, settings.directory_name)

start_time = time.time()
print('working...')

if data_type == 'gws_query':
    gws_stat = 'yes'

    if search_level == 'single':
        for k in search_data:
            gws_df = gws.gws_q(gws_hier_query, 'tprod."categoryId"', k)

            if gws_df.empty == False:
                fd.hier_data_out(settings.directory_name, gws_df, quer,
                                 gws_stat, search_level)

            else:
                print('{} No SKUs in node'.format(k))

    elif search_level == 'group':
        for node in search_data:
            df = gws.gws_q(gws_basic_query, 'tprod."categoryId"', node)

            print('k = ', node)

            if df.empty == False:
            break
        elif search_level in ['2', 's', 'S']:
            search_level = 'single'
            break
    except ValueError:
        print('Invalid search type')

search_data = fd.data_in(data_type, settings.directory_name)

print('working...')

for node in search_data:
    start_time = time.time()

    if search_level == 'single':
        df = gws.gws_q(gws_values_single, 'tprod."categoryId"', node)
    elif search_level == 'group':
        df = gws.gws_q(gws_values_group, 'tprod."categoryId"', node)

    print('k = ', node)

    if df.empty == False:
        atts = df['WS_Attr_ID'].unique()

        df['Potential_UOMs'] = ''
        df['Recommended_Value_Update'] = ''

        for attribute in atts:
            temp_df = df.loc[df['WS_Attr_ID'] == attribute]
            temp_df = determine_uoms(temp_df, uom_df, current_uoms)
data_type = 'gws_query'

search_data = fd.data_in(data_type, settings.directory_name)

print('working...')
start_time = time.time()

# read in grainger data
allCATS_df = q.get_att_values()

if data_type == 'gws_query':
    for node in search_data:
        start_time = time.time()

        init_ws_df = gws.gws_q(gws_values_single, 'tprod."categoryId"', node)

        print('k = ', node)

        if init_ws_df.empty == False:
            node_names = init_ws_df['WS_Node_Name'].unique().tolist()

            for n in node_names:
                temp_df = init_ws_df.loc[init_ws_df['WS_Node_Name'] == n]
                temp_df['Count'] = 1

                temp_grainger = allCATS_df.loc[allCATS_df['STEP_Category_Name']
                                               == n]

                if temp_grainger.empty == False:
                    print(temp_grainger['STEP_Category_Name'].unique())
# ===== Example 17 =====
        if sku_status == 'filtered':
            grainger_df = gcom.grainger_q(STEP_query, 'item.MATERIAL_NO', sku_str)

        elif sku_status == 'all':
            grainger_df = gcom.grainger_q(STEP_query, 'item.MATERIAL_NO', sku_str)
            
    if grainger_df.empty == False:
        gws_df = gws_data(grainger_df)

        if gws_df.empty == False:
            gws_stat = 'yes'
            grainger_df = grainger_df.merge(gws_df, how="left", left_on="STEP_SKU", right_on='WS_SKU')

elif data_type == 'gws_query':
    gws_stat = 'yes'
    
    for k in search_data:
        temp_df = gws.gws_q(ws_hier_query, 'tprod."categoryId"', k)
        
        if temp_df.empty == False:
            grainger_skus_df = grainger_data(temp_df, sku_status)

            if grainger_skus_df.empty == False:
                temp_df = temp_df.merge(grainger_skus_df, how="left", left_on="STEP_SKU", right_on='WS_SKU')

        grainger_df = pd.concat([grainger_df, temp_df], axis=0)            
        print(k)

data_out(grainger_df, quer)
print("--- {} minutes ---".format(round((time.time() - start_time)/60, 2)))
# ===== Example 18 =====
#request the type of data to pull: blue or yellow, SKUs or node, single entry or read from file
data_type = fd.search_type()

#ask user for node number/SKU or pull from file if desired    
search_data = fd.data_in(data_type, settings.directory_name)


start_time = time.time()
print('working...')

total_df = pd.DataFrame()


for node in search_data:
    temp_df = gws.gws_q(multi_atts, 'tprod."categoryId"', node)
    temp_df['Count'] = 1

    temp_df = pd.DataFrame(temp_df.groupby(['WS_SKU', 'WS_Attr_ID'])['Count'].sum())
    temp_df = temp_df.reset_index()
    
    total_df = pd.concat([total_df, temp_df], axis=0, sort=False)
        
total_df = total_df[total_df['Count'] > 1]
total_df = total_df.drop_duplicates(subset=['WS_SKU', 'WS_Attr_ID'])
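# Hedged illustration of the Count logic above (demo rows, not the real product data):
# a (WS_SKU, WS_Attr_ID) pair that appears more than once -- i.e. a multi-valued
# attribute -- is the only kind of row that survives the Count > 1 filter.
demo = pd.DataFrame({'WS_SKU': ['S1', 'S1', 'S2'],
                     'WS_Attr_ID': [10, 10, 10],
                     'Count': [1, 1, 1]})
demo = demo.groupby(['WS_SKU', 'WS_Attr_ID'])['Count'].sum().reset_index()
demo = demo[demo['Count'] > 1]   # only ('S1', 10) remains, with Count == 2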

if len(temp_df) > 1000000:
    count = 1
    num_lists = round(len(temp_df)/45000, 0)
    num_lists = int(num_lists)
# ===== Example 19 =====
        if num_lists == 1:
            num_lists = 2

        print('running GWS SKUs in {} batches'.format(num_lists))

        size = round(len(search_data)/num_lists, 0)
        size = int(size)

        div_lists = [search_data[i * size:(i + 1) * size] for i in range((len(search_data) + size - 1) // size)]

        for k  in range(0, len(div_lists)):
            print('batch {} of {}'.format(k+1, num_lists))
            sku_str  = ", ".join("'" + str(i) + "'" for i in div_lists[k])

            temp_df = gws.gws_q(ws_attr_values, 'tprod."gtPartNumber"', sku_str)
            list_df = pd.concat([list_df, temp_df], axis=0, sort=False)

    else:
        sku_str  = ", ".join("'" + str(i) + "'" for i in search_data)
        
        list_df = gws.gws_q(ws_attr_values, 'tprod."gtPartNumber"', sku_str)

    if list_df.empty == False:
        # pull all L3s for the supplier and get attribute data on each node
        suppliers = list_df['Supplier_ID'].unique().tolist()
        print('# suppliers = ', len(suppliers))
        loop_count = 1
        
        for sup in suppliers:
            start_time = time.time()
search_data = fd.data_in(data_type, settings.directory_name)

ws_df = pd.DataFrame()
temp_df_2 = pd.DataFrame()

init_time = time.time()
print('working...')

for node in search_data:
    start_time = time.time()
    ws_df = pd.DataFrame()

    print('k = ', node)

    if search_level == 'single':
        df = gws.gws_q(gws_attr_single, 'tprod."categoryId"', node)

    elif search_level == 'group':
        df = gws.gws_q(gws_attr_group, 'tprod."categoryId"', node)

    if df.empty == False:
        node_list = df['WS_Node_ID'].unique().tolist()

        print('{} nodes to process'.format(len(node_list)))

        for n in node_list:
            temp_df = df.loc[df['WS_Node_ID'] == n]
            print('node {} : {} rows'.format(n, len(temp_df)))

            # split large df into 2 and print separately
            if len(temp_df) > 40000:
        elif search_level in ['3', 'sku', 'SKU']:
            data_type = 'sku'
            break
    except ValueError:
        print('Invalid search type')

search_data = fd.data_in(data_type, settings.directory_name)

print('working...')

if data_type == 'gws_query':
    start_time = time.time()

    if search_level == 'single':
        for node in search_data:
            ws_df = gws.gws_q(gws_values_single, 'tprod."categoryId"', node)

            if ws_df.empty == False:
                node_ids = ws_df['WS_Node_ID'].unique().tolist()
                print(node_ids)

                ws_df['Potential_Replaced_Values'] = ''
                ws_df['Revised Value'] = ''
                ws_df['Count'] = 1

                ws_df = ws_df.replace(np.nan, '', regex=True)
                ws_df = ws_df.reset_index()
                ws_df = process_vals(ws_df)

                data_out(ws_df, node)
                print("--- {} minutes ---".format(
start_time = time.time()
print('working...')

# read in attribute names from file -- change this to a choice menu at some point
att_df = pd.read_csv('C:/Users/xcxg109/NonDriveFiles/reference/atts.csv')

attributes = att_df['WS_Attr_ID'].unique().tolist()
print('attribute # = ', len(attributes))

count_att = 1

for att in attributes:
    print('{}. {}'.format(count_att, att))

    #    att = "'" + att + "'"
    temp_df = gws.gws_q(attr_values, 'tax_att.id', att)

    ws_df = pd.concat([ws_df, temp_df], axis=0, sort=False)

    count_att += 1

ws_df = ws_df.drop(columns=['WS_SKU'])

#ws_df = ws_df.drop_duplicates()

if len(ws_df) > 900000:
    count = 1
    # split into multiple dfs, creating at least 2
    num_lists = round(len(ws_df) / 900000, 0)
    num_lists = int(num_lists)
    if num_lists == 1:
# ===== Example 23 =====
ws_df = pd.DataFrame()

search_data = input('Input Blue node ID or hit ENTER to read from file: ')

if search_data != "":
    search_data = search_data.strip()
    search_data = [search_data]

else:
    file_data = settings.get_file_data()
    search_data = [row[0] for row in file_data[1:]]

for cat in search_data:
    print('node = ', cat)

    temp_df = gws.gws_q(ws_attr_values, 'tprod."categoryId"', cat)
    #    temp_df = gws.gws_q(ws_attr_values, 'tax.ancestors[1]', cat)

    ws_df = pd.concat([ws_df, temp_df], axis=0, sort=False)

if len(ws_df) > 900000:
    count = 1
    # split into multiple dfs, creating at least 2
    num_lists = round(len(ws_df) / 900000, 0)
    num_lists = int(num_lists)
    if num_lists == 1:
        num_lists = 2
    print('creating {} output files'.format(num_lists))
    # np.array_split creates num_lists chunks, each handled as a separate object in the loop below
    split_df = np.array_split(ws_df, num_lists)
    for object in split_df:
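        # A plausible body for this loop, following the count / to_excel pattern used
        # elsewhere in these examples (the path and filename below are hypothetical):
        # outfile = Path(settings.directory_name) / 'ws_data_{}.xlsx'.format(count)
        # object.to_excel(outfile, index=None, header=True, encoding='utf-8')
        # count += 1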
search_data = data_in(data_type, settings.directory_name)

start_time = time.time()

category_gr = pd.DataFrame()
category_gws = pd.DataFrame()

for cat in search_data:

    temp_gr = gcom.grainger_q(grainger_cat_query, 'cat.CATEGORY_ID', cat)

    if temp_gr.empty == False:
        category_gr = pd.concat([category_gr, temp_gr], axis=0, sort=False)

    cat = "'" + str(cat) + "_DIV1'"
    temp_gws = gws.gws_q(ws_map, 'tprod."categoryId"', cat)

    if temp_gws.empty == False:
        category_gws = pd.concat([category_gws, temp_gws], axis=0, sort=False)

lst_col = 'step_category_ids'
x = category_gws.assign(**{lst_col: category_gws[lst_col].str.split(',')})
category_gws = pd.DataFrame({col:np.repeat(x[col].values, x[lst_col].str.len()) \
              for col in x.columns.difference([lst_col])}).assign(**{lst_col:np.concatenate(x[lst_col].values)})[x.columns.tolist()]

category_gws = category_gws.astype({'step_category_ids': int})

final_df = category_gr.merge(category_gws, how="left", left_on=['Category_ID'], \
                                                    right_on=['step_category_ids'])

final_df = final_df.drop_duplicates()
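# Aside on the assign / np.repeat / np.concatenate block above: on pandas >= 0.25 the
# same one-row-per-id expansion can be written with DataFrame.explode. A small sketch
# with a made-up frame (not the real category_gws data):
demo = pd.DataFrame({'Category_Name': ['Fasteners'],
                     'step_category_ids': ['100,200,300']})
demo = demo.assign(step_category_ids=demo['step_category_ids'].str.split(','))
demo = demo.explode('step_category_ids')
demo['step_category_ids'] = demo['step_category_ids'].astype(int)
# -> three rows: (Fasteners, 100), (Fasteners, 200), (Fasteners, 300)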
# ===== Example 25 =====
        print('Invalid search type')

search_data = fd.data_in(data_type, settings.directory_name)

print('working...')
start_time = time.time()

# read in grainger data
allCATS_df = q.get_att_values()

if data_type == 'gws_query':
    for node in search_data:
        start_time = time.time()

        if search_level == 'single':
            init_ws_df = gws.gws_q(gws_values_single, 'tprod."categoryId"',
                                   node)

        elif search_level == 'group':
            init_ws_df = gws.gws_q(gws_values_group, 'tprod."categoryId"',
                                   node)

        print('k = ', node)

        if init_ws_df.empty == False:
            node_names = init_ws_df['WS_Node_Name'].unique().tolist()

            for n in node_names:
                temp_df = init_ws_df.loc[init_ws_df['WS_Node_Name'] == n]
                temp_df['Count'] = 1

                temp_grainger = allCATS_df.loc[allCATS_df['STEP_Category_Name']
                                               == n]
# ===== Example 26 =====
file_data = settings.get_file_data()
gws_data = [int(row[0]) for row in file_data[1:]]

print('working....')
start_time = time.time()

print('Grainger nodes')
for k in gr_data:
    print(k)
    temp_gr = gcom.grainger_q(STEP_query, 'cat.SEGMENT_ID', k)
    grainger_df = pd.concat([grainger_df, temp_gr], axis=0)

print('\n\nGWS nodes')
for k in gws_data:
    print(k)
    temp_gws = gws.gws_q(PIM_query, 'tprod."categoryId"', k)
    gws_df = pd.concat([gws_df, temp_gws], axis=0)

grainger_df = grainger_df.drop_duplicates()
gws_df = gws_df.drop_duplicates(ignore_index=True)

final_df = grainger_df.merge(gws_df,
                             how="left",
                             left_on="STEP_SKU",
                             right_on='WS_SKU')
no_match_df = final_df[final_df['WS_SKU'].isna()]
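# Quick illustration of the isna() anti-join above (demo frames, not the real SKU data):
# after a left merge, rows with no match on the right side carry NaN in the right-hand
# key column, so filtering on isna() isolates the unmatched SKUs.
demo_step = pd.DataFrame({'STEP_SKU': ['1AB1', '2CD2']})
demo_ws = pd.DataFrame({'WS_SKU': ['1AB1']})
demo_final = demo_step.merge(demo_ws, how='left', left_on='STEP_SKU', right_on='WS_SKU')
demo_no_match = demo_final[demo_final['WS_SKU'].isna()]   # just the '2CD2' row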

if len(final_df) > 900000:
    count = 1

    # split into multiple dfs, creating at least 2
    filename = os.fsdecode(file)
    filename = filename.lower()
    # read in the sheet name and parse for the category ID and name

    if 'high touch' in filename or 'high_touch' in filename:
        pass # do nothing -- we've already read this file above
        
    else:
        main_df = pd.read_excel(filename, header=None)
#        main_df.to_csv('C:/Users/xcxg109/NonDriveFiles/pre.csv')
        
        cat_ID = main_df[0].unique().tolist()
        cat_ID = cat_ID[11]
        
        cat_name = gws.gws_q(ws_category, 'cat.id', cat_ID)
        cat_name = cat_name['name'].unique()
        cat_name = cat_name[0].strip()

        # flip buildsheet read-in to get it closer to our final format
        main_df = main_df.T
        # forward-fill columns 0-9 so each value repeats down its column
        for col in range(10):
            main_df[col].fillna(method='ffill', inplace=True)
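        # Hedged note on the forward fill above: ffill copies the last non-null value
        # downward, presumably to fill the gaps that merged header cells leave after the
        # transpose. Tiny demo on a made-up column (not the real buildsheet):
        # pd.Series(['Electrical', None, None, 'Plumbing', None]).fillna(method='ffill')
        # -> ['Electrical', 'Electrical', 'Electrical', 'Plumbing', 'Plumbing']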
# ===== Example 28 =====
INNER JOIN tax
ON tax.id = tprod."categoryId"
AND tprod.status = 3

INNER JOIN taxonomy_attribute tax_att
ON tax_att."categoryId" = tprod."categoryId"
AND tax_att.deleted = 'false'

INNER JOIN  taxonomy_product_attribute_value tprodvalue
ON tprod.id = tprodvalue."productId"
AND tax_att.id = tprodvalue."attributeId"
AND tprodvalue.deleted = 'false'
AND tax_att."multiValue" = 'true'
        
LEFT OUTER JOIN pi_mappings
ON pi_mappings.gws_attribute_ids[1] = tax_att.id
AND pi_mappings.gws_category_id = tax_att."categoryId"
"""

start_time = time.time()
print('working...')

#gws_df = moist.query(test_q)
gws_df = gws.gws_q(test_q, 'taxonomy_product_backfeed.value', 4000)

outfile = Path(settings.directory_name) / "test.xlsx"
gws_df.to_excel(outfile, index=None, header=True, encoding='utf-8')

print("--- {} seconds ---".format(round(time.time() - start_time, 2)))