def gamut_process(gnode, gamut_dict):
    """Fetch the Gamut attribute definition for *gnode*, prep it for merging,
    and cache it in *gamut_dict* if it has not been processed before.

    Parameters:
        gnode: Gamut node ID to query.
        gamut_dict: cache of previously processed node DataFrames, keyed by node ID.

    Returns:
        (gamut_dict, gamut_df) -- the updated cache and the node's DataFrame
        (empty if the query returned no rows; callers check ``.empty``).
    """
    gamut_df = q.gamut_definition(gnode, 'tax_att."categoryId"')

    if not gamut_df.empty:
        # Normalize the attribute name so it can be merged against other sources.
        gamut_df['alt_gamut_name'] = process.process_att(gamut_df['Gamut_Attribute_Name'])
        gamut_dict[gnode] = gamut_df  # cache the processed df for future lookups
    else:
        print('{} EMPTY Gamut DATAFRAME'.format(gnode))

    return gamut_dict, gamut_df
def gws_process(gws_node, gws_dict: Dict):
    """Fetch GWS attribute rows for *gws_node*, dedupe and prep them for merging,
    and cache the result in *gws_dict*.

    Parameters:
        gws_node: GWS node ID to query.
        gws_dict: cache of previously processed node DataFrames, keyed by node ID.

    Returns:
        (gws_dict, gws_df) -- the updated cache and the node's DataFrame
        (empty if the query returned no rows).
    """
    # NOTE(review): ws_attr_query is presumably a module-level query string -- confirm.
    gws_df = q.gws_atts(ws_attr_query, gws_node, 'tax.id')  # gws attribute values for this node

    if not gws_df.empty:
        # GWS attribute IDs are unique, so no need to group by pim node before deduping.
        gws_df = gws_df.drop_duplicates(subset='GWS_Attr_ID')
        gws_df['alt_gws_name'] = process.process_att(gws_df['GWS_Attribute_Name'])  # prep att name for merge
        gws_dict[gws_node] = gws_df  # cache the processed df for future lookups
    else:
        print('{} EMPTY GWS DATAFRAME'.format(gws_node))

    return gws_dict, gws_df
def gamut_process(node, Gamut_allCATS_df, gamut_dict: Dict, k):
    """Build the processed attribute frame for one Gamut node and cache it.

    Slices the node's rows out of *Gamut_allCATS_df*, joins in merch-usage data,
    attaches normalized/sample values, dedupes by attribute ID, and stores the
    result in *gamut_dict*.

    Returns:
        (gamut_dict, gamut_df) -- the updated cache and the node's DataFrame
        (empty if *node* has no rows; callers check ``.empty``).
    """
    gamut_sample_vals = pd.DataFrame()
    gamut_att_vals = pd.DataFrame()
    # BUGFIX: initialize so the empty-node branch no longer raises NameError at return.
    gamut_df = pd.DataFrame()

    # Attribute rows for this gamut_l3 node only.
    gamut_df_1 = Gamut_allCATS_df.loc[Gamut_allCATS_df['Gamut_Node_ID'] == node]

    if not gamut_df_1.empty:
        gamut_df_2 = gamut.gamut_q(gamut_usage_query, 'tax_att."categoryId"', node)
        print('Gamut ', node)

        if not gamut_df_2.empty:
            # Collapse usage rows to one '; '-joined string per attribute ID.
            gamut_df_2 = gamut_df_2.groupby(['Gamut_Attr_ID'])['Gamut_MERCH_Usage'].apply('; '.join).reset_index()
            gamut_df = pd.merge(gamut_df_1, gamut_df_2, how='outer', on='Gamut_Attr_ID')
        else:
            gamut_df = gamut_df_1
            gamut_df['Gamut_MERCH_Usage'] = ""

        # Empty usage -> NaN so the sort below puts real usages first before the dedupe.
        gamut_df.loc[gamut_df['Gamut_MERCH_Usage'] == '', 'Gamut_MERCH_Usage'] = np.nan
        gamut_df = gamut_df.sort_values(['Gamut_Attr_ID', 'Gamut_MERCH_Usage']).drop_duplicates(subset='Gamut_Attr_ID')

        # gamut_values exports a list of --all-- normalized values and sample_values.
        gamut_att_vals, gamut_sample_vals = gamut_values(gamut_df, node)

        if not gamut_att_vals.empty:
            gamut_sample_vals = gamut_sample_vals.rename(columns={'Gamut_Normalized Value': 'Gamut Attribute Sample Values'})

            gamut_df = pd.merge(gamut_df, gamut_sample_vals, on=['Gamut_Attribute_Name'])  # add top normalized values to report
            gamut_df = pd.merge(gamut_df, gamut_att_vals, on=['Gamut_Attr_ID'])

        # Gamut attribute IDs are unique, so no need to group by pim node before deduping.
        gamut_df = gamut_df.drop_duplicates(subset='Gamut_Attr_ID')
        gamut_df['alt_gamut_name'] = process.process_att(gamut_df['Gamut_Attribute_Name'])  # prep att name for merge

        gamut_dict[node] = gamut_df  # cache the processed df for future lookups
    else:
        print('{} EMPTY DATAFRAME'.format(node))

    return gamut_dict, gamut_df
def ws_process(ws_df, ws_sample, ws_all, fill_rate, Gamut_allCATS_df, gamut_dict: Dict, k):
    """Prep the WS attribute frame for node *k*, then fetch (or reuse from cache)
    the Gamut definition for every Gamut node that shares SKUs with this WS node.

    NOTE(review): this function falls through with no return statement after the
    loop -- it looks truncated in this source; confirm against the original.
    """
    df = pd.DataFrame()
    gamut_skus = pd.DataFrame()

    node_name = ws_df['WS_Node_Name'].unique().tolist()
    # BUGFIX: was `cat_name.pop()` -- NameError, `cat_name` is undefined here.
    node_name = node_name.pop()
    print('node name = {} {}'.format(k, node_name))

    # Unique WS skus that feed into the gamut query below.
    ws_skus = ws_df['WS_SKU'].unique().tolist()
    print('ws sku count = ', len(ws_skus))

    # Group by WS_Category_ID / WS_Attr_ID and keep unique rows.
    ws_df = ws_df.drop_duplicates(subset=['WS_Category_ID', 'WS_Attr_ID'])
    ws_df = ws_df.drop(['WS_SKU', 'WS_Attribute_Value'], axis=1)  # remove unneeded columns

    ws_df = pd.merge(ws_df, ws_sample, on=['WS_Attribute_Name'])
    ws_df = pd.merge(ws_df, ws_all, on=['WS_Attr_ID'])
    ws_df = pd.merge(ws_df, fill_rate, on=['WS_Attribute_Name'])

    ws_df['alt_ws_name'] = process.process_att(ws_df['WS_Attribute_Name'])  # prep att name for merge

    gamut_skus = Gamut_allCATS_df[Gamut_allCATS_df['Gamut_SKU'].isin(ws_skus)]

    if not gamut_skus.empty:
        # Unique gamut nodes that correspond to this WS node.
        gamut_l3 = gamut_skus['Gamut_Node_ID'].unique()
        print('GWS L3s ', gamut_l3)

        for node in gamut_l3:
            if node in gamut_dict:
                gamut_df = gamut_dict[node]
                print('node {} in gamut dict'.format(node))
            else:
                gamut_dict, gamut_df = gamut_process(node, Gamut_allCATS_df, gamut_dict, k)
def gamut_process(node, gamut_dict: Dict, k):
    """Query the attribute definition for one Gamut node, attach sample/all
    normalized values, prep the name-merge column, and cache the result.

    Returns:
        (gamut_dict, gamut_df) -- the updated cache and the node's DataFrame
        (empty if the query returned no rows; callers check ``.empty``).
    """
    gamut_sample_vals = pd.DataFrame()
    gamut_att_vals = pd.DataFrame()

    # Gamut attribute values for this gamut_l3 node.
    gamut_df = q.gamut_atts(gamut_attr_query, node, 'tax.id')

    if not gamut_df.empty:
        # gamut_values exports a list of --all-- normalized values and sample_values.
        gamut_att_vals, gamut_sample_vals = q.gamut_values(gamut_attr_values, node, 'tax.id')

        if not gamut_att_vals.empty:
            gamut_sample_vals = gamut_sample_vals.rename(
                columns={'Normalized Value': 'Gamut Attribute Sample Values'})
            # Add top normalized values to the report.
            gamut_df = pd.merge(gamut_df, gamut_sample_vals,
                                on=['Gamut_Attribute_Name'])
            gamut_df = pd.merge(gamut_df, gamut_att_vals,
                                on=['Gamut_Attr_ID'])

        # Gamut attribute IDs are unique, so no need to group by pim node before deduping.
        gamut_df = gamut_df.drop_duplicates(subset='Gamut_Attr_ID')
        gamut_df['alt_gamut_name'] = process.process_att(
            gamut_df['Gamut_Attribute_Name'])  # prep att name for merge

        gamut_dict[node] = gamut_df  # cache the processed df for future lookups
    else:
        print('{} EMPTY DATAFRAME'.format(node))

    return gamut_dict, gamut_df
def grainger_process(grainger_df, grainger_all, uom_df, lov_df, lov_list,
                     gamut_dict, grainger_node):
    """Analyze one Grainger category's attribute data, join it against each
    matching Gamut node definition, and return the combined report frame.

    Parameters:
        grainger_df: raw SKU/attribute rows for one Grainger category.
        grainger_all: gates the UOM clean-up pass (only its emptiness is checked).
        uom_df, lov_df, lov_list: lookup inputs forwarded to analyze().
        gamut_dict: cache of processed Gamut node frames, keyed by node ID.
        grainger_node: Grainger category ID (used for logging).

    Returns:
        (df, gamut_dict) -- the merged report frame and the updated cache.
    """
    df = pd.DataFrame()
    grainger_new = pd.DataFrame()

    cat_name = grainger_df['Category_Name'].unique()
    cat_name = list(cat_name)
    cat_name = cat_name.pop()
    print('cat name = {} {}'.format(grainger_node, cat_name))

    # Unique grainger skus feed into the gws sku query below.
    grainger_skus = grainger_df.drop_duplicates(subset='Grainger_SKU')
    grainger_sku_count = len(grainger_skus)
    print('grainger sku count = ', grainger_sku_count)

    if len(grainger_skus) > 7000:
        # Run analyze() in batches of roughly 7000 skus to bound the working set.
        num_split = int(round(len(grainger_skus) / 7000, 0))
        if num_split == 1:
            num_split = 2

        print('splitting grainger_df into {} batches'.format(num_split))
        grainger_split = np.array_split(grainger_df, num_split)

        for i in range(num_split):
            loop_time = time.time()
            print('batch no: ', i + 1)

            temp_df = analyze(grainger_split[i], uom_df, lov_df, lov_list)
            grainger_new = pd.concat([grainger_new, temp_df], axis=0, sort=False)

            print("--- grainger loop time = {} minutes ---".format(
                round((time.time() - loop_time) / 60, 2)))

        grainger_df = grainger_new
    else:
        grainger_df = analyze(grainger_df, uom_df, lov_df, lov_list)

    # Group by Category_ID and attribute ID and keep unique rows.
    grainger_df = grainger_df.drop_duplicates(subset=['Category_ID', 'Grainger_Attr_ID'])
    grainger_df['STEP Blue Path'] = grainger_df['Segment_Name'] + ' > ' + grainger_df['Family_Name'] + \
                                                        ' > ' + grainger_df['Category_Name']

    grainger_df = grainger_df.drop(['Grainger_SKU', 'Grainger_Attribute_Value'],
                                   axis=1)  # remove unneeded columns

    if not grainger_all.empty:
        # For non-text rows, clean up UOMs in the sample-value column.
        # BUGFIX: the `if` was dedented out of the loop, so only the LAST row was
        # ever processed; it now runs for every row as the comment intended.
        for row in grainger_df.itertuples():
            potential_uoms = str(row.Potential_UOMs)
            dt = str(row.Recommended_Data_Type)

            if dt != 'text':
                grainger_df = process_sample_vals(grainger_df, row, potential_uoms)

    grainger_df['alt_grainger_name'] = process.process_att(
        grainger_df['Grainger_Attribute_Name'])  # prep att name for merge

    # Gamut sku list determines which pim nodes to pull.
    gamut_skus = q.gws_skus(grainger_skus)

    # If gws skus are present, go get the gamut attribute definition for each node.
    if not gamut_skus.empty:
        gamut_l3 = gamut_skus['Gamut_Node_ID'].unique()
        print('gamut L3s ', gamut_l3)

        for gamut_node in gamut_l3:
            if gamut_node in gamut_dict:
                gamut_df = gamut_dict[gamut_node]
                print('node {} in Gamut dict'.format(gamut_node))
            else:
                gamut_dict, gamut_df = gamut_process(gamut_node, gamut_dict)

            if not gamut_df.empty:
                node_name = gamut_df['Gamut_Node_Name'].unique()
                node_name = list(node_name)
                node_name = node_name.pop()
                print('node name = {} {}'.format(gamut_node, node_name))

                # Add correlating grainger and gamut data to opposite dataframes.
                grainger_df = q.grainger_assign_nodes(grainger_df, gamut_df)
                gamut_df = q.gamut_assign_nodes(grainger_df, gamut_df)

                temp_df = pd.merge(grainger_df, gamut_df, left_on=['alt_grainger_name', 'Category_ID', 'Gamut_Node_ID', 'Gamut_Category_ID', \
                                                                   'Gamut_Category_Name', 'Gamut_Node_Name', 'Gamut_PIM_Path', 'STEP Blue Path', \
                                                                   'Segment_ID', 'Segment_Name', 'Family_ID', 'Family_Name', 'Category_Name'],
                                                right_on=['alt_gamut_name', 'Category_ID', 'Gamut_Node_ID', 'Gamut_Category_ID', \
                                                          'Gamut_Category_Name', 'Gamut_Node_Name', 'Gamut_PIM_Path', 'STEP Blue Path', \
                                                          'Segment_ID', 'Segment_Name', 'Family_ID', 'Family_Name', 'Category_Name'], how='outer')

                # Compare grainger and gamut atts; adds a column saying whether they match.
                temp_df = match_category(temp_df)

                # Add the prepped df for this gamut node to the final df.
                df = pd.concat([df, temp_df], axis=0, sort=False)
                df['Matching'] = df['Matching'].str.replace('no', 'Potential Match')
                df = df[df.Matching != 'GWS only']  # drop rows present only on the GWS side
            else:
                # NOTE(review): this overwrites any rows accumulated from earlier
                # nodes in the loop -- behavior preserved as-is; confirm intent.
                print('Gamut {} EMPTY DATAFRAME'.format(gamut_node))
                df = grainger_df
                df['Gamut_Attribute_Definition'] = ''

    else:
        df = grainger_df
        df['Gamut_Attribute_Definition'] = ''

        print('No Gamut SKUs for Grainger node {}'.format(grainger_node))

    df.reset_index(drop=True, inplace=True)
    df = choose_definition(df)

    return df, gamut_dict
# Пример #7 ("Example #7") -- pagination artifact and vote count ("0") from the
# scraped code listing; commented out so this text no longer reads as code.
def grainger_process(grainger_df, grainger_sample, grainger_all,
                     gamut_dict: Dict, k):
    """Merge one Grainger category's attribute data with every matching Gamut node.

    For category *k*: dedupe the Grainger rows, attach sample/all attribute
    values, look up the Gamut nodes that share SKUs, outer-merge each node's
    attribute frame on the normalized name, and stack the per-node results.

    Returns (df, gamut_dict) -- the stacked report frame (empty if there are no
    Gamut SKUs for this category) and the updated node cache.
    """

    df = pd.DataFrame()

    # Single category is expected; pop() takes the last unique name.
    cat_name = grainger_df['Category_Name'].unique()
    cat_name = list(cat_name)
    cat_name = cat_name.pop()
    print('cat name = {} {}'.format(k, cat_name))

    grainger_skus = grainger_df.drop_duplicates(
        subset='Grainger_SKU'
    )  #create list of unique grainger skus that feed into gamut query
    grainger_sku_count = len(grainger_skus)
    print('grainger sku count = ', grainger_sku_count)
    grainger_df = grainger_df.drop_duplicates(subset=[
        'Category_ID', 'Grainger_Attr_ID'
    ])  #group by Category_ID and attribute ID and keep unique
    grainger_df['Grainger Blue Path'] = grainger_df['Segment_Name'] + ' > ' + grainger_df['Family_Name'] + \
                                                        ' > ' + grainger_df['Category_Name']

    grainger_df = grainger_df.drop(
        ['Grainger_SKU', 'Grainger_Attribute_Value'],
        axis=1)  #remove unneeded columns
    grainger_df = pd.merge(grainger_df,
                           grainger_sample,
                           on=['Grainger_Attribute_Name'])
    grainger_df = pd.merge(grainger_df, grainger_all, on=['Grainger_Attr_ID'])

    grainger_df['alt_grainger_name'] = process.process_att(
        grainger_df['Grainger_Attribute_Name'])  #prep att name for merge
    #grainger_df.to_csv ("F:/CGabriel/Grainger_Shorties/OUTPUT/grainger_test.csv")

    gamut_skus = q.gamut_skus(
        grainger_skus)  #get gamut sku list to determine pim nodes to pull
    if gamut_skus.empty == False:
        #unique gamut nodes that correspond to this grainger node
        gamut_l3 = gamut_skus['Gamut_Node_ID'].unique(
        )  #create list of pim nodes to pull
        print('GAMUT L3s ', gamut_l3)

        for node in gamut_l3:
            # Reuse the cached node frame if available; otherwise process and cache it.
            if node in gamut_dict:
                gamut_df = gamut_dict[node]
            else:
                gamut_dict, gamut_df = gamut_process(node, gamut_dict, k)

            if gamut_df.empty == False:
                node_name = gamut_df['Gamut_Node_Name'].unique()
                node_name = list(node_name)
                node_name = node_name.pop()
                print('node name = {} {}'.format(node, node_name))
                #add correlating grainger and gamut data to opposite dataframes
                grainger_df = grainger_assign_nodes(grainger_df, gamut_df)
                gamut_df = gamut_assign_nodes(grainger_df, gamut_df)

                # Per-node sku subset used only for the counts columns below.
                skus = gamut_skus[gamut_skus['Gamut_Node_ID'] == node]
                # Outer merge keyed on the normalized attribute name plus all
                # shared node/category/path columns.
                temp_df = pd.merge(grainger_df, gamut_df, left_on=['alt_grainger_name', 'Category_ID', 'Gamut_Node_ID', 'Gamut_Category_ID', \
                                                                   'Gamut_Category_Name', 'Gamut_Node_Name', 'Gamut_PIM_Path', 'Grainger Blue Path', \
                                                                   'Segment_ID', 'Segment_Name', 'Family_ID', 'Family_Name', 'Category_Name'],
                                                right_on=['alt_gamut_name', 'Category_ID', 'Gamut_Node_ID', 'Gamut_Category_ID', \
                                                          'Gamut_Category_Name', 'Gamut_Node_Name', 'Gamut_PIM_Path', 'Grainger Blue Path', \
                                                          'Segment_ID', 'Segment_Name', 'Family_ID', 'Family_Name', 'Category_Name'], how='outer')
                temp_df = match_category(
                    temp_df
                )  #compare grainger and gamut atts and create column to say whether they match
                temp_df['grainger_sku_count'] = grainger_sku_count
                temp_df['gamut_sku_count'] = len(skus)  #row count of this node's sku subset
                temp_df[
                    'Grainger-Gamut Terminal Node Mapping'] = cat_name + ' -- ' + node_name
                temp_df['Gamut/Grainger SKU Counts'] = temp_df[
                    'gamut_sku_count'].map(
                        str) + ' / ' + temp_df['grainger_sku_count'].map(str)

                df = pd.concat(
                    [df, temp_df], axis=0, sort=False
                )  #add prepped df for this gamut node to the final df
                df['Matching'] = df['Matching'].str.replace(
                    'no', 'Potential Match')
            else:
                print('Gamut Node {} EMPTY DATAFRAME'.format(node))

    else:
        print('No Gamut SKUs for Grainger node {}'.format(k))

    return df, gamut_dict  #df stacks one merged frame per gamut node
def grainger_process(grainger_df, grainger_sample, grainger_all, k):
    """Merge one Grainger category's attribute data with every Gamut node that
    shares its SKUs, returning the stacked per-node report frame.

    NOTE(review): unlike the other variants in this file, this version
    overwrites 'Grainger_Attribute_Name' in place (no alt_* column), merges
    *grainger_all* on the name rather than the attr ID, performs no .empty
    checks on the per-node query results, and writes debug CSVs to hard-coded
    F:/ paths -- confirm these differences are intentional.
    """

    df = pd.DataFrame()
    gamut_sample_vals = pd.DataFrame()
    gamut_att_vals = pd.DataFrame()
    #  gamut_l3 = dict()

    grainger_skus = grainger_df.drop_duplicates(
        subset='Grainger_SKU'
    )  #create list of unique grainger skus that feed into gamut query
    grainger_sku_count = len(grainger_skus)
    print('Grainger SKU count = ', grainger_sku_count)

    grainger_df = grainger_df.drop_duplicates(subset=[
        'Category_ID', 'Grainger_Attr_ID'
    ])  #group by Category_ID and attribute ID and keep unique
    grainger_df['Grainger Blue Path'] = grainger_df['Segment_Name'] + ' > ' + grainger_df['Family_Name'] + \
                                                        ' > ' + grainger_df['Category_Name']
    grainger_df = grainger_df.drop(
        ['Grainger_SKU', 'Grainger_Attribute_Value'],
        axis=1)  #remove unneeded columns
    grainger_df = pd.merge(grainger_df,
                           grainger_sample,
                           on=['Grainger_Attribute_Name'])
    grainger_df = pd.merge(grainger_df,
                           grainger_all,
                           on=['Grainger_Attribute_Name'])

    # NOTE(review): overwrites the original name column with the processed form.
    grainger_df['Grainger_Attribute_Name'] = process.process_att(
        grainger_df['Grainger_Attribute_Name'])  #prep att name for merge
    # Debug dump; side effect on a hard-coded local path.
    grainger_df.to_csv(
        "F:/CGabriel/Grainger_Shorties/OUTPUT/grainger_test.csv")

    gamut_skus = q.gamut_skus(
        grainger_skus)  #get gamut sku list to determine pim nodes to pull
    gamut_skus = gamut_skus.drop_duplicates(subset='Gamut_SKU')

    #   gamut_sku_counts = gamut_sku_list.groupby('Gamut_SKU')['Gamut_SKU']).count())
    if gamut_skus.empty == False:
        #unique gamut nodes that correspond to this grainger node
        gamut_l3 = gamut_skus['Gamut_Node_ID'].unique(
        )  #create list of pim nodes to pull
        for node in gamut_l3:
            # Gamut attribute values for each gamut_l3 node (no .empty check here).
            gamut_df = q.gamut_atts(
                node, 'tax.id'
            )  #tprod."categoryId"')
            gamut_att_vals, gamut_sample_vals = q.gamut_values(
                gamut_df
            )  #gamut_values exports a list of --all-- normalized values (temp_df) and sample_values
            gamut_sample_vals = gamut_sample_vals.rename(
                columns={'Normalized Value': 'Gamut Attribute Sample Values'})
            gamut_att_vals = gamut_att_vals.rename(
                columns={'Normalized Value': 'Gamut ALL Values'})

            gamut_df = gamut_df.drop_duplicates(
                subset='Gamut_Attr_ID'
            )  #gamut attribute IDs are unique, so no need to group by pim node before getting unique
            gamut_df = gamut_df.drop(
                [
                    'Gamut_SKU', 'Grainger_SKU', 'Original Value',
                    'Normalized Value'
                ],
                axis=1)  #normalized values are collected as sample_value

            grainger_df['Gamut_Node_ID'] = int(
                node)  #add correlating gamut node to grainger_df

            gamut_df = pd.merge(gamut_df,
                                gamut_sample_vals,
                                on=['Gamut_Attribute_Name'
                                    ])  #add top normalized values to report
            gamut_df = pd.merge(gamut_df,
                                gamut_att_vals,
                                on=['Gamut_Attribute_Name'
                                    ])  #add all normalized values to report
            gamut_df['Category_ID'] = int(
                k)  #add grainger Category_ID column for gamut attributes
            gamut_df['Gamut_Attribute_Name'] = process.process_att(
                gamut_df['Gamut_Attribute_Name'])  #prep att name for merge
            # Debug dump; side effect on a hard-coded local path.
            gamut_df.to_csv(
                "F:/CGabriel/Grainger_Shorties/OUTPUT/gamut_test.csv")

            # Outer merge on the processed attribute names plus the shared keys.
            temp_df = pd.merge(grainger_df,
                               gamut_df,
                               left_on=[
                                   'Grainger_Attribute_Name', 'Category_ID',
                                   'Gamut_Node_ID'
                               ],
                               right_on=[
                                   'Gamut_Attribute_Name', 'Category_ID',
                                   'Gamut_Node_ID'
                               ],
                               how='outer')
            temp_df = match_category(
                temp_df
            )  #compare grainger and gamut atts and create column to say whether they match
            temp_df['Grainger-Gamut Terminal Node Mapping'] = temp_df[
                'Category_Name'] + ' -- ' + temp_df['Gamut_Node_Name']

            df = pd.concat(
                [df, temp_df],
                axis=0)  #add prepped df for this gamut node to the final df

    return df  #df stacks one merged frame per gamut node