Exemplo n.º 1
0
def get_data(dsn, db, options_file_in, query):
    """Run ``query`` through ``sql_retrieve_df_specified_query`` and return its result.

    All four arguments are forwarded unchanged as the helper's ``dsn``,
    ``db``, ``options_file`` and ``query`` keyword arguments.

    Returns:
        Whatever the helper returns (presumably a pandas DataFrame, going
        by the helper's name -- confirm against its definition).
    """
    return sql_retrieve_df_specified_query(
        dsn=dsn,
        db=db,
        options_file=options_file_in,
        query=query,
    )
Exemplo n.º 2
0
def get_dataset_sql(others_dict, options_file_in, query):
    """Fetch the dataset for ``query`` and normalise a few of its columns.

    The query runs against ``options_file_in.DSN_MLG_PRD`` and the
    ``database_final`` entry of ``options_file_in.sql_info``.

    Post-processing applied to the result:
      * ``Part_Description`` is cast to ``str``;
      * the ``Date`` column is dropped;
      * values of ``Product_Group_DW`` and ``Classification`` found as keys
        in ``others_dict`` are replaced by the mapped value; all other
        values are kept as they were.

    Returns:
        The processed DataFrame.
    """
    dataset = level_1_a_data_acquisition.sql_retrieve_df_specified_query(
        options_file_in.DSN_MLG_PRD,
        options_file_in.sql_info['database_final'],
        options_file_in,
        query,
    )
    dataset['Part_Description'] = dataset['Part_Description'].astype('str')
    dataset.drop(columns='Date', inplace=True)

    # map() yields NaN for values missing from others_dict; fillna() restores
    # the original value in those positions.
    for column in ('Product_Group_DW', 'Classification'):
        dataset[column] = dataset[column].map(others_dict).fillna(dataset[column])

    return dataset
Exemplo n.º 3
0
def data_acquisition():
    """Section A: load the source tables and enrich sales with customer data.

    Runs the sales, stock, product-db, customer-group and dealers queries
    from ``options_file`` against ``options_file.DSN_SRV3_PRD`` (database
    ``options_file.sql_info['database_source']``), then:

      * removes rows with a repeated ``VehicleData_Code`` from the product db;
      * converts ``NLR_Code`` in sales to a numeric dtype when every value
        can be parsed, leaving the column untouched otherwise;
      * calls ``missing_customer_info_treatment`` on the sales frame;
      * left-joins the customer group description onto dealers, and the
        resulting customer/dealer attributes onto sales.

    Start and end of the section are recorded through
    ``performance_info_append`` and ``log_record``.

    Returns:
        Tuple ``(df_sales, df_stock, df_pdb, df_customers, df_dealers)``.
        ``df_sales`` is the joined frame; ``df_pdb`` is de-duplicated.
    """
    performance_info_append(time.time(), 'Section_A_Start')
    log_record('Início Secção A...', options_file.project_id)

    # NOTE(review): the returned tags are not used in this function; the call
    # is kept in case time_tags() has side effects -- confirm and remove if pure.
    time_tags()

    queries = (
        options_file.sales_query,
        options_file.stock_query,
        options_file.product_db_query,
        options_file.customer_group_query,
        options_file.dealers_query,
    )
    # One DataFrame per query, unpacked in the same order as ``queries``.
    df_sales, df_stock, df_pdb, df_customers, df_dealers = [
        sql_retrieve_df_specified_query(
            options_file.DSN_SRV3_PRD,
            options_file.sql_info['database_source'],
            options_file,
            query,
        )
        for query in queries
    ]

    # There are repeated VehicleData_Code inside this union between BI_DTR
    # and BI_DW_History.
    df_pdb.drop_duplicates(subset='VehicleData_Code', inplace=True)

    # Best-effort numeric conversion. ``errors='ignore'`` is deprecated in
    # pandas, so the equivalent "keep the column as-is on failure" behaviour
    # is spelled out explicitly.
    try:
        df_sales['NLR_Code'] = pd.to_numeric(df_sales['NLR_Code'])
    except (ValueError, TypeError):
        pass

    # Adding missing information regarding customers. The return value is
    # not used, so this presumably mutates df_sales in place -- confirm.
    missing_customer_info_treatment(df_sales)

    # Addition of customer information
    customer_groups = df_customers[
        ['Customer_Group_Code', 'Customer_Group_Desc']
    ].set_index('Customer_Group_Code')
    df_customers_and_dealers = df_join_function(
        df_dealers,
        customer_groups,
        on='Customer_Group_Code',
        how='left')

    customer_columns = [
        'SLR_Account_CHS_Key', 'NDB_VATGroup_Desc', 'VAT_Number_Display',
        'NDB_Contract_Dealer_Desc', 'NDB_VHE_PerformGroup_Desc',
        'NDB_VHE_Team_Desc', 'Customer_Display', 'Customer_Group_Code',
        'Customer_Group_Desc', 'NDB_Dealer_Code',
    ]
    df_sales = df_join_function(
        df_sales,
        df_customers_and_dealers[customer_columns].set_index('SLR_Account_CHS_Key'),
        on='SLR_Account_CHS_Key',
        how='left')

    log_record('Fim Secção A.', options_file.project_id)
    performance_info_append(time.time(), 'Section_A_End')
    return df_sales, df_stock, df_pdb, df_customers, df_dealers
Exemplo n.º 4
0
def get_data_product_group_sql(others_dict, options_file_in):
    """Fetch the complete product-group table and apply naming overrides.

    The query ``options_file_in.product_group_complete_app_query`` runs
    against ``options_file_in.DSN_SRV3_PRD`` and the ``database_BI_AFR``
    entry of ``options_file_in.sql_info``.

    Post-processing:
      * ``Product_Group_Code`` is cast to ``str``;
      * rows with code ``'77'`` are removed and code ``'75'`` is relabelled
        ``'75/77'``;
      * the description ``'Lazer'`` becomes ``'Lazer/Marroquinaria'``;
      * for each ``key`` in ``others_dict``, rows whose code equals
        ``str(key)`` get ``others_dict[key]`` as ``PT_Product_Group_Desc``.

    Returns:
        The processed DataFrame.
    """
    df = sql_retrieve_df_specified_query(
        options_file_in.DSN_SRV3_PRD,
        options_file_in.sql_info['database_BI_AFR'],
        options_file_in,
        options_file_in.product_group_complete_app_query,
    )
    df['Product_Group_Code'] = df['Product_Group_Code'].astype('str')

    # Explicit copy: the .loc assignments below must write to an independent
    # frame, not to a slice of the query result (avoids pandas'
    # SettingWithCopyWarning).
    df = df[df['Product_Group_Code'] != '77'].copy()
    df.loc[df['Product_Group_Code'] == '75', 'Product_Group_Code'] = '75/77'
    df.loc[df['PT_Product_Group_Desc'] == 'Lazer', 'PT_Product_Group_Desc'] = 'Lazer/Marroquinaria'

    for key, description in others_dict.items():
        df.loc[df['Product_Group_Code'] == str(key), 'PT_Product_Group_Desc'] = description

    return df
Exemplo n.º 5
0
def get_suggestions_dict(options_file_in):
    """Load the saved suggestion pairs and group them by ``Part_Ref_Group_Desc``.

    The query ``saved_solutions_pairs_query`` runs against
    ``options_file_in.DSN_MLG_PRD`` and the ``database_final`` entry of
    ``options_file_in.sql_info``.

    Returns:
        Tuple ``(suggestions_by_group, suggestions_df)`` where
        ``suggestions_by_group`` maps each ``Part_Ref_Group_Desc`` value to
        the list of that group's rows (each row as it appears in
        ``group.values``), and ``suggestions_df`` is the unmodified query
        result.
    """
    suggestions_df = level_1_a_data_acquisition.sql_retrieve_df_specified_query(
        options_file_in.DSN_MLG_PRD,
        options_file_in.sql_info['database_final'],
        options_file_in,
        query=saved_solutions_pairs_query,
    )

    suggestions_by_group = {
        group_desc: list(rows.values)
        for group_desc, rows in suggestions_df.groupby('Part_Ref_Group_Desc')
    }

    return suggestions_by_group, suggestions_df
Exemplo n.º 6
0
def data_acquisition():
    """Section A: retrieve the training dataset.

    Runs ``options_file.get_train_dataset_query`` against
    ``options_file.DSN_MLG_PRD`` and the ``database_mlg`` entry of
    ``options_file.sql_info``. Start and end of the section are recorded
    through ``performance_info_append`` and ``log_record``.

    Returns:
        The query result as returned by ``sql_retrieve_df_specified_query``.
    """
    performance_info_append(time.time(), 'Section_A_Start')
    log_record('Início Secção A...', project_id)

    dsn = options_file.DSN_MLG_PRD
    database = options_file.sql_info['database_mlg']
    train_query = options_file.get_train_dataset_query
    train_df = sql_retrieve_df_specified_query(dsn, database, options_file, train_query)

    log_record('Fim Secção A.', project_id)
    performance_info_append(time.time(), 'Section_A_End')
    return train_df
Exemplo n.º 7
0
def brand_codes_retrieval():
    """Build, per client, a regex that matches any of its brand codes as a prefix.

    ``options_file.brand_codes_per_platform`` is formatted once per platform
    and the resulting statements are combined with ``UNION ALL`` into a
    single query, which runs against ``options_file.DSN_SRV3_PRD`` /
    ``'BI_AFR'``.

    Returns:
        dict mapping ``str(Client_ID)`` to a pattern of the form
        ``^(CODE1|CODE2|...)`` built from that client's unique
        ``Franchise_Code_DMS`` values, longest codes first.
    """
    platforms = ['BI_AFR', 'BI_CRP', 'BI_IBE', 'BI_CA']
    query = ' UNION ALL '.join(
        options_file.brand_codes_per_platform.format(platform)
        for platform in platforms
    )
    brand_codes_df = sql_retrieve_df_specified_query(
        options_file.DSN_SRV3_PRD, 'BI_AFR', options_file, query)

    # Longest codes first: regex alternation takes the first alternative that
    # matches, so a short code must not shadow a longer one sharing its prefix.
    brand_codes_df['code_len'] = brand_codes_df['Franchise_Code_DMS'].str.len()
    brand_codes_df.sort_values(by='code_len', ascending=False, inplace=True)

    brand_codes_regex_dict = {}
    for client_id in brand_codes_df['Client_ID'].unique():
        client_rows = brand_codes_df[brand_codes_df['Client_ID'] == client_id]
        unique_brands = client_rows['Franchise_Code_DMS'].unique()
        # The anchor must be '^(' -- the previous literal 'r^(' put the
        # raw-string prefix inside the string, so the pattern required a
        # literal "r" before the start anchor and could never match a prefix.
        # NOTE(review): codes are inserted verbatim; if they can contain regex
        # metacharacters they should be escaped -- confirm with the consumer.
        brand_codes_regex_dict[str(client_id)] = r'^(' + '|'.join(unique_brands) + ')'

    return brand_codes_regex_dict
Exemplo n.º 8
0
def get_data_product_group_sql(others_dict, options_file_in):
    """Fetch the product-group table, apply naming overrides and add a merge key.

    The query ``options_file_in.product_group_app_query`` runs against
    ``options_file_in.DSN_SRV3_PRD`` and the ``database_BI_AFR`` entry of
    ``options_file_in.sql_info``.

    Post-processing:
      * ``Product_Group_Code`` is cast to ``str``;
      * rows with code ``'77'`` are removed and code ``'75'`` is relabelled
        ``'75/77'``;
      * the description ``'Lazer'`` becomes ``'Lazer/Marroquinaria'``;
      * for each ``key`` in ``others_dict``, rows whose code equals
        ``str(key)`` get ``others_dict[key]`` as ``PT_Product_Group_Desc``;
      * ``Product_Group_Merge`` is added as
        ``"<level 1 desc>, <level 2 desc>, <desc>"`` and the frame is sorted
        by it.

    Returns:
        The processed DataFrame, sorted by ``Product_Group_Merge``.
    """
    df = level_1_a_data_acquisition.sql_retrieve_df_specified_query(
        options_file_in.DSN_SRV3_PRD,
        options_file_in.sql_info['database_BI_AFR'],
        options_file_in,
        options_file_in.product_group_app_query,
    )
    df['Product_Group_Code'] = df['Product_Group_Code'].astype('str')

    # Explicit copy: the assignments below must write to an independent frame,
    # not to a slice of the query result (avoids pandas'
    # SettingWithCopyWarning).
    df = df[df['Product_Group_Code'] != '77'].copy()
    df.loc[df['Product_Group_Code'] == '75', 'Product_Group_Code'] = '75/77'
    df.loc[df['PT_Product_Group_Desc'] == 'Lazer', 'PT_Product_Group_Desc'] = 'Lazer/Marroquinaria'

    for key, description in others_dict.items():
        df.loc[df['Product_Group_Code'] == str(key), 'PT_Product_Group_Desc'] = description

    df['Product_Group_Merge'] = (
        df['PT_Product_Group_Level_1_Desc']
        + ', ' + df['PT_Product_Group_Level_2_Desc']
        + ', ' + df['PT_Product_Group_Desc']
    )
    df.sort_values(by='Product_Group_Merge', inplace=True)

    return df
Exemplo n.º 9
0
def get_suggestions_dict(options_file_in):
    """Load the saved suggestion pairs and list the model codes per sales place.

    The query ``saved_solutions_pairs_query`` runs against
    ``options_file_in.DSN_MLG_PRD`` and the ``database_final`` entry of
    ``options_file_in.sql_info``. The ``Model_Code`` and
    ``Sales_Place_Fase2_Level_1`` columns are then renamed to their
    ``column_translate`` equivalents via
    ``level_1_b_data_processing.column_rename``.

    Returns:
        Tuple ``(suggestions_by_place, suggestions_df)`` where
        ``suggestions_by_place`` maps each (translated) sales-place value to
        the list of (translated) model-code values in that group, and
        ``suggestions_df`` is the renamed DataFrame.
    """
    suggestions_df = level_1_a_data_acquisition.sql_retrieve_df_specified_query(
        options_file_in.DSN_MLG_PRD,
        options_file_in.sql_info['database_final'],
        options_file_in,
        query=saved_solutions_pairs_query,
    )

    # Translated column names, looked up once and reused below.
    model_col = column_translate['Model_Code']
    place_col = column_translate['Sales_Place_Fase2_Level_1']

    suggestions_df = level_1_b_data_processing.column_rename(
        suggestions_df,
        ['Model_Code', 'Sales_Place_Fase2_Level_1'],
        [model_col, place_col],
    )

    grouped_by_place = suggestions_df[[place_col, model_col]].groupby(place_col)
    suggestions_by_place = {
        place: list(rows[model_col].values)
        for place, rows in grouped_by_place
    }

    return suggestions_by_place, suggestions_df
Exemplo n.º 10
0
def run_single_query(dsn, database, options_file_in, query):
    """Run ``query`` and return the result of the data-acquisition helper.

    The four arguments are passed positionally, in this order, to
    ``level_1_a_data_acquisition.sql_retrieve_df_specified_query``.
    """
    return level_1_a_data_acquisition.sql_retrieve_df_specified_query(
        dsn,
        database,
        options_file_in,
        query,
    )