Example #1
def model_evaluation(df, models, best_models, running_times, datasets,
                     number_of_features, options_file, oversample_check,
                     proj_id):
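    # Section D: evaluate every trained model, plot ROC curves, score the candidates in
    # parallel, then pick the best model and, if one qualifies, compute feature contributions.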
    performance_info_append(time.time(), 'Section_D_Start')
    log_record('Início Secção D...', proj_id)

    results_training, results_test, predictions = performance_evaluation_classification(
        models, best_models, running_times, datasets, options_file, proj_id
    )  # Creates a df with the performance of each model evaluated in various metrics, explained in the provided pdf
    plot_roc_curve(best_models, models, datasets,
                   'roc_curve_temp_' + str(number_of_features))

    df_model_dict = multiprocess_model_evaluation(df, models, datasets,
                                                  best_models, predictions,
                                                  configuration_parameters,
                                                  oversample_check, proj_id)
    model_choice_message, best_model_name, _, section_e_upload_flag = model_choice(
        options_file.DSN_MLG_PRD, options_file, results_test)

    if not section_e_upload_flag:
        best_model = None
    else:
        best_model = df_model_dict[best_model_name]
        feature_contribution(best_model, configuration_parameters, 'Modelo',
                             options_file, proj_id)

    log_record('Fim Secção D.', proj_id)

    performance_info_append(time.time(), 'Section_D_End')
    return model_choice_message, best_model, df.shape[0]
Example #2
def main():
    log_record('Projeto: Sugestão Encomenda Baviera - Viaturas', project_id)

    ### Options:
    input_file = 'dbs/full_data_bmw.csv'

    target_variable = [
        'new_score'
    ]  # possible targets = ['stock_class1', 'stock_class2', 'margem_class1', 'score_class', 'new_score']
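    # 0/1 flag passed through to data_processing; presumably enables oversampling of the training data when set.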
    oversample_check = 0
    query_filters = {'NLR_CODE': '701'}
    ###

    number_of_features = 'all'
    df = data_acquisition(input_file, query_filters, local=0)
    control_prints(df, 'after A')
    df = data_processing(df, target_variable, oversample_check,
                         number_of_features)
    control_prints(df, 'after B')

    model_choice_message, best_model, vehicle_count = data_grouping_by_locals_temp(
        df, configuration_parameters,
        level_2_optionals_baviera_options.project_id)

    deployment(best_model,
               level_2_optionals_baviera_options.sql_info['database'],
               level_2_optionals_baviera_options.sql_info['final_table'])

    performance_info(level_2_optionals_baviera_options.project_id,
                     level_2_optionals_baviera_options, model_choice_message,
                     vehicle_count)

    log_record(
        'Conclusão com sucesso - Projeto {}.\n'.format(
            project_dict[project_id]), project_id)
Example #3
def deployment(df, db, view):
    performance_info_append(time.time(), 'Section_E_Start')
    log_record('Início Secção E...', project_id)

    if df is not None:
        df['NLR_Code'] = level_2_optionals_cdsu_options.nlr_code
        # df = column_rename(df, list(level_2_optionals_cdsu_options.column_sql_renaming.keys()), list(level_2_optionals_cdsu_options.column_sql_renaming.values()))
        df = df.rename(
            columns=level_2_optionals_cdsu_options.column_sql_renaming)
        control_prints(df, 'before deployment, after renaming', head=1)
        sql_delete(
            level_2_optionals_cdsu_options.DSN_MLG_PRD, db, view,
            level_2_optionals_cdsu_options,
            {'NLR_Code': '{}'.format(level_2_optionals_cdsu_options.nlr_code)})
        sql_inject(df,
                   level_2_optionals_cdsu_options.DSN_MLG_PRD,
                   db,
                   view,
                   level_2_optionals_cdsu_options,
                   list(level_2_optionals_cdsu_options.
                        column_checkpoint_sql_renaming.values()),
                   check_date=1)

    log_record('Fim Secção E.', project_id)
    performance_info_append(time.time(), 'Section_E_End')
    return
Example #4
def data_acquisition(input_files, query_filters, local=0):
    performance_info_append(time.time(), 'Section_A_Start')
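    # Default all six frames to empty DataFrames so every name exists regardless of the branch below.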
    (df_facts, df_facts_duration, df_clients, df_pbi_categories,
     df_manual_classifications, keywords_df) = (pd.DataFrame() for _ in range(6))
    log_record('Início Secção A...', options_file.project_id)

    if local:
        df_facts = read_csv(input_files[0], index_col=0, parse_dates=options_file.date_columns, infer_datetime_format=True)
        df_facts_duration = read_csv(input_files[1], index_col=0)
        df_clients = read_csv(input_files[2], index_col=0)
        df_pbi_categories = read_csv(input_files[3], index_col=0)
        df_manual_classifications = read_csv(input_files[4], index_col=0)
        keywords_df = read_csv(input_files[5], index_col=0)
    else:
        df_facts = sql_retrieve_df(options_file.DSN_SRV3_PRD, options_file.sql_info['database_source'],
                                   options_file.sql_info['initial_table_facts'], options_file,
                                   options_file.sql_fact_columns, query_filters=query_filters[0],
                                   parse_dates=options_file.date_columns)
        df_facts_duration = sql_retrieve_df(options_file.DSN_SRV3_PRD, options_file.sql_info['database_source'],
                                            options_file.sql_info['initial_table_facts_durations'], options_file,
                                            options_file.sql_facts_durations_columns, query_filters=query_filters[1])
        df_clients = sql_retrieve_df(options_file.DSN_SRV3_PRD, options_file.sql_info['database_source'],
                                     options_file.sql_info['initial_table_clients'], options_file,
                                     options_file.sql_dim_contacts_columns)
        df_pbi_categories = sql_retrieve_df(options_file.DSN_SRV3_PRD, options_file.sql_info['database_source'],
                                            options_file.sql_info['initial_table_pbi_categories'], options_file,
                                            options_file.sql_pbi_categories_columns, query_filters=query_filters[1])
        df_manual_classifications = sql_retrieve_df(options_file.DSN_SRV3_PRD, options_file.sql_info['database_source'],
                                                    options_file.sql_info['aux_table'], options_file)
        keywords_df = sql_retrieve_df(options_file.DSN_MLG_PRD, options_file.sql_info['database_final'],
                                      options_file.sql_info['keywords_table_str'], options_file,
                                      columns=['Keywords_PT', 'Keywords_ES']).dropna()

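        # Cache the freshly retrieved tables locally so subsequent runs can use local=1.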
        save_csv([df_facts, df_facts_duration, df_clients, df_pbi_categories, df_manual_classifications, keywords_df],
                 ['dbs/db_facts_initial', 'dbs/db_facts_duration', 'dbs/db_clients_initial',
                  'dbs/db_pbi_categories_initial', 'dbs/db_manual_classification', 'dbs/db_keywords_df'])

    keyword_dict, ranking_dict = sql_mapping_retrieval(options_file.DSN_MLG_PRD, options_file.sql_info['database_final'],
                                                       options_file.sql_info['keywords_table'], 'Keyword_Group',
                                                       options_file, multiple_columns=1)
    keyword_dict = keyword_dict[0]

    log_record('Fim Secção A.', options_file.project_id)
    performance_info_append(time.time(), 'Section_A_End')

    return df_facts, df_facts_duration, df_clients, df_pbi_categories, df_manual_classifications, keywords_df, keyword_dict, ranking_dict
Example #5
def multiprocess_evaluation(args):
    df, model_name, datasets, best_models, predictions, configuration_parameters, oversample_check, project_id = args
    model_name_pretty = level_0_performance_report.dict_models_name_conversion[model_name][0]
    level_0_performance_report.log_record(
        'A avaliar o modelo {} @ {}...'.format(model_name_pretty, time.strftime("%H:%M:%S @ %d/%m/%y")),
        project_id)

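    # Work on deep copies so this worker never mutates the shared datasets dict.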
    train_x_copy = datasets['train_x'].copy(deep=True)
    test_x_copy = datasets['test_x'].copy(deep=True)

    probabilities = probability_evaluation(model_name, best_models,
                                           train_x_copy, test_x_copy)
    df_model = add_new_columns_to_df(df, probabilities,
                                     predictions[model_name], datasets,
                                     configuration_parameters,
                                     oversample_check, project_id)
    df_model = df_decimal_places_rounding(df_model, {
        'proba_0': 2,
        'proba_1': 2
    })

    level_1_e_deployment.save_csv(
        [df_model],
        [base_path + '/output/' + 'db_final_classification_' + model_name])

    level_0_performance_report.log_record(
        'Modelo {} terminou @ {}'.format(model_name_pretty, time.strftime("%H:%M:%S @ %d/%m/%y")),
        project_id)
    return model_name, df_model
Example #6
def flow_step_2(df):
    log_record('Step 2 started.', options_file.project_id)

    df = df.drop_duplicates(subset=['Part_Ref', 'Part_Desc', 'Product_Group_DW', 'Client_Id', 'Average_Cost', 'PVP_1', 'PLR_Account', 'Part_Desc_PT'])

    log_record('Step 2 ended.', options_file.project_id)
    return df
Example #7
def sql_retrieve_df_specified_query(dsn, db, options_file, query):
    start = time.time()

    try:
        if OS_PLATFORM == 'WINDOWS':
            cnxn = pyodbc.connect('DSN={};UID={};PWD={};DATABASE={}'.format(dsn, options_file.UID, options_file.PWD, db),
                                  searchescape='\\')
        elif OS_PLATFORM == 'LINUX':
            cnxn = pyodbc.connect('Driver=ODBC Driver 17 for SQL Server;Server=tcp:{};UID={};PWD={};DATABASE={}'.format(
                dsn, options_file.UID, options_file.PWD, db), searchescape='\\')
        else:
            raise ValueError('Unsupported OS_PLATFORM: {}'.format(OS_PLATFORM))  # otherwise cnxn would be unbound below

        df = pd.read_sql(query, cnxn)
        cnxn.close()

        print('Elapsed time: {:.2f} seconds.'.format(time.time() - start))
        return df

    except (pyodbc.ProgrammingError, pyodbc.OperationalError) as error:
        level_0_performance_report.log_record(
            'Erro ao obter os dados do DW - {}'.format(error),
            options_file.project_id,
            flag=2)
        return
Example #8
def model_training(df):
    performance_info_append(time.time(), 'Section_D_Start')
    log_record('Início Secção D...', options_file.project_id)

    non_classified_df_scored = pa_servicedesk_models_training.main()
    df = update_labels(df, non_classified_df_scored, 'Request_Num', 'Label')

    log_record('Fim Secção D.', options_file.project_id)
    performance_info_append(time.time(), 'Section_D_End')
    return df
Example #9
def sql_retrieve_df(dsn,
                    db,
                    view,
                    options_file,
                    columns='*',
                    query_filters=0,
                    column_renaming=0,
                    **kwargs):
    start = time.time()
    query, query_filters_string_list = None, []

    if columns != '*':
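        # Turn a Python list of column names into a comma-separated string for the SELECT clause.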
        columns = str(columns)[1:-1].replace('\'', '')

    try:
        if OS_PLATFORM == 'WINDOWS':
            cnxn = pyodbc.connect('DSN={};UID={};PWD={};DATABASE={}'.format(dsn, options_file.UID, options_file.PWD, db),
                                  searchescape='\\')
        elif OS_PLATFORM == 'LINUX':
            cnxn = pyodbc.connect('Driver=ODBC Driver 17 for SQL Server;Server=tcp:{};UID={};PWD={};DATABASE={}'.format(
                dsn, options_file.UID, options_file.PWD, db), searchescape='\\')
        else:
            raise ValueError('Unsupported OS_PLATFORM: {}'.format(OS_PLATFORM))  # otherwise cnxn would be unbound below

        if not query_filters:
            query = 'SELECT ' + columns + ' FROM ' + view + ' WITH (NOLOCK)'
        elif isinstance(query_filters, dict):
            for key, value in query_filters.items():
                if isinstance(value, list):
                    in_values = "'%s'" % "', '".join(str(x) for x in value)
                    query_filters_string_list.append(key + ' in (' + in_values + ')')
                else:
                    query_filters_string_list.append(key + " = '%s'" % str(value))
            query = ('SELECT ' + columns + ' FROM ' + view + ' WITH (NOLOCK) WHERE '
                     + ' and '.join(query_filters_string_list))
        df = pd.read_sql(query, cnxn, **kwargs)
        if column_renaming:
            level_1_b_data_processing.column_rename(
                df, list(options_file.sql_to_code_renaming.keys()),
                list(options_file.sql_to_code_renaming.values()))

        cnxn.close()

        print('Elapsed time: {:.2f} seconds.'.format(time.time() - start))
        return df

    except (pyodbc.ProgrammingError, pyodbc.OperationalError) as error:
        level_0_performance_report.log_record(
            'Erro ao obter os dados do DW - {}'.format(error),
            options_file.project_id,
            flag=2)
        return
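
# A minimal sketch of how a query_filters dict becomes the WHERE clause above
# (filter values borrowed from the service-desk example; 'some_view' is hypothetical):
# scalars turn into equality tests, lists turn into IN clauses.
filters = {'Cost_Centre': '6825', 'Record_Type': ['1', '2']}
clauses = []
for key, value in filters.items():
    if isinstance(value, list):
        clauses.append(key + " in ('" + "', '".join(str(x) for x in value) + "')")
    else:
        clauses.append(key + " = '" + str(value) + "'")
print('SELECT * FROM some_view WITH (NOLOCK) WHERE ' + ' and '.join(clauses))
# -> SELECT * FROM some_view WITH (NOLOCK) WHERE Cost_Centre = '6825' and Record_Type in ('1', '2')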
Example #10
def data_modelling(df, df_top_words, df_manual_classification, keyword_dict, ranking_dict):
    performance_info_append(time.time(), 'Section_C_Start')
    log_record('Início Secção C...', options_file.project_id)

    df = new_request_type(df, df_top_words, df_manual_classification, keyword_dict, ranking_dict, options_file)

    log_record('Fim Secção C.', options_file.project_id)
    performance_info_append(time.time(), 'Section_C_End')

    return df
Example #11
def data_acquisition():
    performance_info_append(time.time(), 'Section_A_Start')
    log_record('Início Secção A...', options_file.project_id)

    current_date, _ = time_tags()

    dfs = []

    for query in [
            options_file.sales_query, options_file.stock_query,
            options_file.product_db_query, options_file.customer_group_query,
            options_file.dealers_query
    ]:
        df = sql_retrieve_df_specified_query(
            options_file.DSN_SRV3_PRD,
            options_file.sql_info['database_source'], options_file, query)
        # df.to_csv(file_name + '.csv')
        dfs.append(df)

    df_sales = dfs[0]
    df_stock = dfs[1]
    df_pdb = dfs[2]
    df_customers = dfs[3]
    df_dealers = dfs[4]

    df_pdb.drop_duplicates(
        subset='VehicleData_Code', inplace=True
    )  # There are repeated VehicleData_Code inside this union between BI_DTR and BI_DW_History

    df_sales['NLR_Code'] = pd.to_numeric(df_sales['NLR_Code'], errors='ignore')

    # Adding missing information regarding customers
    missing_customer_info_treatment(df_sales)

    # Addition of customer information
    df_customers_and_dealers = df_join_function(
        df_dealers,
        df_customers[['Customer_Group_Code',
                      'Customer_Group_Desc']].set_index('Customer_Group_Code'),
        on='Customer_Group_Code',
        how='left')
    df_sales = df_join_function(
        df_sales,
        df_customers_and_dealers[[
            'SLR_Account_CHS_Key', 'NDB_VATGroup_Desc', 'VAT_Number_Display',
            'NDB_Contract_Dealer_Desc', 'NDB_VHE_PerformGroup_Desc',
            'NDB_VHE_Team_Desc', 'Customer_Display', 'Customer_Group_Code',
            'Customer_Group_Desc', 'NDB_Dealer_Code'
        ]].set_index('SLR_Account_CHS_Key'),
        on='SLR_Account_CHS_Key',
        how='left')

    log_record('Fim Secção A.', options_file.project_id)
    performance_info_append(time.time(), 'Section_A_End')
    return df_sales, df_stock, df_pdb, df_customers, df_dealers
Example #12
def main():
    log_record('Projeto: {}'.format(project_dict[options_file.project_id]),
               options_file.project_id)

    df = data_acquisition()
    # df = pd.read_csv('dbs/dataset_train_20200817_v5.csv')

    df_prob_cost = data_modelling(df)

    deployment(df_prob_cost, options_file.DATA_PROB_PATH_ALL_COST)
    return
Example #13
def deployment(df):
    performance_info_append(time.time(), 'Section_E_Start')
    log_record('Início Secção E...', options_file.project_id)
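    # NaN -> None so pyodbc writes SQL NULLs instead of 'nan' strings.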
    df = df.astype(object).where(pd.notnull(df), None)

    sql_inject(df, options_file.DSN_SRV3_PRD, options_file.sql_info['database_source'],
               options_file.sql_info['final_table'], options_file,
               ['Request_Num', 'StemmedDescription', 'Description', 'Language', 'Open_Date', 'Label', 'Classification_Flag'],
               truncate=1)

    log_record('Fim Secção E.', options_file.project_id)
    performance_info_append(time.time(), 'Section_E_End')

    return
Example #14
def flow_step_4_5(df, keywords_df):
    log_record('Step 4.5 started.', options_file.project_id)
    keywords_col_1 = list(set(keywords_df.loc[keywords_df['Column'] == 'Part_Desc', 'Word']))
    keywords_col_2 = list(set(keywords_df.loc[keywords_df['Column'] == 'Part_Desc_PT', 'Word']))

    # Detect keywords in each concatenated description column
    df = detect_keywords(df, 'Part_Desc_concat', keywords_col_1)
    df = detect_keywords(df, 'Part_Desc_PT_concat', keywords_col_2)

    log_record('Step 4.5 ended.', options_file.project_id)
    return df
Example #15
def data_acquisition(input_file, query_filters, local=0):
    performance_info_append(time.time(), 'Section_A_Start')
    log_record('Início Secção A...', project_id)

    if local:
        read_csv_kwargs = dict(parse_dates=['Purchase_Date', 'Sell_Date'],
                               usecols=level_2_optionals_baviera_options.sql_to_code_renaming.keys(),
                               infer_datetime_format=True,
                               decimal='.')
        try:
            df = read_csv(input_file, encoding='utf-8', **read_csv_kwargs)
        except UnicodeDecodeError:
            df = read_csv(input_file, encoding='latin-1', **read_csv_kwargs)
        column_rename(df,
                      list(level_2_optionals_baviera_options.sql_to_code_renaming.keys()),
                      list(level_2_optionals_baviera_options.sql_to_code_renaming.values()))
    else:
        df = sql_retrieve_df(
            level_2_optionals_baviera_options.DSN_MLG_PRD,
            level_2_optionals_baviera_options.sql_info['database'],
            level_2_optionals_baviera_options.sql_info['initial_table'],
            level_2_optionals_baviera_options,
            list(
                level_2_optionals_baviera_options.sql_to_code_renaming.keys()),
            query_filters,
            column_renaming=1,
            parse_dates=['Purchase_Date', 'Sell_Date'])
        project_units_count_checkup(df,
                                    'Nº Stock',
                                    level_2_optionals_baviera_options,
                                    sql_check=0)

    log_record('Fim Secção A.', project_id)
    performance_info_append(time.time(), 'Section_A_End')

    return df
Example #16
def nan_detection(df, original_col, new_col):
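    # Rows where the merge left new_col empty signal a missing parameterization for original_col.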
    new_col_rows_with_nan = df[df[new_col].isnull()]

    if new_col_rows_with_nan.shape[0]:
        log_record(
            'Aviso no Merge de Colunas - NaNs detetados em: {}, não encontrada a parametrização para {} no(s) veículo(s) com VHE_Number(s): {}'
            .format(new_col, new_col_rows_with_nan[original_col].unique(),
                    new_col_rows_with_nan['Nº Stock'].unique()),
            project_id,
            flag=1)

    return
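
# A toy, self-contained illustration of the check above (hypothetical column names,
# printing instead of log_record): rows where the merge produced NaN in the new
# column are reported together with the source values that failed to map.
import pandas as pd

toy = pd.DataFrame({'Model': ['320d', 'X1'],
                    'Model_Mapped': ['Série 3', None],
                    'Nº Stock': ['A1', 'A2']})
missing = toy[toy['Model_Mapped'].isnull()]
if missing.shape[0]:
    print(missing['Model'].unique(), missing['Nº Stock'].unique())  # ['X1'] ['A2']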
Example #17
def data_processing(df_sales, pse_code, options_info):
    performance_info_append(time.time(), 'Section_B_Start')
    log_record('Início Secção B...', options_file.project_id)
    start_treatment = time.time()

    df_sales = apv_dataset_treatment(df_sales, pse_code, options_info.urgent_purchases_flags, options_info.project_id)

    print('Elapsed time: {:.2f} seconds.'.format(time.time() - start_treatment))

    log_record('Fim Secção B.', options_file.project_id)
    performance_info_append(time.time(), 'Section_B_End')
    return df_sales
Example #18
def data_acquisition(options_info, pse_group, last_processed_date, current_date):
    performance_info_append(time.time(), 'Section_A_Start')
    log_record('Início Secção A...', options_file.project_id)

    start = time.time()

    df_sales, df_history, df_product_group_dw = dw_data_retrieval(pse_group, current_date, options_info, last_processed_date)

    print('Elapsed time: {:.2f} seconds.'.format(time.time() - start))

    log_record('Fim Secção A.', options_file.project_id)
    performance_info_append(time.time(), 'Section_A_End')
    return df_sales, df_product_group_dw, df_history
Example #19
def data_acquisition():
    performance_info_append(time.time(), 'Section_A_Start')
    log_record('Início Secção A...', project_id)

    df = sql_retrieve_df_specified_query(options_file.DSN_MLG_PRD,
                                         options_file.sql_info['database_mlg'],
                                         options_file,
                                         options_file.get_train_dataset_query)

    # df.to_csv('dbs/dataset_train_20200817_v6.csv', index=False)

    log_record('Fim Secção A.', project_id)
    performance_info_append(time.time(), 'Section_A_End')
    return df
Example #20
def data_modelling(datasets, datasets_non_ohe, models):
    performance_info_append(time.time(), 'Section_C_Start')
    log_record('Início Secção C...', options_file.project_id)

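    # Both the one-hot-encoded and the non-encoded (non-OHE) train sets are handed to the training routine.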
    best_models, running_times = regression_model_training(
        models, datasets['train_x'], datasets_non_ohe['train_x'],
        datasets['train_y'], datasets_non_ohe['train_y'],
        options_file.regression_models_standard, options_file.k,
        options_file.gridsearch_score, options_file.project_id)
    save_model(best_models, models, options_file.project_id)

    log_record('Fim Secção C.', options_file.project_id)
    performance_info_append(time.time(), 'Section_C_End')
    return best_models, running_times
Example #21
def flow_step_5(df, manual_classified_files_loc):
    log_record('Step 5 started.', options_file.project_id)

    # Step 0 - Merge all manual/application classifications
    manual_classified_families = get_dgo_manual_classifications(manual_classified_files_loc)
    pse_fact_pa_parts_classification_refs = get_fact_pa_classifications()
    all_classifications = pd.concat([manual_classified_families, pse_fact_pa_parts_classification_refs])

    # Step 1
    df = classification_corrections_start(df, all_classifications)

    df = product_group_dw_complete_replaces(df.copy())

    log_record('Step 5 ended.', options_file.project_id)
    return df, all_classifications
Example #22
def main():
    log_record('Projeto: {}'.format(project_dict[options_file.project_id]), options_file.project_id)
    input_files = ['dbs/db_facts_initial.csv', 'dbs/db_facts_duration.csv', 'dbs/db_clients_initial.csv',
                   'dbs/db_pbi_categories_initial.csv', 'dbs/db_manual_classification.csv', 'dbs/db_keywords_df.csv']
    query_filters = [{'Cost_Centre': '6825', 'Record_Type': ['1', '2']}, {'Cost_Centre': '6825'}]

    (df_facts, df_facts_duration, df_clients, df_pbi_categories, df_manual_classifications,
     keywords_df, keyword_dict, ranking_dict) = data_acquisition(input_files, query_filters, local=0)
    df, df_top_words = data_processing(df_facts, df_facts_duration, df_clients, df_pbi_categories, keywords_df)
    df = data_modelling(df, df_top_words, df_manual_classifications, keyword_dict, ranking_dict)

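    # Mark records still labelled 'Não Definido' so model_training below can classify them.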
    df['Classification_Flag'] = np.where(df['Label'] == 'Não Definido', 1, 0)
    deployment(df)  # Deploys all according to current classification
    df = model_training(df)
    deployment(df)

    performance_info(options_file.project_id, options_file, model_choice_message='N/A')
    log_record('Conclusão com sucesso - Projeto: {}'.format(project_dict[options_file.project_id]),
               options_file.project_id)
Example #23
def deployment(df, db, view):
    performance_info_append(time.time(), 'Section_E_Start')
    log_record('Início Secção E...', project_id)

    if df is not None:
        for col in list(df):
            df[col] = df[col].astype(str)

        df['NLR_Code'] = level_2_optionals_baviera_options.nlr_code
        df = column_rename(df,
                           list(level_2_optionals_baviera_options.column_sql_renaming.keys()),
                           list(level_2_optionals_baviera_options.column_sql_renaming.values()))

        sql_delete(level_2_optionals_baviera_options.DSN_MLG_PRD, db, view,
                   level_2_optionals_baviera_options,
                   {'NLR_Code': '{}'.format(level_2_optionals_baviera_options.nlr_code)})
        # The training and non-training paths only differ in the column set injected.
        if model_training_check:
            columns_for_sql = level_2_optionals_baviera_options.columns_for_sql
        else:
            columns_for_sql = level_2_optionals_baviera_options.columns_for_sql_temp
        sql_inject(df, level_2_optionals_baviera_options.DSN_MLG_PRD, db, view,
                   level_2_optionals_baviera_options, columns_for_sql, check_date=1)

    log_record('Fim Secção E.', project_id)
    performance_info_append(time.time(), 'Section_E_End')
    return
Example #24
def main():
    log_record('Projeto: {}'.format(project_dict[options_file.project_id]), options_file.project_id)

    for pse_group in options_file.pse_code_groups:
        current_date, _ = time_tags(format_date='%Y%m%d')
        # current_date = '20210218'

        last_processed_date, second_to_last_processed_date, preprocessed_data_exists_flag = \
            apv_last_stock_calculation(options_file.min_date, current_date, pse_group[0],
                                       options_file.project_id)  # Assumes all PSE groups were processed on the same day
        print('Processing data from {} to {}'.format(last_processed_date, current_date))
        print('Deleting data for the day of {}'.format(second_to_last_processed_date))

        df_sales_group, df_product_group_dw, df_history_group = data_acquisition(options_file, pse_group, last_processed_date, current_date)

        for pse_code in pse_group:
            print('Starting PSE_Code {}'.format(pse_code))
            # log_record('Começou PSE = {}'.format(pse_code), options_file.project_id)
            #
            # df_sales = df_sales_group.loc[df_sales_group['PSE_Code'] == pse_code, :]
            try:
                df_history = df_history_group.loc[df_history_group['SO_Code'] == pse_code, :]
            except FileNotFoundError:
                print('df_history not found for pse_code {} and date {}'.format(pse_code, current_date))
                continue  # df_history would otherwise be unbound below

            try:
                df_sales_cleaned = pd.read_csv('dbs/df_sales_cleaned_' + str(pse_code) + '_' + str(current_date) + '.csv', parse_dates=['Movement_Date'])
                print('File found for pse_code {} and date {}'.format(pse_code, current_date))
            except FileNotFoundError:
                print('df_sales not found for pse_code {} and date {}'.format(pse_code, current_date))
                continue  # df_sales_cleaned would otherwise be unbound below
            #
            # if not df_sales.shape[0]:
            #     log_record('Sem vendas para o PSE Code = {}'.format(pse_code), options_file.project_id, flag=1)
            #     flag_testing = 1
            #
            # if not df_history.shape[0]:
            #     log_record('Sem histórico para o PSE Code = {}'.format(pse_code), options_file.project_id, flag=1)
            #     flag_testing = 1
            #
            # if flag_testing:
            #     continue
            #
            # df_sales_cleaned = data_processing(df_sales, pse_code, options_file)

            try:
                df_solver, df_part_ref_ta = data_modelling(pse_code, df_sales_cleaned, df_history, last_processed_date, current_date, preprocessed_data_exists_flag, options_file.project_id)
            except ValueError:
                log_record('Não foram encontradas peças para o PSE_Code {}.'.format(pse_code), options_file.project_id, flag=1)
                continue
            deployment(df_solver, df_part_ref_ta, pse_code)

            log_record('Terminou PSE = {}'.format(pse_code), options_file.project_id)

    # performance_info(options_file.project_id, options_file, model_choice_message='N/A')

    # delete_temp_files(options_file.pse_codes_groups, second_to_last_processed_date)

    log_record('Conclusão com sucesso - Projeto: {} .\n'.format(project_dict[options_file.project_id]), options_file.project_id)
Example #25
def sql_truncate(dsn, options_file, database, view, query=None):
    level_0_performance_report.log_record(
        'A truncar view {} da DB {}.'.format(view, database),
        options_file.project_id)

    cnxn = odbc_connection_creation(dsn, options_file.UID, options_file.PWD,
                                    database)

    if query is None:
        query = "TRUNCATE TABLE " + view

    cursor = cnxn.cursor()
    cursor.execute(query)

    cnxn.commit()
    cursor.close()
    cnxn.close()
Example #26
def deployment(df, db, view):
    performance_info_append(time.time(), 'Section_E_Start')
    log_record('Início Secção E...', options_file.project_id)

    if df is not None:
        sel_df = df.loc[:, options_file.sql_columns_vhe_fact_bi].copy()

        # Convert NaT/NaN in the date columns to None so they are written as SQL NULLs.
        date_cols = ['NLR_Posting_Date', 'SLR_Document_Date_CHS', 'SLR_Document_Date_RGN',
                     'Ship_Arrival_Date', 'Registration_Request_Date', 'Registration_Date',
                     'PDB_Start_Order_Date', 'PDB_End_Order_Date']
        for col in date_cols:
            sel_df[col] = sel_df[col].astype(object).where(sel_df[col].notnull(), None)

        sel_df['Fixed_Margin_II'] = sel_df['Fixed_Margin_II'].round(2)
        sel_df = sel_df.where(sel_df.notnull(), None)
        sel_df.rename(columns={'prev_sales_check': 'Previous_Sales_Flag',
                               'number_prev_sales': 'Previous_Sales_Count'},
                      inplace=True)

        sql_inject(sel_df,
                   options_file.DSN_SRV3_PRD,
                   db,
                   view,
                   options_file,
                   list(sel_df),
                   truncate=1,
                   check_date=1)

    log_record('Fim Secção E.', options_file.project_id)
    performance_info_append(time.time(), 'Section_E_End')
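
# A minimal, standalone sketch of the NaN -> None conversion used above, assuming
# only pandas: pyodbc needs None (not NaT/NaN) in order to write SQL NULLs.
import pandas as pd

dates = pd.DataFrame({'Registration_Date': [pd.Timestamp('2021-02-18'), pd.NaT]})
dates['Registration_Date'] = dates['Registration_Date'].astype(object).where(
    dates['Registration_Date'].notnull(), None)
print(dates['Registration_Date'].tolist())  # [Timestamp('2021-02-18 00:00:00'), None]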