def _params_producer() -> tuple:
    """Generate parameters for imbalance training.

    Relies on ``filtered_classes`` and ``df`` from the enclosing scope.

    Yields
    ------
    tuple
        Bug descriptions, class codes, configured SMOTE instance and
        model name.
    """
    def _make_smote(codes):
        # The same SMOTE configuration was previously duplicated in both
        # branches; build it (and tune k_neighbors) in one place.
        smt = SMOTE(
            ratio="minority",
            random_state=0,
            kind="borderline1",
            n_jobs=4,
        )
        smt.k_neighbors = get_k_neighbors(codes)
        return smt

    for metric in filtered_classes:
        if metric in ("Priority", "Time to Resolve"):
            filtered_df = df[df[metric].isin(filtered_classes[metric])]
            codes_column = metric.lower() + "_codes"
            # NOTE(review): k_neighbors is derived from the FULL dataframe
            # while the yielded codes come from the filtered one — looks
            # inconsistent; confirm this is intentional before changing.
            smt = _make_smote(df[codes_column])
            classes_codes = filtered_df[codes_column]
            model_name = metric.split("_")[0]
            yield filtered_df.Description_tr, classes_codes, smt, model_name
        else:
            for class_ in filtered_classes[metric]:
                df_index = ("Resolution_" + class_
                            if metric == "Resolution" else class_)
                smt = _make_smote(df[df_index])
                yield df.Description_tr, df[df_index], smt, class_
def _fit_and_save(descriptions, classes_codes, smt, chi2, svm_imb,
                  model_path, name):
    """Tune SMOTE neighbours for *classes_codes*, then train and save one
    model under ``model_path + secure_filename(name)``.

    This pattern was repeated four times in ``train_model``; the shared
    ``smt`` instance is deliberately mutated (``k_neighbors``) before each
    training run, matching the original behaviour.
    """
    smt.k_neighbors = get_k_neighbors(classes_codes)
    training_imbalance(descriptions,
                       classes_codes,
                       session['tfidf'], smt, chi2, 50, svm_imb,
                       model_path + secure_filename(name))


def train_model(model_path, dataframe_path, areas_of_testing, resolution):
    """Train all prediction models (areas of testing, priority, ttr,
    resolution) from the pickled dataframe at *dataframe_path* and save
    them under *model_path*.

    Raises
    ------
    ValueError
        If the dataframe holds fewer bugs than the minimum (100).
    """
    if not exists(model_path):
        makedirs(model_path)
    df = read_pickle(dataframe_path)
    if not check_bugs_count(df, 100):
        raise ValueError(
            'Oops! Too little data to analyze. Model can\'t be trained.')
    filtered_classes, dataframes = prepare_df(df, areas_of_testing,
                                              resolution)
    for _class in filtered_classes:
        check_classes_count(filtered_classes[_class])
    # model/ folder cleanup
    remove_models()
    # updated predictions parameters writing
    write_classes(filtered_classes)
    # local import — presumably avoids a circular import; TODO confirm
    from main.config_processor import load_config_to_session
    load_config_to_session(
        str(Path(__file__).parents[1]) + '/models/selected/' +
        'predictions_parameters.ini')
    # One shared SMOTE/SVC/chi2 trio is reused for every model below.
    smt = SMOTE(ratio='minority', random_state=0, kind='borderline1',
                n_jobs=4)
    svm_imb = SVC(gamma=2, C=1, probability=True, class_weight='balanced')
    chi2 = feature_selection.chi2
    # areas of testing models training
    for area in filtered_classes['areas_of_testing_classes']:
        _fit_and_save(df['Description_tr'],
                      df[areas_of_testing[area]['series_name']],
                      smt, chi2, svm_imb, model_path, area)
    # priority models training
    _fit_and_save(dataframes['priority_df']['Description_tr'],
                  dataframes['priority_df']['Priority_codes'],
                  smt, chi2, svm_imb, model_path, 'priority')
    # ttr models training
    _fit_and_save(dataframes['ttr_df']['Description_tr'],
                  dataframes['ttr_df']['coded_ttr_intervals'],
                  smt, chi2, svm_imb, model_path, 'ttr')
    # resolution models training
    for resol in filtered_classes['resolutions_classes']:
        _fit_and_save(dataframes['resolution_df']['Description_tr'],
                      dataframes['resolution_df']['Resolution_' + resol],
                      smt, chi2, svm_imb, model_path, resol)