def exp(v1, v2):
    """Compare the metrics of two versions and run prediction on the
    metrics they share.

    v1, v2 -- version identifiers understood by Metrics_Origin.
    """
    ver_a = Metrics_Origin(v1, METRICS_DIR)
    ver_b = Metrics_Origin(v2, METRICS_DIR)
    print('{}-{}'.format(v1, v2))
    # Metrics whose distributions look alike across the two versions.
    shared_metrics = st.compare_two_versions(ver_a, ver_b)
    print(shared_metrics)
    predict(v1, v2, shared_metrics)
def exp(model, metrics_dir=None):
    """Run the Ex01 experiment for a model's previous -> current version pair.

    model -- project model object providing dir_name, curr_version, pre_version.
    metrics_dir -- ignored; kept (now optional) for caller compatibility.

    NOTE(review): the original required `metrics_dir` but immediately
    overwrote it with a path derived from `model.dir_name`; the parameter
    is now optional and still unused — confirm whether callers expected
    their value to be honoured.
    """
    metrics_dir = '/Users/{}/Dropbox/STUDY/{}/Derby/all'\
        .format(ENV, model.dir_name)
    ex01 = Ex01(model, metrics_dir)
    ex01.METRICS_DIR = metrics_dir
    v1 = model.curr_version
    v2 = model.pre_version
    version1 = Metrics_Origin(v1, metrics_dir)
    version2 = Metrics_Origin(v2, metrics_dir)
    print(v1 + '-' + v2)
    alike_metrics = st.compare_two_versions(version1, version2)
    print(alike_metrics)
    ex01.predict(v1, v2, alike_metrics)
def predict(ver, predict_ver, alike_metrics):
    """Train NML and RFN ensemble models on `ver`, evaluate on `predict_ver`,
    and export AUC reports averaged over ITER runs.

    ver -- training version id; predict_ver -- evaluation version id.
    alike_metrics -- unused here; kept for signature compatibility with the
    other predict() variants in this project.
    """
    predictor_rep = PredictorRepository(predict_ver, ver)
    training_m = Metrics_Origin(ver, METRICS_DIR)
    evaluate_m = Metrics_Origin(predict_ver, METRICS_DIR)
    ens_analyzer = AUCAnalyzer(predict_ver, 'ENS', TARGET)
    for i in tqdm(range(ITER)):
        # NML MODEL: trained on the raw product metrics (no resampling).
        predictor = predictor_rep.get_predictor('ENS', PRED_TYPE)
        if predictor is None:
            print(' predictor has not found, type: ' + PRED_TYPE)
            return
        # NOTE(review): DataFrame.as_matrix() was removed in pandas 1.0;
        # switch to .to_numpy() when the pandas pin allows it.
        X_resampled, y_resampled = training_m.product_df.as_matrix(
        ), training_m.fault.as_matrix()
        nml_model = predictor.train_model(X_resampled, y_resampled)
        ev_data, dv_data = evaluate_m.get_not_modified_df()
        nml_value, _ = predictor.predict_ensemble_test_data(
            nml_model, ev_data, dv_data, None)
        # RFN MODEL: trained on merged metrics with random oversampling.
        sm = RandomOverSampler(ratio='auto',
                               random_state=random.randint(1, 100))
        X_resampled, y_resampled = sm.fit_sample(training_m.mrg_df,
                                                 training_m.fault)
        rfn_model = predictor.train_model(X_resampled, y_resampled)
        ev_data, dv_data = evaluate_m.get_modified_df()
        mrg_value, _ = predictor.predict_ensemble_test_data(
            rfn_model, ev_data, dv_data, None)
        predictor.set_is_new_df(evaluate_m.isNew)
        predictor.set_is_modified_df(evaluate_m.isModified)
        report_df = predictor.export_report(predict_ver)
        if report_df is not None:
            # BUG FIX: to_csv previously ran BEFORE the None check and
            # crashed when export_report returned None.
            report_df[REPORT_COLUMNS].to_csv('df.csv')
            ens_analyzer.set_report_df(report_df[REPORT_COLUMNS])
            ens_analyzer.calculate()
    # Post-loop aggregation. NOTE(review): source formatting was lost, so
    # the exact loop boundary of analyze_predict_result() is inferred —
    # confirm it is meant to run once, after all ITER iterations.
    ens_analyzer.analyze_predict_result()
    # export report
    ens_df = ens_analyzer.calculate_average(ITER)
    ens_analyzer.export(target_sw=TARGET, df=ens_df, predictor_type=PRED_TYPE)
    ens_df = ens_analyzer.calculate_num_report_averge(ITER)
    ens_analyzer.export_count_report(target_sw=TARGET, df=ens_df,
                                     predictor_type=PRED_TYPE)
def predict(ver, predict_ver, alike_metrics):
    """Train an NML model (PRED_TYPE) and a DST model (PRED_TYPE2) on `ver`,
    evaluate their ensemble probabilities on `predict_ver`, and export the
    averaged report.

    alike_metrics -- unused here; kept for signature compatibility with the
    other predict() variants in this project.
    """
    training_m = Metrics_Origin(ver, METRICS_DIR)
    evaluate_m = Metrics_Origin(predict_ver, METRICS_DIR)
    ens_analyzer = Analyzer(predict_ver, 'ENS')
    predictor_rep = PredictorRepository(predict_ver, ver)
    for i in tqdm(range(ITER)):
        # NML MODEL: product metrics, oversampled.
        predictor = predictor_rep.get_predictor('ENS', PRED_TYPE)
        if predictor is None:
            print(' predictor has not found, type: ' + PRED_TYPE)
            return
        sm = RandomOverSampler(ratio='auto',
                               random_state=random.randint(1, 100))
        X_resampled, y_resampled = sm.fit_sample(training_m.product_df,
                                                 training_m.fault)
        model = predictor.train_model(X_resampled, y_resampled)
        ev_data, dv_data = evaluate_m.get_not_modified_df()
        nml_value, _ = predictor.predict_ensemble_proba(
            model, ev_data, dv_data, None)
        # DST MODEL: merged metrics, oversampled, second predictor type.
        predictor2 = predictor_rep.get_predictor('ENS', PRED_TYPE2)
        if predictor2 is None:
            print(' predictor has not found, type: ' + PRED_TYPE2)
            return
        sm = RandomOverSampler(ratio='auto',
                               random_state=random.randint(1, 100))
        X_resampled, y_resampled = sm.fit_sample(training_m.mrg_df,
                                                 training_m.fault)
        model = predictor2.train_model(X_resampled, y_resampled)
        ev_data, dv_data = evaluate_m.get_modified_df()
        mrg_value, _ = predictor2.predict_ensemble_proba(
            model, ev_data, dv_data, None)
        predictor2.set_is_new_df(evaluate_m.isNew)
        predictor2.set_is_modified_df(evaluate_m.isModified)
        report_df = predictor2.export_report(predict_ver)
        if report_df is not None:
            ens_analyzer.set_report_df(report_df[REPORT_COLUMNS])
            ens_analyzer.calculate()
    # export report
    ens_df = ens_analyzer.calculate_average(ITER)
    # NOTE(review): the original built "{PRED_TYPE}{PRED_TYPE2}" into an
    # unused local and still exported with predictor_type=PRED_TYPE — the
    # combined name was probably intended here; confirm before changing
    # the output file naming. The dead local has been removed.
    ens_analyzer.export(target_sw=TARGET, df=ens_df, predictor_type=PRED_TYPE)
    ens_analyzer.export_accum_df(target_sw=TARGET)
def main():
    """Write per-version module/bug counts for every model to sw_indecies.csv.

    For each model's final version it records: total modules, modified
    modules, bug modules, and bug modules among the modified ones.
    """
    import csv
    model_dict = model_creator.get_model_dictionary()
    col_list = ['total modules', 'modified modules',
                'bug modules', 'modified bug modules']
    # NOTE(review): data rows carry a leading "<sw> <version>" label cell
    # that has no matching header column — confirm intended CSV shape.
    # BUG FIX: the file was opened without ever being closed; use a
    # context manager so the CSV is flushed and closed on every exit path.
    with open('sw_indecies.csv', 'w') as f:
        writer = csv.writer(f, lineterminator='\n')
        writer.writerow(col_list)
        for sw_name, models in model_dict.items():
            for model in models:
                print(sw_name, model.final_version)
                metrics_dir = "/Users/{}/Dropbox/STUDY/Metrics/".format(ENV)
                version = model.final_version
                metrics = Metrics_Origin(version, metrics_dir, model)
                modified_df, modified_fault = metrics.get_modified_df()
                df = pd.concat([modified_df, modified_fault], axis=1)
                modified_df = df.rename(columns={df.columns[-1]: 'fault'})
                mrg_df, fault = metrics.mrg_df, metrics.fault
                df = pd.concat([mrg_df, fault], axis=1)
                df = df.rename(columns={df.columns[-1]: 'fault'})
                li = ['{} {}'.format(sw_name, model.final_version)]
                # total module count
                print('total modules, {}'.format(len(df)))
                li.append(len(df))
                # modified module count
                print('modified modules, {}'.format(len(modified_df)))
                li.append(len(modified_df))
                # total bug count
                d = df[fault.apply(lambda x: x == 1)]
                print('bug modules, {}'.format(len(d)))
                li.append(len(d))
                # bug count among modified modules
                # NOTE(review): filters modified_df with the FULL fault
                # series rather than modified_fault; pandas will align on
                # index, which may not be what was intended — confirm.
                d = modified_df[fault.apply(lambda x: x == 1)]
                print('modified bug modules, {}'.format(len(d)))
                li.append(len(d))
                writer.writerow(li)
def __create_boxplot_seaborn(g1, g2, save_name, title=None):
    """Draw the two metric columns side by side as a strip plot and save it.

    g1, g2 -- single-column DataFrames (previous / current version values).
    save_name -- output image path. `title` is accepted but unused.
    Note: relabels the columns of both input frames in place.
    """
    import seaborn as sns
    g1.columns = [['pre']]
    g2.columns = [['cre']]
    paired = pd.concat([g1, g2], axis=1)
    sns.stripplot(data=paired)
    plt.grid()
    plt.xlabel('VERSION')
    plt.ylabel('METRICS VALUE')
    plt.savefig(save_name)


if __name__ == '__main__':
    # Script: plot the distribution of one metric for two Solr versions.
    v1 = '4.1.0'
    v2 = '4.2.0'
    METRICS_DIR = '/Users/' + ENV + '/Dropbox/STUDY/Metrics/Solr/all'
    version1 = Metrics_Origin(v1, METRICS_DIR)
    version2 = Metrics_Origin(v2, METRICS_DIR)
    metrics = 'pd2'
    _v1 = version1.mrg_df[[metrics]]
    _v2 = version2.mrg_df[[metrics]]
    save_name = 'compare-wisky-' + metrics + '.png'
    __create_boxplot_seaborn(_v1, _v2, save_name)
def predict(ver, predict_ver, alike_metrics):
    """Train NML, RFN and ITG models on `ver`, evaluate them on
    `predict_ver`, and export reports averaged over ITER runs.

    ver -- training version id; predict_ver -- evaluation version id.
    alike_metrics -- metric names shared between the versions; used only
    by the ITG (intelligence) model via get_specific_df().
    """
    predictor_rep = PredictorRepository(predict_ver, ver)
    training_m = Metrics_Origin(ver, METRICS_DIR)
    evaluate_m = Metrics_Origin(predict_ver, METRICS_DIR)
    nml_analyzer = Analyzer(predict_ver, 'NML')
    rfn_analyzer = Analyzer(predict_ver, 'RFN')
    itg_analyzer = Analyzer(predict_ver, 'ITG')
    for i in tqdm(range(ITER)):
        # NML MODEL: raw product metrics.
        predictor = predictor_rep.get_predictor('NML', PRED_TYPE)
        if predictor is None:
            print(' predictor has not found, type: ' + PRED_TYPE)
            return
        sm = RandomOverSampler(ratio='auto',
                               random_state=random.randint(1, 100))
        X_resampled, y_resampled = sm.fit_sample(training_m.product_df,
                                                 training_m.fault)
        model = predictor.train_model(X_resampled, y_resampled)
        nml_value, importance = predictor.predict_proba(
            model, evaluate_m.product_df, evaluate_m.fault,
            TARGET + "-ex1nml.csv")
        predictor.set_is_new_df(evaluate_m.isNew)
        predictor.set_is_modified_df(evaluate_m.isModified)
        report_df = predictor.export_report(predict_ver)
        if report_df is not None:
            nml_analyzer.set_report_df(report_df[REPORT_COLUMNS])
            nml_analyzer.calculate()
        # RFN MODEL: merged (refined) metrics.
        predictor = predictor_rep.get_predictor('RFN', PRED_TYPE)
        # ROBUSTNESS FIX: guard like the NML branch instead of crashing
        # with AttributeError when the predictor is missing.
        if predictor is None:
            print(' predictor has not found, type: ' + PRED_TYPE)
            return
        sm = RandomOverSampler(ratio='auto',
                               random_state=random.randint(1, 100))
        X_resampled, y_resampled = sm.fit_sample(training_m.mrg_df,
                                                 training_m.fault)
        model = predictor.train_model(X_resampled, y_resampled)
        rfn_value, importance = predictor.predict_proba(
            model, evaluate_m.mrg_df, evaluate_m.fault,
            TARGET + "-ex1rfn.csv")
        predictor.set_is_new_df(evaluate_m.isNew)
        predictor.set_is_modified_df(evaluate_m.isModified)
        report_df = predictor.export_report(predict_ver)
        if report_df is not None:
            rfn_analyzer.set_report_df(report_df[REPORT_COLUMNS])
            rfn_analyzer.calculate()
        # INTELLIGENCE MODEL: restricted to the alike metrics.
        predictor = predictor_rep.get_predictor('ITG', PRED_TYPE)
        # ROBUSTNESS FIX: same guard as above.
        if predictor is None:
            print(' predictor has not found, type: ' + PRED_TYPE)
            return
        sm = RandomOverSampler(ratio='auto',
                               random_state=random.randint(1, 100))
        alike_df = training_m.get_specific_df(alike_metrics)
        X_resampled, y_resampled = sm.fit_sample(alike_df, training_m.fault)
        model = predictor.train_model(X_resampled, y_resampled)
        alike_df = evaluate_m.get_specific_df(alike_metrics)
        # Renamed from rfn_value: the original reused the RFN variable for
        # the ITG result.
        itg_value, importance = predictor.predict_proba(
            model, alike_df, evaluate_m.fault, TARGET + "-ex1itg.csv")
        predictor.set_is_new_df(evaluate_m.isNew)
        predictor.set_is_modified_df(evaluate_m.isModified)
        report_df = predictor.export_report(predict_ver)
        if report_df is not None:
            itg_analyzer.set_report_df(report_df[REPORT_COLUMNS])
            itg_analyzer.calculate()
    # export report
    nml_df = nml_analyzer.calculate_average(ITER)
    rfn_df = rfn_analyzer.calculate_average(ITER)
    itg_df = itg_analyzer.calculate_average(ITER)
    df = pd.concat([nml_df, rfn_df, itg_df], ignore_index=True)
    nml_analyzer.export_accum_df(target_sw=TARGET)
    rfn_analyzer.export_accum_df(target_sw=TARGET)
    itg_analyzer.export_accum_df(target_sw=TARGET)
    # Any of the three analyzers works for the combined export.
    nml_analyzer.export(target_sw=TARGET, df=df, predictor_type=PRED_TYPE)