Example #1
def exp(v1, v2):
    version1 = Metrics_Origin(v1, METRICS_DIR)
    version2 = Metrics_Origin(v2, METRICS_DIR)
    print(v1 + '-' + v2)
    alike_metrics = st.compare_two_versions(version1, version2)
    print(alike_metrics)
    predict(v1, v2, alike_metrics)
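
These excerpts reference module-level names (ENV, METRICS_DIR, ITER, TARGET, PRED_TYPE, REPORT_COLUMNS, the st statistics helper, and classes such as Metrics_Origin and PredictorRepository) that are defined elsewhere in the experiment scripts. As a reading aid, here is a hypothetical sketch of that shared context; every value below is an assumption, not the original configuration:

# Hypothetical shared context for these excerpts; all values are illustrative.
import random

import pandas as pd
from imblearn.over_sampling import RandomOverSampler
from tqdm import tqdm

# Project-internal modules (Metrics_Origin, PredictorRepository, Analyzer,
# AUCAnalyzer, st, model_creator, Ex01) come from the experiment package.

ENV = 'username'          # local account name interpolated into Dropbox paths
TARGET = 'Derby'          # target software project
METRICS_DIR = '/Users/{}/Dropbox/STUDY/Metrics/{}/all'.format(ENV, TARGET)
ITER = 10                 # number of resample/train/evaluate rounds
PRED_TYPE = 'rf'          # predictor key passed to PredictorRepository
PRED_TYPE2 = 'lg'         # second predictor key (used in Example #4)
REPORT_COLUMNS = ['module', 'fault_prob', 'real']  # illustrative report columns
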
Example #2
def exp(model, metrics_dir):
    # NOTE: the passed-in metrics_dir is overridden with the Derby-specific path.
    metrics_dir = '/Users/{}/Dropbox/STUDY/{}/Derby/all'.format(
        ENV, model.dir_name)
    ex01 = Ex01(model, metrics_dir)

    ex01.METRICS_DIR = metrics_dir

    v1 = model.curr_version
    v2 = model.pre_version
    version1 = Metrics_Origin(v1, metrics_dir)
    version2 = Metrics_Origin(v2, metrics_dir)
    print(v1 + '-' + v2)
    alike_metrics = st.compare_two_versions(version1, version2)
    print(alike_metrics)
    ex01.predict(v1, v2, alike_metrics)
Example #3
def predict(ver, predict_ver, alike_metrics):
    predictor_rep = PredictorRepository(predict_ver, ver)

    training_m = Metrics_Origin(ver, METRICS_DIR)
    evaluate_m = Metrics_Origin(predict_ver, METRICS_DIR)

    ens_analyzer = AUCAnalyzer(predict_ver, 'ENS', TARGET)

    for i in tqdm(range(ITER)):
        # NML MODEL
        predictor = predictor_rep.get_predictor('ENS', PRED_TYPE)
        if predictor is None:
            print('predictor not found, type: ' + PRED_TYPE)
            return
        # sm = RandomOverSampler(sampling_strategy='auto', random_state=random.randint(1, 100))
        # X_resampled, y_resampled = sm.fit_resample(training_m.product_df, training_m.fault)
        # pandas >= 1.0 removed as_matrix(); use .values instead.
        X_resampled, y_resampled = (training_m.product_df.values,
                                    training_m.fault.values)
        nml_model = predictor.train_model(X_resampled, y_resampled)
        ev_data, dv_data = evaluate_m.get_not_modified_df()
        nml_value, _ = predictor.predict_ensemble_test_data(
            nml_model, ev_data, dv_data, None)

        # RFN MODEL
        # imblearn >= 0.6 renamed ratio/fit_sample to sampling_strategy/fit_resample.
        sm = RandomOverSampler(sampling_strategy='auto',
                               random_state=random.randint(1, 100))
        X_resampled, y_resampled = sm.fit_resample(training_m.mrg_df,
                                                   training_m.fault)
        rfn_model = predictor.train_model(X_resampled, y_resampled)
        ev_data, dv_data = evaluate_m.get_modified_df()
        mrg_value, _ = predictor.predict_ensemble_test_data(
            rfn_model, ev_data, dv_data, None)
        predictor.set_is_new_df(evaluate_m.isNew)
        predictor.set_is_modified_df(evaluate_m.isModified)
        report_df = predictor.export_report(predict_ver)
        if report_df is not None:
            report_df[REPORT_COLUMNS].to_csv('df.csv')
            ens_analyzer.set_report_df(report_df[REPORT_COLUMNS])
            ens_analyzer.calculate()
            ens_analyzer.analyze_predict_result()

    # export report
    ens_df = ens_analyzer.calculate_average(ITER)
    ens_analyzer.export(target_sw=TARGET, df=ens_df, predictor_type=PRED_TYPE)
    ens_df = ens_analyzer.calculate_num_report_averge(ITER)
    ens_analyzer.export_count_report(target_sw=TARGET,
                                     df=ens_df,
                                     predictor_type=PRED_TYPE)
Example #4
def predict(ver, predict_ver, alike_metrics):

    training_m = Metrics_Origin(ver, METRICS_DIR)
    evaluate_m = Metrics_Origin(predict_ver, METRICS_DIR)
    ens_analyzer = Analyzer(predict_ver, 'ENS')

    predictor_rep = PredictorRepository(predict_ver, ver)

    for i in tqdm(range(ITER)):
        # NML MODEL
        predictor = predictor_rep.get_predictor('ENS', PRED_TYPE)
        if predictor is None:
            print('predictor not found, type: ' + PRED_TYPE)
            return
        # imblearn >= 0.6 renamed ratio/fit_sample to sampling_strategy/fit_resample.
        sm = RandomOverSampler(sampling_strategy='auto',
                               random_state=random.randint(1, 100))
        X_resampled, y_resampled = sm.fit_resample(training_m.product_df,
                                                   training_m.fault)
        model = predictor.train_model(X_resampled, y_resampled)
        ev_data, dv_data = evaluate_m.get_not_modified_df()
        nml_value, _ = predictor.predict_ensemble_proba(
            model, ev_data, dv_data, None)

        # DST MODEL
        predictor2 = predictor_rep.get_predictor('ENS', PRED_TYPE2)
        if predictor2 is None:
            print('predictor not found, type: ' + PRED_TYPE2)
            return
        sm = RandomOverSampler(sampling_strategy='auto',
                               random_state=random.randint(1, 100))
        X_resampled, y_resampled = sm.fit_resample(training_m.mrg_df,
                                                   training_m.fault)
        model = predictor2.train_model(X_resampled, y_resampled)
        ev_data, dv_data = evaluate_m.get_modified_df()
        mrg_value, _ = predictor2.predict_ensemble_proba(
            model, ev_data, dv_data, None)
        predictor2.set_is_new_df(evaluate_m.isNew)
        predictor2.set_is_modified_df(evaluate_m.isModified)
        report_df = predictor2.export_report(predict_ver)
        if report_df is not None:
            ens_analyzer.set_report_df(report_df[REPORT_COLUMNS])
            ens_analyzer.calculate()

    # export report
    ens_df = ens_analyzer.calculate_average(ITER)
    predictor_type_name = "{0}{1}".format(PRED_TYPE, PRED_TYPE2)
    # use the combined type name so the report reflects both predictors
    ens_analyzer.export(target_sw=TARGET, df=ens_df,
                        predictor_type=predictor_type_name)
    ens_analyzer.export_accum_df(target_sw=TARGET)
Example #5
def main():
    import csv
    f = open('sw_indecies.csv', 'w')
    writer = csv.writer(f, lineterminator='\n')

    model_dict = model_creator.get_model_dictionary()
    # The first column labels each row with '<software> <version>'.
    col_list = ['version', 'total modules', 'modified modules', 'bug modules',
                'modified bug modules']
    writer.writerow(col_list)
    for sw_name, models in model_dict.items():
        for model in models:
            print(sw_name, model.final_version)
            metrics_dir = "/Users/{}/Dropbox/STUDY/Metrics/".format(ENV)
            version = model.final_version
            metrics = Metrics_Origin(version, metrics_dir, model)
            modified_df, modified_fault = metrics.get_modified_df()
            df = pd.concat([modified_df, modified_fault], axis=1)
            modified_df = df.rename(columns={df.columns[-1]: 'fault'})

            mrg_df, fault = metrics.mrg_df, metrics.fault
            df = pd.concat([mrg_df, fault], axis=1)
            df = df.rename(columns={df.columns[-1]: 'fault'})
            # print(df)
            li = ['{} {}'.format(sw_name, model.final_version)]
            # total number of modules
            print('total modules, {}'.format(len(df)))
            li.append(len(df))
            # number of modified modules
            print('modified modules, {}'.format(len(modified_df)))
            li.append(len(modified_df))

            # total number of faulty modules
            d = df[df['fault'] == 1]
            print('bug modules, {}'.format(len(d)))
            li.append(len(d))

            # faulty modules among the modified ones; filter on modified_df's
            # own 'fault' column so the mask's index matches the frame
            d = modified_df[modified_df['fault'] == 1]
            print('modified bug modules, {}'.format(len(d)))
            li.append(len(d))
            writer.writerow(li)
    f.close()
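
The explicit f.close() above flushes the CSV, but a with block is the idiomatic way to guarantee that; a minimal sketch with an illustrative row:

import csv

rows = [['Solr 4.2.0', 100, 40, 12, 9]]  # illustrative counts, not real data
with open('sw_indecies.csv', 'w', newline='') as f:
    writer = csv.writer(f, lineterminator='\n')
    writer.writerow(['version', 'total modules', 'modified modules',
                     'bug modules', 'modified bug modules'])
    writer.writerows(rows)  # the file is closed (and flushed) on block exit
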
Example #6
def __create_boxplot_seaborn(g1, g2, save_name, title=None):
    # plot the two versions' metric distributions side by side
    import seaborn as sns
    g1.columns = ['pre']  # plain lists: nested lists would create a MultiIndex
    g2.columns = ['cre']
    hige = pd.concat([g1, g2], axis=1)
    # print(hige)
    sns.stripplot(data=hige)
    # ax.set_xticklabels(['pre', 'cre'])
    plt.grid()
    plt.xlabel('VERSION')
    plt.ylabel('METRICS VALUE')
    plt.savefig(save_name)


if __name__ == '__main__':
    """
    メトリクスの分布を箱ひげ図で表すスクリプト
    """
    v1 = '4.1.0'
    v2 = '4.2.0'
    METRICS_DIR = '/Users/' + ENV + '/Dropbox/STUDY/Metrics/Solr/all'
    version1 = Metrics_Origin(v1, METRICS_DIR)
    version2 = Metrics_Origin(v2, METRICS_DIR)
    metrics = 'pd2'
    _v1 = version1.mrg_df[[metrics]]
    _v2 = version2.mrg_df[[metrics]]
    save_name = 'compare-wisky-' + metrics + '.png'
    # _create_boxplot_diagram(_v1, _v2, save_name)
    __create_boxplot_seaborn(_v1, _v2, save_name)
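
For reference, a self-contained version of the same plotting pattern with synthetic data (column names and the output file name are illustrative):

import matplotlib
matplotlib.use('Agg')  # headless backend so savefig works without a display
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

rng = np.random.default_rng(0)
hige = pd.DataFrame({'pre': rng.exponential(2.0, 200),
                     'cre': rng.exponential(2.5, 200)})
sns.stripplot(data=hige)  # one strip of points per column (version)
plt.grid()
plt.xlabel('VERSION')
plt.ylabel('METRICS VALUE')
plt.savefig('compare-strip-example.png')
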
Example #7
def predict(ver, predict_ver, alike_metrics):
    predictor_rep = PredictorRepository(predict_ver, ver)
    # if TARGET == 'Derby':
    #     #  Apache-Derby
    #     training_m = Metrics_Origin(ver, METRICS_DIR)
    #     evaluate_m = Metrics_Origin(predict_ver, METRICS_DIR)
    # else:
    #     # NO SERVICE
    #     return

    training_m = Metrics_Origin(ver, METRICS_DIR)
    evaluate_m = Metrics_Origin(predict_ver, METRICS_DIR)

    # nml_analyzer = AUCAnalyzer(predict_ver, 'NML', TARGET)
    # rfn_analyzer = AUCAnalyzer(predict_ver, 'RFN', TARGET)
    # itg_analyzer = AUCAnalyzer(predict_ver, 'ITG', TARGET)
    nml_analyzer = Analyzer(predict_ver, 'NML')
    rfn_analyzer = Analyzer(predict_ver, 'RFN')
    itg_analyzer = Analyzer(predict_ver, 'ITG')

    for i in tqdm(range(ITER)):
        # NML MODEL
        predictor = predictor_rep.get_predictor('NML', PRED_TYPE)
        if predictor is None:
            print('predictor not found, type: ' + PRED_TYPE)
            return
        # imblearn >= 0.6 renamed ratio/fit_sample to sampling_strategy/fit_resample.
        sm = RandomOverSampler(sampling_strategy='auto',
                               random_state=random.randint(1, 100))
        X_resampled, y_resampled = sm.fit_resample(training_m.product_df,
                                                   training_m.fault)
        model = predictor.train_model(X_resampled, y_resampled)
        nml_value, importance = predictor.predict_proba(
            model, evaluate_m.product_df, evaluate_m.fault,
            TARGET + "-ex1nml.csv")
        predictor.set_is_new_df(evaluate_m.isNew)
        predictor.set_is_modified_df(evaluate_m.isModified)
        report_df = predictor.export_report(predict_ver)
        if report_df is not None:
            nml_analyzer.set_report_df(report_df[REPORT_COLUMNS])
            nml_analyzer.calculate()

        # RFN MODEL
        predictor = predictor_rep.get_predictor('RFN', PRED_TYPE)
        sm = RandomOverSampler(sampling_strategy='auto',
                               random_state=random.randint(1, 100))
        X_resampled, y_resampled = sm.fit_resample(training_m.mrg_df,
                                                   training_m.fault)
        model = predictor.train_model(X_resampled, y_resampled)
        rfn_value, importance = predictor.predict_proba(
            model, evaluate_m.mrg_df, evaluate_m.fault, TARGET + "-ex1rfn.csv")
        predictor.set_is_new_df(evaluate_m.isNew)
        predictor.set_is_modified_df(evaluate_m.isModified)
        report_df = predictor.export_report(predict_ver)
        if report_df is not None:
            rfn_analyzer.set_report_df(report_df[REPORT_COLUMNS])
            rfn_analyzer.calculate()

        # INTELLIGENCE MODEL
        predictor = predictor_rep.get_predictor('ITG', PRED_TYPE)
        sm = RandomOverSampler(sampling_strategy='auto',
                               random_state=random.randint(1, 100))
        alike_df = training_m.get_specific_df(alike_metrics)
        X_resampled, y_resampled = sm.fit_resample(alike_df, training_m.fault)
        model = predictor.train_model(X_resampled, y_resampled)
        alike_df = evaluate_m.get_specific_df(alike_metrics)
        itg_value, importance = predictor.predict_proba(
            model, alike_df, evaluate_m.fault, TARGET + "-ex1itg.csv")
        predictor.set_is_new_df(evaluate_m.isNew)
        predictor.set_is_modified_df(evaluate_m.isModified)
        report_df = predictor.export_report(predict_ver)
        if report_df is not None:
            itg_analyzer.set_report_df(report_df[REPORT_COLUMNS])
            itg_analyzer.calculate()

    # export report
    nml_df = nml_analyzer.calculate_average(ITER)
    rfn_df = rfn_analyzer.calculate_average(ITER)
    itg_df = itg_analyzer.calculate_average(ITER)
    df = pd.concat([nml_df, rfn_df, itg_df], ignore_index=True)

    nml_analyzer.export_accum_df(target_sw=TARGET)
    rfn_analyzer.export_accum_df(target_sw=TARGET)
    itg_analyzer.export_accum_df(target_sw=TARGET)

    nml_analyzer.export(target_sw=TARGET, df=df,
                        predictor_type=PRED_TYPE)  # any of the analyzers works here