Exemplo n.º 1
0
def ex12_short(mdl_typ):
    """Fit a logit model per version on selected metrics and score the next version.

    For every version returned by ``version.get_version_short_list()`` except
    ``'4.5.0'``, the training frame is built with
    ``ex02.create_df_used_all_predate_nml``, the explanatory columns are chosen
    by ``ex11.get_best_paramaters``, and the fitted coefficients are applied to
    the frame of the following version (``mmm.create_df``). The evaluation rows
    and the chosen parameters are written as CSV files under
    ``./../result/ex12/``.

    Args:
        mdl_typ: Candidate metric set to start from: ``'nml'``, ``'rfn'`` or
            ``'chrn'``.

    Raises:
        ValueError: If ``mdl_typ`` is not one of the supported keys.
    """
    candidate_sets = {
        'nml': ['TCchar', 'CountLineCode', 'CountLineComment', 'N', 'NN',
                'NF', 'SumCyclomatic'],
        'rfn': ['TCchar', 'CountLineCode', 'CountLineComment', 'N', 'NN',
                'NF', 'SumCyclomatic', 'chum', 'relatedChum', 'delectChum',
                'ncdChum'],
        'chrn': ['chum', 'relatedChum', 'delectChum', 'ncdChum'],
    }
    if mdl_typ not in candidate_sets:
        # Fail early instead of hitting an unbound ``prms`` inside the loop.
        raise ValueError('unknown mdl_typ: %r' % (mdl_typ,))
    prms = candidate_sets[mdl_typ]

    # Within this function the value only labels the output file name.
    THRESHOLD = 0.5
    ev_values = []
    prm_note = []

    for curr_ver in version.get_version_short_list():
        next_ver = version.get_next_version(curr_ver)
        print(curr_ver)
        print(next_ver)
        if curr_ver == '4.5.0':
            # Not evaluated (presumably the last version, without a
            # successor to score against - confirm).
            continue

        curr_df = ex02.create_df_used_all_predate_nml(curr_ver)

        # Columns picked by the AIC-based selection helper.
        best_prms = ex11.get_best_paramaters(prms, curr_df, curr_ver, mdl_typ)
        best_cols = list(best_prms)

        # Train on the current version: 'fault' is the dependent variable.
        logit = sm.Logit(curr_df['fault'], curr_df[best_cols])
        result = logit.fit()
        coef = pd.Series(result.params.values, index=best_prms)

        # Apply the coefficients to the next version's metrics.
        next_df = mmm.create_df(next_ver)
        logit_odds = next_df[best_cols].dot(coef)
        evals = logit.cdf(logit_odds)

        ev_values.append([curr_ver] + list(ex01.evaluate_ex(next_df, evals)))

        # The note file alternates rows: version, then its parameters.
        prm_note.append(curr_ver)
        prm_note.append(best_prms)

    # Passing ``columns`` here keeps an empty result set writable
    # (header-only CSV) instead of failing on a column-count mismatch.
    df = pd.DataFrame(
        ev_values, columns=['version', 'nm', 'np', 'nf', 'nc', 'f_value'])
    df = df.sort_index(ascending=False)
    # ``cols`` is no longer accepted by DataFrame.to_csv; None was its default.
    df.to_csv('./../result/ex12/record_ex12_' + mdl_typ + '_' + str(THRESHOLD) + '.csv',
              index=False)
    pd.DataFrame(prm_note).to_csv(
        './../result/ex12/prm_note_' + mdl_typ + '.csv', index=False)
Exemplo n.º 2
0
def ex1_short(mdl_typ, threshold):
    """Fit a logit model per version on a fixed metric set and score the next version.

    For every version returned by ``version.get_version_short_list()`` except
    ``'4.5.0'``, a ``sm.Logit`` model of ``'fault'`` is fitted on the current
    version's frame (``mmm.create_df``) and its coefficients are applied to
    the next version's frame. One evaluation row per version is written to
    ``./../result/ex1/record_ex1_<mdl_typ>_<threshold>.csv``.

    Args:
        mdl_typ: Metric set to use: ``'nml'``, ``'rfn'`` or ``'chrn'``.
        threshold: Value embedded in the output file name.
            NOTE(review): it is bound to a *local* ``THRESHOLD`` only, so it
            does not reach ``evaluate_ex`` from here - confirm this is
            intended.

    Raises:
        ValueError: If ``mdl_typ`` is not one of the supported keys.
    """
    metric_sets = {
        'nml': ['TCchar', 'CountLineCode', 'CountLineComment', 'N', 'NN',
                'NF', 'SumCyclomatic'],
        'rfn': ['TCchar', 'CountLineCode', 'CountLineComment', 'N', 'NN',
                'NF', 'SumCyclomatic', 'chum', 'relatedChum', 'delectChum',
                'ncdChum'],
        'chrn': ['chum', 'relatedChum', 'delectChum', 'ncdChum'],
    }
    if mdl_typ not in metric_sets:
        # Fail early instead of hitting an unbound ``prms`` inside the loop.
        raise ValueError('unknown mdl_typ: %r' % (mdl_typ,))
    prms = metric_sets[mdl_typ]

    THRESHOLD = threshold
    ev_values = []
    print('operation starts')
    for curr_ver in version.get_version_short_list():
        next_ver = version.get_next_version(curr_ver)
        print(curr_ver)
        print(next_ver)
        if curr_ver == '4.5.0':
            # Not evaluated (presumably no successor version - confirm).
            continue

        curr_df = mmm.create_df(curr_ver)

        # Train on the current version: 'fault' is the dependent variable.
        logit = sm.Logit(curr_df['fault'], curr_df[prms])
        result = logit.fit()
        coef = pd.Series(result.params.values, index=prms)
        print(coef)

        # Apply the coefficients to the next version's metrics.
        next_df = mmm.create_df(next_ver)
        ev_data = next_df[prms]
        print(ev_data)

        logit_odds = ev_data.dot(coef)
        evals = logit.cdf(logit_odds)
        print(evals)

        ev_values.append([curr_ver] + list(evaluate_ex(next_df, evals)))

    # Passing ``columns`` here keeps an empty result set writable
    # (header-only CSV) instead of failing on a column-count mismatch.
    df = pd.DataFrame(
        ev_values, columns=['version', 'nm', 'np', 'nf', 'nc', 'f_value'])
    df = df.sort_index(ascending=False)
    # ``cols`` is no longer accepted by DataFrame.to_csv; None was its default.
    df.to_csv('./../result/ex1/record_ex1_' + mdl_typ + '_' + str(THRESHOLD) + '.csv',
              index=False)
Exemplo n.º 3
0
def evaluate_merged(exNum):
    """Collect ``evaluate_ex_merge`` results per version into one CSV file.

    Every version returned by ``version.get_version_short_list()`` except
    ``'4.5.0'`` contributes one row made of the version followed by the values
    returned from ``evaluate_ex_merge(exNum, curr_ver)``. The rows are written
    to ``./../result/ex<exNum>/Mrecord_ex<exNum>_0.5.csv``.

    Args:
        exNum: Experiment number; forwarded to ``evaluate_ex_merge`` and used
            to build the output directory and file name.
    """
    # Within this function the value only labels the output file name.
    THRESHOLD = 0.5
    ev_values = []
    print('operation starts')
    for curr_ver in version.get_version_short_list():
        next_ver = version.get_next_version(curr_ver)
        print(curr_ver)
        print(next_ver)
        if curr_ver == '4.5.0':
            # Not evaluated (presumably no successor version - confirm).
            continue
        ev_values.append([curr_ver] + list(evaluate_ex_merge(exNum, curr_ver)))

    # Passing ``columns`` here keeps an empty result set writable
    # (header-only CSV) instead of failing on a column-count mismatch.
    df = pd.DataFrame(
        ev_values, columns=['version', 'nm', 'np', 'nf', 'nc', 'f_value'])
    df = df.sort_index(ascending=False)
    # ``cols`` is no longer accepted by DataFrame.to_csv; None was its default.
    df.to_csv('./../result/ex'+str(exNum)+'/Mrecord_ex'+str(exNum)+'_' + str(THRESHOLD) + '.csv',
              index=False)
Exemplo n.º 4
0
def ex12_short(mdl_typ):
    """Evaluate next-version fault prediction with AIC-selected logit models.

    Each version from ``version.get_version_short_list()`` other than
    ``'4.5.0'`` is used as training data
    (``ex02.create_df_used_all_predate_nml``). ``ex11.get_best_paramaters``
    picks the explanatory columns, a ``sm.Logit`` model of ``'fault'`` is
    fitted, and the coefficients are applied to the following version's frame
    (``mmm.create_df``). Two CSV files are written under
    ``./../result/ex12/``: the evaluation records and a note of the chosen
    parameters.

    Args:
        mdl_typ: ``'nml'``, ``'rfn'`` or ``'chrn'``; selects the candidate
            metric list handed to the parameter selection.

    Raises:
        ValueError: If ``mdl_typ`` is none of the supported values.
    """
    base_metrics = [
        'TCchar', 'CountLineCode', 'CountLineComment', 'N', 'NN', 'NF',
        'SumCyclomatic'
    ]
    churn_metrics = ['chum', 'relatedChum', 'delectChum', 'ncdChum']
    if mdl_typ == 'nml':
        prms = base_metrics
    elif mdl_typ == 'rfn':
        prms = base_metrics + churn_metrics
    elif mdl_typ == 'chrn':
        prms = churn_metrics
    else:
        # Previously this fell through and failed later on an unbound name.
        raise ValueError('unknown mdl_typ: %r' % (mdl_typ,))

    # Within this function the value only labels the output file name.
    THRESHOLD = 0.5
    vers = version.get_version_short_list()
    ev_values = []
    prm_note = []

    for curr_ver in vers:
        next_ver = version.get_next_version(curr_ver)
        print(curr_ver)
        print(next_ver)
        if curr_ver != '4.5.0':
            curr_df = ex02.create_df_used_all_predate_nml(curr_ver)

            # Explanatory columns chosen by the selection helper.
            best_prms = ex11.get_best_paramaters(prms, curr_df, curr_ver,
                                                 mdl_typ)
            selected = list(best_prms)

            # Fit on the current version.
            logit = sm.Logit(curr_df['fault'], curr_df[selected])
            result = logit.fit()
            coef = pd.Series(result.params.values, index=best_prms)

            # Score the next version with the fitted coefficients.
            next_df = mmm.create_df(next_ver)
            evals = logit.cdf(next_df[selected].dot(coef))

            ev_value = [curr_ver]
            ev_value.extend(ex01.evaluate_ex(next_df, evals))
            ev_values.append(ev_value)

            # The note file alternates rows: version, then its parameters.
            prm_note.extend([curr_ver, best_prms])

    # Supplying the column names at construction also covers the case of no
    # evaluated versions, where assigning ``df.columns`` would fail.
    df = pd.DataFrame(ev_values,
                      columns=['version', 'nm', 'np', 'nf', 'nc', 'f_value'])
    df = df.sort_index(ascending=False)
    # ``cols`` is no longer accepted by DataFrame.to_csv; None was its default.
    df.to_csv('./../result/ex12/record_ex12_' + mdl_typ + '_' +
              str(THRESHOLD) + '.csv',
              index=False)
    df = pd.DataFrame(prm_note)
    df.to_csv('./../result/ex12/prm_note_' + mdl_typ + '.csv',
              index=False)
def ex4():
    """Fit a logit model per version on normalized metrics and score the next version.

    For every version from ``version.get_version_list()`` except
    ``'10.10.2.0'``, the metrics CSV of that version is loaded with
    ``create_df_ref`` and passed through ``nrmlize``; a ``sm.Logit`` model of
    ``'fault'`` is fitted and its coefficients are applied to the next
    version's (equally prepared) metrics. The frame returned by
    ``evaluate_ex`` is written per version, and the collected ``record`` lists
    are written to one summary CSV named after the module-level ``THRESHOLD``.

    A version whose model cannot be fitted is reported and skipped. Earlier
    the code carried on with whatever ``result`` was left from the previous
    iteration (or crashed on the first one).
    """
    # ``record`` is rebound as a module global on every iteration
    # (presumably so that evaluate_ex can append to it - confirm).
    global record
    global THRESHOLD
    columns = [
        'TCchar', 'LOC', 'N', 'NN', 'NF', 'chum', 'relatedChum',
        'delectChum', 'ncdChum', 'intercept'
    ]
    records = []
    for curr_ver in version.get_version_list():
        if curr_ver == '10.10.2.0':
            # Not evaluated (presumably no successor version - confirm).
            continue
        next_version = version.get_next_version(curr_ver)
        print(curr_ver)

        record = []
        record.append(curr_ver)

        # Training data of the current version, normalized.
        df = create_df_ref('./../data/metrics/METRICS_V' + curr_ver +
                           '.csv')
        df = nrmlize(df)

        logit = sm.Logit(df['fault'], df[columns])
        try:
            result = logit.fit()
        except Exception:
            print(curr_ver + 'is singular.')
            # Without a fresh fit there are no valid coefficients for this
            # version, so skip it rather than reuse a stale ``result``.
            continue

        coef = pd.Series(result.params.values, index=columns)

        # Metrics of the next version, prepared the same way.
        df = create_df_ref('./../data/metrics/METRICS_V' + next_version +
                           '.csv')
        df = nrmlize(df)

        logit_odds = df[columns].dot(coef)
        print('---logit_odds---')
        print(logit_odds)
        evals = logit.cdf(logit_odds)
        print('---evals---')
        print(evals)

        df = evaluate_ex(df, evals)
        pd.DataFrame(df).to_csv('./../data/result/ex4_ref/result_' +
                                curr_ver + 'ex4_ref.csv',
                                index=False)
        records.append(record)

    # ``cols`` is no longer accepted by DataFrame.to_csv; None was its default.
    pd.DataFrame(records).to_csv('./../data/result/record_ex4_ref_' +
                                 str(THRESHOLD) + '.csv',
                                 index=False)
Exemplo n.º 6
0
def ex3(threshold):
    """Compare two row-normalized logit models per version on the next version.

    For every version from ``version.get_version_short_list()`` except
    ``'4.5.0'``, two ``sm.Logit`` models of ``'fault'`` are fitted on the
    current version's frame (``mm.create_df``): one on the module-level
    ``nrm_prms`` columns and one on ``ref_prms``. Each explanatory row is
    divided by its row sum before fitting and before scoring. The coefficients
    are applied to the next version's frame and the ``ex01.evaluate_ex``
    results are written to ``./../result/ex3/record_ex3_nml_<threshold>.csv``
    and ``./../result/ex3/record_ex3_rfn_<threshold>.csv``.

    If the ``ref_prms`` model cannot be fitted for a version, that version is
    left out of the second file. Earlier the code carried on with the
    ``nrm_prms`` fit result, pairing its coefficients with the wrong columns.

    Args:
        threshold: Stored in the module-level ``THRESHOLD`` and embedded in
            both output file names.
    """
    global THRESHOLD
    global nrm_prms
    global ref_prms

    THRESHOLD = threshold
    vers = version.get_version_short_list()
    ev_values_nml = []
    ev_values_ref = []
    print('operation starts')
    for curr_ver in vers:
        next_ver = version.get_next_version(curr_ver)
        print(curr_ver)
        print(next_ver)
        if curr_ver == '4.5.0':
            # Not evaluated (presumably no successor version - confirm).
            continue

        curr_df = mm.create_df(curr_ver)
        dv_data = curr_df['fault']

        # --- model on nrm_prms ---
        ev_data = curr_df[nrm_prms]
        # Scale every row so that its values sum to 1.
        ev_data = ev_data.div(ev_data.sum(1), axis=0)

        logit = sm.Logit(dv_data, ev_data)
        result = logit.fit()
        coef = pd.Series(result.params.values, index=nrm_prms)
        print(coef)

        next_df = mm.create_df(next_ver)
        ev_data = next_df[nrm_prms]
        ev_data = ev_data.div(ev_data.sum(1), axis=0)

        evals = logit.cdf(ev_data.dot(coef))
        ev_values_nml.append(
            [curr_ver] + list(ex01.evaluate_ex(next_df, evals)))

        # --- model on ref_prms ---
        ev_data = curr_df[ref_prms]
        ev_data = ev_data.div(ev_data.sum(1), axis=0)

        logit = sm.Logit(dv_data, ev_data)
        try:
            result = logit.fit()
        except Exception:
            print('Singular matrix occured')
            # No valid coefficients for this model: skip its evaluation
            # instead of reusing the nrm_prms fit result.
            continue
        coef = pd.Series(result.params.values, index=ref_prms)

        ev_data = next_df[ref_prms]
        ev_data = ev_data.div(ev_data.sum(1), axis=0)

        evals = logit.cdf(ev_data.dot(coef))
        ev_values_ref.append(
            [curr_ver] + list(ex01.evaluate_ex(next_df, evals)))

    header = ['version', 'nm', 'np', 'nf', 'nc', 'f_value']
    # Supplying the column names at construction keeps an empty result set
    # writable; ``cols`` is no longer accepted by DataFrame.to_csv.
    df = pd.DataFrame(ev_values_nml, columns=header)
    df.to_csv('./../result/ex3/record_ex3_nml_' + str(THRESHOLD) + '.csv',
              index=False)
    df = pd.DataFrame(ev_values_ref, columns=header)
    df.to_csv('./../result/ex3/record_ex3_rfn_' + str(THRESHOLD) + '.csv',
              index=False)
Exemplo n.º 7
0
def ex3(threshold):
    """Fit and evaluate the ``nrm_prms`` and ``ref_prms`` logit models per version.

    Iterates over ``version.get_version_short_list()`` and, for each version
    other than ``'4.5.0'``, trains on the frame from ``mm.create_df`` of that
    version and scores the frame of the next version. Explanatory rows are
    divided by their row sum before both training and scoring. The rows
    produced by ``ex01.evaluate_ex`` are saved to
    ``./../result/ex3/record_ex3_nml_<threshold>.csv`` and
    ``./../result/ex3/record_ex3_rfn_<threshold>.csv``.

    A failed fit of the ``ref_prms`` model now drops that version from the
    second file; before, execution continued with the ``result`` of the
    ``nrm_prms`` model still bound.

    Args:
        threshold: Assigned to the module-level ``THRESHOLD`` and used in the
            output file names.
    """
    global THRESHOLD
    global nrm_prms
    global ref_prms

    THRESHOLD = threshold
    ev_values_nml = []
    ev_values_ref = []
    print('operation starts')
    for curr_ver in version.get_version_short_list():
        next_ver = version.get_next_version(curr_ver)
        print(curr_ver)
        print(next_ver)
        if curr_ver != '4.5.0':
            curr_df = mm.create_df(curr_ver)
            dv_data = curr_df['fault']

            # First model: nrm_prms columns, each row scaled to sum to 1.
            train_x = curr_df[nrm_prms]
            train_x = train_x.div(train_x.sum(1), axis=0)
            logit = sm.Logit(dv_data, train_x)
            result = logit.fit()
            coef = pd.Series(result.params.values, index=nrm_prms)
            print(coef)

            next_df = mm.create_df(next_ver)
            test_x = next_df[nrm_prms]
            test_x = test_x.div(test_x.sum(1), axis=0)
            evals = logit.cdf(test_x.dot(coef))

            ev_value = [curr_ver]
            ev_value.extend(ex01.evaluate_ex(next_df, evals))
            ev_values_nml.append(ev_value)

            # Second model: ref_prms columns, same preparation.
            train_x = curr_df[ref_prms]
            train_x = train_x.div(train_x.sum(1), axis=0)
            logit = sm.Logit(dv_data, train_x)
            try:
                result = logit.fit()
            except Exception:
                print('Singular matrix occured')
                # Nothing valid to evaluate for this model and version.
                continue
            coef = pd.Series(result.params.values, index=ref_prms)

            test_x = next_df[ref_prms]
            test_x = test_x.div(test_x.sum(1), axis=0)
            evals = logit.cdf(test_x.dot(coef))

            ev_value = [curr_ver]
            ev_value.extend(ex01.evaluate_ex(next_df, evals))
            ev_values_ref.append(ev_value)

    # Column names are given at construction so that an empty list still
    # yields a writable frame; ``cols`` is no longer a to_csv keyword.
    df = pd.DataFrame(ev_values_nml,
                      columns=['version', 'nm', 'np', 'nf', 'nc', 'f_value'])
    df.to_csv('./../result/ex3/record_ex3_nml_' + str(THRESHOLD) +'.csv', index=False)
    df = pd.DataFrame(ev_values_ref,
                      columns=['version', 'nm', 'np', 'nf', 'nc', 'f_value'])
    df.to_csv('./../result/ex3/record_ex3_rfn_' + str(THRESHOLD) +'.csv', index=False)
Exemplo n.º 8
0
def ex4_short(mdl_typ):
    """Select metrics by AIC per version, fit a logit model and score the next version.

    For every version from ``version.get_version_short_list()`` except
    ``'4.5.0'``, ``get_aic`` is evaluated for the full metric list and for
    every subset that drops exactly one metric. The candidate with the lowest
    AIC is fitted with ``sm.Logit`` against ``'fault'`` and its coefficients
    are applied to the next version's frame (``mm.create_df``). The
    ``ex01.evaluate_ex`` rows are written to
    ``./../result/ex4/record_ex4_<mdl_typ>_0.5.csv``.

    Args:
        mdl_typ: Metric set to start from: ``'nml'``, ``'rfn'`` or ``'chrn'``.

    Raises:
        ValueError: If ``mdl_typ`` is not one of the supported keys.
    """
    metric_sets = {
        'nml': ['TCchar', 'CountLineCode', 'N', 'NN', 'NF', 'SumCyclomatic'],
        'rfn': [
            'TCchar', 'CountLineCode', 'N', 'NN', 'NF', 'SumCyclomatic',
            'chum', 'relatedChum', 'delectChum', 'ncdChum'
        ],
        'chrn': ['chum', 'relatedChum', 'delectChum', 'ncdChum'],
    }
    if mdl_typ not in metric_sets:
        # Fail early instead of hitting an unbound ``prms`` inside the loop.
        raise ValueError('unknown mdl_typ: %r' % (mdl_typ,))
    prms = metric_sets[mdl_typ]

    # Within this function the value only labels the output file name.
    THRESHOLD = 0.5
    ev_values = []
    print('operation starts')
    for curr_ver in version.get_version_short_list():
        next_ver = version.get_next_version(curr_ver)
        print(curr_ver)
        print(next_ver)
        if curr_ver == '4.5.0':
            # Not evaluated (presumably no successor version - confirm).
            continue

        curr_df = mm.create_df(curr_ver)
        dv_data = curr_df['fault']

        # Candidate 0 is the full list; the others drop exactly one metric.
        candidates = [prms]
        candidates.extend(itertools.combinations(prms, len(prms) - 1))

        prm_dit = {}
        for key, variables in enumerate(candidates):
            prm_dit[key] = get_aic(dv_data, curr_df[list(variables)])
        print(prm_dit)

        # A lower AIC indicates the preferred model, so take the minimum
        # (the previous code took the maximum). Assumes get_aic returns the
        # standard AIC - confirm. Ties resolve to the lowest candidate key.
        best_hash = min((aic, key) for (key, aic) in prm_dit.items())[1]
        best_prms = candidates[best_hash]
        best_cols = list(best_prms)

        # Fit the selected model on the current version.
        logit = sm.Logit(dv_data, curr_df[best_cols])
        result = logit.fit()
        coef = pd.Series(result.params.values, index=best_prms)

        # Score the next version with the fitted coefficients.
        next_df = mm.create_df(next_ver)
        logit_odds = next_df[best_cols].dot(coef)
        evals = logit.cdf(logit_odds)

        ev_values.append([curr_ver] + list(ex01.evaluate_ex(next_df, evals)))

    # Passing ``columns`` here keeps an empty result set writable
    # (header-only CSV) instead of failing on a column-count mismatch.
    df = pd.DataFrame(
        ev_values, columns=['version', 'nm', 'np', 'nf', 'nc', 'f_value'])
    df = df.sort_index(ascending=False)
    # ``cols`` is no longer accepted by DataFrame.to_csv; None was its default.
    df.to_csv('./../result/ex4/record_ex4_' + mdl_typ + '_' + str(THRESHOLD) +
              '.csv',
              index=False)
Exemplo n.º 9
0
def ex1_short(mdl_typ, threshold):
    """Train on each version with a fixed metric list and evaluate on its successor.

    Each version from ``version.get_version_short_list()`` other than
    ``'4.5.0'`` is loaded with ``mmm.create_df``; a ``sm.Logit`` model of
    ``'fault'`` on the chosen metric columns is fitted, and the coefficients
    are applied to the frame of the next version. The rows returned by
    ``evaluate_ex`` are saved to
    ``./../result/ex1/record_ex1_<mdl_typ>_<threshold>.csv``.

    Args:
        mdl_typ: ``'nml'``, ``'rfn'`` or ``'chrn'``; selects the metric list.
        threshold: Value placed in the output file name.
            NOTE(review): only a local ``THRESHOLD`` is assigned here, so
            ``evaluate_ex`` does not see this value through it - confirm
            this is intended.

    Raises:
        ValueError: If ``mdl_typ`` is none of the supported values.
    """
    base_metrics = [
        'TCchar', 'CountLineCode', 'CountLineComment', 'N', 'NN', 'NF',
        'SumCyclomatic'
    ]
    churn_metrics = ['chum', 'relatedChum', 'delectChum', 'ncdChum']
    if mdl_typ == 'nml':
        prms = base_metrics
    elif mdl_typ == 'rfn':
        prms = base_metrics + churn_metrics
    elif mdl_typ == 'chrn':
        prms = churn_metrics
    else:
        # Previously this fell through and failed later on an unbound name.
        raise ValueError('unknown mdl_typ: %r' % (mdl_typ,))

    THRESHOLD = threshold
    vers = version.get_version_short_list()
    ev_values = []
    print('operation starts')
    for curr_ver in vers:
        next_ver = version.get_next_version(curr_ver)
        print(curr_ver)
        print(next_ver)
        if curr_ver != '4.5.0':
            curr_df = mmm.create_df(curr_ver)

            # Fit on the current version.
            logit = sm.Logit(curr_df['fault'], curr_df[prms])
            result = logit.fit()
            coef = pd.Series(result.params.values, index=prms)
            print(coef)

            # Score the next version with the fitted coefficients.
            next_df = mmm.create_df(next_ver)
            ev_data = next_df[prms]
            print(ev_data)

            evals = logit.cdf(ev_data.dot(coef))
            print(evals)

            ev_value = [curr_ver]
            ev_value.extend(evaluate_ex(next_df, evals))
            ev_values.append(ev_value)

    # Supplying the column names at construction also covers the case of no
    # evaluated versions, where assigning ``df.columns`` would fail.
    df = pd.DataFrame(ev_values,
                      columns=['version', 'nm', 'np', 'nf', 'nc', 'f_value'])
    df = df.sort_index(ascending=False)
    # ``cols`` is no longer accepted by DataFrame.to_csv; None was its default.
    df.to_csv('./../result/ex1/record_ex1_' + mdl_typ + '_' + str(THRESHOLD) +
              '.csv',
              index=False)
Exemplo n.º 10
0
def ex4_short(mdl_typ):
    """Pick the lowest-AIC metric subset per version and evaluate on the next version.

    Iterates over ``version.get_version_short_list()``; for each version other
    than ``'4.5.0'`` it computes ``get_aic`` for the full metric list and for
    each leave-one-out subset, fits a ``sm.Logit`` model of ``'fault'`` on the
    subset with the lowest AIC, and applies the coefficients to the frame of
    the next version (``mm.create_df``). Results from ``ex01.evaluate_ex`` are
    saved to ``./../result/ex4/record_ex4_<mdl_typ>_0.5.csv``.

    Args:
        mdl_typ: ``'nml'``, ``'rfn'`` or ``'chrn'``; selects the metric list.

    Raises:
        ValueError: If ``mdl_typ`` is none of the supported values.
    """
    code_metrics = ['TCchar', 'CountLineCode', 'N', 'NN', 'NF', 'SumCyclomatic']
    churn_metrics = ['chum', 'relatedChum', 'delectChum', 'ncdChum']
    if mdl_typ == 'nml':
        prms = code_metrics
    elif mdl_typ == 'rfn':
        prms = code_metrics + churn_metrics
    elif mdl_typ == 'chrn':
        prms = churn_metrics
    else:
        # Previously this fell through and failed later on an unbound name.
        raise ValueError('unknown mdl_typ: %r' % (mdl_typ,))

    # Within this function the value only labels the output file name.
    THRESHOLD = 0.5
    vers = version.get_version_short_list()
    ev_values = []
    print('operation starts')
    for curr_ver in vers:
        next_ver = version.get_next_version(curr_ver)
        print(curr_ver)
        print(next_ver)
        if curr_ver != '4.5.0':
            curr_df = mm.create_df(curr_ver)
            dv_data = curr_df['fault']

            # key -> AIC and key -> column set; key 0 is the full list.
            prm_dit = {0: get_aic(dv_data, curr_df[prms])}
            hash_dict = {0: prms}
            key = 1
            for variables in itertools.combinations(prms, len(prms) - 1):
                prm_dit[key] = get_aic(dv_data, curr_df[list(variables)])
                hash_dict[key] = variables
                key += 1

            print(prm_dit)
            # Model selection by AIC prefers the smallest value; the previous
            # ``max`` picked the worst candidate. Assumes get_aic returns the
            # standard AIC - confirm. Sorting the keys makes ties resolve to
            # the lowest key deterministically.
            best_hash = min(sorted(prm_dit), key=prm_dit.get)
            best_prms = hash_dict[best_hash]
            selected = list(best_prms)

            # Fit the selected model on the current version.
            logit = sm.Logit(dv_data, curr_df[selected])
            result = logit.fit()
            coef = pd.Series(result.params.values, index=best_prms)

            # Score the next version with the fitted coefficients.
            next_df = mm.create_df(next_ver)
            evals = logit.cdf(next_df[selected].dot(coef))

            ev_value = [curr_ver]
            ev_value.extend(ex01.evaluate_ex(next_df, evals))
            ev_values.append(ev_value)

    # Supplying the column names at construction also covers the case of no
    # evaluated versions, where assigning ``df.columns`` would fail.
    df = pd.DataFrame(ev_values,
                      columns=['version', 'nm', 'np', 'nf', 'nc', 'f_value'])
    df = df.sort_index(ascending=False)
    # ``cols`` is no longer accepted by DataFrame.to_csv; None was its default.
    df.to_csv('./../result/ex4/record_ex4_' + mdl_typ + '_' + str(THRESHOLD) + '.csv',
              index=False)