def ex12_short(mdl_typ):
    """Fit a logit model per version on selected metrics and score the next version.

    For every version returned by ``version.get_version_short_list()`` except
    '4.5.0', the training frame comes from
    ``ex02.create_df_used_all_predate_nml`` and the explanatory columns are
    chosen by ``ex11.get_best_paramaters``. A ``sm.Logit`` model with 'fault'
    as the dependent variable is fitted, its coefficients are applied to
    ``mmm.create_df(next_ver)``, and the outcome is passed to
    ``ex01.evaluate_ex``. Two CSV files are written under ./../result/ex12/:
    the evaluation records and the selected parameters per version.

    Args:
        mdl_typ: one of 'nml', 'rfn' or 'chrn'; selects the candidate metric
            columns and is embedded in the output file names.

    Raises:
        ValueError: if ``mdl_typ`` is not one of the supported model types.
    """
    candidate_prms = {
        'nml': ['TCchar', 'CountLineCode', 'CountLineComment',
                'N', 'NN', 'NF', 'SumCyclomatic'],
        'rfn': ['TCchar', 'CountLineCode', 'CountLineComment',
                'N', 'NN', 'NF', 'SumCyclomatic',
                'chum', 'relatedChum', 'delectChum', 'ncdChum'],
        'chrn': ['chum', 'relatedChum', 'delectChum', 'ncdChum'],
    }
    if mdl_typ not in candidate_prms:
        # Fail fast instead of hitting an unbound `prms` inside the loop.
        raise ValueError('unknown mdl_typ: %r' % (mdl_typ,))
    prms = candidate_prms[mdl_typ]

    # Only used to label the output file in this function.
    threshold = 0.5

    ev_values = []
    prm_note = []
    for curr_ver in version.get_version_short_list():
        next_ver = version.get_next_version(curr_ver)
        print(curr_ver)
        print(next_ver)
        if curr_ver == '4.5.0':
            # This version is never used as a training version.
            continue

        curr_df = ex02.create_df_used_all_predate_nml(curr_ver)
        # Parameters with the lowest AIC value (per the selection helper).
        best_prms = ex11.get_best_paramaters(prms, curr_df, curr_ver, mdl_typ)
        columns = list(best_prms)

        # Fit on the current version: 'fault' is the dependent variable.
        logit = sm.Logit(curr_df['fault'], curr_df[columns])
        result = logit.fit()
        coef = pd.Series(result.params.values, index=best_prms)

        # Apply the fitted coefficients to the next version's metrics.
        next_df = mmm.create_df(next_ver)
        logit_odds = next_df[columns].dot(coef)
        evals = logit.cdf(logit_odds)

        ev_values.append([curr_ver] + list(ex01.evaluate_ex(next_df, evals)))
        prm_note.append(curr_ver)
        prm_note.append(best_prms)

    df = pd.DataFrame(ev_values)
    df.columns = ['version', 'nm', 'np', 'nf', 'nc', 'f_value']
    df = df.sort_index(ascending=False)
    # `cols=None` was dropped: it equals the default and the keyword no
    # longer exists in current pandas.
    df.to_csv('./../result/ex12/record_ex12_' + mdl_typ + '_'
              + str(threshold) + '.csv', index=False)
    pd.DataFrame(prm_note).to_csv(
        './../result/ex12/prm_note_' + mdl_typ + '.csv', index=False)
def ex1_short(mdl_typ, threshold):
    """Fit a logit model per version on a fixed metric set and score the next version.

    For every version returned by ``version.get_version_short_list()`` except
    '4.5.0', a ``sm.Logit`` model with 'fault' as the dependent variable is
    fitted on ``mmm.create_df(curr_ver)``, its coefficients are applied to
    ``mmm.create_df(next_ver)``, and the outcome is passed to ``evaluate_ex``.
    The collected records are written to ./../result/ex1/.

    Args:
        mdl_typ: one of 'nml', 'rfn' or 'chrn'; selects the metric columns
            and is embedded in the output file name.
        threshold: value embedded in the output file name.
            NOTE(review): it is only bound locally here; if ``evaluate_ex``
            reads a module-level THRESHOLD, this function does not update
            it -- confirm that this is intended.

    Raises:
        ValueError: if ``mdl_typ`` is not one of the supported model types.
    """
    candidate_prms = {
        'nml': ['TCchar', 'CountLineCode', 'CountLineComment',
                'N', 'NN', 'NF', 'SumCyclomatic'],
        'rfn': ['TCchar', 'CountLineCode', 'CountLineComment',
                'N', 'NN', 'NF', 'SumCyclomatic',
                'chum', 'relatedChum', 'delectChum', 'ncdChum'],
        'chrn': ['chum', 'relatedChum', 'delectChum', 'ncdChum'],
    }
    if mdl_typ not in candidate_prms:
        # Fail fast instead of hitting an unbound `prms` inside the loop.
        raise ValueError('unknown mdl_typ: %r' % (mdl_typ,))
    prms = candidate_prms[mdl_typ]

    ev_values = []
    print('operation starts')
    for curr_ver in version.get_version_short_list():
        next_ver = version.get_next_version(curr_ver)
        print(curr_ver)
        print(next_ver)
        if curr_ver == '4.5.0':
            # This version is never used as a training version.
            continue

        # Fit on the current version: 'fault' is the dependent variable.
        curr_df = mmm.create_df(curr_ver)
        logit = sm.Logit(curr_df['fault'], curr_df[prms])
        result = logit.fit()
        coef = pd.Series(result.params.values, index=prms)
        print(coef)

        # Apply the fitted coefficients to the next version's metrics.
        next_df = mmm.create_df(next_ver)
        ev_data = next_df[prms]
        print(ev_data)
        logit_odds = ev_data.dot(coef)
        evals = logit.cdf(logit_odds)
        print(evals)

        ev_values.append([curr_ver] + list(evaluate_ex(next_df, evals)))

    df = pd.DataFrame(ev_values)
    df.columns = ['version', 'nm', 'np', 'nf', 'nc', 'f_value']
    df = df.sort_index(ascending=False)
    # `cols=None` was dropped: it equals the default and the keyword no
    # longer exists in current pandas.
    df.to_csv('./../result/ex1/record_ex1_' + mdl_typ + '_'
              + str(threshold) + '.csv', index=False)
def evaluate_merged(exNum):
    """Collect the merged evaluation of one experiment for every version.

    For every version returned by ``version.get_version_short_list()`` except
    '4.5.0', ``evaluate_ex_merge(exNum, curr_ver)`` is called and its result
    is stored in a row prefixed with the version. The rows are written to
    ./../result/ex<exNum>/Mrecord_ex<exNum>_0.5.csv.

    Args:
        exNum: experiment number; passed to ``evaluate_ex_merge`` and used to
            build the output directory and file name.
    """
    # Only used to label the output file in this function.
    threshold = 0.5

    ev_values = []
    print('operation starts')
    for curr_ver in version.get_version_short_list():
        next_ver = version.get_next_version(curr_ver)
        print(curr_ver)
        print(next_ver)
        if curr_ver == '4.5.0':
            # This version is never evaluated.
            continue
        ev_values.append([curr_ver] + list(evaluate_ex_merge(exNum, curr_ver)))

    df = pd.DataFrame(ev_values)
    df.columns = ['version', 'nm', 'np', 'nf', 'nc', 'f_value']
    df = df.sort_index(ascending=False)
    # `cols=None` was dropped: it equals the default and the keyword no
    # longer exists in current pandas.
    df.to_csv('./../result/ex' + str(exNum) + '/Mrecord_ex' + str(exNum)
              + '_' + str(threshold) + '.csv', index=False)
def ex12_short(mdl_typ):
    """Fit a logit model per version on selected metrics and score the next version.

    For every version returned by ``version.get_version_short_list()`` except
    '4.5.0', the training frame comes from
    ``ex02.create_df_used_all_predate_nml`` and the explanatory columns are
    chosen by ``ex11.get_best_paramaters``. A ``sm.Logit`` model with 'fault'
    as the dependent variable is fitted, its coefficients are applied to
    ``mmm.create_df(next_ver)``, and the outcome is passed to
    ``ex01.evaluate_ex``. Two CSV files are written under ./../result/ex12/:
    the evaluation records and the selected parameters per version.

    Args:
        mdl_typ: one of 'nml', 'rfn' or 'chrn'; selects the candidate metric
            columns and is embedded in the output file names.

    Raises:
        ValueError: if ``mdl_typ`` is not one of the supported model types.
    """
    candidate_prms = {
        'nml': ['TCchar', 'CountLineCode', 'CountLineComment',
                'N', 'NN', 'NF', 'SumCyclomatic'],
        'rfn': ['TCchar', 'CountLineCode', 'CountLineComment',
                'N', 'NN', 'NF', 'SumCyclomatic',
                'chum', 'relatedChum', 'delectChum', 'ncdChum'],
        'chrn': ['chum', 'relatedChum', 'delectChum', 'ncdChum'],
    }
    if mdl_typ not in candidate_prms:
        # Fail fast instead of hitting an unbound `prms` inside the loop.
        raise ValueError('unknown mdl_typ: %r' % (mdl_typ,))
    prms = candidate_prms[mdl_typ]

    # Only used to label the output file in this function.
    threshold = 0.5

    ev_values = []
    prm_note = []
    for curr_ver in version.get_version_short_list():
        next_ver = version.get_next_version(curr_ver)
        print(curr_ver)
        print(next_ver)
        if curr_ver == '4.5.0':
            # This version is never used as a training version.
            continue

        curr_df = ex02.create_df_used_all_predate_nml(curr_ver)
        # Parameters with the lowest AIC value (per the selection helper).
        best_prms = ex11.get_best_paramaters(prms, curr_df, curr_ver, mdl_typ)
        columns = list(best_prms)

        # Fit on the current version: 'fault' is the dependent variable.
        logit = sm.Logit(curr_df['fault'], curr_df[columns])
        result = logit.fit()
        coef = pd.Series(result.params.values, index=best_prms)

        # Apply the fitted coefficients to the next version's metrics.
        next_df = mmm.create_df(next_ver)
        logit_odds = next_df[columns].dot(coef)
        evals = logit.cdf(logit_odds)

        ev_values.append([curr_ver] + list(ex01.evaluate_ex(next_df, evals)))
        prm_note.append(curr_ver)
        prm_note.append(best_prms)

    df = pd.DataFrame(ev_values)
    df.columns = ['version', 'nm', 'np', 'nf', 'nc', 'f_value']
    df = df.sort_index(ascending=False)
    # `cols=None` was dropped: it equals the default and the keyword no
    # longer exists in current pandas.
    df.to_csv('./../result/ex12/record_ex12_' + mdl_typ + '_'
              + str(threshold) + '.csv', index=False)
    pd.DataFrame(prm_note).to_csv(
        './../result/ex12/prm_note_' + mdl_typ + '.csv', index=False)
def ex4():
    """Fit a logit model per version on normalized metrics and score the next version.

    For every version returned by ``version.get_version_list()`` except
    '10.10.2.0', the metrics CSV of the version is loaded with
    ``create_df_ref`` and passed through ``nrmlize``; a ``sm.Logit`` model
    with 'fault' as the dependent variable is fitted and applied to the
    equally prepared metrics of the next version. The frame returned by
    ``evaluate_ex`` is written to ./../data/result/ex4_ref/, and the
    per-version records are written to ./../data/result/ once the loop ends.

    Versions whose model cannot be fitted are reported on stdout and skipped,
    so that no coefficients from a previously fitted version are reused.

    Reads the module-level ``THRESHOLD`` (file name only) and rebinds the
    module-level ``record`` for each version.
    NOTE(review): presumably ``evaluate_ex`` appends its metrics to that
    module-level ``record`` -- confirm against its definition.
    """
    global record, THRESHOLD

    columns = ['TCchar', 'LOC', 'N', 'NN', 'NF',
               'chum', 'relatedChum', 'delectChum', 'ncdChum', 'intercept']

    records = []
    for curr_ver in version.get_version_list():
        if curr_ver == '10.10.2.0':
            # This version is never used as a training version.
            continue
        next_version = version.get_next_version(curr_ver)
        print(curr_ver)

        record = [curr_ver]

        # Training data: normalized metrics of the current version.
        train_df = nrmlize(create_df_ref(
            './../data/metrics/METRICS_V' + curr_ver + '.csv'))
        logit = sm.Logit(train_df['fault'], train_df[columns])
        try:
            result = logit.fit()
        except Exception:
            # Without a fresh fit there are no valid coefficients for this
            # version; falling through would reuse the previous iteration's
            # `result` (or fail with a NameError on the first version).
            print(curr_ver + 'is singular.')
            continue
        coef = pd.Series(result.params.values, index=columns)

        # Evaluation data: normalized metrics of the next version.
        test_df = nrmlize(create_df_ref(
            './../data/metrics/METRICS_V' + next_version + '.csv'))
        logit_odds = test_df[columns].dot(coef)
        print('---logit_odds---')
        print(logit_odds)
        evals = logit.cdf(logit_odds)
        print('---evals---')
        print(evals)

        evaluated = evaluate_ex(test_df, evals)
        pd.DataFrame(evaluated).to_csv(
            './../data/result/ex4_ref/result_' + curr_ver + 'ex4_ref.csv',
            index=False)
        records.append(record)

    # `cols=None` was dropped: it equals the default and the keyword no
    # longer exists in current pandas.
    pd.DataFrame(records).to_csv(
        './../data/result/record_ex4_ref_' + str(THRESHOLD) + '.csv',
        index=False)
def ex3(threshold):
    """Compare two logit models (nrm_prms vs. ref_prms) on row-normalized metrics.

    Sets the module-level ``THRESHOLD`` to ``threshold``. For every version
    returned by ``version.get_version_short_list()`` except '4.5.0', two
    ``sm.Logit`` models with 'fault' as the dependent variable are fitted on
    ``mm.create_df(curr_ver)``: one on the module-level ``nrm_prms`` columns
    and one on the module-level ``ref_prms`` columns. Each explanatory frame
    is divided row-wise by its row sum before use. Both models are applied to
    ``mm.create_df(next_ver)`` and scored with ``ex01.evaluate_ex``. The two
    result tables are written to ./../result/ex3/.

    If the ``ref_prms`` model cannot be fitted, the failure is reported on
    stdout and that version is left out of the rfn table, so the coefficients
    of the ``nrm_prms`` model are never used in its place.

    Args:
        threshold: value stored in the module-level ``THRESHOLD`` and embedded
            in the output file names.
    """
    global THRESHOLD
    THRESHOLD = threshold

    def normalize_rows(frame):
        # Divide every row by the sum of its own values.
        return frame.div(frame.sum(1), axis=0)

    columns = ['version', 'nm', 'np', 'nf', 'nc', 'f_value']
    ev_values_nml = []
    ev_values_ref = []
    print('operation starts')
    for curr_ver in version.get_version_short_list():
        next_ver = version.get_next_version(curr_ver)
        print(curr_ver)
        print(next_ver)
        if curr_ver == '4.5.0':
            # This version is never used as a training version.
            continue

        curr_df = mm.create_df(curr_ver)
        dv_data = curr_df['fault']

        # --- model on nrm_prms ---
        logit = sm.Logit(dv_data, normalize_rows(curr_df[nrm_prms]))
        result = logit.fit()
        coef = pd.Series(result.params.values, index=nrm_prms)
        print(coef)

        next_df = mm.create_df(next_ver)
        logit_odds = normalize_rows(next_df[nrm_prms]).dot(coef)
        evals = logit.cdf(logit_odds)
        ev_values_nml.append(
            [curr_ver] + list(ex01.evaluate_ex(next_df, evals)))

        # --- model on ref_prms ---
        logit = sm.Logit(dv_data, normalize_rows(curr_df[ref_prms]))
        try:
            result = logit.fit()
        except Exception:
            # Falling through would pair the nrm_prms fit result with the
            # ref_prms index; skip the rfn evaluation for this version.
            print('Singular matrix occured')
            continue
        coef = pd.Series(result.params.values, index=ref_prms)

        logit_odds = normalize_rows(next_df[ref_prms]).dot(coef)
        evals = logit.cdf(logit_odds)
        ev_values_ref.append(
            [curr_ver] + list(ex01.evaluate_ex(next_df, evals)))

    # `cols=None` was dropped below: it equals the default and the keyword no
    # longer exists in current pandas.
    df = pd.DataFrame(ev_values_nml)
    df.columns = columns
    df.to_csv('./../result/ex3/record_ex3_nml_' + str(THRESHOLD) + '.csv',
              index=False)

    df = pd.DataFrame(ev_values_ref)
    df.columns = columns
    df.to_csv('./../result/ex3/record_ex3_rfn_' + str(THRESHOLD) + '.csv',
              index=False)
def ex3(threshold):
    """Compare two logit models (nrm_prms vs. ref_prms) on row-normalized metrics.

    Sets the module-level ``THRESHOLD`` to ``threshold``. For every version
    returned by ``version.get_version_short_list()`` except '4.5.0', two
    ``sm.Logit`` models with 'fault' as the dependent variable are fitted on
    ``mm.create_df(curr_ver)``: one on the module-level ``nrm_prms`` columns
    and one on the module-level ``ref_prms`` columns. Each explanatory frame
    is divided row-wise by its row sum before use. Both models are applied to
    ``mm.create_df(next_ver)`` and scored with ``ex01.evaluate_ex``. The two
    result tables are written to ./../result/ex3/.

    If the ``ref_prms`` model cannot be fitted, the failure is reported on
    stdout and that version is left out of the rfn table, so the coefficients
    of the ``nrm_prms`` model are never used in its place.

    Args:
        threshold: value stored in the module-level ``THRESHOLD`` and embedded
            in the output file names.
    """
    global THRESHOLD
    THRESHOLD = threshold

    def normalize_rows(frame):
        # Divide every row by the sum of its own values.
        return frame.div(frame.sum(1), axis=0)

    columns = ['version', 'nm', 'np', 'nf', 'nc', 'f_value']
    ev_values_nml = []
    ev_values_ref = []
    print('operation starts')
    for curr_ver in version.get_version_short_list():
        next_ver = version.get_next_version(curr_ver)
        print(curr_ver)
        print(next_ver)
        if curr_ver == '4.5.0':
            # This version is never used as a training version.
            continue

        curr_df = mm.create_df(curr_ver)
        dv_data = curr_df['fault']

        # --- model on nrm_prms ---
        logit = sm.Logit(dv_data, normalize_rows(curr_df[nrm_prms]))
        result = logit.fit()
        coef = pd.Series(result.params.values, index=nrm_prms)
        print(coef)

        next_df = mm.create_df(next_ver)
        logit_odds = normalize_rows(next_df[nrm_prms]).dot(coef)
        evals = logit.cdf(logit_odds)
        ev_values_nml.append(
            [curr_ver] + list(ex01.evaluate_ex(next_df, evals)))

        # --- model on ref_prms ---
        logit = sm.Logit(dv_data, normalize_rows(curr_df[ref_prms]))
        try:
            result = logit.fit()
        except Exception:
            # Falling through would pair the nrm_prms fit result with the
            # ref_prms index; skip the rfn evaluation for this version.
            print('Singular matrix occured')
            continue
        coef = pd.Series(result.params.values, index=ref_prms)

        logit_odds = normalize_rows(next_df[ref_prms]).dot(coef)
        evals = logit.cdf(logit_odds)
        ev_values_ref.append(
            [curr_ver] + list(ex01.evaluate_ex(next_df, evals)))

    # `cols=None` was dropped below: it equals the default and the keyword no
    # longer exists in current pandas.
    df = pd.DataFrame(ev_values_nml)
    df.columns = columns
    df.to_csv('./../result/ex3/record_ex3_nml_' + str(THRESHOLD) + '.csv',
              index=False)

    df = pd.DataFrame(ev_values_ref)
    df.columns = columns
    df.to_csv('./../result/ex3/record_ex3_rfn_' + str(THRESHOLD) + '.csv',
              index=False)
def ex4_short(mdl_typ):
    """Fit a logit model per version on the AIC-selected metric set and score the next version.

    For every version returned by ``version.get_version_short_list()`` except
    '4.5.0', the candidate column sets are the full metric list of the model
    type plus every subset obtained by leaving one metric out. ``get_aic`` is
    evaluated for each candidate on ``mm.create_df(curr_ver)``, and the
    candidate with the lowest value is used to fit a ``sm.Logit`` model with
    'fault' as the dependent variable. The coefficients are applied to
    ``mm.create_df(next_ver)`` and the outcome is passed to
    ``ex01.evaluate_ex``. The records are written to ./../result/ex4/.

    Args:
        mdl_typ: one of 'nml', 'rfn' or 'chrn'; selects the metric columns
            and is embedded in the output file name.

    Raises:
        ValueError: if ``mdl_typ`` is not one of the supported model types.
    """
    candidate_prms = {
        'nml': ['TCchar', 'CountLineCode', 'N', 'NN', 'NF', 'SumCyclomatic'],
        'rfn': ['TCchar', 'CountLineCode', 'N', 'NN', 'NF', 'SumCyclomatic',
                'chum', 'relatedChum', 'delectChum', 'ncdChum'],
        'chrn': ['chum', 'relatedChum', 'delectChum', 'ncdChum'],
    }
    if mdl_typ not in candidate_prms:
        # Fail fast instead of hitting an unbound `prms` inside the loop.
        raise ValueError('unknown mdl_typ: %r' % (mdl_typ,))
    prms = candidate_prms[mdl_typ]

    # Only used to label the output file in this function.
    threshold = 0.5

    ev_values = []
    print('operation starts')
    for curr_ver in version.get_version_short_list():
        next_ver = version.get_next_version(curr_ver)
        print(curr_ver)
        print(next_ver)
        if curr_ver == '4.5.0':
            # This version is never used as a training version.
            continue

        curr_df = mm.create_df(curr_ver)
        dv_data = curr_df['fault']

        # Candidate 0 is the full set; the rest each drop one metric.
        candidates = [prms]
        candidates.extend(itertools.combinations(prms, len(prms) - 1))

        aic_by_key = {}
        for key, variables in enumerate(candidates):
            aic_by_key[key] = get_aic(dv_data, curr_df[list(variables)])
        print(aic_by_key)

        # A lower AIC indicates the preferred model, so take the minimum
        # (the previous code took the maximum). Ties resolve to the lowest
        # key, i.e. the earliest candidate.
        # NOTE(review): assumes get_aic returns a plain AIC value -- confirm.
        best_key = min((aic, key) for (key, aic) in aic_by_key.items())[1]
        best_prms = candidates[best_key]
        columns = list(best_prms)

        # Fit on the current version with the selected columns.
        logit = sm.Logit(dv_data, curr_df[columns])
        result = logit.fit()
        coef = pd.Series(result.params.values, index=best_prms)

        # Apply the fitted coefficients to the next version's metrics.
        next_df = mm.create_df(next_ver)
        logit_odds = next_df[columns].dot(coef)
        evals = logit.cdf(logit_odds)

        ev_values.append([curr_ver] + list(ex01.evaluate_ex(next_df, evals)))

    df = pd.DataFrame(ev_values)
    df.columns = ['version', 'nm', 'np', 'nf', 'nc', 'f_value']
    df = df.sort_index(ascending=False)
    # `cols=None` was dropped: it equals the default and the keyword no
    # longer exists in current pandas.
    df.to_csv('./../result/ex4/record_ex4_' + mdl_typ + '_'
              + str(threshold) + '.csv', index=False)
def ex1_short(mdl_typ, threshold):
    """Fit a logit model per version on a fixed metric set and score the next version.

    For every version returned by ``version.get_version_short_list()`` except
    '4.5.0', a ``sm.Logit`` model with 'fault' as the dependent variable is
    fitted on ``mmm.create_df(curr_ver)``, its coefficients are applied to
    ``mmm.create_df(next_ver)``, and the outcome is passed to ``evaluate_ex``.
    The collected records are written to ./../result/ex1/.

    Args:
        mdl_typ: one of 'nml', 'rfn' or 'chrn'; selects the metric columns
            and is embedded in the output file name.
        threshold: value embedded in the output file name.
            NOTE(review): it is only bound locally here; if ``evaluate_ex``
            reads a module-level THRESHOLD, this function does not update
            it -- confirm that this is intended.

    Raises:
        ValueError: if ``mdl_typ`` is not one of the supported model types.
    """
    candidate_prms = {
        'nml': ['TCchar', 'CountLineCode', 'CountLineComment',
                'N', 'NN', 'NF', 'SumCyclomatic'],
        'rfn': ['TCchar', 'CountLineCode', 'CountLineComment',
                'N', 'NN', 'NF', 'SumCyclomatic',
                'chum', 'relatedChum', 'delectChum', 'ncdChum'],
        'chrn': ['chum', 'relatedChum', 'delectChum', 'ncdChum'],
    }
    if mdl_typ not in candidate_prms:
        # Fail fast instead of hitting an unbound `prms` inside the loop.
        raise ValueError('unknown mdl_typ: %r' % (mdl_typ,))
    prms = candidate_prms[mdl_typ]

    ev_values = []
    print('operation starts')
    for curr_ver in version.get_version_short_list():
        next_ver = version.get_next_version(curr_ver)
        print(curr_ver)
        print(next_ver)
        if curr_ver == '4.5.0':
            # This version is never used as a training version.
            continue

        # Fit on the current version: 'fault' is the dependent variable.
        curr_df = mmm.create_df(curr_ver)
        logit = sm.Logit(curr_df['fault'], curr_df[prms])
        result = logit.fit()
        coef = pd.Series(result.params.values, index=prms)
        print(coef)

        # Apply the fitted coefficients to the next version's metrics.
        next_df = mmm.create_df(next_ver)
        ev_data = next_df[prms]
        print(ev_data)
        logit_odds = ev_data.dot(coef)
        evals = logit.cdf(logit_odds)
        print(evals)

        ev_values.append([curr_ver] + list(evaluate_ex(next_df, evals)))

    df = pd.DataFrame(ev_values)
    df.columns = ['version', 'nm', 'np', 'nf', 'nc', 'f_value']
    df = df.sort_index(ascending=False)
    # `cols=None` was dropped: it equals the default and the keyword no
    # longer exists in current pandas.
    df.to_csv('./../result/ex1/record_ex1_' + mdl_typ + '_'
              + str(threshold) + '.csv', index=False)
def ex4_short(mdl_typ):
    """Fit a logit model per version on the AIC-selected metric set and score the next version.

    For every version returned by ``version.get_version_short_list()`` except
    '4.5.0', the candidate column sets are the full metric list of the model
    type plus every subset obtained by leaving one metric out. ``get_aic`` is
    evaluated for each candidate on ``mm.create_df(curr_ver)``, and the
    candidate with the lowest value is used to fit a ``sm.Logit`` model with
    'fault' as the dependent variable. The coefficients are applied to
    ``mm.create_df(next_ver)`` and the outcome is passed to
    ``ex01.evaluate_ex``. The records are written to ./../result/ex4/.

    Args:
        mdl_typ: one of 'nml', 'rfn' or 'chrn'; selects the metric columns
            and is embedded in the output file name.

    Raises:
        ValueError: if ``mdl_typ`` is not one of the supported model types.
    """
    candidate_prms = {
        'nml': ['TCchar', 'CountLineCode', 'N', 'NN', 'NF', 'SumCyclomatic'],
        'rfn': ['TCchar', 'CountLineCode', 'N', 'NN', 'NF', 'SumCyclomatic',
                'chum', 'relatedChum', 'delectChum', 'ncdChum'],
        'chrn': ['chum', 'relatedChum', 'delectChum', 'ncdChum'],
    }
    if mdl_typ not in candidate_prms:
        # Fail fast instead of hitting an unbound `prms` inside the loop.
        raise ValueError('unknown mdl_typ: %r' % (mdl_typ,))
    prms = candidate_prms[mdl_typ]

    # Only used to label the output file in this function.
    threshold = 0.5

    ev_values = []
    print('operation starts')
    for curr_ver in version.get_version_short_list():
        next_ver = version.get_next_version(curr_ver)
        print(curr_ver)
        print(next_ver)
        if curr_ver == '4.5.0':
            # This version is never used as a training version.
            continue

        curr_df = mm.create_df(curr_ver)
        dv_data = curr_df['fault']

        # Candidate 0 is the full set; the rest each drop one metric.
        candidates = [prms]
        candidates.extend(itertools.combinations(prms, len(prms) - 1))

        aic_by_key = {}
        for key, variables in enumerate(candidates):
            aic_by_key[key] = get_aic(dv_data, curr_df[list(variables)])
        print(aic_by_key)

        # A lower AIC indicates the preferred model, so take the minimum
        # (the previous code took the maximum). Ties resolve to the lowest
        # key, i.e. the earliest candidate.
        # NOTE(review): assumes get_aic returns a plain AIC value -- confirm.
        best_key = min((aic, key) for (key, aic) in aic_by_key.items())[1]
        best_prms = candidates[best_key]
        columns = list(best_prms)

        # Fit on the current version with the selected columns.
        logit = sm.Logit(dv_data, curr_df[columns])
        result = logit.fit()
        coef = pd.Series(result.params.values, index=best_prms)

        # Apply the fitted coefficients to the next version's metrics.
        next_df = mm.create_df(next_ver)
        logit_odds = next_df[columns].dot(coef)
        evals = logit.cdf(logit_odds)

        ev_values.append([curr_ver] + list(ex01.evaluate_ex(next_df, evals)))

    df = pd.DataFrame(ev_values)
    df.columns = ['version', 'nm', 'np', 'nf', 'nc', 'f_value']
    df = df.sort_index(ascending=False)
    # `cols=None` was dropped: it equals the default and the keyword no
    # longer exists in current pandas.
    df.to_csv('./../result/ex4/record_ex4_' + mdl_typ + '_'
              + str(threshold) + '.csv', index=False)