def test_result_attributes(self):
    np.random.seed(1234567)
    outcome = np.random.randn(20, 4) + [0, 0, 1, 2]

    res = mstats.ttest_ind(outcome[:, 0], outcome[:, 1])
    attributes = ('statistic', 'pvalue')
    check_named_results(res, attributes, ma=True)
def check_significance(gt_dist, seg_dist):
    # p-values of the normality test (D'Agostino-Pearson) for each sample
    normal_gt = normaltest(gt_dist)[1]
    normal_seg = normaltest(seg_dist)[1]

    # if both samples look normally distributed use the t-test,
    # else use the Mann-Whitney U test
    if normal_gt > 0.05 and normal_seg > 0.05:
        pvalue = ttest_ind(gt_dist, seg_dist)[1]
    else:
        pvalue = mannwhitneyu(gt_dist, seg_dist)[1]

    # map the p-value onto a significance level; the bounds are inclusive so
    # that values exactly on a threshold (e.g. 0.01) cannot fall through
    if pvalue >= 0.05:
        return 0
    if pvalue >= 0.01:
        return 1
    if pvalue >= 0.001:
        return 2
    return 3
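# Usage sketch for check_significance above, on made-up data; assumes the same
# `normaltest`, `ttest_ind` and `mannwhitneyu` imports from scipy.stats:
import numpy as np

rng = np.random.default_rng(0)
sample_a = rng.normal(0.0, 1.0, 200)
sample_b = rng.normal(0.5, 1.0, 200)
print(check_significance(sample_a, sample_b))  # separated means: likely 2 or 3
print(check_significance(sample_a, sample_a))  # identical samples: 0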
def test_vs_nonmasked(self):
    np.random.seed(1234567)
    outcome = np.random.randn(20, 4) + [0, 0, 1, 2]

    # 1-D inputs
    res1 = stats.ttest_ind(outcome[:, 0], outcome[:, 1])
    res2 = mstats.ttest_ind(outcome[:, 0], outcome[:, 1])
    assert_allclose(res1, res2)

    # 2-D inputs
    res1 = stats.ttest_ind(outcome[:, 0], outcome[:, 1], axis=None)
    res2 = mstats.ttest_ind(outcome[:, 0], outcome[:, 1], axis=None)
    assert_allclose(res1, res2)
    res1 = stats.ttest_ind(outcome[:, :2], outcome[:, 2:], axis=0)
    res2 = mstats.ttest_ind(outcome[:, :2], outcome[:, 2:], axis=0)
    assert_allclose(res1, res2)

    # Check default is axis=0
    res3 = mstats.ttest_ind(outcome[:, :2], outcome[:, 2:])
    assert_allclose(res2, res3)
def test_empty(self):
    res1 = mstats.ttest_ind([], [])
    assert_(np.all(np.isnan(res1)))
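# Why the masked (mstats) version is tested separately: a minimal sketch showing
# that mstats.ttest_ind skips masked entries, while the plain version, with its
# default nan_policy='propagate', returns NaN. (Illustrative data only.)
import numpy as np
from scipy import stats
from scipy.stats import mstats

a = np.ma.masked_invalid([1.0, 2.0, 3.0, np.nan, 2.5])
b = np.ma.masked_invalid([2.0, 3.0, np.nan, 3.5, 4.0])
print(mstats.ttest_ind(a, b))           # finite result: NaNs are masked out
print(stats.ttest_ind(a.data, b.data))  # (nan, nan): NaNs propagate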
tissueMat = np.loadtxt(ablnFile).reshape(80, 80)
patchMat = tissueMat[patchPos:patchEnd, patchPos:patchEnd]
patchConcentration = np.sum(patchMat) / (patchWidth * patchWidth)
tissueConcentration = np.sum(tissueMat) / (80 * 80)
patchConces[i] = patchConcentration
tissueConces[i] = tissueConcentration

with open(testLogFile, 'w') as testLog:
    testLog.write('patch ablated/area,\ttissue ablated/area\n')
    theString = '{},\t{}\n'
    for i in range(len(patchConces)):
        testLog.write(theString.format(patchConces[i], tissueConces[i]))

# Finding the test statistics
with open(testStatFile, 'w') as statLog:
    tval, pval = ttest_ind(patchConces, tissueConces)
    statLog.write('Testing Ha: tissue ablation concentration < Patch concentration\n')
    statLog.write('Raw tval: {}\tRaw pval: {}\n'.format(tval, pval))
    print('Raw tval: {}\tRaw pval: {}\n'.format(tval, pval))
    # Halving the two-sided p-value gives a one-tailed p-value only when the
    # t statistic has the sign predicted by Ha (see the sketch below).
    actualPval = pval / 2
    print('Ha: tissue ablation concentration < Patch concentration tval:{}\tpval:{}'
          .format(tval, actualPval))
    statLog.write('Raw_tval: {}\t1-tail_pval:{}'.format(tval, actualPval))

print('\tTesting & logging complete')

if boundaryTest:
    # Test boundary area connection importance
    print('\nTesting if boundary connection matters')
    # First create 10 tissues never seen before
    testDir = os.path.join(tempDir, 'BoundaryConnTest')
    if not os.path.exists(testDir):
        os.makedirs(testDir)  # portable replacement for run(['mkdir', testDir])
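# Sign-aware one-tailed conversion, as referenced above (sketch with stand-in
# arrays; `patch`/`tissue` are illustrative, not the script's real data):
import numpy as np
from scipy.stats import ttest_ind

rng = np.random.default_rng(7)
patch = rng.normal(0.5, 0.1, 50)    # stand-in for patchConces
tissue = rng.normal(0.4, 0.1, 50)   # stand-in for tissueConces
t, p = ttest_ind(patch, tissue)
# Ha: mean(tissue) < mean(patch), i.e. t > 0 with patch as the first argument;
# halving p is only valid when t points in that direction.
p_one_tailed = p / 2 if t > 0 else 1 - p / 2
print(t, p_one_tailed)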
def groupmeans(data, groups, numbers, cutoff=0.01, quantile=0.95, minsize=None):
    """
    Yields the significant differences in average between every pair of
    groups and numbers.

    Parameters
    ----------
    data : blaze data object
    groups : non-empty iterable containing category column names in data
    numbers : non-empty iterable containing numeric column names in data
    cutoff : ignore anything with prob > cutoff.
        cutoff=None ignores significance checks, speeding it up a LOT.
    quantile : number that represents target improvement. Defaults to .95.
        The ``diff`` returned is the % impact of everyone moving to the 95th
        percentile
    minsize : each group should contain at least minsize values.
        If minsize=None, automatically set the minimum size to 1% of the
        dataset, or 10, whichever is larger.
    """
    if minsize is None:
        # materialise the row count first; max() on a raw blaze expression fails
        minsize = max(bz.into(int, data.nrows) // 100, 10)

    # materialise the column means so `gain` below is a plain float
    means = {col: bz.into(float, data[col].mean()) for col in numbers}
    results = []
    for group in groups:
        agg = {number: bz.mean(data[number]) for number in numbers}
        agg["#"] = bz.count(data)
        ave = bz.by(data[group], **agg).sort("#", ascending=False)
        ave = bz.into(pd.DataFrame, ave)
        ave.index = ave[group]
        sizes = ave["#"]
        # Each group should contain at least minsize values
        biggies = sizes[sizes >= minsize].index
        # ... and at least 2 groups overall, to compare.
        if len(biggies) < 2:
            continue
        for number in numbers:
            if number == group:
                continue
            sorted_cats = ave[number][biggies].dropna().sort_values()
            if len(sorted_cats) < 2:
                continue
            lo = bz.into(list, data[number][data[group] == sorted_cats.index[0]])
            hi = bz.into(list, data[number][data[group] == sorted_cats.index[-1]])
            _, prob = ttest_ind(
                np.ma.masked_array(lo, np.isnan(lo)),
                np.ma.masked_array(hi, np.isnan(hi)),
            )
            if prob > cutoff:
                continue
            results.append({
                "group": group,
                "number": number,
                "prob": prob,
                "gain": sorted_cats.iloc[-1] / means[number] - 1,
                "biggies": ave.loc[biggies][number],  # .loc: .ix is deprecated
                "means": ave[[number, "#"]].sort_values(by=number),
            })
    results = pd.DataFrame(results)
    if len(results) > 0:
        results = results.set_index(["group", "number"])
    return results
               p=1, y_dist=0.02, distance=0.1)

sb.boxplot(data=all_data, color="skyblue")  # .set(ylabel="Dice coefficient")
plt.ylabel("Intersection over Union", size=18)
plt.yticks(fontsize=12)
plt.xticks(fontsize=12)

# normality check (D'Agostino-Pearson) for each method's score distribution
print(normaltest(all_data["Gaussian"])[1])
print(normaltest(all_data["Hessian"])[1])
print(normaltest(all_data["Laplacian"])[1])
print(normaltest(all_data["Ilastik"])[1])
print(normaltest(all_data["MitoSegNet"])[1])
print("\n")

"""
print(mannwhitneyu(all_data["Gaussian"], all_data["MitoSegNet"])[1])
print(mannwhitneyu(all_data["Hessian"], all_data["MitoSegNet"])[1])
print(mannwhitneyu(all_data["Laplacian"], all_data["MitoSegNet"])[1])
print(mannwhitneyu(all_data["Ilastik"], all_data["MitoSegNet"])[1])
"""

# pairwise t-tests of each baseline against MitoSegNet
print(ttest_ind(all_data["Gaussian"], all_data["MitoSegNet"])[1])
print(ttest_ind(all_data["Hessian"], all_data["MitoSegNet"])[1])
print(ttest_ind(all_data["Laplacian"], all_data["MitoSegNet"])[1])
print(ttest_ind(all_data["Ilastik"], all_data["MitoSegNet"])[1])

plt.show()
def groupmeans(data, groups, numbers, cutoff=.01, quantile=.95, minsize=None,
               weight=None):
    '''
    Yields the significant differences in average between every pair of
    groups and numbers.

    :arg DataFrame data: pandas.DataFrame to analyze
    :arg list groups: category column names to group data by
    :arg list numbers: numeric column names to summarize data by
    :arg float cutoff: ignore anything with prob > cutoff.
        cutoff=None ignores significance checks, speeding it up a LOT.
    :arg float quantile: number that represents target improvement.
        Defaults to .95. The ``diff`` returned is the % impact of everyone
        moving to the 95th percentile
    :arg int minsize: each group should contain at least minsize values.
        If minsize=None, automatically set the minimum size to 1% of the
        dataset, or 10, whichever is larger.
    :arg str weight: optional weight column name; when given, weighted
        averages are used instead of plain means
    '''
    import numpy as np
    from scipy.stats.mstats import ttest_ind

    if minsize is None:
        minsize = max(len(data.index) // 100, 10)
    if weight is None:
        means = data[numbers].mean()
    else:
        means = weighted_avg(data, numbers, weight)
    results = []
    for group in groups:
        grouped = data.groupby(group, sort=False)
        if weight is None:
            ave = grouped[numbers].mean()
        else:
            ave = grouped.apply(lambda v: weighted_avg(v, numbers, weight))
        ave['#'] = sizes = grouped.size()
        # Each group should contain at least minsize values
        biggies = sizes[sizes >= minsize].index
        # ... and at least 2 groups overall, to compare.
        if len(biggies) < 2:
            continue
        for number in numbers:
            if number == group:
                continue
            sorted_cats = ave[number][biggies].dropna().sort_values()
            if len(sorted_cats) < 2:
                continue
            lo = data[number][grouped.groups[sorted_cats.index[0]]].values
            hi = data[number][grouped.groups[sorted_cats.index[-1]]].values
            # np.ma / np.isnan rather than the long-deprecated pd.np alias
            _, prob = ttest_ind(
                np.ma.masked_array(lo, np.isnan(lo)),
                np.ma.masked_array(hi, np.isnan(hi))
            )
            if prob > cutoff:
                continue
            results.append({
                'group': group,
                'number': number,
                'prob': prob,
                'gain': sorted_cats.iloc[-1] / means[number] - 1,
                'biggies': ave.loc[biggies][number].to_dict(),
                'means': ave[[number, '#']].sort_values(number).to_dict(),
            })
    results = pd.DataFrame(results)
    if len(results) > 0:
        results = results.set_index(['group', 'number'])
    return results.reset_index()  # Flatten the multi-index.
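# Toy run of the pandas groupmeans above (sketch; the frame and column names
# are made up for illustration):
import numpy as np
import pandas as pd

rng = np.random.default_rng(42)
toy = pd.DataFrame({
    'city': rng.choice(['A', 'B'], size=400),
    'sales': rng.normal(100, 10, size=400),
})
toy.loc[toy['city'] == 'B', 'sales'] += 15   # make the group difference real
out = groupmeans(toy, groups=['city'], numbers=['sales'])
print(out[['group', 'number', 'prob', 'gain']])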
plt.legend(loc="best")
plt.show()

# %%
# recode genotype 2 as 1; .loc avoids chained-assignment warnings
df.loc[df['rs1'].isin([2]), 'rs1'] = 1
df.loc[df['rs2'].isin([2]), 'rs2'] = 1

# %%
df1 = df[df['bloodsugar'].isin([0])]
df2 = df[df['bloodsugar'].isin([1])]

from scipy.stats.mstats import ttest_ind

# `col` rather than `str`, which would shadow the builtin
for col in ['GDM', 'rs1', 'rs2']:
    stat, p = ttest_ind(df1[col], df2[col])
    print(col, ' ', p)

df['bloodsugar'].value_counts()

# %%
df['combine'] = 10
'''
change='GDM'
control='rs'
flag=1
if flag==0:
    df = df[df[control].isin([0])]
else:
def groupmeans(data, groups, numbers, cutoff=.01, quantile=.95, min_size=None):
    '''
    Yields the significant differences in average between every pair of
    groups and numbers.

    Parameters
    ----------
    data : blaze data object
    groups : non-empty iterable containing category column names in data
    numbers : non-empty iterable containing numeric column names in data
    cutoff : ignore anything with prob > cutoff.
        cutoff=None ignores significance checks, speeding it up a LOT.
    quantile : number that represents target improvement. Defaults to .95.
        The ``diff`` returned is the % impact of everyone moving to the 95th
        percentile
    min_size : each group should contain at least min_size values.
        If min_size=None, automatically set the minimum size to 1% of the
        dataset, or 10, whichever is larger.
    '''
    if min_size is None:
        # compute nrows; bz.compute(data.nrows) doesn't work for sqlite
        min_size = max(bz.into(int, data.nrows) // 100, 10)

    # compute mean of each number column
    means = {col: bz.into(float, data[col].mean()) for col in numbers}
    # pre-create aggregation expressions (mean, count)
    agg = {number: bz.mean(data[number]) for number in numbers}

    for group in groups:
        agg['#'] = data[group].count()
        ave = bz.by(data[group], **agg).sort('#', ascending=False)
        ave = bz.into(pd.DataFrame, ave)
        ave.index = ave[group]
        sizes = ave['#']
        # Each group should contain at least min_size values
        biggies = sizes[sizes >= min_size].index
        # ... and at least 2 groups overall, to compare.
        if len(biggies) < 2:
            continue
        for number in numbers:
            if number == group:
                continue
            sorted_cats = ave[number][biggies].dropna().sort_values()
            if len(sorted_cats) < 2:
                continue
            sohi = sorted_cats.index[-1]
            solo = sorted_cats.index[0]
            # If sorted_cats.index items are of numpy type, then convert them
            # to native type; skip conversion for unicode, str.
            # See https://github.com/blaze/blaze/issues/1461
            if isinstance(solo, np.generic):
                solo, sohi = solo.item(), sohi.item()
            lo = bz.into(list, data[number][data[group] == solo])
            hi = bz.into(list, data[number][data[group] == sohi])
            _, prob = ttest_ind(
                np.ma.masked_array(lo, np.isnan(lo)),
                np.ma.masked_array(hi, np.isnan(hi))
            )
            # All results are returned by default; it is up to the caller to
            # ignore or show insignificant ones. Uncomment the two lines below
            # to yield only significant results.
            # if prob > cutoff:
            #     continue
            yield {
                'group': group,
                'number': number,
                'prob': float(prob),
                'gain': sorted_cats.iloc[-1] / means[number] - 1,
                'biggies': ave.loc[biggies][number].to_dict(),  # .ix is deprecated
                'means': ave[[number, '#']].sort_values(by=number)
                            .reset_index().to_dict(orient='records'),
            }
def groupmeans(data, groups, numbers, cutoff=.01, quantile=.95, min_size=None):
    '''
    Yields the significant differences in average between every pair of
    groups and numbers.

    Parameters
    ----------
    data : blaze data object
    groups : non-empty iterable containing category column names in data
    numbers : non-empty iterable containing numeric column names in data
    cutoff : ignore anything with prob > cutoff.
        cutoff=None ignores significance checks, speeding it up a LOT.
    quantile : number that represents target improvement. Defaults to .95.
        The ``diff`` returned is the % impact of everyone moving to the 95th
        percentile
    min_size : each group should contain at least min_size values.
        If min_size=None, automatically set the minimum size to 1% of the
        dataset, or 10, whichever is larger.
    '''
    if min_size is None:
        # compute nrows; bz.compute(data.nrows) doesn't work for sqlite
        min_size = max(bz.into(int, data.nrows) // 100, 10)

    # compute mean of each number column
    means = {col: bz.into(float, data[col].mean()) for col in numbers}
    # pre-create aggregation expressions (mean, count)
    agg = {number: bz.mean(data[number]) for number in numbers}

    for group in groups:
        agg['#'] = data[group].count()
        ave = bz.by(data[group], **agg).sort('#', ascending=False)
        ave = bz.into(pd.DataFrame, ave)
        ave.index = ave[group]
        sizes = ave['#']
        # Each group should contain at least min_size values
        biggies = sizes[sizes >= min_size].index
        # ... and at least 2 groups overall, to compare.
        if len(biggies) < 2:
            continue
        for number in numbers:
            if number == group:
                continue
            sorted_cats = ave[number][biggies].dropna().sort_values()
            if len(sorted_cats) < 2:
                continue
            sohi = sorted_cats.index[-1]
            solo = sorted_cats.index[0]
            # If sorted_cats.index items are of numpy type, then convert them
            # to native type; skip conversion for unicode, str.
            # See https://github.com/blaze/blaze/issues/1461
            if isinstance(solo, np.generic):
                solo, sohi = solo.item(), sohi.item()
            lo = bz.into(list, data[number][data[group] == solo])
            hi = bz.into(list, data[number][data[group] == sohi])
            _, prob = ttest_ind(
                np.ma.masked_array(lo, np.isnan(lo)),
                np.ma.masked_array(hi, np.isnan(hi))
            )
            if prob > cutoff:
                continue
            yield {
                'group': group,
                'number': number,
                'prob': float(prob),
                'gain': sorted_cats.iloc[-1] / means[number] - 1,
                'biggies': ave.loc[biggies][number].to_dict(),  # .ix is deprecated
                'means': ave[[number, '#']].sort_values(by=number)
                            .reset_index().to_dict(orient='records'),
            }
def ttests(path_to_dict, name_dict='\\TOTresults'):
    import numpy as np
    import fnmatch
    from dictmanager import load_obj

    # NOTES TO UNDERSTAND NOTATIONS
    # d = list of distances from reference point
    # fl = list of FL
    # PAs = list of PA sup
    # PAi = list of PA inf
    # mt = list of MT
    # _s : simple images
    # _p : panoramic images
    # _m : manual
    # _a : automated
    # _filtered : matched fascicles only

    participants = ['01_Kevin', '02_rafaelopes', '03_charlesbarrand', '04_guilhem',
                    '05_leandre', '06_thomasmartine', '10_victor',
                    '11_youssouf', '12_sufyan', '16_julien', '34_nicolas']

    '************************************************************************'
    '*****************************INITIALIZATION*****************************'
    d_s_m = [[] for par in range(len(participants))]
    mt_s_m = [[] for par in range(len(participants))]
    d_s_m_filtered = [[] for par in range(len(participants))]
    fl_s_m_filtered = [[] for par in range(len(participants))]
    PAs_s_m_filtered = [[] for par in range(len(participants))]
    PAi_s_m_filtered = [[] for par in range(len(participants))]

    d_s_a = [[] for par in range(len(participants))]
    mt_s_a = [[] for par in range(len(participants))]
    d_s_a_filtered = [[] for par in range(len(participants))]
    fl_s_a_filtered = [[] for par in range(len(participants))]
    PAs_s_a_filtered = [[] for par in range(len(participants))]
    PAi_s_a_filtered = [[] for par in range(len(participants))]

    d_p_m = [[] for par in range(len(participants))]
    mt_p_m = [[] for par in range(len(participants))]
    d_p_m_filtered = [[] for par in range(len(participants))]
    fl_p_m_filtered = [[] for par in range(len(participants))]
    PAs_p_m_filtered = [[] for par in range(len(participants))]
    PAi_p_m_filtered = [[] for par in range(len(participants))]

    d_p_a = [[] for par in range(len(participants))]
    mt_p_a = [[] for par in range(len(participants))]
    d_p_a_filtered = [[] for par in range(len(participants))]
    fl_p_a_filtered = [[] for par in range(len(participants))]
    PAs_p_a_filtered = [[] for par in range(len(participants))]
    PAi_p_a_filtered = [[] for par in range(len(participants))]

    # stats on the number of fascicles detected
    nb_fasc_tot_s = 0
    nb_fasc_in_s = 0
    nb_fasc_filt_s = 0
    nb_images_s = 0
    nb_fasc_tot_p = 0
    nb_fasc_in_p = 0
    nb_fasc_filt_p = 0
    nb_images_p = 0

    '************************************************************************'
    '*****************************DATA RETRIEVAL*****************************'
    dictio = load_obj(name_dict, path_to_dict)
    l2 = ['fasc*', 'fsc_*']

    for par in range(len(participants)):
        participant = participants[par]
        fam_folders = [str(d) for d in dictio[participant].keys()]
        s_manuFasc = []
        s_autoFasc = []
        p_manuFasc = []
        p_autoFasc = []

        for fam in fam_folders:
            ###################################################################
            # simple images
            dictioS = dictio[participant][fam]['BF']['simple']
            images = [str(im) for im in dictioS.keys()]
            for i in images:
                # if par == 9 and fam == 'fam_2' and i == 'img_2':
                #     print(par, fam, i)
                # else:
                nb_images_s = nb_images_s + 1

                ###############################################################
                # SIMPLE - manual
                dictioM = dictioS[i]['architecture manual']
                fascicles = [str(fa) for fa in dictioM
                             if any(fnmatch.fnmatch(fa, p) for p in l2)]
                for f in fascicles:
                    dictioF = dictioM[f]
                    idf = fam + '/' + i + '/' + f
                    if len(dictioF.keys()) > 1:
                        s_manuFasc.append(
                            (idf, dictioF['dist from (0,0) of RGB image, in mm']))
                        d_s_m[par].append(
                            dictioF['dist from (0,0) of RGB image, in mm'])

                ###############################################################
                # SIMPLE - automatic
                if ('architecture auto' in dictioS[i]):
                    dictioA = dictioS[i]['architecture auto']
                    midRow = np.mean(dictioA['crop']['lines'])
                    midCol = np.mean(dictioA['crop']['columns'])
                    if dictioA and ('MT' in dictioA):
                        fascicles = [fa for fa in dictioA
                                     if any(fnmatch.fnmatch(fa, p) for p in l2)]
                        nb_fasc_tot_s = nb_fasc_tot_s + len(fascicles)
                        for f in fascicles:
                            dictioF = dictioA[f]
                            idf = fam + '/' + i + '/' + f
                            if len(dictioF.keys()) > 1:
                                # keep the fascicles that are in the lower half
                                # of the image, to compare with manual data -
                                # often taken in that region
                                PAi = dictioF['PAinf']['intersection with apo']
                                PAs = dictioF['PAsup']['intersection with apo']
                                fasc_row = (PAs[0] - PAi[0]) / (PAs[1] - PAi[1]) \
                                    * (midCol - PAs[1]) + PAs[0]
                                if fasc_row <= midRow:
                                    s_autoFasc.append(
                                        (idf, dictioF['dist from (0,0) of RGB image, in mm']))
                                    d_s_a[par].append(
                                        dictioF['dist from (0,0) of RGB image, in mm'])
                                    nb_fasc_in_s = nb_fasc_in_s + 1
                        if ('MT for labelled points' in dictioM['MT']):
                            for ind0 in range(len(dictioM['MT']['MT for labelled points'])):
                                elem = dictioM['MT']['MT for labelled points'][ind0]
                                if elem != 'error':
                                    mt_s_m[par].append(elem)  # MT in mm
                            for ind0 in range(len(dictioA['MT']['MT for labelled points'])):
                                elem = dictioA['MT']['MT for labelled points'][ind0]
                                if elem != 'error':
                                    mt_s_a[par].append(elem)

            ###################################################################
            # panoramic images
            dictioP = dictio[participant][fam]['BF']['panoramic']
            images = [str(im) for im in dictioP.keys()]
            for i in images:
                nb_images_p = nb_images_p + 1

                ###############################################################
                # PANORAMIC - manual
                dictioM = dictioP[i]['architecture manual']
                fascicles = [fa for fa in dictioM
                             if any(fnmatch.fnmatch(fa, p) for p in l2)]
                for f in fascicles:
                    dictioF = dictioM[f]
                    idf = fam + '/' + i + '/' + f
                    if len(dictioF.keys()) > 1:
                        p_manuFasc.append(
                            (idf, dictioF['dist from insertion in mm']))
                        d_p_m[par].append(dictioF['dist from insertion in mm'])

                ###############################################################
                # PANORAMIC - automatic
                if ('architecture auto' in dictioP[i]):
                    dictioA = dictioP[i]['architecture auto']
                    if dictioA and ('MT' in dictioA):
                        fascicles = [fa for fa in dictioA
                                     if any(fnmatch.fnmatch(fa, p) for p in l2)]
                        nb_fasc_tot_p = nb_fasc_tot_p + len(fascicles)
                        for f in fascicles:
                            dictioF = dictioA[f]
                            idf = fam + '/' + i + '/' + f
                            # only keep fascicles that are entirely within the
                            # cropped image, to compare with manually
                            # identified fascicles
                            if len(dictioF.keys()) > 1 and \
                                    dictioF['FL']['in/out of the image'] == 'in image':
                                nb_fasc_in_p = nb_fasc_in_p + 1
                                p_autoFasc.append(
                                    (idf, dictioF['dist from insertion in mm']))
                                d_p_a[par].append(
                                    dictioF['dist from insertion in mm'])
                        if ('MT for labelled points' in dictioM['MT']):
                            for ind0 in range(len(dictioM['MT']['MT for labelled points'])):
                                elem = dictioM['MT']['MT for labelled points'][ind0]
                                if elem != 'error':
                                    mt_p_m[par].append(elem)  # MT in mm
                            for ind0 in range(len(dictioA['MT']['MT for labelled points'])):
                                elem = dictioA['MT']['MT for labelled points'][ind0]
                                if elem != 'error':
                                    mt_p_a[par].append(elem)

        '************************************************************************'
        '********************MATCHING AUTO & MANUAL FASCICLES*******************'
        listePair_manuF_s = []
        for n in range(len(s_manuFasc)):
            mf = s_manuFasc[n]
            subtr = [(tup, abs(tup[1] - mf[1])) for tup in s_autoFasc]
            subtr.sort(key=lambda x: x[1])
            closest = subtr[0]
            # tuple = (manual fascicle ID, auto fascicle ID, distance between the two)
            listePair_manuF_s.append((mf[0], closest[0][0], closest[1]))

        listePair_manuF_s.sort(key=lambda x: x[1])
        uniqueMatching = []
        counterL = 0
        while counterL < len(listePair_manuF_s):
            currentAutoFasc = listePair_manuF_s[counterL][1]
            correspondingAutoFasc = [(listePair_manuF_s[counterL][0],
                                      listePair_manuF_s[counterL][2])]
            rank = counterL + 1
            while rank < len(listePair_manuF_s) and \
                    listePair_manuF_s[rank][1] == currentAutoFasc:
                correspondingAutoFasc.append(
                    (listePair_manuF_s[rank][0], listePair_manuF_s[rank][2]))
                rank = rank + 1
            correspondingAutoFasc.sort(key=lambda x: x[1])
            uniqueMatching.append((correspondingAutoFasc[0][0], currentAutoFasc,
                                   correspondingAutoFasc[0][1]))
            counterL = rank

        for element in uniqueMatching:
            pathA = element[1].split('/')
            pathM = element[0].split('/')
            nb_fasc_filt_s = nb_fasc_filt_s + 1
            d_s_m_filtered[par].append(
                dictio[participant][pathM[0]]['BF']['simple'][pathM[1]]
                ['architecture manual'][pathM[2]]['dist from (0,0) of RGB image, in mm'])
            fl_s_m_filtered[par].append(
                dictio[participant][pathM[0]]['BF']['simple'][pathM[1]]
                ['architecture manual'][pathM[2]]['FL']['length in mm'])
            PAs_s_m_filtered[par].append(
                dictio[participant][pathM[0]]['BF']['simple'][pathM[1]]
                ['architecture manual'][pathM[2]]['PAsup']['value in degree'])
            PAi_s_m_filtered[par].append(
                dictio[participant][pathM[0]]['BF']['simple'][pathM[1]]
                ['architecture manual'][pathM[2]]['PAinf']['value in degree'])
            d_s_a_filtered[par].append(
                dictio[participant][pathA[0]]['BF']['simple'][pathA[1]]
                ['architecture auto'][pathA[2]]['dist from (0,0) of RGB image, in mm'])
            fl_s_a_filtered[par].append(
                dictio[participant][pathA[0]]['BF']['simple'][pathA[1]]
                ['architecture auto'][pathA[2]]['FL']['length in mm'])
            PAs_s_a_filtered[par].append(
                dictio[participant][pathA[0]]['BF']['simple'][pathA[1]]
                ['architecture auto'][pathA[2]]['PAsup']['value in degree'])
            PAi_s_a_filtered[par].append(
                dictio[participant][pathA[0]]['BF']['simple'][pathA[1]]
                ['architecture auto'][pathA[2]]['PAinf']['value in degree'])

        listePair_manuF_p = []
        for n in range(len(p_manuFasc)):
            mf = p_manuFasc[n]
            subtr = [(tup, abs(tup[1] - mf[1])) for tup in p_autoFasc]
            subtr.sort(key=lambda x: x[1])
            closest = subtr[0]
            # tuple = (manual fascicle ID, auto fascicle ID, distance between the two)
            listePair_manuF_p.append((mf[0], closest[0][0], closest[1]))

        listePair_manuF_p.sort(key=lambda x: x[1])
        uniqueMatching = []
        counterL = 0
        while counterL < len(listePair_manuF_p):
            currentAutoFasc = listePair_manuF_p[counterL][1]
            correspondingAutoFasc = [(listePair_manuF_p[counterL][0],
                                      listePair_manuF_p[counterL][2])]
            rank = counterL + 1
            while rank < len(listePair_manuF_p) and \
                    listePair_manuF_p[rank][1] == currentAutoFasc:
                correspondingAutoFasc.append(
                    (listePair_manuF_p[rank][0], listePair_manuF_p[rank][2]))
                rank = rank + 1
            correspondingAutoFasc.sort(key=lambda x: x[1])
            uniqueMatching.append((correspondingAutoFasc[0][0], currentAutoFasc,
                                   correspondingAutoFasc[0][1]))
            counterL = rank

        for element in uniqueMatching:
            pathA = element[1].split('/')
            pathM = element[0].split('/')
            nb_fasc_filt_p = nb_fasc_filt_p + 1
            d_p_m_filtered[par].append(
                dictio[participant][pathM[0]]['BF']['panoramic'][pathM[1]]
                ['architecture manual'][pathM[2]]['dist from insertion in mm'])
            fl_p_m_filtered[par].append(
                dictio[participant][pathM[0]]['BF']['panoramic'][pathM[1]]
                ['architecture manual'][pathM[2]]['FL']['length in mm'])
            PAs_p_m_filtered[par].append(
                dictio[participant][pathM[0]]['BF']['panoramic'][pathM[1]]
                ['architecture manual'][pathM[2]]['PAsup']['value in degree'])
            PAi_p_m_filtered[par].append(
                dictio[participant][pathM[0]]['BF']['panoramic'][pathM[1]]
                ['architecture manual'][pathM[2]]['PAinf']['value in degree'])
            d_p_a_filtered[par].append(
                dictio[participant][pathA[0]]['BF']['panoramic'][pathA[1]]
                ['architecture auto'][pathA[2]]['dist from insertion in mm'])
            fl_p_a_filtered[par].append(
                dictio[participant][pathA[0]]['BF']['panoramic'][pathA[1]]
                ['architecture auto'][pathA[2]]['FL']['length in mm'])
            PAs_p_a_filtered[par].append(
                dictio[participant][pathA[0]]['BF']['panoramic'][pathA[1]]
                ['architecture auto'][pathA[2]]['PAsup']['value in degree'])
            PAi_p_a_filtered[par].append(
                dictio[participant][pathA[0]]['BF']['panoramic'][pathA[1]]
                ['architecture auto'][pathA[2]]['PAinf']['value in degree'])

    # t-tests
    print('paired samples t-tests results: ')
    from scipy.stats.mstats import ttest_rel
    # NOTE: we cannot use the '..._filtered' arrays directly because of their
    # structure; we need to flatten them to 1-D lists
    t, p = ttest_rel(
        [item for sublist in PAs_s_m_filtered for item in sublist],
        [item for sublist in PAs_s_a_filtered for item in sublist], axis=None)
    print('PAS s', p)
    t2, p2 = ttest_rel(
        [item for sublist in PAs_p_m_filtered for item in sublist],
        [item for sublist in PAs_p_a_filtered for item in sublist], axis=None)
    print('PAS p', p2)
    t3, p3 = ttest_rel(
        [item for sublist in PAi_s_m_filtered for item in sublist],
        [item for sublist in PAi_s_a_filtered for item in sublist], axis=None)
    print('PAI s', p3)
    t4, p4 = ttest_rel(
        [item for sublist in PAi_p_m_filtered for item in sublist],
        [item for sublist in PAi_p_a_filtered for item in sublist], axis=None)
    print('PAI p', p4)
    t5, p5 = ttest_rel(
        [item for sublist in fl_s_m_filtered for item in sublist],
        [item for sublist in fl_s_a_filtered for item in sublist], axis=None)
    print('FL s', p5)
    t6, p6 = ttest_rel(
        [item for sublist in fl_p_m_filtered for item in sublist],
        [item for sublist in fl_p_a_filtered for item in sublist], axis=None)
    print('FL p', p6)
    t7, p7 = ttest_rel([item for sublist in mt_s_m for item in sublist],
                       [item for sublist in mt_s_a for item in sublist],
                       axis=None)
    print('mt s', p7)
    t7_2, p7_2 = ttest_rel([np.mean(sublist) for sublist in mt_s_m],
                           [np.mean(sublist) for sublist in mt_s_a],
                           axis=None)
    print('mt s for means', p7_2)
    t8, p8 = ttest_rel([item for sublist in mt_p_m for item in sublist],
                       [item for sublist in mt_p_a for item in sublist],
                       axis=None)
    print('mt p', p8)
    t8_2, p8_2 = ttest_rel([np.mean(sublist) for sublist in mt_p_m],
                           [np.mean(sublist) for sublist in mt_p_a],
                           axis=None)
    print('mt p for means', p8_2)

    print('independent samples t-tests results: ')
    from scipy.stats.mstats import ttest_rel, ttest_ind
    # NOTE: we cannot use the '..._filtered' arrays directly because of their
    # structure; we need to flatten them to 1-D lists
    t, p = ttest_ind(
        [item for sublist in PAs_s_m_filtered for item in sublist],
        [item for sublist in PAs_s_a_filtered for item in sublist], axis=None)
    print('PAS s', p)
    t2, p2 = ttest_ind(
        [item for sublist in PAs_p_m_filtered for item in sublist],
        [item for sublist in PAs_p_a_filtered for item in sublist], axis=None)
    print('PAS p', p2)
    t3, p3 = ttest_ind(
        [item for sublist in PAi_s_m_filtered for item in sublist],
        [item for sublist in PAi_s_a_filtered for item in sublist], axis=None)
    print('PAI s', p3)
    t4, p4 = ttest_ind(
        [item for sublist in PAi_p_m_filtered for item in sublist],
        [item for sublist in PAi_p_a_filtered for item in sublist], axis=None)
    print('PAI p', p4)
    t5, p5 = ttest_ind(
        [item for sublist in fl_s_m_filtered for item in sublist],
        [item for sublist in fl_s_a_filtered for item in sublist], axis=None)
    print('FL s', p5)
    t6, p6 = ttest_ind(
        [item for sublist in fl_p_m_filtered for item in sublist],
        [item for sublist in fl_p_a_filtered for item in sublist], axis=None)
    print('FL p', p6)
    t7, p7 = ttest_ind([item for sublist in mt_s_m for item in sublist],
                       [item for sublist in mt_s_a for item in sublist],
                       axis=None)
    print('mt s', p7)
    t7_2, p7_2 = ttest_ind([np.mean(sublist) for sublist in mt_s_m],
                           [np.mean(sublist) for sublist in mt_s_a],
                           axis=None)
    print('mt s for means', p7_2)
    t8, p8 = ttest_ind([item for sublist in mt_p_m for item in sublist],
                       [item for sublist in mt_p_a for item in sublist],
                       axis=None)
    print('mt p', p8)
    t8_2, p8_2 = ttest_ind([np.mean(sublist) for sublist in mt_p_m],
                           [np.mean(sublist) for sublist in mt_p_a],
                           axis=None)
    print('mt p for means', p8_2)

    # size effects
    s1 = sizeEffect(PAs_s_m_filtered, PAs_s_a_filtered)
    s2 = sizeEffect(PAs_p_m_filtered, PAs_p_a_filtered)
    s3 = sizeEffect(PAi_s_m_filtered, PAi_s_a_filtered)
    s4 = sizeEffect(PAi_p_m_filtered, PAi_p_a_filtered)
    s5 = sizeEffect(fl_s_m_filtered, fl_s_a_filtered)
    s6 = sizeEffect(fl_p_m_filtered, fl_p_a_filtered)
    s7 = sizeEffect(mt_s_m, mt_s_a)
    s8 = sizeEffect(mt_p_m, mt_p_a)
    print('Size effects: ')
    print('PAS s', s1)
    print('PAS p', s2)
    print('PAi s', s3)
    print('PAi p', s4)
    print('fl s', s5)
    print('fl p', s6)
    print('mt s', s7)
    print('mt p', s8)

    mt_s_a_filt = [[] for par in range(len(participants))]
    mt_s_m_filt = [[] for par in range(len(participants))]
    for p in range(len(mt_s_a)):
        for val in range(len(mt_s_a[p])):
            if p == 9:
                if mt_s_a[p][val] > mt_s_m[p][val] + 2.37 or \
                        mt_s_a[p][val] < mt_s_m[p][val] - 2.08:
                    print('outlier value: participant ', p, ' , place val ', val)
                else:
                    mt_s_a_filt[p].append(mt_s_a[p][val])
                    mt_s_m_filt[p].append(mt_s_m[p][val])
            else:
                mt_s_a_filt[p].append(mt_s_a[p][val])
                mt_s_m_filt[p].append(mt_s_m[p][val])

    print('after removing the values out of the LoA: ')
    t7, p7 = ttest_rel([item for sublist in mt_s_m_filt for item in sublist],
                       [item for sublist in mt_s_a_filt for item in sublist],
                       axis=None)
    print('mt s', p7)
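# `sizeEffect` is not defined in this excerpt; a plausible stand-in, assuming it
# reports Cohen's d between the flattened manual and automated measurements:
import numpy as np

def sizeEffect(manual, auto):
    # flatten the per-participant nested lists into 1-D arrays
    a = np.asarray([item for sublist in manual for item in sublist])
    b = np.asarray([item for sublist in auto for item in sublist])
    # pooled standard deviation for two independent samples
    pooled_sd = np.sqrt(((len(a) - 1) * a.var(ddof=1) +
                         (len(b) - 1) * b.var(ddof=1)) / (len(a) + len(b) - 2))
    return (a.mean() - b.mean()) / pooled_sd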
# Compute AUC, f1-score and accuracy
from sklearn.metrics import roc_auc_score

auc_log = roc_auc_score(y, log_y_scores)
auc_svm = roc_auc_score(y, svm_y_scores)
auc_knn = roc_auc_score(y, knn_y_scores)

# Compute accuracy and f1 from cross-validated predictions
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import cross_val_predict

for clf in [knn_clf, svm_clf, log_clf]:
    y_pred = cross_val_predict(clf, X, y, cv=5)
    accuracy = accuracy_score(y, y_pred)
    f1 = f1_score(y, y_pred)
    print(accuracy, f1)

auc_knn
auc_svm
auc_log

# %%
before_test = pd.concat([y_tr, X_select], axis=1)

from scipy.stats.mstats import ttest_ind

# the target column name means "postpartum abnormal blood glucose (yes=1, no=0)"
for col in list(before_test.columns):
    if col == '产后血糖异常(有=1,无=0)':
        continue
    n1 = before_test[col][before_test['产后血糖异常(有=1,无=0)'].isin([0])]
    n2 = before_test[col][before_test['产后血糖异常(有=1,无=0)'].isin([1])]
    stat, p = ttest_ind(n1, n2)
    print(col, p)

before_test['产后血糖异常(有=1,无=0)'].value_counts()