Example No. 1
    def test_result_attributes(self):
        x = np.array([
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
            1., 1., 1., 1., 2., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 1.,
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 1.,
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 1.,
            2., 1., 1., 2., 1., 1., 2., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2.,
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
            1., 1., 2., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 1.,
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 3., 1., 1.,
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
            1., 1., 1., 1., 1., 1.
        ])

        y = np.array([
            1., 1., 1., 1., 1., 1., 1., 2., 1., 2., 1., 1., 1., 1., 2., 1., 1.,
            1., 2., 1., 1., 1., 1., 1., 2., 1., 1., 3., 1., 1., 1., 1., 1., 1.,
            1., 1., 1., 1., 2., 1., 2., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1.,
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 1.,
            1., 2., 2., 1., 1., 2., 1., 1., 2., 1., 2., 1., 1., 1., 1., 2., 2.,
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2., 1., 1.,
            1., 1., 1., 2., 2., 2., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
            1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 2., 1., 1., 1., 1., 2., 1.,
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 2., 1.,
            1., 1., 1., 1., 1.
        ])

        res = mstats.mannwhitneyu(x, y)
        attributes = ('statistic', 'pvalue')
        check_named_results(res, attributes, ma=True)
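
A minimal sketch of the contract this test exercises, assuming only numpy and scipy: mstats.mannwhitneyu returns a named result whose fields can be read by attribute as well as by position.

import numpy as np
from scipy.stats import mstats

x = np.array([1., 1., 2., 1., 1., 3.])
y = np.array([1., 2., 1., 2., 1., 1.])
res = mstats.mannwhitneyu(x, y)
print(res.statistic, res.pvalue)  # attribute access, as checked above
print(res[0], res[1])             # positional access, as used in later examples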
Example No. 2
    def check_significance(gt_dist, seg_dist):

        normal_gt = normaltest(gt_dist)[1]
        normal_seg = normaltest(seg_dist)[1]

        # if both samples pass the normality test (p > 0.05) use the t-test,
        # otherwise use the Mann-Whitney U test
        if normal_gt > 0.05 and normal_seg > 0.05:

            pvalue = ttest_ind(gt_dist, seg_dist)[1]

        else:
            pvalue = mannwhitneyu(gt_dist, seg_dist)[1]


        # map the p-value to a significance tier; an elif chain with open
        # lower bounds also covers the boundary values 0.05, 0.01 and 0.001
        if pvalue > 0.05:
            return 0
        elif pvalue > 0.01:
            return 1
        elif pvalue > 0.001:
            return 2
        else:
            return 3
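
A hedged usage sketch for check_significance; the imports normaltest, ttest_ind and mannwhitneyu from scipy.stats are assumed, since the surrounding module is not shown, and the function is called here as if it were top-level.

import numpy as np
# assumed imports: from scipy.stats import normaltest, ttest_ind, mannwhitneyu
rng = np.random.default_rng(0)
print(check_significance(rng.normal(0, 1, 100), rng.normal(2, 1, 100)))  # expect 3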
Example No. 3
def geneStats(scoreColumn, negArray):
    scoreArray = np.ma.array(data=scoreColumn.dropna(), mask=False)
    ksPval = ms.ks_twosamp(scoreArray, negArray)[1]
    
    ksHi = ms.ks_twosamp(scoreArray, negArray, alternative='less')[1]
    ksLo = ms.ks_twosamp(scoreArray, negArray, alternative='greater')[1]
    if ksHi < ksLo:
        ksSign = 'P'
    else:
        ksSign = 'S'

    mwPval = ms.mannwhitneyu(scoreArray, negArray)[1]

    return ksPval, ksSign, mwPval
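
A hedged usage sketch for geneStats, assuming ms is scipy.stats.mstats (as the ks_twosamp and mannwhitneyu calls suggest), scoreColumn is a pandas Series, and negArray is a masked array of negative-control scores:

import numpy as np
import pandas as pd
import scipy.stats.mstats as ms

scores = pd.Series(np.random.normal(0.5, 1.0, 200))
neg = np.ma.array(data=np.random.normal(0.0, 1.0, 500), mask=False)
print(geneStats(scores, neg))  # (ksPval, 'P' or 'S', mwPval)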
Example No. 4
    def test_result_attributes(self):
        x = np.array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
                      1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1., 1.,
                      1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
                      1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
                      1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1., 1.,
                      1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
                      1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2.,
                      1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
                      1., 1., 2., 1., 1., 1., 1., 2., 1., 1., 2., 1., 1., 2.,
                      1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
                      1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2., 1.,
                      1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
                      1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
                      1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1., 1.,
                      1., 1., 1., 2., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
                      1., 1., 1., 1., 1., 1., 1., 1., 3., 1., 1., 1., 1., 1.,
                      1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
                      1., 1., 1., 1., 1., 1.])

        y = np.array([1., 1., 1., 1., 1., 1., 1., 2., 1., 2., 1., 1., 1., 1.,
                      2., 1., 1., 1., 2., 1., 1., 1., 1., 1., 2., 1., 1., 3.,
                      1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2., 1., 2., 1.,
                      1., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1., 1., 1., 1.,
                      1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1., 2.,
                      2., 1., 1., 2., 1., 1., 2., 1., 2., 1., 1., 1., 1., 2.,
                      2., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
                      1., 2., 1., 1., 1., 1., 1., 2., 2., 2., 1., 1., 1., 1.,
                      1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
                      2., 1., 1., 2., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1.,
                      1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 2., 1., 1.,
                      1., 1., 1., 1.])

        res = mstats.mannwhitneyu(x, y)
        attributes = ('statistic', 'pvalue')
        check_named_results(res, attributes, ma=True)
Example No. 5
            # Determine the settings from the filename
            problem, dup, ordering, nodes, mut, seed = base.split('_')
            with open_file_method(filename)(filename, 'r') as f:
                data = json.load(f)
            version = dup, ordering, nodes, mut
            if (dup, ordering) == ('skip', 'normal'):
                control_group = version
            statify[version].append(data[1]['evals'])
            active[version].append(data[1]['phenotype'])
            best = data[1]['bests'][-1]
            test = data[1]['test_inputs']
            individual = Individual.reconstruct_individual(best, test)
            simplified = individual.new(Individual.simplify)
            reduced[version].append(len(simplified.active))
            filecount += 1
        except ValueError:
            print(filename, "FAILED")

    # Kruskal's requires a rectangular matrix
    rect = make_rectangular(list(statify.values()), 10000001)

    print('Files Successfully Loaded', filecount)
    print('Kruskal Wallis', kruskalwallis(rect))
    for version, data in statify.items():
        print('--------- %s ---------' % str(version))
        print("MES, MAD", median_deviation(data))
        print('Active', median_deviation(active[version]))
        print('Reduced', median_deviation(reduced[version]))
        print('Mann Whitney U against Control', end=' ')
        print(mannwhitneyu(statify[control_group], data))
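
make_rectangular is not shown in this excerpt; a plausible sketch, under the assumption that it pads ragged rows with a sentinel value so that kruskalwallis receives equal-length groups:

def make_rectangular(rows, fill):
    # pad every row with the sentinel up to the length of the longest row
    width = max(len(row) for row in rows)
    return [list(row) + [fill] * (width - len(row)) for row in rows]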
Example No. 6
from scipy.stats import kruskal  # import inferred from the kruskal call below
from scipy.stats.mstats import mannwhitneyu
st, pvalue = kruskal([x[1] for x in customers_group1],
                     [x[1] for x in customers_group2])  # x[1] contains the expenditure of each product
if pvalue < 0.05:
    print(
        "The null hypothesis: \n\t'The average expenditure for each group is the same'\nis False"
    )
    # Mann-Whitney for each pair of groups. Eg.: milk_group1 - milk_group2, delicatessen_group1 - delicatessen_group2
    for product in range(2, 8):
        product_group1 = []
        product_group2 = []
        # Get the expenditures per type of product
        for value in customers_group1:
            product_index = data[value[0]].index(value[1])
            if product_index == product:
                product_group1.append(value[1])
        for value in customers_group2:
            product_index = data[value[0]].index(value[1])
            if product_index == product:
                product_group2.append(value[1])

        st, pvalue = mannwhitneyu(product_group1, product_group2)
        if pvalue < 0.05:
            print(
                "The null hypothesis: \n\t'The average income in the {} products is similar in groups 1 and 2'\nis False"
                .format(real_labels[product]))
        else:
            print(
                "The null hypothesis: \n\t'The average income in the {} products is similar in groups 1 and 2'\nis True"
                .format(real_labels[product]))
Example No. 7
# imports inferred from the calls used in this function
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import (ttest_ind, ttest_rel, wilcoxon, median_test,
                         bartlett, levene, fligner, mannwhitneyu)
from scipy.stats import f as ftest


def starplot(df=[],
             x='',
             y='',
             data=[],
             index=[],
             columns=[],
             fold=False,
             foldcol=0,
             mode=3,
             errorbar=True,
             plottype='barplot',
             stats='independent t test',
             test_var=False,
             stats_var='f test',
             crit_var=0.05,
             equal_var=True,
             rotate=0,
             elinewidth=0.5,
             fontsize=14,
             capsize=4,
             noffset_ylim=35,
             noffset_fst=10,
             noffset_diff=10,
             star_size=3,
             linewidth=1,
             crit=[0.05, 0.01, 0.001, 0.0001]):
    # data: list of data matrices (or DataFrames) to compare (rows: obs, columns: vars)
    # index: variable names; columns: names of the observation groups
    # mode 'adjacent': annotate stars between adjacent bars
    # mode 'control': annotate stars between every other bar and a selected control bar
    # mode 'mix': mixed mode
    # mode 3: annotate stars for all bar combinations (only 3 bars supported)

    crit = np.array(crit)
    plt.rcParams['font.family'] = 'Times New Roman'
    fig, ax = plt.subplots()
    star = ['*', '**', '***', '****']
    n = len(data)
    m = data[0].shape[1]
    # convert any DataFrames in data to plain ndarrays
    for i, j in enumerate(data):
        if isinstance(j, pd.DataFrame):
            data[i] = j.values.reshape(len(j.index), len(j.columns))
    if plottype == 'barplot':
        error = pd.DataFrame()
        mean = pd.DataFrame()
        for i in range(m):
            error[i] = [data[j][:, i].std() for j in range(n)]
            mean[i] = [data[j][:, i].mean() for j in range(n)]
        error = error.transpose()
        mean = mean.transpose()
        if len(index) != 0:
            error.index = index
            mean.index = index
        if len(columns) != 0:
            error.columns = columns
            mean.columns = columns
        if fold == True:
            oldmean = mean.copy()
            olderror = error.copy()
            for i in range(len(mean.columns)):
                mean.iloc[:, i] = oldmean.iloc[:, i] / oldmean.iloc[:, foldcol]
                error.iloc[:, i] = olderror.iloc[:, i] / oldmean.iloc[:, foldcol]
        if errorbar == True:
            plot = mean.plot.bar(yerr=error,
                                 ax=ax,
                                 rot=rotate,
                                 capsize=capsize,
                                 error_kw=dict(elinewidth=elinewidth),
                                 fontsize=fontsize)
            max_bar = [[mean.iloc[j, i] + error.iloc[j, i] for i in range(n)]
                       for j in range(m)]
            min_bar = [
                mean.iloc[j, i] - error.iloc[j, i] for i in range(n)
                for j in range(m)
            ]
        else:
            plot = mean.plot.bar(ax=ax,
                                 rot=rotate,
                                 capsize=capsize,
                                 error_kw=dict(elinewidth=elinewidth),
                                 fontsize=fontsize)
            max_bar = [[mean.iloc[j, i] for i in range(n)] for j in range(m)]
            min_bar = [mean.iloc[j, i] for i in range(n) for j in range(m)]
    elif plottype == 'boxplot':
        print("under buiding")
    ylim = 0
    offset = max([max_bar[i][j] for i in range(m) for j in range(n)]) / 100
    blank = []
    if mode == 3:
        for j in range(m):
            level = np.zeros(n)
            for i in range(n):
                if i < n - 1:
                    k = i + 1
                else:
                    k = 0
                if test_var == True:
                    if stats_var == 'f test':
                        f = 0.5 - abs(0.5 - ftest.sf(
                            data[i][:, j].var() / data[k][:, j].var(),
                            len(data[i][:, j]) - 1,
                            len(data[k][:, j]) - 1))
                        if crit_var / 2 > f:
                            equal_var = False
                        else:
                            equal_var = True
                    else:
                        if stats_var == 'bartlett':
                            f = bartlett(data[i][:, j], data[k][:, j])[1]
                        elif stats_var == 'levene':
                            f = levene(data[i][:, j], data[k][:, j])[1]
                        elif stats_var == 'fligner':
                            f = fligner(data[i][:, j], data[k][:, j])[1]
                        if crit_var > f:
                            equal_var = False
                        else:
                            equal_var = True
                if stats == 'independent t test':
                    p = ttest_ind(data[i][:, j],
                                  data[k][:, j],
                                  equal_var=equal_var)[1]
                elif stats == 'paired t test':
                    if equal_var == True:
                        p = ttest_rel(data[i][:, j], data[k][:, j])[1]
                    else:
                        p = 0
                elif stats == 'median test':
                    p = median_test(data[i][:, j], data[k][:, j])[1]
                elif stats == 'mannwhitneyu':
                    if equal_var == True:
                        p = mannwhitneyu(data[i][:, j], data[k][:, j])[1]
                    else:
                        p = 0
                elif stats == 'wilcoxon':
                    if equal_var == True:
                        p = wilcoxon(data[i][:, j], data[k][:, j])[1]
                    else:
                        p = 0
                level[i] = len(crit) - len(crit.compress(p > crit))
            for k in range(n):
                height = 0
                if level[k] != 0 and k != n - 1:
                    center = [
                        plot.patches[k * m + j].get_x(),
                        plot.patches[k * m + m + j].get_x()
                    ]
                    height = max([max_bar[j][k], max_bar[j][k + 1]])
                    h1 = max_bar[j][k]
                    h2 = max_bar[j][k + 1]
                    width = plot.patches[k * m + j].get_width()
                    blank.append(
                        (center[0] + width / 2,
                         height + noffset_fst * offset + (-1)**k * 2 * offset))
                    blank.append(
                        (center[1] + width / 2,
                         height + noffset_fst * offset + (-1)**k * 2 * offset))
                    ax.vlines(x=center[0] + width / 2,
                              ymin=h1 + offset * 2,
                              ymax=height + noffset_fst * offset +
                              (-1)**k * 2 * offset,
                              lw=linewidth)
                    ax.vlines(x=center[1] + width / 2,
                              ymin=h2 + offset * 2,
                              ymax=height + noffset_fst * offset +
                              (-1)**k * 2 * offset,
                              lw=linewidth)
                    ax.annotate(star[int(level[k] - 1)],
                                xy=((center[0] + center[1]) / 2 + width / 2,
                                    height + (noffset_fst + 1) * offset +
                                    (-1)**k * 2 * offset),
                                ha='center',
                                size=star_size)
                elif level[k] != 0 and k == n - 1:
                    center = [
                        plot.patches[j].get_x(),
                        plot.patches[k * m + j].get_x()
                    ]
                    # define width here too, so this branch does not rely on a
                    # value left over from an earlier iteration
                    width = plot.patches[j].get_width()
                    height = max(max_bar[j])
                    h1 = max_bar[j][0]
                    h2 = max_bar[j][k]
                    blank.append(
                        (center[0] + width / 2,
                         height + (noffset_fst + noffset_diff) * offset))
                    blank.append((center[1] + width / 2,
                                  height + (noffset_fst + noffset_diff) * offset))
                    ax.vlines(x=center[0] + width / 2,
                              ymin=h1 + offset * 2,
                              ymax=height +
                              (noffset_fst + noffset_diff) * offset,
                              lw=linewidth)
                    ax.vlines(x=center[1] + width / 2,
                              ymin=h2 + offset * 2,
                              ymax=height +
                              (noffset_fst + noffset_diff) * offset,
                              lw=linewidth)
                    ax.annotate(star[int(level[k] - 1)],
                                xy=((center[0] + center[1]) / 2 + width / 2,
                                    height +
                                    (noffset_fst + noffset_diff + 1) * offset),
                                ha='center',
                                size=star_size)
                if height > ylim:
                    ylim = height
    if mode == 'adjacent':
        for j in range(m):
            level = np.zeros(n - 1)
            for i in range(n - 1):
                k = i + 1
                if test_var == True:
                    if stats_var == 'f test':
                        f = 0.5 - abs(0.5 - ftest.sf(
                            data[i][:, j].var() / data[k][:, j].var(),
                            len(data[i][:, j]) - 1,
                            len(data[k][:, j]) - 1))
                        if crit_var / 2 > f:
                            equal_var = False
                        else:
                            equal_var = True
                    else:
                        if stats_var == 'bartlett':
                            f = bartlett(data[i][:, j], data[k][:, j])[1]
                        elif stats_var == 'levene':
                            f = levene(data[i][:, j], data[k][:, j])[1]
                        elif stats_var == 'fligner':
                            f = fligner(data[i][:, j], data[k][:, j])[1]
                        if crit_var > f:
                            equal_var = False
                        else:
                            equal_var = True
                if stats == 'independent t test':
                    p = ttest_ind(data[i][:, j],
                                  data[k][:, j],
                                  equal_var=equal_var)[1]
                elif stats == 'paired t test':
                    if equal_var == True:
                        p = ttest_rel(data[i][:, j], data[k][:, j])[1]
                    else:
                        p = 0
                elif stats == 'median test':
                    p = median_test(data[i][:, j], data[k][:, j])[1]
                elif stats == 'mannwhitneyu':
                    if equal_var == True:
                        p = mannwhitneyu(data[i][:, j], data[k][:, j])[1]
                    else:
                        p = 0
                elif stats == 'wilcoxon':
                    if equal_var == True:
                        p = wilcoxon(data[i][:, j], data[k][:, j])[1]
                    else:
                        p = 0
                level[i] = len(crit) - len(crit.compress(p > crit))
            for k in range(n - 1):
                height = 0
                if level[k] != 0:
                    center = [
                        plot.patches[k * m + j].get_x(),
                        plot.patches[k * m + m + j].get_x()
                    ]
                    height = max([max_bar[j][k], max_bar[j][k + 1]])
                    h1 = max_bar[j][k]
                    h2 = max_bar[j][k + 1]
                    width = plot.patches[k * m + j].get_width()
                    blank.append(
                        (center[0] + width / 2,
                         height + noffset_fst * offset + (-1)**k * 2 * offset))
                    blank.append(
                        (center[1] + width / 2,
                         height + noffset_fst * offset + (-1)**k * 2 * offset))
                    ax.vlines(x=center[0] + width / 2,
                              ymin=h1 + offset * 2,
                              ymax=height + noffset_fst * offset +
                              (-1)**k * 2 * offset,
                              lw=linewidth)
                    ax.vlines(x=center[1] + width / 2,
                              ymin=h2 + offset * 2,
                              ymax=height + noffset_fst * offset +
                              (-1)**k * 2 * offset,
                              lw=linewidth)
                    ax.annotate(star[int(level[k] - 1)],
                                xy=((center[0] + center[1]) / 2 + width / 2,
                                    height + (noffset_fst + 1) * offset +
                                    (-1)**k * 2 * offset),
                                ha='center',
                                size=star_size)
                if height > ylim:
                    ylim = height
    ax.set_ylim(min(0,
                    min(min_bar) - 10 * offset), ylim + noffset_ylim * offset)
    for j, i in enumerate(blank):
        ax.vlines(x=i[0],
                  ymin=i[1],
                  ymax=i[1] + offset * 2,
                  color='white',
                  lw=1.2 * linewidth)
        if j % 2 == 1:
            ax.hlines(y=i[1], xmin=blank[j - 1][0], xmax=blank[j][0], lw=linewidth)
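
A hedged usage sketch for starplot (all names below are illustrative): three groups of observations on two variables, compared pairwise with the Mann-Whitney U test in mode 3.

import numpy as np
rng = np.random.default_rng(0)
groups = [rng.normal(loc, 0.1, size=(10, 2)) for loc in (1.0, 1.3, 1.6)]
starplot(data=groups,
         index=['var 1', 'var 2'],    # variable names (rows of the plot frame)
         columns=['ctrl', 'A', 'B'],  # group names (one bar per group)
         mode=3,
         stats='mannwhitneyu')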
Example No. 8
print("\n")

#"""
print(normaltest(all_data["Gaussian"])[1])
print(normaltest(all_data["Hessian"])[1])
print(normaltest(all_data["Laplacian"])[1])
print(normaltest(all_data["Ilastik"])[1])
print(normaltest(all_data["MitoSegNet"])[1])
print(normaltest(all_data["Finetuned\nFiji U-Net"])[1])
#print(normaltest(all_data["Fiji U-Net"])[1])
#"""

print("\n")

print(mannwhitneyu(all_data["Gaussian"], all_data["MitoSegNet"])[1])
print(mannwhitneyu(all_data["Hessian"], all_data["MitoSegNet"])[1])
print(mannwhitneyu(all_data["Laplacian"], all_data["MitoSegNet"])[1])
print(mannwhitneyu(all_data["Ilastik"], all_data["MitoSegNet"])[1])
print(
    mannwhitneyu(all_data["Finetuned\nFiji U-Net"], all_data["MitoSegNet"])[1])
#print(mannwhitneyu(all_data["Fiji U-Net"], all_data["MitoSegNet"])[1])

p_g = mannwhitneyu(all_data["Gaussian"], all_data["MitoSegNet"])[1]
p_h = mannwhitneyu(all_data["Hessian"], all_data["MitoSegNet"])[1]
p_l = mannwhitneyu(all_data["Laplacian"], all_data["MitoSegNet"])[1]
p_i = mannwhitneyu(all_data["Ilastik"], all_data["MitoSegNet"])[1]
p_f = mannwhitneyu(all_data["Finetuned\nFiji U-Net"],
                   all_data["MitoSegNet"])[1]

Example No. 9
# imports inferred from the calls used in this function
import itertools

import numpy as np
from scipy import stats


def u_stat_all_label(data, ts=None, labels=None, mask=None):
    """
    Calculate the U-statistic for all label pairings in a HCTSA_loc.mat file. The operations can be masked 
    by a boolean array.
    Parameters:
    -----------
    ts : dict, optional (either labels or ts has to be given)
        'TimeSeries' dictionary from HCTSA_loc.mat file.
    labels : ndarray, optional (either labels or ts has to be given)
        An array containing the label for each timeseries (row) in data.
    data : ndarray
        data array where each row represents a timeseries and each column represents a feature
    mask : ndarray dtype='bool', optional
        If given this acts as mask to which features are included in the calculation
    Returns:
    --------
    ranks : ndarray of dim (n_labels * (n_labels-1) / 2, nr_features)
        The U-statistic for all features and label pairs, where a row represents a pair of labels.
        Every column represents one feature.
    labels_unique : list
        List of all unique labels
    label_ind_list : list 
        List of lists where each sub-list i represents all row-indices in data containing timeseries
        for labels_unique[i]
    """
    # ---------------------------------------------------------------------
    # mask the data if required
    # ---------------------------------------------------------------------   
    if mask is not None:
        data = data[:, mask]
    # ---------------------------------------------------------------------
    # extract the unique labels
    # ---------------------------------------------------------------------
    if labels is None:
        # FIXME this exists only to stay compatible with previous versions
        if ts is not None:
            labels = [x.split(',')[0] for x in ts['keywords']]
        else:
            raise ValueError("either 'labels' or 'ts' has to be given")

    labels_unique = list(set(labels))
    # FIXME Not sure if this is necessary or why it is there in the first place
    labels = np.array(labels, dtype=np.dtype('U64'))  # unicode dtype so comparisons against str labels work in Python 3
    label_ind_list = []
    
    # ---------------------------------------------------------------------
    # get indices for all unique labels
    # ---------------------------------------------------------------------
    for i,label in enumerate(labels_unique):
        label_ind_list.append(np.nonzero((labels == label))[0])
    n_labels = len(label_ind_list)
    
    # ---------------------------------------------------------------------
    # calculate Mann-Whitney u-test
    # ---------------------------------------------------------------------
    ranks = np.zeros((n_labels * (n_labels - 1) // 2, data.shape[1]))
    norm = np.zeros(n_labels * (n_labels - 1) // 2)

    for i, (label_ind_0, label_ind_1) in enumerate(itertools.combinations(range(n_labels), 2)):
        data_0 = data[label_ind_list[label_ind_0], :]
        data_1 = data[label_ind_list[label_ind_1], :]

        print "calculating label pair {:d} / {:d}".format(i+1,n_labels * (n_labels-1) / 2)
        for k in range(0,data.shape[1]):
            if np.ma.all((data_0[:,k] == data_0[0,k])) and np.ma.all((data_1[:,k] == data_0[0,k] )):
                ranks[i,k] = data_0[:,k].shape[0] * data_1[:,k].shape[0]/2.
            else:
                ranks[i,k] = stats.mannwhitneyu(data_0[:,k], data_1[:,k])[0]
            # -- Calculate the norm factor for every label pair
            norm[i] = data_0.shape[0] *  data_1.shape[0]   
    return ranks, norm, labels_unique, label_ind_list
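
A hedged usage sketch on synthetic data (the imports added above are assumed): two labels over five features yield exactly one label pair.

import numpy as np
data = np.random.rand(20, 5)
labels = ['a'] * 10 + ['b'] * 10
ranks, norm, labels_unique, label_ind_list = u_stat_all_label(data, labels=labels)
print(ranks.shape)  # (1, 5): one label pair, five features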
Example No. 10
# imports inferred from the calls used in this function
import os
import collections
import cv2
import numpy as np
import pandas as pd
from scipy.stats import mannwhitneyu, energy_distance
from skimage import color, io, img_as_bool
from skimage.measure import label, regionprops
from skimage.morphology import skeletonize
from skan import summarise


def morph_distribution(path, seg_name):


    gt_folder_path = path + os.sep + "Ground_Truth"
    seg_folder_path = path + os.sep + seg_name
    org_folder_path = path + os.sep + "Original"

    #image_list = os.listdir(gt_folder_path)
    image_list = ["170412 MD4049 w10 FL200.tif"]

    columns = ["Image", "Area", "Eccentricity", "Aspect Ratio", "Perimeter", "Solidity", "Number of branches",
               "Branch length", "Total branch length", "Curvature index", "Mean intensity"]

    df = pd.DataFrame(columns=columns)
    df_2 = pd.DataFrame(columns=columns)
    df_c = pd.DataFrame(columns=columns)


    list_area = []
    list_ecc = []
    list_ar = []
    list_per = []
    list_sol = []

    list_nb = []
    list_tbl = []
    list_bl = []
    list_ci = []

    list_int = []


    clist_area = []
    clist_ecc = []
    clist_ar = []
    clist_per = []
    clist_sol = []

    clist_nb = []
    clist_tbl = []
    clist_bl = []
    clist_ci = []

    clist_int = []


    elist_area = []
    elist_ecc = []
    elist_ar = []
    elist_per = []
    elist_sol = []

    elist_nb = []
    elist_tbl = []
    elist_bl = []
    elist_ci = []

    elist_int = []


    ###################################################

    ###################################################

    for image in image_list:


        print(image)

        gt = cv2.imread(gt_folder_path + os.sep + image, cv2.IMREAD_GRAYSCALE)
        seg = cv2.imread(seg_folder_path + os.sep + image , cv2.IMREAD_GRAYSCALE)
        org = cv2.imread(org_folder_path + os.sep + image, cv2.IMREAD_GRAYSCALE)

        # skeletonize
        ##################################################
        ##################################################

        def get_branch_meas(path):

            """
            05-07-19

            for some reason the sumarise function of skan prints out a different number of objects, which is why
            i currently cannot include the branch data in the same table as the morph parameters
            """

            read_lab_skel = img_as_bool(color.rgb2gray(io.imread(path)))
            lab_skel = skeletonize(read_lab_skel).astype("uint8")

            branch_data = summarise(lab_skel)

            # curv_ind = (branch_data["branch-distance"] - branch_data["euclidean-distance"]) / branch_data["euclidean-distance"]

            curve_ind = []
            for bd, ed in zip(branch_data["branch-distance"], branch_data["euclidean-distance"]):

                if ed != 0.0:
                    curve_ind.append((bd - ed) / ed)
                else:
                    curve_ind.append(bd - ed)

            branch_data["curvature-index"] = curve_ind

            grouped_branch_data_mean = branch_data.groupby(["skeleton-id"], as_index=False).mean()

            grouped_branch_data_sum = branch_data.groupby(["skeleton-id"], as_index=False).sum()

            counter = collections.Counter(branch_data["skeleton-id"])

            n_branches = []
            for i in grouped_branch_data_mean["skeleton-id"]:
                n_branches.append(counter[i])

            branch_len = grouped_branch_data_mean["branch-distance"].tolist()
            tot_branch_len = grouped_branch_data_sum["branch-distance"].tolist()
            curv_ind = grouped_branch_data_mean["curvature-index"].tolist()

            return n_branches, branch_len, tot_branch_len, curv_ind

        def significance(pvalue):

            # map the p-value to a significance tier; an elif chain with open
            # lower bounds also covers the boundary values 0.05, 0.01 and 0.001
            if pvalue > 0.05:
                return 0
            elif pvalue > 0.01:
                return 1
            elif pvalue > 0.001:
                return 2
            else:
                return 3

        # effect size (Cohen's d) using the pooled standard deviation; note
        # that this variant takes the absolute difference of medians, not means
        def cohens_d(data1, data2):

            p_std = np.sqrt(
                ((len(data1) - 1) * np.var(data1) + (len(data2) - 1) * np.var(data2)) / (len(data1) + len(data2) - 2))

            cohens_d = np.abs(np.median(data1) - np.median(data2)) / p_std

            return cohens_d


        ##################################################
        ##################################################

        gt_nb, gt_bl, gt_tbl, gt_ci = get_branch_meas(gt_folder_path + os.sep + image)
        seg_nb, seg_bl, seg_tbl, seg_ci = get_branch_meas(seg_folder_path + os.sep + image)


        #######################

        pvalue_nb = mannwhitneyu(gt_nb, seg_nb)[1]
        list_nb.append(significance(pvalue_nb))

        elist_nb.append(energy_distance(gt_nb, seg_nb))
        clist_nb.append(cohens_d(gt_nb, seg_nb))

        pvalue_bl = mannwhitneyu(gt_bl, seg_bl)[1]
        list_bl.append(significance(pvalue_bl))

        elist_bl.append(energy_distance(gt_bl, seg_bl))
        clist_bl.append(cohens_d(gt_bl, seg_bl))

        pvalue_tbl = mannwhitneyu(gt_tbl, seg_tbl)[1]
        list_tbl.append(significance(pvalue_tbl))

        elist_tbl.append(energy_distance(gt_tbl, seg_tbl))
        clist_tbl.append(cohens_d(gt_tbl, seg_tbl))

        pvalue_ci = mannwhitneyu(gt_ci, seg_ci)[1]
        list_ci.append(significance(pvalue_ci))

        elist_ci.append(energy_distance(gt_ci, seg_ci))
        clist_ci.append(cohens_d(gt_ci, seg_ci))

        #######################

        # label image mask
        gt_labelled = label(gt)
        seg_labelled = label(seg)


        # Get region props of labelled images
        gt_reg_props = regionprops(label_image=gt_labelled, intensity_image=org, coordinates='xy')
        seg_reg_props = regionprops(label_image=seg_labelled, intensity_image=org, coordinates='xy')


        # compare shape descriptor distributions
        #################################

        # Intensity

        gt_int = [i.mean_intensity for i in gt_reg_props]
        seg_int = [i.mean_intensity for i in seg_reg_props]

        pvalue_int = mannwhitneyu(gt_int, seg_int)[1]
        list_int.append(significance(pvalue_int))

        elist_int.append(energy_distance(gt_int, seg_int))
        clist_int.append(cohens_d(gt_int, seg_int))

        # Area
        gt_area = [i.area for i in gt_reg_props]
        seg_area = [i.area for i in seg_reg_props]

        pvalue_area = mannwhitneyu(gt_area, seg_area)[1]

        list_area.append(significance(pvalue_area))
        #list_area.append(pvalue_area)
        #list_area2.append(cohens_d(gt_area, seg_area))

        elist_area.append(energy_distance(gt_area, seg_area))
        clist_area.append(cohens_d(gt_area, seg_area))

        # Eccentricity
        gt_ecc = [i.eccentricity for i in gt_reg_props]
        seg_ecc = [i.eccentricity for i in seg_reg_props]

        pvalue_ecc = mannwhitneyu(gt_ecc, seg_ecc)[1]

        list_ecc.append(significance(pvalue_ecc))
        #list_ecc.append(pvalue_ecc)

        #list_ecc2.append(cohens_d(gt_ecc, seg_ecc))

        elist_ecc.append(energy_distance(gt_ecc, seg_ecc))
        clist_ecc.append(cohens_d(gt_ecc, seg_ecc))


        # Aspect ratio

        gt_ar = [i.major_axis_length/i.minor_axis_length for i in gt_reg_props if i.minor_axis_length != 0]
        seg_ar = [i.major_axis_length/i.minor_axis_length for i in seg_reg_props if i.minor_axis_length != 0]

        pvalue_ar = mannwhitneyu(gt_ar, seg_ar)[1]

        list_ar.append(significance(pvalue_ar))
        #list_ar.append(pvalue_ar)

        #list_ar2.append(cohens_d(gt_ar, seg_ar))

        elist_ar.append(energy_distance(gt_ar, seg_ar))
        clist_ar.append(cohens_d(gt_ar, seg_ar))


        # Perimeter
        gt_per = [i.perimeter for i in gt_reg_props]
        seg_per = [i.perimeter for i in seg_reg_props]

        pvalue_per = mannwhitneyu(gt_per, seg_per)[1]

        list_per.append(significance(pvalue_per))
        #list_per.append(pvalue_per)

        #list_per2.append(cohens_d(gt_per, seg_per))

        elist_per.append(energy_distance(gt_per, seg_per))
        clist_per.append(cohens_d(gt_per, seg_per))

        # Solidity
        gt_sol = [i.solidity for i in gt_reg_props]
        seg_sol = [i.solidity for i in seg_reg_props]

        #print(len(gt_sol))

        pvalue_sol = mannwhitneyu(gt_sol, seg_sol)[1]

        list_sol.append(significance(pvalue_sol))
        #list_sol.append(pvalue_sol)

        #list_sol2.append(cohens_d(gt_sol, seg_sol))

        elist_sol.append(energy_distance(gt_sol, seg_sol))
        clist_sol.append(cohens_d(gt_sol, seg_sol))

        #################################


    columns = ["Image", "Area", "Eccentricity", "Aspect Ratio", "Perimeter", "Solidity", "Number of branches",
               "Branch length", "Total branch length", "Curvature index", "Mean Intensity"]


    df["Image"] = image_list
    df["Area"] = list_area
    df["Eccentricity"] = list_ecc
    df["Aspect Ratio"] = list_ar
    df["Perimeter"] = list_per
    df["Solidity"] = list_sol
    df["Number of branches"] = list_nb
    df["Branch length"] = list_bl
    df["Total branch length"] = list_tbl
    df["Curvature index"] = list_ci
    df["Mean intensity"] = list_int

    df_2["Image"] = image_list
    df_2["Area"] = elist_area
    df_2["Eccentricity"] = elist_ecc
    df_2["Aspect Ratio"] = elist_ar
    df_2["Perimeter"] = elist_per
    df_2["Solidity"] = elist_sol
    df_2["Number of branches"] = elist_nb
    df_2["Branch length"] = elist_bl
    df_2["Total branch length"] = elist_tbl
    df_2["Curvature index"] = elist_ci
    df_2["Mean intensity"] = elist_int

    df_2["Image"] = image_list
    df_c["Area"] = clist_area
    df_c["Eccentricity"] = clist_ecc
    df_c["Aspect Ratio"] = clist_ar
    df_c["Perimeter"] = clist_per
    df_c["Solidity"] = clist_sol
    df_c["Number of branches"] = clist_nb
    df_c["Branch length"] = clist_bl
    df_c["Total branch length"] = clist_tbl
    df_c["Curvature index"] = clist_ci
    df_c["Mean intensity"] = clist_int


    #total_values = len(image_list)*5

    zero_p = 0
    one_p = 0
    two_p = 0
    three_p = 0

    for index, row in df.iterrows():

        for column in row:

            if column == 0:
                zero_p+=1

            elif column == 1:
                one_p+=1

            elif column == 2:
                two_p+=1

            elif column == 3:
                three_p+=1


    #print(zero_p/total_values)
    #print(one_p/total_values)
    #print(two_p/total_values)
    #print(three_p/total_values)

    # raw data

    df.to_csv(path + os.sep + seg_name + "_Morph_Dist_comparison.csv")
    df_2.to_csv(path + os.sep + seg_name + "_EnergyDistance.csv")
    df_c.to_csv(path + os.sep + seg_name + "_EffectSize.csv")
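
A hedged sketch of two of the per-descriptor metrics written to the CSVs above, computed for one pair of synthetic distributions (the significance tier would come from the nested significance helper):

import numpy as np
from scipy.stats import mannwhitneyu, energy_distance

rng = np.random.default_rng(1)
gt, seg = rng.normal(10, 2, 300), rng.normal(11, 2, 300)
print(mannwhitneyu(gt, seg)[1], energy_distance(gt, seg))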
Example No. 11
print("\n")

#"""
print(normaltest(h_l)[1])
print(normaltest(la_l)[1])
print(normaltest(ga_l)[1])
print(normaltest(il_l)[1])
print(normaltest(m_l)[1])
print(normaltest(u_l_pt)[1])
#"""
#print(normaltest(u_l)[1])

print("\n")

print(mannwhitneyu(m_l, h_l)[1])
print(mannwhitneyu(m_l, la_l)[1])
print(mannwhitneyu(m_l, ga_l)[1])
print(mannwhitneyu(m_l, il_l)[1])
print(mannwhitneyu(m_l, u_l_pt)[1])

#print(mannwhitneyu(m_l, u_l)[1])
"""
all_data["Gaussian"] = ga_l
all_data["Hessian"] = h_l
all_data["Laplacian"] = la_l
all_data["Ilastik"] = il_l

all_data["MitoSegNet"] = m_l
all_data["Finetuned\nFiji U-Net"] = u_l_pt
"""
Example No. 12
print(normaltest(data["Gaussian"].dropna())[1])
print(normaltest(data["Laplacian"].dropna())[1])
print(normaltest(data["Hessian"].dropna())[1])
print(normaltest(data["Ilastik"].dropna())[1])
print(normaltest(data["MitoSegNet"].dropna())[1])
"""

print(len(data["Gaussian"].dropna()))
print(len(data["Laplacian"].dropna()))
print(len(data["Hessian"].dropna()))
print(len(data["Ilastik"].dropna()))
print(len(data["MitoSegNet"].dropna()))

print("\n")

print(mannwhitneyu(mitonet, hess)[1])
print(mannwhitneyu(mitonet, gauss)[1])
print(mannwhitneyu(mitosegnet, lap)[1])
print(mannwhitneyu(mitosegnet, il)[1])


# pooled standard deviation for calculation of effect size (cohen's d)
def cohens_d(data1, data2):
    p_std = np.sqrt(
        ((len(data1) - 1) * np.var(data1) +
         (len(data2) - 1) * np.var(data2)) / (len(data1) + len(data2) - 2))

    cohens_d = np.abs(np.average(data1) - np.average(data2)) / p_std

    return cohens_d
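
A quick hedged check of this cohens_d on synthetic data: two unit-variance samples whose means differ by 0.5 should give a d of roughly 0.5.

import numpy as np
a = np.random.normal(0.0, 1.0, 10000)
b = np.random.normal(0.5, 1.0, 10000)
print(cohens_d(a, b))  # approximately 0.5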
Example No. 13
# imports inferred from the calls used in this function; `mIO` is the
# project's own .mat-file helper module (its import path is not shown here)
import itertools

import numpy as np
from scipy import stats


def u_stat_all_label_file_name(file_name, mask=None, is_from_old_matlab=False):
    """
    Calculate the U-statistic for all label pairings in a HCTSA_loc.mat file. The operations can be masked
    by a boolean array.
    Parameters:
    -----------
    file_name : string
        File name of the HCTSA_loc.mat file containing at least the matrices 'TimeSeries' and 'TS_DataMat'
    mask : ndarray dtype='bool', optional
        If given this acts as mask to which features are included in the calculation
    is_from_old_matlab : bool
        Set this if the HCTSA_loc.mat file was saved by an older version of the comp engine, where the order of entries differs.

    Returns:
    --------
    ranks : ndarray of dim (n_labels * (n_labels-1) / 2, nr_features)
        The U-statistic for all features and label pairs, where a row represents a pair of labels.
        Every column represents one feature
    labels_unique : list
        List of all unique labels
    label_ind_list : list
        List of lists where each sub-list i represents all row-indices in data containing timeseries
        for labels_unique[i]
    """
    # ---------------------------------------------------------------------
    # load the data
    # ---------------------------------------------------------------------
    ts, data = mIO.read_from_mat_file(file_name, ['TimeSeries', 'TS_DataMat'],
                                      is_from_old_matlab=is_from_old_matlab)

    # ---------------------------------------------------------------------
    # mask the data if required
    # ---------------------------------------------------------------------
    if mask is not None:
        data = data[:, mask]

    # ---------------------------------------------------------------------
    # extract the unique labels
    # ---------------------------------------------------------------------
    labels = [int(x.split(',')[-1]) for x in ts['keywords']]
    labels_unique = list(set(labels))

    labels = np.array(labels)
    label_ind_list = []

    # ---------------------------------------------------------------------
    # get indices for all unique labels
    # ---------------------------------------------------------------------
    for i, label in enumerate(labels_unique):
        label_ind_list.append(np.nonzero((labels == label))[0])
    n_labels = len(label_ind_list)

    # ---------------------------------------------------------------------
    # calculate Mann-Whitney u-test
    # ---------------------------------------------------------------------
    ranks = np.zeros((n_labels * (n_labels - 1) // 2, data.shape[1]))

    for i, (label_ind_0, label_ind_1) in enumerate(
            itertools.combinations(range(n_labels), 2)):
        # -- select the data for the current labels
        data_0 = data[label_ind_list[label_ind_0], :]
        data_1 = data[label_ind_list[label_ind_1], :]
        print(i + 1, '/', n_labels * (n_labels - 1) // 2)
        for k in range(0, data.shape[1]):
            # -- in the case of same value for every feature in both arrays set max possible value
            if np.ma.all(data_0[:, k] == data_0[0, k]) and np.ma.all(data_1[:, k] == data_0[0, k]):
                ranks[i, k] = data_0[:, k].shape[0] * data_1[:, k].shape[0] / 2.
            else:
                ranks[i, k] = stats.mannwhitneyu(data_0[:, k], data_1[:, k])[0]

    return ranks, labels_unique, label_ind_list
Example No. 14
def u_stat_all_label(data, ts=None, labels=None, mask=None):
    """
    Calculate the U-statistic for all label pairings in a HCTSA_loc.mat file. The operations can be masked
    by a boolean array.
    Parameters:
    -----------
    ts : dict, optional (either labels or ts has to be given)
        'TimeSeries' dictionary from HCTSA_loc.mat file.
    labels : ndarray, optional (either labels or ts has to be given)
        An array containing the label for each timeseries (row) in data.
    data : ndarray
        data array where each row represents a timeseries and each column represents a feature
    mask : ndarray dtype='bool', optional
        If given this acts as mask to which features are included in the calculation
    Returns:
    --------
    ranks : ndarray of dim (n_labels * (n_labels-1) / 2, nr_features)
        The U-statistic for all features and label pairs, where a row represents a pair of labels.
        Every column represents one feature.
    labels_unique : list
        List of all unique labels
    label_ind_list : list
        List of lists where each sub-list i represents all row-indices in data containing timeseries
        for labels_unique[i]
    """
    # ---------------------------------------------------------------------
    # mask the data if required
    # ---------------------------------------------------------------------
    if mask is not None:
        data = data[:, mask]
    # ---------------------------------------------------------------------
    # extract the unique labels
    # ---------------------------------------------------------------------
    if labels is None:
        # FIXME this exists only to stay compatible with previous versions
        if ts is not None:
            labels = [x.split(',')[0] for x in ts['keywords']]
        else:
            raise ValueError("either 'labels' or 'ts' has to be given")

    labels_unique = list(set(labels))
    # FIXME Not sure if this is necessary or why it is there in the first place
    labels = np.array(labels, dtype=np.dtype('U64'))  # unicode dtype so comparisons against str labels work in Python 3
    label_ind_list = []

    # ---------------------------------------------------------------------
    # get indices for all unique labels
    # ---------------------------------------------------------------------
    for i, label in enumerate(labels_unique):
        label_ind_list.append(np.nonzero((labels == label))[0])
    n_labels = len(label_ind_list)

    # ---------------------------------------------------------------------
    # calculate Mann-Whitney u-test
    # ---------------------------------------------------------------------
    ranks = np.zeros((n_labels * (n_labels - 1) // 2, data.shape[1]))
    norm = np.zeros(n_labels * (n_labels - 1) // 2)

    for i, (label_ind_0, label_ind_1) in enumerate(
            itertools.combinations(range(n_labels), 2)):
        data_0 = data[label_ind_list[label_ind_0], :]
        data_1 = data[label_ind_list[label_ind_1], :]

        print "calculating label pair {:d} / {:d}".format(
            i + 1,
            n_labels * (n_labels - 1) / 2)
        for k in range(0, data.shape[1]):
            if np.ma.all(data_0[:, k] == data_0[0, k]) and np.ma.all(data_1[:, k] == data_0[0, k]):
                ranks[i, k] = data_0[:, k].shape[0] * data_1[:, k].shape[0] / 2.
            else:
                ranks[i, k] = stats.mannwhitneyu(data_0[:, k], data_1[:, k])[0]
            # -- Calculate the norm factor for every label pair
            norm[i] = data_0.shape[0] * data_1.shape[0]
    return ranks, norm, labels_unique, label_ind_list
Example No. 15
                             89.69072164948453, 96.907216494845358, 92.783505154639172, 95.876288659793815, 98.969072164948457]\
                        ])


############### Tool's MAX.ACC PER CORPUS #################
#max_accs_array = np.array([ [ 95.26, 95.56, 95.70, 95.18, 95.85, 98.13, 94.88, 94.12 ],\
#                            [ 91.53, 90.84, 89.16, 90.23, 91.03, 91.99, 87.09, 90.81 ],\
#                            [ 91.47, 92.00, 91.73, 92.27, 90.67, 87.43, 88.27, 91.23 ],\
#                            [ 92.00, 93.60, 92.27, 93.33, 92.27, 95.27, 89.33, 92.57 ],\
#                         ])

print(spm.kruskalwallis(max_accs_array[0], max_accs_array[1], max_accs_array[2], max_accs_array[3],
                        max_accs_array[4], max_accs_array[6], max_accs_array[7]))
print(spm.kruskalwallis(max_accs_array[5], max_accs_array[2]))
#print(sps.wilcoxon(max_accs_array[1], max_accs_array[4]))
print(spm.mannwhitneyu(max_accs_array[5], max_accs_array[2]))  #, use_continuity=True)
0/0  # deliberate ZeroDivisionError to halt the script here

#print max_accs_array

#import matplotlib.pyplot as plt
#plt.hist(max_accs_array[1], 10) #bins, range, normed, weights, cumulative, bottom, histtype, align, orientation, rwidth, log, hold))
#plt.figure()
#plt.plot([1,2,3,4,5,6,7,8,9,10],max_accs_array[7] ) #, histtype='bar', rwidth=0.8)
#plt.show()

print(max_accs_array, "\n")

max_accs_vect = max_accs_array.copy()
#max_accs_vect = max_accs_vect.reshape(1,80)
max_accs_vect = max_accs_vect.reshape(1,32)
Example No. 16
def u_stat_all_label_file_name(file_name, mask=None, is_from_old_matlab=False):
    """
    Calculate the U-statistic for all label pairings in a HCTSA_loc.mat file. The operations can be masked 
    by a boolean array.
    Parameters:
    -----------
    file_name : string
        File name of the HCTSA_loc.mat file containing at least the matrices 'TimeSeries' and 'TS_DataMat'
    mask : ndarray dtype='bool', optional
        If given this acts as mask to which features are included in the calculation
    is_from_old_matlab : bool
        Set this if the HCTSA_loc.mat file was saved by an older version of the comp engine, where the order of entries differs.

    Returns:
    --------
    ranks : ndarray of dim (n_labels * (n_labels-1) / 2, nr_features)
        The U-statistic for all features and label pairs, where a row represents a pair of labels.
        Every column represents one feature
    labels_unique : list
        List of all unique labels
    label_ind_list : list 
        List of lists where each sub-list i represents all row-indices in data containing timeseries
        for labels_unique[i]
    """
    # ---------------------------------------------------------------------
    # load the data
    # ---------------------------------------------------------------------
    ts, data = mIO.read_from_mat_file(file_name, ['TimeSeries', 'TS_DataMat'],
                                      is_from_old_matlab=is_from_old_matlab)
    
    # ---------------------------------------------------------------------
    # mask the data if required
    # ---------------------------------------------------------------------   
    if mask is not None:
        data = data[:, mask]
    
    # ---------------------------------------------------------------------
    # extract the unique labels
    # ---------------------------------------------------------------------
    labels = [int(x.split(',')[-1]) for x in ts['keywords']]
    labels_unique = list(set(labels))
    
    labels = np.array(labels)
    label_ind_list = []
    
    # ---------------------------------------------------------------------
    # get indices for all unique labels
    # ---------------------------------------------------------------------
    for i,label in enumerate(labels_unique):
        label_ind_list.append(np.nonzero((labels == label))[0])
    n_labels = len(label_ind_list)
    
    # ---------------------------------------------------------------------
    # calculate Mann-Whitney u-test
    # ---------------------------------------------------------------------
    ranks = np.zeros((n_labels * (n_labels - 1) // 2, data.shape[1]))
    
    for i, (label_ind_0, label_ind_1) in enumerate(itertools.combinations(range(n_labels), 2)):
        # -- select the data for the current labels
        data_0 = data[label_ind_list[label_ind_0], :]
        data_1 = data[label_ind_list[label_ind_1], :]
        print(i + 1, '/', n_labels * (n_labels - 1) // 2)
        for k in range(data.shape[1]):
            # -- in the case of same value for every feature in both arrays set max possible value
            if np.ma.all(data_0[:, k] == data_0[0, k]) and np.ma.all(data_1[:, k] == data_0[0, k]):
                ranks[i, k] = data_0[:, k].shape[0] * data_1[:, k].shape[0] / 2.
            else:
                ranks[i, k] = stats.mannwhitneyu(data_0[:, k], data_1[:, k])[0]

    return ranks, labels_unique, label_ind_list
Example No. 17
            # Determine the settings from the filename
            problem, dup, ordering, nodes, mut, seed = base.split('_')
            with open_file_method(filename)(filename, 'r') as f:
                data = json.load(f)
            version = dup, ordering, nodes, mut
            if (dup, ordering) == ('skip', 'normal'):
                control_group = version
            statify[version].append(data[1]['evals'])
            active[version].append(data[1]['phenotype'])
            best = data[1]['bests'][-1]
            test = data[1]['test_inputs']
            individual = Individual.reconstruct_individual(best, test)
            simplified = individual.new(Individual.simplify)
            reduced[version].append(len(simplified.active))
            filecount += 1
        except ValueError:
            print filename, "FAILED"

    # Kruskal's requires a rectangular matrix
    rect = make_rectangular(list(statify.values()), 10000001)

    print('Files Successfully Loaded', filecount)
    print('Kruskal Wallis', kruskalwallis(rect))
    for version, data in statify.items():
        print('--------- %s ---------' % str(version))
        print("MES, MAD", median_deviation(data))
        print('Active', median_deviation(active[version]))
        print('Reduced', median_deviation(reduced[version]))
        print('Mann Whitney U against Control', end=' ')
        print(mannwhitneyu(statify[control_group], data))
Example No. 18
# imports inferred from the calls used below; cohens_d is assumed to be
# defined elsewhere in the original module
import os
import cv2
import pandas as pd
import seaborn as sb
import matplotlib.pyplot as plt
from scipy.stats import mannwhitneyu, normaltest, energy_distance
from skimage.measure import label, regionprops


def morph_distribution(path, seg_name):


    gt_folder_path = path + "Ground_Truth"
    seg_folder_path = path + seg_name

    gt_image_list = os.listdir(gt_folder_path)
    seg_image_list = os.listdir(seg_folder_path)

    columns = ["Image", "Area", "Eccentricity", "Aspect Ratio", "Perimeter", "Solidity"]

    df = pd.DataFrame(columns=columns)
    df_2 = pd.DataFrame(columns=columns)


    list_area = []
    list_ecc = []
    list_ar = []
    list_per = []
    list_sol = []

    list_area2 = []
    list_ecc2 = []
    list_ar2 = []
    list_per2 = []
    list_sol2 = []

    def significance(pvalue):

        # map the p-value to a significance tier; an elif chain with open
        # lower bounds also covers the boundary values 0.05, 0.01 and 0.001
        if pvalue > 0.05:
            return 0
        elif pvalue > 0.01:
            return 1
        elif pvalue > 0.001:
            return 2
        else:
            return 3

    ###################################################

    ###################################################

    for gt_image, seg_image in zip(gt_image_list, seg_image_list):


        print(gt_image)

        gt = cv2.imread(gt_folder_path + "/" + gt_image, cv2.IMREAD_GRAYSCALE)
        seg = cv2.imread(seg_folder_path + "/" + seg_image, cv2.IMREAD_GRAYSCALE)

        # label image mask
        gt_labelled = label(gt)
        seg_labelled = label(seg)


        # Get region props of labelled images
        gt_reg_props = regionprops(gt_labelled)
        seg_reg_props = regionprops(seg_labelled)


        # compare shape descriptor distributions
        #################################

        # Area
        gt_area = [i.area for i in gt_reg_props]
        seg_area = [i.area for i in seg_reg_props]

        pvalue_area = mannwhitneyu(gt_area, seg_area)[1]

        list_area.append(significance(pvalue_area))
        #list_area.append(pvalue_area)

        list_area2.append(cohens_d(gt_area, seg_area))


        # Eccentricity
        gt_ecc = [i.eccentricity for i in gt_reg_props]
        seg_ecc = [i.eccentricity for i in seg_reg_props]

        pvalue_ecc = mannwhitneyu(gt_ecc, seg_ecc)[1]

        list_ecc.append(significance(pvalue_ecc))
        #list_ecc.append(pvalue_ecc)

        list_ecc2.append(cohens_d(gt_ecc, seg_ecc))

        # Aspect ratio


        gt_ar = [i.major_axis_length/i.minor_axis_length for i in gt_reg_props if i.minor_axis_length != 0]
        seg_ar = [i.major_axis_length/i.minor_axis_length for i in seg_reg_props if i.minor_axis_length != 0]

        pvalue_ar = mannwhitneyu(gt_ar, seg_ar)[1]

        list_ar.append(significance(pvalue_ar))
        #list_ar.append(pvalue_ar)

        list_ar2.append(cohens_d(gt_ar, seg_ar))

        # Perimeter
        gt_per = [i.perimeter for i in gt_reg_props]
        seg_per = [i.perimeter for i in seg_reg_props]

        pvalue_per = mannwhitneyu(gt_per, seg_per)[1]

        list_per.append(significance(pvalue_per))
        #list_per.append(pvalue_per)

        list_per2.append(cohens_d(gt_per, seg_per))

        # Solidity
        gt_sol = [i.solidity for i in gt_reg_props]
        seg_sol = [i.solidity for i in seg_reg_props]

        #print(len(gt_sol))

        pvalue_sol = mannwhitneyu(gt_sol, seg_sol)[1]

        list_sol.append(significance(pvalue_sol))
        #list_sol.append(pvalue_sol)

        list_sol2.append(cohens_d(gt_sol, seg_sol))

        #################################

        def show(gt, seg):

            sb.kdeplot(gt, color="green", shade=True)
            sb.kdeplot(seg, color="red", shade=True)
            plt.show()

        def norm(gt, seg):

            print(normaltest(gt)[1])
            print(normaltest(seg)[1])


        #norm(gt_area, seg_area)
        #norm(gt_ecc, seg_ecc)
        #norm(gt_ar, seg_ar)
        #norm(gt_per, seg_per)
        #norm(gt_sol, seg_sol)

        """
        #show(gt_area, seg_area)
        #print(pvalue_area)
        print(energy_distance(gt_area, seg_area))
        #show(gt_ecc, seg_ecc)
        #print(pvalue_ecc)
        print(energy_distance(gt_ecc, seg_ecc))
        #show(gt_ar, seg_ar)
        #print(pvalue_ar)
        print(energy_distance(gt_ar, seg_ar))
        #show(gt_per, seg_per)
        #print(pvalue_per)
        print(energy_distance(gt_per, seg_per))
        #show(gt_sol, seg_sol)
        #print(pvalue_sol)
        print(energy_distance(gt_sol, seg_sol))
        """



    df["Image"] = gt_image_list
    df["Area"] = list_area
    df["Eccentricity"] = list_ecc
    df["Aspect Ratio"] = list_ar
    df["Perimeter"] = list_per
    df["Solidity"] = list_sol

    df_2["Image"] = gt_image_list
    df_2["Area"] = list_area2
    df_2["Eccentricity"] = list_ecc2
    df_2["Aspect Ratio"] = list_ar2
    df_2["Perimeter"] = list_per2
    df_2["Solidity"] = list_sol2

    total_values = len(gt_image_list)*5

    zero_p = 0
    one_p = 0
    two_p = 0
    three_p = 0

    for index, row in df.iterrows():

        for column in row:

            if column == 0:
                zero_p+=1

            elif column == 1:
                one_p+=1

            elif column == 2:
                two_p+=1

            elif column == 3:
                three_p+=1


    #print(zero_p/total_values)
    #print(one_p/total_values)
    #print(two_p/total_values)
    #print(three_p/total_values)



    # raw data
    df.to_csv(path + seg_name + "_Morph_Dist_comparison.csv")