Exemplo n.º 1
0
    def test_kstwosamp(self):
        x = [
            [nan, nan, 4, 2, 16, 26, 5, 1, 5, 1, 2, 3, 1],
            [4, 3, 5, 3, 2, 7, 3, 1, 1, 2, 3, 5, 3],
            [3, 2, 5, 6, 18, 4, 9, 1, 1, nan, 1, 1, nan],
            [nan, 6, 11, 4, 17, nan, 6, 1, 1, 2, 5, 1, 1],
        ]
        x = ma.fix_invalid(x).T
        (winter, spring, summer, fall) = x.T

        assert_almost_equal(np.round(mstats.ks_twosamp(winter, spring), 4), (0.1818, 0.9892))
        assert_almost_equal(np.round(mstats.ks_twosamp(winter, spring, "g"), 4), (0.1469, 0.7734))
        assert_almost_equal(np.round(mstats.ks_twosamp(winter, spring, "l"), 4), (0.1818, 0.6744))
Exemplo n.º 2
0
    def test_kstwosamp(self):
        x = [[nan,nan, 4, 2, 16, 26, 5, 1, 5, 1, 2, 3, 1],
             [4, 3, 5, 3, 2, 7, 3, 1, 1, 2, 3, 5, 3],
             [3, 2, 5, 6, 18, 4, 9, 1, 1,nan, 1, 1,nan],
             [nan, 6, 11, 4, 17,nan, 6, 1, 1, 2, 5, 1, 1]]
        x = ma.fix_invalid(x).T
        (winter,spring,summer,fall) = x.T

        assert_almost_equal(np.round(mstats.ks_twosamp(winter,spring),4),
                            (0.1818,0.9892))
        assert_almost_equal(np.round(mstats.ks_twosamp(winter,spring,'g'),4),
                            (0.1469,0.7734))
        assert_almost_equal(np.round(mstats.ks_twosamp(winter,spring,'l'),4),
                            (0.1818,0.6744))
def geneStats(scoreColumn, negArray):
    """Run KS and Mann-Whitney comparisons of a score column against negArray.

    ``scoreColumn.dropna()`` is wrapped in a masked array with nothing
    masked and compared with ``negArray``.

    Returns a tuple ``(ksPval, ksSign, mwPval)``:

    * ``ksPval`` -- p-value of ``ms.ks_twosamp`` with its default alternative.
    * ``ksSign`` -- ``'P'`` when the ``'less'`` p-value is strictly smaller
      than the ``'greater'`` p-value, otherwise ``'S'``.
    * ``mwPval`` -- p-value of ``ms.mannwhitneyu``.
    """
    scores = np.ma.array(data=scoreColumn.dropna(), mask=False)

    def ks_pvalue(**options):
        # Element 1 of the KS result is the p-value.
        return ms.ks_twosamp(scores, negArray, **options)[1]

    default_p = ks_pvalue()
    less_p = ks_pvalue(alternative='less')
    greater_p = ks_pvalue(alternative='greater')
    sign = 'P' if less_p < greater_p else 'S'

    mann_whitney_p = ms.mannwhitneyu(scores, negArray)[1]

    return default_p, sign, mann_whitney_p
 def test_kstwosamp(self):
     "Tests the Kolmogorov-Smirnov 2 samples test"
     x = [[nan,nan,  4,  2, 16, 26,  5,  1,  5,  1,  2,  3,  1],
          [  4,  3,  5,  3,  2,  7,  3,  1,  1,  2,  3,  5,  3],
          [  3,  2,  5,  6, 18,  4,  9,  1,  1,nan,  1,  1,nan],
          [nan,  6, 11,  4, 17,nan,  6,  1,  1,  2,  5,  1,  1]]
     x = ma.fix_invalid(x).T
     (winter,spring,summer,fall) = x.T
     #
     assert_almost_equal(np.round(mstats.ks_twosamp(winter,spring),4),
                         (0.1818,0.9892))
     assert_almost_equal(np.round(mstats.ks_twosamp(winter,spring,'g'),4),
                         (0.1469,0.7734))
     assert_almost_equal(np.round(mstats.ks_twosamp(winter,spring,'l'),4),
                         (0.1818,0.6744))
Exemplo n.º 5
0
def performKS_Test(caseList, controlList, selection, files):
    """Write the KS two-sample p-value for ``selection`` to ``files``.

    ``selection`` is forwarded as the ``alternative`` of
    ``mstats.ks_twosamp(caseList, controlList, ...)``.  One line is written
    through ``files.write``: it is labelled "two-sided" for
    ``'two-sided'``, "positive selection" for ``'less'`` and "negative
    selection" for any other value.  Nothing is returned.
    """
    prefixes = {
        'two-sided': 'two-sided p-value is: ',
        'less': 'positive selection p-value is: ',
    }
    p_value = mstats.ks_twosamp(caseList, controlList, alternative=selection)[1]

    # Every other alternative falls through to the negative-selection label.
    prefix = prefixes.get(selection, 'negative selection p-value is: ')
    files.write(prefix + str(p_value) + '\n')
Exemplo n.º 6
0
def analyze_run(ds_file, history=0.20):
    """Plot the gamma trace of a run and KS-test its last two windows.

    Every line of ``ds_file`` that starts with ``'iteration'`` is split on
    whitespace; fields 1-3 must parse as integers and fields 4-5 as floats.
    The second float is kept as the gamma value of that iteration.

    The last ``history`` fraction of the gamma values (the "history") is
    compared with the window of roughly the same length just before it
    (the "prehistory") using ``scipy.stats.mstats.ks_twosamp``.  The whole
    trace is plotted with the history window shaded, the KS p-value is put
    in the title, and the figure is saved in the current directory as
    ``<fstem>-gamma_conv.pdf``, where ``fstem`` is the second-to-last
    ``'/'``-separated component of ``ds_file``.

    Parameters
    ----------
    ds_file : str
        Path of the file to read.  It must contain at least one ``'/'``
        because the output name is taken from the parent component.
    history : float, optional
        Fraction of the iterations that forms the history window.

    Returns
    -------
    float
        ``J * history``, with ``J`` the number of iteration lines read.

    Raises
    ------
    ValueError
        If an iteration line has too few fields or a field does not parse.
    IndexError
        If ``ds_file`` contains no ``'/'``.

    Notes
    -----
    NOTE(review): ``plt`` is not bound inside this function; it has to be
    available at module level (presumably ``matplotlib.pyplot``).
    """
    import numpy as np
    from scipy.stats import mstats

    gamma = []
    # Context manager so the handle is closed even when a line is malformed.
    with open(ds_file) as handle:
        for line in handle:
            if not line.startswith('iteration'):
                continue
            fields = line.split()
            # Only gamma is used below; the other fields are still parsed so
            # that a malformed line raises instead of being accepted.
            _iteration, _k, _unt = map(int, fields[1:4])
            _theta, gamma_value = map(float, fields[4:6])
            gamma.append(gamma_value)
    gamma_a = np.array(gamma)

    J = len(gamma_a)
    # Slice bounds must be integers: float bounds such as -history * J raise
    # TypeError on current numpy.  int() truncates the window lengths.
    n_hist = int(history * J)
    n_both = int(2 * history * J)
    hist_start = max(J - n_hist, 0)
    pre_start = max(J - n_both, 0)
    gamma_history = gamma_a[hist_start:]
    gamma_prehistory = gamma_a[pre_start:hist_start]

    KS = mstats.ks_twosamp(gamma_history, gamma_prehistory)

    plt.figure()
    plt.plot(gamma_a)
    plt.xlabel('iterations')
    # Raw strings: '\G' is not a valid escape sequence in a normal literal.
    plt.ylabel(r'$\Gamma$')
    plt.title(r'$\Gamma$, KS test history prehistory gives ' + '%3.2E' % KS[1])
    plt.axvspan((1. - history) * J, J, facecolor='g', alpha=0.3)

    min_g, max_g = gamma_a.min(), gamma_a.max()
    # Put the label at 90% of the plotted range, inside the shaded span.
    ann_y = 0.9 * (max_g - min_g) + min_g
    plt.annotate('history', xy=((1 - 0.7 * history) * J, ann_y))

    fstem = ds_file.split('/')[-2]

    imtype = 'pdf'
    # NOTE(review): papertype=None (the former default) is no longer passed;
    # recent matplotlib releases do not accept that keyword for PDF output.
    plt.savefig('%s-gamma_conv.%s' % (fstem, imtype), dpi=None,
                facecolor='w', edgecolor='w', orientation='landscape',
                format=imtype, transparent=False)

    return J * history
Exemplo n.º 7
0
    def plot_phase_shifts(self, column: str, reference: Band, shift: int, max_shift=10):
        """Draw phase, phase-difference and summary panels for every threshold.

        A 13x6 figure is laid out on a GridSpec with ``len(self.thresholds)``
        rows and ``len(self.thresholds) + 2`` columns:

        * left: one scatter per threshold of ``shift * phase``, wrapped with
          ``mod(..., pipi)``, converted to degrees and shifted by -180;
        * middle: for every threshold except the reference, a scatter of the
          phase difference returned by ``get_phase_difference`` (titled with
          the ``ks_twosamp`` statistic) above a polar bar histogram of that
          difference;
        * right: a line plot and a seaborn boxplot of a DataFrame whose
          columns are filled by ``phase_core``.

        Parameters
        ----------
        column
            Key used to index each entry of ``self.phases``.
        reference
            Entry of ``self.thresholds``; it is located with
            ``self.thresholds.index(reference)`` and skipped in the
            difference panels.
        shift
            Multiplier applied to the phases in the left panels and to the
            reference phase in the comparisons; also shown in the labels.
        max_shift
            Passed to ``phase_core`` and used to build the DataFrame index
            ``linspace(0, max_shift, max_shift * 10)``.

        Returns
        -------
        None
            The method only draws; it has no return statement.

        Reads ``self.phases``, ``self.fs``, ``self.thresholds`` and
        ``self.colors``.
        """
        # Axis from 0 to size / fs with one point per sample of the first series.
        t = linspace(0, self.phases[0][column].size / self.fs, self.phases[0][column].size)
        ref_index = self.thresholds.index(reference)
        ref_name = self.thresholds[ref_index]["name"]

        sns.set(style='ticks')
        figure(figsize=[13, 6])

        thresh_len = len(self.thresholds)
        # NOTE(review): int(x / 2) truncates, i.e. rounds DOWN for positive
        # values (same result as thresh_len // 2); the earlier comment that
        # claimed it rounds up was inaccurate.
        col_len = int(thresh_len / 2)

        gs1 = GridSpec(thresh_len, thresh_len + 2)
        gs1.update(left=0.1, right=.99, wspace=.2, hspace=.3)

        # Left-hand panels: one row per threshold, spanning the first col_len columns.
        axes = tuple(subplot(gs1[ind, :col_len]) for ind in range(thresh_len))
        # NOTE(review): 'box-forced' is not accepted by recent matplotlib
        # releases -- confirm against the version in use.
        axes2_norm_kws = dict(adjustable='box-forced', xlim=(0, 1), ylim=(-pi, pi))
        axes2_polar_kws = dict(polar=True)
        # Generator, not a tuple: each (difference, polar) subplot pair is only
        # created when next(axes2) is called in the loop further down.
        axes2 = (
            (
                subplot(gs1[:col_len, item], yticks=(), xticks=(), **axes2_norm_kws),
                subplot(gs1[col_len:, item], **axes2_polar_kws)
            ) for item in range(2, thresh_len + 1)
        )
        # Summary panels in the last grid column.
        phasecore_ax, box_ax = subplot(gs1[:col_len, -1], xticks=()), subplot(gs1[col_len:, -1])

        for ind, (ax, th) in enumerate(zip(axes, self.thresholds)):
            title = th['name']
            # `pipi` comes from outside this method -- presumably 2 * pi; verify.
            ax.scatter(t, rad2deg(mod(shift * self.phases[ind][column], pipi)) - 180, marker='.', s=5, c='k')
            ax.set_xlim(0, 1)
            ax.set_ylabel(fr'${shift} \times \Phi_{{\{title}}}$', fontsize=12, labelpad=1)
            ax.set_yticks([-180, 0, 180])
            ax.set_xticklabels([])
            ax.set_xticks([])
            ax.set_yticklabels(['$-\pi$', 0, '$\pi$'])
            sns.despine(left=False, right=True, bottom=True, top=True, ax=ax, offset=5)

        # One column per non-reference threshold is added in the loop below.
        df = DataFrame(index=linspace(0, max_shift, max_shift * 10))

        for ind, th in enumerate(self.thresholds):
            if ind == ref_index:
                continue

            title = th['name']
            # Creates the next pair of subplots (see the axes2 generator above).
            phase_ax, polar_ax = next(axes2)

            # phasediff = angle(exp(1j * (self.phases[ind][column] - shift * self.phases[ref_index][column])))
            phasediff = get_phase_difference(self.phases[ind][column], self.phases[ref_index][column], shift)

            phase_ax.scatter(t, rad2deg(phasediff), marker='.', s=5, c='k')
            # NOTE(review): the second set_yticks call overrides the first, so
            # no y ticks remain on this axis.
            phase_ax.set_yticks([-180, 0, 180])
            phase_ax.set_yticks([])
            phase_ax.set_xticks([])

            # Two-sample KS test; only the statistic d[0] is shown in the title.
            d = ks_twosamp(
                self.phases[ind][column],
                shift * self.phases[ref_index][column],
                'two-sided'
            )

            phase_ax_ttl = fr'$\Delta\Phi_{{1:{shift}}} = \Phi_{{\{title}}} - {shift}\Phi_{{\{ref_name}}}$'
            phase_ax.set_title(phase_ax_ttl + f'\n$D_{{n, m}} = {d[0]:.3f}$')

            label = fr'$\Phi_{{\{title}}}-{shift}\Phi_{{\{ref_name}}}$'
            df[label] = phase_core(max_shift, self.phases[ind][column], shift * self.phases[ref_index][column])

            # 20-bin histogram of the difference offset by pi: r holds the
            # counts, phi the bin edges; theta is the centre of each bin.
            r, phi = histogram(phasediff + pi, bins=20)
            theta = c_[phi[:-1], phi[1:]].mean(axis=1)
            phi_probability = (2 * r) / r.sum()

            # Mean of the unit phasors exp(1j * (phase - shift * reference)):
            # mean_angle is its argument, r_mean its modulus plus pi.
            # NOTE(review): adding pi to a modulus and plotting real(zm)
            # against a radius of 1 looks unusual -- confirm the intent.
            mean_angle = angle(exp(1j * (self.phases[ind][column] - shift * self.phases[ref_index][column])).mean())
            r_mean = abs(exp(1j * (self.phases[ind][column] - shift * self.phases[ref_index][column])).mean()) + pi
            zm = r_mean * exp(1j * mean_angle) * max(phi_probability)
            polar_ax.plot([0, real(zm)], [0, 1], lw=1, c='r', alpha=.7)

            polar_ax.bar(theta, phi_probability, width=.2, alpha=.7)
            polar_ax.set_yticks([])
            polar_ax.set_xticks([0, pi / 2, pi, (3 * pi) / 2])
            polar_ax.set_xticklabels(['0', r'$\pi/2$', r'$\pi$', r'$3\pi/2$'])
            polar_ax.set_ylim(0, .2)

        # Two ticks: zero and the largest column maximum of df.
        y_ticks = 0, max(df.max())
        # NOTE(review): the labels are formatted from box_ax's ticks as they
        # are BEFORE the boxplot is drawn and before set_yticks(y_ticks), so
        # their count and values may not match y_ticks -- confirm the intent.
        y_labels = [f'{tick:.2f}' for tick in box_ax.get_yticks()]

        sns.boxplot(data=df, ax=box_ax, palette=self.colors, fliersize=0, linewidth=1, width=.9)
        sns.despine(left=True, right=False, bottom=True, top=True, ax=box_ax)
        box_ax.set_xticks([])
        box_ax.set_yticks(y_ticks)
        box_ax.set_yticklabels(y_labels)

        df.plot(ax=phasecore_ax, lw=1, color=self.colors[:df.columns.size])
        # Non-zero x ticks are labelled as "1 : <tick>"; a zero tick gets an empty label.
        phasecore_ax.set_xticklabels(['1 : %d' % item if item else '' for item in phasecore_ax.get_xticks()])
        sns.despine(left=True, right=False, bottom=False, top=True, ax=phasecore_ax)
        phasecore_ax.set_yticks(y_ticks)
        phasecore_ax.set_yticklabels(y_labels)