Exemplo n.º 1
0
def probplot(x, sparams=(), dist='norm', fit=1, plot=None):
    """Return the data for a probability plot of x against a distribution.

    The result is (osm, osr){,(scale,loc,r)} where (osm, osr) are the order
    statistic medians and the ordered response data respectively, so that
    plot(osm, osr) is a probability plot.  If fit is true, a regression fit
    is done and the slope (scale), intercept (loc), and correlation
    coefficient (r) of the best straight line through the points are
    returned as a second tuple.  If fit is false, only (osm, osr) is
    returned.

    x is the sample data; it must support len() and sort().
    sparams is a scalar, sequence or tuple of shape parameter arguments for
    the distribution; None is treated as "no shape parameters".
    dist is the name of an attribute of the distributions module that
    provides a ppf.
    plot, if given, is an object with plot, title, xlabel and ylabel
    methods; the points and the fitted line are drawn with it.

    Raises ValueError if dist does not name a distribution with a ppf, if
    the ppf does not end with loc=0.0 and scale=1.0 default parameters, or
    if the number of shape parameters is wrong.
    """
    N = len(x)
    # Uniform order statistic medians: closed form for the two end points,
    # (i - 0.3175) / (N + 0.365) for the interior points.
    Ui = zeros(N)*1.0
    Ui[-1] = 0.5**(1.0/N)
    Ui[0] = 1-Ui[-1]
    i = arange(2,N)
    Ui[1:-1] = (i-0.3175)/(N+0.365)
    try:
        # Plain attribute lookup; the name is no longer passed through eval().
        ppf_func = getattr(distributions, dist).ppf
    except AttributeError:
        raise ValueError("%s is not a valid distribution with a ppf." % dist)
    if sparams is None:
        sparams = ()
    if isscalar(sparams):
        sparams = (sparams,)
    if not isinstance(sparams, tuple):
        sparams = tuple(sparams)
    # The ppf signature must end with loc=0.0, scale=1.0.
    # NOTE(review): inspect.getargspec is deprecated and absent from recent
    # Python 3 releases; this check assumes the ppf exposes explicit
    # loc/scale arguments -- confirm against the distributions in use.
    res = inspect.getargspec(ppf_func)
    if not ('loc' == res[0][-2] and 'scale' == res[0][-1] and
            0.0 == res[-1][-2] and 1.0 == res[-1][-1]):
        raise ValueError("Function does not have default location "
                         "and scale parameters\n  that are 0.0 and 1.0 "
                         "respectively.")
    if (len(sparams) < len(res[0])-len(res[-1])-1) or \
       (len(sparams) > len(res[0])-3):
        raise ValueError("Incorrect number of shape parameters.")
    osm = ppf_func(Ui,*sparams)
    osr = sort(x)
    if fit or (plot is not None):
        # The fitted line is needed both for the return value and the plot.
        slope, intercept, r, prob, sterrest = stats.linregress(osm,osr)
    if plot is not None:
        # Best effort: call xplt.limits() when the legacy scipy.xplt
        # backend is importable; any failure is deliberately ignored.
        try:
            import scipy.xplt as xplt
            xplt.limits()
        except Exception:
            pass
        plot.plot(osm, osr, 'o', osm, slope*osm + intercept)
        plot.title('Probability Plot')
        plot.xlabel('Order Statistic Medians')
        plot.ylabel('Ordered Values')
        # expand_limits/addtext are optional extras of some plot objects.
        try:
            plot.expand_limits(5)
        except Exception:
            pass
        xmin,xmax= amin(osm),amax(osm)
        ymin,ymax= amin(x),amax(x)
        pos = xmin+0.70*(xmax-xmin), ymin+0.01*(ymax-ymin)
        try:
            # The label announces r squared, so square the coefficient.
            plot.addtext("r^2^=%1.4f" % (r**2), xy=pos,tosys=1)
        except Exception:
            pass
    if fit:
        return (osm, osr), (slope, intercept, r)
    else:
        return osm, osr
Exemplo n.º 2
0
def probplot(x, sparams=(), dist='norm', fit=1, plot=None):
    """Return the data for a probability plot of x against a distribution.

    The result is (osm, osr){,(scale,loc,r)} where (osm, osr) are the order
    statistic medians and the ordered response data respectively, so that
    plot(osm, osr) is a probability plot.  If fit is true, a regression fit
    is done and the slope (scale), intercept (loc), and correlation
    coefficient (r) of the best straight line through the points are
    returned as a second tuple.  If fit is false, only (osm, osr) is
    returned.

    x is the sample data; it must support len() and sort().
    sparams is a scalar, sequence or tuple of arguments passed positionally
    to the distribution's ppf after the probabilities; None is treated as
    "no parameters".
    dist is the name of an attribute of the distributions module that
    provides a ppf.
    plot, if given, is an object with plot, title, xlabel, ylabel and text
    methods; the points, the fitted line and an r^2 label are drawn with it.

    Raises ValueError if dist does not name a distribution with a ppf.
    """
    N = len(x)
    # Uniform order statistic medians: closed form for the two end points,
    # (i - 0.3175) / (N + 0.365) for the interior points.
    Ui = zeros(N)*1.0
    Ui[-1] = 0.5**(1.0/N)
    Ui[0] = 1-Ui[-1]
    i = arange(2,N)
    Ui[1:-1] = (i-0.3175)/(N+0.365)
    try:
        # Plain attribute lookup; the name is no longer passed through eval().
        ppf_func = getattr(distributions, dist).ppf
    except AttributeError:
        raise ValueError("%s is not a valid distribution with a ppf." % dist)
    if sparams is None:
        sparams = ()
    if isscalar(sparams):
        sparams = (sparams,)
    # The builtin ``tuple`` works on Python 2 and 3; types.TupleType is
    # Python 2 only.
    if not isinstance(sparams, tuple):
        sparams = tuple(sparams)
    # NOTE: an earlier inspect-based validation of the ppf signature
    # (loc/scale defaults and number of shape parameters) was disabled in
    # this version; sparams is handed to the ppf unchecked.
    osm = ppf_func(Ui,*sparams)
    osr = sort(x)
    if fit or (plot is not None):
        # The fitted line is needed both for the return value and the plot.
        slope, intercept, r, prob, sterrest = stats.linregress(osm,osr)
    if plot is not None:
        plot.plot(osm, osr, 'o', osm, slope*osm + intercept)
        plot.title('Probability Plot')
        plot.xlabel('Order Statistic Medians')
        plot.ylabel('Ordered Values')

        # Put the label at 70% of the x range and 1% of the y range.
        xmin,xmax= amin(osm),amax(osm)
        ymin,ymax= amin(x),amax(x)
        posx,posy = xmin+0.70*(xmax-xmin), ymin+0.01*(ymax-ymin)
        # The label announces r squared, so square the coefficient.
        plot.text(posx,posy, "r^2=%1.4f" % (r**2))
    if fit:
        return (osm, osr), (slope, intercept, r)
    else:
        return osm, osr
Exemplo n.º 3
0
 def calculate_test(self):
     """Regress the sample variable on every other column and report it.

     A copy of ``self.data`` gets ``self.samples`` stored under the column
     ``self.var``; each remaining column is then passed with that column
     to ``stats.linregress``.  Every result is written to
     ``self.results`` and echoed with ``print``.

     Any ordinary failure is reported as "Error in regression" instead of
     being raised.  KeyboardInterrupt and SystemExit are no longer
     swallowed.
     """
     try:
         df = self.data.copy()
         df[self.var] = self.samples
         for column in df.columns:
             if column == self.var:
                 continue
             reg = stats.linregress(df[column], df[self.var])
             # Build the report once; str() keeps non-string column labels
             # from breaking the concatenation.
             message = ("\nLinear Regression to " + str(column) + ": "
                        + str(reg) + "\n")
             self.results.write(message)
             print(message)
     except Exception:
         # Deliberate best effort: a failed regression must not abort the
         # caller, so it is only reported.
         print("Error in regression")
Exemplo n.º 4
0
def linearFit(f0s):
    """Fit a straight line through a sequence of (time, F0) measurements.

    Input: list of (time, F0) tuples
    Output: slope, corrected intercept

    The F0 values are used as the first regression argument and the times,
    shifted so that the first measurement is at time zero, as the second.
    """
    from stats import linregress  # thin wrapper around the regression

    pitches = [f0 for (_time, f0) in f0s]
    # Times are measured relative to the first sample's timestamp.
    elapsed = [time - f0s[0][0] for (time, _f0) in f0s]
    slope, intercept, _r, _p, _stderr = linregress(pitches, elapsed)
    return slope, intercept
Exemplo n.º 5
0
def estimatePoissonYoung(principalAxis, stress=0, plot=False, cutoff=0.):
    """Estimate Poisson's ratio and Young's modulus for a strained specimen.

    For each of the three base directions the homogenized strain is taken
    as the slope of a linear regression of particle displacement against
    particle coordinate, both as returned by utils.coordsAndDisplacements
    for that direction.  With e0 the strain along principalAxis and e1, e2
    the strains along the two other axes, the first returned value is
    -(e1 + e2) / (2 * e0).

    The second returned value is stress / e0; with the default stress of 0
    it is therefore 0.

    cutoff, if > 0, restricts the particles to the box returned by
    utils.fractionalBox(fraction=1 - cutoff).

    plot, if true, shows the measured points and the fitted lines for all
    three axes with pylab.
    """
    import pylab, numpy
    try:
        import stats
    except ImportError:
        raise ImportError(
            "Unable to import stats; install the python-stats package.")
    from woo import utils

    axisNames = 'xyz'
    if cutoff > 0:
        cut = utils.fractionalBox(fraction=1 - cutoff)

    # One entry per axis: (slope, intercept, smallest coord, largest coord).
    fits = []
    for axis in range(3):
        if cutoff > 0:
            coords, displacements = utils.coordsAndDisplacements(axis,
                                                                 Aabb=cut)
        else:
            coords, displacements = utils.coordsAndDisplacements(axis)
        # Only the first two regression results (slope, intercept) are used.
        slope, intercept = stats.linregress(coords, displacements)[0:2]
        fits.append((slope, intercept, min(coords), max(coords)))
        if plot:
            name = axisNames[axis]
            pylab.plot(coords, displacements, '.',
                       label=r'$\Delta %s(%s)$' % (name, name))

    if plot:
        for axis, (slope, intercept, low, high) in enumerate(fits):
            # Draw each fitted line 20% beyond the data range on both sides.
            span = high - low
            xs = numpy.linspace(low - .2 * span, high + .2 * span)
            ys = [slope * xx + intercept for xx in xs]
            name = axisNames[axis]
            pylab.plot(xs, ys,
                       label=r'$\widehat{\Delta %s}(%s)$' % (name, name))
        pylab.legend(loc='upper left')
        pylab.xlabel(r'$x,\;y,\;z$')
        pylab.ylabel(r'$\Delta x,\;\Delta y,\; \Delta z$')
        pylab.show()

    firstOther = (principalAxis + 1) % 3
    secondOther = (principalAxis + 2) % 3
    avgTransStrain = .5 * (fits[firstOther][0] + fits[secondOther][0])
    principalStrain = fits[principalAxis][0]
    poisson = -avgTransStrain / principalStrain
    young = stress / principalStrain
    return poisson, young
Exemplo n.º 6
0
def tilt(fft):
    """Compute the spectral tilt of one analysis frame.

    Input: a single (time, list of energies) tuple
    Output: the slope and intercept of a linear regression of the energies
    in bins 6 up to (not including) 48 against their position in that band.

    The original author notes that this band spans 500 Hz to 4 KHz,
    assuming CD-quality audio.
    """
    from stats import linregress  # supplies the regression

    energies = fft[1]
    band = energies[6:48]
    positions = range(len(band))
    slope, intercept, _r, _p, _stderr = linregress(positions, band)
    return slope, intercept
Exemplo n.º 7
0
 def calculate_test(self):
     """Regress the experimental variable on all values of all samples.

     The rows of a copy of ``self.data`` are visited in index order.
     Every value of the i-th row is paired with
     ``int(self.samples[i])``, and one ``stats.linregress`` is run over
     all such pairs.  The result is written to ``self.results`` and
     echoed with ``print``.

     Any ordinary failure is reported as "Error in regression" instead of
     being raised.  KeyboardInterrupt and SystemExit are no longer
     swallowed.
     """
     try:
         values = []
         variables = []
         df = self.data.copy().sort_index()
         # iterrows() yields each row exactly once, even when index labels
         # repeat; df.loc[label] would return a whole DataFrame then.
         for i, (_label, row) in enumerate(df.iterrows()):
             sample = int(self.samples[i])
             values.extend(row)
             variables.extend([sample] * len(row))
         reg = stats.linregress(values, variables)
         message = "\nAll samples to exp. variables " + str(reg) + "\n"
         self.results.write(message)
         print(message)
     except Exception:
         # Deliberate best effort: a failed regression must not abort the
         # caller, so it is only reported.
         print("Error in regression")
Exemplo n.º 8
0
def estimatePoissonYoung(principalAxis,stress=0,plot=False,cutoff=0.):
    """Return (Poisson's ratio, Young's modulus) estimated from particle data.

    The homogenized strain along each base direction is the slope of the
    linear regression of displacement on coordinate, using the pair of
    sequences that utils.coordsAndDisplacements returns for that direction.
    Writing e0 for the strain along principalAxis and e1, e2 for the two
    remaining directions, Poisson's ratio is -(e1+e2)/(2*e0).

    Young's modulus is stress/e0, which is 0 when stress keeps its default
    of 0.

    If cutoff > 0, only particles inside
    utils.fractionalBox(fraction=1-cutoff) are taken into account.

    If plot is true, the raw points and the regression lines of all three
    directions are shown with pylab.
    """
    import pylab,numpy
    try:
        import stats
    except ImportError:
        raise ImportError("Unable to import stats; install the python-stats package.")
    from woo import utils
    names='xyz'
    regressions=[] # per axis: (slope, intercept, min coordinate, max coordinate)
    if cutoff>0: box=utils.fractionalBox(fraction=1-cutoff)
    for ax in (0,1,2):
        # restrict to the cut box only when a cutoff was requested
        extra={'Aabb':box} if cutoff>0 else {}
        pos,disp=utils.coordsAndDisplacements(ax,**extra)
        tangent,section=stats.linregress(pos,disp)[0:2]
        regressions.append((tangent,section,min(pos),max(pos)))
        if plot:
            pylab.plot(pos,disp,'.',label=r'$\Delta %s(%s)$'%(names[ax],names[ax]))
    if plot:
        for ax in (0,1,2):
            tangent,section,lo,hi=regressions[ax]
            # extend the drawn line by 20% of the data range on each side
            margin=.2*(hi-lo)
            xs=numpy.linspace(lo-margin,hi+margin)
            ys=[tangent*xv+section for xv in xs]
            pylab.plot(xs,ys,label=r'$\widehat{\Delta %s}(%s)$'%(names[ax],names[ax]))
        pylab.legend(loc='upper left')
        pylab.xlabel(r'$x,\;y,\;z$')
        pylab.ylabel(r'$\Delta x,\;\Delta y,\; \Delta z$')
        pylab.show()
    transverse=[regressions[(principalAxis+shift)%3][0] for shift in (1,2)]
    avgTransHomogenizedStrain=.5*(transverse[0]+transverse[1])
    principalHomogenizedStrain=regressions[principalAxis][0]
    return -avgTransHomogenizedStrain/principalHomogenizedStrain,stress/principalHomogenizedStrain
Exemplo n.º 9
0
def probplot(x, sparams=(), dist='norm', fit=True, plot=None):
    """
    Calculate quantiles for a probability plot of sample data against a
    specified theoretical distribution.

    `probplot` optionally calculates a best-fit line for the data and plots
    the results using Matplotlib or a given plot object.

    Parameters
    ----------
    x : array_like
        Sample/response data from which `probplot` creates the plot.
    sparams : scalar, sequence or tuple, optional
        Distribution-specific parameters, passed positionally to the
        distribution's ``ppf`` after the probabilities.  ``None`` is treated
        as an empty tuple and a scalar as a one-element tuple.
    dist : str, optional
        Distribution name, looked up as an attribute of the
        ``distributions`` module. The default is 'norm' for a normal
        probability plot.
    fit : bool, optional
        Fit a least-squares regression (best-fit) line to the sample data if
        True (default).
    plot : object, optional
        If given, plots the quantiles and least squares fit.
        `plot` is an object with methods "plot", "title", "xlabel", "ylabel"
        and "text". The matplotlib.pyplot module or a custom object with the
        same methods can be used.
        By default, no plot is created.

    Returns
    -------
    (osm, osr) : tuple of ndarrays
        Tuple of theoretical quantiles (osm, or order statistic medians) and
        ordered responses (osr).
    (slope, intercept, r) : tuple of floats, optional
        Tuple containing the result of the least-squares fit. `r` is the
        correlation coefficient, i.e. the square root of the coefficient of
        determination.  This tuple is only returned if `fit` is true; with
        ``fit=False`` the function returns just ``osm, osr``, even when
        `plot` is given.

    Raises
    ------
    ValueError
        If `dist` does not name a distribution with a ``ppf``.

    Notes
    -----
    Even if `plot` is given, the figure is not shown or saved by `probplot`;
    ``plot.show()`` or ``plot.savefig('figname.png')`` should be used after
    calling `probplot`.

    Examples
    --------
    >>> import numpy as np
    >>> import matplotlib.pyplot as plt
    >>> import scipy.stats as stats
    >>> nsample = 100
    >>> np.random.seed(7654321)

    A t distribution with small degrees of freedom:

    >>> ax1 = plt.subplot(221)
    >>> x = stats.t.rvs(3, size=nsample)
    >>> res = stats.probplot(x, plot=plt)

    A t distribution with larger degrees of freedom:

    >>> ax2 = plt.subplot(222)
    >>> x = stats.t.rvs(25, size=nsample)
    >>> res = stats.probplot(x, plot=plt)

    A mixture of 2 normal distributions with broadcasting:

    >>> ax3 = plt.subplot(223)
    >>> x = stats.norm.rvs(loc=[0,5], scale=[1,1.5],
    ...                    size=(nsample//2, 2)).ravel()
    >>> res = stats.probplot(x, plot=plt)

    A standard normal distribution:

    >>> ax4 = plt.subplot(224)
    >>> x = stats.norm.rvs(loc=0, scale=1, size=nsample)
    >>> res = stats.probplot(x, plot=plt)

    """
    N = len(x)
    # Uniform order statistic medians: closed form for the two end points,
    # (i - 0.3175) / (N + 0.365) for the interior points.
    Ui = zeros(N) * 1.0
    Ui[-1] = 0.5**(1.0 /N)
    Ui[0] = 1 - Ui[-1]
    i = arange(2, N)
    Ui[1:-1] = (i - 0.3175) / (N + 0.365)
    try:
        # Plain attribute lookup; the name is no longer passed through eval().
        ppf_func = getattr(distributions, dist).ppf
    except AttributeError:
        raise ValueError("%s is not a valid distribution with a ppf." % dist)
    if sparams is None:
        sparams = ()
    if isscalar(sparams):
        sparams = (sparams,)
    # The builtin ``tuple`` works on Python 2 and 3; types.TupleType is
    # Python 2 only.
    if not isinstance(sparams, tuple):
        sparams = tuple(sparams)
    # NOTE: an earlier inspect-based validation of the ppf signature
    # (loc/scale defaults and number of shape parameters) was disabled in
    # this version; sparams is handed to the ppf unchecked.
    osm = ppf_func(Ui, *sparams)
    osr = sort(x)
    if fit or (plot is not None):
        # The fitted line is needed both for the return value and the plot.
        slope, intercept, r, prob, sterrest = stats.linregress(osm, osr)
    if plot is not None:
        plot.plot(osm, osr, 'o', osm, slope*osm + intercept)
        plot.title('Probability Plot')
        plot.xlabel('Quantiles')
        plot.ylabel('Ordered Values')

        # Put the label at 70% of the x range and 1% of the y range.
        xmin = amin(osm)
        xmax = amax(osm)
        ymin = amin(x)
        ymax = amax(x)
        posx = xmin + 0.70 * (xmax - xmin)
        posy = ymin + 0.01 * (ymax - ymin)
        # The label announces r squared, so square the coefficient.
        plot.text(posx, posy, "r^2=%1.4f" % (r**2))
    if fit:
        return (osm, osr), (slope, intercept, r)
    else:
        return osm, osr
Exemplo n.º 10
0
# Blank separator line.  (The previous ``print(print)`` printed the repr of
# the built-in function instead of an empty line.)
print()

# Correlation statistics, each run on the (l, m) pair and the (a, b) pair.
print('pearsonr:')
print(stats.pearsonr(l,m))
print(stats.pearsonr(a,b))
print('spearmanr:')
print(stats.spearmanr(l,m))
print(stats.spearmanr(a,b))
print('pointbiserialr:')
print(stats.pointbiserialr(pb,l))
print(stats.pointbiserialr(apb,a))
print('kendalltau:')
print(stats.kendalltau(l,m))
print(stats.kendalltau(a,b))
print('linregress:')
print(stats.linregress(l,m))
print(stats.linregress(a,b))

# Inferential statistics on the same inputs.
print('\nINFERENTIAL')
print('ttest_1samp:')
print(stats.ttest_1samp(l,12))
print(stats.ttest_1samp(a,12))
print('ttest_ind:')
print(stats.ttest_ind(l,m))
print(stats.ttest_ind(a,b))
print('ttest_rel:')
print(stats.ttest_rel(l,m))
print(stats.ttest_rel(a,b))
print('chisquare:')
print(stats.chisquare(l))
print(stats.chisquare(a))
print('ks_2samp:')
Exemplo n.º 11
0
# Two blank separator lines.  Every print below uses the single-argument
# parenthesised form, which produces the same output as the print statement.
print('')
print('')

# Correlation statistics, each run on the (l, m) pair and the (a, b) pair.
print('pearsonr:')
print(stats.pearsonr(l, m))
print(stats.pearsonr(a, b))
print('spearmanr:')
print(stats.spearmanr(l, m))
print(stats.spearmanr(a, b))
print('pointbiserialr:')
print(stats.pointbiserialr(pb, l))
print(stats.pointbiserialr(apb, a))
print('kendalltau:')
print(stats.kendalltau(l, m))
print(stats.kendalltau(a, b))
print('linregress:')
print(stats.linregress(l, m))
print(stats.linregress(a, b))

# Inferential statistics on the same inputs.
print('\nINFERENTIAL')
print('ttest_1samp:')
print(stats.ttest_1samp(l, 12))
print(stats.ttest_1samp(a, 12))
print('ttest_ind:')
print(stats.ttest_ind(l, m))
print(stats.ttest_ind(a, b))
print('ttest_rel:')
print(stats.ttest_rel(l, m))
print(stats.ttest_rel(a, b))
print('chisquare:')
print(stats.chisquare(l))
print(stats.chisquare(a))
Exemplo n.º 12
0
# Start with two empty lines.  Each statement is written as a call with one
# argument so that it prints exactly what the print statement printed.
print('')
print('')

# --- correlation measures: list pair (l, m), then pair (a, b) ---
print('pearsonr:')
print(stats.pearsonr(l,m))
print(stats.pearsonr(a,b))
print('spearmanr:')
print(stats.spearmanr(l,m))
print(stats.spearmanr(a,b))
print('pointbiserialr:')
print(stats.pointbiserialr(pb,l))
print(stats.pointbiserialr(apb,a))
print('kendalltau:')
print(stats.kendalltau(l,m))
print(stats.kendalltau(a,b))
print('linregress:')
print(stats.linregress(l,m))
print(stats.linregress(a,b))

# --- inferential tests on the same data ---
print('\nINFERENTIAL')
print('ttest_1samp:')
print(stats.ttest_1samp(l,12))
print(stats.ttest_1samp(a,12))
print('ttest_ind:')
print(stats.ttest_ind(l,m))
print(stats.ttest_ind(a,b))
print('ttest_rel:')
print(stats.ttest_rel(l,m))
print(stats.ttest_rel(a,b))
print('chisquare:')
print(stats.chisquare(l))
print(stats.chisquare(a))
Exemplo n.º 13
0
#stats.paired(l,l)

# Two empty lines, emitted by one call (the string plus print's newline).
print('\n')

# Correlation statistics: l against m, then l against itself.
print('pearsonr:')
print(stats.pearsonr(l, m), stats.pearsonr(l, l), sep='\n')
print('spearmanr:')  # heading only; no spearmanr call follows
print('pointbiserialr:')
print(stats.pointbiserialr(pb, l), stats.pointbiserialr(pb, l), sep='\n')
print('kendalltau:')
print(stats.kendalltau(l, m), stats.kendalltau(l, l), sep='\n')
print('linregress:')
print(stats.linregress(l, m), stats.linregress(l, l), sep='\n')

# Inferential statistics on the same inputs.
print('\nINFERENTIAL')
print('ttest_1samp:')
print(stats.ttest_1samp(l, 12), stats.ttest_1samp(l, 12), sep='\n')
print('ttest_ind:')
print(stats.ttest_ind(l, m), stats.ttest_ind(l, l), sep='\n')
print('chisquare:')
print(stats.chisquare(l), stats.chisquare(l), sep='\n')
print('ks_2samp:')
print(stats.ks_2samp(l, m), stats.ks_2samp(l, l), sep='\n')
Exemplo n.º 14
0
#gap_mean = (recv_timestamps[-1] - recv_timestamps[0]) / (n-1)
#recv_lateness = [recv_timestamps[i] - recv_timestamps[0] - gap_mean * i for i in range(n)]

#for x in send_lateness:
#	print x
#exit()

# Difference between each receive timestamp and the send timestamp with the
# same position, for the first n entries.
diff_timestamps = [recv_timestamps[i] - send_timestamps[i] for i in range(n)]

# Minimum of the raw differences; recomputed below on the detrended values
# before it is used.
diff_min = numpy.min(diff_timestamps)

# Fit a straight line to the differences over the sample index; only the
# slope and the intercept are kept.
slope, intercept, _, _, _ = stats.linregress(range(n), diff_timestamps)

# Subtract the fitted line from every difference.
corrected = [x - (slope * i + intercept) for i, x in enumerate(diff_timestamps)]

# Shift the detrended values so that the smallest one becomes zero.
diff_min = numpy.min(corrected)
skewed = [x - diff_min for x in corrected]

# One value per output line.
for x in skewed:
    print(x)

#for x in diff_timestamps:
#	print x - diff_min
#	print x
Exemplo n.º 15
0

# Each section prints the value quoted in the "SHOULD BE" line and then the
# value computed by the stats module for the same data.

print('\n\nPoint-Biserial r')

gender = [1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0]
score = [35.0, 38.0, 41.0, 40.0, 60.0, 65.0, 65.0, 68.0, 68.0, 64.0]
print('\nSHOULD BE +0.981257 (N=10) ... Basic Stats 1st ed, p.197')
print(stats.pointbiserialr(gender, score))


print('\n\nLinear Regression')

x = [1.0, 1.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0]
y = [2.0, 4.0, 4.0, 6.0, 2.0, 4.0, 7.0, 8.0, 6.0, 8.0, 7.0]
print('\nSHOULD BE 1.44, 1.47, 0.736, ???, 1.42 (N=11)... Basic Stats 1st ed, p.211-2')
print(stats.linregress(x, y))

print('\n\nChi-Square')

# Observed frequencies; the second call supplies expected frequencies too.
fo = [10.0, 40.0]
print('\nSHOULD BE 18.0, <<<0.01 (df=1) ... Basic Stats 1st ed. p.457')
print(stats.chisquare(fo))
print('\nSHOULD BE 5.556, 0.01<p<0.05 (df=1) ... Basic Stats 1st ed. p.460')
print(stats.chisquare(fo, [5, 45]))


print('\n\nMann Whitney U')

red = [540.0, 480.0, 600.0, 590.0, 605.0]
black = [760.0, 890.0, 1105.0, 595.0, 940.0]
print('\nSHOULD BE 2.0, 0.01<p<0.05 (N=5,5) ... Basic Stats 1st ed, p.473-4')