Example #1
 def testMean(self):
     """
     Check that mean works as expected.
     """
     self.assertAlmostEqual(stats.mean(self.dataA), self.meanA, 5)
     self.assertAlmostEqual(stats.mean(self.dataB), self.meanB, 5)
     return
Example #2
def least_squares_fit(x, y):
    """given training values for x and y,
    find the least-squares values of alpha and beta"""
    beta = stats.correlation(x, y) * \
        stats.standard_deviation(y) / stats.standard_deviation(x)
    alpha = stats.mean(y) - beta * stats.mean(x)
    return alpha, beta
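
For reference, the same fit can be reproduced with only the standard library. A minimal sketch, assuming Python 3.10+ for statistics.correlation and using invented toy data:

from statistics import correlation, mean, stdev

def least_squares_fit_stdlib(x, y):
    # slope = r * sd(y) / sd(x); intercept = mean(y) - slope * mean(x)
    beta = correlation(x, y) * stdev(y) / stdev(x)
    alpha = mean(y) - beta * mean(x)
    return alpha, beta

alpha, beta = least_squares_fit_stdlib([1, 2, 3, 4, 5], [2.1, 3.9, 6.2, 8.1, 9.8])
print(alpha, beta)  # intercept ≈ 0.14, slope ≈ 1.96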
Example #3
def sync_check():
#    print 'Checking sync...'
    max_mcnt_difference=4
    mcnts=dict()
    mcnts_list=[]
    mcnt_tot=0

    for f,fpga in enumerate(fpgas):
        mcnts[f]=dict()
        try:
            hdr_index=bram_oob[f]['hdr'].index(1)
        except ValueError:
            print 'ERR: No headers found in BRAM. Are the F engines properly connected?'
            exit()

        pkt_64bit = struct.unpack('>Q',bram_dmp['bram_msb'][f]['data'][(4*hdr_index):(4*hdr_index)+4]+bram_dmp['bram_lsb'][f]['data'][(4*hdr_index):(4*hdr_index)+4])[0]
        mcnts[f]['mcnt'] =(pkt_64bit&((2**64)-(2**16)))>>16
        mcnts_list.append(mcnts[f]['mcnt'])
#        print '[%s] MCNT: %i'%(servers[f],mcnts[f]['mcnt'])

    mcnts['mean']=stats.mean(mcnts_list)
    mcnts['median']=stats.median(mcnts_list)
    mcnts['mode']=stats.mode(mcnts_list)
    mcnts['modalmean']=stats.mean(mcnts['mode'][1])

#    print 'mean: %i, median: %i, modal mean: %i mode:'%(mcnts['mean'],mcnts['median'],mcnts['modalmean']),mcnts['mode']
    
    for f,fpga in enumerate(fpgas):
        if mcnts[f]['mcnt']>(mcnts['modalmean']+max_mcnt_difference) or mcnts[f]['mcnt'] < (mcnts['modalmean']-max_mcnt_difference):
            print '%s OUT OF SYNC!!'%servers[f]
            mcnts[f]['sync_status']='FAIL with error of %i'%(mcnts[f]['mcnt']-mcnts['modalmean'])
        else:
            mcnts[f]['sync_status']='PASS'

    return mcnts
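
The header decode above packs the master counter (mcnt) into the top 48 bits of a 64-bit word; the low 16 bits presumably carry other header fields, so masking them off and shifting right recovers the counter. A small illustration with a made-up header word:

pkt_64bit = (123456 << 16) | 0xBEEF  # hypothetical header word
mcnt = (pkt_64bit & ((2**64) - (2**16))) >> 16  # keep bits 16..63, shift down
assert mcnt == 123456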
Example #5
def compute_stats(te_diffs, gene_diffs, plot_dir):
    pvals = []
    table_lines = []

    for te_or in te_diffs:
        rep, fam, orient = te_or
        
        for sample_key in te_diffs[te_or]:        
            sample1, sample2 = sample_key

            # if enough data
            if len(te_diffs[te_or][sample_key]) >= 10:
                wo_te = list((gene_diffs[sample_key] - te_diffs[te_or][sample_key]).elements())
                w_te = list(te_diffs[te_or][sample_key].elements())

                wo_mean = stats.mean(wo_te)
                w_mean = stats.mean(w_te)

                z, p = stats.mannwhitneyu(w_te, wo_te)

                cols = (rep, fam, orient, sample1, sample2, len(w_te), w_mean, wo_mean, z, p)
                table_lines.append('%-17s %-17s  %1s  %-10s %-10s %6d %9.2f %9.2f %8.2f %10.2e' % cols)

                pvals.append(p)

                # plot ...
                if rep in ['*'] and fam in ['*','LINE/L1','SINE/Alu','LTR/ERV1','LTR/ERVL-MaLR','LINE/L2','LTR/ERVL','SINE/MIR','DNA/hAT-Charlie','LTR/ERVK','DNA/TcMar-Tigger']:
                    out_pdf = '%s/%s_%s_%s_%s-%s.pdf' % (plot_dir,rep.replace('/','-'),fam.replace('/','-'),orient,sample1,sample2)
                    cdf_plot(te_or, w_te, wo_te, out_pdf)

    return table_lines, pvals
Example #6
    def fit(self, X, y):
        n = len(X)
        # _x and _y are the means of X and y.
        _x = st.mean(X)
        _y = st.mean(y)

        self.__class__.b1 = np.sum((X - _x) * (y - _y)) / np.sum((X - _x)**2)
        self.__class__.b0 = _y - (self.__class__.b1 * _x)
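
A hedged usage sketch of the fit above, assuming X and y are NumPy arrays so the vectorised arithmetic works; the class and data below are invented for illustration:

import numpy as np
import statistics as st

class SimpleLinearRegression:
    b0 = 0.0  # intercept
    b1 = 0.0  # slope

    def fit(self, X, y):
        _x, _y = st.mean(X), st.mean(y)
        # normal equations for simple linear regression (coefficients are
        # stored on the class, mirroring the snippet above)
        self.__class__.b1 = np.sum((X - _x) * (y - _y)) / np.sum((X - _x) ** 2)
        self.__class__.b0 = _y - self.__class__.b1 * _x

model = SimpleLinearRegression()
model.fit(np.array([1.0, 2.0, 3.0, 4.0]), np.array([3.0, 5.0, 7.0, 9.0]))
print(model.b1, model.b0)  # slope 2.0, intercept 1.0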
Example #7
  def statsex(self, objects):

    """
	Do some statistics on a source list
	Return dictionary
    """

    import stats, pstat
    
    # Return if we have no objects
    if len(objects) == 0:
      return 0	 

    # Define dictionary to hold statistics	
    stat = {}

    # Get number of objects
    stat['N'] = str(len(objects))

    # Define list (float) of FWHM values
    fwhm = [ float(obj[7]) for obj in objects ]
 
    # Define list (float) of ELLIPTICITY values
    el = [ float(obj[6]) for obj in objects ]

    # Define list (float) of THETA_IMAGE values
    pa = [ float(obj[5]) for obj in objects ]

    # Define list (float) of 'Stella-like' values
    stella = [ float(obj[9]) for obj in objects ]	

    # Create a histogram of FWHM values of binsize 1 pixel
    hfwhm = stats.histogram(fwhm,40,[0,40])[0]
    
    stat['medianFWHM'] = "%.2f" % stats.median(fwhm)
    stat['meanFWHM']   = "%.2f" % stats.mean(fwhm)
    stat['modeFWHM']   = "%.2f" % float(hfwhm.index(max(hfwhm))+0.5)

    try:	
       stat['stdevFWHM']  = "%.2f" % stats.stdev(fwhm)
    except ZeroDivisionError:
       stat['stdevFWHM'] = '0.00'

    stat['medianEL'] = "%.2f" % stats.median(el)
    stat['meanEL']   = "%.2f" % stats.mean(el)

    try:
      stat['stdevEL']  = "%.2f" % stats.stdev(el)
    except ZeroDivisionError:
      stat['stdevEL']  = '0.00' 

    # Histogram of Ellipticity PA (-180 to 180, bins of 45 deg)
    #stat['histoTHETA'] = stats.histogram(pa,8,[-180,180])[0]

    # Histogram of Stellarity (0 to 1, bins of 0.05)
    #stat['histoStella']  = stats.histogram(stella,20,[0,1.01])[0]   

    return stat
Example #9
 def testOnTuples(self):
     """
     Checks that methods also work on tuples.
     """
     self.assertAlmostEqual(stats.mean(tuple(self.dataA)), self.meanA, 5)
     self.assertAlmostEqual(stats.mean(tuple(self.dataB)), self.meanB, 5)
     self.assertAlmostEqual(stats.stddev(tuple(self.dataA)), self.stddevA, 5)
     self.assertAlmostEqual(stats.stddev(tuple(self.dataB)), self.stddevB, 5)
     return
Example #10
def least_squares_fit(xs: Vector, ys: Vector) -> Tuple[float, float]:
    """
    Given a dataset represented by xs and ys, return the alpha, beta that provide the least squared error fit for a
    function y_i = alpha * x_i + beta
    """
    alpha = correlation(xs,
                        ys) * standard_deviation(ys) / standard_deviation(xs)
    beta = mean(ys) - alpha * mean(xs)
    return alpha, beta
Example #11
 def test_validator_working_correct_negativa(self):
     with self.assertRaises(TypeError) as raised_exception:
         stats.missing_data(2, 3.5)
     self.assertEqual(raised_exception.exception.args[0],
                      "input type must be list")
     with self.assertRaises(ValueError) as raised_exception:
         stats.mean([1, 2, (2, 3.5)], [3.5, 2, 1])
     self.assertEqual(raised_exception.exception.args[0],
                      "value in x must be numeric")
Example #12
def findLSRline(dp):
    if not checkDPFormat(dp):
        print "ERROR: invalid dotplot format in findLSRline"
        return "invalid dotplot format"
    r = CorrCoef(dp)
    x = [i[0] for i in dp]
    sx = stats.stdDev(x)
    y = [i[1] for i in dp]
    sy = stats.stdDev(y)
    b = (r * sy) / sx
    a = stats.mean(y) - b * stats.mean(x)
    return a, b
Example #13
def corr(xdata, ydata=None):
    """corr(xydata) -> float
    corr(xdata, ydata) -> float

    Return the sample Pearson's Correlation Coefficient of (x,y) data.

    If ydata is None or not given, then xdata must be an iterable of (x, y)
    pairs. Otherwise, both xdata and ydata must be iterables of values, which
    will be truncated to the shorter of the two.

    >>> corr([(0.1, 2.3), (0.5, 2.7), (1.2, 3.1), (1.7, 2.9)])
    ... #doctest: +ELLIPSIS
    0.827429009335...

    The Pearson correlation is +1 in the case of a perfect positive
    correlation (i.e. an increasing linear relationship), -1 in the case of
    a perfect anti-correlation (i.e. a decreasing linear relationship), and
    some value between -1 and 1 in all other cases, indicating the degree
    of linear dependence between the variables.

    >>> xdata = [1, 2, 3, 4, 5, 6]
    >>> ydata = [2*x for x in xdata]  # Perfect correlation.
    >>> corr(xdata, ydata)
    1.0
    >>> corr(xdata, [5-y for y in ydata])  # Perfect anti-correlation.
    -1.0

    If there are not at least two data points, or if either all the x values
    or all the y values are equal, StatsError is raised.
    """
    if ydata is None:
        # xdata is an iterable of (x, y) pairs; split it into two sequences
        xdata, ydata = [list(t) for t in zip(*xdata)]
    n = len(xdata)
    assert n == len(ydata)
    if n < 2:
        raise StatsError(
            'correlation requires at least two data points, got %d' % n)
    # First pass is to determine the means.
    mx = stats.mean(xdata)
    my = stats.mean(ydata)
    # Second pass to determine the standard deviations.
    sx = stats.stdev(xdata, mx)
    sy = stats.stdev(ydata, my)
    if sx == 0:
        raise StatsError('all x values are equal')
    if sy == 0:
        raise StatsError('all y values are equal')
    # Third pass to calculate the correlation coefficient.
    ap = add_partial
    total = []
    for x, y in zip(xdata, ydata):
        term = ((x-mx)/sx) * ((y-my)/sy)
        ap(term, total)
    r = math.fsum(total)/(n-1)
    assert -1 <= r <= 1
    return r
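
A quick numeric cross-check of corr() against the standard library (a sketch; statistics.correlation requires Python 3.10+):

from statistics import correlation

xdata = [1, 2, 3, 4, 5, 6]
ydata = [2, 1, 4, 3, 6, 5]
r = correlation(xdata, ydata)
assert abs(r - 14.5 / 17.5) < 1e-12  # r ≈ 0.8286, the same value corr() returns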
Example #14
def muestroEspeYVar(valores, esperanzaTeorica, alfa):
    print("The sample mean (expectation) is:", stats.mean(valores))
    print("The theoretical expectation is:", esperanzaTeorica)
    print("The sample variance is:", numpy.var(valores))
    print("The theoretical variance is:", calcularVarianza(alfa))
    print("The sample standard deviation is:",
          math.sqrt(numpy.var(valores)))
    print("Theoretically it is:", math.sqrt(calcularVarianza(alfa)))
    print("The sample's alfa parameter is:", 1 / (stats.mean(valores)))
    print("Theoretically it is:", alfa)
    print("\n")
Example #15
 def get_modules(self, cutoff=.05):
     modules = []
     for e in self:
         if e.val < min(e.lo_min, e.hi_min, cutoff):
             if self.datatype=="continuous":
                 e.desc = "lo" if mean(e.a) < mean(e.b) else "hi"
             else:
                 e.desc = "enriched"
             modules.append(e)
         else:
             modules += e.get_modules(cutoff=cutoff)
     return modules
Example #16
def collect_mean(input_list):
    """ Collect time execution of mean of each module """

    begin_py_mean = clock()
    py_mean(input_list)
    end_py_mean = clock()

    begin_mean = clock()
    mean(input_list)
    end_mean = clock()

    times = format_times(end_py_mean - begin_py_mean, end_mean - begin_mean)
    save_times(times, logs['mean'])
Example #17
def _SP(xdata, mx, ydata, my):
    """SP = sum of product of deviations.
    Helper function for calculating covariance directly.
    """
    if mx is None:
        # Two pass algorithm.
        xdata = as_sequence(xdata)
        mx = stats.mean(xdata)
    if my is None:
        # Two pass algorithm.
        ydata = as_sequence(ydata)
        my = stats.mean(ydata)
    return _generalised_sum(zip(xdata, ydata), lambda t: (t[0]-mx)*(t[1]-my))
Example #18
def corr(x, y):
    N = len(x)
    if len(y) != N:
        raise Exception(
            "Sequences must be of the same length. X length: {0} ; Y length {1}"
            .format(N, len(y)))
    else:
        # accumulate the sum of products without shadowing the builtin sum()
        total = 0
        for index, xi in enumerate(x):
            total += xi * y[index]
        r = (total - N * stats.mean(x) * stats.mean(y)) / (
            (N - 1) * stats.stdev(x) * stats.stdev(y))
        return r
Example #19
 def check_basic(self):
     a = [3,4,5,10,-3,-5,6]
     af = [3.,4,5,10,-3,-5,-6]
     Na = len(a)
     Naf = len(af)
     mn1 = 0.0
     for el in a:
         mn1 += el / float(Na)
     assert_almost_equal(stats.mean(a),mn1,11)
     mn2 = 0.0
     for el in af:
         mn2 += el / float(Naf)
     assert_almost_equal(stats.mean(af),mn2,11)
Example #20
def _getRatingStarsAverages():
    android_ratings = []
    ios_ratings = []
    android_stars = []
    ios_stars = []

    global android_ratings_average
    global android_stars_average

    global ios_ratings_average
    global ios_stars_average

    print collection_ios.count(
        {
            'android_ratings_float': {
                '$gte': 1
            },
            'ios_ratings_float': {
                '$gte': 1
            }
        },
        no_cursor_timeout=True)
    for app in collection_ios.find(
        {
            'android_ratings_float': {
                '$gte': 1
            },
            'ios_ratings_float': {
                '$gte': 1
            }
        },
            no_cursor_timeout=True):
        android_ratings.append(app['android_ratings_float'])
        android_stars.append(app['android_stars_float'])
        #ios stats
        ios_ratings.append(app['ios_ratings_float'])
        ios_stars.append(app['ios_stars_float'])

    android_ratings_average = stats.mean(android_ratings)
    android_stars_average = stats.mean(android_stars)

    ios_ratings_average = stats.mean(ios_ratings)
    ios_stars_average = stats.mean(ios_stars)

    print "android"
    print android_ratings_average
    print android_stars_average

    print "ios"
    print ios_ratings_average
    print ios_stars_average
Example #21
 def check_2d(self):
     a = [[1.0, 2.0, 3.0],
          [2.0, 4.0, 6.0],
          [8.0, 12.0, 7.0]]
     A = array(a,'d')
     N1,N2 = (3,3)
     mn1 = zeros(N2,'d')
     for k in range(N1):
         mn1 += A[k,:] / N1
     assert allclose(stats.mean(a),mn1,rtol=1e-13,atol=1e-13)
     mn2 = zeros(N1,'d')
     for k in range(N2):
         mn2 += A[:,k] / N2
     assert allclose(stats.mean(a,axis=0),mn2,rtol=1e-13,atol=1e-13)
Example #22
 def parameter_cedo_meanmean(self, TTL):
     """ Computes the CEDO distribution parameter with the mean of the mean IMT per each pair of meeting nodes. """
     raise Exception() #deprecated
     
     logger.debug("Estimating CEDO mean of means parameter.")
     
     imts = []
     
     for timestamp, l in self._all_im.values():
         if l:
             imts.append(mean(l))
     
     m = imts and mean(imts) or sys.maxsize
     return self.apply_ndp(TTL, m)
Example #23
def _getCollection():
    print "Sanity Check"
    print "android"
    print android_ratings_average
    print android_stars_average

    print "ios"
    print ios_ratings_average
    print ios_stars_average

    ratings_combined_average = stats.mean(
        [android_ratings_average, ios_ratings_average])
    stars_combined_average = stats.mean(
        [android_stars_average, ios_stars_average])

    print "average:"
    print ratings_combined_average
    print stars_combined_average
    for post in collection_ios.find(
        {
            'android_ratings_float': {
                '$gte': 1
            },
            'ios_ratings_float': {
                '$gte': 1
            }
        },
            no_cursor_timeout=True):
        android_app_id = post['android_app_id']
        android_app_rating = post['android_ratings_float']
        android_app_star = post['android_stars_float']

        ios_app_rating = post['ios_ratings_float']
        ios_app_star = post['ios_stars_float']

        successAndroid = (float(
            _sucess(android_app_rating, android_app_star,
                    ratings_combined_average, stars_combined_average)) /
                          5) * 100
        successIos = (float(
            _sucess(ios_app_rating, ios_app_star, ratings_combined_average,
                    stars_combined_average)) / 5) * 100

        collection_ios.find_one_and_update(
            {"android_app_id": android_app_id}, {
                '$set': {
                    'android_success_average': successAndroid,
                    'ios_success_average': successIos
                }
            })
Example #24
def calculateDividingLine(gestures, maybeGestures, nonGestures):
	numFolds = min(TESTING_FOLDS, len(gestures))
	
	allGestureDistances = []
	allNonGestureDistances = []
	
	for foldNum in range(numFolds):	
		trainingGestures = [gesture for i, gesture in enumerate(gestures) if i % numFolds != foldNum]
		testingGestures = [localTimeGestures for i, localTimeGestures in enumerate(maybeGestures) if i % numFolds == foldNum]
		
		#print 'train, test #s: ', len(trainingGestures), len(testingGestures)
		
		# make a distance calculator based on the subset of the training data
		distanceCalculator = gestureDistanceCalculator.GestureDistanceCalculator(trainingGestures)
		
		#each localTimeGestures is a list of the closest times to when a gesture was identified in training
		#since the output can be triggered at slightly different times, we should look for a minimum near where
		#the gesture is known to have happened, compared to the training gestures
		gestureDistances = []
		#print testingGestures
		for localTimeGestureSet in testingGestures:
			
			closestDistance = min(map(distanceCalculator.getDistance, localTimeGestureSet))
			gestureDistances.append(closestDistance)
		
		#gestureDistances = map(distanceCalculator.getDistance, testingGestures)
		#print gestureDistances
		nonGestureDistances = map(distanceCalculator.getDistance, nonGestures)
		#print gestureDistances
		
		allGestureDistances += gestureDistances
		allNonGestureDistances += nonGestureDistances
		#break
		
	#print len(allGestureDistances), len(allNonGestureDistances)
	print 'means: ', stats.mean(allGestureDistances), stats.mean(allNonGestureDistances)
	
	print 'std devs: ', stats.stdDev(allGestureDistances), stats.stdDev(allNonGestureDistances)
	
	meanGesture = stats.mean(allGestureDistances)
	meanNon = stats.mean(allNonGestureDistances)
	
	devGesture = stats.stdDev(allGestureDistances)
	devNon = stats.stdDev(allNonGestureDistances)
	
	line = (meanGesture * devNon + meanNon * devGesture) / ( devGesture + devNon)
	
	#print line
	return line
Example #25
def liver(request):
    expressLevelsNishi = naturallysortedexpressionlist(Expression.objects.filter(organ='Liver', experiment='Nishimura'))
    expressLevelsPMT = naturallysortedexpressionlist(Expression.objects.filter(organ='Liver', experiment__startswith='PMT Sample'))
    expressBiotroveTransporters = naturallysortedtransporterlist(Transporter.objects.filter(expression__organ='Liver', expression__experiment__startswith='PMT Biotrove').distinct())
    important = Transporter.objects.filter(organ__name='Liver')
    importantNames = []
    for x in important:
        importantNames.append(str(x.symbol))
    synquery = Transporter.objects.all()
    syns = {}
    for x in synquery:
        syns[x.symbol] = x.synonyms
    # Calculate mean expression across all PMT samples
    pmtTableValues = []
    for x in range(len(expressLevelsPMT)/3):
        build = []
        for y in range(3):
            build.append(expressLevelsPMT[x*3+y].value)
        avg = stats.mean(build)
        stdev = stats.stdev(build)
        id = expressLevelsPMT[x*3].trans
        pmtTableValues.append([id] + build + [avg, stdev])
    # Calculate median and quartiles across biotrove samples
    biotroveTableValues = []
    for x in expressBiotroveTransporters:
        values = Expression.objects.filter(organ='Liver', experiment__startswith='PMT Biotrove', trans=x).values_list('value', flat=True).order_by('value')
        build = []
        build.append(x.symbol)
        build.append(quartiles(values, 1))
        build.append(quartiles(values, 2))
        build.append(quartiles(values, 3))
        biotroveTableValues.append(build)
    return render_to_response('liver.html', {'expressionNishi': expressLevelsNishi, 'expressionPMT': pmtTableValues, 'organ': 'Liver', 'syns': syns, 'important': importantNames, 'expressionBiotrove': biotroveTableValues})
Example #26
def linear_regression(x, y):
  """
  See: https://www.khanacademy.org/math/probability/regression/regression-correlation/v/regression-line-example
  """
  xy_mean = s.xy_mean(x, y)
  print xy_mean
  x_mean = s.mean(x)
  y_mean = s.mean(y)
  x_squared_mean = s.mean([xi ** 2 for xi in x])

  # Slope.
  a = (x_mean * y_mean - xy_mean) / (x_mean ** 2 - x_squared_mean)
  # Intercept.
  b = y_mean - a * x_mean

  return (a, b)
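
A numeric sanity check of the slope and intercept formulas above (a sketch; s.xy_mean is assumed to compute the mean of the products x_i * y_i):

x = [0, 1, 2, 3]
y = [1, 3, 5, 7]                     # exactly y = 2x + 1
n = len(x)
xy_mean = sum(xi * yi for xi, yi in zip(x, y)) / n   # 8.5
x_mean, y_mean = sum(x) / n, sum(y) / n              # 1.5, 4.0
x_squared_mean = sum(xi ** 2 for xi in x) / n        # 3.5
a = (x_mean * y_mean - xy_mean) / (x_mean ** 2 - x_squared_mean)  # slope 2.0
b = y_mean - a * x_mean                                           # intercept 1.0
assert (a, b) == (2.0, 1.0)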
Example #27
 def ttest_1samp(a, popmean):
     t = (stats.mean(a) - popmean) / (stats.stddev(a) / len(a) ** 0.5)
     v = len(a) - 1.0
     p = gamma((v + 1) / 2) / ((v * pi) ** 0.5 * gamma(v / 2)) * (1 + t ** 2 / v) ** (-(v + 1) / 2)
     return (
         [t, None], 
         [p, None])
Example #28
def main():
    [(stat, first), (stat, second)] = load_stats(sys.argv[1:])

    # Attempt to increase robustness by dropping the outlying 10% of values.
    first = trim(first, 0.1)
    second = trim(second, 0.1)

    fmean = stats.mean(first)
    smean = stats.mean(second)
    p = 1 - ttest_1samp(second, fmean)[1][0]
    if p >= 0.95:
        # rejected the null hypothesis
        print sys.argv[1], 'mean of', fmean, 'differs from', sys.argv[2], 'mean of', smean, '(%2.0f%%)' % (p * 100,)
    else:
        # failed to reject the null hypothesis
        print 'cannot prove means (%s, %s) differ (%2.0f%%)' % (fmean, smean, p * 100,)
Example #29
def circvar(samples, high=2*pi, low=0):
    """Compute the circular variance for samples assumed to be in the range [low to high]
    """
    ang = (samples - low)*2*pi / (high-low)
    res = stats.mean(exp(1j*ang))
    V = 1-abs(res)
    return ((high-low)/2.0/pi)**2 * V
Example #30
def circstd(samples, high=2*pi, low=0):
    """Compute the circular standard deviation for samples assumed to be in the range [low to high]
    """
    ang = (samples - low)*2*pi / (high-low)
    res = stats.mean(exp(1j*ang))
    V = 1-abs(res)
    return ((high-low)/2.0/pi) * sqrt(V)
Example #31
    def integrate_box_1d(self, low, high):
        """Computes the integral of a 1D pdf between two bounds.

        Parameters
        ----------
        low : scalar
            lower bound of integration
        high : scalar
            upper bound of integration

        Returns
        -------
        value : scalar
            the result of the integral

        Raises
        ------
        ValueError if the KDE is over more than one dimension.
        """
        if self.d != 1:
            raise ValueError("integrate_box_1d() only handles 1D pdfs")

        stdev = ravel(sqrt(self.covariance))[0]

        normalized_low = ravel((low - self.dataset) / stdev)
        normalized_high = ravel((high - self.dataset) / stdev)

        value = stats.mean(
            special.ndtr(normalized_high) - special.ndtr(normalized_low))
        return value
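
The same computation is exposed through SciPy's public API, which can serve as a cross-check (a sketch; the 0.68 figure assumes standard-normal data):

import numpy as np
from scipy.stats import gaussian_kde

kde = gaussian_kde(np.random.default_rng(0).normal(size=1000))
print(kde.integrate_box_1d(-1.0, 1.0))  # ≈ 0.68 for standard normal data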
Example #32
def diff_fpkm(diff_file, pseudocount):
    gene_fpkms = {}

    diff_in = open(diff_file)
    diff_in.readline()
    for line in diff_in:
        a = line.split('\t')

        gene_id = a[0]
        sample1 = a[4]
        sample2 = a[5]
        status = a[6]
        fpkm1 = float(a[7])
        fpkm2 = float(a[8])

        if status == 'OK':
            if gene_id in gene_fpkms:
                gene_fpkms[gene_id] += [fpkm1, fpkm2]
            else:
                gene_fpkms[gene_id] = [fpkm1, fpkm2]

    diff_in.close()

    gene_fpkm = {}
    for gene_id in gene_fpkms:
        log_fpkms = [math.log(fpkm+pseudocount,2) for fpkm in gene_fpkms[gene_id]]
        gene_fpkm[gene_id] = stats.mean(log_fpkms)

    return gene_fpkm
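
The pseudocount keeps log2 defined for genes with zero FPKM in one of the samples, a short sketch:

import math

pseudocount = 0.125
print(math.log(0.0 + pseudocount, 2))  # -3.0, instead of a math domain error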
Example #33
def muestroEsperanzayVar(valores, alfa, ka):
    print("The sample mean (expectation) is:", stats.mean(valores))
    print("The theoretical expectation is =", ka/alfa)
    print("The sample variance is =", numpy.var(valores))
    print("The theoretical variance is =", ka/(alfa**2))
    print("The standard deviation is =", math.sqrt(numpy.var(valores)))
    print("Theoretically it is =", math.sqrt(ka/(alfa**2)))
Example #34
def scale(data_matrix):
    num_rows, num_cols = shape(data_matrix)
    means = [mean(get_column(data_matrix,j))
             for j in range(num_cols)]
    stdevs = [standard_deviation(get_column(data_matrix,j))
              for j in range(num_cols)]
    return means, stdevs
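
An equivalent column summary using only the standard library (a sketch, assuming the matrix is a list of equal-length rows):

from statistics import mean, stdev

data_matrix = [[1.0, 10.0], [2.0, 20.0], [3.0, 30.0]]
columns = list(zip(*data_matrix))
means = [mean(col) for col in columns]    # [2.0, 20.0]
stdevs = [stdev(col) for col in columns]  # [1.0, 10.0]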
Example #35
def sample_stats(n):
    """
    Compute mean and standard deviation on a bunch of 
    random numbers
    """
    sample = tuple(random.random() for i in range(n))
    return mean(sample), sd(sample)
Example #36
def main():
    [(_ignore_stat, first), (_ignore_stat, second)] = load_stats(sys.argv[1:])

    # Attempt to increase robustness by dropping the outlying 10% of values.
    first = trim(first, 0.1)
    second = trim(second, 0.1)

    fmean = stats.mean(first)
    smean = stats.mean(second)
    p = ttest_1samp(second, fmean)[1]
    if p >= 0.95:
        # rejected the null hypothesis
        print(sys.argv[1], 'mean of', fmean, 'differs from', sys.argv[2], 'mean of', smean, '(%2.0f%%)' % (p * 100,))
    else:
        # failed to reject the null hypothesis
        print('cannot prove means (%s, %s) differ (%2.0f%%)' % (fmean, smean, p * 100,))
Example #37
def _test_mem_bounded_golden_values_fields(activeGpuCount, memUsageTS,
                                           tailStart):
    goldenVal = 1148  # 1 KB plus some swag per field instance (Global, GPU). This is based off of the keyed vector block size and default number of blocks
    tolerance = 0.10  # low tolerance, amount of records stored is bounded

    for fieldId, series in memUsageTS.fieldVals.items():

        seriesTail = series[tailStart:]

        # skip fields that are not implemented
        if sum(seriesTail) == 0:
            continue

        #Don't check the size of binary fields since it's arbitrary per fieldId
        if helper_field_has_variable_size(fieldId):
            logger.info("Skipping variable-sized fieldId %d" % fieldId)
            continue

        mean = stats.mean(seriesTail)

        lowLimit = (1 - tolerance) * goldenVal
        highLimit = (1 + tolerance) * goldenVal * activeGpuCount
        assert lowLimit < mean < highLimit, \
            'Expected field "%d" memory usage to be between %s and %s but got %s. ' % \
            (fieldId, lowLimit, highLimit, mean) \
            + 'If this new value is expected, change the golden value used for comparison.'
Example #38
 def ttest_1samp(a, popmean):
     # T statistic - http://mathworld.wolfram.com/Studentst-Distribution.html
     t = (stats.mean(a) - popmean) / (stats.stddev(a) / len(a)**0.5)
     v = len(a) - 1.0
     p = gamma((v + 1) / 2) / (
         (v * pi)**0.5 * gamma(v / 2)) * (1 + t**2 / v)**(-(v + 1) / 2)
     return (t, p)
Example #39
def wordSummary(db, table):
    f = open("wordSummary_%s.txt" % table, 'w')
    d = {}
    header = "word, length, rtAVG, rtSTD, total, percCorrect\n"
    f.write(header)
    wordList = []
    sql = "SELECT DISTINCT(word) FROM %s" % table
    for w in db.query(sql):
        wordList.append(w[0])

    for word in wordList:
        sql = "SELECT RT FROM %s WHERE word = '%s' AND incorrect = 0" % (table,
                                                                         word)
        wordLen = len(word)
        rtList = []
        zList = []

        for rt in db.query(sql):
            rtList.append(rt[0])

        rtAVG = stats.mean(rtList)
        rtSTD = stats.samplestdev(rtList)

        total = db.query("SELECT COUNT(*) FROM %s WHERE word = '%s'" %
                         (table, word))[0][0]
        percCorrect = float(len(rtList)) / float(total) * 100.0

        print len(rtList), total

        myString = "%s, %i, %f, %f, %i, %f\n" % (word, wordLen, rtAVG, rtSTD,
                                                 total, percCorrect)
        print myString
        f.write(myString)

    f.close()
Example #40
def wordSummary(db, table):
	f = open("wordSummary_%s.txt" % table, 'w')
	d = {}
	header = "word, length, rtAVG, rtSTD, total, percCorrect\n"
	f.write(header)
	wordList = []
	sql = "SELECT DISTINCT(word) FROM %s" % table
	for w in db.query(sql):
		wordList.append(w[0])
		
	for word in wordList:
		sql = "SELECT RT FROM %s WHERE word = '%s' AND incorrect = 0" % (table, word)
		wordLen = len(word)
		rtList = []
		zList = []

		for rt in db.query(sql):
			rtList.append(rt[0])

		rtAVG = stats.mean(rtList)
		rtSTD = stats.samplestdev(rtList)


		total = db.query("SELECT COUNT(*) FROM %s WHERE word = '%s'" % (table, word))[0][0]
		percCorrect = float(len(rtList)) / float(total) * 100.0

		print len(rtList), total

		myString = "%s, %i, %f, %f, %i, %f\n" % (word, wordLen, rtAVG, rtSTD, total, percCorrect)
		print myString
		f.write(myString)


	f.close()
Example #41
def optimal_discard_6(hand):
    # loop over all possible discard options
    possible_hands_scores = {}
    for i in range(6):
        for j in range(6):
            if j > i:
                possible_hand = []
                counter = -1
                for card in hand['hand6']:
                    counter += 1
                    if counter == i or counter == j:
                        continue
                    possible_hand.append(card)

                # get total list of possible scores for that hand
                point_list = check_all_cuts(possible_hand)
                possible_hands_scores[tuple(possible_hand)] = point_list

    max_expected_points = 0
    best_hand = []
    for poss_hand, point_list in possible_hands_scores.iteritems():
        expected_points = mean(point_list)
        # print hand, expected_points
        if expected_points > max_expected_points:
            max_expected_points = expected_points
            best_hand = poss_hand

    discard = list(set(hand['hand6']) - set(best_hand))

    # print("Best Hand = {} for {} points".format(best_hand, max_expected_points))
    return list(best_hand), discard
Example #42
def print_latex_stats(diffs, label):
	print '%s & %.3f & %.3f & %.3f & %.3f \\\\' % (
		label,
		min(diffs) / 1000.0,
		max(diffs) / 1000.0,
		stats.mean(diffs) / 1000.0,
		stats.stdev(diffs) / 1000.0)
Example #43
def print_stats(L):
    """ Display some information about the lists """

    print "Let's compute some statistics..."
    print "\tMean: %d" % mean(L)
    print "\tStandard deviation: %d" % std(L)
    print "\t# of outliers: %d" % (len(L) - len(remove_outliers(L,1)))
Example #44
def test_mean1():
    obs = mean([0, 0, 0, 0])
    exp = 0
    assert_equal(obs, exp)

    obs = mean([0, 200])
    exp = 100
    assert_equal(obs, exp)

    obs = mean([0, -200])
    exp = -100
    assert_equal(obs, exp)

    obs = mean([0]) 
    exp = 0
    assert_equal(obs, exp)
Example #45
    def integrate_box_1d(self, low, high):
        """Computes the integral of a 1D pdf between two bounds.

        Parameters
        ----------
        low : scalar
          lower bound of integration
        high : scalar
          upper bound of integration

        Returns
        -------
        value : scalar
          the result of the integral
        """
        if self.d != 1:
            raise ValueError("integrate_box_1d() only handles 1D pdfs")

        stdev = ravel(sqrt(self.covariance))[0]

        normalized_low = ravel((low - self.dataset)/stdev)
        normalized_high = ravel((high - self.dataset)/stdev)

        value = stats.mean(special.ndtr(normalized_high) -
                           special.ndtr(normalized_low))
        return value
Example #46
 def testVariance(self):
     data = [1, 2, 3]
     assert stats.mean(data) == 2
     self.assertEqual(stats.pvariance(data), 2/3)
     self.assertEqual(stats.variance(data), 1.0)
     self.assertEqual(stats.pstdev(data), math.sqrt(2/3))
     self.assertEqual(stats.stdev(data), 1.0)
Example #50
def diff_fpkm(diff_file, pseudocount):
    gene_fpkms = {}

    diff_in = open(diff_file)
    diff_in.readline()
    for line in diff_in:
        a = line.split('\t')

        gene_id = a[0]
        sample1 = a[4]
        sample2 = a[5]
        status = a[6]
        fpkm1 = float(a[7])
        fpkm2 = float(a[8])

        if status == 'OK':
            if gene_id in gene_fpkms:
                gene_fpkms[gene_id] += [fpkm1, fpkm2]
            else:
                gene_fpkms[gene_id] = [fpkm1, fpkm2]

    diff_in.close()

    gene_fpkm = {}
    for gene_id in gene_fpkms:
        log_fpkms = [
            math.log(fpkm + pseudocount, 2) for fpkm in gene_fpkms[gene_id]
        ]
        gene_fpkm[gene_id] = stats.mean(log_fpkms)

    return gene_fpkm
Example #51
def default_score_set(expression, primer_set, primer_locs, max_dist, bg_dist_mean):
    """Evaluate an expression using the provided values and a set of metrics.

    :returns: the score and the metrics used to calculate it
    """
    # Calculate various metrics
    binding_distances = stats.seq_diff(primer_locs)
    namespace = {
        'set_size': len(primer_set),
        'fg_dist_mean': stats.mean(binding_distances),
        'fg_dist_std': stats.stdev(binding_distances),
        'fg_dist_gini': stats.gini(binding_distances),
        'bg_dist_mean': bg_dist_mean,
        'fg_max_dist': max_dist,
        '__builtins__': None}
    permitted_var_str = ", ".join(
        [key for key in namespace.keys() if key != "__builtins__"])
    score = None
    try:
        score = eval(expression, namespace, {'__builtins__': {}})
    except NameError as e:
        raise NameError(
            e.message +
            '. Permitted variables are %s. Refer to README or docs for help.'
            % permitted_var_str)
    del namespace['__builtins__']
    return score, namespace
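
A hedged usage sketch of the restricted-eval pattern above: the scoring expression can reference only the whitelisted metric names because builtins are disabled (the values here are invented):

namespace = {'fg_dist_mean': 120.0, 'bg_dist_mean': 200.0, '__builtins__': None}
score = eval("bg_dist_mean - fg_dist_mean", namespace, {'__builtins__': {}})
print(score)  # 80.0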
Example #52
def scaleTestMinFinding():
	xs = range(10)
	distances = []
	noise = 3.5
	n = 1000000
	for i in range(n):
		a = random()
		b = random()
		c = random()
		ys = [x*x*a + x*b + c + random() * noise for x in xs]
		
		#print a, b, c, polynomialFit(xs, ys)[::-1]
		minExp, unc = polynomialFindMinimum(xs, ys, returnErrors = True)
		minCalc = -b/(2.0*a)
		dist = (minCalc - minExp) / unc
		#print minCalc, minExp, unc, dist
		distances.append(dist)
		
	print 'mean: %f' % stats.mean(distances)
	print 'stdDev: %f' % stats.stdDev(distances)
	for sigma in [1, 2, 3]:
		print 'With %d sigma: %f%%' % (sigma, 100.0 * sum([int(abs(d) < sigma) for d in distances]) / n)
	
	pylab.hist(distances, bins = 50, range = (-5, 5))
	pylab.show()
Example #53
def cuff_fpkm(fpkm_file, pseudocount):
    cuff = cufflinks.fpkm_tracking(fpkm_file)

    gene_fpkm = {}
    for gene_id in cuff.genes:
        gene_fpkm[gene_id] = stats.mean([math.log(pseudocount+e,2) for e in cuff.gene_expr(gene_id, not_found=0, fail=0)])

    return gene_fpkm
Example #54
def circmean(samples, high=2*pi, low=0):
    """Compute the circular mean for samples assumed to be in the range [low to high]
    """
    ang = (samples - low)*2*pi / (high-low)
    res = angle(stats.mean(exp(1j*ang)))
    if (res < 0):
        res = res + 2*pi
    return res*(high-low)/2.0/pi + low
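
Why the complex-exponential trick is needed (an illustrative sketch assuming NumPy-style array ops, as the snippet above does): angles of 350 and 10 degrees straddle the wrap-around point, so a plain arithmetic mean gives 180 while the circular mean is 0:

import numpy as np

samples = np.array([350.0, 10.0])
ang = (samples - 0) * 2 * np.pi / 360.0
res = np.angle(np.mean(np.exp(1j * ang)))
if res < 0:
    res += 2 * np.pi
print(res * 360.0 / (2 * np.pi))  # ≈ 0 (or equivalently 360), not 180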
Example #55
def scale(data_matrix):
    """returns means and standard deviations of each column"""
    num_rows, num_columns = la.shape(data_matrix)
    means = [stats.mean(la.get_c(data_matrix, j)) for j in range(num_columns)]
    stdevs = [
        stats.std_dev(la.get_c(data_matrix, j)) for j in range(num_columns)
    ]
    return means, stdevs
Example #57
def _getRatingAverages():
    count = 0
    android_rating_total = 0
    ios_rating_total = 0
    android_ratings = []
    ios_ratings = []

    global android_rating_average
    global ios_rating_average

    global android_rating_median
    global ios_rating_median

    global android_rating_q1
    global ios_rating_q1

    global android_rating_q3
    global ios_rating_q3

    for app in collection_ios.find().batch_size(30):
        # count=count+1
        # android_rating_total=android_rating_total+float(app['android_ratingsAllVersions'].replace(',',''))
        # ios_rating_total=ios_rating_total+float(app['ios_ratingsAllVersions_new'].replace(',',''))
        android_ratings.append(float(app["android_ratingsAllVersions"].replace(",", "")))
        ios_ratings.append(float(app["ios_ratingsAllVersions_new"].replace(",", "")))

    android_rating_average = stats.mean(android_ratings)
    ios_rating_average = stats.mean(ios_ratings)

    android_rating_median = stats.median(android_ratings)
    ios_rating_median = stats.median(ios_ratings)

    android_rating_q1 = stats.quartiles(android_ratings)[0]
    ios_rating_q1 = stats.quartiles(ios_ratings)[0]

    android_rating_q3 = stats.quartiles(android_ratings)[1]
    ios_rating_q3 = stats.quartiles(ios_ratings)[1]

    print "ios stats"
    print ios_rating_q1
    print ios_rating_median
    print ios_rating_q3
    print "Android stats"
    print android_rating_q1
    print android_rating_median
    print android_rating_q3
Example #58
def scale(data_matrix):
    # Return the mean and standard deviation of each column
    num_rows, num_cols = shape(data_matrix)
    means = [mean(get_column(data_matrix, j)) for j in range(num_cols)]
    stdevs = [
        standard_deviation(get_column(data_matrix, j)) for j in range(num_cols)
    ]
    return means, stdevs