def testMean(self):
    """ Check that mean works as expected. """
    self.assertAlmostEqual(stats.mean(self.dataA), self.meanA, 5)
    self.assertAlmostEqual(stats.mean(self.dataB), self.meanB, 5)
    return
def least_squares_fit(x, y):
    """given training values for x and y,
    find the least-squares values of alpha and beta"""
    beta = stats.correlation(x, y) * \
        stats.standard_deviation(y) / stats.standard_deviation(x)
    alpha = stats.mean(y) - beta * stats.mean(x)
    return alpha, beta
def sync_check():
    # print 'Checking sync...'
    max_mcnt_difference = 4
    mcnts = dict()
    mcnts_list = []
    mcnt_tot = 0

    for f, fpga in enumerate(fpgas):
        mcnts[f] = dict()
        try:
            hdr_index = bram_oob[f]['hdr'].index(1)
        except:
            print 'ERR: No headers found in BRAM. Are the F engines properly connected?'
            exit()
        pkt_64bit = struct.unpack('>Q',
            bram_dmp['bram_msb'][f]['data'][(4*hdr_index):(4*hdr_index)+4]
            + bram_dmp['bram_lsb'][f]['data'][(4*hdr_index):(4*hdr_index)+4])[0]
        mcnts[f]['mcnt'] = (pkt_64bit & ((2**64)-(2**16))) >> 16
        mcnts_list.append(mcnts[f]['mcnt'])
        # print '[%s] MCNT: %i'%(servers[f],mcnts[f]['mcnt'])

    mcnts['mean'] = stats.mean(mcnts_list)
    mcnts['median'] = stats.median(mcnts_list)
    mcnts['mode'] = stats.mode(mcnts_list)
    mcnts['modalmean'] = stats.mean(mcnts['mode'][1])
    # print 'mean: %i, median: %i, modal mean: %i mode:'%(mcnts['mean'],mcnts['median'],mcnts['modalmean']),mcnts['mode']

    for f, fpga in enumerate(fpgas):
        if mcnts[f]['mcnt'] > (mcnts['modalmean'] + max_mcnt_difference) or \
                mcnts[f]['mcnt'] < (mcnts['modalmean'] - max_mcnt_difference):
            print '%s OUT OF SYNC!!' % servers[f]
            mcnts[f]['sync_status'] = 'FAIL with error of %i' % (mcnts[f]['mcnt'] - mcnts['modalmean'])
        else:
            mcnts[f]['sync_status'] = 'PASS'
    return mcnts
def compute_stats(te_diffs, gene_diffs, plot_dir):
    pvals = []
    table_lines = []
    for te_or in te_diffs:
        rep, fam, orient = te_or
        for sample_key in te_diffs[te_or]:
            sample1, sample2 = sample_key
            # if enough data
            if len(te_diffs[te_or][sample_key]) >= 10:
                wo_te = list((gene_diffs[sample_key] - te_diffs[te_or][sample_key]).elements())
                w_te = list(te_diffs[te_or][sample_key].elements())
                wo_mean = stats.mean(wo_te)
                w_mean = stats.mean(w_te)
                z, p = stats.mannwhitneyu(w_te, wo_te)
                cols = (rep, fam, orient, sample1, sample2, len(w_te), w_mean, wo_mean, z, p)
                table_lines.append('%-17s %-17s %1s %-10s %-10s %6d %9.2f %9.2f %8.2f %10.2e' % cols)
                pvals.append(p)
                # plot ...
                if rep in ['*'] and fam in ['*', 'LINE/L1', 'SINE/Alu', 'LTR/ERV1', 'LTR/ERVL-MaLR',
                                            'LINE/L2', 'LTR/ERVL', 'SINE/MIR', 'DNA/hAT-Charlie',
                                            'LTR/ERVK', 'DNA/TcMar-Tigger']:
                    out_pdf = '%s/%s_%s_%s_%s-%s.pdf' % (plot_dir, rep.replace('/', '-'),
                                                         fam.replace('/', '-'), orient, sample1, sample2)
                    cdf_plot(te_or, w_te, wo_te, out_pdf)
    return table_lines, pvals
def fit(self, X, y):
    n = len(X)
    # _x and _y are the means of x and y.
    _x = st.mean(X)
    _y = st.mean(y)
    self.__class__.b1 = np.sum((X - _x) * (y - _y)) / np.sum((X - _x)**2)
    self.__class__.b0 = _y - (self.__class__.b1 * _x)
def statsex(self, objects):
    """
    Do some statistics on a source list
    Return dictionary
    """
    import stats, pstat

    # Return if we have no objects
    if len(objects) == 0:
        return 0

    # Define dictionary to hold statistics
    stat = {}

    # Get number of objects
    stat['N'] = str(len(objects))

    # Define list (float) of FWHM values
    fwhm = [float(obj[7]) for obj in objects]
    # Define list (float) of ELLIPTICITY values
    el = [float(obj[6]) for obj in objects]
    # Define list (float) of THETA_IMAGE values
    pa = [float(obj[5]) for obj in objects]
    # Define list (float) of 'Stella-like' values
    stella = [float(obj[9]) for obj in objects]

    # Create a histogram of FWHM values of binsize 1 pixel
    hfwhm = stats.histogram(fwhm, 40, [0, 40])[0]

    stat['medianFWHM'] = "%.2f" % stats.median(fwhm)
    stat['meanFWHM'] = "%.2f" % stats.mean(fwhm)
    stat['modeFWHM'] = "%.2f" % float(hfwhm.index(max(hfwhm)) + 0.5)
    try:
        stat['stdevFWHM'] = "%.2f" % stats.stdev(fwhm)
    except ZeroDivisionError:
        stat['stdevFWHM'] = '0.00'

    stat['medianEL'] = "%.2f" % stats.median(el)
    stat['meanEL'] = "%.2f" % stats.mean(el)
    try:
        stat['stdevEL'] = "%.2f" % stats.stdev(el)
    except ZeroDivisionError:
        stat['stdevEL'] = '0.00'

    # Histogram of Ellipticity PA (-180 to 180, bins of 45 deg)
    #stat['histoTHETA'] = stats.histogram(pa,8,[-180,180])[0]
    # Histogram of Stellarity (0 to 1, bins of 0.05)
    #stat['histoStella'] = stats.histogram(stella,20,[0,1.01])[0]

    return stat
def testOnTuples(self):
    """ Checks that methods also work on tuples. """
    self.assertAlmostEqual(stats.mean(tuple(self.dataA)), self.meanA, 5)
    self.assertAlmostEqual(stats.mean(tuple(self.dataB)), self.meanB, 5)
    self.assertAlmostEqual(stats.stddev(tuple(self.dataA)), self.stddevA, 5)
    self.assertAlmostEqual(stats.stddev(tuple(self.dataB)), self.stddevB, 5)
    return
def least_squares_fit(xs: Vector, ys: Vector) -> Tuple[float, float]:
    """
    Given a dataset represented by xs and ys,
    return the alpha, beta that provide the least squared error fit
    for a function y_i = alpha * x_i + beta
    """
    alpha = correlation(xs, ys) * standard_deviation(ys) / standard_deviation(xs)
    beta = mean(ys) - alpha * mean(xs)
    return alpha, beta
def test_validator_working_correct_negativa(self):
    with self.assertRaises(TypeError) as raised_exception:
        stats.missing_data(2, 3.5)
    self.assertEqual(raised_exception.exception.args[0],
                     "input type must be list")
    with self.assertRaises(ValueError) as raised_exception:
        stats.mean([1, 2, (2, 3.5)], [3.5, 2, 1])
    self.assertEqual(raised_exception.exception.args[0],
                     "value in x must be numeric")
def findLSRline(dp):
    if not checkDPFormat(dp):
        print "ERROR: invalid dotplot format in findLSRline"
        return "invalid dotplot format"
    r = CorrCoef(dp)
    x = [i[0] for i in dp]
    sx = stats.stdDev(x)
    y = [i[1] for i in dp]
    sy = stats.stdDev(y)
    b = (r * sy) / sx
    a = stats.mean(y) - b * stats.mean(x)
    return a, b
def corr(xdata, ydata):
    """corr(xydata) -> float
    corr(xdata, ydata) -> float

    Return the sample Pearson's Correlation Coefficient of (x,y) data.

    If ydata is None or not given, then xdata must be an iterable of (x, y)
    pairs. Otherwise, both xdata and ydata must be iterables of values,
    which will be truncated to the shorter of the two.

    >>> corr([(0.1, 2.3), (0.5, 2.7), (1.2, 3.1), (1.7, 2.9)])
    ... #doctest: +ELLIPSIS
    0.827429009335...

    The Pearson correlation is +1 in the case of a perfect positive
    correlation (i.e. an increasing linear relationship), -1 in the case of
    a perfect anti-correlation (i.e. a decreasing linear relationship), and
    some value between -1 and 1 in all other cases, indicating the degree
    of linear dependence between the variables.

    >>> xdata = [1, 2, 3, 4, 5, 6]
    >>> ydata = [2*x for x in xdata]  # Perfect correlation.
    >>> corr(xdata, ydata)
    1.0
    >>> corr(xdata, [5-y for y in ydata])  # Perfect anti-correlation.
    -1.0

    If there are not at least two data points, or if either all the x values
    or all the y values are equal, StatsError is raised.
    """
    n = len(xdata)
    assert n == len(ydata)
    if n < 2:
        raise StatsError(
            'correlation requires at least two data points, got %d' % n)
    # First pass is to determine the means.
    mx = stats.mean(xdata)
    my = stats.mean(ydata)
    # Second pass to determine the standard deviations.
    sx = stats.stdev(xdata, mx)
    sy = stats.stdev(ydata, my)
    if sx == 0:
        raise StatsError('all x values are equal')
    if sy == 0:
        raise StatsError('all y values are equal')
    # Third pass to calculate the correlation coefficient.
    ap = add_partial
    total = []
    for x, y in zip(xdata, ydata):
        term = ((x-mx)/sx) * ((y-my)/sy)
        ap(term, total)
    r = math.fsum(total)/(n-1)
    assert -1 <= r <= 1
    return r
def muestroEspeYVar(valores, esperanzaTeorica, alfa):
    print("The sample mean (expected value) is:", stats.mean(valores))
    print("The theoretical expected value is:", esperanzaTeorica)
    print("The sample variance is:", numpy.var(valores))
    print("The theoretical variance is:", calcularVarianza(alfa))
    print("The sample standard deviation is:", math.sqrt(numpy.var(valores)))
    print("The theoretical standard deviation is:", math.sqrt(calcularVarianza(alfa)))
    print("The sample's alpha parameter is:", 1 / (stats.mean(valores)))
    print("The theoretical alpha is:", alfa)
    print("\n")
def get_modules(self, cutoff=.05):
    modules = []
    for e in self:
        if e.val < min(e.lo_min, e.hi_min, cutoff):
            if self.datatype == "continuous":
                e.desc = "lo" if mean(e.a) < mean(e.b) else "hi"
            else:
                e.desc = "enriched"
            modules.append(e)
        else:
            modules += e.get_modules(cutoff=cutoff)
    return modules
def collect_mean(input_list):
    """ Collect the execution time of each module's mean implementation """
    begin_py_mean = clock()
    py_mean(input_list)
    end_py_mean = clock()

    begin_mean = clock()
    mean(input_list)
    end_mean = clock()

    times = format_times(end_py_mean - begin_py_mean, end_mean - begin_mean)
    save_times(times, logs['mean'])
def _SP(xdata, mx, ydata, my):
    """SP = sum of product of deviations.
    Helper function for calculating covariance directly.
    """
    if mx is None:
        # Two pass algorithm.
        xdata = as_sequence(xdata)
        mx = stats.mean(xdata)
    if my is None:
        # Two pass algorithm.
        ydata = as_sequence(ydata)
        my = stats.mean(ydata)
    return _generalised_sum(zip(xdata, ydata), lambda t: (t[0]-mx)*(t[1]-my))
def corr(x, y):
    N = len(x)
    if len(y) != N:
        raise Exception(
            "Sequences must be of the same length. X length: {0} ; Y length {1}"
            .format(N, len(y)))
    else:
        total = 0
        for index, xi in enumerate(x):
            total += xi * y[index]
        r = (total - N * stats.mean(x) * stats.mean(y)) / (
            (N - 1) * stats.stdev(x) * stats.stdev(y))
        return r
def check_basic(self):
    a = [3, 4, 5, 10, -3, -5, 6]
    af = [3., 4, 5, 10, -3, -5, -6]
    Na = len(a)
    Naf = len(af)
    mn1 = 0.0
    for el in a:
        mn1 += el / float(Na)
    assert_almost_equal(stats.mean(a), mn1, 11)
    mn2 = 0.0
    for el in af:
        mn2 += el / float(Naf)
    assert_almost_equal(stats.mean(af), mn2, 11)
def _getRatingStarsAverages():
    android_ratings = []
    ios_ratings = []
    android_stars = []
    ios_stars = []
    global android_ratings_average
    global android_stars_average
    global ios_ratings_average
    global ios_stars_average
    print collection_ios.count(
        {'android_ratings_float': {'$gte': 1}, 'ios_ratings_float': {'$gte': 1}},
        no_cursor_timeout=True)
    for app in collection_ios.find(
            {'android_ratings_float': {'$gte': 1}, 'ios_ratings_float': {'$gte': 1}},
            no_cursor_timeout=True):
        android_ratings.append(app['android_ratings_float'])
        android_stars.append(app['android_stars_float'])
        # ios stats
        ios_ratings.append(app['ios_ratings_float'])
        ios_stars.append(app['ios_stars_float'])
    android_ratings_average = stats.mean(android_ratings)
    android_stars_average = stats.mean(android_stars)
    ios_ratings_average = stats.mean(ios_ratings)
    ios_stars_average = stats.mean(ios_stars)
    print "android"
    print android_ratings_average
    print android_stars_average
    print "ios"
    print ios_ratings_average
    print ios_stars_average
def check_2d(self):
    a = [[1.0, 2.0, 3.0],
         [2.0, 4.0, 6.0],
         [8.0, 12.0, 7.0]]
    A = array(a, 'd')
    N1, N2 = (3, 3)
    mn1 = zeros(N2, 'd')
    for k in range(N1):
        mn1 += A[k, :] / N1
    allclose(stats.mean(a), mn1, rtol=1e-13, atol=1e-13)
    mn2 = zeros(N1, 'd')
    for k in range(N2):
        mn2 += A[:, k] / N2
    allclose(stats.mean(a, axis=0), mn2, rtol=1e-13, atol=1e-13)
def parameter_cedo_meanmean(self, TTL):
    """
    Computes the CEDO distribution parameter with the mean of the mean IMT
    for each pair of meeting nodes.
    """
    raise Exception()  # deprecated
    logger.debug("Estimating CEDO mean of means parameter.")
    imts = []
    for timestamp, l in self._all_im.values():
        if l:
            imts.append(mean(l))
    m = imts and mean(imts) or sys.maxsize
    return self.apply_ndp(TTL, m)
def _getCollection():
    print "Sanity Check"
    print "android"
    print android_ratings_average
    print android_stars_average
    print "ios"
    print ios_ratings_average
    print ios_stars_average
    ratings_combined_average = stats.mean(
        [android_ratings_average, ios_ratings_average])
    stars_combined_average = stats.mean(
        [android_stars_average, ios_stars_average])
    print "average:"
    print ratings_combined_average
    print stars_combined_average
    for post in collection_ios.find(
            {'android_ratings_float': {'$gte': 1}, 'ios_ratings_float': {'$gte': 1}},
            no_cursor_timeout=True):
        android_app_id = post['android_app_id']
        android_app_rating = post['android_ratings_float']
        android_app_star = post['android_stars_float']
        ios_app_rating = post['ios_ratings_float']
        ios_app_star = post['ios_stars_float']
        successAndroid = (float(
            _sucess(android_app_rating, android_app_star,
                    ratings_combined_average, stars_combined_average)) / 5) * 100
        successIos = (float(
            _sucess(ios_app_rating, ios_app_star,
                    ratings_combined_average, stars_combined_average)) / 5) * 100
        collection_ios.find_one_and_update(
            {"android_app_id": android_app_id},
            {'$set': {
                'android_success_average': successAndroid,
                'ios_success_average': successIos
            }})
def calculateDividingLine(gestures, maybeGestures, nonGestures):
    numFolds = min(TESTING_FOLDS, len(gestures))
    allGestureDistances = []
    allNonGestureDistances = []
    for foldNum in range(numFolds):
        trainingGestures = [gesture for i, gesture in enumerate(gestures)
                            if i % numFolds != foldNum]
        testingGestures = [localTimeGestures for i, localTimeGestures in enumerate(maybeGestures)
                           if i % numFolds == foldNum]
        #print 'train, test #s: ', len(trainingGestures), len(testingGestures)

        # make a distance calculator based on the subset of the training data
        distanceCalculator = gestureDistanceCalculator.GestureDistanceCalculator(trainingGestures)

        # each localTimeGestures is a list of the closest times to when a gesture was
        # identified in training. Since the output can be triggered at slightly different
        # times, we should look for a minimum near where the gesture is known to have
        # happened, compared to the training gestures.
        gestureDistances = []
        #print testingGestures
        for localTimeGestureSet in testingGestures:
            closestDistance = min(map(distanceCalculator.getDistance, localTimeGestureSet))
            gestureDistances.append(closestDistance)
        #gestureDistances = map(distanceCalculator.getDistance, testingGestures)
        #print gestureDistances
        nonGestureDistances = map(distanceCalculator.getDistance, nonGestures)
        #print gestureDistances
        allGestureDistances += gestureDistances
        allNonGestureDistances += nonGestureDistances
        #break

    #print len(allGestureDistances), len(allNonGestureDistances)
    print 'means: ', stats.mean(allGestureDistances), stats.mean(allNonGestureDistances)
    print 'std devs: ', stats.stdDev(allGestureDistances), stats.stdDev(allNonGestureDistances)
    meanGesture = stats.mean(allGestureDistances)
    meanNon = stats.mean(allNonGestureDistances)
    devGesture = stats.stdDev(allGestureDistances)
    devNon = stats.stdDev(allNonGestureDistances)
    line = (meanGesture * devNon + meanNon * devGesture) / (devGesture + devNon)
    #print line
    return line
def liver(request):
    expressLevelsNishi = naturallysortedexpressionlist(
        Expression.objects.filter(organ='Liver', experiment='Nishimura'))
    expressLevelsPMT = naturallysortedexpressionlist(
        Expression.objects.filter(organ='Liver', experiment__startswith='PMT Sample'))
    expressBiotroveTransporters = naturallysortedtransporterlist(
        Transporter.objects.filter(expression__organ='Liver',
                                   expression__experiment__startswith='PMT Biotrove').distinct())
    important = Transporter.objects.filter(organ__name='Liver')
    importantNames = []
    for x in important:
        importantNames.append(str(x.symbol))
    synquery = Transporter.objects.all()
    syns = {}
    for x in synquery:
        syns[x.symbol] = x.synonyms

    # Calculate mean expression across all PMT samples
    pmtTableValues = []
    for x in range(len(expressLevelsPMT)/3):
        build = []
        for y in range(3):
            build.append(expressLevelsPMT[x*3+y].value)
        avg = stats.mean(build)
        stdev = stats.stdev(build)
        id = expressLevelsPMT[x*3].trans
        pmtTableValues.append([id] + build + [avg, stdev])

    # Calculate median and quartiles across biotrove samples
    biotroveTableValues = []
    for x in expressBiotroveTransporters:
        values = Expression.objects.filter(
            organ='Liver', experiment__startswith='PMT Biotrove',
            trans=x).values_list('value', flat='True').order_by('value')
        build = []
        build.append(x.symbol)
        build.append(quartiles(values, 1))
        build.append(quartiles(values, 2))
        build.append(quartiles(values, 3))
        biotroveTableValues.append(build)

    return render_to_response('liver.html', {'expressionNishi': expressLevelsNishi,
                                             'expressionPMT': pmtTableValues,
                                             'organ': 'Liver',
                                             'syns': syns,
                                             'important': importantNames,
                                             'expressionBiotrove': biotroveTableValues})
def linear_regression(x, y):
    """
    See: https://www.khanacademy.org/math/probability/regression/regression-correlation/v/regression-line-example
    """
    xy_mean = s.xy_mean(x, y)
    print xy_mean
    x_mean = s.mean(x)
    y_mean = s.mean(y)
    x_squared_mean = s.mean([xi ** 2 for xi in x])

    # Slope.
    a = (x_mean * y_mean - xy_mean) / (x_mean ** 2 - x_squared_mean)
    # Intercept.
    b = y_mean - a * x_mean

    return (a, b)
def ttest_1samp(a, popmean):
    t = (stats.mean(a) - popmean) / (stats.stddev(a) / len(a) ** 0.5)
    v = len(a) - 1.0
    p = gamma((v + 1) / 2) / ((v * pi) ** 0.5 * gamma(v / 2)) * (1 + t ** 2 / v) ** (-(v + 1) / 2)
    return ([t, None], [p, None])
def main():
    [(stat, first), (stat, second)] = load_stats(sys.argv[1:])

    # Attempt to increase robustness by dropping the outlying 10% of values.
    first = trim(first, 0.1)
    second = trim(second, 0.1)

    fmean = stats.mean(first)
    smean = stats.mean(second)
    p = 1 - ttest_1samp(second, fmean)[1][0]
    if p >= 0.95:
        # rejected the null hypothesis
        print sys.argv[1], 'mean of', fmean, 'differs from', sys.argv[2], 'mean of', smean, '(%2.0f%%)' % (p * 100,)
    else:
        # failed to reject the null hypothesis
        print 'cannot prove means (%s, %s) differ (%2.0f%%)' % (fmean, smean, p * 100,)
def circvar(samples, high=2*pi, low=0):
    """Compute the circular variance for samples assumed to be in the range
    [low to high]
    """
    ang = (samples - low)*2*pi / (high - low)
    res = stats.mean(exp(1j*ang))
    V = 1 - abs(res)
    return ((high - low)/2.0/pi)**2 * V
def circstd(samples, high=2*pi, low=0):
    """Compute the circular standard deviation for samples assumed to be in
    the range [low to high]
    """
    ang = (samples - low)*2*pi / (high - low)
    res = stats.mean(exp(1j*ang))
    V = 1 - abs(res)
    return ((high - low)/2.0/pi) * sqrt(V)
def integrate_box_1d(self, low, high):
    """Computes the integral of a 1D pdf between two bounds.

    Parameters
    ----------
    low : scalar
        lower bound of integration
    high : scalar
        upper bound of integration

    Returns
    -------
    value : scalar
        the result of the integral

    Raises
    ------
    ValueError if the KDE is over more than one dimension.
    """
    if self.d != 1:
        raise ValueError("integrate_box_1d() only handles 1D pdfs")

    stdev = ravel(sqrt(self.covariance))[0]

    normalized_low = ravel((low - self.dataset) / stdev)
    normalized_high = ravel((high - self.dataset) / stdev)

    value = stats.mean(
        special.ndtr(normalized_high) - special.ndtr(normalized_low))
    return value
def diff_fpkm(diff_file, pseudocount):
    gene_fpkms = {}

    diff_in = open(diff_file)
    diff_in.readline()
    for line in diff_in:
        a = line.split('\t')

        gene_id = a[0]
        sample1 = a[4]
        sample2 = a[5]
        status = a[6]
        fpkm1 = float(a[7])
        fpkm2 = float(a[8])

        if status == 'OK':
            if gene_id in gene_fpkms:
                gene_fpkms[gene_id] += [fpkm1, fpkm2]
            else:
                gene_fpkms[gene_id] = [fpkm1, fpkm2]
    diff_in.close()

    gene_fpkm = {}
    for gene_id in gene_fpkms:
        log_fpkms = [math.log(fpkm + pseudocount, 2) for fpkm in gene_fpkms[gene_id]]
        gene_fpkm[gene_id] = stats.mean(log_fpkms)

    return gene_fpkm
def muestroEsperanzayVar(valores, alfa, ka):
    print("The sample mean (expected value) is:", stats.mean(valores))
    print("The theoretical expected value is =", ka/alfa)
    print("The sample variance is =", numpy.var(valores))
    print("The theoretical variance is =", ka/(alfa**2))
    print("The sample standard deviation is =", math.sqrt(numpy.var(valores)))
    print("The theoretical standard deviation is =", math.sqrt(ka/(alfa**2)))
def scale(data_matrix):
    num_rows, num_cols = shape(data_matrix)
    means = [mean(get_column(data_matrix, j)) for j in range(num_cols)]
    stdevs = [standard_deviation(get_column(data_matrix, j)) for j in range(num_cols)]
    return means, stdevs
def sample_stats(n):
    """ Compute mean and standard deviation on a bunch of random numbers """
    sample = tuple(random.random() for i in range(n))
    return mean(sample), sd(sample)
def main():
    [(_ignore_stat, first), (_ignore_stat, second)] = load_stats(sys.argv[1:])

    # Attempt to increase robustness by dropping the outlying 10% of values.
    first = trim(first, 0.1)
    second = trim(second, 0.1)

    fmean = stats.mean(first)
    smean = stats.mean(second)
    p = ttest_1samp(second, fmean)[1]
    if p >= 0.95:
        # rejected the null hypothesis
        print(sys.argv[1], 'mean of', fmean, 'differs from', sys.argv[2],
              'mean of', smean, '(%2.0f%%)' % (p * 100,))
    else:
        # failed to reject the null hypothesis
        print('cannot prove means (%s, %s) differ (%2.0f%%)'
              % (fmean, smean, p * 100,))
def _test_mem_bounded_golden_values_fields(activeGpuCount, memUsageTS, tailStart):
    # 1 KB plus some swag per field instance (Global, GPU). This is based off of
    # the keyed vector block size and default number of blocks.
    goldenVal = 1148
    tolerance = 0.10  # low tolerance, amount of records stored is bounded

    for fieldId, series in memUsageTS.fieldVals.items():
        seriesTail = series[tailStart:]

        # skip fields that are not implemented
        if sum(seriesTail) == 0:
            continue

        # Don't check the size of binary fields since it's arbitrary per fieldId
        if helper_field_has_variable_size(fieldId):
            logger.info("Skipping variable-sized fieldId %d" % fieldId)
            continue

        mean = stats.mean(seriesTail)
        lowLimit = (1 - tolerance) * goldenVal
        highLimit = (1 + tolerance) * goldenVal * activeGpuCount
        assert lowLimit < mean < highLimit, \
            'Expected field "%d" memory usage to be between %s and %s but got %s. ' % \
            (fieldId, lowLimit, highLimit, mean) \
            + 'If this new value is expected, change the golden value used for comparison.'
def ttest_1samp(a, popmean):
    # T statistic - http://mathworld.wolfram.com/Studentst-Distribution.html
    t = (stats.mean(a) - popmean) / (stats.stddev(a) / len(a)**0.5)
    v = len(a) - 1.0
    p = gamma((v + 1) / 2) / (
        (v * pi)**0.5 * gamma(v / 2)) * (1 + t**2 / v)**(-(v + 1) / 2)
    return (t, p)
def wordSummary(db, table):
    f = open("wordSummary_%s.txt" % table, 'w')
    d = {}
    header = "word, length, rtAVG, rtSTD, total, percCorrect\n"
    f.write(header)
    wordList = []
    sql = "SELECT DISTINCT(word) FROM %s" % table
    for w in db.query(sql):
        wordList.append(w[0])
    for word in wordList:
        sql = "SELECT RT FROM %s WHERE word = '%s' AND incorrect = 0" % (table, word)
        wordLen = len(word)
        rtList = []
        zList = []
        for rt in db.query(sql):
            rtList.append(rt[0])
        rtAVG = stats.mean(rtList)
        rtSTD = stats.samplestdev(rtList)
        total = db.query("SELECT COUNT(*) FROM %s WHERE word = '%s'" % (table, word))[0][0]
        percCorrect = float(len(rtList)) / float(total) * 100.0
        print len(rtList), total
        myString = "%s, %i, %f, %f, %i, %f\n" % (word, wordLen, rtAVG, rtSTD, total, percCorrect)
        print myString
        f.write(myString)
    f.close()
def optimal_discard_6(hand):
    # loop over all possible discard options
    possible_hands_scores = {}
    for i in range(6):
        for j in range(6):
            if j > i:
                possible_hand = []
                counter = -1
                for card in hand['hand6']:
                    counter += 1
                    if counter == i or counter == j:
                        continue
                    possible_hand.append(card)
                # get total list of possible scores for that hand
                point_list = check_all_cuts(possible_hand)
                possible_hands_scores[tuple(possible_hand)] = point_list

    max_expected_points = 0
    best_hand = []
    for poss_hand, point_list in possible_hands_scores.iteritems():
        expected_points = mean(point_list)
        # print hand, expected_points
        if expected_points > max_expected_points:
            max_expected_points = expected_points
            best_hand = poss_hand

    discard = list(set(hand['hand6']) - set(best_hand))
    # print("Best Hand = {} for {} points".format(best_hand, max_expected_points))
    return list(best_hand), discard
def print_latex_stats(diffs, label):
    print '%s & %.3f & %.3f & %.3f & %.3f \\\\' % (
        label, min(diffs) / 1000.0, max(diffs) / 1000.0,
        stats.mean(diffs) / 1000.0, stats.stdev(diffs) / 1000.0)
def print_stats(L):
    """ Display some information about the list """
    print "Let's compute some statistics..."
    print "\tMean: %d" % mean(L)
    print "\tStandard deviation: %d" % std(L)
    print "\t# of outliers: %d" % (len(L) - len(remove_outliers(L, 1)))
def test_mean1():
    obs = mean([0, 0, 0, 0])
    exp = 0
    assert_equal(obs, exp)

    obs = mean([0, 200])
    exp = 100
    assert_equal(obs, exp)

    obs = mean([0, -200])
    exp = -100
    assert_equal(obs, exp)

    obs = mean([0])
    exp = 0
    assert_equal(obs, exp)
def integrate_box_1d(self, low, high):
    """Computes the integral of a 1D pdf between two bounds.

    Parameters
    ----------
    low : scalar
        lower bound of integration
    high : scalar
        upper bound of integration

    Returns
    -------
    value : scalar
        the result of the integral
    """
    if self.d != 1:
        raise ValueError("integrate_box_1d() only handles 1D pdfs")

    stdev = ravel(sqrt(self.covariance))[0]

    normalized_low = ravel((low - self.dataset) / stdev)
    normalized_high = ravel((high - self.dataset) / stdev)

    value = stats.mean(special.ndtr(normalized_high) -
                       special.ndtr(normalized_low))
    return value
def testVariance(self):
    data = [1, 2, 3]
    assert stats.mean(data) == 2
    self.assertEqual(stats.pvariance(data), 2/3)
    self.assertEqual(stats.variance(data), 1.0)
    self.assertEqual(stats.pstdev(data), math.sqrt(2/3))
    self.assertEqual(stats.stdev(data), 1.0)
def diff_fpkm(diff_file, pseudocount):
    gene_fpkms = {}

    diff_in = open(diff_file)
    diff_in.readline()
    for line in diff_in:
        a = line.split('\t')

        gene_id = a[0]
        sample1 = a[4]
        sample2 = a[5]
        status = a[6]
        fpkm1 = float(a[7])
        fpkm2 = float(a[8])

        if status == 'OK':
            if gene_id in gene_fpkms:
                gene_fpkms[gene_id] += [fpkm1, fpkm2]
            else:
                gene_fpkms[gene_id] = [fpkm1, fpkm2]
    diff_in.close()

    gene_fpkm = {}
    for gene_id in gene_fpkms:
        log_fpkms = [math.log(fpkm + pseudocount, 2) for fpkm in gene_fpkms[gene_id]]
        gene_fpkm[gene_id] = stats.mean(log_fpkms)

    return gene_fpkm
def default_score_set(expression, primer_set, primer_locs, max_dist, bg_dist_mean):
    """Evaluate an expression using the provided values and a set of metrics.

    :returns: the score and the metrics used to calculate it
    """
    # Calculate various metrics
    binding_distances = stats.seq_diff(primer_locs)
    namespace = {
        'set_size': len(primer_set),
        'fg_dist_mean': stats.mean(binding_distances),
        'fg_dist_std': stats.stdev(binding_distances),
        'fg_dist_gini': stats.gini(binding_distances),
        'bg_dist_mean': bg_dist_mean,
        'fg_max_dist': max_dist,
        '__builtins__': None}
    permitted_var_str = ", ".join(
        [key for key in namespace.keys() if key != "__builtins__"])
    score = None
    try:
        score = eval(expression, namespace, {'__builtins__': {}})
    except NameError as e:
        raise NameError(
            e.message + '. Permitted variables are %s. Refer to README or docs for help.' % permitted_var_str)
    del namespace['__builtins__']
    return score, namespace
def scaleTestMinFinding():
    xs = range(10)
    distances = []
    noise = 3.5
    n = 1000000
    for i in range(n):
        a = random()
        b = random()
        c = random()
        ys = [x*x*a + x*b + c + random() * noise for x in xs]
        #print a, b, c, polynomialFit(xs, ys)[::-1]
        minExp, unc = polynomialFindMinimum(xs, ys, returnErrors=True)
        minCalc = -b/(2.0*a)
        dist = (minCalc - minExp) / unc
        #print minCalc, minExp, unc, dist
        distances.append(dist)

    print 'mean: %f' % stats.mean(distances)
    print 'stdDev: %f' % stats.stdDev(distances)
    for sigma in [1, 2, 3]:
        print 'With %d sigma: %f%%' % (sigma, 100.0 * sum([int(abs(d) < sigma) for d in distances]) / n)

    pylab.hist(distances, bins=50, range=(-5, 5))
    pylab.show()
def cuff_fpkm(fpkm_file, pseudocount):
    cuff = cufflinks.fpkm_tracking(fpkm_file)

    gene_fpkm = {}
    for gene_id in cuff.genes:
        gene_fpkm[gene_id] = stats.mean(
            [math.log(pseudocount + e, 2)
             for e in cuff.gene_expr(gene_id, not_found=0, fail=0)])

    return gene_fpkm
def circmean(samples, high=2*pi, low=0):
    """Compute the circular mean for samples assumed to be in the range
    [low to high]
    """
    ang = (samples - low)*2*pi / (high - low)
    res = angle(stats.mean(exp(1j*ang)))
    if res < 0:
        res = res + 2*pi
    return res*(high - low)/2.0/pi + low
def scale(data_matrix):
    """returns means and standard deviations of each column"""
    num_rows, num_columns = la.shape(data_matrix)
    means = [stats.mean(la.get_c(data_matrix, j)) for j in range(num_columns)]
    stdevs = [stats.std_dev(la.get_c(data_matrix, j)) for j in range(num_columns)]
    return means, stdevs
def _getRatingAverages():
    count = 0
    android_rating_total = 0
    ios_rating_total = 0
    android_ratings = []
    ios_ratings = []
    global android_rating_average
    global ios_rating_average
    global android_rating_median
    global ios_rating_median
    global android_rating_q1
    global ios_rating_q1
    global android_rating_q3
    global ios_rating_q3
    for app in collection_ios.find().batch_size(30):
        # count=count+1
        # android_rating_total=android_rating_total+float(app['android_ratingsAllVersions'].replace(',',''))
        # ios_rating_total=ios_rating_total+float(app['ios_ratingsAllVersions_new'].replace(',',''))
        android_ratings.append(float(app["android_ratingsAllVersions"].replace(",", "")))
        ios_ratings.append(float(app["ios_ratingsAllVersions_new"].replace(",", "")))
    android_rating_average = stats.mean(android_ratings)
    ios_rating_average = stats.mean(ios_ratings)
    android_rating_median = stats.median(android_ratings)
    ios_rating_median = stats.median(ios_ratings)
    android_rating_q1 = stats.quartiles(android_ratings)[0]
    ios_rating_q1 = stats.quartiles(ios_ratings)[0]
    android_rating_q3 = stats.quartiles(android_ratings)[1]
    ios_rating_q3 = stats.quartiles(ios_ratings)[1]
    print "ios stats"
    print ios_rating_q1
    print ios_rating_median
    print ios_rating_q3
    print "Android stats"
    print android_rating_q1
    print android_rating_median
    print android_rating_q3
def scale(data_matrix):
    # Return the mean and standard deviation of each column.
    num_rows, num_cols = shape(data_matrix)
    means = [mean(get_column(data_matrix, j)) for j in range(num_cols)]
    stdevs = [standard_deviation(get_column(data_matrix, j)) for j in range(num_cols)]
    return means, stdevs