def calc():
    """Run a one-way analysis of variance on the CSV at ``DATA_PATH``.

    Every column of the rows returned by ``readcsv`` is treated as one
    group.  The F value is the between-group mean square divided by the
    within-group mean square.  It is printed together with the critical
    values obtained from ``F_distribution`` and a flag telling whether
    ``abs(F)`` exceeds each of them.

    Returns nothing; the result is only written to standard output.  The
    number of columns is taken from the first row, so an input without
    rows fails when that row is looked up.
    """
    # read data
    rawdata = readcsv(DATA_PATH)

    # calculation
    # Transpose the rows: column i of every row forms group i.
    groups = [[row[i] for row in rawdata] for i in range(len(rawdata[0]))]
    group_stats = [Stat(group) for group in groups]
    # Statistics of all observations pooled together (source of the grand mean).
    overall = Stat(list(itertools.chain.from_iterable(groups)))

    ss_between = sum(((g.mean - overall.mean) ** 2) * g.size for g in group_stats)
    ss_within = sum(g.sum_of_squares for g in group_stats)
    df_between = len(group_stats) - 1
    df_within = sum(g.size - 1 for g in group_stats)

    # float() keeps "/" a true division on Python 2 even if the sums of
    # squares turn out to be integers.
    ms_between = float(ss_between) / df_between
    ms_within = float(ss_within) / df_within
    F_value = ms_between / ms_within

    # No alpha is passed in the first call; its result is reported under the
    # 0.05 label, so this relies on F_distribution's default significance level.
    F_dist = F_distribution(df_between, df_within)
    F_dist_001 = F_distribution(df_between, df_within, alpha=0.01)

    # output
    # Single-argument print(...) with %-formatting produces the same text under
    # the Python 2 print statement and the Python 3 print function.
    print('[Pure Python]')
    print('F value: %s' % (F_value,))
    print('F dist(0.05): %s %s' % (F_dist, abs(F_value) > F_dist))
    print('F dist(0.01): %s %s' % (F_dist_001, abs(F_value) > F_dist_001))
    print('')
def calc():
    """Run a two-sample t-test with a pooled variance estimate.

    Columns 1 and 2 of every row returned by ``readcsv(DATA_PATH)`` are the
    two samples.  The t value is the difference of the sample means divided
    by the standard error derived from the pooled variance.  It is printed
    together with the critical values from ``t_distribution`` for
    ``alpha=0.05`` and ``alpha=0.01`` and a flag telling whether ``abs(t)``
    exceeds each of them.

    Returns nothing; the result is only written to standard output.
    """
    # read data
    data = readcsv(DATA_PATH)
    d0 = [row[1] for row in data]
    d1 = [row[2] for row in data]

    # calculation
    s0 = Stat(d0)
    s1 = Stat(d1)
    df = (s0.size - 1) + (s1.size - 1)

    # NOTE(review): variance * size is used as each sample's sum of squared
    # deviations, which presumes Stat.variance divides by n -- confirm.
    pooled_sum_of_squares = s0.variance * s0.size + s1.variance * s1.size
    # float() keeps "/" a true division on Python 2 for integer inputs.
    variance_estimation = float(pooled_sum_of_squares) / df
    standard_error = math.sqrt(
        variance_estimation * ((1.0 / s0.size) + (1.0 / s1.size))
    )

    t_dist = t_distribution(df, alpha=0.05)
    t_dist_001 = t_distribution(df, alpha=0.01)

    diff_mean = s0.mean - s1.mean
    # If an interval around the difference is ever needed, it is
    # diff_mean -/+ standard_error * t_dist.
    t_value = diff_mean / standard_error

    # output
    # Single-argument print(...) with %-formatting produces the same text under
    # the Python 2 print statement and the Python 3 print function.
    print('[Pure Python]')
    print('t value: %s' % (t_value,))
    print('t dist(0.05): %s %s' % (t_dist, abs(t_value) > t_dist))
    print('t dist(0.01): %s %s' % (t_dist_001, abs(t_value) > t_dist_001))
    print('')
def calc():
    """Fit a linear regression to columns 1 and 2 of the CSV at ``DATA_PATH``.

    Column 1 of every row returned by ``readcsv`` is used as X and column 2
    as Y.  Both lists are passed to ``linear_regression`` and the fitted
    line is printed as ``Y = a + bX (r=cor)``.

    Returns:
        The tuple ``(x, y, a, b, cor)``: the two column lists followed by
        the constant term, the coefficient of X and the ``r`` value, as
        returned by ``linear_regression``.
    """
    # read data
    rawdata = readcsv(DATA_PATH)

    # pre-calculation
    x = [row[1] for row in rawdata]
    y = [row[2] for row in rawdata]

    # calculation
    cor, a, b = linear_regression(x, y)

    # output
    # Parenthesised single-argument print(...) behaves the same under the
    # Python 2 print statement and the Python 3 print function.
    print('[Pure Python]')
    print('Y = %f + %fX (r=%0.2f)' % (a, b, cor))
    print('')

    return x, y, a, b, cor
def calc():
    """Run a paired t-test on columns 1 and 2 of the CSV at ``DATA_PATH``.

    For every row returned by ``readcsv`` the difference ``row[1] - row[2]``
    is taken.  The t value is the mean of those differences divided by
    their standard error, both read from ``Stat``.  It is printed together
    with the critical values from ``t_distribution`` for ``alpha=0.05`` and
    ``alpha=0.01`` and a flag telling whether ``abs(t)`` exceeds each of
    them.

    Returns nothing; the result is only written to standard output.
    """
    # read data
    data = readcsv(DATA_PATH)
    differences = [row[1] - row[2] for row in data]

    # calculation
    s = Stat(differences)
    df = s.size - 1
    t_dist = t_distribution(df, alpha=0.05)
    t_dist_001 = t_distribution(df, alpha=0.01)
    t_value = s.mean / s.standard_error

    # output
    # Single-argument print(...) with %-formatting produces the same text under
    # the Python 2 print statement and the Python 3 print function.
    print('[Pure Python]')
    print('t value: %s' % (t_value,))
    print('t dist(0.05): %s %s' % (t_dist, abs(t_value) > t_dist))
    print('t dist(0.01): %s %s' % (t_dist_001, abs(t_value) > t_dist_001))
    print('')