def chisquare(matrix):
    """Chi-square statistic and probability for a table of observed counts.

    Each inner sequence of ``matrix`` is treated as one row of the table.
    The expected value of a cell is ``row_total * column_total / grand_total``.
    A cell whose expected value is zero is divided by 1 instead, so it
    contributes its squared deviation rather than raising.

    Args:
        matrix: sequence of equally long sequences of numbers.

    Returns:
        Tuple ``(chi, p)`` where ``chi`` is the summed statistic and ``p`` is
        the probability reported for ``chi`` with
        ``(rows - 1) * (columns - 1)`` degrees of freedom.

    Raises:
        IndexError: if ``matrix`` is empty or a row is shorter than the first.
        ZeroDivisionError: if the table has cells but all values sum to zero.
    """
    n_rows = len(matrix)
    n_cols = len(matrix[0])

    # Marginal totals are computed once instead of once per cell.
    row_totals = [sum(row) for row in matrix]
    col_totals = [sum(row[j] for row in matrix) for j in range(n_cols)]
    grand_total = float(sum(row_totals))

    chi_by_row = []
    for i in range(n_rows):
        contributions = []
        for j in range(n_cols):
            expected = row_totals[i] * col_totals[j] / grand_total
            deviation = matrix[i][j] - expected
            # Guard against dividing by a zero expected count.
            divisor = expected if expected != 0 else 1
            contributions.append(deviation * deviation / divisor)
        chi_by_row.append(sum(contributions))
    chi = sum(chi_by_row)

    # Assumes `stats` is scipy.stats (import not visible here): SciPy 1.0
    # removed `chisqprob`, and `chi2.sf` is its documented replacement.
    # A module that still provides `chisqprob` keeps being used as before.
    chisqprob = getattr(stats, 'chisqprob', None) or stats.chi2.sf
    return chi, chisqprob(chi, (n_rows - 1) * (n_cols - 1))
def chisquare(matrix):
    """Compute a chi-square statistic and its probability for a count table.

    ``matrix`` is read as a list of rows. For every cell the expected value
    is the product of its row total and column total divided by the grand
    total; the statistic is the sum of ``(observed - expected) ** 2 /
    expected`` over all cells, with a divisor of 1 substituted wherever the
    expected value is zero.

    Args:
        matrix: sequence of equally long sequences of numbers.

    Returns:
        ``(chi, p)``: the statistic and the probability reported for it with
        ``(len(matrix) - 1) * (len(matrix[0]) - 1)`` degrees of freedom.

    Raises:
        IndexError: if ``matrix`` is empty or a row is shorter than the first.
        ZeroDivisionError: if the table has cells but all values sum to zero.
    """
    dim_x = len(matrix)
    dim_y = len(matrix[0])

    # Hoisted out of the cell loop: each total is needed for many cells.
    totals_x = [sum(row) for row in matrix]
    totals_y = [sum(row[j] for row in matrix) for j in range(dim_y)]
    sum_all = float(sum(totals_x))

    def cell_chi(i, j):
        # Contribution of a single cell to the statistic.
        expected = totals_x[i] * totals_y[j] / sum_all
        delta = matrix[i][j] - expected
        # A zero expected value would divide by zero; use 1 instead.
        return delta * delta / (expected if expected != 0 else 1)

    chi = sum([
        sum([cell_chi(i, j) for j in range(dim_y)])
        for i in range(dim_x)
    ])

    # Assumes `stats` is scipy.stats (import not visible here): `chisqprob`
    # was removed in SciPy 1.0 in favour of `chi2.sf`. When the module still
    # exposes `chisqprob` it is used unchanged.
    chisqprob = getattr(stats, 'chisqprob', None) or stats.chi2.sf
    return chi, chisqprob(chi, (dim_x - 1) * (dim_y - 1))
def likelihoodRatioTest(rawString_n, rawString_d, df):
    """Return the chi-square probability of a likelihood-ratio statistic.

    The statistic is ``-2 * (ln(rawString_n) - ln(rawString_d))``; it is
    printed rounded to two decimals before the probability is returned.

    Args:
        rawString_n: likelihood passed to ``ln`` as the first term.
        rawString_d: likelihood passed to ``ln`` as the second term.
            NOTE(review): the names suggest ``ln`` accepts raw strings;
            ``ln`` is defined outside this block, so confirm there.
        df: degrees of freedom handed to the chi-square probability function.

    Returns:
        The probability reported for the statistic with ``df`` degrees of
        freedom.
    """
    test_stat = -2 * (ln(rawString_n) - ln(rawString_d))
    print(round(test_stat, 2))

    # Assumes `stats` is scipy.stats (import not visible here): SciPy 1.0
    # removed `chisqprob`; `chi2.sf` is its documented replacement. A module
    # that still provides `chisqprob` keeps being used as before.
    chisqprob = getattr(stats, 'chisqprob', None) or stats.chi2.sf
    return chisqprob(test_stat, df)
def calcLRT(LRT_FILE, results):
    """Write likelihood-ratio test rows for consecutive models to a file.

    Every entry of ``results`` from the second one on is compared with the
    entry before it, and one ``&``-separated, ``\\\\``-terminated row (LaTeX
    table syntax) is written per comparison.

    Args:
        LRT_FILE: path of the output file; it is overwritten.
        results: sequence of mappings with at least the keys ``'model_name'``
            and ``'likelihood'``. The likelihood is concatenated into the row
            as-is, so it must be a string.

    Returns:
        None. Prints a confirmation message once the file is written.
    """
    # Assumes `stats` is scipy.stats (import not visible here): SciPy 1.0
    # removed `chisqprob`; `chi2.sf` is its documented replacement. A module
    # that still provides `chisqprob` keeps being used as before.
    chisqprob = getattr(stats, 'chisqprob', None) or stats.chi2.sf

    # `with` guarantees the rows are flushed and the handle is closed, even
    # if a comparison raises part-way through.
    with open(LRT_FILE, 'w') as out:
        for i in range(1, len(results)):
            df_im1 = countFreeParameters(results[i - 1])
            df_i = countFreeParameters(results[i])
            likelihood_im1 = results[i - 1]['likelihood']
            likelihood_i = results[i]['likelihood']

            test_stat = -2 * (ln(likelihood_i) - ln(likelihood_im1))
            delta_df = df_im1 - df_i
            test_prob = chisqprob(test_stat, delta_df)

            # print entries for latex table
            columns = [
                results[i]['model_name'],
                "some description",
                str(df_i),
                str(delta_df),
                likelihood_i,
                str(round(test_stat, 2)),
                str(round(test_prob, 4)),
            ]
            out.write(" & ".join(columns) + " \\\\\n")

    print("LTR saved to file '" + LRT_FILE + "'")
# Read one float per line from the file named by the first command-line
# argument; the `with` block closes the handle as soon as parsing is done.
with open(argv[1], 'r') as data_file:
    data = [float(line) for line in data_file]

## compute unimodal model
uni = Gaussian(mean(data), std(data))
uni_loglike = sum(log(uni.pdf(d)) for d in data)
print('Best singleton: {0}'.format(uni))
print('Null LL: {0:4.6}'.format(uni_loglike))

## find best one
# set defaults
best_gaus = None
best_loglike = float('-inf')
stderr.write('Computing best model with random restarts...\n')
# The two loops use distinct variable names; the original reused `i` for
# both, so the inner loop overwrote the outer counter.
for _restart in xrange(_rand_restarts):
    mix = GaussianMixture(data, _mu_min, _mu_max, _sigma_min, _sigma_max)
    # I catch division errors from bad starts, and just throw them out...
    for _step in xrange(_n_iterations):
        try:
            mix.iterate()
            if mix.loglike > best_loglike:
                best_loglike = mix.loglike
                # NOTE(review): this stores a reference, not a copy, so any
                # later iterate() on the same `mix` presumably changes
                # `best_gaus` too -- confirm that is intended.
                best_gaus = mix
        except (ZeroDivisionError, ValueError):
            # A failed step is skipped; remaining steps still run on `mix`.
            pass

# NOTE(review): if no iteration ever improved on -inf, `best_gaus` is still
# None here and the `.loglike` access below raises AttributeError.
print('Best {0}'.format(best_gaus))
print('Alternative LL: {0:4.6}'.format(best_gaus.loglike))
test_stat = -2 * uni_loglike + 2 * best_gaus.loglike
print('Test statistic for LLR (Chi-sq, df=3): {0:4.6}'.format(test_stat))
print('P = {0:4.6}'.format(chisqprob(test_stat, 3)))