def test_sample_coefficients_mixed_distr(self):
    """Sample two coefficients from different distributions; one stays fixed.

    "coef_uniform" and "coef_normal" have an entry in the sampling
    dictionary; "coef_const" has none and is asserted to keep its value in
    every draw. For the two sampled coefficients a Chi^2 statistic is
    accumulated over df + 1 draws, and the test fails when the resulting
    probability falls below significance_level / 2.
    """
    from opus_core.third_party.pstat import chisqprob

    coef_values = array([1, 0.5, 0.2], dtype="float32")
    se = array([0, 0.02, 0.001], dtype="float32")
    coefficient_names = array(["coef_uniform", "coef_const", "coef_normal"])
    coef = Coefficients(names=coefficient_names, values=coef_values,
                        standard_errors=se)
    sampling_dict = {
        "coef_uniform": {"distribution": "uniform",
                         "parameters": {"a": 1, "b": 1}},
        "coef_normal": {"distribution": "normal",
                        "parameters": {"multiplicator": 10}},
    }

    # Only the first and third coefficient are tested; the check is one-sided.
    expected_values = coef_values
    df = 9
    significance_level = 0.05
    uniform_stat = 0
    normal_stat = 0
    for _ in range(df + 1):
        sampled = coef.sample_values(distribution_dictionary=sampling_dict)
        values = sampled.get_values()
        uniform_deviation = values[0] - expected_values[0]
        uniform_stat += (uniform_deviation ** 2) / expected_values[0]
        # presumably the normal draw uses sd = se * multiplicator -- confirm
        normal_stat += ((values[2] - expected_values[2]) / (se[2] * 10)) ** 2
        self.assertEqual(
            ma.allclose(sampled.get_values()[1], expected_values[1]), True)

    for statistic in (uniform_stat, normal_stat):
        prob = chisqprob(statistic, df)
        if prob < significance_level / 2.0:
            self.fail(msg="prob=%f is not in [%f,%f]" % (
                prob, significance_level / 2.0, 1 - significance_level / 2.0))
def test_sample_coefficients_mixed_distr(self):
    """Sample two coefficients from different distributions; one stays fixed.

    "coef_uniform" and "coef_normal" have an entry in the sampling
    dictionary; "coef_const" has none and is asserted to keep its value in
    every draw. For the two sampled coefficients a Chi^2 statistic is
    accumulated over df + 1 draws, and the test fails when the resulting
    probability falls below significance_level / 2.
    """
    from opus_core.third_party.pstat import chisqprob

    coef_values = array([1, 0.5, 0.2], dtype="float32")
    se = array([0, 0.02, 0.001], dtype="float32")
    coefficient_names = array(["coef_uniform", "coef_const", "coef_normal"])
    coef = Coefficients(names=coefficient_names, values=coef_values,
                        standard_errors=se)
    sampling_dict = {
        "coef_uniform": {"distribution": "uniform",
                         "parameters": {"a": 1, "b": 1}},
        "coef_normal": {"distribution": "normal",
                        "parameters": {"multiplicator": 10}},
    }

    # Only the first and third coefficient are tested; the check is one-sided.
    expected_values = coef_values
    df = 9
    significance_level = 0.05
    uniform_stat = 0
    normal_stat = 0
    for _ in range(df + 1):
        sampled = coef.sample_values(distribution_dictionary=sampling_dict)
        values = sampled.get_values()
        uniform_deviation = values[0] - expected_values[0]
        uniform_stat += (uniform_deviation ** 2) / expected_values[0]
        # presumably the normal draw uses sd = se * multiplicator -- confirm
        normal_stat += ((values[2] - expected_values[2]) / (se[2] * 10)) ** 2
        self.assertEqual(
            ma.allclose(sampled.get_values()[1], expected_values[1]), True)

    for statistic in (uniform_stat, normal_stat):
        prob = chisqprob(statistic, df)
        if prob < significance_level / 2.0:
            self.fail(msg="prob=%f is not in [%f,%f]" % (
                prob, significance_level / 2.0, 1 - significance_level / 2.0))
def chi_square_test_with_known_mean(self, function, mean, variance,
                                    number_of_iterations,
                                    significance_level=0.01,
                                    number_of_tries=5):
    """Two-sided Chi^2 test of sigma = sigma_0 vs. sigma != sigma_0, means known.

    'function' is called number_of_iterations times per attempt and each
    result is stored as one row of a (number_of_iterations x mean.size)
    array, so 'mean' and 'variance' must match the length of what
    'function' returns.

    A stochastic test fails every once in a while, so the whole procedure is
    repeated up to number_of_tries times. The method returns on the first
    attempt whose probability lies in
    [significance_level/2, 1 - significance_level/2] and calls self.fail()
    if no attempt does.
    """
    for _attempt in range(number_of_tries):
        n_outputs = mean.size
        samples = zeros((number_of_iterations, n_outputs), dtype=float32)
        for row in range(number_of_iterations):
            samples[row, :] = function()

        squared_deviation = (samples - mean) ** 2.0
        stat = (squared_deviation / variance).sum()
        dof = n_outputs * number_of_iterations
        prob = chisqprob(stat, dof)
        logger.log_status(
            "Stochastic Test: Chi^2 test statistic = " + str(stat) + ", df=",
            str(dof), ", p=" + str(prob))

        lower = significance_level / 2.0
        upper = 1 - significance_level / 2.0
        if lower <= prob <= upper:
            # test succeeded -- jump out of the method
            return

    # every attempt fell outside the acceptance interval
    self.fail(msg="prob=%f is not in [%f,%f]" % (
        prob, significance_level / 2.0, 1 - significance_level / 2.0))
def test_sample_uniform_coefficients(self):
    """Coefficients are sampled from U(x-0.5, x+0.5), where x is the coefficient value.

    For each coefficient a Chi^2 statistic is accumulated over df + 1 draws
    and the test fails when its probability drops below
    significance_level / 2 (one-sided check). Finally the dtype of the
    sampled values is verified to be float32.
    """
    from opus_core.third_party.pstat import chisqprob

    coef_values = array([0.5, -0.00001], dtype="float32")
    coef = Coefficients(names=array(["coef1", "coef2"]), values=coef_values)

    # for each coefficient run a one-sided Chi^2 test
    expected_values = coef_values
    TS = zeros(coef_values.size)
    df = 9
    significance_level = 0.05
    for _ in range(df + 1):
        new_coef = coef.sample_values(distribution='uniform')
        values = new_coef.get_values()
        # NOTE(review): the second expected value is negative, so its
        # statistic is non-positive and the check below is presumably
        # vacuous for that coefficient -- confirm this is intended.
        TS += ((values - expected_values) ** 2) / expected_values

    threshold = significance_level / 2.0
    for i in range(values.size):
        prob = chisqprob(TS[i], df)
        if prob < threshold:
            self.fail(msg="prob=%f is not in [%f,%f]" % (
                prob, significance_level / 2.0, 1 - significance_level / 2.0))

    # check data type (assertTrue replaces the removed alias assert_)
    self.assertTrue(values.dtype.name == "float32",
                    msg="Error in coefficients data type.")
def _run_stochastic_test_poisson(self, function, expected_results,
                                 number_of_iterations,
                                 significance_level=0.01,
                                 transformation=None):
    """Run 'function' number_of_iterations times and test it against expected_results.

    Each (optionally transformed) result becomes one row of the sample
    array. A likelihood-ratio test statistic is computed from the per-column
    sample means and the (optionally transformed) expected results, and its
    Chi^2 probability with K = expected_results.size degrees of freedom is
    compared with significance_level.

    Returns a tuple (passed, message), where passed is
    prob >= significance_level.
    """
    K = expected_results.size
    samples = zeros((number_of_iterations, K), dtype=float32)
    running_total = zeros(K, dtype=float32)
    for row in range(number_of_iterations):
        outcome = function()
        samples[row, :] = try_transformation(outcome, transformation)
        running_total = running_total + samples[row, :]

    lambdak = running_total / float(number_of_iterations)
    lambdanull = try_transformation(expected_results.astype(float32),
                                    transformation)
    column_totals = sum(samples, axis=0)

    # zero expected values are masked so they do not take part in the ratio
    masked_null = ma.masked_where(lambdanull == 0, lambdanull)
    mean_term = number_of_iterations * (lambdanull - lambdak).sum()
    log_ratio_term = (ln(lambdak / masked_null) * column_totals).sum()
    LRTS = 2.0 * (mean_term + log_ratio_term)

    prob = chisqprob(LRTS, K)
    logger.log_status("Stochastic Test Poisson: LRTS=" + str(LRTS) + ", df=",
                      str(K), ", p=" + str(prob))
    passed = prob >= significance_level
    message = "prob=%f < significance level of %f" % (prob, significance_level)
    return (passed, message)
def test_sample_uniform_0_1_coefficients(self):
    """All coefficients are sampled from U(5,10), not centered on their values.

    A Chi^2 statistic against the expected value 7.5 is accumulated per
    coefficient over df + 1 draws; the test fails when a probability falls
    below significance_level / 2 (one-sided check).
    """
    from opus_core.third_party.pstat import chisqprob

    coef_values = array([0, 0], dtype="float32")
    coef = Coefficients(names=array(["coef1", "coef2"]), values=coef_values)

    expected_values = array([7.5, 7.5])
    statistics = zeros(coef_values.size)
    df = 9
    significance_level = 0.05
    for _ in range(df + 1):
        sampled = coef.sample_values(distribution='uniform',
                                     center_around_value=False, a=5, b=10)
        values = sampled.get_values()
        deviation = values - expected_values
        statistics += (deviation ** 2) / expected_values

    for position in range(values.size):
        prob = chisqprob(statistics[position], df)
        if prob < significance_level / 2.0:
            self.fail(msg="prob=%f is not in [%f,%f]" % (
                prob, significance_level / 2.0, 1 - significance_level / 2.0))
def chi_square_test_with_known_mean(self, function, mean, variance,
                                    number_of_iterations,
                                    significance_level=0.01,
                                    number_of_tries=5):
    """Two-sided Chi^2 test of sigma = sigma_0 vs. sigma != sigma_0, means known.

    'function' is called number_of_iterations times per attempt and each
    result is stored as one row of a (number_of_iterations x mean.size)
    array, so 'mean' and 'variance' must match the length of what
    'function' returns.

    A stochastic test fails every once in a while, so the whole procedure is
    repeated up to number_of_tries times. The method returns on the first
    attempt whose probability lies in
    [significance_level/2, 1 - significance_level/2] and calls self.fail()
    if no attempt does.
    """
    for _attempt in range(number_of_tries):
        n_outputs = mean.size
        samples = zeros((number_of_iterations, n_outputs), dtype=float32)
        for row in range(number_of_iterations):
            samples[row, :] = function()

        squared_deviation = (samples - mean) ** 2.0
        stat = (squared_deviation / variance).sum()
        dof = n_outputs * number_of_iterations
        prob = chisqprob(stat, dof)
        logger.log_status(
            "Stochastic Test: Chi^2 test statistic = " + str(stat) + ", df=",
            str(dof), ", p=" + str(prob))

        lower = significance_level / 2.0
        upper = 1 - significance_level / 2.0
        if lower <= prob <= upper:
            # test succeeded -- jump out of the method
            return

    # every attempt fell outside the acceptance interval
    self.fail(msg="prob=%f is not in [%f,%f]" % (
        prob, significance_level / 2.0, 1 - significance_level / 2.0))
def compute_stochastic_test_normal(self, function, expected_results,
                                   number_of_iterations,
                                   significance_level=0.01,
                                   transformation="sqrt"):
    """Compute a likelihood-ratio statistic comparing samples with expected_results.

    'function' is called number_of_iterations times; each result is
    transformed with try_transformation and stored as one row. sigma_1 is
    the mean squared deviation from the per-column sample means, sigma_0 the
    mean squared deviation from the transformed expected results.

    Side effect: stores per-column variances of the samples in
    self.variance.

    'significance_level' is accepted but not used by this method.

    Returns the tuple (K, LRTS, prob) with K = expected_results.size.
    """
    K = expected_results.size
    sum_y = zeros(K, dtype=float32)
    x_kr = zeros((number_of_iterations, K), dtype=float32)
    texpected_results = try_transformation(expected_results, transformation)
    for i in range(number_of_iterations):
        y_r = function()
        x_kr[i, :] = try_transformation(y_r, transformation)
        sum_y = sum_y + x_kr[i, :]
    muest = sum_y / float(number_of_iterations)
    n_values = float(number_of_iterations * K)
    sigma_1 = ((x_kr - muest) ** 2.0).sum() / n_values

    # Label every column 1..K in each row so the variance is taken per column.
    # list(...) keeps the repetition valid where range() is not a list.
    column_labels = reshape(
        array(number_of_iterations * list(range(1, K + 1))),
        (number_of_iterations, K))
    self.variance = variance(x_kr, labels=column_labels, index=arange(K) + 1)

    sigma_0 = ((x_kr - texpected_results) ** 2.0).sum() / n_values
    LRTS = number_of_iterations * K * log(sigma_0 / sigma_1)
    prob = chisqprob(LRTS, K)
    return (K, LRTS, prob)
def _run_stochastic_test_pearson(self, function, expected_results,
                                 number_of_iterations,
                                 significance_level=0.01,
                                 transformation=None):
    """Pearson Chi^2 test of repeated 'function' results against expected_results.

    'function' is called number_of_iterations times and each result is
    stored as one row. The statistic sums (x - expected)^2 / expected over
    all rows and columns; its probability is evaluated with
    K * number_of_iterations degrees of freedom.

    'transformation' is accepted but not used by this method.

    Returns a tuple (passed, message), where passed is
    prob >= significance_level.
    """
    K = expected_results.size
    samples = zeros((number_of_iterations, K), dtype=float32)
    for row in range(number_of_iterations):
        samples[row, :] = function()

    expected = expected_results.astype(float32)
    pearson = 0.0
    for column in range(K):
        deviation = samples[:, column] - expected[column]
        pearson = pearson + ((deviation ** 2.0) / expected[column]).sum()

    dof = K * number_of_iterations
    prob = chisqprob(pearson, dof)
    logger.log_status("Stochastic Test: Pearson Chi^2=" + str(pearson) + ", df=",
                      str(dof), ", p=" + str(prob))
    passed = prob >= significance_level
    message = "prob=%f < significance level of %f" % (prob, significance_level)
    return (passed, message)
def chi_square_test_onesided(self, function, expected_values,
                             number_of_iterations, significance_level=0.01,
                             number_of_tries=5):
    """Run a one-sided Chi^2 test of 'function' results against expected_values.

    'function' is called number_of_iterations times per attempt and each
    result is stored as one row. The statistic sums
    (x - expected)^2 / expected over all entries. An attempt succeeds when
    the probability is at least significance_level / 2; no upper bound is
    checked. Up to number_of_tries attempts are made before self.fail() is
    called.
    """
    for _attempt in range(number_of_tries):
        n_outputs = expected_values.size
        samples = zeros((number_of_iterations, n_outputs), dtype=float32)
        for row in range(number_of_iterations):
            samples[row, :] = function()

        squared_deviation = (samples - expected_values) ** 2.0
        stat = (squared_deviation / expected_values).sum()
        dof = n_outputs * number_of_iterations
        prob = chisqprob(stat, dof)
        logger.log_status(
            "Stochastic Test: Chi^2 test statistic = " + str(stat) + ", df=",
            str(dof), ", p=" + str(prob))
        if not (prob < significance_level / 2.0):
            # test succeeded -- jump out of the method
            return

    # every attempt fell below the threshold
    self.fail(msg="prob=%f is not in [%f,%f]" % (
        prob, significance_level / 2.0, 1 - significance_level / 2.0))
def compute_stochastic_test_normal(self, function, expected_results,
                                   number_of_iterations,
                                   significance_level=0.01,
                                   transformation="sqrt"):
    """Compute a likelihood-ratio statistic comparing samples with expected_results.

    'function' is called number_of_iterations times; each result is
    transformed with try_transformation and stored as one row. sigma_1 is
    the mean squared deviation from the per-column sample means, sigma_0 the
    mean squared deviation from the transformed expected results.

    Side effect: stores per-column variances of the samples in
    self.variance.

    'significance_level' is accepted but not used by this method.

    Returns the tuple (K, LRTS, prob) with K = expected_results.size.
    """
    K = expected_results.size
    sum_y = zeros(K, dtype=float32)
    x_kr = zeros((number_of_iterations, K), dtype=float32)
    texpected_results = try_transformation(expected_results, transformation)
    for i in range(number_of_iterations):
        y_r = function()
        x_kr[i, :] = try_transformation(y_r, transformation)
        sum_y = sum_y + x_kr[i, :]
    muest = sum_y / float(number_of_iterations)
    n_values = float(number_of_iterations * K)
    sigma_1 = ((x_kr - muest) ** 2.0).sum() / n_values

    # Label every column 1..K in each row so the variance is taken per column.
    # list(...) keeps the repetition valid where range() is not a list.
    column_labels = reshape(
        array(number_of_iterations * list(range(1, K + 1))),
        (number_of_iterations, K))
    self.variance = variance(x_kr, labels=column_labels, index=arange(K) + 1)

    sigma_0 = ((x_kr - texpected_results) ** 2.0).sum() / n_values
    LRTS = number_of_iterations * K * log(sigma_0 / sigma_1)
    prob = chisqprob(LRTS, K)
    return (K, LRTS, prob)
def test_sample_uniform_0_1_coefficients(self):
    """All coefficients are sampled from U(5,10), not centered on their values.

    A Chi^2 statistic against the expected value 7.5 is accumulated per
    coefficient over df + 1 draws; the test fails when a probability falls
    below significance_level / 2 (one-sided check).
    """
    from opus_core.third_party.pstat import chisqprob

    coef_values = array([0, 0], dtype="float32")
    coef = Coefficients(names=array(["coef1", "coef2"]), values=coef_values)

    expected_values = array([7.5, 7.5])
    statistics = zeros(coef_values.size)
    df = 9
    significance_level = 0.05
    for _ in range(df + 1):
        sampled = coef.sample_values(distribution='uniform',
                                     center_around_value=False, a=5, b=10)
        values = sampled.get_values()
        deviation = values - expected_values
        statistics += (deviation ** 2) / expected_values

    for position in range(values.size):
        prob = chisqprob(statistics[position], df)
        if prob < significance_level / 2.0:
            self.fail(msg="prob=%f is not in [%f,%f]" % (
                prob, significance_level / 2.0, 1 - significance_level / 2.0))
def _run_stochastic_test_pearson(self, function, expected_results,
                                 number_of_iterations,
                                 significance_level=0.01,
                                 transformation=None):
    """Pearson Chi^2 test of repeated 'function' results against expected_results.

    'function' is called number_of_iterations times and each result is
    stored as one row. The statistic sums (x - expected)^2 / expected over
    all rows and columns; its probability is evaluated with
    K * number_of_iterations degrees of freedom.

    'transformation' is accepted but not used by this method.

    Returns a tuple (passed, message), where passed is
    prob >= significance_level.
    """
    K = expected_results.size
    samples = zeros((number_of_iterations, K), dtype=float32)
    for row in range(number_of_iterations):
        samples[row, :] = function()

    expected = expected_results.astype(float32)
    pearson = 0.0
    for column in range(K):
        deviation = samples[:, column] - expected[column]
        pearson = pearson + ((deviation ** 2.0) / expected[column]).sum()

    dof = K * number_of_iterations
    prob = chisqprob(pearson, dof)
    #print pearson, prob
    logger.log_status("Stochastic Test: Pearson Chi^2=" + str(pearson) + ", df=",
                      str(dof), ", p=" + str(prob))
    passed = prob >= significance_level
    message = "prob=%f < significance level of %f" % (prob, significance_level)
    return (passed, message)
def _run_stochastic_test_poisson(self, function, expected_results,
                                 number_of_iterations,
                                 significance_level=0.01,
                                 transformation=None):
    """Run 'function' number_of_iterations times and test it against expected_results.

    Each (optionally transformed) result becomes one row of the sample
    array. A likelihood-ratio test statistic is computed from the per-column
    sample means and the (optionally transformed) expected results, and its
    Chi^2 probability with K = expected_results.size degrees of freedom is
    compared with significance_level.

    Returns a tuple (passed, message), where passed is
    prob >= significance_level.
    """
    K = expected_results.size
    samples = zeros((number_of_iterations, K), dtype=float32)
    running_total = zeros(K, dtype=float32)
    for row in range(number_of_iterations):
        outcome = function()
        samples[row, :] = try_transformation(outcome, transformation)
        running_total = running_total + samples[row, :]

    lambdak = running_total / float(number_of_iterations)
    lambdanull = try_transformation(expected_results.astype(float32),
                                    transformation)
    column_totals = sum(samples, axis=0)

    # zero expected values are masked so they do not take part in the ratio
    masked_null = ma.masked_where(lambdanull == 0, lambdanull)
    mean_term = number_of_iterations * (lambdanull - lambdak).sum()
    log_ratio_term = (ln(lambdak / masked_null) * column_totals).sum()
    LRTS = 2.0 * (mean_term + log_ratio_term)

    prob = chisqprob(LRTS, K)
    logger.log_status("Stochastic Test Poisson: LRTS=" + str(LRTS) + ", df=",
                      str(K), ", p=" + str(prob))
    passed = prob >= significance_level
    message = "prob=%f < significance level of %f" % (prob, significance_level)
    return (passed, message)
def chi_square_test_onesided(self, function, expected_values,
                             number_of_iterations, significance_level=0.01,
                             number_of_tries=5):
    """Run a one-sided Chi^2 test of 'function' results against expected_values.

    'function' is called number_of_iterations times per attempt and each
    result is stored as one row. The statistic sums
    (x - expected)^2 / expected over all entries. An attempt succeeds when
    the probability is at least significance_level / 2; no upper bound is
    checked. Up to number_of_tries attempts are made before self.fail() is
    called.
    """
    for _attempt in range(number_of_tries):
        n_outputs = expected_values.size
        samples = zeros((number_of_iterations, n_outputs), dtype=float32)
        for row in range(number_of_iterations):
            samples[row, :] = function()

        squared_deviation = (samples - expected_values) ** 2.0
        stat = (squared_deviation / expected_values).sum()
        dof = n_outputs * number_of_iterations
        prob = chisqprob(stat, dof)
        logger.log_status(
            "Stochastic Test: Chi^2 test statistic = " + str(stat) + ", df=",
            str(dof), ", p=" + str(prob))
        if not (prob < significance_level / 2.0):
            # test succeeded -- jump out of the method
            return

    # every attempt fell below the threshold
    self.fail(msg="prob=%f is not in [%f,%f]" % (
        prob, significance_level / 2.0, 1 - significance_level / 2.0))
def test_sample_uniform_coefficients(self):
    """Coefficients are sampled from U(x-0.5, x+0.5), where x is the coefficient value.

    For each coefficient a Chi^2 statistic is accumulated over df + 1 draws
    and the test fails when its probability drops below
    significance_level / 2 (one-sided check). Finally the dtype of the
    sampled values is verified to be float32.
    """
    from opus_core.third_party.pstat import chisqprob

    coef_values = array([0.5, -0.00001], dtype="float32")
    coef = Coefficients(names=array(["coef1", "coef2"]), values=coef_values)

    # for each coefficient run a one-sided Chi^2 test
    expected_values = coef_values
    TS = zeros(coef_values.size)
    df = 9
    significance_level = 0.05
    for _ in range(df + 1):
        new_coef = coef.sample_values(distribution='uniform')
        values = new_coef.get_values()
        # NOTE(review): the second expected value is negative, so its
        # statistic is non-positive and the check below is presumably
        # vacuous for that coefficient -- confirm this is intended.
        TS += ((values - expected_values) ** 2) / expected_values

    threshold = significance_level / 2.0
    for i in range(values.size):
        prob = chisqprob(TS[i], df)
        if prob < threshold:
            self.fail(msg="prob=%f is not in [%f,%f]" % (
                prob, significance_level / 2.0, 1 - significance_level / 2.0))

    # check data type (assertTrue replaces the removed alias assert_)
    self.assertTrue(values.dtype.name == "float32",
                    msg="Error in coefficients data type.")
def estimate_dcm(self, data):
    """Estimate coefficients by minimizing minus_nl_loglikelihood with BFGS.

    'data' is a 4-d array whose shape is unpacked as (nobs, alts, nvars, M).
    The parameter vector has nvars + M entries; the last M are the logsum
    parameters, which start at self.range_mu[1]. Coefficients listed in the
    "fixed_values" resource, or given as a starting value of the form
    (value, False), are held fixed.

    Side effects: sets self.M and logs a result table.

    Returns a dictionary with the keys "estimators", "coefficient_names",
    "standard_errors", "other_measures" and "other_info".
    """
    nobs, alts, nvars, M = data.shape
    self.M = M
    # matrix (nobs x alts) of 0's and 1's. 1 is on positions of chosen location.
    depm = self.resources["chosen_choice"]
    tags = ["estimate", "result"]
    vl = 2
    coef_names = self.resources.get("coefficient_names", None)
    nest_numbers = self.get_nest_numbers()

    index_of_fixed_values = zeros(nvars + M, dtype=bool)
    fixed_coefs, fixed_values = self.resources.get(
        "fixed_values", (array([]), array([])))
    if (coef_names is not None) and (fixed_coefs.size > 0):
        index_of_fixed_values[
            get_indices_of_matched_items(coef_names, fixed_coefs)] = True

    beta = zeros(nvars + M).astype(float32)
    beta[-M:] = self.range_mu[1]
    beta[index_of_fixed_values] = fixed_values.astype(beta.dtype)
    # The null log-likelihood is taken before starting values are applied.
    l_0 = self.nl_loglikelihood(beta, data, depm)

    ls_idx = arange(nvars, nvars + M)
    for name, sv in self.resources.get("starting_values", {}).items():
        est = True
        if isinstance(sv, (tuple, list)):
            # (value, estimate_flag): a false flag fixes the coefficient
            est = sv[1]
            sv = sv[0]
        if name.startswith('__logsum_'):
            # the text after the '__logsum_' prefix is the nest number
            if nest_numbers is not None:
                idx = ls_idx[where(nest_numbers == int(name[9:]))[0]]
            else:
                idx = array([ls_idx[int(name[9:]) - 1]])
        else:
            idx = ematch(coef_names, name)
        beta[idx] = sv
        index_of_fixed_values[idx] = not est

    # Switch from boolean masks to integer index arrays.
    index_of_not_fixed_values = where(logical_not(index_of_fixed_values))[0]
    index_of_fixed_values = where(index_of_fixed_values)[0]

    # NOTE(review): a bounds list built from self.range_mu used to be
    # computed here but was never passed to the optimizer (fmin_bfgs takes
    # no bounds), so the logsum parameters are not constrained.
    logger.start_block('BFGS procedure')
    try:
        bfgs_result = fmin_bfgs(
            self.minus_nl_loglikelihood,
            beta[index_of_not_fixed_values],
            args=(data, depm, beta[index_of_fixed_values],
                  index_of_not_fixed_values, index_of_fixed_values),
            full_output=True,
            disp=True,
            epsilon=self.resources.get('bfgs_epsilon', self._epsilon),
        )
    finally:
        # close the logger block even when the optimizer raises
        logger.end_block()

    beta[index_of_not_fixed_values] = bfgs_result[0].astype(beta.dtype)
    se = zeros(nvars + M)
    tvalues = zeros(nvars + M)
    mingrad = bfgs_result[2]

    if not self.resources.get('bfgs_approximate_second_derivative',
                              self._approximate_second_derivative):
        inv_hessian = bfgs_result[3]
        se[index_of_not_fixed_values] = sqrt(diagonal(inv_hessian))
    else:
        sec_der = approximate_second_derivative(
            self.minus_nl_loglikelihood,
            beta[index_of_not_fixed_values],
            args=(data, depm, beta[index_of_fixed_values],
                  index_of_not_fixed_values, index_of_fixed_values))
        inv_hessian = 1.0 / sec_der
        se[index_of_not_fixed_values] = sqrt(inv_hessian)

    tvalues[index_of_not_fixed_values] = (
        beta[index_of_not_fixed_values] / se[index_of_not_fixed_values])

    l_1 = self.nl_loglikelihood(beta, data, depm)
    ll_ratio = 1 - (l_1 / l_0)
    adj_ll_ratio = 1 - ((l_1 - nvars - M) / l_0)

    # http://en.wikipedia.org/wiki/Akaike_information_criterion
    aic = 2 * index_of_not_fixed_values.size - 2 * l_1
    logger.log_status("Akaike's Information Criterion (AIC): ", str(aic),
                      tags=tags, verbosity_level=vl)
    bic = -2 * l_1 + index_of_not_fixed_values.size * log(nobs)
    logger.log_status("Bayesian Information Criterion (BIC): ", str(bic),
                      tags=tags, verbosity_level=vl)
    logger.log_status("***********************************************",
                      tags=tags, verbosity_level=vl)
    logger.log_status('Log-likelihood is: ', l_1,
                      tags=tags, verbosity_level=vl)
    logger.log_status('Null Log-likelihood is: ', l_0,
                      tags=tags, verbosity_level=vl)
    logger.log_status('Likelihood ratio index: ', ll_ratio,
                      tags=tags, verbosity_level=vl)
    logger.log_status('Adj. likelihood ratio index: ', adj_ll_ratio,
                      tags=tags, verbosity_level=vl)
    logger.log_status('Number of observations: ', nobs,
                      tags=tags, verbosity_level=vl)
    logger.log_status('Suggested |t-value| > ', sqrt(log(nobs)))
    logger.log_status("-----------------------------------------------",
                      tags=tags, verbosity_level=vl)

    if coef_names is not None:
        nestn = nest_numbers
        if nestn is None:
            nestn = range(1, M + 1)
        names = concatenate(
            (coef_names, array(['__logsum_%s' % x for x in nestn])))
    else:
        names = [''] * (nvars + M)

    logger.log_status("Coeff_names\testimate\tstd err\t\tt-values\tgradient",
                      tags=tags, verbosity_level=vl)
    # mingrad only covers the free coefficients, hence the positional index
    for position, k in enumerate(index_of_not_fixed_values):
        logger.log_status(
            "%10s\t%8g\t%8g\t%8g\t%8g" % (
                names[k], beta[k], se[k], tvalues[k], mingrad[position]),
            tags=tags, verbosity_level=vl)
    logger.log_status('***********************************************',
                      tags=tags, verbosity_level=vl)
    logger.log_status('Elapsed time: ', time.clock() - self.start_time,
                      'seconds', tags=tags, verbosity_level=vl)

    df = nvars + M - index_of_fixed_values.size
    lrts = -2 * (l_0 - l_1)
    return {
        "estimators": beta,
        "coefficient_names": names,
        "standard_errors": se,
        "other_measures": {"t_statistic": tvalues},
        "other_info": {
            "p-value": chisqprob(lrts, df),
            "ll_ratio_index": ll_ratio,
            "ll_ratio_test_statistics": lrts,
            "df": df,
            "nobs": nobs,
        },
    }
def estimate_dcm(self, data):
    """Estimate coefficients by minimizing minus_loglikelihood with L-BFGS-B.

    'data' is a 3-d array whose shape is unpacked as (nobs, alts, nvars).
    Coefficients named in the "fixed_values" resource are pinned by giving
    them identical lower and upper bounds.

    Returns the result dictionary (also passed to self.print_results), or an
    empty dictionary when the Hessian cannot be computed.
    """
    nobs, alts, nvars = data.shape
    # matrix (nobs x alts) of 0's and 1's. 1 is on positions of chosen location.
    depm = self.resources["chosen_choice"]
    coef_names = self.resources.get("coefficient_names", None)
    tags = ["estimate", "result"]
    vl = 2

    is_fixed_values = zeros(nvars, dtype="bool")
    fixed_coefs, fixed_values = self.resources.get(
        "fixed_values", (array([]), array([])))
    if (coef_names is not None) and (fixed_coefs.size > 0):
        is_fixed_values[in1d(coef_names, fixed_coefs)] = True
    is_unfixed_values = logical_not(is_fixed_values)
    if is_fixed_values.sum() > 0:
        logger.log_warning("fixed coefficients have not been tested with BFGS estimation procedure; ")
        logger.log_warning("use with caution!")

    beta = zeros(nvars, dtype=self.dtype)
    beta[is_fixed_values] = fixed_values.astype(beta.dtype)
    ll_0 = self.loglikelihood(beta, data, depm)

    # One shared array serves as both bounds: free coefficients get
    # (None, None), fixed ones get (value, value).
    bound_values = repeat([None], beta.size)
    bound_values[is_fixed_values] = fixed_values
    bounds = list(zip(bound_values, bound_values))

    logger.start_block('Starting L_BFGS_B procedure...')
    try:
        epsilon = self.resources.get('bfgs_epsilon', self.epsilon)
        # Only hand over the analytical gradient when it is meant to be used.
        fprime = self.get_gradient if not self.approx_grad else None
        bfgs_result = fmin_l_bfgs_b(self.minus_loglikelihood, beta,
                                    args=(data, depm),
                                    fprime=fprime,
                                    approx_grad=self.approx_grad,
                                    bounds=bounds,
                                    iprint=self.iprint,
                                    epsilon=epsilon,
                                    maxfun=self.maxiter)

        beta = bfgs_result[0].astype(beta.dtype)
        info = bfgs_result[2]
        status = {
            0: 'Convergence achieved.',
            1: 'Maximum iterations reached without convergence.',
            2: ('Stop for another reason: %s.' % info['task']
                if 'task' in info else 'unknown'),
        }
        warnflag = ''
        if info['warnflag'] != 0:
            warnflag = status[info['warnflag']]

        g = self.get_gradient_by_agent(beta, data, depm)
        try:
            h = self.get_hessian(g)
        except Exception:
            msg = "Estimation led to singular matrix. No results."
            logger.log_warning(msg, tags=tags, verbosity_level=vl)
            return {}
        g = g.sum(axis=0)
        c = dot(dot(transpose(g), h), g)

        se = (self.get_standard_error(h)).astype(self.dtype)
        se[is_fixed_values] = 0.0
        tvalues = zeros(nvars, dtype=self.dtype)
        tvalues[is_unfixed_values] = (
            beta[is_unfixed_values] / se[is_unfixed_values])

        ll_1 = self.loglikelihood(beta, data, depm)
        ll_ratio = 1 - (ll_1 / ll_0)
        adj_ll_ratio = 1 - ((ll_1 - nvars) / ll_0)

        # http://en.wikipedia.org/wiki/Akaike_information_criterion
        # is_unfixed_values is a boolean mask, so the number of estimated
        # parameters is its sum, not its size.
        n_estimated = is_unfixed_values.sum()
        aic = 2 * n_estimated - 2 * ll_1
        bic = -2 * ll_1 + n_estimated * log(nobs)

        df = nvars - is_fixed_values.sum()
        lrts = -2 * (ll_0 - ll_1)
        iters = info['funcalls']

        results = {
            "coefficient_names": coef_names,
            "estimators": beta,
            "standard_errors": se,
            "other_measures": {"t_statistic": tvalues},
            "other_info": {
                "aic": aic,
                "bic": bic,
                "p-value": chisqprob(lrts, df),
                "l_0": ll_0,
                "l_1": ll_1,
                "ll_ratio_index": ll_ratio,
                "ll_ratio_test_statistics": lrts,
                "convergence": c,
                "df": df,
                "nobs": nobs,
                "nvars": nvars,
                "nalts": alts,
                "iterations": iters,
            },
            "warnflag": warnflag,
        }
        self.print_results(results)
    finally:
        # Close the logger block on every exit path, including the early
        # return above and any exception.
        logger.end_block()
    return results
def estimate_dcm(self, data):
    """Estimate coefficients with an iterative gradient/Hessian update scheme.

    'data' is a 3-d array whose shape is unpacked as (nobs, alts, nvars).
    Coefficients named in the "fixed_values" resource keep their given
    values; the others are updated until gradient * h * gradient drops to
    eps, the step size shrinks to .001, or self.maximum_iterations is
    reached.

    Logs a result table and returns a dictionary with the keys
    "estimators", "standard_errors", "other_measures" and "other_info", or
    an empty dictionary when the Hessian cannot be computed.
    """
    maxiter = self.maximum_iterations  # Maximum iterations allowed
    eps = 0.001  # Convergence criterion for gradient*hessian-inv*gradient
    tags = ["estimate", "result"]
    vl = 2
    nobs, alts, nvars = data.shape
    # matrix (nobs x alts) of 0's and 1's. 1 is on positions of chosen location.
    depm = self.resources["chosen_choice"]
    coef_names = self.resources.get("coefficient_names", None)
    fixed_coefs, fixed_values = self.resources.get(
        "fixed_values", (array([]), array([])))
    if (coef_names is not None) and (fixed_coefs.size > 0):
        index_of_fixed_values = get_indices_of_matched_items(
            coef_names, fixed_coefs)
        index_of_not_fixed_values = ones(nvars, dtype=bool)
        index_of_not_fixed_values[index_of_fixed_values] = False
        index_of_not_fixed_values = where(index_of_not_fixed_values)[0]
    else:
        index_of_fixed_values = array([], dtype="int32")
        index_of_not_fixed_values = arange(nvars)

    b2 = zeros(nvars).astype(float32)
    b2[index_of_fixed_values] = fixed_values.astype(b2.dtype)
    se = zeros(nvars).astype(float32)
    tvalues = zeros(nvars).astype(float32)
    l_2 = self.mnl_loglikelihood(data, b2, depm)
    l_0 = l_2
    s = 1
    for it in range(maxiter):
        # NOTE(review): b1 is the same array object as b2, so the in-place
        # update of b2 below also changes b1. On a non-converged exit the
        # returned estimates are therefore one step ahead of l_1 and h --
        # confirm whether a copy was intended.
        b1 = b2
        l_1 = l_2
        g = self.mnl_gradient(data, b1, depm, index_of_not_fixed_values)
        try:
            h = self.get_hessian(g)
        except Exception:
            logger.log_warning(
                "Estimation led to singular matrix. No results.",
                tags=tags, verbosity_level=vl)
            return {}
        g = g.sum(axis=0)
        c = dot(dot(transpose(g), h), g)
        if c <= eps:
            logger.log_status('Convergence achieved.',
                              tags=tags, verbosity_level=vl)
            break
        d = dot(h, g)
        b2[index_of_not_fixed_values] = (
            b1[index_of_not_fixed_values] + s * d).astype(b2.dtype)
        l_2 = self.mnl_loglikelihood(data, b2, depm)
        if l_2 <= l_1:
            # no improvement: halve the step for the next iteration
            s = s / 2.0
        if s <= .001:
            logger.log_warning('Cannot find increase',
                               tags=tags, verbosity_level=vl)
            break
    else:
        # Reached only when the loop used up all iterations without a
        # break, so a break on the final iteration is not reported as a
        # failure to converge.
        logger.log_warning('Maximum iterations reached without convergence',
                           tags=tags, verbosity_level=vl)

    se[index_of_not_fixed_values] = self.get_standard_error(h).astype(se.dtype)
    tvalues[index_of_not_fixed_values] = (
        b1[index_of_not_fixed_values] / se[index_of_not_fixed_values]
    ).astype(tvalues.dtype)
    ll_ratio = 1 - (l_1 / l_0)
    adj_ll_ratio = 1 - ((l_1 - nvars) / l_0)

    # http://en.wikipedia.org/wiki/Akaike_information_criterion
    aic = 2 * index_of_not_fixed_values.size - 2 * l_1
    logger.log_status("Akaike's Information Criterion (AIC): ", str(aic),
                      tags=tags, verbosity_level=vl)
    bic = -2 * l_1 + index_of_not_fixed_values.size * log(nobs)
    logger.log_status("Bayesian Information Criterion (BIC): ", str(bic),
                      tags=tags, verbosity_level=vl)
    logger.log_status("Number of Iterations: ", it + 1,
                      tags=tags, verbosity_level=vl)
    logger.log_status("***********************************************",
                      tags=tags, verbosity_level=vl)
    logger.log_status('Log-likelihood is: ', l_1,
                      tags=tags, verbosity_level=vl)
    logger.log_status('Null Log-likelihood is: ', l_0,
                      tags=tags, verbosity_level=vl)
    logger.log_status('Likelihood ratio index: ', ll_ratio,
                      tags=tags, verbosity_level=vl)
    logger.log_status('Adj. likelihood ratio index: ', adj_ll_ratio,
                      tags=tags, verbosity_level=vl)
    logger.log_status('Number of observations: ', nobs,
                      tags=tags, verbosity_level=vl)
    logger.log_status('Suggested |t-value| > ', sqrt(log(nobs)))
    logger.log_status('Convergence statistic is: ', c,
                      tags=tags, verbosity_level=vl)
    logger.log_status("-----------------------------------------------",
                      tags=tags, verbosity_level=vl)

    if coef_names is not None:
        names = coef_names
    else:
        names = [''] * index_of_not_fixed_values.size
    logger.log_status("Coeff_names\testimate\tstd err\t\tt-values",
                      tags=tags, verbosity_level=vl)
    for i in index_of_not_fixed_values:
        logger.log_status(
            "%10s\t%8g\t%8g\t%8g" % (names[i], b1[i], se[i], tvalues[i]),
            tags=tags, verbosity_level=vl)
    logger.log_status('***********************************************',
                      tags=tags, verbosity_level=vl)
    logger.log_status('Elapsed time: ', time.clock() - self.start_time,
                      'seconds', tags=tags, verbosity_level=vl)

    est = b1
    df = nvars - index_of_fixed_values.size
    lrts = -2 * (l_0 - l_1)
    return {
        "estimators": est,
        "standard_errors": se,
        "other_measures": {"t_statistic": tvalues},
        "other_info": {
            "p-value": chisqprob(lrts, df),
            "ll_ratio_index": ll_ratio,
            "ll_ratio_test_statistics": lrts,
            "df": df,
            "nobs": nobs,
        },
    }
def estimate_dcm(self, data):
    """Estimate coefficients with an iterative gradient/Hessian update scheme.

    'data' is a 3-d array whose shape is unpacked as (nobs, alts, nvars).
    Coefficients named in the "fixed_values" resource keep their given
    values; the others are updated until gradient * h * gradient drops to
    eps, the step size shrinks to .001, or self.maximum_iterations is
    reached. Problems met on the way are collected in the "warnflag" entry
    of the result.

    Passes the result dictionary to self.print_results and returns it, or
    returns an empty dictionary when the Hessian cannot be computed.
    """
    maxiter = self.maximum_iterations  # Maximum iterations allowed
    eps = 0.001  # Convergence criterion for gradient*hessian-inv*gradient
    tags = ["estimate", "result"]
    vl = 2
    nobs, alts, nvars = data.shape
    # matrix (nobs x alts) of 0's and 1's. 1 is on positions of chosen location.
    depm = self.resources["chosen_choice"]
    coef_names = self.resources.get("coefficient_names", None)
    fixed_coefs, fixed_values = self.resources.get(
        "fixed_values", (array([]), array([])))
    if (coef_names is not None) and (fixed_coefs.size > 0):
        index_of_fixed_values = get_indices_of_matched_items(
            coef_names, fixed_coefs)
        index_of_not_fixed_values = ones(nvars, dtype=bool)
        index_of_not_fixed_values[index_of_fixed_values] = False
        index_of_not_fixed_values = where(index_of_not_fixed_values)[0]
    else:
        index_of_fixed_values = array([], dtype="int32")
        index_of_not_fixed_values = arange(nvars)

    b2 = zeros(nvars).astype(float32)
    b2[index_of_fixed_values] = fixed_values.astype(b2.dtype)
    se = zeros(nvars).astype(float32)
    tvalues = zeros(nvars).astype(float32)
    l_2 = self.mnl_loglikelihood(data, b2, depm)
    l_0 = l_2
    s = 1
    warnflag = ''
    for it in range(maxiter):
        # NOTE(review): b1 is the same array object as b2, so the in-place
        # update of b2 below also changes b1. On a non-converged exit the
        # returned estimates are therefore one step ahead of l_1 and h --
        # confirm whether a copy was intended.
        b1 = b2
        l_1 = l_2
        g = self.mnl_gradient(data, b1, depm, index_of_not_fixed_values)
        try:
            h = self.get_hessian(g)
        except Exception:
            msg = "Estimation led to singular matrix. No results."
            logger.log_warning(msg, tags=tags, verbosity_level=vl)
            return {}
        g = g.sum(axis=0)
        c = dot(dot(transpose(g), h), g)
        if c <= eps:
            msg = "Convergence achieved."
            logger.log_status(msg, tags=tags, verbosity_level=vl)
            break
        d = dot(h, g)
        b2[index_of_not_fixed_values] = (
            b1[index_of_not_fixed_values] + s * d).astype(b2.dtype)
        l_2 = self.mnl_loglikelihood(data, b2, depm)
        if l_2 <= l_1:
            # no improvement: halve the step for the next iteration
            s = s / 2.0
        if s <= .001:
            msg = "Cannot find increase."
            warnflag += msg + "\n"
            break
    else:
        # Reached only when the loop used up all iterations without a
        # break, so a break on the final iteration does not add a spurious
        # non-convergence warning.
        msg = "Maximum iterations reached without convergence."
        warnflag += msg + "\n"

    se[index_of_not_fixed_values] = self.get_standard_error(h).astype(se.dtype)
    tvalues[index_of_not_fixed_values] = (
        b1[index_of_not_fixed_values] / se[index_of_not_fixed_values]
    ).astype(tvalues.dtype)
    ll_ratio = 1 - (l_1 / l_0)
    adj_ll_ratio = 1 - ((l_1 - nvars) / l_0)

    # http://en.wikipedia.org/wiki/Akaike_information_criterion
    aic = 2 * index_of_not_fixed_values.size - 2 * l_1
    bic = -2 * l_1 + index_of_not_fixed_values.size * log(nobs)

    if coef_names is not None:
        names = coef_names
    else:
        names = [''] * index_of_not_fixed_values.size

    est = b1
    df = nvars - index_of_fixed_values.size
    lrts = -2 * (l_0 - l_1)
    result = {
        "coefficient_names": names,
        "estimators": est,
        "standard_errors": se,
        "other_measures": {"t_statistic": tvalues},
        "other_info": {
            "aic": aic,
            "bic": bic,
            "p-value": chisqprob(lrts, df),
            "l_0": l_0,
            "l_1": l_1,
            "ll_ratio_index": ll_ratio,
            "ll_ratio_test_statistics": lrts,
            "convergence": c,
            "df": df,
            "nobs": nobs,
            "nvars": nvars,
            "nalts": alts,
            "iterations": it + 1,
        },
        "warnflag": warnflag,
    }
    self.print_results(result)
    return result
def estimate_dcm(self, data):
    """Estimate coefficients by iteratively increasing self.mnl_loglikelihood.

    All coefficients start at zero, except fixed ones, which are set to their
    given values and never updated. Each iteration moves the non-fixed
    coefficients by ``s * dot(h, g)``, where ``g`` comes from
    ``self.mnl_gradient`` and ``h`` from ``self.get_hessian``. The step
    factor ``s`` is halved whenever the log-likelihood does not increase.
    Iteration stops when ``g' h g <= 0.001``, when ``s <= 0.001``, or after
    ``self.maximum_iterations`` iterations. A summary of the fit and a table
    of the non-fixed coefficients are written to the logger.

    Arguments:
    data -- 3-d array whose shape is unpacked as (nobs, alts, nvars).

    Entries read from self.resources:
    "chosen_choice" -- matrix (nobs x alts) of 0's and 1's. 1 is on
        positions of chosen location.
    "coefficient_names" -- optional array of coefficient names.
    "fixed_values" -- optional tuple (names, values) of coefficients that
        are kept fixed.

    Returns a dictionary with the keys "estimators", "standard_errors",
    "other_measures" and "other_info". An empty dictionary is returned if
    self.get_hessian raises an exception.
    """
    maxiter = self.maximum_iterations  # Maximum iterations allowed
    eps = 0.001  # Convergence criterion for gradient*hessian-inv*gradient
    tags = ["estimate", "result"]
    vl = 2
    nobs, alts, nvars = data.shape
    # matrix (nobs x alts) of 0's and 1's. 1 is on positions of chosen location.
    depm = self.resources["chosen_choice"]
    coef_names = self.resources.get("coefficient_names", None)
    fixed_coefs, fixed_values = self.resources.get(
        "fixed_values", (array([]), array([])))
    if (coef_names is not None) and (fixed_coefs.size > 0):
        index_of_fixed_values = get_indices_of_matched_items(
            coef_names, fixed_coefs)
        # "bool" rather than the "bool8" alias, which NumPy 2.0 removed.
        is_not_fixed = ones(nvars, dtype="bool")
        is_not_fixed[index_of_fixed_values] = False
        index_of_not_fixed_values = where(is_not_fixed)[0]
    else:
        index_of_fixed_values = array([], dtype="int32")
        index_of_not_fixed_values = arange(nvars)

    b2 = zeros(nvars).astype(float32)
    b2[index_of_fixed_values] = fixed_values.astype(b2.dtype)
    se = zeros(nvars).astype(float32)
    tvalues = zeros(nvars).astype(float32)
    l_2 = self.mnl_loglikelihood(data, b2, depm)
    l_0 = l_2
    s = 1
    for it in range(maxiter):
        # Take a snapshot instead of an alias: b2 is updated in place below,
        # and b1 must keep the point at which l_1, g and h were evaluated so
        # that the reported estimators match the reported statistics.
        b1 = b2.copy()
        l_1 = l_2
        g = self.mnl_gradient(data, b1, depm, index_of_not_fixed_values)
        try:
            h = self.get_hessian(g)
        except Exception:
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            logger.log_warning(
                "Estimation led to singular matrix. No results.",
                tags=tags, verbosity_level=vl)
            return {}
        g = g.sum(axis=0)
        c = dot(dot(transpose(g), h), g)
        if c <= eps:
            logger.log_status('Convergence achieved.',
                              tags=tags, verbosity_level=vl)
            break
        d = dot(h, g)
        b2[index_of_not_fixed_values] = (
            b1[index_of_not_fixed_values] + s * d).astype(b2.dtype)
        l_2 = self.mnl_loglikelihood(data, b2, depm)
        if l_2 <= l_1:
            s = s / 2.0
        if s <= .001:
            logger.log_warning('Cannot find increase',
                               tags=tags, verbosity_level=vl)
            break
    else:
        # Reached only when the loop ran out of iterations without a break,
        # so convergence on the very last iteration is not reported as a
        # failure.
        logger.log_warning('Maximum iterations reached without convergence',
                           tags=tags, verbosity_level=vl)

    se[index_of_not_fixed_values] = self.get_standard_error(h).astype(se.dtype)
    tvalues[index_of_not_fixed_values] = (
        b1[index_of_not_fixed_values] /
        se[index_of_not_fixed_values]).astype(tvalues.dtype)
    ll_ratio = 1 - (l_1 / l_0)
    adj_ll_ratio = 1 - ((l_1 - nvars) / l_0)

    # http://en.wikipedia.org/wiki/Akaike_information_criterion
    # The two calls below use the verbosity_level keyword like every other
    # logger call in this method (they previously passed "verbosity").
    aic = 2 * index_of_not_fixed_values.size - 2 * l_1
    logger.log_status("Akaike's Information Criterion (AIC): ", str(aic),
                      tags=tags, verbosity_level=vl)
    bic = -2 * l_1 + index_of_not_fixed_values.size * log(nobs)
    logger.log_status("Bayesian Information Criterion (BIC): ", str(bic),
                      tags=tags, verbosity_level=vl)

    logger.log_status("Number of Iterations: ", it + 1,
                      tags=tags, verbosity_level=vl)
    logger.log_status("***********************************************",
                      tags=tags, verbosity_level=vl)
    logger.log_status('Log-likelihood is: ', l_1,
                      tags=tags, verbosity_level=vl)
    logger.log_status('Null Log-likelihood is: ', l_0,
                      tags=tags, verbosity_level=vl)
    logger.log_status('Likelihood ratio index: ', ll_ratio,
                      tags=tags, verbosity_level=vl)
    logger.log_status('Adj. likelihood ratio index: ', adj_ll_ratio,
                      tags=tags, verbosity_level=vl)
    logger.log_status('Number of observations: ', nobs,
                      tags=tags, verbosity_level=vl)
    logger.log_status('Suggested |t-value| > ', sqrt(log(nobs)))
    logger.log_status('Convergence statistic is: ', c,
                      tags=tags, verbosity_level=vl)
    logger.log_status("-----------------------------------------------",
                      tags=tags, verbosity_level=vl)
    if coef_names is not None:
        names = coef_names
    else:
        names = [''] * index_of_not_fixed_values.size
    logger.log_status("Coeff_names\testimate\tstd err\t\tt-values",
                      tags=tags, verbosity_level=vl)
    for i in index_of_not_fixed_values:
        logger.log_status(
            "%10s\t%8g\t%8g\t%8g" % (names[i], b1[i], se[i], tvalues[i]),
            tags=tags, verbosity_level=vl)
    logger.log_status('***********************************************',
                      tags=tags, verbosity_level=vl)
    # NOTE(review): time.clock() was removed in Python 3.8; self.start_time is
    # set outside this method, so both need to move to the same clock together.
    logger.log_status('Elapsed time: ', time.clock() - self.start_time,
                      'seconds', tags=tags, verbosity_level=vl)

    est = b1
    df = nvars - index_of_fixed_values.size
    lrts = -2 * (l_0 - l_1)  # likelihood ratio test statistic
    return {
        "estimators": est,
        "standard_errors": se,
        "other_measures": {"t_statistic": tvalues},
        "other_info": {
            "p-value": chisqprob(lrts, df),
            "ll_ratio_index": ll_ratio,
            "ll_ratio_test_statistics": lrts,
            "df": df,
            "nobs": nobs,
        },
    }
def estimate_dcm(self, data):
    """Estimate coefficients by iteratively increasing self.mnl_loglikelihood.

    The non-fixed coefficients start at zero and are moved by
    ``s * dot(h, g)`` in every iteration (``g`` from ``self.mnl_gradient``,
    ``h`` from ``self.get_hessian``). Fixed coefficients keep their given
    values. The step factor ``s`` is halved each time the log-likelihood
    fails to increase. The loop ends when ``g' h g <= 0.001``, when
    ``s <= 0.001``, or when ``self.maximum_iterations`` is exhausted; the
    latter two are recorded in the returned "warnflag" string.

    Arguments:
    data -- 3-d array whose shape is unpacked as (nobs, alts, nvars).

    Entries read from self.resources:
    "chosen_choice" -- matrix (nobs x alts) of 0's and 1's. 1 is on
        positions of chosen location.
    "coefficient_names" -- optional array of coefficient names.
    "fixed_values" -- optional tuple (names, values) of coefficients that
        are kept fixed.

    Returns a dictionary with the keys "coefficient_names", "estimators",
    "standard_errors", "other_measures", "other_info" and "warnflag", after
    handing it to self.print_results. Returns an empty dictionary if
    self.get_hessian raises an exception.
    """
    max_iterations = self.maximum_iterations  # Maximum iterations allowed
    eps = 0.001  # Convergence criterion for gradient*hessian-inv*gradient
    tags = ["estimate", "result"]
    vl = 2
    nobs, alts, nvars = data.shape
    # matrix (nobs x alts) of 0's and 1's. 1 is on positions of chosen location.
    depm = self.resources["chosen_choice"]
    coef_names = self.resources.get("coefficient_names", None)
    fixed_coefs, fixed_values = self.resources.get(
        "fixed_values", (array([]), array([])))

    if (coef_names is not None) and (fixed_coefs.size > 0):
        index_of_fixed_values = get_indices_of_matched_items(
            coef_names, fixed_coefs)
        # "bool" rather than the "bool8" alias, which NumPy 2.0 removed.
        free_mask = ones(nvars, dtype="bool")
        free_mask[index_of_fixed_values] = False
        index_of_not_fixed_values = where(free_mask)[0]
    else:
        index_of_fixed_values = array([], dtype="int32")
        index_of_not_fixed_values = arange(nvars)
    n_free = index_of_not_fixed_values.size

    b2 = zeros(nvars).astype(float32)
    b2[index_of_fixed_values] = fixed_values.astype(b2.dtype)
    se = zeros(nvars).astype(float32)
    tvalues = zeros(nvars).astype(float32)
    l_2 = self.mnl_loglikelihood(data, b2, depm)
    l_0 = l_2
    s = 1
    warnings = []
    for it in range(max_iterations):
        # b2 is modified in place further down, so b1 has to be a copy:
        # otherwise the estimators returned after a failed step would not be
        # the ones that l_1, the standard errors and c were computed for.
        b1 = b2.copy()
        l_1 = l_2
        g = self.mnl_gradient(data, b1, depm, index_of_not_fixed_values)
        try:
            h = self.get_hessian(g)
        except Exception:
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            logger.log_warning(
                "Estimation led to singular matrix. No results.",
                tags=tags, verbosity_level=vl)
            return {}
        g = g.sum(axis=0)
        c = dot(dot(transpose(g), h), g)
        if c <= eps:
            logger.log_status("Convergence achieved.",
                              tags=tags, verbosity_level=vl)
            break
        d = dot(h, g)
        b2[index_of_not_fixed_values] = (
            b1[index_of_not_fixed_values] + s * d).astype(b2.dtype)
        l_2 = self.mnl_loglikelihood(data, b2, depm)
        if l_2 <= l_1:
            s = s / 2.0
        if s <= .001:
            warnings.append("Cannot find increase.")
            break
    else:
        # Only runs when no break happened, i.e. the iteration budget was
        # really used up; converging on the final iteration is not a warning.
        warnings.append("Maximum iterations reached without convergence.")
    # Same format as before: every message is terminated by a newline.
    warnflag = "".join(message + "\n" for message in warnings)

    se[index_of_not_fixed_values] = self.get_standard_error(h).astype(se.dtype)
    tvalues[index_of_not_fixed_values] = (
        b1[index_of_not_fixed_values] /
        se[index_of_not_fixed_values]).astype(tvalues.dtype)
    ll_ratio = 1 - (l_1 / l_0)

    # http://en.wikipedia.org/wiki/Akaike_information_criterion
    aic = 2 * n_free - 2 * l_1
    bic = -2 * l_1 + n_free * log(nobs)
    if coef_names is not None:
        names = coef_names
    else:
        names = [''] * n_free
    df = nvars - index_of_fixed_values.size
    lrts = -2 * (l_0 - l_1)  # likelihood ratio test statistic

    result = {
        "coefficient_names": names,
        "estimators": b1,
        "standard_errors": se,
        "other_measures": {"t_statistic": tvalues},
        "other_info": {
            "aic": aic,
            "bic": bic,
            "p-value": chisqprob(lrts, df),
            "l_0": l_0,
            "l_1": l_1,
            "ll_ratio_index": ll_ratio,
            "ll_ratio_test_statistics": lrts,
            "convergence": c,
            "df": df,
            "nobs": nobs,
            "nvars": nvars,
            "nalts": alts,
            "iterations": it + 1,
        },
        "warnflag": warnflag,
    }
    self.print_results(result)
    return result
def estimate_dcm(self, data):
    """Estimate coefficients by minimizing self.minus_loglikelihood with L-BFGS-B.

    Coefficients start at zero. Fixed coefficients are set to their given
    values and held there by passing identical lower and upper bounds to
    fmin_l_bfgs_b. After the optimizer returns, standard errors and
    t-values are derived from self.get_gradient_by_agent,
    self.get_hessian and self.get_standard_error at the optimum.

    Arguments:
    data -- 3-d array whose shape is unpacked as (nobs, alts, nvars).

    Entries read from self.resources:
    "chosen_choice" -- matrix (nobs x alts) of 0's and 1's. 1 is on
        positions of chosen location.
    "coefficient_names" -- optional array of coefficient names.
    "fixed_values" -- optional tuple (names, values) of coefficients that
        are kept fixed.
    "bfgs_epsilon" -- optional epsilon for the optimizer; defaults to
        self.epsilon.

    Returns a dictionary with the keys "coefficient_names", "estimators",
    "standard_errors", "other_measures", "other_info" and "warnflag", after
    handing it to self.print_results. Returns an empty dictionary if
    self.get_hessian raises an exception.
    """
    nobs, alts, nvars = data.shape
    # matrix (nobs x alts) of 0's and 1's. 1 is on positions of chosen location.
    depm = self.resources["chosen_choice"]
    coef_names = self.resources.get("coefficient_names", None)
    tags = ["estimate", "result"]
    vl = 2

    is_fixed_values = zeros(nvars, dtype="bool")
    fixed_coefs, fixed_values = self.resources.get(
        "fixed_values", (array([]), array([])))
    if (coef_names is not None) and (fixed_coefs.size > 0):
        # NOTE(review): the mask marks positions in coef_names order, while
        # fixed_values is assigned in its own order below - presumably both
        # are expected to be ordered alike; confirm against callers.
        is_fixed_values[in1d(coef_names, fixed_coefs)] = True
    is_unfixed_values = logical_not(is_fixed_values)
    if is_fixed_values.sum() > 0:
        logger.log_warning(
            "fixed coefficients have not been tested with BFGS estimation procedure; "
        )
        logger.log_warning("use with caution!")

    beta = zeros(nvars, dtype=self.dtype)
    beta[is_fixed_values] = fixed_values.astype(beta.dtype)
    ll_0 = self.loglikelihood(beta, data, depm)

    # One object array serves as both lower and upper bound: None (unbounded)
    # for free coefficients, the fixed value on both sides for fixed ones.
    bound_values = repeat([None], beta.size)
    bound_values[is_fixed_values] = fixed_values
    # list(...) so that the bounds are a real sequence under Python 3 as well.
    bounds = list(zip(bound_values, bound_values))

    logger.start_block('Starting L_BFGS_B procedure...')
    try:
        epsilon = self.resources.get('bfgs_epsilon', self.epsilon)
        # No analytical gradient is handed over when it is approximated.
        fprime = self.get_gradient if not self.approx_grad else None
        bfgs_result = fmin_l_bfgs_b(self.minus_loglikelihood, beta,
                                    args=(data, depm),
                                    fprime=fprime,
                                    approx_grad=self.approx_grad,
                                    bounds=bounds,
                                    iprint=self.iprint,
                                    epsilon=epsilon,
                                    maxfun=self.maxiter)

        beta = bfgs_result[0].astype(beta.dtype)
        info = bfgs_result[2]
        status = {
            0: 'Convergence achieved.',
            1: 'Maximum iterations reached without convergence.',
            2: ('Stop for another reason: %s.' % info['task']
                if 'task' in info else 'unknown'),
        }
        warnflag = ''
        if info['warnflag'] != 0:
            warnflag = status[info['warnflag']]

        g = self.get_gradient_by_agent(beta, data, depm)
        try:
            h = self.get_hessian(g)
        except Exception:
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            msg = "Estimation led to singular matrix. No results."
            logger.log_warning(msg, tags=tags, verbosity_level=vl)
            return {}
        g = g.sum(axis=0)
        c = dot(dot(transpose(g), h), g)

        se = (self.get_standard_error(h)).astype(self.dtype)
        se[is_fixed_values] = 0.0
        tvalues = zeros(nvars, dtype=self.dtype)
        tvalues[is_unfixed_values] = beta[is_unfixed_values] / se[
            is_unfixed_values]

        ll_1 = self.loglikelihood(beta, data, depm)
        ll_ratio = 1 - (ll_1 / ll_0)

        # Number of freely estimated coefficients.
        df = nvars - is_fixed_values.sum()
        # http://en.wikipedia.org/wiki/Akaike_information_criterion
        # is_unfixed_values is a boolean mask, so its .size is always nvars;
        # the criteria need the count of free coefficients, which is df.
        aic = 2 * df - 2 * ll_1
        bic = -2 * ll_1 + df * log(nobs)
        lrts = -2 * (ll_0 - ll_1)  # likelihood ratio test statistic
        iters = info['funcalls']

        results = {
            "coefficient_names": coef_names,
            "estimators": beta,
            "standard_errors": se,
            "other_measures": {"t_statistic": tvalues},
            "other_info": {
                "aic": aic,
                "bic": bic,
                "p-value": chisqprob(lrts, df),
                "l_0": ll_0,
                "l_1": ll_1,
                "ll_ratio_index": ll_ratio,
                "ll_ratio_test_statistics": lrts,
                "convergence": c,
                "df": df,
                "nobs": nobs,
                "nvars": nvars,
                "nalts": alts,
                "iterations": iters,
            },
            "warnflag": warnflag,
        }
        self.print_results(results)
        return results
    finally:
        # Close the logger block on every exit path, including the early
        # return above.
        logger.end_block()
def estimate_dcm(self, data):
    """Estimate nested-logit coefficients by minimizing
    self.minus_nl_loglikelihood with L-BFGS-B.

    The parameter vector has nvars + M entries: nvars coefficients followed
    by M logsum parameters, named '__logsum_<n>' in the output. Coefficients
    start at 1 and logsum parameters at self.range_mu[1], unless overridden
    by fixed or starting values. Free coefficients are bounded by
    (-5.0, 5.0), free logsum parameters by self.range_mu. Standard errors
    come from approximate_second_derivative and are logged as approximate.

    Arguments:
    data -- 4-d array whose shape is unpacked as (nobs, alts, nvars, M);
        M is also stored in self.M.

    Entries read from self.resources:
    "chosen_choice" -- matrix (nobs x alts) of 0's and 1's. 1 is on
        positions of chosen location.
    "coefficient_names" -- optional array of coefficient names.
    "fixed_values" -- optional tuple (names, values) of coefficients that
        are kept fixed.
    "starting_values" -- optional dictionary name -> value or
        name -> (value, estimate); a false 'estimate' fixes the parameter
        at the given value.
    "bfgs_epsilon" -- optional epsilon for the optimizer; defaults to
        self._epsilon.

    Returns a dictionary with the keys "estimators", "coefficient_names",
    "standard_errors", "other_measures" and "other_info".
    """
    nobs, alts, nvars, M = data.shape
    self.M = M
    # matrix (nobs x alts) of 0's and 1's. 1 is on positions of chosen location.
    depm = self.resources["chosen_choice"]
    tags = ["estimate", "result"]
    vl = 2
    coef_names = self.resources.get("coefficient_names", None)
    nest_numbers = self.get_nest_numbers()
    nparams = nvars + M

    # Boolean mask while starting values are processed; converted to index
    # arrays afterwards. "bool" rather than the "bool8" alias, which
    # NumPy 2.0 removed.
    index_of_fixed_values = zeros(nparams, dtype="bool")
    fixed_coefs, fixed_values = self.resources.get(
        "fixed_values", (array([]), array([])))
    if (coef_names is not None) and (fixed_coefs.size > 0):
        index_of_fixed_values[
            get_indices_of_matched_items(coef_names, fixed_coefs)] = True

    beta = ones(nparams).astype(float32)
    beta[-M:] = self.range_mu[1]
    beta[index_of_fixed_values] = fixed_values.astype(beta.dtype)
    # The null log-likelihood uses zero coefficients and logsum parameters of 1.
    l_0beta = zeros(nparams).astype(float32)
    l_0beta[-M:] = 1
    l_0 = self.nl_loglikelihood(l_0beta, data, depm)

    ls_idx = arange(nvars, nparams)
    # .items() works on Python 2 and 3 alike.
    for name, sv in self.resources.get("starting_values", {}).items():
        est = True
        if isinstance(sv, (tuple, list)):
            est = sv[1]
            sv = sv[0]
        if name.startswith('__logsum_'):
            nest = int(name[9:])  # the number following '__logsum_'
            if nest_numbers is not None:
                idx = ls_idx[where(nest_numbers == nest)[0]]
            else:
                idx = array([ls_idx[nest - 1]])
        else:
            idx = ematch(coef_names, name)
        beta[idx] = sv
        index_of_fixed_values[idx] = not est

    index_of_not_fixed_values = where(logical_not(index_of_fixed_values))[0]
    index_of_fixed_values = where(index_of_fixed_values)[0]

    # Positions nvars..nvars+M-1 hold the logsum parameters.
    bounds = [self.range_mu if i >= nvars else (-5.0, 5.0)
              for i in index_of_not_fixed_values]
    optimizer_args = (data, depm, beta[index_of_fixed_values],
                      index_of_not_fixed_values, index_of_fixed_values)

    logger.start_block('BFGS procedure')
    bfgs_result = fmin_l_bfgs_b(self.minus_nl_loglikelihood,
                                beta[index_of_not_fixed_values],
                                pgtol=.01,
                                args=optimizer_args,
                                bounds=bounds,
                                approx_grad=True,
                                disp=True,
                                epsilon=self.resources.get('bfgs_epsilon',
                                                           self._epsilon),
                                )
    logger.end_block()

    beta[index_of_not_fixed_values] = bfgs_result[0].astype(beta.dtype)
    se = zeros(nparams)
    tvalues = zeros(nparams)
    mingrad = bfgs_result[2]['grad']

    # The optimizer does not provide a Hessian, so the second derivative is
    # approximated at the optimum (beta has been updated in place above,
    # hence the argument values are read again here).
    sec_der = approximate_second_derivative(
        self.minus_nl_loglikelihood,
        beta[index_of_not_fixed_values],
        args=(data, depm, beta[index_of_fixed_values],
              index_of_not_fixed_values, index_of_fixed_values))
    inv_hessian = 1.0 / sec_der
    se[index_of_not_fixed_values] = sqrt(inv_hessian)

    tvalues[index_of_not_fixed_values] = (
        beta[index_of_not_fixed_values] / se[index_of_not_fixed_values])

    l_1 = self.nl_loglikelihood(beta, data, depm)

    ll_ratio = 1 - (l_1 / l_0)
    adj_ll_ratio = 1 - ((l_1 - nvars - M) / l_0)

    # http://en.wikipedia.org/wiki/Akaike_information_criterion
    # The two calls below use the verbosity_level keyword like every other
    # logger call in this method (they previously passed "verbosity").
    aic = 2 * index_of_not_fixed_values.size - 2 * l_1
    logger.log_status("Akaike's Information Criterion (AIC): ", str(aic),
                      tags=tags, verbosity_level=vl)
    bic = -2 * l_1 + index_of_not_fixed_values.size * log(nobs)
    logger.log_status("Bayesian Information Criterion (BIC): ", str(bic),
                      tags=tags, verbosity_level=vl)
    logger.log_status("***********************************************",
                      tags=tags, verbosity_level=vl)
    logger.log_status('Log-likelihood is: ', l_1,
                      tags=tags, verbosity_level=vl)
    logger.log_status('Null Log-likelihood is: ', l_0,
                      tags=tags, verbosity_level=vl)
    logger.log_status('Likelihood ratio index: ', ll_ratio,
                      tags=tags, verbosity_level=vl)
    logger.log_status('Adj. likelihood ratio index: ', adj_ll_ratio,
                      tags=tags, verbosity_level=vl)
    logger.log_status('Number of observations: ', nobs,
                      tags=tags, verbosity_level=vl)
    logger.log_status('Suggested |t-value| > ', sqrt(log(nobs)))
    logger.log_status('WARNING: Standard errors printed below are approximated')
    logger.log_status("-----------------------------------------------",
                      tags=tags, verbosity_level=vl)
    if coef_names is not None:
        nestn = nest_numbers
        if nestn is None:
            nestn = range(1, M + 1)
        # A list comprehension yields a proper string array on Python 2 and 3
        # (array(map(...)) wraps the map object on Python 3).
        names = concatenate(
            (coef_names, array(['__logsum_%s' % x for x in nestn])))
    else:
        names = [''] * nparams
    logger.log_status(
        "Coeff_names\testimate\tstd err\t\tt-values\tgradient",
        tags=tags, verbosity_level=vl)
    # mingrad is indexed by position among the free parameters, the other
    # arrays by position in the full parameter vector.
    for pos, i in enumerate(index_of_not_fixed_values):
        logger.log_status(
            "%10s\t%8g\t%8g\t%8g\t%8g" % (names[i], beta[i], se[i],
                                          tvalues[i], mingrad[pos]),
            tags=tags, verbosity_level=vl)
    logger.log_status('***********************************************',
                      tags=tags, verbosity_level=vl)
    # NOTE(review): time.clock() was removed in Python 3.8; self.start_time is
    # set outside this method, so both need to move to the same clock together.
    logger.log_status('Elapsed time: ', time.clock() - self.start_time,
                      'seconds', tags=tags, verbosity_level=vl)

    df = nparams - index_of_fixed_values.size
    lrts = -2 * (l_0 - l_1)  # likelihood ratio test statistic
    return {
        "estimators": beta,
        "coefficient_names": names,
        "standard_errors": se,
        "other_measures": {"t_statistic": tvalues},
        "other_info": {
            "p-value": chisqprob(lrts, df),
            "ll_ratio_index": ll_ratio,
            "ll_ratio_test_statistics": lrts,
            "df": df,
            "nobs": nobs,
        },
    }