def match_variable_with_coefficient_names(self, coefnames, varnames):
    """Fill 'self.coefmap' with positions of matching coefficients.

    The i-th element of the string array 'coefnames' is matched to the
    i-th element of the string array 'varnames'.

    For each of the 'self.n' variables, the positions of its alias in
    'varnames' are looked up with 'ematch'. For every such position the
    corresponding name in 'coefnames' is looked up in
    'self.coefficient_names', and the found position is stored in
    'self.coefmap' at the index
    (equation, variable, submodel, other dimensions...).

    Raises:
        StandardError: if a variable alias has no match in 'varnames' or
            more matches than equations * submodels * other dimensions
            allow, or if a coefficient name is not found in
            'self.coefficient_names'.
    """
    ndisteqs = self.nequations()
    for ivar in range(self.n):
        matches = ematch(varnames, self.variable_names[ivar].get_alias())
        nmatches = matches.size
        max_matches = ndisteqs * self.nsubmodels * max(1, sum(self.get_other_ndim()))
        if (nmatches > max_matches) or (nmatches == 0):
            raise StandardError("Method match_variable_with_coefficient_names: something wrong with variable names.")
        for i in range(nmatches):  # iterate over matches of variables
            match = matches[i]
            v_matches = ematch(self.coefficient_names, coefnames[match])
            if v_matches.size == 0:
                raise StandardError("Method match_variable_with_coefficient_names: Mismatch in coefficient and variable names.")
            for j in range(v_matches.size):  # iterate over matches in coefficient class
                # With several submodels, only pair entries that belong to
                # the same submodel.
                if not ((self.nsubmodels == 1) or
                        (self.specification.get_submodels()[match] == self.coefficients.get_submodels()[v_matches[j]])):
                    continue
                submidx = 0
                if self.nsubmodels > 1:
                    submidx = self.submodels_mapping[self.specification.get_submodels()[match]]
                if len(self.specification.get_equations()) > 1:
                    if len(self._equation_index_mapping) > 0:
                        eqidx = self._equation_index_mapping[self.specification.get_equations()[match]]
                    else:
                        # No explicit mapping: equation ids are used as
                        # 1-based positions.
                        eqidx = int(self.specification.get_equations()[match] - 1)
                else:
                    # No per-equation specification: the coefficient is
                    # assigned along the whole first axis of coefmap.
                    eqidx = list(range(self.coefmap.shape[0]))
                coefmap_index = [eqidx, ivar, submidx]
                for dimname in self.other_dimensions_values.keys():
                    idx = self.other_dimensions_mapping[dimname][self.specification.get_other_field(dimname)[match]]
                    coefmap_index.append(idx)
                self.coefmap[tuple(coefmap_index)] = v_matches[j]
def get_constants(specification):
    """Return the distinct coefficient names paired with constant terms.

    The variable names of 'specification' are searched (via 'ematch') for
    'constant_string'; the coefficient names at the matching positions are
    reduced to distinct names with 'get_distinct_names'.
    """
    names_of_variables = asarray(specification.get_variable_names())
    constant_positions = ematch(names_of_variables, constant_string)
    # NOTE: restricting the matches to equations < 1 and raising
    # CoefConstantsLengthException for more than one distinct name are
    # both disabled.
    return get_distinct_names(
        specification.get_coefficient_names()[constant_positions])
def delete(self, variables):
    """Delete given variables from specification.

    'variables' is an iterable of variable aliases. Every position whose
    alias matches one of them (via 'ematch') is dropped; the positions
    that remain are passed to 'self.do_shrink'. Aliases without a match
    are ignored.
    """
    variables = tuple(variables)
    variable_names = asarray([var.get_alias() for var in self.variables])
    # Start with everything kept and switch off the matched positions.
    will_not_delete = array([True] * variable_names.size, dtype=bool)
    for var in variables:
        idx = ematch(variable_names, var)
        if idx.size > 0:
            will_not_delete[idx] = False
    self.do_shrink(variable_names, where(will_not_delete)[0])
def shrink(self, variables):
    """Keep only the elements that correspond to the given variables.

    For every alias in 'variables' the first matching position (as
    returned by 'ematch') among the aliases of 'self.variables' is
    collected, in the order of 'variables'; aliases without a match are
    skipped. The collected positions are handed to 'self.do_shrink'.
    """
    variables = tuple(variables)
    aliases = asarray([item.get_alias() for item in self.variables])
    found_per_variable = (ematch(aliases, wanted) for wanted in variables)
    first_positions = [found[0] for found in found_per_variable
                       if found.size > 0]
    self.do_shrink(aliases, asarray(first_positions))
def truncate_coefficients(self, coefficients):
    """Leave only that part of coefficients that corresponds to specification.

    Returns the result of 'coefficients.copy_and_truncate' called with the
    positions of all coefficient names that match (via 'ematch') a
    distinct coefficient name of 'self.specification'. With no names in
    the specification the coefficients are truncated to an empty index.
    """
    specnames = self.specification.get_distinct_coefficient_names()
    if specnames.size <= 0:
        return coefficients.copy_and_truncate(array([], dtype='int32'))
    coefnames = coefficients.get_names()
    index_list = []
    for icoef in range(specnames.size):
        index_list.extend(ematch(coefnames, specnames[icoef]))
    # int32 (same as the empty branch): a 16-bit index would wrap for
    # positions above 32767.
    return coefficients.copy_and_truncate(array(index_list, dtype='int32'))
def truncate_coefficients(self, coefficients):
    """Leave only that part of coefficients that corresponds to specification.

    Returns the result of 'coefficients.copy_and_truncate' called with the
    positions of all coefficient names that match (via 'ematch') a
    distinct coefficient name of 'self.specification'. With no names in
    the specification the coefficients are truncated to an empty index.
    """
    specnames = self.specification.get_distinct_coefficient_names()
    if specnames.size <= 0:
        return coefficients.copy_and_truncate(array([], dtype='int32'))
    coefnames = coefficients.get_names()
    index_list = []
    for icoef in range(specnames.size):
        index_list.extend(ematch(coefnames, specnames[icoef]))
    # NOTE: keeping reserved names (those starting with '__') is currently
    # disabled:
    # idx = where(map(lambda x: x.startswith('__'), coefnames))[0]
    # [index_list.append(i) for i in idx if i not in index_list]
    # int32 (same as the empty branch): a 16-bit index would wrap for
    # positions above 32767.
    return coefficients.copy_and_truncate(array(index_list, dtype='int32'))
def sample_values(self, distribution=None, distribution_dictionary=None, **kwargs):
    """Return a copy of self, where values are sampled from given distribution(s).

    If 'distribution' is 'normal', all coefficients are sampled from normal
    distribution (see docstring for sample_values_from_normal_distribution).
    If 'distribution' is 'uniform', all coefficients are sampled from
    uniform distribution (see docstring for
    sample_values_from_uniform_distribution). In both cases, kwargs are
    passed to the appropriate method.

    If 'distribution' is None, argument 'distribution_dictionary' must be
    given. This dictionary contains arguments for sampling different
    coefficients using different distributions. Keys of this dictionary
    are coefficient names, values are again dictionaries. These must have
    a key 'distribution' which is either 'normal' or 'uniform'. An
    optional entry 'parameters' contains a dictionary with keyword
    arguments passed to either sample_one_value_from_normal_distribution
    (multiplicator) or sample_one_value_from_uniform_distribution
    (a, b, center_around_value).
    See example in test_sample_coefficients_mixed_distr.

    Entries of 'distribution_dictionary' that name an unknown coefficient,
    lack the 'distribution' key or ask for an unsupported distribution are
    skipped with a warning.

    Raises:
        TypeError: if 'distribution' is None and
            'distribution_dictionary' is not a dict.
        ValueError: if 'distribution' is neither 'normal', 'uniform' nor
            None.
    """
    if distribution == 'normal':
        logger.log_status(
            'Sampling coefficient values from normal distribution.')
        return self.sample_values_from_normal_distribution(**kwargs)
    if distribution == 'uniform':
        logger.log_status(
            'Sampling coefficient values from uniform distribution.')
        return self.sample_values_from_uniform_distribution(**kwargs)
    if distribution is not None:
        raise ValueError("Sampling from %s distribution not implemented." % distribution)

    if not isinstance(distribution_dictionary, dict):
        raise TypeError("Either argument 'distribution' or argument 'distribution_dictionary' must be not None.")
    # Every coefficient can have different distribution
    new_coef = self.copy_and_truncate(arange(self.size()))
    for name, args in distribution_dictionary.items():
        idx = ematch(self.get_names(), name)
        if idx.size <= 0:
            logger.log_warning(
                'Coefficient %s not found. Sampling for this coefficient ignored.'
                % name)
            continue
        if 'distribution' not in args:
            logger.log_warning(
                "The sampling dictionary for coefficient %s must contain the entry 'distribution'. Sampling for this coefficient ignored."
                % name)
            continue
        pars = {}
        if 'parameters' in args:
            pars = args['parameters']
        if args['distribution'] == 'normal':
            new_coef.values[idx] = self.sample_one_value_from_normal_distribution(
                idx, **pars)
        elif args['distribution'] == 'uniform':
            new_coef.values[idx] = self.sample_one_value_from_uniform_distribution(
                idx, **pars)
        else:
            # Interpolate with '%' so the message is formatted, as in the
            # other warnings of this method.
            logger.log_warning(
                "Sampling from %s distribution not implemented. Sampling for %s ignored."
                % (args['distribution'], name))
    return new_coef
def get_values_of_one_coefficient(self, name):
    """Return the values stored at the positions where 'name' matches.

    Positions are obtained by calling 'ematch' on the coefficient names;
    the values are converted to an array before being indexed.
    """
    positions = ematch(self.get_names(), name)
    all_values = array(self.get_values())
    return all_values[positions]
def run(self, data, upc_sequence, resources):
    """Estimate a model with biogeme and return the results as a dictionary.

    'data' is of shape (nobservations, nchoices, nvariables).
    'upc_sequence' is not used by this method.
    'resources' is read for the entries "skip_generating_model_file",
    "biogeme_model_name", "chosen_choice" and "specified_coefficients".

    The data of all variables not named "constant" are flattened into a
    2-d table with one column per (variable, alternative) pair plus a
    last column holding the chosen alternative. The table is passed to
    biogeme, the estimation results are printed, and a dictionary with
    the entries "estimators", "standard_errors", "other_measures" and
    "other_info" is returned. Entries of fixed parameters stay zero.
    """
    nobs, alts, nvars = data.shape
    # Either reuse an existing model file or generate one.
    if resources.get("skip_generating_model_file", False):
        model_name = resources.get("biogeme_model_name", "default")
    else:
        model_name = create_model_file(alts, resources)
    choice_matrix = resources[
        "chosen_choice"]  # matrix (nobs x alts) of 0's and 1's. 1 is on positions of chosen location.
    # 1-based column index of every nonzero entry
    # (assumes one nonzero entry per row - TODO confirm).
    chosen_choice = where(choice_matrix)[1] + 1
    # flatten data into 2d
    var_names = resources[
        "specified_coefficients"].get_variable_names_from_alt()
    # Variables named "constant" are left out of the data table.
    index_of_non_constants = []
    for i in range(nvars):
        if not (var_names[i] == "constant"):
            index_of_non_constants.append(i)
    index_of_non_constants = array(index_of_non_constants)
    nvars_without_const = index_of_non_constants.size
    # One column per (variable, alternative) pair plus one for the choice.
    data_for_biogeme = zeros((nobs, alts * nvars_without_const + 1), dtype=float64)
    biogeme_var_names = []
    for ivar in range(nvars_without_const):
        for ialt in range(alts):
            # Column name is <variable name>_<1-based alternative number>.
            biogeme_var_names.append(
                var_names[index_of_non_constants[ivar]] + "_" + str(ialt + 1))
            data_for_biogeme[:, ivar * alts + ialt] = data[:, ialt, index_of_non_constants[ivar]]
    data_for_biogeme[:, alts * nvars_without_const] = chosen_choice
    a_ptr = getpointer(data_for_biogeme)
    #
    # Prepare data headers
    #
    ncols = data_for_biogeme.shape[1]
    headers = biogeme.vectorStr(ncols)
    for i in range(ncols - 1):
        headers[i] = biogeme_var_names[i]
    headers[ncols - 1] = "choice"
    #
    # Define variables for the results
    #
    estimationResults = biogeme.patPythonResults()
    #
    # Invoke biogeme
    #
    biogemeObject = biogeme.patBiogemeScripting()
    biogemeObject.estimate(model_name, a_ptr, nobs, ncols, headers, estimationResults)
    #
    # Use the results
    #
    print " timeStamp: ", estimationResults.getTimeStamp()
    print " version: ", estimationResults.getVersion()
    print " description: ", estimationResults.getDescription()
    print " model: ", estimationResults.getModel()
    print " drawsType: ", estimationResults.getDrawsType()
    print " numberOfDraws: ", estimationResults.numberOfDraws
    print " numberOfParameters: ", estimationResults.numberOfParameters
    print " numberOfObservations: ", estimationResults.numberOfObservations
    print " numberOfIndividuals: ", estimationResults.numberOfIndividuals
    print " nullLogLikelihood: ", estimationResults.nullLoglikelihood
    print " initLoglikelihood: ", estimationResults.initLoglikelihood
    print " finalLoglikelihood: ", estimationResults.finalLoglikelihood
    print " likelihoodRatioTest: ", estimationResults.likelihoodRatioTest
    print " rhoSquare: ", estimationResults.rhoSquare
    print " rhoBarSquare: ", estimationResults.rhoBarSquare
    print " finalGradientNorm: ", estimationResults.finalGradientNorm
    print " varianceCovariance: ", estimationResults.getVarianceCovariance()
    print "Parameters\tEstimate\tStdErr\ttTest\tpValue"
    coef_names = resources[
        "specified_coefficients"].get_coefficient_names_from_alt()
    # Result arrays are aligned with coef_names.
    est_values = zeros(coef_names.size, dtype=float32)
    std_errors = zeros(coef_names.size, dtype=float32)
    tstat = zeros(coef_names.size, dtype=float32)
    pvalues = zeros(coef_names.size, dtype=float32)
    for i in range(estimationResults.totalNumberOfParameters):
        if (estimationResults.getFixed(i)):
            print estimationResults.getParamName(i), " is fixed"
        else:
            name = estimationResults.getParamName(i)
            # First position of the parameter name among the coefficient
            # names; raises IndexError if ematch returns nothing.
            idx = ematch(coef_names, name)[0]
            print name , '\t',\
                  estimationResults.getEstimate(i), '\t',\
                  estimationResults.getStdErrRobust(i),'\t',\
                  estimationResults.getTTestRobust(i),'\t',\
                  estimationResults.getPValueRobust(i)
            #print "Check... " , estimationResults.getEstimate(estimationResults.getParamName(i)) ;
            est_values[idx] = estimationResults.getEstimate(i)
            std_errors[idx] = estimationResults.getStdErrRobust(i)
            tstat[idx] = estimationResults.getTTestRobust(i)
            pvalues[idx] = estimationResults.getPValueRobust(i)
    logger.log_status("Biogeme model: ", model_name)
    return {
        "estimators": est_values,
        "standard_errors": std_errors,
        "other_measures": {
            "t_statistic": tstat,
            "p_values": pvalues
        },
        "other_info": {
            "ll_ratio_index": 1 - (estimationResults.finalLoglikelihood / estimationResults.nullLoglikelihood),
            "ll_ratio_test_statistics": estimationResults.likelihoodRatioTest,
            "nobs": estimationResults.numberOfObservations,
            "Rho-Square": estimationResults.rhoSquare,
            "Rho-bar-Square": estimationResults.rhoBarSquare
        }
    }
def estimate_dcm(self, data):
    """Estimate coefficients by minimising 'self.minus_nl_loglikelihood' with BFGS.

    'data' is of shape (nobs, alts, nvars, M). The parameter vector has
    nvars + M entries; the last M entries are the ones reported under the
    names '__logsum_<nest>'.

    Entries read from 'self.resources':
        "chosen_choice" -- matrix (nobs x alts) of 0's and 1's.
        "coefficient_names" -- optional array of coefficient names.
        "fixed_values" -- optional tuple (names, values) of coefficients
            that are kept fixed.
        "starting_values" -- optional dictionary: name -> value or
            name -> (value, estimate_flag); a false flag fixes the
            coefficient at the given value.
        "bfgs_epsilon", "bfgs_approximate_second_derivative" -- optional
            overrides of 'self._epsilon' and
            'self._approximate_second_derivative'.

    Returns a dictionary with the entries "estimators",
    "coefficient_names", "standard_errors", "other_measures" and
    "other_info".
    """
    nobs, alts, nvars, M = data.shape
    self.M = M
    depm = self.resources["chosen_choice"]  # matrix (nobs x alts) of 0's and 1's. 1 is on positions of chosen location.
    tags = ["estimate", "result"]
    vl = 2
    coef_names = self.resources.get("coefficient_names", None)
    nest_numbers = self.get_nest_numbers()

    index_of_fixed_values = zeros(nvars + M, dtype=bool)
    fixed_coefs, fixed_values = self.resources.get("fixed_values", (array([]), array([])))
    if (coef_names is not None) and (fixed_coefs.size > 0):
        index_of_fixed_values[get_indices_of_matched_items(
            coef_names, fixed_coefs)] = True

    beta = zeros(nvars + M).astype(float32)
    # NOTE: fmin_bfgs is called without bounds, so self.range_mu only
    # provides the starting value of the last M parameters.
    beta[-M:] = self.range_mu[1]
    beta[index_of_fixed_values] = fixed_values.astype(beta.dtype)
    # Log-likelihood before applying user-given starting values; used as
    # the null log-likelihood below.
    l_0 = self.nl_loglikelihood(beta, data, depm)

    ls_idx = arange(nvars, nvars + M)
    for name, sv in self.resources.get("starting_values", {}).items():
        est = True
        if isinstance(sv, (tuple, list)):
            est = sv[1]
            sv = sv[0]
        if name.startswith('__logsum_'):
            # The text after the '__logsum_' prefix is a nest number.
            if nest_numbers is not None:
                idx = ls_idx[where(nest_numbers == int(name[9:]))[0]]
            else:
                idx = array([ls_idx[int(name[9:]) - 1]])
        else:
            idx = ematch(coef_names, name)
        beta[idx] = sv
        index_of_fixed_values[idx] = not est

    # From here on both index variables hold positions, not boolean masks.
    index_of_not_fixed_values = where(logical_not(index_of_fixed_values))[0]
    index_of_fixed_values = where(index_of_fixed_values)[0]

    logger.start_block('BFGS procedure')
    bfgs_result = fmin_bfgs(
        self.minus_nl_loglikelihood,
        beta[index_of_not_fixed_values],
        args=(data, depm, beta[index_of_fixed_values],
              index_of_not_fixed_values, index_of_fixed_values),
        full_output=True,
        disp=True,
        epsilon=self.resources.get('bfgs_epsilon', self._epsilon),
    )
    logger.end_block()

    beta[index_of_not_fixed_values] = bfgs_result[0].astype(beta.dtype)
    se = zeros(nvars + M)
    tvalues = zeros(nvars + M)
    mingrad = bfgs_result[2]
    if not self.resources.get('bfgs_approximate_second_derivative',
                              self._approximate_second_derivative):
        # Standard errors from the inverse Hessian returned by fmin_bfgs.
        inv_hessian = bfgs_result[3]
        se[index_of_not_fixed_values] = sqrt(diagonal(inv_hessian))
    else:
        sec_der = approximate_second_derivative(
            self.minus_nl_loglikelihood,
            beta[index_of_not_fixed_values],
            args=(data, depm, beta[index_of_fixed_values],
                  index_of_not_fixed_values, index_of_fixed_values))
        inv_hessian = 1.0 / sec_der
        se[index_of_not_fixed_values] = sqrt(inv_hessian)
    tvalues[index_of_not_fixed_values] = beta[
        index_of_not_fixed_values] / se[index_of_not_fixed_values]

    l_1 = self.nl_loglikelihood(beta, data, depm)
    ll_ratio = 1 - (l_1 / l_0)
    adj_ll_ratio = 1 - ((l_1 - nvars - M) / l_0)

    # http://en.wikipedia.org/wiki/Akaike_information_criterion
    aic = 2 * index_of_not_fixed_values.size - 2 * l_1
    logger.log_status("Akaike's Information Criterion (AIC): ", str(aic),
                      tags=tags, verbosity_level=vl)
    bic = -2 * l_1 + index_of_not_fixed_values.size * log(nobs)
    logger.log_status("Bayesian Information Criterion (BIC): ", str(bic),
                      tags=tags, verbosity_level=vl)
    logger.log_status("***********************************************",
                      tags=tags, verbosity_level=vl)
    logger.log_status('Log-likelihood is: ', l_1,
                      tags=tags, verbosity_level=vl)
    logger.log_status('Null Log-likelihood is: ', l_0,
                      tags=tags, verbosity_level=vl)
    logger.log_status('Likelihood ratio index: ', ll_ratio,
                      tags=tags, verbosity_level=vl)
    logger.log_status('Adj. likelihood ratio index: ', adj_ll_ratio,
                      tags=tags, verbosity_level=vl)
    logger.log_status('Number of observations: ', nobs,
                      tags=tags, verbosity_level=vl)
    logger.log_status('Suggested |t-value| > ', sqrt(log(nobs)))
    logger.log_status("-----------------------------------------------",
                      tags=tags, verbosity_level=vl)

    if coef_names is not None:
        nestn = nest_numbers
        if nestn is None:
            nestn = range(1, M + 1)
        names = concatenate(
            (coef_names, array(['__logsum_%s' % x for x in nestn])))
    else:
        names = [''] * (nvars + M)

    logger.log_status(
        "Coeff_names\testimate\tstd err\t\tt-values\tgradient",
        tags=tags, verbosity_level=vl)
    # Only estimated (not fixed) parameters are listed; mingrad is aligned
    # with them.
    for i in range(index_of_not_fixed_values.size):
        pos = index_of_not_fixed_values[i]
        logger.log_status(
            "%10s\t%8g\t%8g\t%8g\t%8g" % (names[pos], beta[pos], se[pos],
                                          tvalues[pos], mingrad[i]),
            tags=tags, verbosity_level=vl)
    logger.log_status('***********************************************',
                      tags=tags, verbosity_level=vl)
    logger.log_status('Elapsed time: ', time.clock() - self.start_time,
                      'seconds', tags=tags, verbosity_level=vl)

    df = nvars + M - index_of_fixed_values.size
    lrts = -2 * (l_0 - l_1)
    return {
        "estimators": beta,
        "coefficient_names": names,
        "standard_errors": se,
        "other_measures": {
            "t_statistic": tvalues
        },
        "other_info": {
            "p-value": chisqprob(lrts, df),
            "ll_ratio_index": ll_ratio,
            "ll_ratio_test_statistics": lrts,
            "df": df,
            "nobs": nobs
        }
    }