def run(self, individual_dataset, fraction_dataset, id_name1='blockgroup_id', id_name2='zone_id',
        fraction_attribute_name='fraction'):
    """Assign a value of id_name2 to each individual by Monte-Carlo sampling from the
    fractions given for its id_name1 geography in fraction_dataset."""
    assert id_name1 in individual_dataset.get_known_attribute_names()
    if id_name2 not in individual_dataset.get_known_attribute_names():
        individual_dataset.add_primary_attribute(-1 * ones(individual_dataset.size()), id_name2)
    fraction_id1 = fraction_dataset.get_attribute(id_name1)
    individual_id1 = individual_dataset.get_attribute(id_name1)
    unique_ids = unique(fraction_id1)

    for id1 in unique_ids:
        individual_of_id1 = where(individual_id1 == id1)[0]
        n = individual_of_id1.size
        logger.log_status("Processing %s %s: %s individuals" % (id_name1, id1, n))
        if n > 0:
            fractions = fraction_dataset.get_attribute(fraction_attribute_name)[fraction_id1 == id1]
            id2 = fraction_dataset.get_attribute(id_name2)[fraction_id1 == id1]
            ## ignore individuals in a geography whose fractions sum to less than 1.0e-2
            if fractions.sum() < 1.0e-2:
                continue
            if not allclose(fractions.sum(), 1.0, rtol=1.e-2):
                fractions = normalize(fractions)
            fractions_cumsum = ncumsum(fractions)
            R = random(n)
            index = searchsorted(fractions_cumsum, R)
            individual_dataset.modify_attribute(id_name2, id2[index], index=individual_of_id1)
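## A minimal pure-NumPy sketch of the Monte-Carlo trick used in run() above:
## normalize the fractions, build their cumulative sum, and map one uniform
## draw per individual onto a zone via searchsorted. The function name and
## arguments are illustrative, not part of the opus API.
import numpy as np

def assign_by_fraction(fractions, zone_ids, n_individuals):
    p = fractions / fractions.sum()         # normalize(), in case fractions don't sum to 1
    cdf = np.cumsum(p)                      # monotone CDF over the candidate zones
    r = np.random.random(n_individuals)     # one uniform draw per individual
    # searchsorted maps each draw r to the first CDF bin with cdf >= r
    return zone_ids[np.searchsorted(cdf, r)]

# Example: a 60/30/10 split over three zones
print(assign_by_fraction(np.array([0.6, 0.3, 0.1]), np.array([101, 102, 103]), 10))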
def _sample_by_agent_and_stratum(self, index1, index2, stratum, prob_array,
                                 chosen_choice_index, strata_sample_setting):
    """Agent by agent and stratum by stratum stratified sampling, suitable for a 2d prob_array
    and/or sample sizes that vary between agents.
    This method is slower than _sample_by_stratum; for simpler stratified sampling,
    use _sample_by_stratum instead."""
    rank_of_prob = rank(prob_array)
    rank_of_strata = rank(strata_sample_setting)
    J = self.__determine_sampled_index_size(strata_sample_setting, rank_of_strata)
    sampled_index = zeros((index1.size, J), dtype=DTYPE) - 1
    self._sampling_probability = zeros((index1.size, J), dtype=float32)
    self._stratum_id = ones((index1.size, J), dtype=DTYPE) * NO_STRATUM_ID

    for i in range(index1.size):
        if rank_of_strata == 3:
            strata_sample_pairs = strata_sample_setting[i, :]
        else:
            strata_sample_pairs = strata_sample_setting
        if rank_of_prob == 2:
            prob = prob_array[i, :]
        else:
            prob = prob_array
        j = 0
        for (this_stratum, this_size) in strata_sample_pairs:
            if this_size <= 0:
                continue
            index_not_in_stratum = where(stratum != this_stratum)[0]
            this_prob = copy.copy(prob)
            this_prob[index_not_in_stratum] = 0.0
            this_prob = normalize(this_prob)
            if nonzerocounts(this_prob) < this_size:
                logger.log_warning("weight array doesn't have enough non-zero counts, use sample with replacement")
            #chosen_index_to_index2 = where(index2 == chosen_choice_index[i])[0]
            #exclude_index passed to probsample_noreplace needs to be indexed to index2
            this_sampled_index = probsample_noreplace(index2, sample_size=this_size,
                                                      prob_array=this_prob,
                                                      exclude_index=chosen_choice_index[i],
                                                      return_index=True)
            sampled_index[i, j:j + this_size] = this_sampled_index
            self._sampling_probability[i, j:j + this_size] = this_prob[this_sampled_index]
            self._stratum_id[i, j:j + this_size] = ones((this_sampled_index.size,), dtype=DTYPE) * this_stratum
            j += this_size
    return index2[sampled_index]
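## The core of the per-stratum loop above: zero out alternatives outside the
## current stratum and renormalize before sampling. A pure-NumPy sketch with
## illustrative names (not the opus normalize()/nonzerocounts() helpers).
import numpy as np

def stratum_probabilities(prob, stratum_ids, this_stratum):
    p = prob.copy()
    p[stratum_ids != this_stratum] = 0.0    # alternatives outside the stratum get weight 0
    total = p.sum()
    return p / total if total > 0 else p    # renormalize; an empty stratum stays all-zero

# Example: 5 alternatives in strata [1, 1, 2, 2, 2]
prob = np.array([0.1, 0.2, 0.3, 0.3, 0.1])
print(stratum_probabilities(prob, np.array([1, 1, 2, 2, 2]), 2))  # [0, 0, 0.43, 0.43, 0.14]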
def _sample_by_stratum(self, index1, index2, stratum, prob_array, chosen_choice_index, strata_sample_setting):
    """Stratum by stratum stratified sampling, suitable for a 1d prob_array
    when the sample size is the same for all agents."""
    if prob_array.ndim != 1:
        raise RuntimeError("_sample_by_stratum is only suitable for a 1d prob_array")
    sampled_index = zeros((index1.size, 1), dtype=DTYPE) - 1
    self._sampling_probability = zeros((index1.size, 1), dtype=float32)
    self._stratum_id = ones((index1.size, 1), dtype=DTYPE) * NO_STRATUM_ID

    for this_stratum, this_size in strata_sample_setting:
        index_not_in_stratum = where(stratum != this_stratum)[0]
        this_prob = copy.copy(prob_array)
        this_prob[index_not_in_stratum] = 0.0
        this_prob = normalize(this_prob)
        replace = False  # non-repeat sampling
        if nonzerocounts(this_prob) < this_size:
            logger.log_warning("weight array doesn't have enough non-zero counts, sample with replacement")
            replace = True
        this_sampled_index = prob2dsample(index2, sample_size=(index1.size, this_size),
                                          prob_array=this_prob,
                                          exclude_index=chosen_choice_index,
                                          replace=replace, return_index=True)
        sampled_index = concatenate((sampled_index, this_sampled_index), axis=1)
        self._sampling_probability = concatenate((self._sampling_probability,
                                                  this_prob[this_sampled_index]), axis=1)
        self._stratum_id = concatenate((self._stratum_id,
                                        ones((this_sampled_index.shape[0], 1), dtype=DTYPE) * this_stratum), axis=1)

    # drop the placeholder first column
    self._sampling_probability = self._sampling_probability[:, 1:]
    self._stratum_id = self._stratum_id[:, 1:]
    return index2[sampled_index[:, 1:]]
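## prob2dsample above draws, for every agent at once, this_size alternatives
## without replacement from one shared 1-d probability vector. A hedged
## pure-NumPy sketch of that operation using Efraimidis-Spirakis exponential
## keys (assumed equivalent in distribution, not the actual opus implementation):
import numpy as np

def sample_rows_noreplace(n_agents, sample_size, prob):
    # key_i = -log(U_i) / p_i; the sample_size smallest keys per row form a
    # weighted sample without replacement; zero-probability items get key=inf
    u = np.random.random((n_agents, prob.size))
    with np.errstate(divide='ignore'):
        keys = -np.log(u) / prob
    return np.argsort(keys, axis=1)[:, :sample_size]

idx = sample_rows_noreplace(4, 2, np.array([0.5, 0.3, 0.2, 0.0]))
print(idx)  # index 3 (zero probability) never appears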
def run(self, individual_dataset, fraction_dataset, id_name1='blockgroup_id', id_name2='zone_id',
        fraction_attribute_name='fraction', dataset_pool=None):
    """Variant of the run() method above: datasets may be given by name and resolved
    through dataset_pool, id_name2 is created with the dtype it has in fraction_dataset,
    and the modified dataset is flushed at the end."""
    if dataset_pool is None:
        dataset_pool = SessionConfiguration().get_dataset_pool()

    if isinstance(individual_dataset, str):
        individual_dataset = dataset_pool[individual_dataset]
    if isinstance(fraction_dataset, str):
        fraction_dataset = dataset_pool[fraction_dataset]

    assert id_name1 in individual_dataset.get_known_attribute_names()
    if id_name2 not in individual_dataset.get_known_attribute_names():
        dtype = fraction_dataset.get_attribute(id_name2).dtype
        default_values = -1 * ones(individual_dataset.size(), dtype=dtype)
        individual_dataset.add_primary_attribute(default_values, id_name2)

    fraction_id1 = fraction_dataset.get_attribute(id_name1)
    individual_id1 = individual_dataset.get_attribute(id_name1)
    unique_ids = unique(fraction_id1)

    for id1 in unique_ids:
        individual_of_id1 = where(individual_id1 == id1)[0]
        n = individual_of_id1.size
        logger.log_status("Processing %s %s: %s individuals" % (id_name1, id1, n))
        if n > 0:
            fractions = fraction_dataset.get_attribute(fraction_attribute_name)[fraction_id1 == id1]
            id2 = fraction_dataset.get_attribute(id_name2)[fraction_id1 == id1]
            ## ignore individuals in a geography whose fractions sum to less than 1.0e-2
            if fractions.sum() < 1.0e-2:
                continue
            if not allclose(fractions.sum(), 1.0, rtol=1.e-2):
                fractions = normalize(fractions)
            fractions_cumsum = ncumsum(fractions)
            R = random(n)
            index = searchsorted(fractions_cumsum, R)
            individual_dataset.modify_attribute(id_name2, id2[index], index=individual_of_id1)
    individual_dataset.flush_dataset()
def run(self, data, upc_sequence, resources=None):
    """Outer loop of constrained MNL estimation: alternate BHHH estimation with
    capacity-constraint adjustments of pi until pi converges or max_iterations is reached."""
    self.mnl_probabilities = upc_sequence.probability_class
    self.bhhh_estimation = bhhh_mnl_estimation()
    modified_upc_sequence = UPCFactory().get_model(
        utilities=None, probabilities="opus_core.mnl_probabilities", choices=None)
    modified_upc_sequence.utility_class = upc_sequence.utility_class

    N, neqs, V = data.shape
    max_iter = resources.get("max_iterations", 100)  # default

    sc = SessionConfiguration()
    dataset_pool = sc.get_dataset_pool()
    sample_rate = dataset_pool.get_dataset("sample_rate")
    CLOSE = sc["CLOSE"]
    info_filename = sc["info_file"]
    info_filename = os.path.join('.', info_filename)
    info_file = open(info_filename, "a")
    constraint_dict = {1: 'constrained', 0: 'unconstrained'}
    swing_cases_fix = 0  # set swing alternatives to constrained (1) or unconstrained (0)
    prob_correlation = None

    choice_set = resources['_model_'].choice_set
    J = choice_set.size()
    alt_id = choice_set.get_id_attribute()
    movers = choice_set.get_attribute('movers')
    resources.check_obligatory_keys(["capacity_string"])
    supply = choice_set.get_attribute(resources["capacity_string"])

    index = resources.get("index", None)
    if index is None:  # no sampling case, alternative set is the full choice_set
        index = arange(J)
    if index.ndim <= 1:
        index = repeat(index[newaxis, :], N, axis=0)

    if resources.get('aggregate_to_dataset', None):
        aggregate_dataset = dataset_pool.get_dataset(resources.get('aggregate_to_dataset'))
        choice_set_aggregate_id = choice_set.get_attribute(aggregate_dataset.get_id_name()[0])
        index = aggregate_dataset.get_id_index(choice_set_aggregate_id[index].ravel()).reshape(index.shape)
        supply = aggregate_dataset.get_attribute(resources["capacity_string"])
        J = aggregate_dataset.size()
        movers = aggregate_dataset.get_attribute("movers")

    demand_history = movers[:, newaxis]
    resources.merge({"index": index})

    pi = ones(index.shape, dtype=float32)  # initialize pi
    #average_omega = ones(J, dtype=float32)  # initialize average_omega
    logger.start_block('Outer Loop')
    for i in range(max_iter):
        logger.log_status('Outer Loop Iteration %s' % i)
        result = self.bhhh_estimation.run(data, modified_upc_sequence, resources)
        del self.bhhh_estimation
        collect()
        self.bhhh_estimation = bhhh_mnl_estimation()

        probability = modified_upc_sequence.get_probabilities()
        if data.shape[2] == V:  # insert a placeholder for ln(pi) in data
            data = concatenate((data, ones((N, neqs, 1), dtype=float32)), axis=2)
            coef_names = resources.get("coefficient_names")
            coef_names = concatenate((coef_names, array(["ln_pi"])))
            resources.merge({"coefficient_names": coef_names})
        else:
            beta_ln_pi = result['estimators'][where(coef_names == 'ln_pi')][0]
            logger.log_status("mu = 1/%s = %s" % (beta_ln_pi, 1 / beta_ln_pi))

            prob_hat = safe_array_divide(probability, pi ** beta_ln_pi)
            #prob_hat = safe_array_divide(probability, pi)
            prob_hat_sum = prob_hat.sum(axis=1, dtype=float32)
            if not ma.allclose(prob_hat_sum, 1.0):
                logger.log_status("probability doesn't sum up to 1, with minimum %s and maximum %s" %
                                  (prob_hat_sum.min(), prob_hat_sum.max()))
                probability = normalize(prob_hat)

        demand = self.mnl_probabilities.get_demand(index, probability, J) * 1 / sample_rate
        demand_history = concatenate((demand_history, demand[:, newaxis]), axis=1)

        sdratio = safe_array_divide(supply, demand, return_value_if_denominator_is_zero=2.0)
        sdratio_matrix = sdratio[index]

        ## debug info
        from numpy import histogram
        from opus_core.misc import unique
        cc = histogram(index.ravel(), unique(index.ravel()))[0]
        logger.log_status("=================================================================")
        logger.log_status("Probability min: %s, max: %s" % (probability.min(), probability.max()))
        logger.log_status("Demand min: %s, max: %s" % (demand.min(), demand.max()))
        logger.log_status("sdratio min: %s, max: %s" % (sdratio.min(), sdratio.max()))
        logger.log_status("demand[sdratio==sdratio.min()]=%s" % demand[sdratio == sdratio.min()])
        logger.log_status("demand[sdratio==sdratio.max()]=%s" % demand[sdratio == sdratio.max()])
        logger.log_status("Counts of unique submarkets in alternatives min: %s, max: %s" % (cc.min(), cc.max()))
        logger.log_status("=================================================================")

        constrained_locations_matrix, omega, info = self.inner_loop(
            supply, demand, probability, index, sdratio_matrix, J, max_iteration=max_iter)
        inner_iterations, constrained_locations_history, swing_index, average_omega_history = info

        for idx in swing_index:
            logger.log_status("swinging alt with id %s set to %s" %
                              (alt_id[idx], constraint_dict[swing_cases_fix]))
            constrained_locations_matrix[index == idx] = swing_cases_fix

        if swing_index.size > 0:
            info_file.write("swing of constraints found with id %s \n" % alt_id[swing_index])
            info_file.write("outer_iteration, %i, " % i + ", ".join([str(i)] * len(inner_iterations)) + "\n")
            info_file.write("inner_iteration, , " + ", ".join(inner_iterations) + "\n")
            info_file.write("id, sdratio, " + ", ".join(["avg_omega"] * len(inner_iterations)) + "\n")
            for idx in swing_index:
                line = str(alt_id[idx]) + ','
                line += str(sdratio[idx]) + ','
                line += ",".join([str(x) for x in average_omega_history[idx, ]])
                line += "\n"
                info_file.write(line)
            info_file.write("\n")
            info_file.flush()

        outer_iterations = [str(i)] * len(inner_iterations)
        prob_min = [str(probability.min())] * len(inner_iterations)
        prob_max = [str(probability.max())] * len(inner_iterations)

        pi_new = self.mnl_probabilities.get_pi(sdratio_matrix, omega, constrained_locations_matrix)
        data[:, :, -1] = ln(pi_new)

        #diagnostic output
        if not ma.allclose(pi, pi_new, atol=CLOSE):
            if i > 0:  # don't print this for the first iteration
                logger.log_status("min of abs(pi(l+1) - pi(l)): %s" % absolute(pi_new - pi).min())
                logger.log_status("max of abs(pi(l+1) - pi(l)): %s" % absolute(pi_new - pi).max())
                logger.log_status("mean of pi(l+1) - pi(l): %s" % (pi_new - pi).mean())
                logger.log_status('Standard Deviation pi(l+1) - pi(l): %s' % standard_deviation(pi_new - pi))
                logger.log_status('correlation of pi(l+1) and pi(l): %s' % corr(pi_new.ravel(), pi.ravel())[0, 1])
            pi = pi_new
            probability_old = probability  # keep probability of the previous loop, for statistics computation only
        else:  # convergence criterion achieved, quitting outer loop
            logger.log_status("pi(l) == pi(l+1): Convergence criterion achieved")
            info_file.write("\nConstrained Locations History:\n")
            info_file.write("outer_iteration," + ",".join(outer_iterations) + "\n")
            info_file.write("inner_iteration," + ",".join(inner_iterations) + "\n")
            info_file.write("minimum_probability," + ",".join(prob_min) + "\n")
            info_file.write("maximum_probability," + ",".join(prob_max) + "\n")
            for row in range(J):
                line = [str(x) for x in constrained_locations_history[row, ]]
                info_file.write(str(alt_id[row]) + "," + ",".join(line) + "\n")
            info_file.flush()

            info_file.write("\nDemand History:\n")
            i_str = [str(x) for x in range(i)]
            info_file.write("outer_iteration, (movers)," + ",".join(i_str) + "\n")
            for row in range(J):
                line = [str(x) for x in demand_history[row, ]]
                info_file.write(str(alt_id[row]) + "," + ",".join(line) + "\n")

            demand_history_info_criteria = [500, 100, 50, 20]
            for criterion in demand_history_info_criteria:
                com_rows_index = where(movers <= criterion)[0]
                info_file.write("\nDemand History for alternatives with less than or equal to %s movers in 1998:\n" % criterion)
                i_str = [str(x) for x in range(i)]
                info_file.write("outer_iteration, (movers)," + ",".join(i_str) + "\n")
                for row in com_rows_index:
                    line = [str(x) for x in demand_history[row, ]]
                    info_file.write(str(alt_id[row]) + "," + ",".join(line) + "\n")

            #export prob correlation history
            correlation_indices, prob_correlation = self.compute_prob_correlation(
                probability_old, probability, prob_hat, index, resources)
            info_file.write("\nCorrelation of Probabilities:\n")
            c_name = ['corr(p_ij p~_ij)', 'corr(p_ij p^_ij)', 'corr(p_ij dummy)',
                      'corr(p~_ij p^_ij)', 'corr(p~_ij dummy)', 'corr(p^_ij dummy)']
            info_file.write("com_id, " + ",".join(c_name) + "\n")
            for row in range(correlation_indices.size):
                line = [str(x) for x in prob_correlation[row, ]]
                info_file.write(str(alt_id[correlation_indices[row]]) + "," + ",".join(line) + "\n")
            info_file.close()
            result['pi'] = pi
            logger.end_block()
            return result

    logger.end_block()
    try:
        info_file.close()
    except:
        pass
    raise RuntimeError("max iteration reached without convergence.")
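## The outer loop divides supply by demand with a guard for zero demand.
## A sketch of what safe_array_divide with return_value_if_denominator_is_zero
## is assumed to do (illustrative, not the opus_core implementation):
import numpy as np

def safe_ratio(supply, demand, value_if_zero=2.0):
    out = np.full(demand.shape, value_if_zero, dtype=np.float64)
    np.divide(supply, demand, out=out, where=(demand != 0))  # keeps value_if_zero where demand == 0
    return out

supply = np.array([10.0, 5.0, 3.0])
demand = np.array([4.0, 0.0, 6.0])
print(safe_ratio(supply, demand))  # [2.5, 2.0, 0.5]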
def run(self, dataset1, dataset2, index1=None, index2=None, sample_size=10, weight=None,
        include_chosen_choice=False, with_replacement=False, resources=None, dataset_pool=None):
    """This function samples sample_size (a scalar value) alternatives from dataset2
    for the agent set specified by dataset1.
    If index1 is not None, sample alternatives only for agents with indices in index1;
    if index2 is not None, sample alternatives only from indices in index2.
    sample_size specifies the number of alternatives to be sampled for each agent.
    weight, to be used as the sampling weight, is either an attribute name of dataset2,
    a 1d array of the same length as index2, or a 2d array of shape (index1.size, index2.size).
    Also refer to the documentation of interaction_dataset."""
    if dataset_pool is None:
        try:
            sc = SessionConfiguration()
            dataset_pool = sc.get_dataset_pool()
        except:
            dataset_pool = DatasetPool()

    local_resources = Resources(resources)
    local_resources.merge_if_not_None(
        {"dataset1": dataset1, "dataset2": dataset2,
         "index1": index1, "index2": index2,
         "sample_size": sample_size, "weight": weight,
         "with_replacement": with_replacement,
         "include_chosen_choice": include_chosen_choice})

    local_resources.check_obligatory_keys(['dataset1', 'dataset2', 'sample_size'])
    agent = local_resources["dataset1"]
    index1 = local_resources.get("index1", None)
    if index1 is None:
        index1 = arange(agent.size())
    choice = local_resources["dataset2"]
    index2 = local_resources.get("index2", None)
    if index2 is None:
        index2 = arange(choice.size())

    if index1.size == 0 or index2.size == 0:
        err_msg = "either choice size or agent size is zero, return None"
        logger.log_warning(err_msg)
        return None

    include_chosen_choice = local_resources.get("include_chosen_choice", False)
    J = local_resources["sample_size"]
    if include_chosen_choice:
        J = J - 1

    with_replacement = local_resources.get("with_replacement")

    weight = local_resources.get("weight", None)
    if isinstance(weight, str):
        if weight in choice.get_known_attribute_names():
            weight = choice.get_attribute(weight)
            rank_of_weight = 1
        else:
            varname = VariableName(weight)
            if varname.get_dataset_name() == choice.get_dataset_name():
                weight = choice.compute_variables(weight, dataset_pool=dataset_pool)
                rank_of_weight = 1
            elif varname.get_interaction_set_names() is not None:
                ## weights can be an interaction variable
                interaction_dataset = InteractionDataset(local_resources)
                weight = interaction_dataset.compute_variables(weight, dataset_pool=dataset_pool)
                rank_of_weight = 2
                assert (len(weight.shape) >= rank_of_weight)
            else:
                err_msg = ("weight is neither a known attribute name "
                           "nor a simple variable from the choice dataset "
                           "nor an interaction variable: '%s'" % weight)
                logger.log_error(err_msg)
                raise ValueError(err_msg)
    elif isinstance(weight, ndarray):
        rank_of_weight = weight.ndim
    elif not weight:  ## weight is None or an empty string
        weight = ones(index2.size)
        rank_of_weight = 1
    else:
        err_msg = "unknown weight type"
        logger.log_error(err_msg)
        raise TypeError(err_msg)

    if (weight.size != index2.size) and (weight.shape[rank_of_weight - 1] != index2.size):
        if weight.shape[rank_of_weight - 1] == choice.size():
            if rank_of_weight == 1:
                weight = take(weight, index2)
            if rank_of_weight == 2:
                weight = take(weight, index2, axis=1)
        else:
            err_msg = "weight array size doesn't match the size of dataset2 or its index"
            logger.log_error(err_msg)
            raise ValueError(err_msg)

    prob = normalize(weight)

    #chosen_choice = ones(index1.size) * UNPLACED_ID
    chosen_choice_id = agent.get_attribute(choice.get_id_name()[0])[index1]
    #index_of_placed_agent = where(greater(chosen_choice_id, UNPLACED_ID))[0]
    chosen_choice_index = choice.try_get_id_index(chosen_choice_id, return_value_if_not_found=UNPLACED_ID)
    chosen_choice_index_to_index2 = lookup(chosen_choice_index, index2, index_if_not_found=UNPLACED_ID)

    if rank_of_weight == 1:  # if the weight array is 1d, all agents share the same weights over choices
        replace = with_replacement  # sampling with no replacement
        non_zero_counts = nonzerocounts(weight)
        if non_zero_counts < J:
            logger.log_warning("weight array doesn't have enough non-zero counts, use sample with replacement")
            replace = True
        if non_zero_counts > 0:
            sampled_index = prob2dsample(index2, sample_size=(index1.size, J),
                                         prob_array=prob,
                                         exclude_index=chosen_choice_index_to_index2,
                                         replace=replace, return_index=True)
        else:
            # all alternatives have a zero weight
            sampled_index = zeros((index1.size, 0), dtype=DTYPE)
        #return index2[sampled_index]

    if rank_of_weight == 2:
        sampled_index = zeros((index1.size, J), dtype=DTYPE) - 1
        for i in range(index1.size):
            replace = with_replacement  # sampling with/without replacement
            i_prob = prob[i, :]
            if nonzerocounts(i_prob) < J:
                logger.log_warning("weight array doesn't have enough non-zero counts, use sample with replacement")
                replace = True
            #exclude_index passed to probsample_noreplace needs to be indexed to index2
            sampled_index[i, :] = probsample_noreplace(index2, sample_size=J,
                                                       prob_array=i_prob,
                                                       exclude_index=chosen_choice_index_to_index2[i],
                                                       return_index=True)

    sampling_prob = take(prob, sampled_index)
    sampled_index_within_prob = sampled_index.copy()
    sampled_index = index2[sampled_index]
    is_chosen_choice = zeros(sampled_index.shape, dtype="bool")
    #chosen_choice = -1 * ones(chosen_choice_index.size, dtype="int32")
    if include_chosen_choice:
        sampled_index = column_stack((chosen_choice_index[:, newaxis], sampled_index))
        is_chosen_choice = zeros(sampled_index.shape, dtype="bool")
        is_chosen_choice[chosen_choice_index != UNPLACED_ID, 0] = 1
        #chosen_choice[where(is_chosen_choice)[0]] = where(is_chosen_choice)[1]
        ## this is necessary because prob is indexed to index2, not to the choice set (as chosen_choice_index is)
        sampling_prob_for_chosen_choices = take(prob, chosen_choice_index_to_index2[:, newaxis])
        ## if the chosen choice equals unplaced_id then its sampling prob is 0
        sampling_prob_for_chosen_choices[where(chosen_choice_index == UNPLACED_ID)[0], ] = 0.0
        sampling_prob = column_stack([sampling_prob_for_chosen_choices, sampling_prob])

    interaction_dataset = self.create_interaction_dataset(dataset1, dataset2, index1, sampled_index)
    interaction_dataset.add_attribute(sampling_prob, '__sampling_probability')
    interaction_dataset.add_attribute(is_chosen_choice, 'chosen_choice')

    if local_resources.get("include_mnl_bias_correction_term", False):
        if include_chosen_choice:
            sampled_index_within_prob = column_stack(
                (chosen_choice_index_to_index2[:, newaxis], sampled_index_within_prob))
        interaction_dataset.add_mnl_bias_correction_term(prob, sampled_index_within_prob)

    ## to get the older returns
    #sampled_index = interaction_dataset.get_2d_index()
    #chosen_choices = UNPLACED_ID * ones(index1.size, dtype="int32")
    #where_chosen = where(interaction_dataset.get_attribute("chosen_choice"))
    #chosen_choices[where_chosen[0]] = where_chosen[1]
    #return (sampled_index, chosen_choice)

    return interaction_dataset
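## What the probsample_noreplace call above does for a single agent: weighted
## sampling of J alternatives without replacement, with the agent's chosen
## alternative excluded. A pure-NumPy sketch; names are illustrative:
import numpy as np

def sample_alternatives(index2, prob_row, J, exclude=None):
    p = prob_row.copy()
    if exclude is not None and 0 <= exclude < p.size:
        p[exclude] = 0.0                    # never sample the chosen alternative
    p = p / p.sum()                         # renormalize after the exclusion
    return np.random.choice(index2, size=J, replace=False, p=p)

# Example: 6 alternatives, the agent's chosen one is at position 2
prob = np.array([0.1, 0.1, 0.4, 0.2, 0.1, 0.1])
print(sample_alternatives(np.arange(6), prob, J=3, exclude=2))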
def run(self, dataset1, dataset2, index1=None, index2=None, stratum=None, weight=None,
        sample_size=1, sample_size_from_each_stratum=None, sample_size_from_chosen_stratum=None,
        sample_rate=None, include_chosen_choice=False, resources=None, with_replacement=False,
        dataset_pool=None, **kwargs):
    """This function samples sample_size (a scalar value) alternatives from dataset2
    for the agent set specified by dataset1.
    If index1 is not None, sample alternatives only for agents with indices in index1;
    if index2 is not None, sample alternatives only from indices in index2.
    sample_size specifies the number of alternatives to be sampled from each stratum,
    and is overwritten by sample_size_from_each_stratum if that is not None.
    weight, to be used as the sampling weight, is either an attribute name of dataset2,
    a 1d array of the same length as index2, or a 2d array of shape (index1.size, index2.size).
    Also refer to the documentation of interaction_dataset."""
    if dataset_pool is None:
        try:
            sc = SessionConfiguration()
            dataset_pool = sc.get_dataset_pool()
        except:
            dataset_pool = DatasetPool()

    local_resources = Resources(resources)
    local_resources.merge_if_not_None(
        {"dataset1": dataset1, "dataset2": dataset2,
         "index1": index1, "index2": index2,
         "with_replacement": with_replacement,
         "stratum": stratum, "weight": weight,
         "sample_size": sample_size,
         "sample_size_from_each_stratum": sample_size_from_each_stratum,
         "sample_size_from_chosen_stratum": sample_size_from_chosen_stratum,
         "sample_rate": sample_rate,
         "include_chosen_choice": include_chosen_choice})

    local_resources.check_obligatory_keys(['dataset1', 'dataset2'])
    index1 = local_resources.get("index1", None)
    agent = dataset1
    if index1 is None:
        agent.get_id_attribute()
        index1 = arange(agent.size())
    choice = local_resources["dataset2"]
    index2 = local_resources.get("index2", None)
    if index2 is None:
        choice.get_id_attribute()
        index2 = arange(choice.size())

    if index1.size == 0 or index2.size == 0:
        err_msg = "either choice size or agent size is zero, return None"
        logger.log_warning(err_msg)
        return (None, None)

    include_chosen_choice = local_resources.get("include_chosen_choice", False)
    weight = local_resources.get("weight", None)
    if isinstance(weight, str):
        choice.compute_variables(weight, resources=local_resources)
        weight = choice.get_attribute(weight)
        rank_of_weight = 1
    elif isinstance(weight, ndarray):
        rank_of_weight = weight.ndim
    elif weight is None:
        weight = ones(index2.size)
        rank_of_weight = 1
    else:
        err_msg = "unknown weight type"
        logger.log_error(err_msg)
        raise TypeError(err_msg)

    if (weight.size != index2.size) and (weight.shape[rank_of_weight - 1] != index2.size):
        if weight.shape[rank_of_weight - 1] == choice.size():
            weight = take(weight, index2)
        else:
            err_msg = "weight array size doesn't match the size of dataset2 or its index"
            logger.log_error(err_msg)
            raise ValueError(err_msg)

    prob = normalize(weight)

    stratum = local_resources.get("stratum", None)
    if stratum is None:
        raise StandardError("'stratum' must be defined for stratified sampling.")
    if isinstance(stratum, str):
        choice.compute_variables(stratum, resources=local_resources)
        stratum = choice.get_attribute(stratum)

    #chosen_choice = ones(index1.size) * UNPLACED_ID
    chosen_choice_id = agent.get_attribute(choice.get_id_name()[0])[index1]
    #index_of_placed_agent = where(greater(chosen_choice_id, UNPLACED_ID))[0]
    chosen_choice_index = choice.try_get_id_index(chosen_choice_id, return_value_if_not_found=-1)
    chosen_choice_index_to_index2 = lookup(chosen_choice_index, index2, index_if_not_found=UNPLACED_ID)

    ##TODO: check that all chosen strata are in the selectable strata,
    ##i.e. that chosen_choice_index is in index2
    chosen_stratum = ones(chosen_choice_index.size, dtype=DTYPE) * NO_STRATUM_ID
    chosen_stratum[where(chosen_choice_index != -1)] = stratum[chosen_choice_index[where(chosen_choice_index != -1)]]
    selectable_strata = stratum[index2]
    unique_strata = unique(selectable_strata)
    unique_strata = unique_strata[where(unique_strata != NO_STRATUM_ID)]

    #if rank_of_weight == 2:
    #    raise RuntimeError("stratified sampling for 2d weight is not implemented yet")
    #sampled_index = zeros((index1.size, 1)) - 1

    sample_size = local_resources.get("sample_size", None)
    sample_size_from_each_stratum = local_resources.get("sample_size_from_each_stratum", None)
    if sample_size_from_each_stratum is None:
        sample_size_from_each_stratum = sample_size
    strata_sample_size = ones(unique_strata.size, dtype=DTYPE) * sample_size_from_each_stratum

    sample_rate = local_resources.get("sample_rate", None)
    if sample_rate is not None:
        raise NotImplementedError("sample_rate is not implemented yet.")
        ##TODO: to be finished
        #num_elements_in_strata = histogram(selectable_strata, unique_strata)
        #strata_sample_size = round(num_elements_in_strata * sample_rate)

    sample_size_from_chosen_stratum = local_resources.get("sample_size_from_chosen_stratum", None)
    if sample_size_from_chosen_stratum is None and not include_chosen_choice:
        strata_sample_pairs = array(map(lambda x, y: [x, y], unique_strata, strata_sample_size))
        if rank_of_weight == 1:
            sampled_index = self._sample_by_stratum(index1, index2, selectable_strata, prob,
                                                    chosen_choice_index_to_index2, strata_sample_pairs)
        elif rank_of_weight == 2:
            sampled_index = self._sample_by_agent_and_stratum(index1, index2, selectable_strata, prob,
                                                              chosen_choice_index_to_index2, strata_sample_pairs)
    else:
        strata_sample_setting = zeros((index1.size, unique_strata.size, 2), dtype=DTYPE)
        for i in range(index1.size):
            agents_strata_sample_size = copy.copy(strata_sample_size)
            if sample_size_from_chosen_stratum is None:
                ## if sample_size_from_chosen_stratum is None and include_chosen_choice is True,
                ## sample one less from the chosen stratum
                agents_strata_sample_size[where(unique_strata == chosen_stratum[i])] -= 1
            else:
                agents_strata_sample_size[where(unique_strata == chosen_stratum[i])] = sample_size_from_chosen_stratum
            strata_sample_pairs = array(map(lambda x, y: [x, y], unique_strata, agents_strata_sample_size))
            strata_sample_setting[i, ...] = strata_sample_pairs
        sampled_index = self._sample_by_agent_and_stratum(index1, index2, selectable_strata, prob,
                                                          chosen_choice_index_to_index2, strata_sample_setting)

    #chosen_choice = None
    is_chosen_choice = zeros(sampled_index.shape, dtype="bool")
    if include_chosen_choice:
        sampled_index = concatenate((chosen_choice_index[:, newaxis], sampled_index), axis=1)
        #chosen_choice = zeros(chosen_choice_index.shape, dtype="int32") - 1
        #chosen_choice[where(chosen_choice_index>UNPLACED_ID)] = 0
        ## index chosen_choice into sampled_index instead of into choice (as chosen_choice_index does);
        ## since the chosen choice index is attached as the first column, the chosen choice is
        ## all zeros for every valid chosen_choice_index
        is_chosen_choice = zeros(sampled_index.shape, dtype="bool")
        is_chosen_choice[chosen_choice_index != UNPLACED_ID, 0] = 1

        chosen_probability = zeros((chosen_choice_index.size,), dtype=float32) - 1
        for stratum in unique_strata:
            w = chosen_stratum == stratum
            chosen_probability[w] = (prob[chosen_choice_index[w]] /
                                     prob[selectable_strata == stratum].sum()).astype(float32)
        self._sampling_probability = concatenate((chosen_probability[:, newaxis],
                                                  self._sampling_probability), axis=1)
        self._stratum_id = concatenate((chosen_stratum[:, newaxis], self._stratum_id), axis=1)

    interaction_dataset = self.create_interaction_dataset(dataset1, dataset2, index1, sampled_index)
    interaction_dataset.add_attribute(self._sampling_probability, '__sampling_probability')
    interaction_dataset.add_attribute(is_chosen_choice, 'chosen_choice')
    interaction_dataset.add_attribute(self._stratum_id, 'stratum_id')

    ## to get the older returns
    #sampled_index = interaction_dataset.get_2d_index()
    #chosen_choices = UNPLACED_ID * ones(index1.size, dtype="int32")
    #where_chosen = where(interaction_dataset.get_attribute("chosen_choice"))
    #chosen_choices[where_chosen[0]] = where_chosen[1]
    #return (sampled_index, chosen_choice)

    return interaction_dataset
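## The chosen-probability loop above computes, for each agent, the sampling
## probability of its chosen alternative within its own stratum: the chosen
## alternative's weight divided by the stratum's total weight. A NumPy sketch:
import numpy as np

def chosen_within_stratum_prob(prob, strata, chosen_idx, chosen_stratum):
    out = np.zeros(chosen_idx.shape, dtype=np.float32) - 1   # -1 marks agents with no valid stratum
    for s in np.unique(chosen_stratum):
        w = chosen_stratum == s
        out[w] = prob[chosen_idx[w]] / prob[strata == s].sum()
    return out

prob = np.array([0.1, 0.2, 0.3, 0.4])    # normalized weights over 4 alternatives
strata = np.array([1, 1, 2, 2])          # stratum of each alternative
print(chosen_within_stratum_prob(prob, strata, np.array([0, 3]), np.array([1, 2])))
# [0.1/0.3, 0.4/0.7] ~ [0.3333, 0.5714]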