def get_all_data(self, submodel=-2):
    """Return the data stored for the given submodel, or None if there is none.
    The model must have a property 'data', a dictionary keyed by submodel.
    Meant to be used for analyzing estimation data."""
    if submodel in self.data.keys():
        return self.data[submodel]
    logger.log_warning("No available data for submodel %s." % submodel)
    return None
def load(self, resources=None, in_storage=None, in_table_name=None):
    """Load coefficients (submodel ids, coefficient names, estimates, standard errors
    and any other requested measures) from in_storage."""
    local_resources = Resources(resources)
    local_resources.merge_with_defaults({
        "field_submodel_id": self.field_submodel_id,
        "field_coefficient_name": self.field_coefficient_name,
        "field_estimate": self.field_estimate,
        "field_standard_error": self.field_standard_error,
        "other_fields": self.other_fields
    })
    if in_storage is not None:
        self.in_storage = in_storage
    if not isinstance(self.in_storage, Storage):
        logger.log_warning("in_storage has to be of type Storage. No coefficients loaded.")
    else:
        data = self.in_storage.load_table(table_name=in_table_name)
        submodels = data[local_resources["field_submodel_id"]]
        self.names = data[local_resources["field_coefficient_name"]]
        self.values = data[local_resources["field_estimate"]]
        self.standard_errors = data[local_resources["field_standard_error"]]
        for measure in local_resources["other_fields"]:
            if measure in data.keys():
                self.other_measures[measure] = data[measure]
        if submodels.max() >= 0:
            self.submodels = submodels
        self.check_consistency()
def _add(self, amount=0, attribute='', dataset=None, index=None, data_dict=None, **kwargs):
    if data_dict is None:  # avoid a mutable default argument
        data_dict = {}
    new_data = {}
    dataset_known_attributes = dataset.get_known_attribute_names()
    if index.size > 0:  # sample from existing agents
        lucky_index = sample_replace(index, amount)
        for attr in dataset_known_attributes:
            new_data[attr] = dataset.get_attribute_by_index(attr, lucky_index)
    else:
        ## if attributes are not fully specified, the missing attributes will be filled with 0's
        for attr in dataset.get_primary_attribute_names():
            if attr in data_dict:
                new_data[attr] = resize(array(data_dict[attr]), amount)
            else:
                if attr == dataset.get_id_name()[0]:
                    new_data[attr] = zeros(amount, dtype=dataset.get_id_attribute().dtype)
                else:
                    logger.log_warning(
                        "Attribute %s is unspecified for 'add' event; its value will be sampled from all %s values of %s."
                        % (attr, attr, dataset.get_dataset_name()))
                    new_data[attr] = sample_replace(dataset.get_attribute(attr), amount)
    dataset.add_elements(data=new_data, change_ids_if_not_unique=True)
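# A minimal, hypothetical sketch of the sampling-with-replacement step used by _add
# above, written with plain numpy instead of the opus_core sample_replace helper.
# The function name and the index/amount values are illustrative, not part of the
# original code.
import numpy

def sample_with_replacement_sketch(index, amount, rng=None):
    """Draw `amount` entries from `index` with replacement (stand-in for sample_replace)."""
    if rng is None:
        rng = numpy.random.RandomState(0)
    return rng.choice(index, size=amount, replace=True)

# Example: clone 5 new agents from 3 existing ones.
existing_index = numpy.array([10, 11, 12])
lucky_index = sample_with_replacement_sketch(existing_index, 5)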
def compute(self, dataset_pool): interaction_dataset = self.get_dataset() travel_data = dataset_pool.get_dataset('travel_data') travel_data_attr_mat = travel_data.get_attribute_as_matrix( self.travel_data_attribute, fill=self.default_value) var1 = interaction_dataset.get_dataset(1).get_attribute_by_index( self.agent_zone_id, interaction_dataset.get_2d_index_of_dataset1()) var2 = interaction_dataset.get_2d_dataset_attribute( self.location_zone_id) if self.direction_from_home: home_zone = var1.astype("int32") work_zone = var2.astype("int32") else: home_zone = var2.astype("int32") work_zone = var1.astype("int32") results = resize(array([self.default_value], dtype=float32), home_zone.shape) results = travel_data_attr_mat[home_zone, work_zone] missing_pairs_index = travel_data.get_od_pair_index_not_in_dataset( home_zone, work_zone) if missing_pairs_index[0].size > 0: results[missing_pairs_index] = self.default_value logger.log_warning( "zone pairs at index %s are not in travel data; value set to %s." % (str(missing_pairs_index), self.default_value)) return results
def create_logit_data(self, coefficients, index=None):
    """Create a data array corresponding to the specified coefficients
    (i.e. coefficients connected to a specification), one variable per column.
    'coefficients' is of type "SpecifiedCoefficientsFor1Submodel". If 'index' is not None,
    it is considered an index (1D array) of dataset1 determining which individuals
    should be considered.
    Return a 3D array (nobservations|len(index) x nequations x nvariables).
    """
    shape = coefficients.getshape()
    neqs, nvar = shape[0:2]
    other_dims = ()
    if len(shape) > 2:
        other_dims = shape[2:]
    nparenteqs = coefficients.parent.nequations()
    if (neqs != self.get_reduced_m()) and (nparenteqs != self.get_reduced_m()):
        self._raise_error(StandardError,
                          "create_logit_data: Mismatch in number of equations and size of dataset2.")

    if index is not None:
        nobs = index.size
    else:
        nobs = self.get_reduced_n()
        index = arange(nobs)

    variables = coefficients.get_full_variable_names()
    mapping = coefficients.get_coefficient_mapping()
    # Fill the x array from the data array
    data_shape = tuple([nobs, neqs, nvar] + list(other_dims))
    try:
        x = zeros(data_shape, dtype=float32)
    except MemoryError:  # allocation failed; free unused attributes and retry
        logger.log_warning("Not enough memory. Deleting not used attributes.",
                           tags=["memory", "logit"])
        var_names = map(lambda x: x.get_alias(), variables)
        self.dataset1.unload_not_used_attributes(var_names)
        self.dataset2.unload_not_used_attributes(var_names)
        collect()
        x = zeros(data_shape, dtype=float32)
    if (len(variables) <= 0) or (nobs <= 0):
        return x
    for ivar in range(nvar):  # iterate over variables
        if variables[ivar].is_constant_or_reserved_name():
            c = where(mapping[:, ivar] < 0, 0.0, 1)
            x[:, :, ivar] = c
        else:
            data = ma.filled(self.get_attribute(variables[ivar]), 0.0)[index, ]
            if neqs < nparenteqs:
                data = take(data, coefficients.get_equations_index(), axis=1)
            if x.ndim > 3:
                data = resize(data, tuple(list(x.shape[0:2]) + list(other_dims)))
            x[:, :, ivar] = data
    return x
def compute(self, dataset_pool): interaction_dataset = self.get_dataset() travel_data = dataset_pool.get_dataset('travel_data') travel_data_attr_mat = travel_data.get_attribute_as_matrix(self.travel_data_attribute, fill=self.default_value) zone1 = interaction_dataset.get_dataset(1).get_attribute_by_index(self.agent_zone_id, interaction_dataset.get_2d_index_of_dataset1()) zone2 = interaction_dataset.get_2d_dataset_attribute(self.location_zone_id) if self.direction_from_home: home_zone = zone1.astype("int32") work_zone = zone2.astype("int32") else: home_zone = zone2.astype("int32") work_zone = zone1.astype("int32") results = resize(array([self.default_value], dtype=float32), home_zone.shape) results = travel_data_attr_mat[home_zone, work_zone] missing_pairs_index = travel_data.get_od_pair_index_not_in_dataset(home_zone, work_zone) if missing_pairs_index[0].size > 0: results[missing_pairs_index] = self.default_value logger.log_warning("Values for %s O-D pairs in %s interaction dataset reset." % ( missing_pairs_index[0].size, home_zone.shape ) ) logger.log_warning("O-D pairs below do not appear in travel data; their values set to %s:\n %s" % ( self.default_value, str(array(zip(home_zone[missing_pairs_index], work_zone[missing_pairs_index]))) ) ) return results
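# Hedged sketch of the core lookup in the compute() methods above: indexing a
# zone-by-zone travel-data matrix with paired home/work zone arrays. Pure numpy,
# with invented zone numbers; the real code obtains the matrix from the travel_data
# dataset and resets O-D pairs missing from the data to default_value.
import numpy

travel_time = numpy.array([[0., 10., 20.],
                           [10., 0., 15.],
                           [20., 15., 0.]])          # matrix indexed by zone id (0..2 here)
home_zone = numpy.array([[0, 1], [2, 2]], dtype="int32")
work_zone = numpy.array([[1, 1], [0, 1]], dtype="int32")

# element-wise O-D lookup, same shape as home_zone/work_zone
results = travel_time[home_zone, work_zone]
print(results)   # [[10.  0.] [20. 15.]]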
def compute(self, dataset_pool):
    zone_set = self.get_dataset()
    travel_data = dataset_pool.get_dataset('travel_data')
    from_zone_id = travel_data.get_attribute('from_zone_id')
    zone_ids = zone_set.get_attribute('zone_id')
    time = travel_data.get_attribute(self.time_attribute_name)
    trips = travel_data.get_attribute(self.trips_attribute_name)

    numerator = array(ndimage_sum(time * trips, labels=from_zone_id, index=zone_ids))
    denominator = array(ndimage_sum(trips, labels=from_zone_id, index=zone_ids), dtype=float32)

    # If there is a division by zero, substitute the values from the zone one below;
    # if there are contiguous runs of zero division, the values propagate upon iteration.
    no_trips_from_here = where(denominator == 0)[0]
    while no_trips_from_here.size != 0:
        if no_trips_from_here.size == denominator.size:
            logger.log_warning("%s attribute of travel_data is all zeros; %s returns all zeros"
                               % (self.trips_attribute_name, self.name()))
            break
        substitute_locations = no_trips_from_here - 1  # a mapping: what zone the new data will come from
        if substitute_locations[0] < 0:
            substitute_locations[0] = 1
        numerator[no_trips_from_here] = numerator[substitute_locations]
        denominator[no_trips_from_here] = denominator[substitute_locations]
        no_trips_from_here = where(denominator == 0)[0]
    return safe_array_divide(numerator, denominator)
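# Hedged sketch of the labelled aggregation used above, assuming scipy.ndimage.sum
# is the ndimage_sum being imported; the trip and time numbers are invented.
import numpy
from scipy import ndimage

from_zone_id = numpy.array([1, 1, 2, 2, 3])
time = numpy.array([10., 20., 5., 15., 30.])
trips = numpy.array([2., 1., 0., 4., 3.])
zone_ids = numpy.array([1, 2, 3])

numerator = numpy.array(ndimage.sum(time * trips, labels=from_zone_id, index=zone_ids))
denominator = numpy.array(ndimage.sum(trips, labels=from_zone_id, index=zone_ids))
# trip-weighted average travel time per origin zone, guarding against zero-trip zones
average_time = numpy.where(denominator > 0, numerator / numpy.maximum(denominator, 1), 0.)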
def write(self, resources=None, out_storage=None, out_table_name=None):
    """Write the equation specification (submodel ids, equation ids, coefficient and
    variable names, optional fixed values and other fields) to out_storage."""
    local_resources = Resources(resources)
    local_resources.merge_with_defaults({
        "field_submodel_id": self.field_submodel_id,
        "field_equation_id": self.field_equation_id,
        "field_coefficient_name": self.field_coefficient_name,
        "field_variable_name": self.field_variable_name,
        "field_fixed_value": self.field_fixed_value,
        "out_table_name": out_table_name
    })
    if out_storage is not None:
        self.out_storage = out_storage
    if not isinstance(self.out_storage, Storage):
        logger.log_warning("out_storage has to be of type Storage. No EquationSpecifications written.")
        return

    submodel_ids = self.get_submodels()
    if submodel_ids.size == 0:
        # set submodel_id to -2 when there are no submodels or only one submodel
        submodel_ids = resize(array([-2], dtype="int32"), len(self.get_coefficient_names()))
    equation_ids = self.get_equations()
    if equation_ids.size == 0:
        equation_ids = resize(array([-2], dtype="int32"), submodel_ids.size)

    values = {
        local_resources["field_submodel_id"]: submodel_ids,
        local_resources["field_equation_id"]: equation_ids,
        local_resources["field_coefficient_name"]: self.get_coefficient_names(),
        local_resources["field_variable_name"]: self.get_long_variable_names()
    }
    if self.fixed_values.size > 0:
        values[local_resources["field_fixed_value"]] = self.fixed_values
    for field in self.other_fields.keys():
        values[field] = self.other_fields[field]

    types = {
        local_resources["field_submodel_id"]: 'integer',
        local_resources["field_equation_id"]: 'integer',
        local_resources["field_coefficient_name"]: 'text',
        local_resources["field_variable_name"]: 'text'
    }

    local_resources.merge({
        "values": values,
        'valuetypes': types,
        "drop_table_flag": 1
    })

    self.out_storage.write_table(table_name=local_resources['out_table_name'],
                                 table_data=local_resources['values'])
def try_convert_to_float(x):
    try:
        return float(x)
    except (ValueError, TypeError):
        # full_file_name is expected to be defined in the enclosing scope
        logger.log_warning('Invalid value in %s: %s' % (full_file_name, x))
        return 0
def _write_results(self, indicators, source_data, file_name_for_indicator_results, display_error_box): #generate a static html page for browsing outputted indicators and store the path to the html results_page_path = None results = IndicatorResults() try: results_page_path = results.create_page( source_data = source_data, page_name = file_name_for_indicator_results, indicators = indicators) except: message = 'failed to generate indicator results page' if display_error_box: display_message_dialog(message) logger.enable_hidden_error_and_warning_words() logger.log_warning(message) logger.disable_hidden_error_and_warning_words() if results_page_path is not None: results_page_path = 'file://' + results_page_path return results_page_path
def _delete(self, agents_pool, amount,
            agent_dataset, location_dataset,
            this_refinement,
            dataset_pool):
    """Similar to the subtract action, but instead of unplacing agents it removes
    them from the agent dataset; those agents won't be available for later actions."""
    fit_index = self.get_fit_agents_index(agent_dataset,
                                          this_refinement.agent_filter,
                                          '',
                                          dataset_pool)
    if amount > fit_index.size or amount < 0:
        logger.log_warning("Request to delete %i agents, but there are %i agents in total satisfying %s; "
                           "deleting %i agents instead" % (amount, fit_index.size,
                                                           this_refinement.agent_filter,
                                                           fit_index.size))
        amount = fit_index.size
    if amount == fit_index.size:
        movers_index = fit_index
    else:
        movers_index = sample_noreplace(fit_index, amount)
    agents_pool = list(set(agents_pool) - set(movers_index))

    agent_dataset.remove_elements(array(movers_index))
def correct_infinite_values(self, dataset, outcome_attribute_name, maxvalue=1e+38,
                            clip_all_larger_values=False):
    """Check if the model resulted in infinite values. If so, print a warning and
    clip the values to maxvalue. If clip_all_larger_values is True, all values larger
    than maxvalue are clipped to maxvalue.
    """
    infidx = where(dataset.get_attribute(outcome_attribute_name) == inf)[0]

    if infidx.size > 0:
        logger.log_warning("Infinite values in %s. Clipped to %s." % (outcome_attribute_name, maxvalue))
        dataset.set_values_of_one_attribute(outcome_attribute_name, maxvalue, infidx)
    if clip_all_larger_values:
        idx = where(dataset.get_attribute(outcome_attribute_name) > maxvalue)[0]
        if idx.size > 0:
            logger.log_warning("Values in %s larger than %s. Clipped to %s."
                               % (outcome_attribute_name, maxvalue, maxvalue))
            dataset.set_values_of_one_attribute(outcome_attribute_name, maxvalue, idx)
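# Hedged stand-alone sketch of the same idea with plain numpy arrays: find infinite
# entries and clip them (and optionally everything above maxvalue). The function name
# and input values are illustrative only.
import numpy

def clip_infinite(values, maxvalue=1e+38, clip_all_larger_values=False):
    values = numpy.asarray(values, dtype=numpy.float64).copy()
    infidx = numpy.where(numpy.isinf(values))[0]
    if infidx.size > 0:
        values[infidx] = maxvalue
    if clip_all_larger_values:
        values = numpy.minimum(values, maxvalue)
    return values

print(clip_infinite([1.0, numpy.inf, 2e40], clip_all_larger_values=True))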
def import_run_from_cache(self, cache_directory, run_info={}): baseyear = run_info.get('baseyear', -1) years = self.get_years_run(cache_directory, baseyear=baseyear) if years == []: msg = 'Cannot import run from %s: it contains no data for simulation years' % cache_directory logger.log_warning(msg) return (False, msg) else: run_id = run_manager._get_new_run_id() run_name = run_info.get('run_name', os.path.basename(cache_directory)) start_year, end_year = min(years), max(years) project_name = os.environ.get('OPUSPROJECTNAME', None) resources = {'cache_directory': cache_directory, 'description': 'run imported from cache', 'years': (start_year, end_year), 'project_name': project_name } resources.update(run_info) self.add_row_to_history(run_id=run_id, run_name=run_name, resources=resources, status='done',) return (True, '')
def compute(self, dataset_pool):
    with logger.block(name="compute variable persons_within_DDD_of_parcel with DDD=%s" % self.radius, verbose=False):
        results = None
        with logger.block(name="trying to read cache file %s" % self.cache_file_name, verbose=False):
            try:
                results = self._load_results()
            except IOError:
                logger.log_warning("Cache file could not be loaded")
        with logger.block(name="initialize datasets", verbose=False):
            parcels = self.get_dataset()
            arr = self.get_dataset().sum_dataset_over_ids(dataset_pool.get_dataset('household'),
                                                          attribute_name="persons")
        if results is None:
            with logger.block(name="initialize coords", verbose=False):
                coords = column_stack((parcels.get_attribute("x_coord_sp"),
                                       parcels.get_attribute("y_coord_sp")))
            with logger.block(name="build KDTree", verbose=False):
                kd_tree = KDTree(coords, 100)
            with logger.block(name="compute"):
                results = kd_tree.query_ball_tree(kd_tree, self.radius)
            with logger.block(name="cache"):
                if not SimulationState().cache_directory_exists():
                    logger.log_warning("Cache directory does not exist; creating it.")
                    SimulationState().create_cache_directory()
                self._cache_results(results)
        with logger.block(name="sum results", verbose=False):
            return_values = array(map(lambda l: arr[l].sum(), results))
        return return_values
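# Hedged sketch of the spatial query that compute() relies on above: build a KDTree
# over parcel coordinates and, for every parcel, sum an attribute over all parcels
# within a given radius. Uses scipy.spatial.KDTree; coordinates and person counts
# are made up.
import numpy
from scipy.spatial import KDTree

coords = numpy.array([[0., 0.], [1., 0.], [0., 1.], [10., 10.]])
persons = numpy.array([2, 3, 1, 5])
radius = 2.0

kd_tree = KDTree(coords, leafsize=100)
neighbors = kd_tree.query_ball_tree(kd_tree, radius)   # list of index lists, one per parcel
persons_within_radius = numpy.array([persons[idx].sum() for idx in neighbors])
print(persons_within_radius)   # [6 6 6 5]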
def run(self, config, year, *args, **kwargs):
    """Runs the travel model, using appropriate info from config. """
    tm_config = config["travel_model_configuration"]
    self.prepare_for_run(tm_config, year)

    project_year_dir = get_project_year_dir(tm_config, year)
    # year_dir = tm_config[year]  #'CoreEA0511202006\\urbansim\\2001'
    # dir_part1, dir_part2 = os.path.split(year_dir)
    # while dir_part1:
    #     dir_part1, dir_part2 = os.path.split(dir_part1)
    # project_year_dir = os.path.join(tm_data_dir, dir_part2)  #C:/SEMCOG_baseline/CoreEA0511202006

    logger.log_status('Start travel model from directory %s for year %d' % (project_year_dir, year))
    #for macroname, ui_db_file in tm_config['macro']['run_semcog_travel_model'].iteritems():
        #pass
    macroname, ui_db_file = tm_config['macro']['run_semcog_travel_model'], tm_config['ui_file']

    loops = 1
    logger.log_status('Running travel model ...')
    tcwcmd = win32api.GetShortPathName(tm_config['transcad_binary'])

    os.system('start /B "start TransCAD" %s' % tcwcmd)  # start TransCAD in background
    time.sleep(1)
    #os.system("%s -a %s -ai '%s'" % (tcwcmd, ui_db_file, macroname))
    run_transcad_macro(macroname, ui_db_file, loops)

    try:
        pass
        ##win32process.TerminateProcess(self.hProcess, 0)
    except:
        logger.log_warning("Failed to terminate the TransCAD process that was started.")
def main(): option_group = EstimationOptionGroup() parser = option_group.parser (options, args) = parser.parse_args() if options.model_name is None: raise StandardError, "Model name (argument -m) must be given." if (options.configuration_path is None) and (options.xml_configuration is None): raise StandardError, "Configuration path (argument -c) or XML configuration (argument -x) must be given." if (options.specification is None) and (options.xml_configuration is None): logger.log_warning("No specification given (arguments -s or -x). Specification taken from the cache.") if options.xml_configuration is not None: xconfig = XMLConfiguration(options.xml_configuration) else: xconfig = None if options.configuration_path is None: config = None else: config = get_config_from_opus_path(options.configuration_path) estimator = EstimationRunner(model=options.model_name, specification_module=options.specification, xml_configuration=xconfig, model_group=options.model_group, configuration=config, save_estimation_results=options.save_results) estimator.estimate() return estimator
def export_dataset(self, dataset_name, in_storage, out_storage, overwrite=True, out_dataset_name=None, nchunks = 1, **kwargs): if not overwrite and dataset_name in out_storage.get_table_names(): logger.log_note('Dataset %s ignored because it already exists in OPUS' % dataset_name) return with logger.block('Exporting dataset %s' % dataset_name): if out_dataset_name is None: out_dataset_name = dataset_name cols_in_this_chunk = in_storage.ALL_COLUMNS if nchunks > 1: colnames = in_storage.get_column_names(dataset_name) chunk_size = int(ceil(len(colnames) / float(nchunks))) for chunk in range(nchunks): if nchunks > 1: cols_in_this_chunk = colnames[int(chunk*chunk_size):int((chunk+1)*chunk_size)] with logger.block('Loading %s - chunk %s out of %s' % (dataset_name, chunk+1, nchunks)): values_from_storage = in_storage.load_table(dataset_name, column_names=cols_in_this_chunk) length = len(values_from_storage) and len(values_from_storage.values()[0]) if length == 0: logger.log_warning("Dataset %s ignored because it's empty" % dataset_name) return with logger.block('Storing %s' % dataset_name): if chunk > 0: kwargs['mode'] = out_storage.APPEND out_storage.write_table(out_dataset_name, values_from_storage, **kwargs) logger.log_note("Exported %s records for dataset %s" % (length, dataset_name))
def _get_PER_YEAR_form(self, dataset_name, attributes, primary_keys, years): per_year_data = {} cols = [computed_name for name, computed_name in attributes] for year in years: table_data = self.input_stores[year].load_table( table_name = dataset_name, column_names = primary_keys + cols) data_subset = {} for col in cols: col_name = self._get_year_replaced_attribute(attribute = col, year = year) if col in table_data: data_subset[col_name] = table_data[col] else: logger.log_warning('No indicator %s loaded!'%col) for key in primary_keys: data_subset[key] = table_data[key] per_year_data[year] = data_subset return per_year_data
def compute(self, dataset_pool): interaction_dataset = self.get_dataset() zones = dataset_pool.get_dataset('zone') travel_data = dataset_pool.get_dataset('travel_data') travel_data_attr_mat = travel_data.get_attribute_as_matrix(self.travel_data_attribute, fill=self.travel_data_attribute_default_value) agent_resource = interaction_dataset.get_dataset(1).get_attribute_by_index(self.agent_resource, interaction_dataset.get_2d_index_of_dataset1()) var1 = interaction_dataset.get_dataset(1).get_attribute_by_index(self.agent_zone_id, interaction_dataset.get_2d_index_of_dataset1()) var2 = interaction_dataset.get_2d_dataset_attribute(self.choice_zone_id) if self.direction_from_agent_to_choice: from_zone = var1.astype("int32") to_zone = var2.astype("int32") else: from_zone = var2.astype("int32") to_zone = var1.astype("int32") results = resize(array([self.default_value], dtype=self._return_type), from_zone.shape) zone_ids = zones.get_id_attribute() for zone in zone_ids: tmp_zone = zone * ones(from_zone.shape, dtype="int32") t1 = travel_data_attr_mat[from_zone, tmp_zone] t2 = travel_data_attr_mat[tmp_zone, to_zone] results[where( t1 + t2 <= agent_resource)] += zones.get_attribute_by_id(self.zone_attribute_to_access, zone) missing_pairs_index = travel_data.get_od_pair_index_not_in_dataset(from_zone, to_zone) if missing_pairs_index[0].size > 0: results[missing_pairs_index] = self.default_value logger.log_warning("zone pairs at index %s are not in travel data; value set to %s." % ( str(missing_pairs_index), self.default_value) ) return results
def _find_opus_test_cases_for_package(self, package, test_case_class):
    root = OpusPackage().get_path_for_package(package)

    modules_with_test_cases = []

    for path, dirs, files in os.walk(root, topdown=True):
        for file in files:
            if not file.endswith('.py'):
                continue

            f = open(os.path.join(path, file), 'r')
            import_pattern = re.compile('^\s*(import|from).*unittest')
            skip_pattern = re.compile('^.*#.*IGNORE_THIS_FILE')

            found_import = False
            for line in f:
                if skip_pattern.match(line):
                    break
                if import_pattern.match(line):
                    found_import = True
                    break
            if not found_import:  # no unittest import found in this file
                continue

            module_name = self._get_module_name(package, root, path, file)

            try:
                exec('import %s' % module_name)
            except Exception, val:
                logger.log_error("Could not import %s!" % module_name)
                traceback.print_exc()
                continue

            module = eval(module_name)

            if inspect.ismodule(module):
                members = inspect.getmembers(module)
                member_dict = {}
                for key, value in members:
                    member_dict[key] = value

                for key in member_dict.keys():
                    try:
                        is_subclass = issubclass(member_dict[key], test_case_class)
                    except:
                        pass
                    else:
                        if is_subclass:
                            class_name = member_dict[key].__name__
                            modules_with_test_cases.append((module_name, class_name))
            else:
                logger.log_warning('%s is not a module!' % module)

    return modules_with_test_cases
def _update_submodel(self, current_node, edited_node): ''' Updating a submodel node (current_node) based on an edited version of it (edited_node)''' # the effect of renaming a shadowing node is that a new (local) copy is created and # the inherited node is reinserted. If the user did not rename the node we overwrite # the old submodel with the new values. name_change = current_node.get('name') != edited_node.get('name') if self.project.is_shadowing(current_node) and name_change: parent_node = current_node.getparent() row = parent_node.index(current_node) new_submodel_node = self.project.insert_node( edited_node, parent_node, row) if new_submodel_node is None: msg = ('Tried to insert a new submodel (%s) but failed. ' 'The recent submodel changes have been lost.' % current_node.get('name')) logger.log_warning(msg) self.project.delete_node(current_node) else: for key in edited_node.attrib: current_node.attrib[key] = edited_node.attrib[key] for child in current_node: current_node.remove(child) for child in edited_node: current_node.append(child) self.project.dirty = True
def run(self, utilities=None, resources=None):
    """ Return a probability array obtained from a RateDataset. 'resources' must contain
    an entry with name self.rate_set (a RateDataset) and an entry self.agent_set that is
    able to provide attributes defined in the rate_set columns. If the rate table cannot
    be found, a warning is logged and an array of zeros is returned.
    """
    if self.agent_set:
        agents = resources.get(self.agent_set, None)
    else:
        agents = resources.get('agent_set', None)
        #if agents is not None: self.agent_set = agents.get_dataset_name()
    if agents is None:
        raise RuntimeError("Unable to get agent set " + self.agent_set)
    if self.rate_set:
        rates = resources.get(self.rate_set, None)
    else:
        rates = resources.get('rate_set', None)
        #if rates is not None: self.rate_set = rates.get_dataset_name()
    if (rates is None) or (not isinstance(rates, RateDataset)):
        logger.log_warning("Rate table %s is not loaded; agents in agent_set %s will have probability of 0.0"
                           % (self.rate_set, self.agent_set))
        return zeros(agents.size(), dtype="float32")

    probability = rates.get_rate(agents)
    return probability
def test_running_emme2_to_get_matrix(self): if self._has_travel_model: tm_output = TravelModelOutput() tm_output._get_matrix_into_data_file('au1tim', 80, self.real_bank_path) else: logger.log_warning('Test skipped. TRAVELMODELROOT environment ' 'variable not found.')
def get_rate(self, dataset): probability_attribute = self.get_probability_attribute_name() column_names = set(self.get_known_attribute_names()) - set( [probability_attribute, 'rate_id', '_hidden_id_']) self.independent_variables = list( set([ re.sub('_max$', '', re.sub('_min$', '', col)) for col in column_names ])) ## rstip below could turn 'sex' --> 'se' ##self.independent_variables = list(set([col.rstrip('_min').rstrip('_max') for col in column_names])) self._compute_variables_for_dataset_if_needed( dataset, self.independent_variables) known_attributes = dataset.get_known_attribute_names() prob = -1 + zeros(dataset.size(), dtype='float64') for index in range(self.size()): indicator = ones(dataset.size(), dtype='bool') for attribute in self.independent_variables: alias = self.attribute_aliases.get(attribute) if attribute in known_attributes: dataset_attribute = dataset.get_attribute(attribute) elif alias in known_attributes: dataset_attribute = dataset.get_attribute(alias) else: raise ValueError, "attribute %s used in rate dataset can not be found in dataset %s" % ( attribute, dataset.get_dataset_name()) if attribute + '_min' in column_names and self.get_attribute( attribute + '_min')[index] != -1: indicator *= dataset_attribute >= self.get_attribute( attribute + '_min')[index] if attribute + '_max' in column_names and self.get_attribute( attribute + '_max')[index] != -1: indicator *= dataset_attribute <= self.get_attribute( attribute + '_max')[index] if attribute in column_names and self.get_attribute( attribute)[index] != -1: rate_attribute = self.get_attribute(attribute) if rate_attribute[index] != -2: indicator *= dataset_attribute == rate_attribute[index] else: ##all other values not appeared in this column, i.e. the complement set complement_values = setdiff1d(dataset_attribute, rate_attribute) has_one_of_the_complement_value = zeros( dataset_attribute.size, dtype='bool') for value in complement_values: has_one_of_the_complement_value += dataset_attribute == value indicator *= has_one_of_the_complement_value prob[logical_and( indicator, prob < 0)] = self.get_attribute(probability_attribute)[index] if any(prob < 0): logger.log_warning('There are %i %ss whose probability is ' % ((prob < 0).sum(), dataset.get_dataset_name()) + 'un-specified by the rate dataset. ' + 'Their probability is set to 0.') prob[prob < 0] = 0.0 return prob
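# Hedged, self-contained sketch of the rate-matching idea in get_rate() above: each
# rate row constrains an attribute with optional *_min/*_max bounds (-1 meaning "no
# bound"), rows are applied in order, and the first matching row determines the
# probability. Attribute names and numbers are invented.
import numpy

agents_age = numpy.array([25, 40, 70, 15])
# rate rows: (age_min, age_max, probability); -1 disables a bound
rate_rows = [(-1, 29, 0.30), (30, 64, 0.10), (65, -1, 0.05)]

prob = numpy.zeros(agents_age.size) - 1          # -1 marks "not yet matched"
for age_min, age_max, p in rate_rows:
    indicator = numpy.ones(agents_age.size, dtype=bool)
    if age_min != -1:
        indicator &= agents_age >= age_min
    if age_max != -1:
        indicator &= agents_age <= age_max
    prob[numpy.logical_and(indicator, prob < 0)] = p
prob[prob < 0] = 0.0                             # unmatched agents get probability 0
print(prob)   # probabilities 0.3, 0.1, 0.05, 0.3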
def run(self, config, show_output = False): logger.log_status("Caching large SQL tables to: " + config['cache_directory']) self.show_output = show_output #import pydevd;pydevd.settrace() server_configuration = config['scenario_database_configuration'] scenario_database_manager = ScenarioDatabaseManager( server_configuration = server_configuration, base_scenario_database_name = server_configuration.database_name ) self.database_server = DatabaseServer(server_configuration) database_to_table_mapping = scenario_database_manager.get_database_to_table_mapping() self.tables_to_cache = config['creating_baseyear_cache_configuration'].tables_to_cache simulation_state = SimulationState() if 'low_memory_run' in config: simulation_state.set_low_memory_run(config['low_memory_run']) simulation_state.set_cache_directory(config['cache_directory']) simulation_state.set_current_time(config['base_year']) self.tables_cached = set() for database_name, tables in database_to_table_mapping.items(): self.cache_database_tables(config, database_name, tables) un_cached_tables = set(self.tables_to_cache) - self.tables_cached if un_cached_tables: logger.log_warning('The following requested tables were NOT cached:') for table_name in un_cached_tables: logger.log_warning('\t%s' % table_name)
def run(self, utilities=None, resources=None):
    """ Return a probability array obtained from a RateDataset. 'resources' must contain
    an entry with name self.rate_set (a RateDataset) and an entry self.agent_set that is
    able to provide attributes defined in the rate_set columns. If the rate table cannot
    be found, a warning is logged and an array of zeros is returned.
    """
    if self.agent_set:
        agents = resources.get(self.agent_set, None)
    else:
        agents = resources.get('agent_set', None)
        if agents is not None:
            self.agent_set = agents.get_dataset_name()
    if agents is None:
        raise RuntimeError("Unable to get agent set " + self.agent_set)
    if self.rate_set:
        rates = resources.get(self.rate_set, None)
    else:
        rates = resources.get('relocation_rate', None)
        if rates is not None:
            self.rate_set = rates.get_dataset_name()
    if (rates is None) or (not isinstance(rates, RateDataset)):
        logger.log_warning("Rate table %s is not loaded; agents in agent_set %s will have probability of 0.0"
                           % (self.rate_set, self.agent_set))
        return zeros(agents.size(), dtype="float32")

    probability = rates.get_rate(agents)
    return probability
def setUp(self): db_configs = [] for engine in _get_installed_database_engines(): config = TestDatabaseConfiguration(protocol=engine) db_configs.append(config) self.test_db = 'OpusDatabaseTestDatabase' test_table = 'test_table' self.dbs = [] for config in db_configs: try: server = DatabaseServer(config) if server.has_database(self.test_db): server.drop_database(self.test_db) server.create_database(self.test_db) self.assertTrue( server.has_database(database_name=self.test_db)) db = OpusDatabase(database_server_configuration=config, database_name=self.test_db) self.assertFalse(db.table_exists(test_table)) self.dbs.append((db, server)) except: import traceback traceback.print_exc() logger.log_warning('Could not start server for protocol %s' % config.protocol)
def invoke_run_travel_model(config, year): """ """ tm_config = config['travel_model_configuration'] scenario = tm_config['travel_model_scenario'] try: travel_model_year = travel_model_year_mapping[year] except KeyError: logger.log_warning("no travel model year mapping for %d." % year) travel_model_year = year # put the travel model input data in place data_exchange_dir = mtc_common.tm_get_data_exchange_dir(config, year) cache_directory = config['cache_directory'] mtc_dir = os.path.join(cache_directory, "mtc_data") for f in glob.glob(os.path.join(mtc_dir, '*' + str(year) + '*')): logger.log_status("Copying over travel model input " + f + " to " + data_exchange_dir) shutil.copy(f, data_exchange_dir) my_location = os.path.split(__file__)[0] script_filepath = os.path.join(my_location, "run_travel_model.py") cmd = "%s %s -s %s -y %s -n" % (sys.executable, script_filepath, scenario, travel_model_year) # form the desired output dir for the travel model data. Make it look # like the urbansim run cache for easy association. Note that we # explicitly use the forward slash instead of os.sep and friends # because the travel model is managed via ssh on a cygwin machine, not # run on the local machine. outdir = "runs/" + config['cache_directory'].split(os.sep)[-1] outdir = outdir + "/%d_%s" % (year, scenario) cmd = cmd + " -o " + outdir logger.log_status("Launching %s" % cmd) if os.system(cmd) != 0: raise TravelModelError # Run the emfac report # TODO: the travel model server should come from the configuration. But # for this we must migrate the configuration from mtc_config.py to the # top-level xml. So for now we just hard-code it :-/ Same for # travel_model_home. tm_server = "*****@*****.**" travel_model_home = "/cygdrive/e/mtc_travel_model/" server_model = winssh.winssh(tm_server, "OPUS_MTC_SERVER_PASSWD") (rc, emfac_windir) = server_model.cmd("cygpath -w " + outdir) if rc != 0: logger.log_error("Failed to find windows path for emfac dir " + outdir) sys.exit(1) emfac_windir = emfac_windir.replace('\r', '').replace('\n','') logger.log_status("Attempting to generate EMFAC report...") cmd = 'cd ' + travel_model_home + 'model_support_files/EMFAC_Files' logger.log_status(cmd) server_model.cmd_or_fail(cmd) cmd = "cmd /c 'RunEmfac.bat " + emfac_windir + " " + str(year) + "' | tee emfac.log" logger.log_status(cmd) (rc, out) = server_model.cmd(cmd, supress_output=False, pipe_position=0) if rc != 0: logger.log_warning("WARNING: Failed to prepare emfac report")
def test_getting_several_emme2_data_into_travel_data_set(self): if self._has_travel_model: num_zones = 30 zone_storage = StorageFactory().get_storage('dict_storage') zone_table_name = 'zone' zone_storage.write_table( table_name=zone_table_name, table_data={ 'zone_id':array(range(num_zones))+1 }, ) zone_set = ZoneDataset(in_storage=zone_storage, in_table_name=zone_table_name) matrix_attribute_map = {'au1tim':'single_vehicle_to_work_travel_time', 'biketm':'bike_to_work_travel_time'} tm_output = TravelModelOutput() travel_data_set = tm_output.get_travel_data_set(zone_set, matrix_attribute_map, self.real_bank_path) self.assertEqual(travel_data_set.get_attribute('single_vehicle_to_work_travel_time').size, num_zones*num_zones) self.assertEqual(travel_data_set.get_attribute('bike_to_work_travel_time').size, num_zones*num_zones) from numpy import ma self.assertEqual(False, ma.allclose(travel_data_set.get_attribute('single_vehicle_to_work_travel_time'), travel_data_set.get_attribute('bike_to_work_travel_time'))) else: logger.log_warning('Test skipped. TRAVELMODELROOT environment ' 'variable not found.')
def run_chunk(self, index, dataset, specification, coefficients): self.specified_coefficients = SpecifiedCoefficients().create( coefficients, specification, neqs=1) compute_resources = Resources({"debug": self.debug}) submodels = self.specified_coefficients.get_submodels() self.get_status_for_gui().update_pieces_using_submodels( submodels=submodels, leave_pieces=2) self.map_agents_to_submodels(submodels, self.submodel_string, dataset, index, dataset_pool=self.dataset_pool, resources=compute_resources) variables = self.specified_coefficients.get_full_variable_names_without_constants( ) self.debug.print_debug("Compute variables ...", 4) self.increment_current_status_piece() dataset.compute_variables(variables, dataset_pool=self.dataset_pool, resources=compute_resources) data = {} coef = {} outcome = self.initial_values[index].copy() for submodel in submodels: coef[submodel] = SpecifiedCoefficientsFor1Submodel( self.specified_coefficients, submodel) self.coefficient_names[submodel] = coef[ submodel].get_coefficient_names_without_constant()[0, :] self.debug.print_debug( "Compute regression for submodel " + str(submodel), 4) self.increment_current_status_piece() self.data[submodel] = dataset.create_regression_data( coef[submodel], index=index[self.observations_mapping[submodel]]) nan_index = where(isnan(self.data[submodel]))[1] inf_index = where(isinf(self.data[submodel]))[1] vnames = asarray(coef[submodel].get_variable_names()) if nan_index.size > 0: nan_var_index = unique(nan_index) self.data[submodel] = nan_to_num(self.data[submodel]) logger.log_warning( "NaN(Not A Number) is returned from variable %s; it is replaced with %s." % (vnames[nan_var_index], nan_to_num(nan))) #raise ValueError, "NaN(Not A Number) is returned from variable %s; check the model specification table and/or attribute values used in the computation for the variable." % vnames[nan_var_index] if inf_index.size > 0: inf_var_index = unique(inf_index) self.data[submodel] = nan_to_num(self.data[submodel]) logger.log_warning( "Inf is returned from variable %s; it is replaced with %s." % (vnames[inf_var_index], nan_to_num(inf))) #raise ValueError, "Inf is returned from variable %s; check the model specification table and/or attribute values used in the computation for the variable." % vnames[inf_var_index] if (self.data[submodel].shape[0] > 0) and (self.data[submodel].size > 0): # observations for this submodel available outcome[self.observations_mapping[submodel]] = \ self.regression.run(self.data[submodel], coef[submodel].get_coefficient_values()[0,:], resources=self.run_config).astype(outcome.dtype) return outcome
def _sample_by_agent_and_stratum(self, index1, index2, stratum, prob_array,
                                 chosen_choice_index, strata_sample_setting):
    """Stratified sampling agent by agent and stratum by stratum, suitable for a 2D
    prob_array and/or sample sizes that vary by agent. This method is slower than
    _sample_by_stratum; for simpler stratified sampling use _sample_by_stratum instead."""
    rank_of_prob = rank(prob_array)
    rank_of_strata = rank(strata_sample_setting)

    J = self.__determine_sampled_index_size(strata_sample_setting, rank_of_strata)
    sampled_index = zeros((index1.size, J), dtype=DTYPE) - 1
    self._sampling_probability = zeros((index1.size, J), dtype=float32)
    self._stratum_id = ones((index1.size, J), dtype=DTYPE) * NO_STRATUM_ID

    for i in range(index1.size):
        if rank_of_strata == 3:
            strata_sample_pairs = strata_sample_setting[i, :]
        else:
            strata_sample_pairs = strata_sample_setting

        if rank_of_prob == 2:
            prob = prob_array[i, :]
        else:
            prob = prob_array

        j = 0
        for (this_stratum, this_size) in strata_sample_pairs:
            if this_size <= 0:
                continue
            index_not_in_stratum = where(stratum != this_stratum)[0]
            this_prob = copy.copy(prob)

            this_prob[index_not_in_stratum] = 0.0
            this_prob = normalize(this_prob)

            if nonzerocounts(this_prob) < this_size:
                logger.log_warning("weight array doesn't have enough non-zero counts; sampling with replacement")
            # chosen_index_to_index2 = where(index2 == chosen_choice_index[i])[0]
            # exclude_index passed to probsample_noreplace needs to be indexed to index2
            this_sampled_index = probsample_noreplace(index2, sample_size=this_size,
                                                      prob_array=this_prob,
                                                      exclude_index=chosen_choice_index[i],
                                                      return_index=True)
            sampled_index[i, j:j + this_size] = this_sampled_index

            self._sampling_probability[i, j:j + this_size] = this_prob[this_sampled_index]
            self._stratum_id[i, j:j + this_size] = ones((this_sampled_index.size,), dtype=DTYPE) * this_stratum

            j += this_size

    return index2[sampled_index]
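# Hedged sketch of one agent's stratified draw as in _sample_by_agent_and_stratum
# above: within each requested stratum, zero out probabilities of alternatives outside
# the stratum, renormalize, and sample without replacement. Plain numpy; strata, sizes
# and weights are illustrative, and the chosen alternative is not excluded here as it
# is in the real code.
import numpy

rng = numpy.random.RandomState(42)
stratum = numpy.array([1, 1, 1, 2, 2, 2])            # stratum id of each alternative
prob = numpy.array([0.1, 0.3, 0.1, 0.2, 0.2, 0.1])   # sampling weights over alternatives
strata_sample_pairs = [(1, 2), (2, 1)]               # (stratum, sample_size)

sampled = []
for this_stratum, this_size in strata_sample_pairs:
    this_prob = numpy.where(stratum == this_stratum, prob, 0.0)
    this_prob = this_prob / this_prob.sum()          # renormalize within the stratum
    draw = rng.choice(len(prob), size=this_size, replace=False, p=this_prob)
    sampled.extend(draw.tolist())
print(sampled)   # indices of sampled alternatives: two from stratum 1, one from stratum 2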
def _delete(self, agents_pool, amount,
            agent_dataset, location_dataset,
            this_refinement,
            dataset_pool):
    """Similar to the subtract action, but instead of unplacing agents it removes
    them from the agent dataset; those agents won't be available for later actions."""
    fit_index = self.get_fit_agents_index(agent_dataset,
                                          this_refinement.agent_expression,
                                          this_refinement.location_expression,
                                          dataset_pool)

    if amount > fit_index.size or amount < 0:
        logger.log_warning("Refinement requests to delete %i agents, but there are %i agents in total satisfying %s; "
                           "deleting %i agents instead" % (amount, fit_index.size,
                                                           ' and '.join([this_refinement.agent_expression,
                                                                         this_refinement.location_expression]).strip(' and '),
                                                           fit_index.size))
        amount = fit_index.size

    if amount == fit_index.size:
        movers_index = fit_index
    else:
        movers_index = sample_noreplace(fit_index, amount)

    agents_pool = list(set(agents_pool) - set(movers_index))

    ## modify location capacity attribute if specified
    if this_refinement.location_capacity_attribute is not None and len(this_refinement.location_capacity_attribute) > 0:
        location_dataset = dataset_pool.get_dataset(VariableName(this_refinement.location_expression).get_dataset_name())
        movers_location_id = agent_dataset.get_attribute(location_dataset.get_id_name()[0])[movers_index]
        movers_location_index = location_dataset.get_id_index(movers_location_id)
        # see previous comment about the histogram function
        num_of_movers_by_location = histogram(movers_location_index, bins=arange(location_dataset.size() + 1))[0]
        num_of_agents_by_location = location_dataset.compute_variables("number_of_agents=%s.number_of_agents(%s)" % \
                                                                       (location_dataset.dataset_name, agent_dataset.dataset_name),
                                                                       dataset_pool=dataset_pool)

        shrink_factor = safe_array_divide((num_of_agents_by_location - num_of_movers_by_location).astype('float32'),
                                          num_of_agents_by_location,
                                          return_value_if_denominator_is_zero=1.0)
        new_values = round_(shrink_factor * location_dataset.get_attribute(this_refinement.location_capacity_attribute))
        location_dataset.modify_attribute(this_refinement.location_capacity_attribute, new_values)
        self._add_refinement_info_to_dataset(location_dataset, self.id_names, this_refinement, index=movers_location_index)

    agent_dataset.remove_elements(array(movers_index))
def run(self, data, regression, resources=None): """ The method prints out summary of the BMA procedure and creates an imageplot. If resources has an entry 'bma_imageplot_filename', the imageplot is sent to this file as pdf. The method does not return any useful results - it is a tool for variable selection. Once you selected your variables, use estimate_linear_regression for further usage of the coefficients. Expects an entry 'outcome' in resources that provides the values of the dependent variable. 'data' is a 2D numpy array of the actual data (nobservations x ncoefficients), it can be created by Dataset.create_regression_data_for_estimation(...). 'regression' is an instance of a regression class. """ r = robjects.r if data.ndim < 2: raise StandardError, "Argument 'data' must be a 2D numpy array." nobs = data.shape[0] nvar = data.shape[1] constant_position = resources.get( "constant_position", array([], dtype='int32')) #position for intercept if constant_position.size == 0: #position for intercept constant_position = -1 nvalues = nvar else: constant_position = constant_position[0] nvalues = nvar + 1 beta = zeros(nvalues).astype(float32) coef_names = resources.get("coefficient_names", nvar * []) data_for_r = {} for icoef in range(len(coef_names)): data_for_r[coef_names[icoef]] = data[:, icoef] bma = importr("BMA") d = robjects.DataFrame(data_for_r) try: bma_params = { 'x': d, 'y': resources["outcome"], 'glm.family': "gaussian", 'strict': 1 } #fit = bma.bic_glm(x=d, y=resources["outcome"], glm_family="gaussian", strict=1) fit = bma.bic_glm(**bma_params) fit[20] = '' # to have less output in the summary r.summary(fit) filename = resources.get('bma_imageplot_filename', None) if filename is not None: r.pdf(file=filename) bma.imageplot_bma(fit) r['dev.off']() else: r.X11() bma.imageplot_bma(fit) except: logger.log_warning("Error in BMA procedure.") return {}
def MASKtest_mssql_create_drop_and_has_database(self): if 'mssql' in get_testable_engines(): if not 'MSSQLDEFAULTDB' in os.environ: logger.log_warning('MSSQLDEFAULTDB is not set in the environment variables. Skipping test_mssql_create_drop_and_has_database') else: server = self.get_mssql_server() self.helper_create_drop_and_has_database(server) server.close()
def hhagecat(a):
    if a >= 0 and a <= 64:
        return 1
    elif a > 64:
        return 2
    else:
        logger.log_warning("Found age_of_head < 0")
        return -1
def remove(self, key):
    """Remove entry 'key' from the dictionary."""
    if key in self:
        del self[key]
    else:
        logger.log_warning("Key " + key + " not contained in the dictionary!",
                           tags=["configuration"])
def write(self, resources=None, out_storage=None, out_table_name=None):
    """Write the coefficients (submodel ids, names, estimates, standard errors
    and any other measures) to out_storage."""
    local_resources = Resources(resources)
    local_resources.merge_with_defaults({
        "field_submodel_id": self.field_submodel_id,
        "field_coefficient_name": self.field_coefficient_name,
        "field_estimate": self.field_estimate,
        "field_standard_error": self.field_standard_error,
        "other_fields": self.other_fields,
        "out_table_name": out_table_name
    })
    if out_storage is not None:
        self.out_storage = out_storage
    if not isinstance(self.out_storage, Storage):
        logger.log_warning("out_storage has to be of type Storage. No coefficients written.")
        return

    submodels = self.get_submodels()
    if submodels.size <= 0:
        submodels = resize(array([-2], dtype=int32), self.size())
    values = {
        local_resources["field_submodel_id"]: submodels,
        local_resources["field_coefficient_name"]: self.get_names(),
        local_resources["field_estimate"]: self.get_values(),
        local_resources["field_standard_error"]: self.get_standard_errors()
    }
    for measure in self.other_measures.keys():
        values[measure] = self.other_measures[measure]
    types = {
        local_resources["field_submodel_id"]: 'integer',
        local_resources["field_coefficient_name"]: 'text',
        local_resources["field_estimate"]: 'double',
        local_resources["field_standard_error"]: 'double'
    }
    attrtypes = {
        local_resources["field_submodel_id"]: AttributeType.PRIMARY,
        local_resources["field_coefficient_name"]: AttributeType.PRIMARY,
        local_resources["field_estimate"]: AttributeType.PRIMARY,
        local_resources["field_standard_error"]: AttributeType.PRIMARY
    }
    for measure in self.other_measures.keys():
        types[measure] = 'double'
        attrtypes[measure] = AttributeType.PRIMARY
    local_resources.merge({
        "values": values,
        'valuetypes': types,
        "drop_table_flag": 1,
        "attrtype": attrtypes
    })

    self.out_storage.write_table(table_name=local_resources['out_table_name'],
                                 table_data=local_resources['values'])
def _num_of_columns(self, table): num_of_columns = 0 for row in table: if len(row) > num_of_columns: num_of_columns = len(row) if (num_of_columns == 0): logger.log_warning('Table has no columns; creating a single empty column so LaTeX will not fail') num_of_columns = 1 return num_of_columns
def get_index_of_my_agents(self, dataset, index, dataset_pool=None, resources=None): agents_grouping_attr = self.get_agents_grouping_attribute() if agents_grouping_attr is None: logger.log_warning("'agents_grouping_attribute' wasn't set. No agent selection was done.") logger.log_note("Use method 'set_agents_grouping_attribute' for agents selection.") return arange(index.size) dataset.compute_variables(agents_grouping_attr, dataset_pool=dataset_pool, resources=resources) code_values = dataset.get_attribute_by_index(agents_grouping_attr, index) return where(code_values == self.get_member_code())[0]
def load_constants(self, in_storage, in_table_name): """Some of the constants are loaded from in_storage. """ result = in_storage.load_table(table_name=in_table_name) if result is None: logger.log_warning("No data in table '%s'" % in_table_name) else: for name in result: self[name] = result[name][0]
def _subtract(self, agents_pool, amount,
              agent_dataset, location_dataset,
              this_refinement,
              dataset_pool):

    fit_index = self.get_fit_agents_index(agent_dataset,
                                          this_refinement.agent_expression,
                                          this_refinement.location_expression,
                                          dataset_pool)

    if amount > fit_index.size:
        logger.log_warning("Refinement requests to subtract %i agents, but there are %i agents in total satisfying %s; "
                           "subtracting %i agents instead" % (amount, fit_index.size,
                                                              ' and '.join([this_refinement.agent_expression,
                                                                            this_refinement.location_expression]).strip(' and '),
                                                              fit_index.size))
        amount = fit_index.size

    if amount == fit_index.size:
        movers_index = fit_index
    else:
        movers_index = sample_noreplace(fit_index, amount)

    agents_pool += movers_index.tolist()
    ## modify location capacity attribute if specified
    if this_refinement.location_capacity_attribute is not None and len(this_refinement.location_capacity_attribute) > 0:
        location_dataset = dataset_pool.get_dataset(VariableName(this_refinement.location_expression).get_dataset_name())
        movers_location_id = agent_dataset.get_attribute(location_dataset.get_id_name()[0])[movers_index]
        movers_location_index = location_dataset.get_id_index(movers_location_id)
        # backward compatibility code for older versions of numpy -- no longer required since we need numpy 1.2.1 or greater
        # new=False argument to histogram tells it to use deprecated behavior for now (to be removed in numpy 1.3)
        # See numpy release notes -- search for histogram
        # if numpy.__version__ >= '1.2.0':
        #     num_of_movers_by_location = histogram(movers_location_index, bins=arange(location_dataset.size()), new=False)[0]
        # else:
        #     num_of_movers_by_location = histogram(movers_location_index, bins=arange(location_dataset.size()))[0]
        num_of_movers_by_location = histogram(movers_location_index, bins=arange(location_dataset.size() + 1))[0]
        num_of_agents_by_location = location_dataset.compute_variables("number_of_agents=%s.number_of_agents(%s)" % \
                                                                       (location_dataset.dataset_name, agent_dataset.dataset_name),
                                                                       dataset_pool=dataset_pool)

        shrink_factor = safe_array_divide((num_of_agents_by_location - num_of_movers_by_location).astype('float32'),
                                          num_of_agents_by_location,
                                          return_value_if_denominator_is_zero=1.0)

        new_values = round_(shrink_factor * location_dataset.get_attribute(this_refinement.location_capacity_attribute))
        location_dataset.modify_attribute(this_refinement.location_capacity_attribute, new_values)
        self._add_refinement_info_to_dataset(location_dataset, ("refinement_id", "transaction_id"), this_refinement,
                                             index=movers_location_index)

    agent_dataset.modify_attribute(location_dataset.get_id_name()[0],
                                   -1 * ones(movers_index.size, dtype='int32'),
                                   index=movers_index)
    self._add_refinement_info_to_dataset(agent_dataset, self.id_names, this_refinement, index=movers_index)
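# Hedged sketch of the per-location counting done with histogram() in _subtract and
# _delete above: count how many movers fall into each location index and shrink a
# capacity attribute proportionally. Numbers are invented.
import numpy

n_locations = 4
movers_location_index = numpy.array([0, 0, 2, 3, 3, 3])
capacity = numpy.array([10., 10., 10., 10.])
agents_per_location = numpy.array([5., 2., 4., 6.])

num_of_movers_by_location = numpy.histogram(movers_location_index,
                                            bins=numpy.arange(n_locations + 1))[0]
# shrink factor = remaining agents / original agents (1.0 where a location has no agents)
shrink = numpy.where(agents_per_location > 0,
                     (agents_per_location - num_of_movers_by_location) / numpy.maximum(agents_per_location, 1),
                     1.0)
new_capacity = numpy.round(shrink * capacity)
print(num_of_movers_by_location)  # counts per location: [2 0 1 3]
print(new_capacity)               # shrunken capacities: [6. 10. 8. 5.]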
def clip_to_zero_if_needed(values, function=""):
    from numpy import clip
    global_min = values.min()
    if global_min >= 0:
        return values
    global_max = values.max()
    logger.log_warning("Negative values detected in function/method '%s'" % function)
    logger.log_warning("Minimum: %s. Negative values clipped to zero." % global_min)
    return clip(values, 0, global_max)
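# Hedged usage sketch for clip_to_zero_if_needed() above, showing the intended effect
# with plain numpy; the input values are made up.
import numpy
from numpy import clip

values = numpy.array([-2.5, 0.0, 3.0])
clipped = clip(values, 0, values.max())   # what the function returns after logging its warnings
print(clipped)   # [0. 0. 3.]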
def load(self):
    if os.path.exists(self.filename):
        with open(self.filename, 'r') as stream:
            #self.shifters = yaml.load(stream, OrderedDictYAMLLoader)
            yaml_dict = yaml.load(stream)
            self.shifters = OrderedDict(sorted(yaml_dict.items(), key=lambda x: x[0]))
    else:
        logger.log_warning("File %s does not exist; returning {}" % self.filename)
        self.shifters = {}
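# Hedged sketch of the load pattern above with a small inline YAML document instead of
# a file on disk; yaml.safe_load is used here purely for illustration.
import yaml
from collections import OrderedDict

text = "b: 2\na: 1\nc: 3\n"
yaml_dict = yaml.safe_load(text)                        # plain dict
shifters = OrderedDict(sorted(yaml_dict.items(), key=lambda x: x[0]))
print(list(shifters.items()))   # [('a', 1), ('b', 2), ('c', 3)]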