Example #1
 def get_all_data(self, submodel=-2):
     """Model must have a property 'data' which is a dictionary that has for each submodel
     some data. It returns data for the given submodel. Meant to be used for analyzing estimation data."""
     if submodel in self.data.keys():
         return self.data[submodel]
     logger.log_warning("No available data for submodel %s." % submodel)
     return None
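A minimal self-contained sketch of the same submodel-keyed lookup, using a hypothetical EstimationData holder and the standard logging module in place of the Opus logger:

import logging

logger = logging.getLogger(__name__)

class EstimationData(object):
    """Hypothetical holder mirroring the 'data' property assumed above."""
    def __init__(self, data_by_submodel):
        self.data = data_by_submodel  # e.g. {-2: {...}, 1: {...}}

    def get_all_data(self, submodel=-2):
        if submodel in self.data:
            return self.data[submodel]
        logger.warning("No available data for submodel %s." % submodel)
        return None

holder = EstimationData({-2: {"income": [1, 2, 3]}})
print(holder.get_all_data())    # {'income': [1, 2, 3]}
print(holder.get_all_data(99))  # logs a warning and returns None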
Example #2
 def load(self, resources=None, in_storage=None, in_table_name=None):
     """
     """  # TODO: insert docstring
     local_resources = Resources(resources)
     local_resources.merge_with_defaults({
         "field_submodel_id": self.field_submodel_id,
         "field_coefficient_name": self.field_coefficient_name,
         "field_estimate": self.field_estimate,
         "field_standard_error": self.field_standard_error,
         "other_fields": self.other_fields
     })
     if in_storage is not None:
         self.in_storage = in_storage
     if not isinstance(self.in_storage, Storage):
         logger.log_warning(
             "in_storage has to be of type Storage. No coefficients loaded."
         )
     else:
         data = self.in_storage.load_table(table_name=in_table_name)
         submodels = data[local_resources["field_submodel_id"]]
         self.names = data[local_resources["field_coefficient_name"]]
         self.values = data[local_resources["field_estimate"]]
         self.standard_errors = data[
             local_resources["field_standard_error"]]
         for measure in local_resources["other_fields"]:
             if measure in data.keys():
                 self.other_measures[measure] = data[measure]
         if submodels.max() >= 0:
             self.submodels = submodels
         self.check_consistency()
    def _add(self,
             amount=0,
             attribute='',
             dataset=None,
             index=None,
             data_dict={},
             **kwargs):
        new_data = {}
        dataset_known_attributes = dataset.get_known_attribute_names()
        if index.size > 0:  # sample from agents
            lucky_index = sample_replace(index, amount)
            for attr in dataset_known_attributes:
                new_data[attr] = dataset.get_attribute_by_index(
                    attr, lucky_index)
        else:
            ## if attributes are not fully specified, the missing attributes will be filled with 0's
            for attr in dataset.get_primary_attribute_names():
                if attr in data_dict:
                    new_data[attr] = resize(array(data_dict[attr]), amount)
                else:
                    if attr == dataset.get_id_name()[0]:
                        new_data[attr] = zeros(
                            amount, dtype=dataset.get_id_attribute().dtype)
                    else:
                        logger.log_warning(
                            "Attribute %s is unspecified for 'add' event; its value will be sampled from all %s values of %s."
                            % (attr, attr, dataset.get_dataset_name()))
                        new_data[attr] = sample_replace(
                            dataset.get_attribute(attr), amount)

        dataset.add_elements(data=new_data, change_ids_if_not_unique=True)
Example #4
    def compute(self, dataset_pool):
        interaction_dataset = self.get_dataset()
        travel_data = dataset_pool.get_dataset('travel_data')
        travel_data_attr_mat = travel_data.get_attribute_as_matrix(
            self.travel_data_attribute, fill=self.default_value)

        var1 = interaction_dataset.get_dataset(1).get_attribute_by_index(
            self.agent_zone_id, interaction_dataset.get_2d_index_of_dataset1())
        var2 = interaction_dataset.get_2d_dataset_attribute(
            self.location_zone_id)
        if self.direction_from_home:
            home_zone = var1.astype("int32")
            work_zone = var2.astype("int32")
        else:
            home_zone = var2.astype("int32")
            work_zone = var1.astype("int32")

        results = resize(array([self.default_value], dtype=float32),
                         home_zone.shape)
        results = travel_data_attr_mat[home_zone, work_zone]

        missing_pairs_index = travel_data.get_od_pair_index_not_in_dataset(
            home_zone, work_zone)
        if missing_pairs_index[0].size > 0:
            results[missing_pairs_index] = self.default_value
            logger.log_warning(
                "zone pairs at index %s are not in travel data; value set to %s."
                % (str(missing_pairs_index), self.default_value))

        return results
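For reference, a simplified numpy sketch of the O-D matrix lookup above: zone ids index directly into a square travel matrix, and pairs treated as missing fall back to the default value. The Opus helpers (get_attribute_as_matrix, get_od_pair_index_not_in_dataset) are replaced by plain array indexing, and all data here are made up.

import numpy as np

default_value = 999.0
# hypothetical 4x4 travel-time matrix indexed by zone id (0..3)
travel_time = np.array([[0., 5., 9., 7.],
                        [5., 0., 4., 8.],
                        [9., 4., 0., 3.],
                        [7., 8., 3., 0.]])

home_zone = np.array([[0, 1], [2, 3]])  # 2D arrays, as in the interaction dataset
work_zone = np.array([[3, 3], [1, 0]])

# fancy indexing pulls one value per (home, work) pair and keeps the 2D shape
results = travel_time[home_zone, work_zone]

# pretend all pairs with home zone 2 are missing from the travel data
missing_pairs_index = np.where(home_zone == 2)
results[missing_pairs_index] = default_value
print(results)  # the home-zone-2 entry falls back to 999.0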
    def create_logit_data(self, coefficients, index=None):
        """It creates a data array corresponding to specified coefficients
        (=coefficients connected to a specification) as one variable per column.
        'coefficients' is of type "SpecifiedCoefficientsFor1Submodel".
        If 'index' is not None, it is considered as index (1D array) of dataset1 determining
        which individuals should be considered.
        Return a 3D array (nobservations|len(index) x nequations x nvariables).
        """
        shape = coefficients.getshape()
        neqs, nvar = shape[0:2]
        other_dims = ()
        if len(shape) > 2:
            other_dims = shape[2:]
        nparenteqs = coefficients.parent.nequations()
        if (neqs != self.get_reduced_m()) and (nparenteqs !=
                                               self.get_reduced_m()):
            self._raise_error(
                StandardError,
                "create_logit_data: Mismatch in number of equations and size of dataset2."
            )

        if index is not None:
            nobs = index.size
        else:
            nobs = self.get_reduced_n()
            index = arange(nobs)

        variables = coefficients.get_full_variable_names()
        mapping = coefficients.get_coefficient_mapping()
        # Fill the x array from data array
        data_shape = tuple([nobs, neqs, nvar] + list(other_dims))
        try:
            x = zeros(data_shape, dtype=float32)
        except:  # in case it fails due to memory allocation error
            logger.log_warning(
                "Not enough memory. Deleting not used attributes.",
                tags=["memory", "logit"])
            var_names = map(lambda x: x.get_alias(), variables)
            self.dataset1.unload_not_used_attributes(var_names)
            self.dataset2.unload_not_used_attributes(var_names)
            collect()
            x = zeros(data_shape, dtype=float32)
        if (len(variables) <= 0) or (nobs <= 0):
            return x
        for ivar in range(nvar):  # Iterate over variables
            if variables[ivar].is_constant_or_reserved_name():
                c = where(mapping[:, ivar] < 0, 0.0, 1)
                x[:, :, ivar] = c
            else:
                data = ma.filled(self.get_attribute(variables[ivar]),
                                 0.0)[index, ]
                if neqs < nparenteqs:
                    data = take(data,
                                coefficients.get_equations_index(),
                                axis=1)
                if x.ndim > 3:
                    data = resize(data,
                                  tuple(list(x.shape[0:2]) + list(other_dims)))
                x[:, :, ivar] = data
        return x
Example #6
 def compute(self, dataset_pool):
     interaction_dataset = self.get_dataset()
     travel_data = dataset_pool.get_dataset('travel_data')
     travel_data_attr_mat = travel_data.get_attribute_as_matrix(self.travel_data_attribute, 
                                                                fill=self.default_value)
     
     zone1 = interaction_dataset.get_dataset(1).get_attribute_by_index(self.agent_zone_id,
                                                                   interaction_dataset.get_2d_index_of_dataset1())
     zone2 = interaction_dataset.get_2d_dataset_attribute(self.location_zone_id)
     if self.direction_from_home:
         home_zone = zone1.astype("int32")
         work_zone = zone2.astype("int32")
     else:
         home_zone = zone2.astype("int32")
         work_zone = zone1.astype("int32")
     
     results = resize(array([self.default_value], dtype=float32), home_zone.shape)
     results = travel_data_attr_mat[home_zone, work_zone]
     
     missing_pairs_index = travel_data.get_od_pair_index_not_in_dataset(home_zone, work_zone)
     if missing_pairs_index[0].size > 0:
         results[missing_pairs_index] = self.default_value
         logger.log_warning("Values for %s O-D pairs in %s interaction dataset reset." % ( missing_pairs_index[0].size, home_zone.shape ) )
         logger.log_warning("O-D pairs below do not appear in travel data; their values set to %s:\n %s" % ( self.default_value,
                                                                                                      str(array(zip(home_zone[missing_pairs_index], 
                                                                                                                    work_zone[missing_pairs_index]))) ) )
     
     return results
 def compute(self, dataset_pool):
     zone_set = self.get_dataset()
     travel_data = dataset_pool.get_dataset('travel_data')
     from_zone_id = travel_data.get_attribute('from_zone_id')
     zone_ids = zone_set.get_attribute('zone_id')
     time = travel_data.get_attribute(self.time_attribute_name)
     trips = travel_data.get_attribute(self.trips_attribute_name)
     
     numerator = array(ndimage_sum(time * trips,
                                    labels = from_zone_id, index=zone_ids))
     denominator = array(ndimage_sum(trips,
                                      labels = from_zone_id, index=zone_ids), dtype=float32)
     
     # if there is a divide by zero, substitute the values from the zone one below that one
     # if there are contiguous runs of zero division, the values should propagate upon iteration
     no_trips_from_here = where(denominator == 0)[0]
     while no_trips_from_here.size != 0:
         if no_trips_from_here.size == denominator.size:
             logger.log_warning("%s attribute of travel_data is all zeros; %s returns all zeros" % (self.trips_attribute_name, 
                                                                                                    self.name()
                                                                                                    ))
             break
              
         substitute_locations = no_trips_from_here - 1    # a mapping, what zone the new data will come from
         if substitute_locations[0] < 0: substitute_locations[0] = 1
         numerator[no_trips_from_here] = numerator[substitute_locations]
         denominator[no_trips_from_here] = denominator[substitute_locations] 
         no_trips_from_here = where(denominator == 0)[0]
         
     return safe_array_divide(numerator, denominator)
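The substitution loop above is easier to follow in isolation. Below is a plain-numpy sketch (a hypothetical helper, not the Opus API) that borrows the previous zone's totals for zones with zero trips and lets the fix propagate across contiguous runs of zeros:

import numpy as np

def average_time(time, trips, from_zone_id, zone_ids):
    # Sketch of the averaging-with-substitution idea above, not the Opus API.
    # Zones with zero trips borrow the numerator/denominator of the previous
    # zone; repeated passes propagate values across contiguous runs of zeros.
    numerator = np.array([np.sum(time[from_zone_id == z] * trips[from_zone_id == z])
                          for z in zone_ids], dtype=float)
    denominator = np.array([np.sum(trips[from_zone_id == z]) for z in zone_ids],
                           dtype=float)
    no_trips_from_here = np.where(denominator == 0)[0]
    while no_trips_from_here.size != 0:
        if no_trips_from_here.size == denominator.size:
            break  # all zeros; give up and return zeros below
        substitute_locations = no_trips_from_here - 1
        if substitute_locations[0] < 0:
            substitute_locations[0] = 1
        numerator[no_trips_from_here] = numerator[substitute_locations]
        denominator[no_trips_from_here] = denominator[substitute_locations]
        no_trips_from_here = np.where(denominator == 0)[0]
    return np.divide(numerator, denominator,
                     out=np.zeros_like(numerator), where=denominator != 0)

zone_ids = np.array([1, 2, 3])
from_zone_id = np.array([1, 1, 3])
time = np.array([10.0, 20.0, 30.0])
trips = np.array([2.0, 2.0, 0.0])  # zone 2 has no rows, zone 3 has zero trips
print(average_time(time, trips, from_zone_id, zone_ids))  # [15. 15. 15.]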
Example #8
    def write(self, resources=None, out_storage=None, out_table_name=None):
        """
        """  # TODO: insert docstring
        local_resources = Resources(resources)
        local_resources.merge_with_defaults({
            "field_submodel_id": self.field_submodel_id,
            "field_equation_id": self.field_equation_id,
            "field_coefficient_name": self.field_coefficient_name,
            "field_variable_name": self.field_variable_name,
            "field_fixed_value": self.field_fixed_value,
            "out_table_name": out_table_name
        })
        if out_storage is not None:
            self.out_storage = out_storage
        if not isinstance(self.out_storage, Storage):
            logger.log_warning(
                "out_storage has to be of type Storage. No EquationSpecifications written."
            )
            return

        submodel_ids = self.get_submodels()
        if submodel_ids.size == 0:
            submodel_ids = resize(array(
                [-2], dtype="int32"), len(self.get_coefficient_names(
                )))  # set submodel_id to -2 when there are no submodels or only one

        equation_ids = self.get_equations()
        if equation_ids.size == 0:
            equation_ids = resize(array([-2], dtype="int32"),
                                  submodel_ids.size)

        values = {
            local_resources["field_submodel_id"]: submodel_ids,
            local_resources["field_equation_id"]: equation_ids,
            local_resources["field_coefficient_name"]:
            self.get_coefficient_names(),
            local_resources["field_variable_name"]:
            self.get_long_variable_names()
        }
        if self.fixed_values.size > 0:
            values[local_resources["field_fixed_value"]] = self.fixed_values
        for field in self.other_fields.keys():
            values[field] = self.other_fields[field]

        types = {
            local_resources["field_submodel_id"]: 'integer',
            local_resources["field_equation_id"]: 'integer',
            local_resources["field_coefficient_name"]: 'text',
            local_resources["field_variable_name"]: 'text'
        }

        local_resources.merge({
            "values": values,
            'valuetypes': types,
            "drop_table_flag": 1
        })

        self.out_storage.write_table(
            table_name=local_resources['out_table_name'],
            table_data=local_resources['values'])
Example #9
 def try_convert_to_float(x):
     try:
         return float(x)
     except:
         logger.log_warning('Invalid value in %s: %s' %
                            (full_file_name, x))
         return 0
 def _write_results(self,
                    indicators, 
                    source_data, 
                    file_name_for_indicator_results,
                    display_error_box):
     
     #generate a static html page for browsing outputted indicators and store the path to the html
     results_page_path = None
     results = IndicatorResults()
     try:            
         results_page_path = results.create_page(
             source_data = source_data,
             page_name = file_name_for_indicator_results,
             indicators = indicators)
     except:
         message = 'failed to generate indicator results page'
         if display_error_box:
             display_message_dialog(message)
         logger.enable_hidden_error_and_warning_words()
         logger.log_warning(message)
         logger.disable_hidden_error_and_warning_words()
 
     if results_page_path is not None:        
         results_page_path = 'file://' + results_page_path
         
     return results_page_path
 def _delete(self, agents_pool, amount, 
               agent_dataset, location_dataset, 
               this_refinement,
               dataset_pool ):
     """similar to subtract action, instead of unplacing agents delete remove agents from the agent dataset,
     those agents won't be available for later action
     """
     
     fit_index = self.get_fit_agents_index(agent_dataset, 
                                           this_refinement.agent_filter, 
                                           '',
                                           dataset_pool)
     
     if amount > fit_index.size or amount < 0:
         logger.log_warning("Request to delete %i agents,  but there are %i agents in total satisfying %s;" \
                            "delete %i agents instead" % (amount, fit_index.size, 
                                                            this_refinement.agent_filter,
                                                            fit_index.size) )
         amount = fit_index.size
     
     if amount == fit_index.size:
         movers_index = fit_index
     else:
         movers_index = sample_noreplace( fit_index, amount )
         
     agents_pool = list( set(agents_pool) - set(movers_index) )
         
     agent_dataset.remove_elements( array(movers_index) )
Example #12
    def correct_infinite_values(self,
                                dataset,
                                outcome_attribute_name,
                                maxvalue=1e+38,
                                clip_all_larger_values=False):
        """Check if the model resulted in infinite values. If yes,
        print warning and clip the values to maxvalue. 
        If clip_all_larger_values is True, all values larger than maxvalue are clip to maxvalue.
        """
        infidx = where(dataset.get_attribute(outcome_attribute_name) == inf)[0]

        if infidx.size > 0:
            logger.log_warning("Infinite values in %s. Clipped to %s." %
                               (outcome_attribute_name, maxvalue))
            dataset.set_values_of_one_attribute(outcome_attribute_name,
                                                maxvalue, infidx)
        if clip_all_larger_values:
            idx = where(
                dataset.get_attribute(outcome_attribute_name) > maxvalue)[0]
            if idx.size > 0:
                logger.log_warning(
                    "Values in %s larger than %s. Clipped to %s." %
                    (outcome_attribute_name, maxvalue, maxvalue))
                dataset.set_values_of_one_attribute(outcome_attribute_name,
                                                    maxvalue, idx)
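A bare-array sketch of the same clipping logic, assuming a plain numpy array in place of the Opus dataset/attribute API:

import numpy as np

def correct_infinite_values(values, maxvalue=1e+38, clip_all_larger_values=False):
    # Returns a corrected copy; warnings are plain prints in this sketch.
    out = np.array(values, dtype=float)
    infidx = np.where(np.isinf(out))[0]
    if infidx.size > 0:
        print("Infinite values found; clipped to %s." % maxvalue)
        out[infidx] = maxvalue
    if clip_all_larger_values:
        out[out > maxvalue] = maxvalue
    return out

print(correct_infinite_values([1.0, np.inf, 5e40], clip_all_larger_values=True))
# [1.e+00 1.e+38 1.e+38]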
Example #13
    def import_run_from_cache(self, cache_directory, run_info={}):
        baseyear = run_info.get('baseyear', -1)
        years = self.get_years_run(cache_directory, baseyear=baseyear)
                
        if years == []:
            msg = 'Cannot import run from %s: it contains no data for simulation years' % cache_directory
            logger.log_warning(msg)
            return (False, msg)
        else:
            run_id = run_manager._get_new_run_id()
            run_name = run_info.get('run_name', 
                                    os.path.basename(cache_directory))

            start_year, end_year = min(years), max(years)
            project_name = os.environ.get('OPUSPROJECTNAME', 
                                                      None)
            resources = {'cache_directory': cache_directory,
                         'description': 'run imported from cache',
                         'years': (start_year, end_year),
                         'project_name': project_name
                         }
            resources.update(run_info)

            self.add_row_to_history(run_id=run_id, 
                                    run_name=run_name, 
                                    resources=resources, 
                                    status='done',)
            return (True, '')
 def compute(self, dataset_pool):
     with logger.block(name="compute variable persons_within_DDD_of_parcel with DDD=%s" % self.radius, verbose=False):
         results = None
         with logger.block(name="trying to read cache file %s" % self.cache_file_name, verbose=False):
             try:
                 results = self._load_results()
             except IOError:
                 logger.log_warning("Cache file could not be loaded")
 
         with logger.block(name="initialize datasets", verbose=False):
             parcels = self.get_dataset()
             arr = self.get_dataset().sum_dataset_over_ids(dataset_pool.get_dataset('household'), attribute_name="persons")
 
         if not results:
             with logger.block(name="initialize coords", verbose=False):
                 coords = column_stack( (parcels.get_attribute("x_coord_sp"), parcels.get_attribute("y_coord_sp")) )
     
             with logger.block(name="build KDTree", verbose=False):
                 kd_tree = KDTree(coords, 100)
     
             with logger.block(name="compute"):
                 results = kd_tree.query_ball_tree(kd_tree, self.radius)
 
             with logger.block(name="cache"):
                 if not SimulationState().cache_directory_exists():
                     logger.log_warning("Cache does not exist and is created.")
                     SimulationState().create_cache_directory()
                 self._cache_results(results)
                 
         with logger.block(name="sum results", verbose=False):
             return_values = array(map(lambda l: arr[l].sum(), results))
         
     return return_values
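For reference, a self-contained scipy sketch of the KDTree neighbour summation used above, with made-up coordinates and person counts and none of the Opus caching or logger blocks:

import numpy as np
from scipy.spatial import KDTree

coords = np.array([[0.0, 0.0], [10.0, 0.0], [0.0, 12.0], [300.0, 300.0]])
persons = np.array([2, 3, 1, 5])  # persons per parcel (hypothetical)
radius = 20.0

kd_tree = KDTree(coords)
# for each parcel, the indices of all parcels within 'radius'
neighbors = kd_tree.query_ball_tree(kd_tree, radius)
persons_within_radius = np.array([persons[idx].sum() for idx in neighbors])
print(persons_within_radius)  # [6 6 6 5]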
Example #15
    def run(self, config, year, *args, **kwargs):
        """Runs the travel model, using appropriate info from config. 
        """
        tm_config = config["travel_model_configuration"]
        self.prepare_for_run(tm_config, year)
        
        project_year_dir = get_project_year_dir(tm_config, year)
#        year_dir = tm_config[year]  #'CoreEA0511202006\\urbansim\\2001'
#        dir_part1,dir_part2 = os.path.split(year_dir)
#        while dir_part1:
#            dir_part1, dir_part2 = os.path.split(dir_part1)
#        project_year_dir = os.path.join(tm_data_dir, dir_part2)   #C:/SEMCOG_baseline/CoreEA0511202006
        
        logger.log_status('Start travel model from directory %s for year %d' % (project_year_dir, year))
        #for macroname, ui_db_file in tm_config['macro']['run_semcog_travel_model'].iteritems():
            #pass 
        macroname, ui_db_file = tm_config['macro']['run_semcog_travel_model'], tm_config['ui_file']

        loops = 1
        logger.log_status('Running travel model ...')
        tcwcmd = win32api.GetShortPathName(tm_config['transcad_binary'])

        os.system('start /B "start TransCAD" %s' % tcwcmd)  #start TransCAD in background
        time.sleep(1)
        #os.system("%s -a %s -ai '%s'" % (tcwcmd, ui_db_file, macroname))
        run_transcad_macro(macroname, ui_db_file, loops)
        
        try:
            pass
            ##win32process.TerminateProcess(self.hProcess, 0)
        except:
            logger.log_warning("The code has problem to terminate the TransCAD it started.")
Example #16
def main():
    option_group = EstimationOptionGroup()
    parser = option_group.parser
    (options, args) = parser.parse_args()
    if options.model_name is None:
        raise StandardError, "Model name (argument -m) must be given."
    if (options.configuration_path is None) and (options.xml_configuration is None):
        raise StandardError, "Configuration path (argument -c) or XML configuration (argument -x) must be given."
    if (options.specification is None) and (options.xml_configuration is None):
        logger.log_warning("No specification given (arguments -s or -x). Specification taken from the cache.")
    if options.xml_configuration is not None:
        xconfig = XMLConfiguration(options.xml_configuration)
    else:
        xconfig = None
    if options.configuration_path is None:
        config = None
    else:
        config = get_config_from_opus_path(options.configuration_path)
    estimator = EstimationRunner(model=options.model_name, 
                                 specification_module=options.specification, 
                                 xml_configuration=xconfig, 
                                 model_group=options.model_group,
                                 configuration=config,
                                 save_estimation_results=options.save_results)
    estimator.estimate()       
    return estimator
 def compute(self, dataset_pool):
     interaction_dataset = self.get_dataset()
     travel_data = dataset_pool.get_dataset('travel_data')
     travel_data_attr_mat = travel_data.get_attribute_as_matrix(self.travel_data_attribute, 
                                                                fill=self.default_value)
     
     var1 = interaction_dataset.get_dataset(1).get_attribute_by_index(self.agent_zone_id,
                                                                   interaction_dataset.get_2d_index_of_dataset1())
     var2 = interaction_dataset.get_2d_dataset_attribute(self.location_zone_id)
     if self.direction_from_home:
         home_zone = var1.astype("int32")
         work_zone = var2.astype("int32")
     else:
         home_zone = var2.astype("int32")
         work_zone = var1.astype("int32")
     
     results = resize(array([self.default_value], dtype=float32), home_zone.shape)
     results = travel_data_attr_mat[home_zone, work_zone]
     
     missing_pairs_index = travel_data.get_od_pair_index_not_in_dataset(home_zone, work_zone)
     if missing_pairs_index[0].size > 0:
         results[missing_pairs_index] = self.default_value
         logger.log_warning("zone pairs at index %s are not in travel data; value set to %s." % ( str(missing_pairs_index), self.default_value) )
     
     return results
Example #18
 def export_dataset(self, dataset_name, in_storage, out_storage, overwrite=True, out_dataset_name=None, nchunks = 1, **kwargs):
     if not overwrite and dataset_name in out_storage.get_table_names():
         logger.log_note('Dataset %s ignored because it already exists in OPUS' % dataset_name)
         return
     with logger.block('Exporting dataset %s' % dataset_name):
         if out_dataset_name is None:
             out_dataset_name = dataset_name
         cols_in_this_chunk = in_storage.ALL_COLUMNS
         if nchunks > 1:
             colnames = in_storage.get_column_names(dataset_name)
             chunk_size = int(ceil(len(colnames) / float(nchunks)))
         for chunk in range(nchunks):
             if nchunks > 1:
                 cols_in_this_chunk = colnames[int(chunk*chunk_size):int((chunk+1)*chunk_size)]
             with logger.block('Loading %s - chunk %s out of %s' % (dataset_name, chunk+1, nchunks)):
                 values_from_storage = in_storage.load_table(dataset_name, column_names=cols_in_this_chunk)
                 length = len(values_from_storage) and len(values_from_storage.values()[0])
                 if length == 0:
                     logger.log_warning("Dataset %s ignored because it's empty" % dataset_name)
                     return
             with logger.block('Storing %s' % dataset_name):
                 if chunk > 0:
                     kwargs['mode'] = out_storage.APPEND
                 out_storage.write_table(out_dataset_name, values_from_storage, **kwargs)
         logger.log_note("Exported %s records for dataset %s" % (length, dataset_name))
Example #19
    def _get_PER_YEAR_form(self,
                        dataset_name,
                        attributes,
                        primary_keys,
                        years):

        
        per_year_data = {}
        cols = [computed_name for name, computed_name in attributes]
        for year in years:
            
            table_data = self.input_stores[year].load_table(
                table_name = dataset_name,
                column_names = primary_keys + cols)
            
            data_subset = {}
            for col in cols: 
                col_name = self._get_year_replaced_attribute(attribute = col, 
                                                      year = year)
                if col in table_data:
                    data_subset[col_name] = table_data[col]
                else:
                    logger.log_warning('No indicator %s loaded!'%col)
            for key in primary_keys:
                data_subset[key] = table_data[key]

            per_year_data[year] = data_subset
        return per_year_data    
 def _delete(self, agents_pool, amount, 
               agent_dataset, location_dataset, 
               this_refinement,
               dataset_pool ):
     """similar to subtract action, instead of unplacing agents delete remove agents from the agent dataset,
     those agents won't be available for later action
     """
     
     fit_index = self.get_fit_agents_index(agent_dataset, 
                                           this_refinement.agent_filter, 
                                           '',
                                           dataset_pool)
     
     if amount > fit_index.size or amount < 0:
         logger.log_warning("Request to delete %i agents,  but there are %i agents in total satisfying %s;" \
                            "delete %i agents instead" % (amount, fit_index.size, 
                                                            this_refinement.agent_filter,
                                                            fit_index.size) )
         amount = fit_index.size
     
     if amount == fit_index.size:
         movers_index = fit_index
     else:
         movers_index = sample_noreplace( fit_index, amount )
         
     agents_pool = list( set(agents_pool) - set(movers_index) )
         
     agent_dataset.remove_elements( array(movers_index) )
 def load(self, resources=None, in_storage=None, in_table_name=None):
     """
     """ # TODO: insert docstring
     local_resources = Resources(resources)
     local_resources.merge_with_defaults({
         "field_submodel_id":self.field_submodel_id,
         "field_coefficient_name":self.field_coefficient_name,
         "field_estimate":self.field_estimate,
         "field_standard_error":self.field_standard_error,
         "other_fields":self.other_fields})
     if in_storage is not None:
         self.in_storage = in_storage
     if not isinstance(self.in_storage, Storage):
         logger.log_warning("in_storage has to be of type Storage. No coefficients loaded.")
     else:
         data = self.in_storage.load_table(table_name=in_table_name)
         submodels = data[local_resources["field_submodel_id"]]
         self.names = data[local_resources["field_coefficient_name"]]
         self.values = data[local_resources["field_estimate"]]
         self.standard_errors = data[local_resources["field_standard_error"]]
         for measure in local_resources["other_fields"]:
             if measure in data.keys():
                 self.other_measures[measure] = data[measure]
         if submodels.max() >= 0:
             self.submodels=submodels
         self.check_consistency()
    def compute(self, dataset_pool):
        interaction_dataset = self.get_dataset()
        zones = dataset_pool.get_dataset('zone')
        travel_data = dataset_pool.get_dataset('travel_data')
        travel_data_attr_mat = travel_data.get_attribute_as_matrix(self.travel_data_attribute, 
                                                                   fill=self.travel_data_attribute_default_value)
        
        agent_resource = interaction_dataset.get_dataset(1).get_attribute_by_index(self.agent_resource,
                                                                                    interaction_dataset.get_2d_index_of_dataset1())        
        var1 = interaction_dataset.get_dataset(1).get_attribute_by_index(self.agent_zone_id,
                                                                         interaction_dataset.get_2d_index_of_dataset1())
        var2 = interaction_dataset.get_2d_dataset_attribute(self.choice_zone_id)
        
        if self.direction_from_agent_to_choice:
            from_zone = var1.astype("int32")
            to_zone = var2.astype("int32")
        else:
            from_zone = var2.astype("int32")
            to_zone = var1.astype("int32")
    
        results = resize(array([self.default_value], dtype=self._return_type), from_zone.shape)
        zone_ids = zones.get_id_attribute()
        for zone in zone_ids:
            tmp_zone = zone * ones(from_zone.shape, dtype="int32")
            t1 = travel_data_attr_mat[from_zone, tmp_zone]
            t2 = travel_data_attr_mat[tmp_zone, to_zone]
            results[where( t1 + t2 <= agent_resource)] += zones.get_attribute_by_id(self.zone_attribute_to_access, zone)
        
        missing_pairs_index = travel_data.get_od_pair_index_not_in_dataset(from_zone, to_zone)
        if missing_pairs_index[0].size > 0:
            results[missing_pairs_index] = self.default_value
            logger.log_warning("zone pairs at index %s are not in travel data; value set to %s." % ( str(missing_pairs_index), self.default_value) )

        return results
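A compact numpy sketch of the accessibility sum above: for every candidate intermediate zone, add its attribute wherever the round-trip time through that zone fits within the agent's resource. The zone numbering and data here are hypothetical.

import numpy as np

travel_time = np.array([[0., 5., 9.],
                        [5., 0., 4.],
                        [9., 4., 0.]])      # zone-to-zone times, zones 0..2
zone_attribute = np.array([10., 20., 30.])  # e.g. jobs per zone
from_zone = np.array([0, 1])
to_zone = np.array([2, 2])
agent_resource = np.array([15., 8.])        # time budget per agent

results = np.zeros(from_zone.shape)
for zone in range(3):
    t1 = travel_time[from_zone, zone]
    t2 = travel_time[zone, to_zone]
    results[np.where(t1 + t2 <= agent_resource)] += zone_attribute[zone]
print(results)  # [60. 50.]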
Example #23
    def _find_opus_test_cases_for_package(self, package, test_case_class):
        root = OpusPackage().get_path_for_package(package)

        modules_with_test_cases = []

        for path, dirs, files in os.walk(root, topdown=True):
            for file in files:
                if not file.endswith('.py'):
                    continue

                f = open(os.path.join(path, file), 'r')
                import_pattern = re.compile(r'^\s*(import|from).*unittest')
                skip_pattern = re.compile('^.*#.*IGNORE_THIS_FILE')

                found_import = False
                for line in f:
                    if skip_pattern.match(line):
                        break
                    if import_pattern.match(line):
                        found_import = True
                        break

                if not found_import:  # No unittest import found in file.
                    continue

                module_name = self._get_module_name(package, root, path, file)

                try:
                    exec('import %s' % module_name)
                except Exception, val:
                    logger.log_error("Could not import %s!" % module_name)

                    traceback.print_exc()

                    continue

                module = eval(module_name)

                if inspect.ismodule(module):
                    members = inspect.getmembers(module)

                    member_dict = {}
                    for key, value in members:
                        member_dict[key] = value

                    for key in member_dict.keys():
                        try:
                            is_subclass = issubclass(member_dict[key],
                                                     test_case_class)
                        except:
                            pass
                        else:
                            if is_subclass:
                                class_name = member_dict[key].__name__

                                modules_with_test_cases.append(
                                    (module_name, class_name))

                else:
                    logger.log_warning('WARNING: %s is not a module!' % module)
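The subclass scan at the end of this example can be reproduced with the standard library alone. A sketch that collects the names of classes in an already-imported module that subclass a given test case class (module discovery and dynamic import are omitted):

import inspect
import unittest

def find_test_case_classes(module, test_case_class=unittest.TestCase):
    found = []
    for name, value in inspect.getmembers(module):
        if inspect.isclass(value) and issubclass(value, test_case_class):
            found.append(value.__name__)
    return found

# usage: scan the unittest module itself
print(find_test_case_classes(unittest))  # includes 'TestCase' and 'FunctionTestCase'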
    def _get_PER_YEAR_form(self,
                        dataset_name,
                        attributes,
                        primary_keys,
                        years):

        
        per_year_data = {}
        cols = [computed_name for name, computed_name in attributes]
        for year in years:
            
            table_data = self.input_stores[year].load_table(
                table_name = dataset_name,
                column_names = primary_keys + cols)
            
            data_subset = {}
            for col in cols: 
                col_name = self._get_year_replaced_attribute(attribute = col, 
                                                      year = year)
                if col in table_data:
                    data_subset[col_name] = table_data[col]
                else:
                    logger.log_warning('No indicator %s loaded!'%col)
            for key in primary_keys:
                data_subset[key] = table_data[key]

            per_year_data[year] = data_subset
        return per_year_data    
Example #25
 def _update_submodel(self, current_node, edited_node):
     '''Update a submodel node (current_node) based on an edited version of it (edited_node).'''
     # the effect of renaming a shadowing node is that a new (local) copy is created and
     # the inherited node is reinserted. If the user did not rename the node we overwrite
     # the old submodel with the new values.
     name_change = current_node.get('name') != edited_node.get('name')
     if self.project.is_shadowing(current_node) and name_change:
         parent_node = current_node.getparent()
         row = parent_node.index(current_node)
         new_submodel_node = self.project.insert_node(
             edited_node, parent_node, row)
         if new_submodel_node is None:
             msg = ('Tried to insert a new submodel (%s) but failed. '
                    'The recent submodel changes have been lost.' %
                    current_node.get('name'))
             logger.log_warning(msg)
         self.project.delete_node(current_node)
     else:
         for key in edited_node.attrib:
             current_node.attrib[key] = edited_node.attrib[key]
         for child in current_node:
             current_node.remove(child)
         for child in edited_node:
             current_node.append(child)
     self.project.dirty = True
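The overwrite branch of _update_submodel can be illustrated with the standard-library ElementTree instead of the lxml-style nodes the GUI project uses; all element content here is made up:

import xml.etree.ElementTree as ET

current_node = ET.fromstring('<submodel name="sub1"><variable>a</variable></submodel>')
edited_node = ET.fromstring('<submodel name="sub1" note="edited"><variable>b</variable></submodel>')

# same name, so overwrite the old submodel in place with the edited values
for key in edited_node.attrib:
    current_node.attrib[key] = edited_node.attrib[key]
for child in list(current_node):
    current_node.remove(child)
for child in edited_node:
    current_node.append(child)
print(ET.tostring(current_node).decode())
# <submodel name="sub1" note="edited"><variable>b</variable></submodel>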
Example #26
 def get_all_data(self, submodel=-2):
     """Model must have a property 'data' which is a dictionary that has for each submodel
     some data. It returns data for the given submodel. Meant to be used for analyzing estimation data."""
     if submodel in self.data.keys():
         return self.data[submodel]
     logger.log_warning("No available data for submodel %s." % submodel)
     return None
Example #27
    def run(self, utilities=None, resources=None):
        """ Return a probability array obtained from a RateDataset. 'resources' must contain
        an entry with name self.rate_set (a RateDataset dataset) and an entry self.agent_set dataset
        that is able to provide attributes defined in rate_set columns. Otherwise the method
        returns equal probability of 0.25.
        """
        if self.agent_set:
            agents = resources.get(self.agent_set, None)
        else:
            agents = resources.get('agent_set', None)
            #if agents is not None: self.agent_set = agents.get_dataset_name()
        if agents is None:
            raise RuntimeError("Unable to get agent set " + self.agent_set)

        if self.rate_set:
            rates = resources.get(self.rate_set, None)
        else:
            rates = resources.get('rate_set', None)
            #if rates is not None: self.rate_set = rates.get_dataset_name()
        
        if (rates is None) or (not isinstance(rates, RateDataset)):
            logger.log_warning("Rate table %s is not loaded; agents in agent_set %s will have probability of 0.0" % (self.rate_set, self.agent_set))
            return zeros(agents.size(), dtype="float32")

        probability = rates.get_rate(agents)
        return probability
Example #28
 def test_running_emme2_to_get_matrix(self):
     if self._has_travel_model:
         tm_output = TravelModelOutput()
         tm_output._get_matrix_into_data_file('au1tim', 80, self.real_bank_path)
     else:
         logger.log_warning('Test skipped. TRAVELMODELROOT environment '
             'variable not found.')
Example #29
 def _write_results(self,
                    indicators, 
                    source_data, 
                    file_name_for_indicator_results,
                    display_error_box):
     
     #generate a static html page for browsing outputted indicators and store the path to the html
     results_page_path = None
     results = IndicatorResults()
     try:            
         results_page_path = results.create_page(
             source_data = source_data,
             page_name = file_name_for_indicator_results,
             indicators = indicators)
     except:
         message = 'failed to generate indicator results page'
         if display_error_box:
             display_message_dialog(message)
         logger.enable_hidden_error_and_warning_words()
         logger.log_warning(message)
         logger.disable_hidden_error_and_warning_words()
 
     if results_page_path is not None:        
         results_page_path = 'file://' + results_page_path
         
     return results_page_path
Example #30
    def get_rate(self, dataset):
        probability_attribute = self.get_probability_attribute_name()
        column_names = set(self.get_known_attribute_names()) - set(
            [probability_attribute, 'rate_id', '_hidden_id_'])
        self.independent_variables = list(
            set([
                re.sub('_max$', '', re.sub('_min$', '', col))
                for col in column_names
            ]))
        ## rstrip below could turn 'sex' --> 'se'
        ##self.independent_variables = list(set([col.rstrip('_min').rstrip('_max') for col in column_names]))
        self._compute_variables_for_dataset_if_needed(
            dataset, self.independent_variables)
        known_attributes = dataset.get_known_attribute_names()
        prob = -1 + zeros(dataset.size(), dtype='float64')
        for index in range(self.size()):
            indicator = ones(dataset.size(), dtype='bool')
            for attribute in self.independent_variables:
                alias = self.attribute_aliases.get(attribute)
                if attribute in known_attributes:
                    dataset_attribute = dataset.get_attribute(attribute)
                elif alias in known_attributes:
                    dataset_attribute = dataset.get_attribute(alias)
                else:
                    raise ValueError, "attribute %s used in rate dataset can not be found in dataset %s" % (
                        attribute, dataset.get_dataset_name())
                if attribute + '_min' in column_names and self.get_attribute(
                        attribute + '_min')[index] != -1:
                    indicator *= dataset_attribute >= self.get_attribute(
                        attribute + '_min')[index]
                if attribute + '_max' in column_names and self.get_attribute(
                        attribute + '_max')[index] != -1:
                    indicator *= dataset_attribute <= self.get_attribute(
                        attribute + '_max')[index]
                if attribute in column_names and self.get_attribute(
                        attribute)[index] != -1:
                    rate_attribute = self.get_attribute(attribute)
                    if rate_attribute[index] != -2:
                        indicator *= dataset_attribute == rate_attribute[index]
                    else:  ##all other values not appeared in this column, i.e. the complement set
                        complement_values = setdiff1d(dataset_attribute,
                                                      rate_attribute)
                        has_one_of_the_complement_value = zeros(
                            dataset_attribute.size, dtype='bool')
                        for value in complement_values:
                            has_one_of_the_complement_value += dataset_attribute == value
                        indicator *= has_one_of_the_complement_value

            prob[logical_and(
                indicator,
                prob < 0)] = self.get_attribute(probability_attribute)[index]

        if any(prob < 0):
            logger.log_warning('There are %i %ss whose probability is ' %
                               ((prob < 0).sum(), dataset.get_dataset_name()) +
                               'unspecified by the rate dataset. ' +
                               'Their probability is set to 0.')
            prob[prob < 0] = 0.0

        return prob
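A stripped-down numpy sketch of the min/max band matching that get_rate performs, using a hypothetical age-banded rate table; the Opus dataset machinery and the categorical/complement handling are left out:

import numpy as np

# hypothetical rate table: one probability per age band; -1 means "no bound"
age_min = np.array([0, 18, 65])
age_max = np.array([17, 64, -1])
rate_prob = np.array([0.05, 0.10, 0.20])

ages = np.array([5, 30, 70, 40])
prob = -1 + np.zeros(ages.size, dtype='float64')
for index in range(rate_prob.size):
    indicator = np.ones(ages.size, dtype='bool')
    if age_min[index] != -1:
        indicator &= ages >= age_min[index]
    if age_max[index] != -1:
        indicator &= ages <= age_max[index]
    # first matching band wins, mirroring the prob < 0 guard above
    prob[np.logical_and(indicator, prob < 0)] = rate_prob[index]
prob[prob < 0] = 0.0
print(prob)  # [0.05 0.1  0.2  0.1 ]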
    def run(self, config, show_output = False):
        logger.log_status("Caching large SQL tables to: " + config['cache_directory'])
        self.show_output = show_output
        
        #import pydevd;pydevd.settrace()
        
        server_configuration = config['scenario_database_configuration']
        
        scenario_database_manager = ScenarioDatabaseManager(
            server_configuration = server_configuration, 
            base_scenario_database_name = server_configuration.database_name                                                         
        )
        
        self.database_server = DatabaseServer(server_configuration)
        
        database_to_table_mapping = scenario_database_manager.get_database_to_table_mapping()
        
        self.tables_to_cache = config['creating_baseyear_cache_configuration'].tables_to_cache
                
        simulation_state = SimulationState()
        if 'low_memory_run' in config:
            simulation_state.set_low_memory_run(config['low_memory_run'])
        simulation_state.set_cache_directory(config['cache_directory'])
        simulation_state.set_current_time(config['base_year'])
                  
        self.tables_cached = set()      
        for database_name, tables in database_to_table_mapping.items():
            self.cache_database_tables(config, database_name, tables)

        un_cached_tables = set(self.tables_to_cache) - self.tables_cached
        if un_cached_tables:
            logger.log_warning('The following requested tables were NOT cached:')
            for table_name in un_cached_tables:
                logger.log_warning('\t%s' % table_name)
    def run(self, utilities=None, resources=None):
        """ Return a probability array obtained from a RateDataset. 'resources' must contain
        an entry with name self.rate_set (a RateDataset dataset) and an entry self.agent_set dataset
        that is able to provide attributes defined in rate_set columns. Otherwise the method
        returns equal probability of 0.25.
        """
        if self.agent_set:
            agents = resources.get(self.agent_set, None)
        else:
            agents = resources.get('agent_set', None)
            if agents is not None: self.agent_set = agents.get_dataset_name()
        if agents is None:
            raise RuntimeError("Unable to get agent set " + self.agent_set)

        if self.rate_set:
            rates = resources.get(self.rate_set, None)
        else:
            rates = resources.get('relocation_rate', None)
            if rates is not None: self.rate_set = rates.get_dataset_name()
        if (rates is None) or (not isinstance(rates, RateDataset)):
            logger.log_warning(
                "Rate table %s is not loaded; agents in agent_set %s will have probability of 0.0"
                % (self.rate_set, self.agent_set))
            return zeros(agents.size(), dtype="float32")

        probability = rates.get_rate(agents)
        return probability
Example #33
    def setUp(self):
        db_configs = []
        for engine in _get_installed_database_engines():
            config = TestDatabaseConfiguration(protocol=engine)
            db_configs.append(config)

        self.test_db = 'OpusDatabaseTestDatabase'
        test_table = 'test_table'

        self.dbs = []
        for config in db_configs:
            try:
                server = DatabaseServer(config)
                if server.has_database(self.test_db):
                    server.drop_database(self.test_db)
                server.create_database(self.test_db)
                self.assertTrue(
                    server.has_database(database_name=self.test_db))
                db = OpusDatabase(database_server_configuration=config,
                                  database_name=self.test_db)
                self.assertFalse(db.table_exists(test_table))
                self.dbs.append((db, server))
            except:
                import traceback
                traceback.print_exc()

                logger.log_warning('Could not start server for protocol %s' %
                                   config.protocol)
Example #34
def invoke_run_travel_model(config, year):
    """ 
    """

    tm_config = config['travel_model_configuration']
    scenario = tm_config['travel_model_scenario'] 
    try:
        travel_model_year = travel_model_year_mapping[year]
    except KeyError:
        logger.log_warning("no travel model year mapping for %d." % year)
        travel_model_year = year

    # put the travel model input data in place
    data_exchange_dir = mtc_common.tm_get_data_exchange_dir(config, year)
    cache_directory = config['cache_directory']
    mtc_dir = os.path.join(cache_directory, "mtc_data")
    for f in glob.glob(os.path.join(mtc_dir, '*' + str(year) + '*')):
        logger.log_status("Copying over travel model input " + f + " to " + data_exchange_dir)
        shutil.copy(f, data_exchange_dir)

    my_location = os.path.split(__file__)[0]
    script_filepath = os.path.join(my_location, "run_travel_model.py")
    cmd = "%s %s -s %s -y %s -n" % (sys.executable, script_filepath, scenario, travel_model_year)

    # form the desired output dir for the travel model data.  Make it look
    # like the urbansim run cache for easy association.  Note that we
    # explicitly use the forward slash instead of os.sep and friends
    # because the travel model is managed via ssh on a cygwin machine, not
    # run on the local machine.
    outdir = "runs/" + config['cache_directory'].split(os.sep)[-1]
    outdir = outdir + "/%d_%s" % (year, scenario)
    cmd = cmd + " -o " + outdir

    logger.log_status("Launching %s" % cmd)
    if os.system(cmd) != 0:
        raise TravelModelError

    # Run the emfac report

    # TODO: the travel model server should come from the configuration.  But
    # for this we must migrate the configuration from mtc_config.py to the
    # top-level xml.  So for now we just hard-code it :-/ Same for
    # travel_model_home.
    tm_server = "*****@*****.**"
    travel_model_home = "/cygdrive/e/mtc_travel_model/"
    server_model = winssh.winssh(tm_server, "OPUS_MTC_SERVER_PASSWD")
    (rc, emfac_windir) = server_model.cmd("cygpath -w " + outdir)
    if rc != 0:
        logger.log_error("Failed to find windows path for emfac dir " + outdir)
        sys.exit(1)
    emfac_windir = emfac_windir.replace('\r', '').replace('\n','')
    logger.log_status("Attempting to generate EMFAC report...")
    cmd = 'cd ' + travel_model_home + 'model_support_files/EMFAC_Files'
    logger.log_status(cmd)
    server_model.cmd_or_fail(cmd)
    cmd = "cmd /c 'RunEmfac.bat " + emfac_windir + " " + str(year) + "' | tee emfac.log"
    logger.log_status(cmd)
    (rc, out) = server_model.cmd(cmd, supress_output=False, pipe_position=0)
    if rc != 0:
        logger.log_warning("WARNING: Failed to prepare emfac report")
Example #35
    def test_getting_several_emme2_data_into_travel_data_set(self):
        if self._has_travel_model:
            num_zones = 30

            zone_storage = StorageFactory().get_storage('dict_storage')
            zone_table_name = 'zone'
            zone_storage.write_table(
                    table_name=zone_table_name,
                    table_data={
                        'zone_id':array(range(num_zones))+1
                        },
                )
            
            zone_set = ZoneDataset(in_storage=zone_storage, in_table_name=zone_table_name)
            matrix_attribute_map = {'au1tim':'single_vehicle_to_work_travel_time',
                                    'biketm':'bike_to_work_travel_time'}
            tm_output = TravelModelOutput()
            travel_data_set = tm_output.get_travel_data_set(zone_set, matrix_attribute_map, self.real_bank_path)
            self.assertEqual(travel_data_set.get_attribute('single_vehicle_to_work_travel_time').size, num_zones*num_zones)
            self.assertEqual(travel_data_set.get_attribute('bike_to_work_travel_time').size, num_zones*num_zones)
            from numpy import ma
            self.assertEqual(False,
                             ma.allclose(travel_data_set.get_attribute('single_vehicle_to_work_travel_time'), 
                                      travel_data_set.get_attribute('bike_to_work_travel_time')))
        else:
            logger.log_warning('Test skipped. TRAVELMODELROOT environment '
                'variable not found.')
Example #36
def main():
    option_group = EstimationOptionGroup()
    parser = option_group.parser
    (options, args) = parser.parse_args()
    if options.model_name is None:
        raise StandardError, "Model name (argument -m) must be given."
    if (options.configuration_path is None) and (options.xml_configuration is
                                                 None):
        raise StandardError, "Configuration path (argument -c) or XML configuration (argument -x) must be given."
    if (options.specification is None) and (options.xml_configuration is None):
        logger.log_warning(
            "No specification given (arguments -s or -x). Specification taken from the cache."
        )
    if options.xml_configuration is not None:
        xconfig = XMLConfiguration(options.xml_configuration)
    else:
        xconfig = None
    if options.configuration_path is None:
        config = None
    else:
        config = get_config_from_opus_path(options.configuration_path)
    estimator = EstimationRunner(model=options.model_name,
                                 specification_module=options.specification,
                                 xml_configuration=xconfig,
                                 model_group=options.model_group,
                                 configuration=config,
                                 save_estimation_results=options.save_results)
    estimator.estimate()
    return estimator
    def setUp(self):
        db_configs = []
        for engine in _get_installed_database_engines():
            config = TestDatabaseConfiguration(protocol=engine)
            db_configs.append(config)

        self.test_db = "OpusDatabaseTestDatabase"
        test_table = "test_table"

        self.dbs = []
        for config in db_configs:
            try:
                server = DatabaseServer(config)
                if server.has_database(self.test_db):
                    server.drop_database(self.test_db)
                server.create_database(self.test_db)
                self.assertTrue(server.has_database(database_name=self.test_db))
                db = OpusDatabase(database_server_configuration=config, database_name=self.test_db)
                self.assertFalse(db.table_exists(test_table))
                self.dbs.append((db, server))
            except:
                import traceback

                traceback.print_exc()

                logger.log_warning("Could not start server for protocol %s" % config.protocol)
Example #38
    def import_run_from_cache(self, cache_directory, run_info={}):
        baseyear = run_info.get('baseyear', -1)
        years = self.get_years_run(cache_directory, baseyear=baseyear)

        if years == []:
            msg = 'Cannot import run from %s: it contains no data for simulation years' % cache_directory
            logger.log_warning(msg)
            return (False, msg)
        else:
            run_id = run_manager._get_new_run_id()
            run_name = run_info.get('run_name',
                                    os.path.basename(cache_directory))

            start_year, end_year = min(years), max(years)
            project_name = os.environ.get('OPUSPROJECTNAME', None)
            resources = {
                'cache_directory': cache_directory,
                'description': 'run imported from cache',
                'years': (start_year, end_year),
                'project_name': project_name
            }
            resources.update(run_info)

            self.add_row_to_history(
                run_id=run_id,
                run_name=run_name,
                resources=resources,
                status='done',
            )
            return (True, '')
Example #39
    def run_chunk(self, index, dataset, specification, coefficients):
        self.specified_coefficients = SpecifiedCoefficients().create(
            coefficients, specification, neqs=1)
        compute_resources = Resources({"debug": self.debug})
        submodels = self.specified_coefficients.get_submodels()
        self.get_status_for_gui().update_pieces_using_submodels(
            submodels=submodels, leave_pieces=2)
        self.map_agents_to_submodels(submodels,
                                     self.submodel_string,
                                     dataset,
                                     index,
                                     dataset_pool=self.dataset_pool,
                                     resources=compute_resources)
        variables = self.specified_coefficients.get_full_variable_names_without_constants(
        )
        self.debug.print_debug("Compute variables ...", 4)
        self.increment_current_status_piece()
        dataset.compute_variables(variables,
                                  dataset_pool=self.dataset_pool,
                                  resources=compute_resources)
        data = {}
        coef = {}
        outcome = self.initial_values[index].copy()
        for submodel in submodels:
            coef[submodel] = SpecifiedCoefficientsFor1Submodel(
                self.specified_coefficients, submodel)
            self.coefficient_names[submodel] = coef[
                submodel].get_coefficient_names_without_constant()[0, :]
            self.debug.print_debug(
                "Compute regression for submodel " + str(submodel), 4)
            self.increment_current_status_piece()
            self.data[submodel] = dataset.create_regression_data(
                coef[submodel],
                index=index[self.observations_mapping[submodel]])
            nan_index = where(isnan(self.data[submodel]))[1]
            inf_index = where(isinf(self.data[submodel]))[1]
            vnames = asarray(coef[submodel].get_variable_names())
            if nan_index.size > 0:
                nan_var_index = unique(nan_index)
                self.data[submodel] = nan_to_num(self.data[submodel])
                logger.log_warning(
                    "NaN(Not A Number) is returned from variable %s; it is replaced with %s."
                    % (vnames[nan_var_index], nan_to_num(nan)))
                #raise ValueError, "NaN(Not A Number) is returned from variable %s; check the model specification table and/or attribute values used in the computation for the variable." % vnames[nan_var_index]
            if inf_index.size > 0:
                inf_var_index = unique(inf_index)
                self.data[submodel] = nan_to_num(self.data[submodel])
                logger.log_warning(
                    "Inf is returned from variable %s; it is replaced with %s."
                    % (vnames[inf_var_index], nan_to_num(inf)))
                #raise ValueError, "Inf is returned from variable %s; check the model specification table and/or attribute values used in the computation for the variable." % vnames[inf_var_index]

            if (self.data[submodel].shape[0] >
                    0) and (self.data[submodel].size >
                            0):  # observations for this submodel available
                outcome[self.observations_mapping[submodel]] = \
                    self.regression.run(self.data[submodel], coef[submodel].get_coefficient_values()[0,:],
                        resources=self.run_config).astype(outcome.dtype)
        return outcome
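
The NaN/Inf handling above leans on numpy's nan_to_num, which maps NaN to 0.0 and infinities to very large finite values. A minimal standalone sketch of that cleanup step (plain numpy, toy data, independent of the Opus classes):

import numpy as np

# Toy design matrix with a bad column (not the Opus data structures).
data = np.array([[1.0, np.nan, 2.0],
                 [3.0, np.inf, 4.0]])

nan_cols = np.unique(np.where(np.isnan(data))[1])   # columns containing NaN -> [1]
inf_cols = np.unique(np.where(np.isinf(data))[1])   # columns containing Inf -> [1]

cleaned = np.nan_to_num(data)   # NaN becomes 0.0, +/-Inf becomes a very large finite number
print(nan_cols, inf_cols)
print(cleaned)
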
Example #40
0
    def _sample_by_agent_and_stratum(self, index1, index2, stratum, prob_array,
                                     chosen_choice_index,
                                     strata_sample_setting):
        """agent by agent and stratum by stratum stratified sampling, suitable for 2d prob_array and/or sample_size varies for agents
        this method is slower than _sample_by_stratum, for simpler stratified sampling use _sample_by_stratum instead"""

        rank_of_prob = rank(prob_array)
        rank_of_strata = rank(strata_sample_setting)

        J = self.__determine_sampled_index_size(strata_sample_setting,
                                                rank_of_strata)
        sampled_index = zeros((index1.size, J), dtype=DTYPE) - 1
        self._sampling_probability = zeros((index1.size, J), dtype=float32)
        self._stratum_id = ones((index1.size, J), dtype=DTYPE) * NO_STRATUM_ID

        for i in range(index1.size):
            if rank_of_strata == 3:
                strata_sample_pairs = strata_sample_setting[i, :]
            else:
                strata_sample_pairs = strata_sample_setting

            if rank_of_prob == 2:
                prob = prob_array[i, :]
            else:
                prob = prob_array

            j = 0
            for (this_stratum, this_size) in strata_sample_pairs:
                if this_size <= 0: continue
                index_not_in_stratum = where(stratum != this_stratum)[0]
                this_prob = copy.copy(prob)

                this_prob[index_not_in_stratum] = 0.0
                this_prob = normalize(this_prob)

                if nonzerocounts(this_prob) < this_size:
                    logger.log_warning(
                        "weight array dosen't have enough non-zero counts, use sample with replacement"
                    )


                # chosen_index_to_index2 = where(index2 == chosen_choice_index[i])[0]
                # exclude_index passed to probsample_noreplace needs to be indexed to index2
                this_sampled_index = probsample_noreplace(
                    index2,
                    sample_size=this_size,
                    prob_array=this_prob,
                    exclude_index=chosen_choice_index[i],
                    return_index=True)
                sampled_index[i, j:j + this_size] = this_sampled_index

                self._sampling_probability[
                    i, j:j + this_size] = this_prob[this_sampled_index]
                self._stratum_id[i, j:j + this_size] = ones(
                    (this_sampled_index.size, ), dtype=DTYPE) * this_stratum

                j += this_size

        return index2[sampled_index]
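
The per-stratum step above boils down to: zero out probabilities outside the current stratum, renormalize, then sample without replacement. A self-contained sketch of that core idea (ad hoc names and toy data, not the Opus sampling helpers, and without the chosen-choice exclusion or the result bookkeeping):

import numpy as np

choices = np.arange(10)                                  # candidate alternatives
stratum = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])       # stratum id of each alternative
prob = np.full(10, 0.1)                                  # base sampling probabilities

def sample_from_stratum(this_stratum, size):
    p = prob.copy()
    p[stratum != this_stratum] = 0.0                     # restrict to the current stratum
    p = p / p.sum()                                      # renormalize
    return np.random.choice(choices, size=size, replace=False, p=p)

sampled = np.concatenate([sample_from_stratum(s, 2) for s in (0, 1)])
print(sampled)                                           # two draws from stratum 0, two from stratum 1
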
Example #41
0
    def _delete(self, agents_pool, amount, agent_dataset, location_dataset,
                this_refinement, dataset_pool):
        """similar to subtract action, instead of unplacing agents delete remove agents from the agent dataset,
        those agents won't be available for later action
        """

        fit_index = self.get_fit_agents_index(
            agent_dataset, this_refinement.agent_expression,
            this_refinement.location_expression, dataset_pool)

        if amount > fit_index.size or amount < 0:
            logger.log_warning("Refinement requests to delete %i agents,  but there are %i agents in total satisfying %s;" \
                               "delete %i agents instead" % (amount, fit_index.size,
                                                               ' and '.join( [this_refinement.agent_expression,
                                                                            this_refinement.location_expression] ).strip(' and '),
                                                               fit_index.size) )
            amount = fit_index.size

        if amount == fit_index.size:
            movers_index = fit_index
        else:
            movers_index = sample_noreplace(fit_index, amount)

        agents_pool = list(set(agents_pool) - set(movers_index))
        ## modify location capacity attribute if specified
        if this_refinement.location_capacity_attribute is not None and len(
                this_refinement.location_capacity_attribute) > 0:
            location_dataset = dataset_pool.get_dataset(
                VariableName(
                    this_refinement.location_expression).get_dataset_name())

            movers_location_id = agent_dataset.get_attribute(
                location_dataset.get_id_name()[0])[movers_index]
            movers_location_index = location_dataset.get_id_index(
                movers_location_id)
            # see previous comment about histogram function
            num_of_movers_by_location = histogram(
                movers_location_index,
                bins=arange(location_dataset.size() + 1))[0]
            num_of_agents_by_location = location_dataset.compute_variables( "number_of_agents=%s.number_of_agents(%s)" % \
                                                                            (location_dataset.dataset_name,
                                                                            agent_dataset.dataset_name),
                                                                            dataset_pool=dataset_pool)

            shrink_factor = safe_array_divide(
                (num_of_agents_by_location -
                 num_of_movers_by_location).astype('float32'),
                num_of_agents_by_location,
                return_value_if_denominator_is_zero=1.0)
            new_values = round_(shrink_factor * location_dataset.get_attribute(
                this_refinement.location_capacity_attribute))
            location_dataset.modify_attribute(
                this_refinement.location_capacity_attribute, new_values)
            self._add_refinement_info_to_dataset(location_dataset,
                                                 self.id_names,
                                                 this_refinement,
                                                 index=movers_location_index)

        agent_dataset.remove_elements(array(movers_index))
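
The capacity adjustment above scales each location's capacity by the share of its agents that remain after the deletion. A toy illustration of that arithmetic (made-up numbers, plain numpy instead of the Opus datasets and safe_array_divide):

import numpy as np

num_agents = np.array([10., 4., 0.])     # agents currently at each location
num_movers = np.array([ 2., 4., 0.])     # agents deleted from each location
capacity   = np.array([20., 8., 5.])     # location_capacity_attribute values

# divide safely: locations with no agents keep a factor of 1.0
shrink_factor = np.where(num_agents > 0,
                         (num_agents - num_movers) / np.maximum(num_agents, 1.0),
                         1.0)
new_capacity = np.round(shrink_factor * capacity)
print(new_capacity)                      # [16.  0.  5.]
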
Example #42
0
    def run(self, data, regression, resources=None):
        """
        The method prints out summary of the BMA procedure and creates an imageplot.
        If resources has an entry 'bma_imageplot_filename', the imageplot is sent to this file as pdf.
        The method does not return any useful results - it is a tool for variable selection.
        Once you selected your variables, use estimate_linear_regression for further usage of the coefficients. 
        
        Expects an entry 'outcome' in resources that provides the values of the dependent variable.
        'data' is a 2D numpy array of the actual data (nobservations x ncoefficients),
            it can be created by Dataset.create_regression_data_for_estimation(...).
        'regression' is an instance of a regression class.
        """
        r = robjects.r
        if data.ndim < 2:
            raise StandardError, "Argument 'data' must be a 2D numpy array."

        nobs = data.shape[0]
        nvar = data.shape[1]
        constant_position = resources.get(
            "constant_position", array([],
                                       dtype='int32'))  #position for intercept

        if constant_position.size == 0:  #position for intercept
            constant_position = -1
            nvalues = nvar
        else:
            constant_position = constant_position[0]
            nvalues = nvar + 1

        beta = zeros(nvalues).astype(float32)

        coef_names = resources.get("coefficient_names", nvar * [])
        data_for_r = {}
        for icoef in range(len(coef_names)):
            data_for_r[coef_names[icoef]] = data[:, icoef]
        bma = importr("BMA")
        d = robjects.DataFrame(data_for_r)
        try:
            bma_params = {
                'x': d,
                'y': resources["outcome"],
                'glm.family': "gaussian",
                'strict': 1
            }
            #fit = bma.bic_glm(x=d, y=resources["outcome"], glm_family="gaussian", strict=1)
            fit = bma.bic_glm(**bma_params)
            fit[20] = ''  # to have less output in the summary
            r.summary(fit)
            filename = resources.get('bma_imageplot_filename', None)
            if filename is not None:
                r.pdf(file=filename)
                bma.imageplot_bma(fit)
                r['dev.off']()
            else:
                r.X11()
                bma.imageplot_bma(fit)
        except:
            logger.log_warning("Error in BMA procedure.")
        return {}
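
As a rough usage sketch only: the caller supplies the 2D data array plus the dependent variable and coefficient names through 'resources'. A plain dict stands in for the Resources wrapper here, the data and names are made-up toy values, and the model instance name is assumed rather than taken from the snippet:

import numpy as np

nobs = 100
data = np.random.normal(size=(nobs, 3))                      # nobservations x ncoefficients
outcome = data.dot([1.5, -2.0, 0.5]) + np.random.normal(size=nobs)

resources = {
    "outcome": outcome,
    "coefficient_names": np.array(["income", "density", "access"]),
    "bma_imageplot_filename": "bma_imageplot.pdf",           # optional: send the imageplot to this PDF
}
# bma_model.run(data, regression=None, resources=resources)  # 'bma_model' is a hypothetical instance
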
Example #43
0
 def MASKtest_mssql_create_drop_and_has_database(self):
     if 'mssql' in get_testable_engines():
         if not 'MSSQLDEFAULTDB' in os.environ:
             logger.log_warning('MSSQLDEFAULTDB is not set in the environment variables. Skipping test_mssql_create_drop_and_has_database')
         else:
             server = self.get_mssql_server()
             self.helper_create_drop_and_has_database(server)
             server.close()
Example #44
0
 def hhagecat(a):
     if 0 <= a <= 64:
         return 1
     elif a > 64:
         return 2
     else:
         logger.log_warning("Found age_of_head < 0")
         return -1
Example #45
0
 def test_running_emme2_to_get_matrix(self):
     if self._has_travel_model:
         tm_output = TravelModelOutput()
         tm_output._get_matrix_into_data_file('au1tim', 80,
                                              self.real_bank_path)
     else:
         logger.log_warning('Test skipped. TRAVELMODELROOT environment '
                            'variable not found.')
Example #46
0
 def remove(self, key):
     """Remove an entry 'key' from the dictionary.
     """ 
     if self.has_key(key):
         del self[key]
     else:
         logger.log_warning("Key " + key + " not contained in the dictionary!",
                             tags=["configuration"])
    def _find_opus_test_cases_for_package(self, package, test_case_class):
        root = OpusPackage().get_path_for_package(package)

        modules_with_test_cases = []

        for path, dirs, files in os.walk(root, topdown=True):
            for file in files:
                if not file.endswith(".py"):
                    continue

                f = open(os.path.join(path, file), "r")
                import_pattern = re.compile(r"^\s*(import|from).*unittest")
                skip_pattern = re.compile("^.*#.*IGNORE_THIS_FILE")

                found_import = False
                for line in f:
                    if skip_pattern.match(line):
                        break
                    if import_pattern.match(line):
                        found_import = True
                        break

                if not found_import:  # No unittest import found in file.
                    continue

                module_name = self._get_module_name(package, root, path, file)

                try:
                    exec("import %s" % module_name)
                except Exception, val:
                    logger.log_error("Could not import %s!" % module_name)

                    traceback.print_exc()

                    continue

                module = eval(module_name)

                if inspect.ismodule(module):
                    members = inspect.getmembers(module)

                    member_dict = {}
                    for key, value in members:
                        member_dict[key] = value

                    for key in member_dict.keys():
                        try:
                            is_subclass = issubclass(member_dict[key], test_case_class)
                        except:
                            pass
                        else:
                            if is_subclass:
                                class_name = member_dict[key].__name__

                                modules_with_test_cases.append((module_name, class_name))

                else:
                    logger.log_warning("WARNING: %s is not a module!" % module)
Example #50
0
    def write(self, resources=None, out_storage=None, out_table_name=None):
        """
        """  # TODO: insert docstring
        local_resources = Resources(resources)
        local_resources.merge_with_defaults({
            "field_submodel_id": self.field_submodel_id,
            "field_coefficient_name": self.field_coefficient_name,
            "field_estimate": self.field_estimate,
            "field_standard_error": self.field_standard_error,
            "other_fields": self.other_fields,
            "out_table_name": out_table_name
        })
        if out_storage <> None:
            self.out_storage = out_storage
        if not isinstance(self.out_storage, Storage):
            logger.log_warning(
                "out_storage has to be of type Storage. No coefficients written."
            )
            return

        submodels = self.get_submodels()
        if submodels.size <= 0:
            submodels = resize(array([-2], dtype=int32), self.size())
        values = {
            local_resources["field_submodel_id"]: submodels,
            local_resources["field_coefficient_name"]: self.get_names(),
            local_resources["field_estimate"]: self.get_values(),
            local_resources["field_standard_error"]:
            self.get_standard_errors()
        }
        for measure in self.other_measures.keys():
            values[measure] = self.other_measures[measure]
        types = {
            local_resources["field_submodel_id"]: 'integer',
            local_resources["field_coefficient_name"]: 'text',
            local_resources["field_estimate"]: 'double',
            local_resources["field_standard_error"]: 'double'
        }
        attrtypes = {
            local_resources["field_submodel_id"]: AttributeType.PRIMARY,
            local_resources["field_coefficient_name"]: AttributeType.PRIMARY,
            local_resources["field_estimate"]: AttributeType.PRIMARY,
            local_resources["field_standard_error"]: AttributeType.PRIMARY
        }
        for measure in self.other_measures.keys():
            types[measure] = 'double'
            attrtypes[measure] = AttributeType.PRIMARY
        local_resources.merge({
            "values": values,
            'valuetypes': types,
            "drop_table_flag": 1,
            "attrtype": attrtypes
        })

        self.out_storage.write_table(
            table_name=local_resources['out_table_name'],
            table_data=local_resources['values'])
Example #51
0
 def _num_of_columns(self, table):
     num_of_columns = 0
     for row in table:
         if len(row) > num_of_columns:
             num_of_columns = len(row)
     if (num_of_columns == 0):
         logger.log_warning('Table has no columns; creating a single empty column so LaTeX will not fail')
         num_of_columns = 1
     return num_of_columns
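
For illustration, a standalone equivalent of the helper above and how it behaves on a ragged table (illustrative only; the snippet's version also logs a warning for the empty case):

def num_of_columns(table):
    widths = [len(row) for row in table]
    return max(widths) if widths and max(widths) > 0 else 1

print(num_of_columns([["a", "b"], ["c"], ["d", "e", "f"]]))   # 3 (widest row wins)
print(num_of_columns([]))                                     # 1 (placeholder column so LaTeX does not fail)
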
Example #52
0
 def get_index_of_my_agents(self, dataset, index, dataset_pool=None, resources=None):
     agents_grouping_attr = self.get_agents_grouping_attribute()
     if agents_grouping_attr is None:
         logger.log_warning("'agents_grouping_attribute' wasn't set. No agent selection was done.")
         logger.log_note("Use method 'set_agents_grouping_attribute' for agents selection.")
         return arange(index.size)
     dataset.compute_variables(agents_grouping_attr, dataset_pool=dataset_pool, resources=resources)
     code_values = dataset.get_attribute_by_index(agents_grouping_attr, index)
     return where(code_values == self.get_member_code())[0]
Example #53
0
 def load_constants(self, in_storage, in_table_name):
     """Some of the constants are loaded from in_storage.
     """
     result = in_storage.load_table(table_name=in_table_name)
     if result is None:
         logger.log_warning("No data in table '%s'" % in_table_name)
     else:
         for name in result:
             self[name] = result[name][0]
Example #54
0
 def _num_of_columns(self, table):
     num_of_columns = 0
     for row in table:
         if len(row) > num_of_columns:
             num_of_columns = len(row)
     if (num_of_columns == 0):
         logger.log_warning('Table has no columns; creating a single empty column so LaTeX will not fail')
         num_of_columns = 1
     return num_of_columns
Example #55
0
    def _subtract(self, agents_pool, amount, 
                  agent_dataset, location_dataset, 
                  this_refinement,
                  dataset_pool ):
        
        fit_index = self.get_fit_agents_index(agent_dataset, 
                                              this_refinement.agent_expression, 
                                              this_refinement.location_expression,
                                              dataset_pool)
        
        if amount > fit_index.size:
            logger.log_warning("Refinement requests to subtract %i agents,  but there are %i agents in total satisfying %s;" \
                               "subtract %i agents instead" % (amount, fit_index.size, 
                                                               ' and '.join( [this_refinement.agent_expression, 
                                                                            this_refinement.location_expression] ).strip(' and '),
                                                               fit_index.size) )
            amount = fit_index.size
        
        if amount == fit_index.size:
            movers_index = fit_index
        else:
            movers_index = sample_noreplace( fit_index, amount )
            
        agents_pool += movers_index.tolist()
        ## modify location capacity attribute if specified
        if this_refinement.location_capacity_attribute is not None and len(this_refinement.location_capacity_attribute) > 0:
            location_dataset = dataset_pool.get_dataset( VariableName( this_refinement.location_expression ).get_dataset_name() )

            movers_location_id = agent_dataset.get_attribute( location_dataset.get_id_name()[0] )[movers_index]
            movers_location_index = location_dataset.get_id_index( movers_location_id )
            # backward compatibility code for older versions of numpy -- no longer required since we need numpy 1.2.1 or greater
            # new=False argument to histogram tells it to use deprecated behavior for now (to be removed in numpy 1.3)
            # See numpy release notes -- search for histogram
            # if numpy.__version__ >= '1.2.0':
            #    num_of_movers_by_location = histogram( movers_location_index, bins=arange(location_dataset.size()), new=False)[0]
            # else:
            #    num_of_movers_by_location = histogram( movers_location_index, bins=arange(location_dataset.size()))[0]
            num_of_movers_by_location = histogram( movers_location_index, bins=arange(location_dataset.size() +1) )[0]
            num_of_agents_by_location = location_dataset.compute_variables( "number_of_agents=%s.number_of_agents(%s)" % \
                                                                            (location_dataset.dataset_name,
                                                                            agent_dataset.dataset_name),
                                                                            dataset_pool=dataset_pool)
            
            shrink_factor = safe_array_divide( (num_of_agents_by_location - num_of_movers_by_location ).astype('float32'),
                                                num_of_agents_by_location, return_value_if_denominator_is_zero = 1.0  )
            new_values = round_( shrink_factor * location_dataset.get_attribute(this_refinement.location_capacity_attribute) )
            location_dataset.modify_attribute( this_refinement.location_capacity_attribute, 
                                               new_values
                                               )
            self._add_refinement_info_to_dataset(location_dataset, ("refinement_id", "transaction_id"), this_refinement, index=movers_location_index)
            
        agent_dataset.modify_attribute(location_dataset.get_id_name()[0], 
                                       -1 * ones( movers_index.size, dtype='int32' ),
                                       index = movers_index
                                       )
        self._add_refinement_info_to_dataset(agent_dataset, self.id_names, this_refinement, index=movers_index)
Example #56
0
def clip_to_zero_if_needed(values, function=""):
    from numpy import clip

    global_min = values.min()
    if global_min >= 0:
        return values
    global_max = values.max()
    logger.log_warning("Negative values detected in function/method '%s'" % function)
    logger.log_warning("Minimum: %s. Negative values clipped to zero." % global_min)
Example #57
0
 def load(self):
     if os.path.exists(self.filename):
         stream = file(self.filename, 'r')
         #self.shifters = yaml.load(stream, OrderedDictYAMLLoader)
         yaml_dict = yaml.load(stream)
         self.shifters = OrderedDict(sorted(yaml_dict.items(),key=lambda x: x[0]))
         stream.close()
     else:
         logger.log_warning("File %s does not exist; return {}" % self.filename)
         self.shifters = {}
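
The sorted-OrderedDict line works around yaml.load returning an ordinary, unordered dict. A standalone version of the same idea, with a made-up shifters mapping keyed by year (the real file format is not shown in the snippet):

import yaml
from collections import OrderedDict

text = "2005: 1.02\n2001: 0.97\n2010: 1.10\n"    # hypothetical shifters YAML content
yaml_dict = yaml.safe_load(text)                 # safe_load avoids executing arbitrary tags
shifters = OrderedDict(sorted(yaml_dict.items(), key=lambda x: x[0]))
print(list(shifters.keys()))                     # [2001, 2005, 2010]
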