Example #1
def prepare_specification_and_coefficients(specification_storage=None, 
                                           specification_table=None, 
                                           coefficients_storage=None,
                                           coefficients_table=None, 
                                           sample_coefficients=False, 
                                           cache_storage=None, 
                                           **kwargs):
    """ Load specification and coefficients from given tables in given storages.
    If 'sample_coefficients' is True, coefficients are sampled from given distribution. In such a case,
    either an argument 'distribution' should be given (equal either 'normal' or 'uniform'), 
    or argument distribution_dictionary should be given containing details about sampling specific coefficients
    (see docstring for method sample_values in opus_core/coefficients.py).
    If coefficients are sampled, the new values are flushed into cache.
    """
    specification = None
    if specification_storage is not None and specification_table is not None:
        specification = EquationSpecification(in_storage=specification_storage)
        specification.load(in_table_name=specification_table)
    coefficients = None
    if (coefficients_storage is not None) and (coefficients_table is not None):
        coefficients = Coefficients(in_storage=coefficients_storage)
        coefficients.load(in_table_name=coefficients_table)
        if sample_coefficients:
            coefficients = coefficients.sample_values(**kwargs)
            coefficients.flush_coefficients(coefficients_table, cache_storage)

    return (specification, coefficients)
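
# A hypothetical usage sketch of the helper above (not part of the original source):
# the storage locations are illustrative assumptions; the table names are taken from
# the other examples on this page.
from opus_core.storage_factory import StorageFactory

input_storage = StorageFactory().get_storage(
    'flt_storage', storage_location='/tmp/base_year_cache/2000')      # assumed path
cache_storage = StorageFactory().get_storage(
    'flt_storage', storage_location='/tmp/simulation_cache/2001')     # assumed path

specification, coefficients = prepare_specification_and_coefficients(
    specification_storage=input_storage,
    specification_table="household_location_choice_model_specification",
    coefficients_storage=input_storage,
    coefficients_table="household_location_choice_model_coefficients",
    sample_coefficients=True,
    distribution='normal',        # or 'uniform', or pass distribution_dictionary=...
    cache_storage=cache_storage)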
Example #3
class HLCMEstimator(Estimator):

    def estimate(self, spec_var=None, spec_py=None,
            submodel_string = "workers", 
            agent_sample_rate=0.005, alt_sample_size=None):
        """

        """
        CLOSE = 0.001
        sampler = "opus_core.samplers.weighted_sampler"
        if alt_sample_size is None:
            # no alternative sample size given: disable the sampler
            # (presumably the full location set is then used as the choice set)
            sampler = None
        
        date_time_str=strftime("%Y_%m_%d__%H_%M", localtime())
        agent_sample_rate_str = "__ASR_" + str(agent_sample_rate)
        alt_sample_size_str = "_ALT_" + str(alt_sample_size)
        info_file = date_time_str + agent_sample_rate_str + alt_sample_size_str + "__info.txt"
        logger.enable_file_logging(date_time_str + agent_sample_rate_str + alt_sample_size_str + "__run.txt")
        logger.enable_memory_logging()
        logger.log_status("Constrained Estimation with agent sample rate of %s and alternatvie sample size %s\n" % \
                          (agent_sample_rate, alt_sample_size))
                
        t1 = time()
        
        SimulationState().set_current_time(2000)

        self.nbs = SessionConfiguration().get_dataset_from_pool("neighborhood")
        self.hhs = SessionConfiguration().get_dataset_from_pool('household')

        depts, lambda_value = compute_lambda(self.nbs)
        supply, vacancy_rate = compute_supply_and_vacancy_rate(self.nbs, depts, lambda_value)
        self.nbs.set_values_of_one_attribute("supply", supply)
        dataset_pool = SessionConfiguration().get_dataset_pool()
        dataset_pool.add_datasets_if_not_included({'vacancy_rate': vacancy_rate,
                                                   'sample_rate':agent_sample_rate
                                                   })
        SessionConfiguration()["CLOSE"] = CLOSE
        SessionConfiguration()['info_file'] = info_file
        
        if self.save_estimation_results:
            out_storage = StorageFactory().build_storage_for_dataset(type='sql_storage', 
                storage_location=self.out_con)
        
        if spec_py is not None:
            reload(spec_py)
            spec_var = spec_py.specification
        
        if spec_var is not None:
            self.specification = load_specification_from_dictionary(spec_var)
        else:
            in_storage = StorageFactory().build_storage_for_dataset(type='sql_storage', 
                storage_location=self.in_con)
            self.specification = EquationSpecification(in_storage=in_storage)
            self.specification.load(in_table_name="household_location_choice_model_specification")

        #submodel_string = "workers"
        
        seed(71) # was: seed(71,110)
        self.model_name = "household_location_choice_model"

        model = HouseholdLocationChoiceModelCreator().get_model(location_set=self.nbs, 
                                                                submodel_string=submodel_string,
                                                                sampler = sampler,
                                                                estimation_size_agents = agent_sample_rate * 100/20,    
                                                                # proportion of the agent set that should be used for the estimation,
                                                                # 
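                                                                # e.g. with the default agent_sample_rate=0.005 this evaluates to
                                                                # 0.005*100/20 = 0.025, i.e. 2.5% of the agent set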
                                                                sample_size_locations = alt_sample_size,  # choice set size (includes current location)
                                                                compute_capacity_flag = True,
                                                                probabilities = "opus_core.mnl_probabilities",
                                                                choices = "urbansim.lottery_choices",
                                                                run_config = Resources({"capacity_string":"supply"}), 
                                                                estimate_config = Resources({"capacity_string":"supply","compute_capacity_flag":True}))

        #TODO: since households_for_estimation currently is the same as households, create_households_for_estimation
        #becomes unnecessary
        #agent_set, agents_index_for_estimation  =  create_households_for_estimation(self.hhs, self.in_con)
        agent_set = self.hhs
        agents_index_for_estimation = arange(self.hhs.size())
        self.result = model.estimate(self.specification, 
                                     agent_set=agent_set, 
                                     agents_index=agents_index_for_estimation, 
                                     debuglevel=self.debuglevel,
                                     procedure="urbansim.constrain_estimation_bhhh_two_loops" ) #"urbansim.constrain_estimation_bhhh"

        #save estimation results
        if self.save_estimation_results:    
            self.save_results(out_storage)
            
        logger.log_status("Estimation done. " + str(time()-t1) + " s")
Example #4
class HLCMEstimator(Estimator):

    def estimate(self, spec_var=None, spec_py=None,
                 movers_index = None,
                 submodel_string = "", 
                 alt_sample_size=None,
                 sampler = "opus_core.samplers.weighted_sampler",
                 weight_string = "supply",
                 aggregate_demand = False,
                 submarket_definition = ('zone', 'building_type_id'),
                 sample_size_from_each_stratum = 50
                 ):        
        """

        """
        
        t1 = time()        
        SimulationState().set_current_time(2000)

        dataset_pool=SessionConfiguration().get_dataset_pool()
        
        buildings = dataset_pool.get_dataset("building")
        agent_set = dataset_pool.get_dataset('household')
        #buildings.load_dataset()

        submarket_geography = dataset_pool.get_dataset(submarket_definition[0])
        intermediates = '[]'
        if submarket_geography.dataset_name == 'zone':
            intermediates = '[parcel]'
        elif submarket_geography.dataset_name == 'faz':
            intermediates = '[zone, parcel]'
        elif submarket_geography.dataset_name == 'large_area':
            intermediates = '[faz, zone, parcel]'
        
        submarket_id_expression = 'building.disaggregate(%s.%s, intermediates=%s) * 100' % \
                                                (submarket_geography.dataset_name, submarket_geography.get_id_name()[0],
                                                 intermediates)
        submarket_variables = ['%s=numpy.ceil(submarket.submarket_id / 100)' % submarket_geography.get_id_name()[0]]

        if submarket_definition[1] == 'residential_building_type_id':
            set_residential_building_types(dataset_pool.get_dataset("building_type"), dataset_pool.get_dataset("building"))
        if submarket_definition[1] != '':
            submarket_id_expression = submarket_id_expression + ' + building.%s'  % submarket_definition[1] 
            submarket_variables.append(submarket_definition[1] + '=submarket.submarket_id % 100' ) 
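        # Worked example with illustrative ids: a building in zone 381 with
        # building_type_id 19 gets submarket_id = 381*100 + 19 = 38119;
        # submarket_id % 100 recovers the building type (19), while the
        # numpy.ceil(submarket.submarket_id / 100) variable above is intended
        # to recover the geography id (381).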
            
        submarkets = define_submarket(buildings, 
                                      submarket_id_expression,
                                      #"urbansim_parcel.building.zone_id*100 + building.residential_building_type_id",
                                      #"building.disaggregate(faz.large_area_id, intermediates=[zone, parcel]) * 100 + building.residential_building_type_id",
                                      compute_variables=submarket_variables + [
                                          "residential_units=submarket.aggregate(building.residential_units)",
                                          "number_of_buildings_with_non_zero_units=submarket.aggregate(building.residential_units > 0 )",
                                          "number_of_surveyed_households=submarket.aggregate(household.household_id > 5000000, intermediates=[building])",                                                     
                                                     ],
                                      #filter = 'numpy.logical_and(submarket.number_of_surveyed_households > 0, submarket.residential_units>0)',
                                      #filter = 'submarket.supply > 0',
                                      #"psrc_parcel.building.large_area_id*100 + building.residential_building_type_id",
                                      #compute_variables=['residential_building_type_id=submarket.submarket_id % 100',
                                                         #'large_area_id=numpy.ceil(submarket.submarket_id / 100)']
                                      #"psrc_parcel.building.large_area_id",
                                      #compute_variables=[#'residential_building_type_id=submarket.submarket_id % 100',
                                                         #'large_area_id=numpy.ceil(submarket.submarket_id)']

                                  )

        dataset_pool.add_datasets_if_not_included({'submarket':submarkets})        
        compute_lambda_and_supply(buildings, agent_set, movers_index, submarkets)

        submarket_filter = 'submarket.supply > 0'
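        # drop submarkets with no supply from the choice set; touching the id
        # attributes flags variables that depend on them for recomputation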
        if submarket_filter is not None:
            from numpy import logical_not
            submarkets.remove_elements(index= where( logical_not(submarkets.compute_variables(submarket_filter)) )[0])
            submarkets.touch_attribute(submarkets.get_id_name()[0])
            buildings.touch_attribute(submarkets.get_id_name()[0])
            
        if self.save_estimation_results:
            out_storage = StorageFactory().build_storage_for_dataset(type='sql_storage', 
                storage_location=self.out_con)
        
        if spec_py is not None:
            reload(spec_py)
            spec_var = spec_py.specification
        
        if spec_var is not None:
            self.specification = load_specification_from_dictionary(spec_var)
        else:
            in_storage = StorageFactory().build_storage_for_dataset(type='sql_storage', 
                                                                    storage_location=self.in_con)
            self.specification = EquationSpecification(in_storage=in_storage)
            self.specification.load(in_table_name="household_location_choice_model_specification")
        
        self.model_name = "household_location_choice_model"

        agent_set, agents_index_for_estimation = get_households_for_estimation(agent_set,
                                                                               AttributeCache(),
                                                                               "households_for_estimation",
                                                                               exclude_condition="household.disaggregate(submarket.submarket_id, intermediates=[building])<=0",
                                                                           )
        agent_set.compute_variables("submarket_id=household.disaggregate(building.submarket_id)")
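        # estimation sample rate: the share of movers represented by the
        # households_for_estimation set; added to the dataset pool as 'sample_rate' below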
        agent_sample_rate = agents_index_for_estimation.size / float(movers_index.size)
        dataset_pool.add_datasets_if_not_included({'sample_rate': agent_sample_rate })

        if aggregate_demand:
            location_set = buildings
            aggregate_dataset = 'submarket'
            #weight_string = 'inv_submarket_supply = 1.0 / (building.disaggregate(submarket.number_of_agents(building))).astype(float32) * (building.disaggregate(submarket.submarket_id) > 0)'
            #weight_string = 'submarket_supply = (building.disaggregate(submarket.supply) > 0).astype(int32)'
            #weight_string = 'submarket_supply = building.disaggregate(submarket.supply) * (building.disaggregate(submarket.submarket_id) > 0).astype(float32)'
        else:
            location_set = submarkets
            aggregate_dataset = None
            #weight_string = 'supply'

        model = HouseholdLocationChoiceModelCreator().get_model(location_set=location_set,
                                                                #location_set=submarkets,  
                                                                #filter = 'building.disaggregate(submarket.submarket_id) > 0',
                                                                #filter = 'numpy.logical_and(submarket.number_of_surveyed_households > 0, submarket.residential_units>0)',
                                                                #filter = 'building.disaggregate(numpy.logical_and(submarket.number_of_buildings_with_non_zero_units > 5000, submarket.number_of_surveyed_households > 0))',
                                                                submodel_string=submodel_string,
                                                                sampler = sampler,
                                                                #estimation_size_agents = agent_sample_rate * 100/20,    
                                                                # proportion of the agent set that should be used for the estimation
                                                                sample_size_locations = alt_sample_size,
                                                                #sample_proportion_locations = 1.0/1000,
                                                                # choice set size (includes current location)
                                                                compute_capacity_flag = True,
                                                                probabilities = "opus_core.mnl_probabilities",
                                                                choices = "urbansim.lottery_choices",
                                                                #run_config = Resources({"capacity_string":"supply"}), 
                                                                estimate_config = Resources({"capacity_string":"supply",
                                                                                             "weights_for_estimation_string":weight_string,
                                                                                             "aggregate_to_dataset":aggregate_dataset,
                                                                                             "stratum": "building.disaggregate(submarket.submarket_id)",
                                                                                             "sample_size_from_each_stratum": sample_size_from_each_stratum,
                                                                                             #"index2":where(submarkets.compute_variables('submarket.number_of_surveyed_households > 0'))[0],
                                                                                             #"sample_rate": 1.0/5000,
                                                                                             #"sample_size_from_chosen_stratum": 0,
                                                                                             "include_chosen_choice": True
                                                                                             }))

        
        # was dataset_pool.add_datasets_if_not_included({'sample_rate':agent_sample_rate})        
        self.result = model.estimate(self.specification, 
                                     agent_set=agent_set, 
                                     agents_index=agents_index_for_estimation, 
                                     debuglevel=self.debuglevel,
                                     procedure="urbansim.constrain_estimation_bhhh_two_loops" ) #"urbansim.constrain_estimation_bhhh"

        #save estimation results
        if self.save_estimation_results:    
            self.save_results(out_storage)
            
        logger.log_status("Estimation done. " + str(time()-t1) + " s")
Example #5
    submodel_string = test_settings[model]['submodel_string']
    from opus_core.variables.variable_name import VariableName
    from opus_core.variables.attribute_type import AttributeType
    submodel_string_short_name = VariableName(submodel_string).get_short_name()
    if submodel_string_short_name in agent_set.get_known_attribute_names():
        agent_set.add_attribute(agent_set.get_attribute(submodel_string_short_name), "submodel",
                                metadata=AttributeType.PRIMARY)
    else:
        agent_set.compute_variables("submodel = %s" % submodel_string)

    specification_table = test_settings[model]['specification_table']
    coefficients_table = test_settings[model]['coefficient_table']

    base_cache_storage = AttributeCache().get_flt_storage_for_year(base_year)
    specification = EquationSpecification(in_storage=base_cache_storage)
    specification.load(in_table_name=specification_table)
    coefficients = Coefficients(in_storage=base_cache_storage)
    coefficients.load(in_table_name=coefficients_table)
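    # combine the loaded coefficients with the specification (neqs=1: a single
    # equation) and list the variable names (without constants) to compute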
    specified_coefficients = SpecifiedCoefficients().create(coefficients, specification, neqs=1)
    variables = specified_coefficients.get_full_variable_names_without_constants()

    choice_filter_index = None #where(datasets[choice_set_name].get_attribute('zone_id') == 742)[0]

    for year in years:
        SimulationState().set_current_time(year)
        SessionConfiguration().get_dataset_pool().remove_all_datasets()
        dataset_pool = DatasetPool(
            package_order=['psrc','urbansim','opus_core'],
            storage=AttributeCache())

        choice_set = dataset_pool.get_dataset(choice_set_name)
Example #6
    'flt_storage', storage_location="/home/hana/bandera/urbansim/data/GPSRC"),
                               in_table_name="hh")
agents_psrc.summary()

dbcon = []

config = ScenarioDatabaseConfiguration()
server = DatabaseServer(config)
db = server.get_database('PSRC_2000_baseyear')

storage = StorageFactory().get_storage('sql_storage', storage_location=db)

coefficients = Coefficients(in_storage=storage)
coefficients.load(in_table_name="household_location_choice_model_coefficients")
specification = EquationSpecification(in_storage=storage)
specification.load(
    in_table_name="household_location_choice_model_specification")
specification.get_variable_names()

hlcm_psrc = HouseholdLocationChoiceModelCreator().get_model(
    location_set=locations_psrc,
    sampler="opus_core.samplers.weighted_sampler",
    sample_size_locations=10,
    choices="urbansim.lottery_choices",
    compute_capacity_flag=True,
    run_config=Resources(
        {"capacity_string": "urbansim.gridcell.vacant_residential_units"}))

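# Run the model for a random sample of 500 households (assumption: `sample`
# here is random.sample, imported elsewhere in the original script).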
result = hlcm_psrc.run(specification,
                       coefficients,
                       agents_psrc,
                       agents_index=sample(range(agents_psrc.size()), 500),
                       debuglevel=4)
Example #7
        content_coefficients+='{0:40s} {1:10s} {2:10s} {3:10s} {4:10s}\n'.format(names[i], estimates[i], std_errors[i], '-', t_stats[i])
    else:
        #content_coefficients+='{0:40s} {1:10f} {2:10f} {3:10s} {4:10f}\n'.format(names[i], estimates[i], std_errors[i], sub_ids[i], t_stats[i]) #'{0:30s} {1:10f} {2:10f} {3:10s} {4:10f}\n'.format(names[i], estimates[i], std_errors[i], sub_ids[i], t_stats[i])
        content_coefficients+='{0:40s} {1:10s} {2:10s} {3:10f} {4:10s}\n'.format(names[i], estimates[i], std_errors[i], sub_ids[i], t_stats[i])
content_coefficients+='\n\r'

# now do the same for the specification ...

model = model.replace('coefficients', 'specification')

content_specification+='\n\r'
content_specification+='Model: %s\n\r' %model

# get model specification ...
specification = EquationSpecification(in_storage=storage)
specification.load(in_table_name=model)

# store specification directly as csv
#from opus_core.store.csv_storage import csv_storage
#out_path = os.path.join(path, model)
#out_storage = csv_storage(storage_location = out_path)
#specification.write(out_storage=out_storage, out_table_name=model) # writes out specifications as csv file

# ... prepare for printing ...
names_spec = specification.get_coefficient_names()
submodels_spec = specification.get_submodels()
long_var_names_spec = specification.get_long_variable_names()

# ... finally print out all available data
content_specification+='{0:40s} {1:10s} {2:20s}\n\r'.format('coefficient_name', 'submodel_id', 'variable_name')
for x in range(len(names_spec)):
Example #8
    from opus_core.variables.attribute_type import AttributeType
    submodel_string_short_name = VariableName(submodel_string).get_short_name()
    if submodel_string_short_name in agent_set.get_known_attribute_names():
        agent_set.add_attribute(
            agent_set.get_attribute(submodel_string_short_name),
            "submodel",
            metadata=AttributeType.PRIMARY)
    else:
        agent_set.compute_variables("submodel = %s" % submodel_string)

    specification_table = test_settings[model]['specification_table']
    coefficients_table = test_settings[model]['coefficient_table']

    base_cache_storage = AttributeCache().get_flt_storage_for_year(base_year)
    specification = EquationSpecification(in_storage=base_cache_storage)
    specification.load(in_table_name=specification_table)
    coefficients = Coefficients(in_storage=base_cache_storage)
    coefficients.load(in_table_name=coefficients_table)
    specified_coefficients = SpecifiedCoefficients().create(coefficients,
                                                            specification,
                                                            neqs=1)
    variables = specified_coefficients.get_full_variable_names_without_constants(
    )

    choice_filter_index = None  #where(datasets[choice_set_name].get_attribute('zone_id') == 742)[0]

    for year in years:
        SimulationState().set_current_time(year)
        SessionConfiguration().get_dataset_pool().remove_all_datasets()
        dataset_pool = DatasetPool(
            package_order=['psrc', 'urbansim', 'opus_core'],
Example #9
class HLCMEstimator(Estimator):
    def estimate(self,
                 spec_var=None,
                 spec_py=None,
                 submodel_string="workers",
                 agent_sample_rate=0.005,
                 alt_sample_size=None):
        """

        """
        CLOSE = 0.001
        sampler = "opus_core.samplers.weighted_sampler"
        if alt_sample_size is None:
            sampler = None

        date_time_str = strftime("%Y_%m_%d__%H_%M", localtime())
        agent_sample_rate_str = "__ASR_" + str(agent_sample_rate)
        alt_sample_size_str = "_ALT_" + str(alt_sample_size)
        info_file = date_time_str + agent_sample_rate_str + alt_sample_size_str + "__info.txt"
        logger.enable_file_logging(date_time_str + agent_sample_rate_str +
                                   alt_sample_size_str + "__run.txt")
        logger.enable_memory_logging()
        logger.log_status("Constrained Estimation with agent sample rate of %s and alternatvie sample size %s\n" % \
                          (agent_sample_rate, alt_sample_size))

        t1 = time()

        SimulationState().set_current_time(2000)

        self.nbs = SessionConfiguration().get_dataset_from_pool("neighborhood")
        self.hhs = SessionConfiguration().get_dataset_from_pool('household')

        depts, lambda_value = compute_lambda(self.nbs)
        supply, vacancy_rate = compute_supply_and_vacancy_rate(
            self.nbs, depts, lambda_value)
        self.nbs.set_values_of_one_attribute("supply", supply)
        dataset_pool = SessionConfiguration().get_dataset_pool()
        dataset_pool.add_datasets_if_not_included({
            'vacancy_rate':
            vacancy_rate,
            'sample_rate':
            agent_sample_rate
        })
        SessionConfiguration()["CLOSE"] = CLOSE
        SessionConfiguration()['info_file'] = info_file

        if self.save_estimation_results:
            out_storage = StorageFactory().build_storage_for_dataset(
                type='sql_storage', storage_location=self.out_con)

        if spec_py is not None:
            reload(spec_py)
            spec_var = spec_py.specification

        if spec_var is not None:
            self.specification = load_specification_from_dictionary(spec_var)
        else:
            in_storage = StorageFactory().build_storage_for_dataset(
                type='sql_storage', storage_location=self.in_con)
            self.specification = EquationSpecification(in_storage=in_storage)
            self.specification.load(
                in_table_name="household_location_choice_model_specification")

        #submodel_string = "workers"

        seed(71)  # was: seed(71,110)
        self.model_name = "household_location_choice_model"

        model = HouseholdLocationChoiceModelCreator().get_model(
            location_set=self.nbs,
            submodel_string=submodel_string,
            sampler=sampler,
            estimation_size_agents=agent_sample_rate * 100 / 20,
            # proportion of the agent set that should be used for the estimation,
            #
            sample_size_locations=
            alt_sample_size,  # choice set size (includes current location)
            compute_capacity_flag=True,
            probabilities="opus_core.mnl_probabilities",
            choices="urbansim.lottery_choices",
            run_config=Resources({"capacity_string": "supply"}),
            estimate_config=Resources({
                "capacity_string": "supply",
                "compute_capacity_flag": True
            }))

        #TODO: since households_for_estimation currently is the same as households, create_households_for_estimation
        #becomes unnecessary
        #agent_set, agents_index_for_estimation  =  create_households_for_estimation(self.hhs, self.in_con)
        agent_set = self.hhs
        agents_index_for_estimation = arange(self.hhs.size())
        self.result = model.estimate(
            self.specification,
            agent_set=agent_set,
            agents_index=agents_index_for_estimation,
            debuglevel=self.debuglevel,
            procedure="urbansim.constrain_estimation_bhhh_two_loops"
        )  #"urbansim.constrain_estimation_bhhh"

        #save estimation results
        if self.save_estimation_results:
            self.save_results(out_storage)

        logger.log_status("Estimation done. " + str(time() - t1) + " s")
Example #10
agents_psrc.summary()

dbcon = []

config = ScenarioDatabaseConfiguration()
server = DatabaseServer(config)
db = server.get_database('PSRC_2000_baseyear')
                   
storage = StorageFactory().get_storage(
    'sql_storage',
    storage_location = db)

coefficients = Coefficients(in_storage=storage)
coefficients.load(in_table_name="household_location_choice_model_coefficients")
specification = EquationSpecification(in_storage=storage)
specification.load(in_table_name="household_location_choice_model_specification")
specification.get_variable_names()


hlcm_psrc = HouseholdLocationChoiceModelCreator().get_model(
    location_set = locations_psrc,
    sampler = "opus_core.samplers.weighted_sampler", 
    sample_size_locations=10,
    choices="urbansim.lottery_choices", 
    compute_capacity_flag=True, 
    run_config=Resources({"capacity_string":"urbansim.gridcell.vacant_residential_units"}))

result = hlcm_psrc.run(specification, coefficients, agents_psrc, 
                       agents_index=sample(range(agents_psrc.size()), 500),
                       debuglevel=4)
# run it again
Example #11
class HLCMEstimator(Estimator):
    def estimate(self,
                 spec_var=None,
                 spec_py=None,
                 movers_index=None,
                 submodel_string="",
                 alt_sample_size=None,
                 sampler="opus_core.samplers.weighted_sampler",
                 weight_string="supply",
                 aggregate_demand=False,
                 submarket_definition=('zone', 'building_type_id'),
                 sample_size_from_each_stratum=50):
        """

        """

        t1 = time()
        SimulationState().set_current_time(2000)

        dataset_pool = SessionConfiguration().get_dataset_pool()

        buildings = dataset_pool.get_dataset("building")
        agent_set = dataset_pool.get_dataset('household')
        #buildings.load_dataset()

        submarket_geography = dataset_pool.get_dataset(submarket_definition[0])
        intermediates = '[]'
        if submarket_geography.dataset_name == 'zone':
            intermediates = '[parcel]'
        elif submarket_geography.dataset_name == 'faz':
            intermediates = '[zone, parcel]'
        elif submarket_geography.dataset_name == 'large_area':
            intermediates = '[faz, zone, parcel]'

        submarket_id_expression = 'building.disaggregate(%s.%s, intermediates=%s) * 100' % \
                                                (submarket_geography.dataset_name, submarket_geography.get_id_name()[0],
                                                 intermediates)
        submarket_variables = [
            '%s=numpy.ceil(submarket.submarket_id / 100)' %
            submarket_geography.get_id_name()[0]
        ]

        if submarket_definition[1] == 'residential_building_type_id':
            set_residential_building_types(
                dataset_pool.get_dataset("building_type"),
                dataset_pool.get_dataset("building"))
        if submarket_definition[1] != '':
            submarket_id_expression = submarket_id_expression + ' + building.%s' % submarket_definition[
                1]
            submarket_variables.append(submarket_definition[1] +
                                       '=submarket.submarket_id % 100')

        submarkets = define_submarket(
            buildings,
            submarket_id_expression,
            #"urbansim_parcel.building.zone_id*100 + building.residential_building_type_id",
            #"building.disaggregate(faz.large_area_id, intermediates=[zone, parcel]) * 100 + building.residential_building_type_id",
            compute_variables=submarket_variables + [
                "residential_units=submarket.aggregate(building.residential_units)",
                "number_of_buildings_with_non_zero_units=submarket.aggregate(building.residential_units > 0 )",
                "number_of_surveyed_households=submarket.aggregate(household.household_id > 5000000, intermediates=[building])",
            ],
            #filter = 'numpy.logical_and(submarket.number_of_surveyed_households > 0, submarket.residential_units>0)',
            #filter = 'submarket.supply > 0',
            #"psrc_parcel.building.large_area_id*100 + building.residential_building_type_id",
            #compute_variables=['residential_building_type_id=submarket.submarket_id % 100',
            #'large_area_id=numpy.ceil(submarket.submarket_id / 100)']
            #"psrc_parcel.building.large_area_id",
            #compute_variables=[#'residential_building_type_id=submarket.submarket_id % 100',
            #'large_area_id=numpy.ceil(submarket.submarket_id)']
        )

        dataset_pool.add_datasets_if_not_included({'submarket': submarkets})
        compute_lambda_and_supply(buildings, agent_set, movers_index,
                                  submarkets)

        submarket_filter = 'submarket.supply > 0'
        if submarket_filter is not None:
            from numpy import logical_not
            submarkets.remove_elements(index=where(
                logical_not(submarkets.compute_variables(submarket_filter)))
                                       [0])
            submarkets.touch_attribute(submarkets.get_id_name()[0])
            buildings.touch_attribute(submarkets.get_id_name()[0])

        if self.save_estimation_results:
            out_storage = StorageFactory().build_storage_for_dataset(
                type='sql_storage', storage_location=self.out_con)

        if spec_py is not None:
            reload(spec_py)
            spec_var = spec_py.specification

        if spec_var is not None:
            self.specification = load_specification_from_dictionary(spec_var)
        else:
            in_storage = StorageFactory().build_storage_for_dataset(
                type='sql_storage', storage_location=self.in_con)
            self.specification = EquationSpecification(in_storage=in_storage)
            self.specification.load(
                in_table_name="household_location_choice_model_specification")

        self.model_name = "household_location_choice_model"

        agent_set, agents_index_for_estimation = get_households_for_estimation(
            agent_set,
            AttributeCache(),
            "households_for_estimation",
            exclude_condition=
            "household.disaggregate(submarket.submarket_id, intermediates=[building])<=0",
        )
        agent_set.compute_variables(
            "submarket_id=household.disaggregate(building.submarket_id)")
        agent_sample_rate = agents_index_for_estimation.size / float(
            movers_index.size)
        dataset_pool.add_datasets_if_not_included(
            {'sample_rate': agent_sample_rate})

        if aggregate_demand:
            location_set = buildings
            aggregate_dataset = 'submarket'
            #weight_string = 'inv_submarket_supply = 1.0 / (building.disaggregate(submarket.number_of_agents(building))).astype(float32) * (building.disaggregate(submarket.submarket_id) > 0)'
            #weight_string = 'submarket_supply = (building.disaggregate(submarket.supply) > 0).astype(int32)'
            #weight_string = 'submarket_supply = building.disaggregate(submarket.supply) * (building.disaggregate(submarket.submarket_id) > 0).astype(float32)'
        else:
            location_set = submarkets
            aggregate_dataset = None
            #weight_string = 'supply'

        model = HouseholdLocationChoiceModelCreator().get_model(
            location_set=location_set,
            #location_set=submarkets,
            #filter = 'building.disaggregate(submarket.submarket_id) > 0',
            #filter = 'numpy.logical_and(submarket.number_of_surveyed_households > 0, submarket.residential_units>0)',
            #filter = 'building.disaggregate(numpy.logical_and(submarket.number_of_buildings_with_non_zero_units > 5000, submarket.number_of_surveyed_households > 0))',
            submodel_string=submodel_string,
            sampler=sampler,
            #estimation_size_agents = agent_sample_rate * 100/20,
            # proportion of the agent set that should be used for the estimation
            sample_size_locations=alt_sample_size,
            #sample_proportion_locations = 1.0/1000,
            # choice set size (includes current location)
            compute_capacity_flag=True,
            probabilities="opus_core.mnl_probabilities",
            choices="urbansim.lottery_choices",
            #run_config = Resources({"capacity_string":"supply"}),
            estimate_config=Resources({
                "capacity_string": "supply",
                "weights_for_estimation_string": weight_string,
                "aggregate_to_dataset": aggregate_dataset,
                "stratum": "building.disaggregate(submarket.submarket_id)",
                "sample_size_from_each_stratum": sample_size_from_each_stratum,
                #"index2":where(submarkets.compute_variables('submarket.number_of_surveyed_households > 0'))[0],
                #"sample_rate": 1.0/5000,
                #"sample_size_from_chosen_stratum": 0,
                "include_chosen_choice": True
            }))

        # was dataset_pool.add_datasets_if_not_included({'sample_rate':agent_sample_rate})
        self.result = model.estimate(
            self.specification,
            agent_set=agent_set,
            agents_index=agents_index_for_estimation,
            debuglevel=self.debuglevel,
            procedure="urbansim.constrain_estimation_bhhh_two_loops"
        )  #"urbansim.constrain_estimation_bhhh"

        #save estimation results
        if self.save_estimation_results:
            self.save_results(out_storage)

        logger.log_status("Estimation done. " + str(time() - t1) + " s")