def test_join_by_rows_for_char_arrays(self):
    from numpy import alltrue
    storage = StorageFactory().get_storage('dict_storage')

    storage.write_table(
        table_name='dataset1',
        table_data={
            'id': array([2, 4, 6, 8]),
            'attr': array(['4', '7', '2', '1'])
            }
        )
    storage.write_table(
        table_name='dataset2',
        table_data={
            'id': array([1, 5, 9]),
            'attr': array(['55', '66', '100'])
            }
        )

    ds1 = Dataset(in_storage=storage, in_table_name='dataset1', id_name='id')
    ds2 = Dataset(in_storage=storage, in_table_name='dataset2', id_name='id')
    ds1.join_by_rows(ds2)
    self.assert_(alltrue(ds1.get_attribute('attr') ==
                         array(['4', '7', '2', '1', '55', '66', '100'])))
    self.assert_(alltrue(ds2.get_attribute('attr') == array(['55', '66', '100'])))
def create_edges(self, input_file_dir, input_file_name, output_file_name):
    storage = StorageFactory().get_storage(type='tab_storage', subdir='store',
        storage_location=input_file_dir)
    dataset = Dataset(in_storage=storage,
                      id_name=['stop_id', 'sch_time'],
                      in_table_name=input_file_name)
    n = dataset.size()
    trip_ids = dataset.get_attribute("stop_id")
    unique_trip_ids = unique(trip_ids)
    source_list = list()
    target_list = list()
    time_list = list()

    for trip in unique_trip_ids:
        idx = where(dataset.get_attribute("stop_id") == trip)[0]
        nodes = dataset.get_attribute_by_index("node_id", idx)
        times = dataset.get_attribute_by_index("sch_time", idx)
        for inode in range(nodes.size - 1):
            source_list.append(nodes[inode])
            target_list.append(nodes[inode + 1])
            time_list.append(times[inode + 1] - times[inode])

    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(table_name='edges',
        table_data={
            'edge_id': arange(len(source_list)) + 1,
            'source': array(source_list),  #type=int64), # <<<< OUTPUT FIELD, USE array
            'target': array(target_list),  #type=int64), # <<<< OUTPUT FIELD, USE array
            'cost': array(time_list, dtype=int32)
            }
        )

    edges = Dataset(in_storage=storage, in_table_name='edges', id_name="edge_id")
    edges.write_dataset(attributes=["source", "target", "cost"],
                        out_storage=storage,
                        out_table_name=output_file_name)
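# A minimal sketch of how create_edges might be invoked; the directory and
# file names below are hypothetical, and the method is assumed to need no
# state from its class beyond what it uses internally:
#
#   builder.create_edges(input_file_dir='/path/to/tab_files',
#                        input_file_name='stop_times',
#                        output_file_name='transit_edges')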
def test_join_by_rows(self):
    storage = StorageFactory().get_storage('dict_storage')

    storage.write_table(
        table_name='dataset1',
        table_data={
            'id': array([2, 4, 6, 8]),
            'attr': array([4, 7, 2, 1])
            }
        )
    storage.write_table(
        table_name='dataset2',
        table_data={
            'id': array([1, 5, 9]),
            'attr': array([55, 66, 100])
            }
        )

    ds1 = Dataset(in_storage=storage, in_table_name='dataset1', id_name='id')
    ds2 = Dataset(in_storage=storage, in_table_name='dataset2', id_name='id')
    ds1.join_by_rows(ds2)
    self.assert_(ma.allclose(ds1.get_attribute('attr'), array([4, 7, 2, 1, 55, 66, 100])))
    self.assert_(ma.allclose(ds2.get_attribute('attr'), array([55, 66, 100])))
def test_change_three_elements(self):
    """3 values are in common - change them to -1. Other attributes stay unchanged."""
    data = {
        'my_id': array([1, 2, 3, 4, 5]),
        'attr': array([10, 2, 3, 50, 2]),
        'attr2': array([4, 3, 2, 5, 3])
        }
    data2 = {
        'attr': array([2, 6, 7, 3])
        }
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(table_name='dataset', table_data=data)
    dataset = Dataset(in_storage=storage, in_table_name='dataset', id_name='my_id')
    storage.write_table(table_name='dataset2', table_data=data2)
    dataset2 = Dataset(in_storage=storage, in_table_name='dataset2', id_name='attr')
    JoinAttributeModificationModel().run(dataset, dataset2, value=-1)
    self.assertEqual(ma.allequal(dataset.get_attribute('attr'), array([10, -1, -1, 50, -1])), True)
    self.assertEqual(ma.allequal(dataset.get_attribute('attr2'), data['attr2']), True)
class SimpleModelTest(opus_unittest.OpusTestCase):
    def setUp(self):
        self.data = {
            'id': arange(10) + 1,
            'attribute': array([3000, 2800, 1000, 550, 600, 1000, 2000, 500, 100, 1000])
            }
        storage = StorageFactory().get_storage('dict_storage')
        storage.write_table(table_name='dataset', table_data=self.data)
        self.dataset = Dataset(in_storage=storage, in_table_name='dataset', id_name=['id'])

    def test_simple_model(self):
        m = SimpleModel()
        m.run(self.dataset, 'sqrt(dataset.attribute)', outcome_attribute='sqrtattr')
        self.assertEqual(ma.allclose(self.dataset.get_attribute('sqrtattr'),
                                     sqrt(self.data['attribute'])), True)
        self.assertEqual('sqrtattr' in self.dataset.get_primary_attribute_names(), True)

    def test_simple_model_without_outcome_attribute(self):
        m = SimpleModel()
        m.run(self.dataset, 'lattr = ln(dataset.attribute)')
        self.assertEqual(ma.allclose(self.dataset.get_attribute('lattr'),
                                     log(self.data['attribute'])), True)
        self.assertEqual('lattr' in self.dataset.get_primary_attribute_names(), True)
def test_simple_lag_variable(self):
    test_data = {
        1000: {
            'tests': {
                'id': array([1, 2, 3]),
                'attr1': array([10, 20, 30]),
                },
            },
        1001: {
            'tests': {
                'id': array([1, 2, 3]),
                'attr1': array([111, 222, 333]),
                },
            },
        }
    cache_creator = CreateTestAttributeCache()
    cache_creator.create_attribute_cache_with_data(self._temp_dir, test_data)

    SimulationState().set_current_time(1001)

    attribute_cache = AttributeCache()
    SessionConfiguration(new_instance=True,
                         package_order=['opus_core'],
                         in_storage=attribute_cache)

    ds = Dataset(in_storage=attribute_cache,
                 in_table_name='tests',
                 id_name=['id'],
                 dataset_name='tests')

    ds.compute_variables(['opus_core.tests.attr1'])
    self.assert_(ma.allequal(ds.get_attribute('attr1'), array([111, 222, 333])))

    ds.compute_variables(['opus_core.tests.attr1_lag1'])
    self.assert_(ma.allequal(ds.get_attribute('attr1_lag1'), array([10, 20, 30])))
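# Note on the expectations above: with the simulation clock set to 1001,
# 'attr1' resolves from the 1001 cache while 'attr1_lag1' resolves from the
# 1000 cache, so the lagged values are the prior year's attr1 for the same ids.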
def test_flush_dataset_correct_data(self):
    job_set = Dataset(self.job_set_resources, dataset_name="jobs")
    job_set.add_attribute(self.job_id, "job_id", metadata=AttributeType.PRIMARY)
    job_set.add_attribute(self.expected_sic_data, "sic", metadata=AttributeType.COMPUTED)
    job_set.flush_dataset()
    returned_sic_data = job_set.get_attribute("sic")
    returned_id_data = job_set.get_attribute("job_id")
    self.assert_(ma.allequal(returned_id_data, self.job_id))
    self.assert_(ma.allequal(returned_sic_data, self.expected_sic_data))
class SimpleModelTest(opus_unittest.OpusTestCase):
    def setUp(self):
        self.data = {
            'id': arange(10) + 1,
            'attribute': array([3000, 2800, 1000, 550, 600, 1000, 2000, 500, 100, 1000]),
            'sqrt_outcome': zeros(10)
            }
        storage = StorageFactory().get_storage('dict_storage')
        storage.write_table(table_name='dataset', table_data=self.data)
        self.dataset = Dataset(in_storage=storage, in_table_name='dataset', id_name=['id'])

    def test_simple_model(self):
        m = SimpleModel()
        m.run(self.dataset, 'sqrt(dataset.attribute)', outcome_attribute='sqrtattr')
        self.assertEqual(ma.allclose(self.dataset.get_attribute('sqrtattr'),
                                     sqrt(self.data['attribute'])), True)
        self.assertEqual('sqrtattr' in self.dataset.get_primary_attribute_names(), True)

    def test_simple_model_with_filter(self):
        m = SimpleModel()
        m.run(self.dataset, 'sqrt(dataset.attribute)', outcome_attribute='sqrt_outcome',
              dataset_filter='dataset.attribute>1000')
        expected = array([1, 1, 0, 0, 0, 0, 1, 0, 0, 0]) * sqrt(self.data['attribute'])
        self.assertEqual(ma.allclose(self.dataset.get_attribute('sqrt_outcome'), expected), True)
        self.assertEqual('sqrt_outcome' in self.dataset.get_primary_attribute_names(), True)

    def MASKEDtest_simple_model_with_random_filter(self):
        m = SimpleModel()
        m.run(self.dataset, 'sqrt(dataset.attribute)', outcome_attribute='sqrt_outcome',
              dataset_filter='(dataset.attribute>=1000) & (random_like(dataset.attribute)<=0.5)',
              )
        con_filter = self.dataset['attribute'] >= 1000
        results = self.dataset['sqrt_outcome'][con_filter]
        expected = sqrt(self.data['attribute'])[con_filter]
        # test that half of the elements passing the filter were sqrt-transformed
        self.assertEqual((results == expected).sum(), expected.size / 2)
        self.assertEqual((results != expected).sum(), expected.size / 2)

    def test_simple_model_without_outcome_attribute(self):
        m = SimpleModel()
        m.run(self.dataset, 'lattr = ln(dataset.attribute)')
        self.assertEqual(ma.allclose(self.dataset.get_attribute('lattr'),
                                     log(self.data['attribute'])), True)
        self.assertEqual('lattr' in self.dataset.get_primary_attribute_names(), True)

    def test_simple_model_with_outcome_values(self):
        m = SimpleModel()
        m.run(self.dataset, outcome_attribute='iniattr', outcome_values=zeros(10) - 1)
        self.assertEqual(ma.allclose(self.dataset.get_attribute('iniattr'), array(10 * [-1])), True)
        self.assertEqual('iniattr' in self.dataset.get_primary_attribute_names(), True)
        # run with filter
        m.run(self.dataset, outcome_attribute='iniattr', outcome_values=arange(10) + 1,
              dataset_filter='dataset.attribute>1000')
        expected = array([1, 2, -1, -1, -1, -1, 7, -1, -1, -1])
        self.assertEqual(ma.allclose(self.dataset.get_attribute('iniattr'), expected), True)
def _determine_exogenous_attribute_names(self, resources):
    exogenous_attribute_names = {}

    try:
        storage = copy.deepcopy(resources['in_storage'])
    except:
        pass
    else:
        exogenous_relationships_dataset = Dataset(
            in_storage=storage,
            in_table_name='exogenous_relationships',
            id_name='exogenous_id')

        try:
            base_tables = exogenous_relationships_dataset.get_attribute('base_table')
        except:
            logger.log_warning("An exogenous_relationships table was found, "
                "but did not contain the 'base_table' attribute.")
            return {}

        try:
            exogenous_tables = exogenous_relationships_dataset.get_attribute('exogenous_table')
        except:
            logger.log_warning("An exogenous_relationships table was found, "
                "but did not contain the 'exogenous_table' attribute.")
            return {}

        relationships = zip(base_tables, exogenous_tables)

        for base_table, exogenous_table in relationships:
            if base_table == resources['in_table_name']:
                exogenous_attributes = self.determine_stored_attribute_names(
                    in_storage=resources['in_storage'],
                    in_table_name=exogenous_table)

                for exogenous_attribute in exogenous_attributes:
                    try:
                        exogenous_attribute_names[exogenous_attribute]
                    except:
                        exogenous_attribute_names[exogenous_attribute] = exogenous_table
                    else:
                        raise AttributeError("Duplicate exogenous "
                            "attribute '%s' found in '%s' and '%s'."
                            % (exogenous_attribute,
                               exogenous_attribute_names[exogenous_attribute],
                               exogenous_table))

    return exogenous_attribute_names
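# The try/except/else above is a membership probe: the bare lookup raises
# KeyError for an unseen attribute name (except: record its table) and
# succeeds for one seen before (else: raise on the duplicate). An explicit
# 'if exogenous_attribute in exogenous_attribute_names: raise ...' check
# would express the same logic.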
def test_estimation_without_procedure(self):
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(
        table_name='dataset',
        table_data={
            "id": array([1, 2, 3, 4]),
            "attr1": array([4, 7, 2, 1]),
            "attr2": array([6.8, 2.6, 0, 1]),
            "submodel_id": array([1, 2, 2, 1]),
            "outcome": array([0, 1, 0, 1])
            }
        )
    ds = Dataset(in_storage=storage, in_table_name='dataset', id_name="id")
    specification = EquationSpecification(
        variables=array(["constant", "attr2", "attr1"]),
        coefficients=array(["constant", "ba2", "ba1"]))
    model = RegressionModel()
    model.estimate(specification, ds, "outcome")
    data_attr1 = model.get_data("ba1")
    self.assert_(ma.allequal(ds.get_attribute("attr1"), data_attr1),
                 msg="Error in getting data from regression model")

    specification_2subm = EquationSpecification(
        variables=array(["constant", "attr2", "constant", "attr1"]),
        coefficients=array(["constant", "ba2", "constant", "ba1"]),
        submodels=array([1, 1, 2, 2]))
    model = RegressionModel(submodel_string="submodel_id")
    model.estimate(specification_2subm, ds, "outcome")
    data_attr1 = model.get_data("ba1", 1)
    self.assert_(data_attr1 is None,
                 msg="Error in getting data from regression model with multiple submodels.")
    data_attr1 = model.get_data("ba1", 2)
    self.assert_(ma.allequal(ds.get_attribute("attr1")[1:3], data_attr1),
                 msg="Error in getting data from regression model with multiple submodels.")
    d = model.get_data_as_dataset(2)
    self.assert_(ma.allequal(ds.get_attribute("attr1")[1:3], d.get_attribute("ba1")),
                 msg="Error in getting data from regression model with multiple submodels.")
def test_flush_dataset_correct_flags(self):
    job_set = Dataset(self.job_set_resources, dataset_name="jobs")
    self.assert_('job_id' not in job_set.attribute_boxes)

    job_set.get_attribute("job_id")
    self.assert_(job_set.attribute_boxes["job_id"].is_in_memory())
    self.assert_(not job_set.attribute_boxes["job_id"].is_cached())

    job_set.flush_dataset()
    self.assert_(not job_set.attribute_boxes["job_id"].is_in_memory())
    self.assert_(job_set.attribute_boxes["job_id"].is_cached())

    job_set.get_attribute("job_id")
    self.assert_(job_set.attribute_boxes["job_id"].is_in_memory())
    self.assert_(job_set.attribute_boxes["job_id"].is_cached())
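# Lifecycle exercised above: a freshly loaded attribute is in memory but not
# cached; flush_dataset() writes it to the cache and drops it from memory;
# the next get_attribute() reloads it, leaving it both in memory and cached.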
def test_aggregate_all_mean(self):
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(table_name='zones',
        table_data={
            'my_variable': array([4, 8, 10, 1]),
            'id': array([1, 2, 3, 4]),
            }
        )
    storage.write_table(table_name='regions',
        table_data={
            "id": array([1]),
            }
        )
    ds = Dataset(in_storage=storage, in_table_name='zones',
                 id_name="id", dataset_name="myzone")
    ds2 = Dataset(in_storage=storage, in_table_name='regions',
                  id_name="id", dataset_name="myregion")
    dataset_pool = DatasetPool()
    dataset_pool._add_dataset('myzone', ds)
    dataset_pool._add_dataset('myregion', ds2)
    ds2.compute_variables(
        ["myvar = myregion.aggregate_all(myzone.my_variable, function=mean)"],
        dataset_pool=dataset_pool)
    values = ds2.get_attribute("myvar")
    should_be = array([5.75])
    self.assert_(ma.allclose(values, should_be, rtol=1e-6),
                 "Error in aggregate_all_mean")
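# Sanity check of the expected value (plain numpy, no Opus machinery needed):
# array([4, 8, 10, 1]).mean() == 5.75, which matches 'should_be' above.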
def test_change_with_index_and_filter(self):
    """The secondary dataset is restricted by index and filter."""
    data = {
        'my_id': array([1, 2, 3, 4, 5, 6]),
        'attr': array([10, 20, 30, 50, 46, 100]),
        'attr2': array(6 * [1])
        }
    data2 = {
        'attr': array([20, 6, 7, 3, 10, 30, 100, 50])
        }
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(table_name='dataset', table_data=data)
    dataset = Dataset(in_storage=storage, in_table_name='dataset', id_name='my_id')
    storage.write_table(table_name='dataset2', table_data=data2)
    dataset2 = Dataset(in_storage=storage, in_table_name='dataset2', id_name='attr')
    JoinAttributeModificationModel().run(dataset, dataset2,
                                         index=array([0, 1, 2, 7]),
                                         attribute_to_be_modified='attr2',
                                         filter='attr > 20')
    self.assertEqual(ma.allequal(dataset.get_attribute('attr2'),
                                 array([1, 1, 1, 0, 1, 1])), True)
def test_compute_does_not_unload_from_memory(self):
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(table_name='tests',
        table_data={
            "a_dependent_variable": array([1, 5, 1000]),
            "id": array([1, 3, 4])
            }
        )
    dataset = Dataset(in_storage=storage, in_table_name='tests',
                      id_name="id", dataset_name="tests")
    values = dataset.get_attribute("a_dependent_variable")
    self.assert_("a_dependent_variable" in dataset.get_attributes_in_memory())
    dataset.compute_variables("opus_core.tests.a_test_variable")
    self.assert_("a_dependent_variable" in dataset.get_attributes_in_memory())
    self.assert_("a_test_variable" in dataset.get_attributes_in_memory())
    # The type of values will be int32 on a 32-bit machine, and int64 on a 64-bit machine
    if platform.architecture()[0] == '64bit':
        self.assertEqual(values.dtype.type, int64)
    else:
        self.assertEqual(values.dtype.type, int32)
def test_estimation_with_restricted_submodel_size(self):
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(
        table_name='dataset',
        table_data={
            "id": array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
            "attr1": array([4, 7, 2, 1, 5, 4, 5, 3, 2, 1]),
            "attr2": array([6.8, 2.6, 0, 1, 0, 4.3, 2.1, 6, 8, 7]),
            "submodel_id": array([1, 2, 2, 1, 1, 1, 2, 1, 1, 1]),
            "outcome": array([0, 1, 0, 1, 1, 1, 0, 0, 1, 1])
            }
        )
    ds = Dataset(in_storage=storage, in_table_name='dataset', id_name="id")
    specification_2subm = EquationSpecification(
        variables=array(["constant", "attr2", "constant", "attr1"]),
        coefficients=array(["constant", "ba2", "constant", "ba1"]),
        submodels=array([1, 1, 2, 2]))
    model = RegressionModel(submodel_string="submodel_id",
                            estimate_config={'submodel_size_max': 5})
    model.estimate(specification_2subm, ds, "outcome",
                   procedure='opus_core.estimate_linear_regression')
    data_attr1 = model.get_data("ba1", 1)
    self.assert_(data_attr1 is None,
                 msg="Error in getting data from regression model with multiple submodels.")
    data_attr1 = model.get_data("ba2", 1)
    self.assert_(data_attr1.size == 5,
                 msg="Error in sub-sampling data in regression model.")
    data_attr1 = model.get_data("ba1", 2)
    self.assert_(ma.allequal(ds.get_attribute("attr1")[array([1, 2, 6])], data_attr1),
                 msg="Error in getting data from regression model with multiple submodels.")
def test_join_datasets_with_2_ids(self):
    from numpy import ma
    storage = StorageFactory().get_storage('dict_storage')

    storage.write_table(
        table_name='data1',
        table_data={
            'id1': array([2, 4, 2]),
            'id2': array([1, 2, 3]),
            'attr1': array([4, 7, 1]),
            'attr2': array([100, 0, 1000]),
            }
        )
    storage.write_table(
        table_name='data2',
        table_data={
            'id1': array([4, 2, 2]),
            'id2': array([2, 3, 1]),
            'attr1': array([50, 60, 70])
            }
        )

    ds1 = Dataset(in_storage=storage, in_table_name='data1',
                  id_name=['id1', 'id2'], dataset_name='data1')
    ds2 = Dataset(in_storage=storage, in_table_name='data2',
                  id_name=['id1', 'id2'], dataset_name='data2')
    ds1.join(ds2, 'attr1')
    self.assertEqual(ma.allequal(ds1.get_attribute('attr1'), array([70, 50, 60])), True)
    self.assertEqual(ma.allequal(ds1.get_attribute('attr2'), array([100, 0, 1000])), True)
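# How the rows line up above: join() matches on the composite key (id1, id2),
# so ds1's keys (2,1), (4,2) and (2,3) pick up ds2's attr1 values 70, 50 and
# 60 respectively, while ds1's own attr2 is left untouched.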
def test_regression_model_with_constant_variation(self):
    """Estimate the model and run it on the same data as the estimation.
    The result should be equal to the original data.
    If there is a change in the explanatory variables, the result should not be equal.
    """
    storage = StorageFactory().get_storage('dict_storage')
    table_name = 'dataset_table'
    data = {
        "attr1": array([30, 0, 90, 100, 65, 50]),
        "attr2": array([2002, 1968, 1880, 1921, 1956, 1989]),
        "attr3": array([0.5, 0.1, 0.3, 0.9, 0.2, 0.8]),
        "outcome": array([20, 40, 15, 5, 40, 30], dtype="int32"),
        "id": array([1, 2, 3, 4, 5, 6])
        }
    storage.write_table(table_name=table_name, table_data=data)
    dataset = Dataset(in_storage=storage, in_table_name=table_name, id_name="id")
    specification = EquationSpecification(
        variables=("attr1", "attr2", "attr3", "constant"),
        coefficients=("b1", "b2", "b3", "constant"))
    model = RegressionModelWithAdditionInitialResiduals(outcome_attribute="outcome")
    coef, dummy = model.estimate(specification, dataset, outcome_attribute="outcome",
                                 procedure="opus_core.estimate_linear_regression")
    result = model.run(specification, coef, dataset)
    # if estimated and run on the same data, it should give the original outcome
    self.assertEqual(ma.allequal(result, data["outcome"]), True)

    # if some values changed it shouldn't be the same for those elements
    dataset.set_values_of_one_attribute("attr1", array([32, 10]), arange(2))
    result2 = model.run(specification, coef, dataset)
    self.assertEqual(ma.allequal(result2[0:2], data["outcome"][0:2]), False)
    self.assertEqual(ma.allequal(result2[2:], data["outcome"][2:]), True)

    # check if exclusion of missing values is working
    dataset.set_values_of_one_attribute("outcome", array([0, 0]), array([2, 4]))
    dataset.delete_one_attribute("_init_error_outcome")
    model.run(specification, coef, dataset,
              run_config=Configuration({'exclude_missing_values_from_initial_error': True}))
    initial_error = dataset.get_attribute("_init_error_outcome")
    self.assertEqual(ma.allequal(initial_error[array([2, 4])], 0), True)
    self.assertEqual(ma.allequal(initial_error[array([0, 1, 3, 4, 5])], 0), False)
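# Why run() reproduces the estimation data exactly: this model variant stores
# each observation's estimation residual as a primary attribute
# ('_init_error_outcome') and adds it back to the regression prediction, so
# on unchanged data prediction + residual equals the observed outcome.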
def prepare_for_estimate(self, specification_dict=None, specification_storage=None,
                         specification_table=None, agent_set=None,
                         agents_for_estimation_storage=None,
                         agents_for_estimation_table=None, join_datasets=False,
                         index_to_unplace=None, portion_to_unplace=1.0,
                         agent_filter=None, data_objects={}):
    from opus_core.model import get_specification_for_estimation
    specification = get_specification_for_estimation(specification_dict,
                                                     specification_storage,
                                                     specification_table)
    if (agent_set is not None) and (index_to_unplace is not None):
        if self.location_id_string is not None:
            agent_set.compute_variables(self.location_id_string,
                                        resources=Resources(data_objects))
        if portion_to_unplace < 1:
            unplace_size = int(portion_to_unplace * index_to_unplace.size)
            end_index_to_unplace = sample_noreplace(index_to_unplace, unplace_size)
        else:
            end_index_to_unplace = index_to_unplace
        logger.log_status("Unplace " + str(end_index_to_unplace.size) + " agents.")
        agent_set.modify_attribute(self.choice_set.get_id_name()[0],
                                   -1 * ones(end_index_to_unplace.size),
                                   end_index_to_unplace)
    # create agents for estimation
    if agents_for_estimation_storage is not None:
        estimation_set = Dataset(in_storage=agents_for_estimation_storage,
                                 in_table_name=agents_for_estimation_table,
                                 id_name=agent_set.get_id_name(),
                                 dataset_name=agent_set.get_dataset_name())
        if agent_filter is not None:
            estimation_set.compute_variables(agent_filter,
                                             resources=Resources(data_objects))
            index = where(estimation_set.get_attribute(agent_filter) > 0)[0]
            estimation_set.subset_by_index(index, flush_attributes_if_not_loaded=False)
        if join_datasets:
            agent_set.join_by_rows(estimation_set, require_all_attributes=False,
                                   change_ids_if_not_unique=True)
            index = arange(agent_set.size() - estimation_set.size(), agent_set.size())
        else:
            index = agent_set.get_id_index(estimation_set.get_id_attribute())
    else:
        index = arange(agent_set.size())
    return (specification, index)
def prepare_for_estimate(specification_dict=None, specification_storage=None,
                         specification_table=None, agent_set=None, household_set=None,
                         agents_for_estimation_storage=None,
                         agents_for_estimation_table=None,
                         households_for_estimation_table=None,
                         join_datasets=False, filter=None, data_objects=None):
    specification = get_specification_for_estimation(specification_dict,
                                                     specification_storage,
                                                     specification_table)
    if agents_for_estimation_storage is not None:
        estimation_set = Dataset(in_storage=agents_for_estimation_storage,
                                 in_table_name=agents_for_estimation_table,
                                 id_name=agent_set.get_id_name(),
                                 dataset_name=agent_set.get_dataset_name())
        hh_estimation_set = None
        if households_for_estimation_table is not None:
            hh_estimation_set = Dataset(in_storage=agents_for_estimation_storage,
                                        in_table_name=households_for_estimation_table,
                                        id_name=household_set.get_id_name(),
                                        dataset_name=household_set.get_dataset_name())

        filter_index = arange(estimation_set.size())
        if filter:
            estimation_set.compute_variables(filter, resources=Resources(data_objects))
            filter_index = where(estimation_set.get_attribute(filter) > 0)[0]
            #estimation_set.subset_by_index(index, flush_attributes_if_not_loaded=False)

        if join_datasets:
            if hh_estimation_set is not None:
                household_set.join_by_rows(hh_estimation_set,
                                           require_all_attributes=False,
                                           change_ids_if_not_unique=True)
            agent_set.join_by_rows(estimation_set, require_all_attributes=False,
                                   change_ids_if_not_unique=True)
            index = arange(agent_set.size() - estimation_set.size(),
                           agent_set.size())[filter_index]
        else:
            index = agent_set.get_id_index(estimation_set.get_id_attribute()[filter_index])
    else:
        if agent_set is not None:
            index = arange(agent_set.size())
        else:
            index = None
    return (specification, index)
def test_simple_lag_variable2(self):
    test_data = {
        1000: {
            'tests': {
                'id': array([1, 2, 3, 4]),
                'attr1': array([10, 20, 30, 40]),
                },
            },
        1001: {
            'tests': {
                'id': array([1, 2, 3, 5]),
                'attr1': array([111, 222, 333, 555]),
                },
            },
        }
    cache_creator = CreateTestAttributeCache()
    cache_creator.create_attribute_cache_with_data(self._temp_dir, test_data)

    SimulationState().set_current_time(1001)

    attribute_cache = AttributeCache()
    SessionConfiguration(new_instance=True,
                         package_order=['opus_core'],
                         in_storage=attribute_cache)

    ds = Dataset(in_storage=attribute_cache,
                 in_table_name='tests',
                 id_name=['id'],
                 dataset_name='tests')

    ds.compute_variables(['opus_core.tests.attr1'])
    self.assert_(ma.allequal(ds.get_attribute('attr1'), array([111, 222, 333, 555])))

    ds.compute_variables(['opus_core.tests.attr1_lag1'])
    self.assert_(ma.allequal(ds.get_attribute('attr1_lag1'), array([10, 20, 30, 555])))
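# Note the last expected element: id 5 exists in the 1001 data but not in
# the 1000 cache, so its lag value falls back to the current value (555)
# instead of a year-1000 value.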
def test_compute_unloads_from_memory(self):
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(table_name='tests',
        table_data={
            'a_dependent_variable': array([1, 5, 10]),
            'id': array([1, 3, 4])
            }
        )
    dataset = Dataset(in_storage=storage, in_table_name='tests',
                      id_name='id', dataset_name='tests')
    SessionConfiguration(in_storage=storage)["flush_variables"] = True
    dataset.get_attribute("a_dependent_variable")
    self.assert_("a_dependent_variable" in dataset.get_attributes_in_memory())
    dataset.compute_variables("opus_core.tests.a_test_variable")
    self.assert_("a_dependent_variable" not in dataset.get_attributes_in_memory())
    self.assert_("a_test_variable" in dataset.get_attributes_in_memory())
    SimulationState().remove_singleton(delete_cache=True)
def test_dict_dataset(self):
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(
        table_name='dataset',
        table_data={
            "id": array([1, 2, 3, 4]),
            "attr": array([4, 7, 2, 1])
            }
        )
    ds = Dataset(in_storage=storage, in_table_name='dataset', id_name="id")
    self.assert_(ds.get_attribute("attr").sum() == 14,
                 "Something is wrong with the dataset.")
    self.assert_(ds.size() == 4, "Wrong size of dataset.")
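# A self-contained sketch of the dict_storage pattern used throughout these
# tests. The import locations below are assumed (the usual opus_core paths;
# they may differ between versions):
from numpy import array
from opus_core.storage_factory import StorageFactory
from opus_core.datasets.dataset import Dataset

storage = StorageFactory().get_storage('dict_storage')
storage.write_table(table_name='things',
                    table_data={'id': array([1, 2]), 'x': array([3, 4])})
things = Dataset(in_storage=storage, in_table_name='things', id_name='id')
assert things.size() == 2 and things.get_attribute('x').sum() == 7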
def test_aggregate_all(self):
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(table_name='zones',
                        table_data={'my_variable': array([4, 8, 0.5, 1]),
                                    'id': array([1, 2, 3, 4])})
    storage.write_table(table_name='regions',
                        table_data={'id': array([1])})
    ds = Dataset(in_storage=storage, in_table_name='zones',
                 id_name="id", dataset_name="myzone")
    ds2 = Dataset(in_storage=storage, in_table_name='regions',
                  id_name="id", dataset_name="myregion")
    dataset_pool = DatasetPool()
    dataset_pool._add_dataset('myzone', ds)
    dataset_pool._add_dataset('myregion', ds2)
    ds2.compute_variables(["myvar = myregion.aggregate_all(myzone.my_variable)"],
                          dataset_pool=dataset_pool)
    values = ds2.get_attribute("myvar")
    should_be = array([13.5])
    self.assert_(ma.allclose(values, should_be, rtol=1e-6),
                 "Error in aggregate_all")
def test_exogenous_attributes(self):
    base_dataset = Dataset(in_storage=self.storage,
                           in_table_name=self.base_table_name,
                           id_name=self.base_id)
    ExogenousAspectForDataset().apply(base_dataset)

    SimulationState().set_current_time(1980)
    exogenous_attribute1 = base_dataset.get_attribute(self.exogenous_attribute1)
    exogenous_attribute2 = base_dataset.get_attribute(self.exogenous_attribute2)
    self.assert_(ma.allequal(exogenous_attribute1, self.expected_exogenous_attribute_1980_1),
                 "Exogenous attribute loaded incorrectly. Expected '%s'; received '%s'."
                 % (self.expected_exogenous_attribute_1980_1, exogenous_attribute1))
    self.assert_(ma.allequal(exogenous_attribute2, self.expected_exogenous_attribute_1980_2),
                 "Exogenous attribute loaded incorrectly. Expected '%s'; received '%s'."
                 % (self.expected_exogenous_attribute_1980_2, exogenous_attribute2))

    SimulationState().set_current_time(1981)
    exogenous_attribute1 = base_dataset.get_attribute(self.exogenous_attribute1)
    exogenous_attribute2 = base_dataset.get_attribute(self.exogenous_attribute2)
    self.assert_(ma.allequal(exogenous_attribute1, self.expected_exogenous_attribute_1981_1),
                 "Exogenous attribute loaded incorrectly. Expected '%s'; received '%s'."
                 % (self.expected_exogenous_attribute_1981_1, exogenous_attribute1))
    self.assert_(ma.allequal(exogenous_attribute2, self.expected_exogenous_attribute_1981_2),
                 "Exogenous attribute loaded incorrectly. Expected '%s'; received '%s'."
                 % (self.expected_exogenous_attribute_1981_2, exogenous_attribute2))

    SimulationState().set_current_time(1982)
    exogenous_attribute1 = base_dataset.get_attribute(self.exogenous_attribute1)
    exogenous_attribute2 = base_dataset.get_attribute(self.exogenous_attribute2)
    self.assert_(ma.allequal(exogenous_attribute1, self.expected_exogenous_attribute_1982_1),
                 "Exogenous attribute loaded incorrectly. Expected '%s'; received '%s'."
                 % (self.expected_exogenous_attribute_1982_1, exogenous_attribute1))
    self.assert_(ma.allequal(exogenous_attribute2, self.expected_exogenous_attribute_1982_2),
                 "Exogenous attribute loaded incorrectly. Expected '%s'; received '%s'."
                 % (self.expected_exogenous_attribute_1982_2, exogenous_attribute2))
def test_no_change(self):
    """No common values in the join_attribute, therefore no change."""
    data = {
        'my_id': array([1, 2, 3, 4]),
        'attr': array([10, 20, 30, 50])
        }
    data2 = {
        'attr': array([2, 6, 7, 3])
        }
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(table_name='dataset', table_data=data)
    dataset = Dataset(in_storage=storage, in_table_name='dataset', id_name='my_id')
    storage.write_table(table_name='dataset2', table_data=data2)
    dataset2 = Dataset(in_storage=storage, in_table_name='dataset2', id_name='attr')
    JoinAttributeModificationModel().run(dataset, dataset2)
    self.assertEqual(ma.allequal(dataset.get_attribute('attr'), data['attr']), True)
def test_casting(self):
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(
        table_name='tests',
        table_data={
            "a_dependent_variable": array([1, 5, 1000]),
            "id": array([1, 3, 4])
            }
        )
    dataset = Dataset(in_storage=storage, in_table_name='tests',
                      id_name="id", dataset_name="tests")
    logger.enable_hidden_error_and_warning_words()
    # Next line should cause a 'WARNING' to be logged.
    dataset.compute_variables("opus_core.tests.a_test_variable",
                              resources=Resources({"check_variables": "*"}))
    logger.disable_hidden_error_and_warning_words()
    values = dataset.get_attribute("a_test_variable")
    self.assertEqual(values.dtype.type, int8)
class Test(opus_unittest.OpusTestCase):
    def setUp(self):
        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='households',
            table_data={
                'household_id': arange(10) + 1,
                #'household_id':array([1, 2, 3, 4, 5, 6, 7, 8]),
                #'income'      :array([1, 3, 2, 1, 3, 8, 5, 4]),
                ##'category_id' :array([1, 2, 2, 1, 2, 3, 3, 2]),
                #'building_id' :array([1, 2, 4, 3, 3, 2, 4, 2]),
                ##'large_area_id':array([1, 1, 2, 3, 3, 1, 2, 1]),
                'grid_id': arange(-1, 9, 1) + 1,
                'lucky': array([1, 0, 1, 0, 1, 1, 1, 1, 0, 0])
                }
            )

        storage.write_table(table_name='gridcells',
            table_data={
                #'building_id': array([1, 2, 3, 4]),
                #'large_area_id': array([1, 1, 3, 2]),
                'grid_id': arange(15) + 1,
                'filter': array([0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1]),
                'weight': array([0.1, 9, 15, 2, 5, 1, 6, 2.1, .3, 4, 3, 1, 10, 8, 7])
                }
            )

        dataset_pool = SessionConfiguration(in_storage=storage).get_dataset_pool()

        #create households
        self.households = Dataset(in_storage=storage, in_table_name='households',
                                  id_name="household_id", dataset_name="household")
        # create gridcells
        self.gridcells = Dataset(in_storage=storage, in_table_name='gridcells',
                                 id_name="grid_id", dataset_name="gridcell")
        dataset_pool.replace_dataset('household', self.households)
        dataset_pool.replace_dataset('gridcell', self.gridcells)

    def test_1(self):
        """"""
        sample_size = 5
        # check the individual gridcells
        # This is a stochastic model, so it may legitimately fail occasionally.
        index1 = where(self.households.get_attribute("lucky"))[0]
        #index2 = where(self.gridcells.get_attribute("filter"))[0]
        weight = self.gridcells.get_attribute("weight")
        estimation_config = {
            "agent_category_definition": ["household.lucky"],
            "choice_category_definition": ["gridcell.filter+1"]
            }
        for icc in [0, 1]:  #include_chosen_choice?
            #icc = sample([0,1],1)
            sampler_ret = weighted_sampler_by_category().run(
                dataset1=self.households, dataset2=self.gridcells, index1=index1,
                sample_size=sample_size, include_chosen_choice=icc,
                resources=estimation_config)
            # get results
            sampled_index = sampler_ret.get_2d_index()
            chosen_choices = UNPLACED_ID * ones(index1.size, dtype=DTYPE)
            where_chosen = where(sampler_ret.get_attribute("chosen_choice"))
            chosen_choices[where_chosen[0]] = where_chosen[1]

            self.assertEqual(sampled_index.shape, (index1.size, sample_size))

            if icc:
                placed_agents_index = self.gridcells.try_get_id_index(
                    self.households.get_attribute("grid_id")[index1], UNPLACED_ID)
                chosen_choice_index = resize(array([UNPLACED_ID], dtype=DTYPE),
                                             index1.shape)
                w = where(chosen_choices >= 0)[0]
                # for 64 bit machines, need to coerce the type to int32 -- on a
                # 32 bit machine the astype(int32) doesn't do anything
                chosen_choice_index[w] = sampled_index[w, chosen_choices[w]].astype(int32)
                self.assert_(alltrue(equal(placed_agents_index, chosen_choice_index)))
                sampled_index = sampled_index[:, 1:]

            self.assert_(alltrue(lookup(sampled_index.ravel(),
                                        arange(self.gridcells.size()),
                                        index_if_not_found=UNPLACED_ID) != UNPLACED_ID))
            self.assert_(all(not_equal(weight[sampled_index], 0.0)))
def prepare_for_estimate(self, add_member_prefix=True, specification_dict=None,
                         specification_storage=None, specification_table=None,
                         building_set=None, buildings_for_estimation_storage=None,
                         buildings_for_estimation_table=None, constants=None,
                         base_year=0, building_categories=None,
                         location_id_variable=None, join_datasets=False,
                         data_objects=None, **kwargs):
    #buildings = None
    if building_set is not None:
        if location_id_variable is not None:
            building_set.compute_variables(location_id_variable,
                                           resources=Resources(data_objects))
    # create agents for estimation
    if buildings_for_estimation_storage is not None:
        estimation_set = Dataset(in_storage=buildings_for_estimation_storage,
                                 in_table_name=buildings_for_estimation_table,
                                 id_name=building_set.get_id_name(),
                                 dataset_name=building_set.get_dataset_name())
        if location_id_variable:
            estimation_set.compute_variables(location_id_variable,
                                             resources=Resources(data_objects))
            # needs to be a primary attribute because of the join method below
            estimation_set.add_primary_attribute(
                estimation_set.get_attribute(location_id_variable),
                VariableName(location_id_variable).alias())

        years = estimation_set.get_attribute("scheduled_year")
        recent_years = constants['recent_years']
        indicator = zeros(estimation_set.size(), dtype="int32")
        for year in range(base_year - recent_years, base_year + 1):
            indicator = logical_or(indicator, years == year)
        idx = where(logical_not(indicator))[0]
        estimation_set.remove_elements(idx)

        #if filter:
            #estimation_set.compute_variables(filter, resources=Resources(data_objects))
            #index = where(estimation_set.get_attribute(filter) > 0)[0]
            #estimation_set.subset_by_index(index, flush_attributes_if_not_loaded=False)

        if join_datasets:
            building_set.join_by_rows(estimation_set, require_all_attributes=False,
                                      change_ids_if_not_unique=True)
            index = arange(building_set.size() - estimation_set.size(),
                           building_set.size())
        else:
            index = building_set.get_id_index(estimation_set.get_id_attribute())
    else:
        if building_set is not None:
            index = arange(building_set.size())
        else:
            index = None

    if add_member_prefix:
        specification_table = self.group_member.add_member_prefix_to_table_names(
            [specification_table])

    from opus_core.model import get_specification_for_estimation
    #from urbansim.functions import compute_supply_and_add_to_location_set
    specification = get_specification_for_estimation(specification_dict,
                                                     specification_storage,
                                                     specification_table)
    #specification, dummy = AgentLocationChoiceModelMember.prepare_for_estimate(self, add_member_prefix,
        #specification_dict, specification_storage,
        #specification_table,
        #location_id_variable=location_id_variable,
        #data_objects=data_objects, **kwargs)
    return (specification, index)
class Test(opus_unittest.OpusTestCase):
    def setUp(self):
        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='households',
                            table_data={
                                'household_id': arange(10) + 1,
                                'grid_id': arange(-1, 9, 1) + 1,
                                'lucky': array([1, 0, 1, 0, 1, 1, 1, 1, 0, 0])
                            })

        storage.write_table(
            table_name='gridcells',
            table_data={
                'grid_id': arange(15) + 1,
                'filter': array([0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1]),
                'weight': array([0.1, 9, 15, 2, 5, 1, 6, 2.1, .3, 4, 3, 1, 10, 8, 7])
            })

        # create households
        self.households = Dataset(in_storage=storage,
                                  in_table_name='households',
                                  id_name="household_id",
                                  dataset_name="household")

        # create gridcells
        self.gridcells = Dataset(in_storage=storage,
                                 in_table_name='gridcells',
                                 id_name="grid_id",
                                 dataset_name="gridcell")

    def test_1d_weight_array(self):
        """Sample gridcells for the 'lucky' households using a 1-d weight array."""
        sample_size = 5
        # check the individual gridcells
        # This is a stochastic model, so it may legitimately fail occasionally.
        index1 = where(self.households.get_attribute("lucky"))[0]
        index2 = where(self.gridcells.get_attribute("filter"))[0]
        weight = self.gridcells.get_attribute("weight")
        for icc in [0, 1]:  # include_chosen_choice?
            #icc = sample([0,1],1)
            sampler_ret = weighted_sampler().run(dataset1=self.households,
                                                 dataset2=self.gridcells,
                                                 index1=index1,
                                                 index2=index2,
                                                 sample_size=sample_size,
                                                 weight="weight",
                                                 include_chosen_choice=icc)
            # get results
            sampled_index = sampler_ret.get_2d_index()
            chosen_choices = UNPLACED_ID * ones(index1.size, dtype=DTYPE)
            where_chosen = where(sampler_ret.get_attribute("chosen_choice"))
            chosen_choices[where_chosen[0]] = where_chosen[1]

            self.assertEqual(sampled_index.shape, (index1.size, sample_size))

            if icc:
                placed_agents_index = self.gridcells.try_get_id_index(
                    self.households.get_attribute("grid_id")[index1], UNPLACED_ID)
                chosen_choice_index = resize(array([UNPLACED_ID], dtype=DTYPE),
                                             index1.shape)
                w = where(chosen_choices >= 0)[0]
                # for 64 bit machines, need to coerce the type to int32 -- on a
                # 32 bit machine the astype(int32) doesn't do anything
                chosen_choice_index[w] = sampled_index[w, chosen_choices[w]]
                self.assert_(
                    alltrue(equal(placed_agents_index, chosen_choice_index)))
                sampled_index = sampled_index[:, 1:]
            self.assert_(
                alltrue(
                    lookup(sampled_index.ravel(), index2,
                           index_if_not_found=UNPLACED_ID) != UNPLACED_ID))
            self.assert_(all(not_equal(weight[sampled_index], 0.0)))

    def test_2d_weight_array(self):
        """Sample gridcells using a 2-d (household x gridcell) weight array."""
        sample_size = 5
        n = self.households.size()
        index1 = where(self.households.get_attribute("lucky"))[0]
        index2 = where(self.gridcells.get_attribute("filter"))[0]
        lucky = self.households.get_attribute("lucky")
        weight = repeat(self.gridcells.get_attribute("weight")[newaxis, :],
                        n,
                        axis=0)
        for i in range(n):
            weight[i, :] += lucky[i]
        for icc in [0, 1]:
            sampler_ret = weighted_sampler().run(dataset1=self.households,
                                                 dataset2=self.gridcells,
                                                 index1=index1,
                                                 index2=index2,
                                                 sample_size=sample_size,
                                                 weight=weight,
                                                 include_chosen_choice=icc)
            sampled_index = sampler_ret.get_2d_index()
            chosen_choices = UNPLACED_ID * ones(index1.size, dtype=DTYPE)
            where_chosen = where(sampler_ret.get_attribute("chosen_choice"))
            chosen_choices[where_chosen[0]] = where_chosen[1]

            self.assertEqual(sampled_index.shape, (index1.size, sample_size))

            if icc:
                placed_agents_index = self.gridcells.try_get_id_index(
                    self.households.get_attribute("grid_id")[index1], UNPLACED_ID)
                chosen_choice_index = resize(array([UNPLACED_ID], dtype=DTYPE),
                                             index1.shape)
                w = where(chosen_choices >= 0)[0]
                chosen_choice_index[w] = sampled_index[w, chosen_choices[w]]
                self.assert_(
                    alltrue(equal(placed_agents_index, chosen_choice_index)))
                sampled_index = sampled_index[:, 1:]
            self.assert_(
                alltrue(
                    lookup(sampled_index.ravel(), index2,
                           index_if_not_found=UNPLACED_ID) != UNPLACED_ID))
            # weight is 2-d here: row j of sampled_index belongs to household
            # index1[j], so compare against that row of the weight matrix
            for j in range(index1.size):
                self.assert_(
                    all(not_equal(weight[index1[j], sampled_index[j, :]], 0.0)))
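# A minimal standalone sketch (plain numpy, no Opus classes) of the
# chosen-choice decoding used in the tests above: the sampler marks the chosen
# alternative per agent in a 0/1 matrix, and the tests recover the column
# index of that alternative, or UNPLACED_ID when nothing was chosen. All names
# and data here are illustrative, not part of the sampler API.
from numpy import array, where, ones, int32

UNPLACED_ID = -1
# rows = sampled agents, columns = sampled alternatives; a 1 marks the chosen
# alternative (agent 1 has no chosen alternative)
chosen_choice = array([[0, 1, 0],
                       [0, 0, 0],
                       [1, 0, 0]])
chosen_choices = UNPLACED_ID * ones(chosen_choice.shape[0], dtype=int32)
where_chosen = where(chosen_choice)
chosen_choices[where_chosen[0]] = where_chosen[1]
# -> array([ 1, -1,  0]): column of the chosen alternative for each agent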
#<=> 1=1*sin(phaseshift * peak + c) #<=> asin(1)= phaseshift * peak + c #<=> 1.57 = 1.125346622 * 2007 + c #<=> c = -2258.125221 econcycle = 67 #months in economic cycle peak = 2007.75 # peak year (late 2007) #for each cycle (2pi) x economic cycle years pass. period = (2 * math.pi) / (econcycle / 12.) #use to shift the peak relative to the calendar year phase = math.asin(1) - (period * peak) rows = [] for unit_name in unique(btclass.get_attribute( 'grouping_id')): #needs to be defined in building_type_classification for yr in range(2001, 2036): if unit_name == 3: #office; six group levels used here amp = 2.5 * .01 #amplitude determines the magnitude of the oscillation above base base = .06 elif unit_name == 2: #inst amp = 3 * .01 base = .15 elif unit_name == 1: #comm amp = 3 * .01 base = .06 elif unit_name == 5: #visit amp = 10 * .01 base = .3 elif unit_name == 4: #res amp = 2 * .01
def prepare_for_estimate( self, add_member_prefix=True, specification_dict=None, specification_storage=None, specification_table=None, building_set=None, buildings_for_estimation_storage=None, buildings_for_estimation_table=None, constants=None, base_year=0, building_categories=None, location_id_variable=None, join_datasets=False, data_objects=None, **kwargs ): # buildings = None if building_set is not None: if location_id_variable is not None: building_set.compute_variables(location_id_variable, resources=Resources(data_objects)) # create agents for estimation if buildings_for_estimation_storage is not None: estimation_set = Dataset( in_storage=buildings_for_estimation_storage, in_table_name=buildings_for_estimation_table, id_name=building_set.get_id_name(), dataset_name=building_set.get_dataset_name(), ) if location_id_variable: estimation_set.compute_variables(location_id_variable, resources=Resources(data_objects)) # needs to be a primary attribute because of the join method below estimation_set.add_primary_attribute( estimation_set.get_attribute(location_id_variable), VariableName(location_id_variable).alias() ) years = estimation_set.get_attribute("scheduled_year") recent_years = constants["recent_years"] indicator = zeros(estimation_set.size(), dtype="int32") for year in range(base_year - recent_years, base_year + 1): indicator = logical_or(indicator, years == year) idx = where(logical_not(indicator))[0] estimation_set.remove_elements(idx) # if filter: # estimation_set.compute_variables(filter, resources=Resources(data_objects)) # index = where(estimation_set.get_attribute(filter) > 0)[0] # estimation_set.subset_by_index(index, flush_attributes_if_not_loaded=False) if join_datasets: building_set.join_by_rows(estimation_set, require_all_attributes=False, change_ids_if_not_unique=True) index = arange(building_set.size() - estimation_set.size(), building_set.size()) else: index = building_set.get_id_index(estimation_set.get_id_attribute()) else: if building_set is not None: index = arange(building_set.size()) else: index = None if add_member_prefix: specification_table = self.group_member.add_member_prefix_to_table_names([specification_table]) from opus_core.model import get_specification_for_estimation # from urbansim.functions import compute_supply_and_add_to_location_set specification = get_specification_for_estimation(specification_dict, specification_storage, specification_table) # specification, dummy = AgentLocationChoiceModelMember.prepare_for_estimate(self, add_member_prefix, # specification_dict, specification_storage, # specification_table, # location_id_variable=location_id_variable, # data_objects=data_objects, **kwargs) return (specification, index)
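# Small self-contained illustration of the recent-years filter inside
# prepare_for_estimate above: keep only rows whose scheduled_year falls in
# [base_year - recent_years, base_year]. The data values are made up.
from numpy import array, logical_or, logical_not, where, zeros

years = array([1998, 2000, 2001, 1995, 1999])
base_year, recent_years = 2000, 2  # keep 1998..2000

indicator = zeros(years.size, dtype="int32")
for year in range(base_year - recent_years, base_year + 1):
    indicator = logical_or(indicator, years == year)
idx = where(logical_not(indicator))[0]  # rows to drop: 2001 and 1995
# estimation_set.remove_elements(idx) then removes exactly those rows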
class SimpleModelTest(opus_unittest.OpusTestCase):
    def setUp(self):
        self.data = {
            'id': arange(10) + 1,
            'attribute': array([3000, 2800, 1000, 550, 600, 1000, 2000, 500, 100, 1000]),
            'sqrt_outcome': zeros(10)
        }
        storage = StorageFactory().get_storage('dict_storage')
        storage.write_table(table_name='dataset', table_data=self.data)
        self.dataset = Dataset(in_storage=storage,
                               in_table_name='dataset',
                               id_name=['id'])

    def test_simple_model(self):
        m = SimpleModel()
        m.run(self.dataset,
              'sqrt(dataset.attribute)',
              outcome_attribute='sqrtattr')
        self.assertEqual(
            ma.allclose(self.dataset.get_attribute('sqrtattr'),
                        sqrt(self.data['attribute'])), True)
        self.assertEqual(
            'sqrtattr' in self.dataset.get_primary_attribute_names(), True)

    def test_simple_model_with_filter(self):
        m = SimpleModel()
        m.run(self.dataset,
              'sqrt(dataset.attribute)',
              outcome_attribute='sqrt_outcome',
              dataset_filter='dataset.attribute>1000')
        expected = array([1, 1, 0, 0, 0, 0, 1, 0, 0, 0]) * sqrt(
            self.data['attribute'])
        self.assertEqual(
            ma.allclose(self.dataset.get_attribute('sqrt_outcome'), expected),
            True)
        self.assertEqual(
            'sqrt_outcome' in self.dataset.get_primary_attribute_names(),
            True)

    # The prefix keeps unittest from collecting this test: with a random
    # filter, the exact half-and-half split asserted below is only the
    # expected value, so the assertions fail intermittently.
    def MASKEDtest_simple_model_with_random_filter(self):
        m = SimpleModel()
        m.run(
            self.dataset,
            'sqrt(dataset.attribute)',
            outcome_attribute='sqrt_outcome',
            dataset_filter=
            '(dataset.attribute>=1000) & (random_like(dataset.attribute)<=0.5)',
        )
        con_filter = self.dataset['attribute'] >= 1000
        results = self.dataset['sqrt_outcome'][con_filter]
        expected = sqrt(self.data['attribute'])[con_filter]
        # test that half of the elements passing the filter were square-rooted
        self.assertEqual((results == expected).sum(), expected.size / 2)
        self.assertEqual((results != expected).sum(), expected.size / 2)

    def test_simple_model_without_outcome_attribute(self):
        m = SimpleModel()
        m.run(self.dataset, 'lattr = ln(dataset.attribute)')
        self.assertEqual(
            ma.allclose(self.dataset.get_attribute('lattr'),
                        log(self.data['attribute'])), True)
        self.assertEqual('lattr' in self.dataset.get_primary_attribute_names(),
                         True)

    def test_simple_model_with_outcome_values(self):
        m = SimpleModel()
        m.run(self.dataset,
              outcome_attribute='iniattr',
              outcome_values=zeros(10) - 1)
        self.assertEqual(
            ma.allclose(self.dataset.get_attribute('iniattr'),
                        array(10 * [-1])), True)
        self.assertEqual(
            'iniattr' in self.dataset.get_primary_attribute_names(), True)
        # run with filter
        m.run(self.dataset,
              outcome_attribute='iniattr',
              outcome_values=arange(10) + 1,
              dataset_filter='dataset.attribute>1000')
        expected = array([1, 2, -1, -1, -1, -1, 7, -1, -1, -1])
        self.assertEqual(
            ma.allclose(self.dataset.get_attribute('iniattr'), expected), True)
def test_err_when_asking_for_attribute_that_is_not_in_cache(self): job_set = Dataset(self.job_set_resources, dataset_name="jobs") job_set.add_attribute(self.job_id, "job_id", metadata=AttributeType.PRIMARY) job_set.flush_dataset() job_set.get_attribute('job_id') self.assertRaises(NameError, job_set.get_attribute, 'attribute_that_does_not_exist')
def prepare_for_estimate(self, specification_dict=None, specification_storage=None,
                          specification_table=None, agent_set=None,
                          agents_for_estimation_storage=None,
                          agents_for_estimation_table=None, join_datasets=False,
                          index_to_unplace=None, portion_to_unplace=1.0,
                          compute_lambda=False, grouping_location_set=None,
                          movers_variable=None, movers_index=None,
                          filter=None, location_id_variable=None,
                          data_objects={}):
    """Always pass 'location_id_variable' if the location id is to be computed
    on the estimation set, i.e. if it is not a primary attribute of the
    estimation set. Set 'index_to_unplace' to None if 'compute_lambda' is True;
    in that case the annual supply is estimated without unplacing agents.
    'grouping_location_set', 'movers_variable' and 'movers_index' must be given
    if 'compute_lambda' is True.
    """
    from opus_core.model import get_specification_for_estimation
    from urbansim.functions import compute_supply_and_add_to_location_set
    specification = get_specification_for_estimation(specification_dict,
                                                     specification_storage,
                                                     specification_table)
    if (agent_set is not None) and (index_to_unplace is not None):
        if self.location_id_string is not None:
            agent_set.compute_variables(self.location_id_string,
                                        resources=Resources(data_objects))
        if portion_to_unplace < 1:
            unplace_size = int(portion_to_unplace * index_to_unplace.size)
            end_index_to_unplace = sample_noreplace(index_to_unplace, unplace_size)
        else:
            end_index_to_unplace = index_to_unplace
        logger.log_status("Unplace " + str(end_index_to_unplace.size) + " agents.")
        agent_set.modify_attribute(self.choice_set.get_id_name()[0],
                                   resize(array([-1]), end_index_to_unplace.size),
                                   end_index_to_unplace)
    if compute_lambda:
        movers = zeros(agent_set.size(), dtype="bool8")
        if movers_index is not None:
            movers[movers_index] = 1
        agent_set.add_primary_attribute(movers, "potential_movers")
        self.estimate_config["weights_for_estimation_string"] = \
            self.estimate_config["weights_for_estimation_string"] + "_from_lambda"
        compute_supply_and_add_to_location_set(self.choice_set,
                                               grouping_location_set,
                                               self.run_config["number_of_units_string"],
                                               self.run_config["capacity_string"],
                                               movers_variable,
                                               self.estimate_config["weights_for_estimation_string"],
                                               resources=Resources(data_objects))

    # create agents for estimation
    if (agents_for_estimation_storage is not None) and (agents_for_estimation_table is not None):
        estimation_set = Dataset(in_storage=agents_for_estimation_storage,
                                 in_table_name=agents_for_estimation_table,
                                 id_name=agent_set.get_id_name(),
                                 dataset_name=agent_set.get_dataset_name())
        if location_id_variable is not None:
            estimation_set.compute_variables(location_id_variable,
                                             resources=Resources(data_objects))
            # needs to be a primary attribute because of the join method below
            estimation_set.add_primary_attribute(
                estimation_set.get_attribute(location_id_variable),
                VariableName(location_id_variable).get_alias())
        if filter:
            values = estimation_set.compute_variables(filter,
                                                      resources=Resources(data_objects))
            index = where(values > 0)[0]
            estimation_set.subset_by_index(index,
                                           flush_attributes_if_not_loaded=False)
        if join_datasets:
            agent_set.join_by_rows(estimation_set,
                                   require_all_attributes=False,
                                   change_ids_if_not_unique=True)
            index = arange(agent_set.size() - estimation_set.size(), agent_set.size())
        else:
            index = agent_set.get_id_index(estimation_set.get_id_attribute())
    else:
        if agent_set is not None:
            if filter is not None:
                values = agent_set.compute_variables(filter,
                                                     resources=Resources(data_objects))
                index = where(values > 0)[0]
            else:
                index = arange(agent_set.size())
else: index = None return (specification, index)
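# Standalone sketch of the "unplace a portion of agents" step above, using
# plain numpy in place of the Opus helpers: sample_noreplace is assumed to
# draw without replacement, like numpy.random.choice(..., replace=False).
# All data values here are made up for illustration.
from numpy import arange, array, resize
from numpy.random import choice

index_to_unplace = arange(100)      # agents eligible for unplacement
portion_to_unplace = 0.5
unplace_size = int(portion_to_unplace * index_to_unplace.size)
end_index_to_unplace = choice(index_to_unplace, unplace_size, replace=False)
# the model then overwrites the location id of those agents with -1:
locations = resize(array([7]), 100)  # made-up current location ids
locations[end_index_to_unplace] = resize(array([-1]), end_index_to_unplace.size)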
import os
import urbansim
us_path = urbansim.__path__[0]
from opus_core.storage_factory import StorageFactory
storage = StorageFactory().get_storage('tab_storage',
    storage_location = os.path.join(us_path, "data/tutorial"))

from opus_core.datasets.dataset import Dataset
households = Dataset(in_storage = storage,
                     in_table_name = 'households',
                     id_name='household_id',
                     dataset_name='household')

households.get_attribute_names()    # only attributes loaded so far
households.get_id_attribute()
households.size()
households.get_attribute("income")  # loads 'income' on demand
households.get_attribute_names()
households.load_dataset()           # loads all remaining attributes
households.get_attribute_names()
#households.plot_histogram("income", bins = 10)
#households.r_histogram("income")
#households.r_scatter("persons", "income")
households.correlation_coefficient("persons", "income")
households.correlation_matrix(["persons", "income"])
households.summary()
households.add_primary_attribute(data=[4,6,9,2,4,8,2,1,3,2], name="location")
households.get_attribute_names()
households.modify_attribute(name="location", data=[0,0], index=[0,1])
households.get_attribute("location")
households.get_data_element_by_id(5).location
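# A possible continuation of the session above (a sketch, not part of the
# original tutorial): write the modified households back out. write_dataset is
# assumed to accept attributes/out_storage/out_table_name, as elsewhere in the
# codebase; the table name 'households_out' is made up.
out_storage = StorageFactory().get_storage('dict_storage')
households.write_dataset(attributes=["income", "location"],
                         out_storage=out_storage,
                         out_table_name='households_out')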
def viewDatasetAction(self): #print "viewDatasetAction" model = self.model table_name = str(model.fileName(self.currentIndex)) table_name_full = str(model.filePath(self.currentIndex)) parentIndex = model.parent(self.currentIndex) parent_name = str(model.fileName(parentIndex)) parent_name_full = str(model.filePath(parentIndex)) storage = StorageFactory().get_storage( 'flt_storage', storage_location=parent_name_full) columns = storage.get_column_names(table_name) # temporarily use the table name for the dataset name # dataset_name = DatasetFactory().dataset_name_for_table(table_name) # Aaron - please check this way of getting the XMLConfiguration -- is this the best way? # general = self.mainwindow.toolboxBase.opus_core_xml_configuration.get_section('general') # # problem: this gets the package order for the current project, but the viewer shows all the data # package_order = general['dataset_pool_configuration'].package_order # PREVIOUS HACK: # package_order = ['seattle_parcel','urbansim_parcel', 'eugene', 'urbansim', 'opus_core'] # temporary code: just use a generic dataset for now data = Dataset(in_storage=storage, dataset_name=table_name, in_table_name=table_name, id_name=[]) # code to get a more specialized dataset if possible (doesn't work with table names not ending in 's' # unless they are in the exceptions list in DatasetFactory) # data = DatasetFactory().search_for_dataset_with_hidden_id(dataset_name, package_order, # arguments={'in_storage': storage, 'in_table_name': table_name}) # Need to add a new tab to the main tabs for display of the data container = QWidget() widgetLayout = QVBoxLayout(container) summaryGroupBox = QGroupBox(container) summaryGroupBox.setTitle( QString("Year: %s Run name: %s" % (parent_name, table_name_full.split('/')[-3]))) summaryGroupBox.setFlat(True) summaryGroupBoxLayout = QVBoxLayout(summaryGroupBox) # Grab the summary data buffer = StringIO() data.summary(output=buffer, unload_after_each_attribute=True) strng = buffer.getvalue() buffer.close() textBrowser = QTextBrowser() # textBrowser.insertPlainText(strng) textBrowser.insertHtml(self.parse_dataset_summary(strng)) summaryGroupBoxLayout.addWidget(textBrowser) widgetLayout.addWidget(summaryGroupBox) tableGroupBox = QGroupBox(container) tableGroupBox.setTitle(QString("Table View")) tableGroupBox.setFlat(True) tableGroupBoxLayout = QVBoxLayout(tableGroupBox) tv = QTableView() header = columns tabledata_tmp = [] for column in columns: tabledata_tmp.append(data.get_attribute(column)) # Transpose the lists tabledata = map(None, *tabledata_tmp) # If the table data is not empty then we display it if tabledata: #tv.resizeColumnsToContents() tm = TableModel(tabledata, header, container) tv.setModel(tm) tv.setSortingEnabled(True) tableGroupBoxLayout.addWidget(tv) widgetLayout.addWidget(tableGroupBox) container.tabIcon = IconLibrary.icon('inspect') container.tabLabel = QString(table_name) self.manager._attach_tab(container)
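# Note on the transpose idiom in viewDatasetAction above (Python 2 only):
# map with None as the function zips the column lists into row tuples and
# pads ragged columns with None. A tiny illustration with made-up data:
cols = [[1, 2, 3], ['a', 'b', 'c']]
rows = map(None, *cols)  # -> [(1, 'a'), (2, 'b'), (3, 'c')]
# itertools.izip_longest(*cols) is the lazy equivalent in Python 2.6+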