def setUp(self):
    db_configs = []
    for engine in _get_installed_database_engines():
        config = TestDatabaseConfiguration(protocol=engine)
        db_configs.append(config)

    self.test_db = 'OpusDatabaseTestDatabase'
    test_table = 'test_table'

    self.dbs = []
    for config in db_configs:
        try:
            server = DatabaseServer(config)
            if server.has_database(self.test_db):
                server.drop_database(self.test_db)
            server.create_database(self.test_db)
            self.assertTrue(server.has_database(database_name=self.test_db))
            db = OpusDatabase(database_server_configuration=config,
                              database_name=self.test_db)
            self.assertFalse(db.table_exists(test_table))
            self.dbs.append((db, server))
        except:
            import traceback
            traceback.print_exc()
            logger.log_warning('Could not start server for protocol %s' % config.protocol)
def setUp(self):
    db_configs = []
    for engine in get_testable_engines():
        config = TestDatabaseConfiguration(protocol=engine)
        db_configs.append(config)

    self.database_name = 'test_database'
    self.dbs = []
    for config in db_configs:
        try:
            server = DatabaseServer(config)
            if server.has_database(self.database_name):
                server.drop_database(self.database_name)
            server.create_database(self.database_name)
            self.assertTrue(server.has_database(database_name=self.database_name))
            db = OpusDatabase(database_server_configuration=config,
                              database_name=self.database_name)
            storage = sql_storage(storage_location=db)
            self.dbs.append((db, server, storage))
            self.storage = storage
        except:
            import traceback
            traceback.print_exc()
            print 'WARNING: could not start server for protocol %s' % config.protocol
class AbstractServiceTests(opus_unittest.OpusTestCase):
    def setUp(self):
        self.database_name = 'test_services_database'
        self.config = TestDatabaseConfiguration(database_name=self.database_name)
        self.db_server = DatabaseServer(self.config)

    def tearDown(self):
        self.db_server.drop_database(self.database_name)
        self.db_server.close()

    def test_create_when_already_exists(self):
        """Shouldn't do anything if the database already exists."""
        self.db_server.create_database(self.database_name)
        db = self.db_server.get_database(self.database_name)
        self.assertFalse(db.table_exists('run_activity'))
        self.assertFalse(db.table_exists('computed_indicators'))

        services = AbstractService(self.config)
        services.services_db.close()
        self.assertTrue(db.table_exists('run_activity'))
        self.assertTrue(db.table_exists('computed_indicators'))

    def test_create(self):
        """Should create services tables if the database doesn't exist."""
        services = AbstractService(self.config)
        services.services_db.close()
        self.assertTrue(self.db_server.has_database(self.database_name))
        db = self.db_server.get_database(self.database_name)
        self.assertTrue(db.table_exists('run_activity'))
        self.assertTrue(db.table_exists('computed_indicators'))
def prepare_for_simulation(self, run_configuration, cache_directory=None):
    self.config = Resources(run_configuration)
    self.simulation_state = SimulationState(new_instance=True,
                                            base_cache_dir=cache_directory)

    ### TODO: Get rid of this! There is no good reason to be changing the
    ### Configuration.
    if self.config['cache_directory'] is None:
        self.config['cache_directory'] = self.simulation_state.get_cache_directory()

    SessionConfiguration(new_instance=True,
                         package_order=self.config['dataset_pool_configuration'].package_order,
                         in_storage=AttributeCache())

    ForkProcess().fork_new_process(
        self.config['creating_baseyear_cache_configuration'].cache_scenario_database,
        self.config)

    # Create output database (normally done by run manager)
    if 'estimation_database_configuration' in self.config:
        db_server = DatabaseServer(self.config['estimation_database_configuration'])
        if not db_server.has_database(self.config['estimation_database_configuration'].database_name):
            db_server.create_database(self.config['estimation_database_configuration'].database_name)
def create_storage(self):
    try:
        server = DatabaseServer(self.server_config)
    except:
        logger.log_error('Cannot connect to the database server hosting the services database (%s).'
                         % self.server_config.database_name)
        raise

    if not server.has_database(self.server_config.database_name):
        server.create_database(self.server_config.database_name)

    try:
        services_db = server.get_database(self.server_config.database_name)
    except:
        logger.log_error('Cannot connect to a services database on %s.'
                         % server.get_connection_string(scrub=True))
        raise

    metadata.bind = services_db.engine
    setup_all()
    create_all()

    return services_db
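# A minimal usage sketch (assumed wiring, not shown above): create_storage()
# is reached by constructing the owning service object, whose server_config
# names the services database. The configuration class below mirrors the
# tests in this file; treat the exact entry point as an assumption.
config = TestDatabaseConfiguration(database_name='test_services_database')
services = AbstractService(config)   # constructing the service creates the tables
services_db = services.services_db   # database handle returned by create_storage()
services_db.close()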
def prepare_for_simulation(self, run_configuration, cache_directory=None):
    self.config = Resources(run_configuration)
    self.simulation_state = SimulationState(new_instance=True,
                                            base_cache_dir=cache_directory,
                                            start_time=self.config.get("base_year", 0))

    ### TODO: Get rid of this! There is no good reason to be changing the
    ### Configuration.
    if self.config["cache_directory"] is None:
        self.config["cache_directory"] = self.simulation_state.get_cache_directory()

    SessionConfiguration(new_instance=True,
                         package_order=self.config["dataset_pool_configuration"].package_order,
                         in_storage=AttributeCache())

    ForkProcess().fork_new_process(
        self.config["creating_baseyear_cache_configuration"].cache_scenario_database,
        self.config)

    # Create output database (normally done by run manager)
    if "estimation_database_configuration" in self.config:
        db_server = DatabaseServer(self.config["estimation_database_configuration"])
        if not db_server.has_database(self.config["estimation_database_configuration"].database_name):
            db_server.create_database(self.config["estimation_database_configuration"].database_name)
def run_run(self, run_resources, run_name=None,
            run_as_multiprocess=True, run_in_background=False):
    """Check that the run hasn't already been marked running,
    log it into run_activity, run the simulation,
    and mark the run as done/failed.
    """
    if not self.ready_to_run:
        raise RuntimeError('RunManager.setup_new_run must be executed before RunManager.run_run')

    if run_resources['cache_directory'] != self.current_cache_directory:
        raise RuntimeError('The configuration and the RunManager conflict on the proper cache_directory')

    self.add_row_to_history(self.run_id, run_resources, "started", run_name=run_name)

    try:
        # Test pre-conditions
        model_system_class_path = run_resources.get('model_system', None)
        if model_system_class_path is None:
            raise TypeError("The configuration must specify model_system, the"
                            " full Opus path to the model system to be used.")

        # Create baseyear cache
        self.create_baseyear_cache(run_resources)

        # Create brand-new output database (deletes any prior contents)
        if 'estimation_database_configuration' in run_resources:
            db_server = DatabaseServer(run_resources['estimation_database_configuration'])
            if not db_server.has_database(run_resources['estimation_database_configuration'].database_name):
                db_server.create_database(run_resources['estimation_database_configuration'].database_name)

        # Run simulation
        exec('from %s import ModelSystem' % model_system_class_path)
        model_system = ModelSystem()
        self.model_system = model_system

        if 'base_year' not in run_resources:
            run_resources['base_year'] = run_resources['years'][0] - 1

        self._create_seed_dictionary(run_resources)
        # model_system.run_in_same_process(run_resources)
        if run_as_multiprocess:
            model_system.run_multiprocess(run_resources)
        else:
            model_system.run_in_one_process(run_resources,
                                            run_in_background=run_in_background,
                                            class_path=model_system_class_path)
        self.model_system = None
    except:
        self.add_row_to_history(self.run_id, run_resources, "failed", run_name=run_name)
        self.ready_to_run = False
        raise  # This re-raises the last exception
    else:
        self.add_row_to_history(self.run_id, run_resources, "done", run_name=run_name)
        self.ready_to_run = False

    return self.run_id
def save_results(self, out_storage=None, model_name=None):
    if self.specification is None or self.coefficients is None:
        raise ValueError("model specification or coefficients are None")

    #invalid = self.coefficients.is_invalid()
    if False:  # coefficient validity check is currently disabled
        logger.log_warning('Invalid coefficients. Not saving results!')
        return

    if model_name is None:
        model_name = self.config.get('model_name_for_coefficients', None)

    if model_name is None:
        if self.model_name is not None:
            model_name = self.model_name
        else:
            raise ValueError("model_name unspecified")

    out_storage_available = True
    if out_storage:
        pass
    elif 'estimation_database_configuration' in self.config:
        try:
            db_server = DatabaseServer(self.config['estimation_database_configuration'])
            database_name = self.config["estimation_database_configuration"].database_name
            if not db_server.has_database(database_name):
                db_server.create_database(database_name)
            output_db = db_server.get_database(database_name)
            out_storage = StorageFactory().get_storage(
                type='sql_storage',
                storage_location=output_db)
        except:
            logger.log_warning("Problem connecting to the database given by 'estimation_database_configuration'.")
            out_storage_available = False
    else:
        logger.log_warning("No estimation_database_configuration given.")
        out_storage_available = False

    # The original model name of development_project_lcm is too long for a
    # MySQL table name, so truncate it.
    if model_name.rfind("_development_project_location_choice_model") >= 0:
        model_name = model_name.replace('_project', '')
    specification_table = '%s_specification' % model_name
    coefficients_table = '%s_coefficients' % model_name

    if out_storage_available:
        logger.start_block("Writing specification and coefficients into storage given by 'estimation_database_configuration'")
        self.specification.write(out_storage=out_storage,
                                 out_table_name=specification_table)
        self.coefficients.write(out_storage=out_storage,
                                out_table_name=coefficients_table)
        logger.end_block()

    logger.start_block("Writing specification and coefficients into %s"
                       % AttributeCache().get_storage_location())
    self.specification.write(out_storage=AttributeCache(),
                             out_table_name=specification_table)
    self.coefficients.write(out_storage=AttributeCache(),
                            out_table_name=coefficients_table)
    logger.end_block()
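# A hedged sketch of the configuration save_results() expects. Only the key
# names are grounded in the code above; the configuration class and values
# below are illustrative assumptions.
config = {
    'model_name_for_coefficients': 'my_model',  # hypothetical model name
    'estimation_database_configuration': EstimationDatabaseConfiguration(
        database_name='my_estimation_db'),      # hypothetical output database
}
# With this in place, save_results() writes <model_name>_specification and
# <model_name>_coefficients both to the SQL database and to the attribute cache.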
def prepare_for_run(self, database_configuration, database_name):
    ## The SQL protocol, hostname, username and password are set in
    ## $OPUS_HOME/settings/database_server_setting.xml
    db_config = DatabaseConfiguration(database_name=database_name,
                                      database_configuration=database_configuration)
    db_server = DatabaseServer(db_config)
    if not db_server.has_database(database_name):
        db_server.create_database(database_name)
    db = db_server.get_database(database_name)
    self.out_storage = sql_storage(storage_location=db)
    return self.out_storage
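# A minimal usage sketch, assuming this method lives on a model or tool object
# (the owning class is not shown above; `model` and the argument values are
# hypothetical):
out_storage = model.prepare_for_run(
    database_configuration='estimation_database_server',  # node in database_server_setting.xml
    database_name='my_output_db')
# The returned sql_storage can then be passed wherever an out_storage is expected.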
def __init__(self, indicator_directory,
             name=None,
             output_type=None,
             storage_location=None,
             output_style=ALL,
             fixed_field_format=None  # Only used with the 'fixed_field' output type
             ):
    if output_type == 'sql' and not isinstance(storage_location, DatabaseConfiguration):
        raise Exception("If Table output_type is 'sql', a DatabaseConfiguration must be passed as storage_location.")
    elif output_type in ['dbf', 'csv', 'tab', 'esri', 'fixed_field'] and \
            storage_location is not None and \
            not isinstance(storage_location, str):
        raise Exception("If Table output_type is %s, storage_location must be a path to the output directory" % output_type)
    elif output_type not in ['dbf', 'csv', 'tab', 'sql', 'esri', 'fixed_field']:
        raise Exception("Table output_type must be either dbf, csv, tab, sql, esri, or fixed_field, not %s" % output_type)

    if output_type == "fixed_field" and not fixed_field_format:
        raise ValueError("If Table output_type is 'fixed_field', an XML format string must be passed as fixed_field_format.")
    self.fixed_field_format = fixed_field_format

    if output_style not in [Table.ALL, Table.PER_YEAR, Table.PER_ATTRIBUTE]:
        raise Exception('%s output_style is not appropriate. Choose from '
                        'Table.ALL, Table.PER_YEAR, and Table.PER_ATTRIBUTE' % output_style)

    self.output_type = output_type
    self.output_style = output_style

    if storage_location is None:
        storage_location = indicator_directory
    elif output_type == 'sql':
        server = DatabaseServer(database_server_configuration=storage_location)
        if not server.has_database(database_name=storage_location.database_name):
            server.create_database(database_name=storage_location.database_name)
        storage_location = server.get_database(database_name=storage_location.database_name)
    self.storage_location = storage_location

    self.output_storage = StorageFactory().get_storage(
        type='%s_storage' % self.output_type,
        storage_location=storage_location)

    self.name = name
    self.indicator_directory = indicator_directory
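# Two hedged construction sketches for Table (argument values are illustrative):
table = Table(indicator_directory='/tmp/indicators',  # hypothetical path
              output_type='csv')                      # output lands in indicator_directory

db_config = DatabaseConfiguration(database_name='indicators_db')  # hypothetical database
table = Table(indicator_directory='/tmp/indicators',
              output_type='sql',
              storage_location=db_config)  # the database is created if missing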
(options, args) = parser.parse_args()

cache_path = options.cache_path
database_name = options.database_name

if database_name is None or cache_path is None:
    parser.print_help()
    sys.exit(1)

table_name = options.table_name

logger.log_status('Initializing database...')
db_server = DatabaseServer(EstimationDatabaseConfiguration(
    database_name=database_name,
    database_configuration=options.database_configuration))
if not db_server.has_database(database_name):  # if only one table should be exported,
    db_server.create_database(database_name)   # the database can exist
db = db_server.get_database(database_name)

input_storage = flt_storage(storage_location=cache_path)
output_storage = sql_storage(storage_location=db)

with logger.block('Exporting cache to sql...'):
    if table_name is None:
        ExportStorage().export(in_storage=input_storage,
                               out_storage=output_storage)
    else:
        db.drop_table(table_name)
        ExportStorage().export_dataset(table_name,
                                       in_storage=input_storage,
                                       out_storage=output_storage)
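# A hedged command-line sketch for this export tool, mirroring the flags the
# functional tests below pass (-c cache directory, -d database, optional -t
# for a single table); the exact script path is an assumption:
#
#   python opus_core/tools/do_export_cache_to_sql.py \
#       -c /path/to/cache/2000 -d my_database -t households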
class AbstractFunctionalTest(object):
    protocol = ''

    def setUp(self):
        self.db_config = TestDatabaseConfiguration(protocol=self.protocol)
        self.db_config_node = self.db_config._database_configuration_node()
        self.db_server = DatabaseServer(self.db_config)
        self.test_db = 'OpusDatabaseTestDatabase'
        self.export_from_cache_opus_path = "opus_core.tools.do_export_cache_to_sql"
        self.export_to_cache_opus_path = "opus_core.tools.do_export_sql_to_cache"
        self.year = 1000
        self.temp_dir = tempfile.mkdtemp(prefix='opus_tmp')
        self.test_data = {
            self.year: {
                'table_a': {
                    'tablea_id': array([1, 2, 3]),
                    'tablea_id_name': array(['1', '2', '3']),
                    'value1': array([1.0, 2.001, 3], dtype='float'),
                    'value2': array([True, False, False], dtype='i'),  ## sqlite has problems handling the bool type
                },
                'table_b': {
                    'tableb_id': array([1, 2, 3]),
                    'tableb_id_name': array(['one', 'two', 'three']),
                    'value3': array([1.0, 2.001, 3], dtype='float'),
                },
            },
        }
        cache_creator = CreateTestAttributeCache()
        cache_creator.create_attribute_cache_with_data(self.temp_dir, self.test_data)

    def tearDown(self):
        if self.db_server.has_database(self.test_db):
            self.db_server.drop_database(self.test_db)
        self.db_server.close()
        if os.path.exists(self.temp_dir):
            rmtree(self.temp_dir)

    def test_export_all_tables(self):
        logger.log_status("Test export all tables for %s with %s" % (self.protocol, self.__class__))
        optional_args = ['-c', os.path.join(self.temp_dir, str(self.year)),
                         '-d', self.test_db,
                         '--database_configuration=%s' % self.db_config_node]
        self._call_script(self.export_from_cache_opus_path, args=optional_args)

        self.assertTrue(self.db_server.has_database(database_name=self.test_db))
        db = OpusDatabase(database_server_configuration=self.db_config,
                          database_name=self.test_db)
        table_names = self.test_data[self.year].keys()
        existing_tables = db.get_tables_in_database()
        self.assertEqual(set(existing_tables), set(table_names))

        ## export data from db to cache
        export_year = str(self.year + 100)
        exp_dir = os.path.join(self.temp_dir, export_year)
        optional_args = ['-d', self.test_db,
                         '-c', self.temp_dir,
                         '-y', export_year,
                         '--database_configuration=%s' % self.db_config_node]
        self._call_script(self.export_to_cache_opus_path, args=optional_args)

        exported_datasets = [os.path.split(f)[1] for f in glob(exp_dir + '/*')]
        self.assertEqual(set(exported_datasets), set(table_names))

        org_dir = os.path.join(self.temp_dir, str(self.year))
        self._two_caches_are_identical(org_dir, exp_dir)

        db.close()
        self.db_server.drop_database(self.test_db)
        rmtree(exp_dir)

    def test_export_one_table(self):
        logger.log_status("Test export single table for %s with %s" % (self.protocol, self.__class__))
        for table_name in self.test_data[self.year].keys():
            self._test_export_one_table(table_name)

    def _test_export_one_table(self, table_name):
        optional_args = ['-c', os.path.join(self.temp_dir, str(self.year)),
                         '-d', self.test_db,
                         '-t', table_name,
                         '--database_configuration=%s' % self.db_config_node]
        self._call_script(self.export_from_cache_opus_path, args=optional_args)

        self.assertTrue(self.db_server.has_database(database_name=self.test_db))
        db = OpusDatabase(database_server_configuration=self.db_config,
                          database_name=self.test_db)
        existing_tables = db.get_tables_in_database()
        self.assertEqual(set(existing_tables), set([table_name]))

        export_year = str(self.year + 100)
        exp_dir = os.path.join(self.temp_dir, export_year)
        optional_args = ['-d', self.test_db,
                         '-c', self.temp_dir,
                         '-y', export_year,
                         '-t', table_name,
                         '--database_configuration=%s' % self.db_config_node]
        self._call_script(self.export_to_cache_opus_path, args=optional_args)

        exported_datasets = [os.path.split(f)[1]
                             for f in glob(os.path.join(self.temp_dir, export_year) + '/*')]
        self.assertEqual(set(exported_datasets), set([table_name]))

        org_dir = os.path.join(self.temp_dir, str(self.year))
        self._two_caches_are_identical(org_dir, exp_dir, table_names=[table_name])

        db.close()
        self.db_server.drop_database(self.test_db)
        rmtree(exp_dir)

    def _call_script(self, opus_path, args):
        Popen(" %s %s %s" % (sys.executable,
                             module_path_from_opus_path(opus_path),
                             ' '.join(args)),
              shell=True).communicate()

    def _two_caches_are_identical(self, cache_a, cache_b, table_names=None):
        """Check that two caches contain identical datasets,
        even though their data types may differ."""
        if table_names is None:
            table_names = os.listdir(cache_a)
        for table_name in table_names:
            field_names_a = glob(os.path.join(cache_a, table_name) + '/*')
            field_names_b = glob(os.path.join(cache_b, table_name) + '/*')
            self.assertEqual(len(field_names_a), len(field_names_b))
            field_names_a.sort()
            field_names_b.sort()
            for f_a, f_b in zip(field_names_a, field_names_b):
                # Compare attribute names only; the dtype suffix and the
                # containing directory may legitimately differ between caches.
                self.assertEqual(os.path.basename(f_a).split('.')[0],
                                 os.path.basename(f_b).split('.')[0])
if storage_intype == 'sql':
    database_name = directory
    db_server = DatabaseServer(DatabaseConfiguration(
        database_name=database_name,
        database_configuration=options.database_configuration))
    db = db_server.get_database(database_name)
    directory = db

if storage_outtype == 'sql':
    database_name = output_file
    db_server = DatabaseServer(DatabaseConfiguration(
        database_name=database_name,
        database_configuration=options.database_configuration))
    if not db_server.has_database(database_name):
        db_server.create_database(database_name)
    db = db_server.get_database(database_name)
    output_file = db

input_storage = StorageFactory().get_storage('%s_storage' % storage_intype,
                                             storage_location=directory)
output_storage = StorageFactory().get_storage('%s_storage' % storage_outtype,
                                              storage_location=output_file)

if storage_outtype in create_output_directory and not os.path.exists(output_file):
    os.makedirs(output_file)

logger.start_block("Converting table '%s' from %s into %s ..."
                   % (table_name, storage_intype, storage_outtype))

kwargs = {'nchunks': nchunks}
for arg in arg_list.get(storage_outtype, []):
    kwargs[arg] = getattr(options, arg, None)
try:
def test__output_types(self):
    output_types = ['csv', 'tab', 'fixed_field']
    try:
        import dbfpy
    except ImportError:
        pass
    else:
        output_types.append('dbf')

    try:
        test_db_name = 'test_db_for_indicator_framework'
        database_config = DatabaseConfiguration(
            database_name=test_db_name,
            test=True,
        )
        server = DatabaseServer(database_config)
        server.drop_database(database_name=test_db_name)
        server.create_database(database_name=test_db_name)
    except:
        has_sql = False
    else:
        has_sql = True
        output_types.append('sql')

    indicator = Indicator(dataset_name='test',
                          attribute='opus_core.test.attribute')

    maker = Maker(project_name='test', test=True)
    computed_indicators = maker.create_batch(
        indicators={'attr1': indicator},
        source_data=self.source_data)

    for output_type in output_types:
        kwargs = {}
        if output_type == 'sql':
            kwargs['storage_location'] = database_config
        elif output_type == 'fixed_field':
            kwargs['fixed_field_format'] = '<fixed_field><field name="attribute_1980" format="10f" /></fixed_field>'

        table = Table(indicator_directory=self.source_data.get_indicator_directory(),
                      output_type=output_type,
                      **kwargs)
        table._create_input_stores(self.source_data.years)
        viz_result = table.visualize(
            indicators_to_visualize=['attr1'],
            computed_indicators=computed_indicators)[0]
        if output_type in ['csv', 'dbf', 'tab', 'fixed_field']:
            self.assertTrue(os.path.exists(
                os.path.join(viz_result.storage_location,
                             viz_result.table_name + '.' + viz_result.file_extension)))
        elif output_type == 'sql':
            self.assertTrue(server.has_database(test_db_name))
            db = server.get_database(test_db_name)
            self.assertTrue(db.table_exists(table_name=viz_result.table_name))

    if has_sql:
        server.drop_database(database_name=test_db_name)
def __init__(self, indicator_directory,
             name=None,
             output_type=None,
             storage_location=None,
             output_style=ALL,
             fixed_field_format=None,  # Only used with the 'fixed_field' output type
             **kwargs):
    if output_type == 'sql' and not isinstance(storage_location, DatabaseConfiguration):
        raise Exception("If Table output_type is 'sql', a DatabaseConfiguration must be passed as storage_location.")
    elif output_type in ['dbf', 'csv', 'tab', 'esri', 'fixed_field', 'xls'] and \
            storage_location is not None and \
            not isinstance(storage_location, str):
        raise Exception("If Table output_type is %s, storage_location must be a path to the output directory" % output_type)
    elif output_type not in ['dbf', 'csv', 'tab', 'sql', 'esri', 'fixed_field', 'xls']:
        raise Exception("Table output_type must be either dbf, csv, tab, sql, esri, fixed_field, or xls, not %s" % output_type)

    if output_type == "fixed_field" and not fixed_field_format:
        raise ValueError("If Table output_type is 'fixed_field', an XML format string must be passed as fixed_field_format.")
    self.fixed_field_format = fixed_field_format

    if output_style not in [Table.ALL, Table.PER_YEAR, Table.PER_ATTRIBUTE]:
        raise Exception('%s output_style is not appropriate. Choose from '
                        'Table.ALL, Table.PER_YEAR, and Table.PER_ATTRIBUTE' % output_style)

    self.output_type = output_type
    self.output_style = output_style

    if storage_location is None:
        storage_location = indicator_directory
    elif output_type == 'sql':
        server = DatabaseServer(database_server_configuration=storage_location)
        if not server.has_database(database_name=storage_location.database_name):
            server.create_database(database_name=storage_location.database_name)
        storage_location = server.get_database(database_name=storage_location.database_name)
    elif output_type == 'xls':
        storage_location = os.path.join(indicator_directory, storage_location)
        # We want clean output, so remove the file if it exists.
        if os.path.exists(storage_location):
            os.remove(storage_location)
    self.storage_location = storage_location

    self.output_storage = StorageFactory().get_storage(
        type='%s_storage' % self.output_type,
        storage_location=storage_location)

    self.name = name
    self.indicator_directory = indicator_directory

    # Check for the optional append_col_type argument; default to True when
    # keyword arguments are given without it, and False otherwise.
    if kwargs:
        self.append_col_type = kwargs.get('append_col_type', True)
    else:
        self.append_col_type = False
def run_run(self, run_resources, run_name=None, scenario_name=None,
            run_as_multiprocess=True, run_in_background=False):
    """Check that the run hasn't already been marked running,
    log it into run_activity, run the simulation,
    and mark the run as done/failed.
    """
    if not self.ready_to_run:
        raise RuntimeError("RunManager.setup_new_run must be executed before RunManager.run_run")

    if run_resources["cache_directory"] != self.current_cache_directory:
        raise RuntimeError("The configuration and the RunManager conflict on the proper cache_directory")

    run_resources["run_id"] = self.run_id
    if scenario_name is not None:
        run_resources["scenario_name"] = scenario_name

    self.add_row_to_history(self.run_id, run_resources, "started",
                            run_name=run_name, scenario_name=scenario_name)

    try:
        # Test pre-conditions
        model_system_class_path = run_resources.get("model_system", None)
        if model_system_class_path is None:
            raise TypeError("The configuration must specify model_system, the"
                            " full Opus path to the model system to be used.")

        # Create baseyear cache
        self.create_baseyear_cache(run_resources)

        # Create brand-new output database (deletes any prior contents)
        if "estimation_database_configuration" in run_resources:
            db_server = DatabaseServer(run_resources["estimation_database_configuration"])
            if not db_server.has_database(run_resources["estimation_database_configuration"].database_name):
                db_server.create_database(run_resources["estimation_database_configuration"].database_name)

        # Run simulation
        exec("from %s import ModelSystem" % model_system_class_path)
        model_system = ModelSystem()
        self.model_system = model_system

        if "base_year" not in run_resources:
            run_resources["base_year"] = run_resources["years"][0] - 1
        base_year = run_resources["base_year"]

        ## Create a large enough seed array so that a restarted run can still
        ## have seeds when running past the original end_year; the size needed
        ## to store a seed_dict of 100 seeds is about 12568 bytes.
        self._create_seed_dictionary(run_resources, start_year=base_year,
                                     end_year=base_year + 100)

        if "run_in_same_process" in run_resources and run_resources["run_in_same_process"]:
            model_system.run_in_same_process(run_resources)
        elif run_as_multiprocess:
            model_system.run_multiprocess(run_resources)
        else:
            model_system.run_in_one_process(run_resources,
                                            run_in_background=run_in_background,
                                            class_path=model_system_class_path)
        self.model_system = None
    except:
        self.add_row_to_history(self.run_id, run_resources, "failed",
                                run_name=run_name, scenario_name=scenario_name)
        self.ready_to_run = False
        raise  # This re-raises the last exception
    else:
        self.add_row_to_history(self.run_id, run_resources, "done",
                                run_name=run_name, scenario_name=scenario_name)
        self.ready_to_run = False

    return self.run_id
def run(self, config, year, storage_type='sql'):
    tm_config = config['travel_model_configuration']
    database_server_config = tm_config.get("database_server_configuration", 'simtravel_database_server')
    database_name = tm_config.get("database_name", 'mag_zone_baseyear')
    cache_directory = config['cache_directory']

    simulation_state = SimulationState()
    simulation_state.set_cache_directory(cache_directory)
    simulation_state.set_current_time(year)
    attribute_cache = AttributeCache()
    dataset_pool = SessionConfiguration(new_instance=True,
                                        package_order=config['dataset_pool_configuration'].package_order,
                                        in_storage=attribute_cache).get_dataset_pool()

    if storage_type == 'sql':
        db_server = DatabaseServer(DatabaseConfiguration(
            database_name=database_name,
            database_configuration=database_server_config))
        if not db_server.has_database(database_name):
            print "Database doesn't exist; creating one"
            db_server.create_database(database_name)
        db = db_server.get_database(database_name)
        output_storage = sql_storage(storage_location=db)
    elif storage_type == 'csv':
        csv_directory = os.path.join(cache_directory, 'csv', str(year))
        output_storage = csv_storage(storage_location=csv_directory)
    else:
        raise ValueError("Unsupported output storage type {}".format(storage_type))

    logger.start_block('Compute and export data to openAMOS...')

    hh = dataset_pool.get_dataset('household')
    hh_recs = dataset_pool.get_dataset('households_recs')
    #hh_recs.add_attribute(0,"htaz1")
    #hh_recs.flush_dataset()
    #syn_hh = dataset_pool.get_dataset('synthetic_household')

    hh_variables = ['houseid=household.household_id',
                    "hhsize=household.number_of_agents(person)",
                    "one=(household.household_id>0).astype('i')",
                    "inclt35k=(household.income<35000).astype('i')",
                    "incge35k=(household.income>=35000).astype('i')",
                    "incge50k=(household.income>=50000).astype('i')",
                    "incge75k=(household.income>=75000).astype('i')",
                    "incge100k=(household.income>=100000).astype('i')",
                    "inc35t50=((household.income>=35000) & (household.income<50000)).astype('i')",
                    "inc50t75=((household.income>=50000) & (household.income<75000)).astype('i')",
                    "inc75t100=((household.income>=75000) & (household.income<100000)).astype('i')",
                    'htaz = ((houseid>0)*(household.disaggregate(building.zone_id)-100) + (houseid<=0)*0)',
                    #'htaz = ((houseid>0) & (htaz1>100))*(htaz1-100)+((houseid>0) & (htaz1==-1))*1122',
                    "withchild = (household.aggregate(person.age<18)>0).astype('i')",
                    "noc = household.aggregate(person.age<18)",
                    "numadlt = household.aggregate(person.age>=18)",
                    "hinc=household.income",
                    "wif=household.workers",
                    #"wif=household.aggregate(mag_zone.person.is_employed)",
                    'numwrkr=household.workers',
                    #'numwrkr=household.aggregate(mag_zone.person.is_employed)',
                    'nwrkcnt=household.number_of_agents(person) - household.workers',
                    #'nwrkcnt=household.number_of_agents(person) - household.aggregate(mag_zone.person.is_employed)',
                    'yrbuilt=mag_zone.household.yrbuilt',
                    'mag_zone.household.sparent',
                    'mag_zone.household.rur',
                    'mag_zone.household.urb',
                    'zonetid = household.disaggregate(building.zone_id)']

    self.prepare_attributes(hh, hh_variables)
    attrs_to_export = hh_recs.get_known_attribute_names()

    self.write_dataset(hh, attrs_to_export, output_storage)
    dataset_pool._remove_dataset(hh.dataset_name)

    persons = dataset_pool.get_dataset('person')
    persons.out_table_name_default = 'persons'

    # Recoding invalid work and school locations to some random valid values
    persons_recs = dataset_pool.get_dataset('persons_recs')
    persons_recs.add_attribute(persons['person_id'], "personuniqueid")
    persons_recs.add_attribute(persons['marriage_status'], "marstat")
    persons_recs.add_attribute(persons['student_status'], "schstat")

    """
    persons_recs.add_attribute(persons['wtaz0'],"htaz_act")
    persons_recs.add_attribute(0,"wtaz_rec")
    persons_recs.add_attribute(0,"wtaz_rec1")
    persons_recs.add_attribute(0,"wtaz_rec2")
    persons_recs.add_attribute(0,"wtaz1_1")
    persons_recs.add_attribute(0,"wtaz1_2")
    persons_recs.add_attribute(0,"wtaz1_3")
    #persons_recs.add_attribute(persons['student_status'],"schstat")
    """

    persons_recs.add_attribute(0, "wtaz1")
    persons_recs.add_attribute(0, "htaz")
    persons_recs.add_attribute(0, "schtaz1")
    persons_recs.flush_dataset()

    #syn_persons = dataset_pool.get_dataset('synthetic_person')
    persons_variables = ['personid=mag_zone.person.member_id',
                         'personuniqueid=person.person_id',
                         'houseid=person.household_id',
                         "one=(person.person_id>0).astype('i')",
                         'trvtime=mag_zone.person.travel_time_from_home_to_work',
                         'timetowk=mag_zone.person.travel_time_from_home_to_work',
                         #'mag_zone.person.tmtowrk',
                         #'tmtowrk=person.disaggregate(synthetic_person.tmtowrk)',
                         "ag5t10=((person.age>=5) & (person.age<=10)).astype('i')",
                         "ag11t14=((person.age>=11) & (person.age<=14)).astype('i')",
                         "ag15t17=((person.age>=15) & (person.age<=17)).astype('i')",
                         "ag18t24=((person.age>=18) & (person.age<=24)).astype('i')",
                         "ag25t34=((person.age>=25) & (person.age<=34)).astype('i')",
                         "ag35t44=((person.age>=35) & (person.age<=44)).astype('i')",
                         "ag45t54=((person.age>=45) & (person.age<=54)).astype('i')",
                         "ag55t64=((person.age>=55) & (person.age<=64)).astype('i')",
                         "agge65=(person.age>=65).astype('i')",
                         "ag12t17=((person.age>=12) & (person.age<=17)).astype('i')",
                         "ag5t14=((person.age>=5) & (person.age<=14)).astype('i')",
                         "agge15=(person.age>=15).astype('i')",
                         "wrkr=(person.employment_status==1).astype('i')",
                         "isemploy=(person.employment_status==1).astype('i')",
                         "fulltim=(mag_zone.person.full_time==1).astype('i')",
                         'parttim=mag_zone.person.part_time',
                         'htaz = ((houseid>0)*(person.disaggregate(building.zone_id, intermediates=[household])-100) + (houseid<=0)*0)',
                         'wtaz1=(person.wtaz <= 0)*0 + (person.wtaz > 0)*(person.wtaz-100)',
                         "presch = ((person.age < 5)&(houseid>0)).astype('i')",
                         "mag_zone.person.schstat",
                         'schtaz1 = (person.schtaz <= 0)*0 + (person.schtaz > 0)*(person.schtaz-100)',
                         'marstat = person.marriage_status',
                         'enroll = person.student_status',
                         'grade = person.student_status & person.education',
                         'educ = person.education',
                         "male = (person.sex==1).astype('i')",
                         "female = (person.sex==2).astype('i')",
                         "coled = (person.education >= 10).astype('i')",
                         'race1 = person.race',
                         "white = (person.race == 1).astype('i')",
                         'person.hispanic']

    self.prepare_attributes(persons, persons_variables)
    attrs_to_export = persons_recs.get_known_attribute_names()

    self.write_dataset(persons, attrs_to_export, output_storage)
    dataset_pool._remove_dataset(persons.dataset_name)

    zones = dataset_pool.get_dataset('zone')
    zones_variables = ["retail_employment=zone.aggregate(mag_zone.job.sector_group=='retail')",
                       "public_employment=zone.aggregate(mag_zone.job.sector_group=='public')",
                       "office_employment=zone.aggregate(mag_zone.job.sector_group=='office')",
                       "industrial_employment=zone.aggregate(mag_zone.job.sector_group=='individual')",
                       "other_employment=zone.aggregate(mag_zone.job.sector_group=='other')",
                       "retail_employment_density=zone.aggregate(mag_zone.job.sector_group=='retail')/zone.acres",
                       "public_employment_density=zone.aggregate(mag_zone.job.sector_group=='public')/zone.acres",
                       "office_employment_density=zone.aggregate(mag_zone.job.sector_group=='office')/zone.acres",
                       "industrial_employment_density=zone.aggregate(mag_zone.job.sector_group=='individual')/zone.acres",
                       "other_employment_density=zone.aggregate(mag_zone.job.sector_group=='other')/zone.acres",
                       "total_area=zone.acres",
                       "lowest_income=zone.aggregate(household.income < scoreatpercentile(household.income, 20))",
                       "low_income=zone.aggregate(household.income < scoreatpercentile(household.income, 40))",
                       "high_income=zone.aggregate(household.income > scoreatpercentile(household.income, 80))",
                       #"institutional_population=zone.disaggregate(locations.institutional_population)",
                       #"groupquarter_households=zone.disaggregate(locations.groupquarter_households)",
                       "residential_households=zone.number_of_agents(household)",
                       "locationid=zone.zone_id"]

    locations = dataset_pool['locations']
    self.prepare_attributes(zones, zones_variables, dataset2=locations)
    attrs_to_export = locations.get_known_attribute_names()

    self.write_dataset(locations, attrs_to_export, output_storage)
    dataset_pool._remove_dataset(locations.dataset_name)
    #raw_input("check location block")

    logger.end_block()