def test_read_db_gz(self):
    """Open the packaged gzipped Road Test database and verify its contents.

    Checks, in order:

    * a missing scope file and a missing ``.db.gz`` file both raise
      ``FileNotFoundError``;
    * the database opened from ``examples/roadtest.db.gz`` reports the
      expected repr, info string and scope names;
    * the scope read back from the database equals the scope loaded
      from the YAML file;
    * the stored 'lhs' design has 110 experiments and the expected
      20 columns, in order;
    * a ``PythonCoreModel`` attached to this database has no metamodel id.
    """
    road_test_scope_file = emat.package_file('model', 'tests', 'road_test.yaml')

    with pytest.raises(FileNotFoundError):
        emat.Scope(emat.package_file('nope.yaml'))
    s = emat.Scope(road_test_scope_file)

    with pytest.raises(FileNotFoundError):
        emat.SQLiteDB(emat.package_file('nope.db.gz'))
    db = emat.SQLiteDB(emat.package_file("examples", "roadtest.db.gz"))

    assert repr(db) == '<emat.SQLiteDB with scope "EMAT Road Test">'
    # The info string ends in 'roadtest.db' (no '.gz' suffix).
    assert db.get_db_info().startswith('SQLite @ ')
    assert db.get_db_info().endswith('roadtest.db')
    assert db.read_scope_names() == ['EMAT Road Test']

    s1 = db.read_scope('EMAT Road Test')
    # Exact class match is intended here, so compare identity of the types.
    assert type(s1) is type(s)
    # Compare piecewise first so a failure names the offending attribute.
    for k in ('_x_list', '_l_list', '_c_list', '_m_list', 'name', 'desc'):
        assert getattr(s, k) == getattr(s1, k), k
    assert s == s1

    experiments = db.read_experiment_all('EMAT Road Test', 'lhs')
    assert experiments.shape == (110, 20)
    assert list(experiments.columns) == [
        'free_flow_time',
        'initial_capacity',
        'alpha',
        'beta',
        'input_flow',
        'value_of_time',
        'unit_cost_expansion',
        'interest_rate',
        'yield_curve',
        'expand_capacity',
        'amortization_period',
        'debt_type',
        'interest_rate_lock',
        'no_build_travel_time',
        'build_travel_time',
        'time_savings',
        'value_of_time_savings',
        'net_benefits',
        'cost_of_capacity_expansion',
        'present_cost_expansion',
    ]

    from emat.model.core_python import Road_Capacity_Investment
    m = emat.PythonCoreModel(Road_Capacity_Investment, scope=s, db=db)
    # Singletons are compared by identity, not equality.
    assert m.metamodel_id is None
def test_feature_scoring_with_nan():
    """Feature scoring on the 'bogus output' Road Test model yields a Styler.

    Builds a 5000-sample LHS design for the model wrapping
    ``_Road_Capacity_Investment_with_Bogus_Output``, runs it, scores the
    features with a fixed random state, and hands the underlying data to
    ``stable_df`` together with a pickle filename.
    """
    scope_path = emat.package_file('model','tests','road_test_bogus.yaml')
    bogus_scope = emat.Scope(scope_path)
    bogus_model = PythonCoreModel(
        _Road_Capacity_Investment_with_Bogus_Output,
        scope=bogus_scope,
    )

    lhs_design = bogus_model.design_experiments(n_samples=5000, sampler='lhs')
    lhs_results = bogus_model.run_experiments(design=lhs_design)

    styled_scores = feature_scores(bogus_scope, lhs_results, random_state=234)
    assert isinstance(styled_scores, pd.io.formats.style.Styler)
    stable_df("./road_test_feature_scores_bogus_1.pkl.gz", styled_scores.data)
def test_database_scope_updating():
    """A stored scope can be updated, explicitly or via ``add_measure(db=...)``.

    After each change the scope read back from the database must equal the
    in-memory scope, and the measure count grows from 5 to 6.
    """
    working_scope = emat.Scope("fake_filename.yaml", scope_yaml)
    database = emat.SQLiteDB()

    def stored_copy():
        # Fresh read every time, so each assertion sees the current DB state.
        return database.read_scope(working_scope.name)

    database.store_scope(working_scope)
    assert stored_copy() == working_scope

    # Route 1: mutate the scope, then push it with an explicit update.
    working_scope.add_measure("plus1")
    database.update_scope(working_scope)
    assert stored_copy() == working_scope
    assert len(working_scope.get_measures()) == 5

    # Route 2: let add_measure write through to the database itself.
    working_scope.add_measure("plus2", db=database)
    assert stored_copy() == working_scope
    assert len(working_scope.get_measures()) == 6
def test_multiple_connections():
    """Two ``SQLiteDB`` handles on one file see each other's scope and runs.

    The second handle must refuse to store the already-stored scope
    (``KeyError``), neither handle may be left inside a transaction, and
    results produced through one handle must be readable through the other.
    """
    import tempfile

    with tempfile.TemporaryDirectory() as workdir:
        db_path = os.path.join(workdir, "test_db_file.db")
        first_db = SQLiteDB(db_path, initialize=True)

        scope_path = emat.package_file("model", "tests", "road_test.yaml")
        road_scope = emat.Scope(scope_path)
        first_db.store_scope(road_scope)
        assert first_db.read_scope_names() == ["EMAT Road Test"]

        # Second handle on the same file: the scope is already present there.
        second_db = SQLiteDB(db_path, initialize=False)
        with pytest.raises(KeyError):
            second_db.store_scope(road_scope)

        # Neither database is in a transaction
        for handle in (first_db, second_db):
            assert not handle.conn.in_transaction

        from emat.model.core_python import Road_Capacity_Investment

        model_a = emat.PythonCoreModel(
            Road_Capacity_Investment, scope=road_scope, db=first_db,
        )
        model_b = emat.PythonCoreModel(
            Road_Capacity_Investment, scope=road_scope, db=second_db,
        )

        design_a = model_a.design_experiments(
            n_samples=3, random_seed=1, design_name="d1",
        )
        design_b = model_b.design_experiments(
            n_samples=3, random_seed=2, design_name="d2",
        )
        results_a = model_a.run_experiments(design_name="d1")
        results_b = model_b.run_experiments(design_name="d2")

        # Check each model can load the other's results
        d1_via_b = model_b.db.read_experiment_all(
            scope_name=road_scope.name, design_name="d1", ensure_dtypes=True,
        )
        pd.testing.assert_frame_equal(results_a, d1_via_b[results_a.columns])

        d2_via_a = model_a.db.read_experiment_all(
            scope_name=road_scope.name, design_name="d2", ensure_dtypes=True,
        )
        pd.testing.assert_frame_equal(results_b, d2_via_a[results_b.columns])
def test_feature_scoring_and_prim():
    """Feature scoring and PRIM box selection on a 5000-run Road Test design.

    The first half checks that ``feature_scores`` returns a pandas Styler and
    passes its data to ``stable_df``.  The second half runs PRIM on the
    target ``net_benefits >= 0``, checks the initially selected box (64),
    the x-values shown by the tradeoff selector, and then re-selects box 40
    and checks the marker symbols, coverage/density/mass and thresholds of
    the resulting EMAT box.
    """
    road_scope = emat.Scope(emat.package_file('model','tests','road_test.yaml'))
    road_test = PythonCoreModel(Road_Capacity_Investment, scope=road_scope)
    road_test_design = road_test.design_experiments(n_samples=5000, sampler='lhs')
    road_test_results = road_test.run_experiments(design=road_test_design)
    fs = feature_scores(road_scope, road_test_results, random_state=123)
    assert isinstance(fs, pd.io.formats.style.Styler)
    stable_df("./road_test_feature_scores_1.pkl.gz", fs.data)

    prim1 = road_test_results.prim(target="net_benefits >= 0")
    pbox1 = prim1.find_box()
    # find_box leaves the last box (index 64) selected.
    assert pbox1._cur_box == 64

    ts1 = prim1.tradeoff_selector()
    assert len(ts1.data) == 1
    # 65 x-values, one per box index 0..64.
    assert ts1.data[0]['x'] == approx(np.asarray([
        1., 1., 1., 1., 1.,
        0.99928315, 0.99856631, 0.99784946, 0.99569892, 0.99283154,
        0.98924731, 0.98351254, 0.97921147, 0.97491039, 0.96702509,
        0.95555556, 0.94982079, 0.94336918, 0.92903226, 0.91182796,
        0.89749104, 0.87598566, 0.85304659, 0.83942652, 0.83225806,
        0.82078853, 0.79713262, 0.77706093, 0.76415771, 0.75483871,
        0.74480287, 0.73261649, 0.71827957, 0.70394265, 0.68100358,
        0.65663082, 0.63225806, 0.61003584, 0.59569892, 0.57992832,
        0.55770609, 0.54193548, 0.52759857, 0.51111111, 0.49892473,
        0.48960573, 0.4781362, 0.45878136, 0.44229391, 0.42365591,
        0.409319, 0.39498208, 0.38064516, 0.36487455, 0.34767025,
        0.33261649, 0.31756272, 0.30322581, 0.28888889, 0.27741935,
        0.26379928, 0.25089606, 0.23942652, 0.22795699, 0.2172043]))

    pbox1.select(40)
    assert pbox1._cur_box == 40
    # After selecting box 40 the selector (created before the select call)
    # shows symbol 4 at position 40 and symbol 0 everywhere else.
    assert ts1.data[0]['marker']['symbol'] == approx(np.asarray([
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        4,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0]))

    ebox1_40 = pbox1.to_emat_box()
    assert ebox1_40.coverage == approx(0.5577060931899641)
    assert ebox1_40.density == approx(0.8356605800214822)
    assert ebox1_40.mass == approx(0.1862)

    from emat import Bounds
    # NOTE(review): thresholds are compared with exact float equality, not
    # approx -- confirm this is stable across platforms.
    assert ebox1_40.thresholds == {'beta': Bounds(lowerbound=3.597806324946271, upperbound=None),
                                   'input_flow': Bounds(lowerbound=125, upperbound=None),
                                   'value_of_time': Bounds(lowerbound=0.07705746291056698, upperbound=None),
                                   'expand_capacity': Bounds(lowerbound=None, upperbound=95.01870815358643)}

    # Only checks that these calls do not raise; their results are discarded.
    pbox1.splom()
    pbox1.hmm()
def db_setup():
    """Yield a ``Bunch`` describing a database pre-loaded with the "test" scope.

    The database filename is taken from ``config["test_db_filename"]`` and
    falls back to ``":memory:"``.  Any existing "test" scope is deleted
    before the scope is written, and the scope is deleted again once the
    consumer of the generator resumes it after the ``yield``.
    """
    db_filename = config.get("test_db_filename", ":memory:")
    db_test = SQLiteDB(db_filename, initialize=True)

    # load experiment variables and performance measures
    scp_xl = [
        ("constant", "constant"),
        ("exp_var1", "risk"),
        ("exp_var2", "strategy"),
    ]
    scp_m = [
        ("pm_1", "none"),
        ("pm_2", "ln"),
    ]
    db_test.init_xlm(scp_xl, scp_m)

    # create emat scope
    scope_name = "test"
    sheet = "emat_scope1.yaml"
    # The bare names are simply the first element of each pair above.
    ex_xl = [variable for variable, _ in scp_xl]
    ex_m = [measure for measure, _ in scp_m]

    db_test.delete_scope(scope_name)
    scope_obj = emat.Scope(sheet, scope_yaml)
    db_test._write_scope(scope_name, sheet, ex_xl, ex_m, scope_obj)

    bundle = Bunch(
        db_test=db_test,
        scope_name=scope_name,
        sheet=sheet,
        scp_xl=scp_xl,
        scp_m=scp_m,
        ex_m=ex_m,
        ex_xl=ex_xl,
    )
    yield bundle

    db_test.delete_scope(scope_name)
# %% [markdown] # # Interactive Explorer # # TMIP-EMAT includes an interactive visualizer, inspired by a # [similar tool](https://htmlpreview.github.io/?https://github.com/VisionEval/VisionEval/blob/master/sources/VEScenarioViewer/verpat.html) # provided with the [VisionEval](https://visioneval.org) package. # To demonstrate the interactive visualizer, we will use the Road Test example model. # First, we need to develop and run a design of experiments to have some # data to explore. We'll run 5,000 experiments to get a good size sample of # data points to visualize. # %% import emat.examples scope = emat.Scope("notebooks/scope.yaml") results = pd.read_csv("notebooks/results.csv") # %% [markdown] # One feature of the visualizer is the ability to display not only a number of results, # but also to contrast those results against a given "reference" model that represents # a more traditional single-point forecast of inputs and results. We'll prepare a # reference point here using the `run_reference_experiment` method of the `CoreModel` # class, which reads the input parameter defaults (as defined in the scope), # and returns both inputs and outputs in a DataFrame (essentially, an experimental # design with only a single experiment), suitable for use as the reference point marker in our # visualizations. # %% # %% [markdown]
# To demonstrate the feature scoring, we can define a scope to explore this
# demo model:

# %%
# The scope definition is passed inline as YAML text, so no scope file is
# read (`scope_file` is left empty).  It must be a multi-line block-style
# document: a YAML block mapping cannot be written on a single line.
demo_scope = emat.Scope(scope_file='', scope_def="""---
scope:
    name: demo
inputs:
    A:
        ptype: exogenous uncertainty
        dtype: float
        min: 0
        max: 1
    B:
        ptype: exogenous uncertainty
        dtype: float
        min: 0
        max: 1
    C:
        ptype: exogenous uncertainty
        dtype: float
        min: 0
        max: 1
outputs:
    Y:
        kind: info
""")

# %% [markdown]
# And then we will design and run some experiments to generate data used for
# feature scoring.
s.dump(filename="road_test_scope.yaml") # %% show_dir('.') # %% [markdown] # ### Reading In Raw Data # # Now, we're ready to begin anew, constructing a fresh database from scratch, # using only the raw formatted files. # # First, let's load our scope from the yaml file, and initialize a clean database # using that scope. # %% s2 = emat.Scope("road_test_scope.yaml") # %% db2 = emat.SQLiteDB("road_test_2.sqldb") # %% db2.store_scope(s2) # %% [markdown] # Just as we used pandas to save out our consolidated DataFrame of experimental results, # we can use it to read in a consolidated table of experiments. # %% df2 = pd.read_csv("road_test_1.csv.gz", index_col='experiment') df2
# %% [markdown] # ## Defining the Exploratory Scope # %% [raw] {"raw_mimetype": "text/restructuredtext"} # The model scope is defined in a YAML file. For this Road Test example, the scope file is named # :ref:`road_test.yaml <road_test_scope_file>` and is included in the model/tests directory. # %% road_test_scope_file = emat.package_file('model','tests','road_test.yaml') # %% [raw] {"raw_mimetype": "text/restructuredtext"} # The filename for the YAML file is the first argument when creating a :class:`Scope` # object, which will load and process the content of the file. # %% road_scope = emat.Scope(road_test_scope_file) road_scope # %% [markdown] # A short summary of the scope can be reviewed using the `info` method. # %% road_scope.info() # %% [markdown] # Alternatively, more detailed information about each part of the scope can be # accessed in four list attributes: # %% road_scope.get_constants()
def test_read_db_gz(self):
    """Open (creating it first if absent) the gzipped Road Test database.

    If ``examples/roadtest.db.gz`` does not exist in the package, it is
    built: a fresh database is written to ``examples/roadtest.db.tmp``,
    populated with the scope and a 110-sample 'lhs' design (seed 1234),
    the experiments are run, the connection is closed, and the file is
    gzip-copied to ``roadtest.db.gz``.

    The rest of the test then checks the gz database: repr, info string,
    scope names, equality of the stored scope with the YAML scope, the
    shape and column order of the 'lhs' experiments, and that a model
    attached to this database has no metamodel id.
    """
    road_test_scope_file = emat.package_file('model', 'tests', 'road_test.yaml')

    with pytest.raises(FileNotFoundError):
        emat.Scope(emat.package_file('nope.yaml'))
    s = emat.Scope(road_test_scope_file)

    with pytest.raises(FileNotFoundError):
        emat.SQLiteDB(emat.package_file('nope.db.gz'))

    gz_path = emat.package_file("examples", "roadtest.db.gz")
    if not os.path.exists(gz_path):
        # The temp path is only resolved inside this branch, as before.
        tmp_path = emat.package_file("examples", "roadtest.db.tmp")
        db_w = emat.SQLiteDB(tmp_path, initialize=True)
        s.store_scope(db_w)
        s.design_experiments(n_samples=110, random_seed=1234, db=db_w, design_name='lhs')
        from emat.model.core_python import Road_Capacity_Investment
        m_w = emat.PythonCoreModel(Road_Capacity_Investment, scope=s, db=db_w)
        m_w.run_experiments(design_name='lhs', db=db_w)
        # Close before copying so the file on disk is complete.
        db_w.conn.close()
        import gzip
        import shutil
        with open(tmp_path, 'rb') as f_in, gzip.open(gz_path, 'wb') as f_out:
            shutil.copyfileobj(f_in, f_out)

    db = emat.SQLiteDB(gz_path)

    assert repr(db) == '<emat.SQLiteDB with scope "EMAT Road Test">'
    # The info string ends in 'roadtest.db' (no '.gz' suffix).
    assert db.get_db_info().startswith('SQLite @ ')
    assert db.get_db_info().endswith('roadtest.db')
    assert db.read_scope_names() == ['EMAT Road Test']

    s1 = db.read_scope('EMAT Road Test')
    # Exact class match is intended here, so compare identity of the types.
    assert type(s1) is type(s)
    # Compare piecewise first so a failure names the offending attribute.
    for k in ('_x_list', '_l_list', '_c_list', '_m_list', 'name', 'desc'):
        assert getattr(s, k) == getattr(s1, k), k
    assert s == s1

    experiments = db.read_experiment_all('EMAT Road Test', 'lhs')
    assert experiments.shape == (110, 20)
    assert list(experiments.columns) == [
        'free_flow_time',
        'initial_capacity',
        'alpha',
        'beta',
        'input_flow',
        'value_of_time',
        'unit_cost_expansion',
        'interest_rate',
        'yield_curve',
        'expand_capacity',
        'amortization_period',
        'debt_type',
        'interest_rate_lock',
        'no_build_travel_time',
        'build_travel_time',
        'time_savings',
        'value_of_time_savings',
        'net_benefits',
        'cost_of_capacity_expansion',
        'present_cost_expansion',
    ]

    from emat.model.core_python import Road_Capacity_Investment
    m = emat.PythonCoreModel(Road_Capacity_Investment, scope=s, db=db)
    # Singletons are compared by identity, not equality.
    assert m.metamodel_id is None
def test_road_test(self):
    """Run the Road Test example end to end against a fresh ``emat.SQLiteDB()``.

    Steps, in order: change the working directory to this file's folder,
    load and store the scope, create the 'lhs' and 'lhs_large' designs, run
    both through the core model, check selected results against recorded
    values, compute feature scores, run PRIM and CART on the large design,
    run a short robust optimization with constraints and convergence
    metrics, and finally build a metamodel from the 'lhs' design and run a
    second design through it.

    Several intermediate frames are handed to ``stable_df`` together with a
    relative pickle filename.
    """
    import os
    test_dir = os.path.dirname(__file__)
    # Relative filenames passed to stable_df below are resolved from here.
    os.chdir(test_dir)

    road_test_scope_file = emat.package_file('model', 'tests', 'road_test.yaml')

    road_scope = emat.Scope(road_test_scope_file)

    # <emat.Scope with 2 constants, 7 uncertainties, 4 levers, 7 measures>
    assert len(road_scope.get_measures()) == 7
    assert len(road_scope.get_levers()) == 4
    assert len(road_scope.get_uncertainties()) == 7
    assert len(road_scope.get_constants()) == 2

    emat_db = emat.SQLiteDB()

    road_scope.store_scope(emat_db)

    # Storing the same scope a second time is rejected.
    with pytest.raises(KeyError):
        road_scope.store_scope(emat_db)

    assert emat_db.read_scope_names() == ['EMAT Road Test']

    design = design_experiments(road_scope, db=emat_db, n_samples_per_factor=10, sampler='lhs')
    design.head()

    large_design = design_experiments(road_scope, db=emat_db, n_samples=5000, sampler='lhs',
                                      design_name='lhs_large')
    large_design.head()

    assert list(large_design.columns) == [
        'alpha',
        'amortization_period',
        'beta',
        'debt_type',
        'expand_capacity',
        'input_flow',
        'interest_rate',
        'interest_rate_lock',
        'unit_cost_expansion',
        'value_of_time',
        'yield_curve',
        'free_flow_time',
        'initial_capacity',
    ]

    # The large design's experiment ids start at 111, i.e. directly after
    # the ids already used by the first design.
    assert list(large_design.head().index) == [111, 112, 113, 114, 115]

    assert emat_db.read_design_names('EMAT Road Test') == ['lhs', 'lhs_large']

    m = PythonCoreModel(Road_Capacity_Investment, scope=road_scope, db=emat_db)

    with SequentialEvaluator(m) as eval_seq:
        lhs_results = m.run_experiments(design_name='lhs', evaluator=eval_seq)

    lhs_results.head()

    assert lhs_results.head()['present_cost_expansion'].values == approx(
        [2154.41598475, 12369.38053473, 4468.50683924, 6526.32517089, 2460.91070514])

    assert lhs_results.head()['net_benefits'].values == approx(
        [ -22.29090499,  -16.84301382, -113.98841188,   11.53956058,        78.03661612])

    assert lhs_results.tail()['present_cost_expansion'].values == approx(
        [2720.51645703, 4000.91232689, 6887.83193063, 3739.47839941, 1582.52899124])

    assert lhs_results.tail()['net_benefits'].values == approx(
        [841.46278175, -146.71279267, -112.5681036, 25.48055303, 127.31154155])

    with SequentialEvaluator(m) as eval_seq:
        lhs_large_results = m.run_experiments(design_name='lhs_large', evaluator=eval_seq)
    lhs_large_results.head()

    assert lhs_large_results.head()['net_benefits'].values == approx(
        [-522.45283083, -355.1599307 , -178.6623215 ,   23.46263498,       -301.17700968])

    lhs_outcomes = m.read_experiment_measures(design_name='lhs')
    assert lhs_outcomes.head()['time_savings'].values == approx(
        [13.4519273, 26.34172999, 12.48385198, 15.10165981, 15.48056139])

    scores = m.get_feature_scores('lhs', random_state=123)

    stable_df("./road_test_feature_scores.pkl.gz", scores.data)

    from emat.workbench.analysis import prim

    # NOTE(review): `x` is never used afterwards.
    x = m.read_experiment_parameters(design_name='lhs_large')

    # PRIM on the large design, classifying runs by positive net benefits.
    prim_alg = prim.Prim(
        m.read_experiment_parameters(design_name='lhs_large'),
        m.read_experiment_measures(design_name='lhs_large')['net_benefits'] > 0,
        threshold=0.4,
    )

    box1 = prim_alg.find_box()

    stable_df("./road_test_box1_peeling_trajectory.pkl.gz", box1.peeling_trajectory)

    from emat.util.xmle import Show
    from emat.util.xmle.elem import Elem

    assert isinstance(Show(box1.show_tradeoff()), Elem)

    from emat.workbench.analysis import cart

    # CART on the same inputs and the same boolean target.
    cart_alg = cart.CART(
        m.read_experiment_parameters(design_name='lhs_large'),
        m.read_experiment_measures(design_name='lhs_large')['net_benefits'] > 0,
    )
    cart_alg.build_tree()

    stable_df("./road_test_cart_box0.pkl.gz", cart_alg.boxes[0])

    cart_dict = dict(cart_alg.boxes[0].iloc[0])
    assert cart_dict['debt_type'] == {'GO Bond', 'Paygo', 'Rev Bond'}
    #assert cart_dict['interest_rate_lock'] == {False, True}

    assert isinstance(Show(cart_alg.show_tree(format='svg')), Elem)

    from emat import Measure

    # NOTE(review): MAXIMIZE is bound here but not used below.
    MAXIMIZE = Measure.MAXIMIZE
    MINIMIZE = Measure.MINIMIZE

    # Each robustness measure applies `function` to the named model output.
    robustness_functions = [
        Measure(
            'Expected Net Benefit',
            kind=Measure.INFO,
            variable_name='net_benefits',
            function=numpy.mean,
            #         min=-150,
            #         max=50,
        ),

        Measure(
            'Probability of Net Loss',
            kind=MINIMIZE,
            variable_name='net_benefits',
            function=lambda x: numpy.mean(x < 0),
            min=0,
            max=1,
        ),

        Measure(
            '95%ile Travel Time',
            kind=MINIMIZE,
            variable_name='build_travel_time',
            function=functools.partial(numpy.percentile, q=95),
            min=60,
            max=150,
        ),

        Measure(
            '99%ile Present Cost',
            kind=Measure.INFO,
            variable_name='present_cost_expansion',
            function=functools.partial(numpy.percentile, q=99),
            #         min=0,
            #         max=10,
        ),

        Measure(
            'Expected Present Cost',
            kind=Measure.INFO,
            variable_name='present_cost_expansion',
            function=numpy.mean,
            #         min=0,
            #         max=10,
        ),

    ]

    from emat import Constraint

    # NOTE(review): the name says "Log" but the bound (4000) is applied to
    # the 'Expected Present Cost' outcome as-is -- confirm the name.
    constraint_1 = Constraint(
        "Maximum Log Expected Present Cost",
        outcome_names="Expected Present Cost",
        function=Constraint.must_be_less_than(4000),
    )

    constraint_2 = Constraint(
        "Minimum Capacity Expansion",
        parameter_names="expand_capacity",
        function=Constraint.must_be_greater_than(10),
    )

    # Evaluates to max(0, cost - 1500) when debt_type is 'Paygo', else 0.
    constraint_3 = Constraint(
        "Maximum Paygo",
        parameter_names='debt_type',
        outcome_names='99%ile Present Cost',
        function=lambda i, j: max(0, j - 1500) if i == 'Paygo' else 0,
    )

    from emat.optimization import HyperVolume, EpsilonProgress, SolutionViewer, ConvergenceMetrics

    convergence_metrics = ConvergenceMetrics(
        HyperVolume.from_outcomes(robustness_functions),
        EpsilonProgress(),
        SolutionViewer.from_model_and_outcomes(m, robustness_functions),
    )

    # A deliberately tiny optimization (nfe=5, 20 scenarios).
    with SequentialEvaluator(m) as eval_seq:
        robust = m.robust_optimize(
            robustness_functions,
            scenarios=20,
            nfe=5,
            constraints=[
                constraint_1,
                constraint_2,
                constraint_3,
            ],
            epsilons=[0.05, ] * len(robustness_functions),
            convergence=convergence_metrics,
            evaluator=eval_seq,
        )
    robust_results, convergence = robust.result, robust.convergence

    assert isinstance(robust_results, pandas.DataFrame)

    # Build a metamodel from the 'lhs' runs and push a second design through it.
    mm = m.create_metamodel_from_design('lhs')

    design2 = design_experiments(road_scope, db=emat_db, n_samples_per_factor=10, sampler='lhs', random_seed=2)

    design2_results = mm.run_experiments(design2)
def test_road_test(self):
    """Run the Road Test example end to end against a fresh ``emat.SQLiteDB()``.

    Steps, in order: load and store the scope, create the 'lhs' and
    'lhs_large' designs, run both through the core model, check selected
    results against recorded values, compare feature scores element-wise
    against a recorded matrix (10% relative tolerance), run PRIM and CART
    on the large design, run a short robust optimization with constraints
    and convergence metrics, and finally build a metamodel from the 'lhs'
    design and run a second design through it.
    """
    road_test_scope_file = emat.package_file('model', 'tests', 'road_test.yaml')

    road_scope = emat.Scope(road_test_scope_file)

    # <emat.Scope with 2 constants, 7 uncertainties, 4 levers, 7 measures>
    assert len(road_scope.get_measures()) == 7
    assert len(road_scope.get_levers()) == 4
    assert len(road_scope.get_uncertainties()) == 7
    assert len(road_scope.get_constants()) == 2

    emat_db = emat.SQLiteDB()

    road_scope.store_scope(emat_db)

    # Storing the same scope a second time is rejected.
    with pytest.raises(KeyError):
        road_scope.store_scope(emat_db)

    assert emat_db.read_scope_names() == ['EMAT Road Test']

    design = design_experiments(road_scope, db=emat_db, n_samples_per_factor=10, sampler='lhs')
    design.head()

    large_design = design_experiments(road_scope, db=emat_db, n_samples=5000, sampler='lhs',
                                      design_name='lhs_large')
    large_design.head()

    assert list(large_design.columns) == [
        'alpha',
        'amortization_period',
        'beta',
        'debt_type',
        'expand_capacity',
        'input_flow',
        'interest_rate',
        'interest_rate_lock',
        'unit_cost_expansion',
        'value_of_time',
        'yield_curve',
        'free_flow_time',
        'initial_capacity',
    ]

    # The large design's experiment ids start at 111, i.e. directly after
    # the ids already used by the first design.
    assert list(large_design.head().index) == [111, 112, 113, 114, 115]

    assert emat_db.read_design_names('EMAT Road Test') == ['lhs', 'lhs_large']

    m = PythonCoreModel(Road_Capacity_Investment, scope=road_scope, db=emat_db)

    with SequentialEvaluator(m) as eval_seq:
        lhs_results = m.run_experiments(design_name='lhs', evaluator=eval_seq)

    lhs_results.head()

    assert lhs_results.head()['present_cost_expansion'].values == approx(
        [2154.41598475, 12369.38053473, 4468.50683924, 6526.32517089, 2460.91070514])

    assert lhs_results.head()['net_benefits'].values == approx(
        [-79.51551505, -205.32148044, -151.94431822, -167.62487134, -3.97293985])

    with SequentialEvaluator(m) as eval_seq:
        lhs_large_results = m.run_experiments(design_name='lhs_large', evaluator=eval_seq)
    lhs_large_results.head()

    assert lhs_large_results.head()['net_benefits'].values == approx(
        [-584.36098322, -541.5458395, -185.16661464, -135.85689709, -357.36106457])

    lhs_outcomes = m.read_experiment_measures(design_name='lhs')
    assert lhs_outcomes.head()['time_savings'].values == approx(
        [13.4519273, 26.34172999, 12.48385198, 15.10165981, 15.48056139])

    # Recorded feature scores: 7 rows by 13 columns.
    correct_scores = numpy.array(
        [[0.06603461, 0.04858595, 0.06458574, 0.03298163, 0.05018515, 0., 0., 0.53156587, 0.05060416, 0.02558088,
          0.04676956, 0.04131266, 0.04179378],
         [0.06003223, 0.04836434, 0.06059554, 0.03593644, 0.27734396, 0., 0., 0.28235419, 0.05303979, 0.03985181,
          0.04303371, 0.05004349, 0.04940448],
         [0.08760605, 0.04630414, 0.0795043, 0.03892201, 0.10182534, 0., 0., 0.42508457, 0.04634321, 0.03216387,
          0.0497183, 0.04953772, 0.0429905],
         [0.08365598, 0.04118732, 0.06716887, 0.03789444, 0.06509519, 0., 0., 0.31494171, 0.06517462, 0.02895742,
          0.04731707, 0.17515158, 0.07345581],
         [0.06789382, 0.07852257, 0.05066944, 0.04807088, 0.32054735, 0., 0., 0.15953055, 0.05320201, 0.02890069,
          0.07033928, 0.06372418, 0.05859923],
         [0.05105435, 0.09460353, 0.04614178, 0.04296901, 0.45179611, 0., 0., 0.04909801, 0.05478798, 0.023099,
          0.08160785, 0.05642169, 0.04842069],
         [0.04685703, 0.03490931, 0.03214081, 0.03191602, 0.56130318, 0., 0., 0.04011044, 0.04812986, 0.02228924,
          0.09753361, 0.04273004, 0.04208045],
         ])

    scores = m.get_feature_scores('lhs', random_state=123)

    # Element-wise comparison so a failure pinpoints the offending cell.
    for _i in range(scores.metadata.values.shape[0]):
        for _j in range(scores.metadata.values.shape[1]):
            assert scores.metadata.values[_i,_j] == approx(correct_scores[_i,_j], rel=.1)

    from ema_workbench.analysis import prim

    # NOTE(review): `x` is never used afterwards.
    x = m.read_experiment_parameters(design_name='lhs_large')

    # PRIM on the large design, classifying runs by positive net benefits.
    prim_alg = prim.Prim(
        m.read_experiment_parameters(design_name='lhs_large'),
        m.read_experiment_measures(design_name='lhs_large')['net_benefits'] > 0,
        threshold=0.4,
    )

    box1 = prim_alg.find_box()

    assert dict(box1.peeling_trajectory.iloc[45]) == approx({
        'coverage': 0.8014705882352942,
        'density': 0.582109479305741,
        'id': 45,
        'mass': 0.1498,
        'mean': 0.582109479305741,
        'res_dim': 4,
    })

    from emat.util.xmle import Show
    from emat.util.xmle.elem import Elem

    assert isinstance(Show(box1.show_tradeoff()), Elem)

    from ema_workbench.analysis import cart

    # CART on the same inputs and the same boolean target.
    cart_alg = cart.CART(
        m.read_experiment_parameters(design_name='lhs_large'),
        m.read_experiment_measures(design_name='lhs_large')['net_benefits'] > 0,
    )
    cart_alg.build_tree()

    cart_dict = dict(cart_alg.boxes[0].iloc[0])
    assert cart_dict['debt_type'] == {'GO Bond', 'Paygo', 'Rev Bond'}
    assert cart_dict['interest_rate_lock'] == {False, True}
    # The two set-valued entries are checked above and removed here, so the
    # remaining (numeric) entries can be compared with approx.
    del cart_dict['debt_type']
    del cart_dict['interest_rate_lock']
    assert cart_dict == approx({
        'free_flow_time': 60,
        'initial_capacity': 100,
        'alpha': 0.10001988547129116,
        'beta': 3.500215589924521,
        'input_flow': 80.0,
        'value_of_time': 0.00100690634109406,
        'unit_cost_expansion': 95.00570832093116,
        'interest_rate': 0.0250022738169142,
        'yield_curve': -0.0024960505548531774,
        'expand_capacity': 0.0006718732232418368,
        'amortization_period': 15,
    })

    assert isinstance(Show(cart_alg.show_tree(format='svg')), Elem)

    from emat import Measure

    # NOTE(review): MAXIMIZE is bound here but not used below.
    MAXIMIZE = Measure.MAXIMIZE
    MINIMIZE = Measure.MINIMIZE

    # Each robustness measure applies `function` to the named model output.
    robustness_functions = [
        Measure(
            'Expected Net Benefit',
            kind=Measure.INFO,
            variable_name='net_benefits',
            function=numpy.mean,
            #         min=-150,
            #         max=50,
        ),

        Measure(
            'Probability of Net Loss',
            kind=MINIMIZE,
            variable_name='net_benefits',
            function=lambda x: numpy.mean(x < 0),
            min=0,
            max=1,
        ),

        Measure(
            '95%ile Travel Time',
            kind=MINIMIZE,
            variable_name='build_travel_time',
            function=functools.partial(numpy.percentile, q=95),
            min=60,
            max=150,
        ),

        Measure(
            '99%ile Present Cost',
            kind=Measure.INFO,
            variable_name='present_cost_expansion',
            function=functools.partial(numpy.percentile, q=99),
            #         min=0,
            #         max=10,
        ),

        Measure(
            'Expected Present Cost',
            kind=Measure.INFO,
            variable_name='present_cost_expansion',
            function=numpy.mean,
            #         min=0,
            #         max=10,
        ),

    ]

    from emat import Constraint

    # NOTE(review): the name says "Log" but the bound (4000) is applied to
    # the 'Expected Present Cost' outcome as-is -- confirm the name.
    constraint_1 = Constraint(
        "Maximum Log Expected Present Cost",
        outcome_names="Expected Present Cost",
        function=Constraint.must_be_less_than(4000),
    )

    constraint_2 = Constraint(
        "Minimum Capacity Expansion",
        parameter_names="expand_capacity",
        function=Constraint.must_be_greater_than(10),
    )

    # Evaluates to max(0, cost - 1500) when debt_type is 'Paygo', else 0.
    constraint_3 = Constraint(
        "Maximum Paygo",
        parameter_names='debt_type',
        outcome_names='99%ile Present Cost',
        function=lambda i, j: max(0, j - 1500) if i == 'Paygo' else 0,
    )

    from emat.optimization import HyperVolume, EpsilonProgress, SolutionViewer, ConvergenceMetrics

    convergence_metrics = ConvergenceMetrics(
        HyperVolume.from_outcomes(robustness_functions),
        EpsilonProgress(),
        SolutionViewer.from_model_and_outcomes(m, robustness_functions),
    )

    # A deliberately tiny optimization (nfe=5, 20 scenarios); the call's
    # return value is unpacked directly into results and convergence.
    with SequentialEvaluator(m) as eval_seq:
        robust_results, convergence = m.robust_optimize(
            robustness_functions,
            scenarios=20,
            nfe=5,
            constraints=[
                constraint_1,
                constraint_2,
                constraint_3,
            ],
            epsilons=[0.05, ] * len(robustness_functions),
            convergence=convergence_metrics,
            evaluator=eval_seq,
        )

    assert isinstance(robust_results, pandas.DataFrame)

    # Build a metamodel from the 'lhs' runs and push a second design through it.
    mm = m.create_metamodel_from_design('lhs')

    design2 = design_experiments(road_scope, db=emat_db, n_samples_per_factor=10, sampler='lhs', random_seed=2)

    design2_results = mm.run_experiments(design2)
def test_database_walkthrough(data_regression, dataframe_regression):
    """Walk through exporting, re-importing and invalidating database runs.

    Parameters
    ----------
    data_regression
        Object whose ``check(obj)`` method is called on plain dictionaries
        (presumably the pytest-regressions fixture -- confirm).
    dataframe_regression
        Object whose ``check(df, basename=...)`` method is called on
        DataFrames (presumably the pytest-regressions fixture -- confirm).

    The test works inside a temporary directory and covers: writing a
    single design to CSV and the scope to YAML, rebuilding a database from
    those files, doing the same for several designs at once, and marking
    runs invalid so that they are re-run.
    """

    # import os
    # import numpy as np
    # import pandas as pd
    # import seaborn;
    # seaborn.set_theme()
    # import plotly.io;
    # plotly.io.templates.default = "seaborn"
    # import emat
    # import yaml
    # from emat.util.show_dir import show_dir
    # from emat.analysis import display_experiments
    # emat.versions()

    # For this walkthrough of database features, we'll work in a temporary directory.
    # (In real projects you'll likely want to save your data somewhere less ephemeral,
    # so don't just copy this tempfile code into your work.)
    # NOTE(review): the working directory is never restored and the temporary
    # directory is never explicitly cleaned up -- confirm this is acceptable.

    tempdir = tempfile.TemporaryDirectory()
    os.chdir(tempdir.name)

    # We begin our example by populating a database with some experimental data, by creating and
    # running a single design of experiments for the Road Test model.

    import emat.examples
    scope, db, model = emat.examples.road_test()
    design = model.design_experiments()
    model.run_experiments(design)

    # ## Single-Design Datasets

    # ### Writing Out Raw Data
    #
    # When the database has only a single design of experiments, or if we
    # don't care about any differentiation between multiple designs that we
    # may have created and run, we can dump the entire set of model runs,
    # including uncertainties, policy levers, and performance measures, all
    # consolidated into a single pandas DataFrame using the
    # `read_experiment_all` function.  The constants even appear in this DataFrame
    # too, for good measure.

    df = db.read_experiment_all(scope.name)
    dataframe_regression.check(pd.DataFrame(df), basename='test_database__df')

    # Exporting this data is simply a matter of using the usual pandas
    # methods to save the dataframe to a format of your choosing.  We'll
    # save our data into a gzipped CSV file, which is somewhat compressed
    # (we're not monsters here) but still widely compatible for a variety of uses.

    df.to_csv("road_test_1.csv.gz")

    # This table contains most of the information we want to export from
    # our database, but not everything.  We also probably want to have access
    # to all of the information in the exploratory scope as well.  Our example
    # generator gives us a `Scope` reference directly, but if we didn't have that
    # we can still extract it from the database, using the `read_scope` method.

    s = db.read_scope()
    s.dump(filename="road_test_scope.yaml")

    # ### Reading In Raw Data
    #
    # Now, we're ready to begin anew, constructing a fresh database from scratch,
    # using only the raw formatted files.
    #
    # First, let's load our scope from the yaml file, and initialize a clean database
    # using that scope.

    s2 = emat.Scope("road_test_scope.yaml")
    db2 = emat.SQLiteDB("road_test_2.sqldb")
    db2.store_scope(s2)

    # Just as we used pandas to save out our consolidated DataFrame of experimental results,
    # we can use it to read in a consolidated table of experiments.

    df2 = pd.read_csv("road_test_1.csv.gz", index_col='experiment')
    # dataframe_regression.check(df2, basename='test_database__df2')

    # Writing experiments to a database is not quite as simple as reading them.  There
    # is a parallel `write_experiment_all` method for the `Database` class, but to use
    # it we need to provide not only the DataFrame of actual results, but also a name for
    # the design of experiments we are writing (all experiments exist within designs) and
    # the source of the performance measure results (zero means actual results from a
    # core model run, and non-zero values are ID numbers for metamodels).  This allows many
    # different possible sets of performance measures to be stored for the same set
    # of input parameters.

    db2.write_experiment_all(
        scope_name=s2.name,
        design_name='general',
        source=0,
        xlm_df=df2,
    )
    # NOTE(review): this reads back from the original `db`, not from the
    # freshly written `db2` -- confirm whether `db2` was intended here.
    df2b = db.read_experiment_all(scope.name)
    dataframe_regression.check(pd.DataFrame(df2b), basename='test_database__df2b')

    # ## Multiple-Design Datasets
    #
    # The EMAT database is not limited to storing a single design of experiments.  Multiple designs
    # can be stored for the same scope.  We'll add a set of univariate sensitivity tests to our
    # database, and a "ref" design that contains a single experiment with all inputs set to their
    # default values.

    design_uni = model.design_experiments(sampler='uni')
    model.run_experiments(design_uni)
    model.run_reference_experiment()

    # We now have three designs stored in our database.  We can confirm this
    # by reading out the set of design names.

    assert sorted(db.read_design_names(s.name)) == sorted(
        ['lhs', 'ref', 'uni'])

    # Note that there
    # can be some experiments that are in more than one design.  This is
    # not merely duplicating the experiment and results, but actually
    # assigning the same experiment to both designs.  We can see this
    # for the 'uni' and 'ref' designs -- both contain the all-default
    # parameters experiment, and when we read these designs out of the
    # database, the same experiment number is reported out in both
    # designs.

    uni = db.read_experiment_all(scope.name, design_name='uni')
    ref = db.read_experiment_all(scope.name, design_name='ref')
    dataframe_regression.check(pd.DataFrame(uni), basename='test_database__uni')
    dataframe_regression.check(pd.DataFrame(ref), basename='test_database__ref')

    # ### Writing Out Raw Data
    #
    # We can read a single dataframe containing all the experiments associated with
    # this scope by omitting the `design_name` argument, just as if there was only
    # one design.

    df = db.read_experiment_all(scope.name)
    df.to_csv("road_test_2.csv.gz")

    # If we want to be able to reconstruct the various designs of experiments later,
    # we'll also need to write out instructions for that.  The `read_all_experiment_ids`
    # method can give us a dictionary of all the relevant information.

    design_experiments = db.read_all_experiment_ids(scope.name, design_name='*', grouped=True)
    data_regression.check(design_experiments)

    # We can write this dictionary to a file in 'yaml' format.

    with open("road_test_design_experiments.yaml", 'wt') as f:
        yaml.dump(design_experiments, f)

    # ### Reading In Raw Data

    # To construct a new emat Database with multiple designs of experiments,
    # we store the scope, read the consolidated CSV and the design-to-ids
    # mapping back in, and pass that mapping as `design_name`.
    db3 = emat.SQLiteDB("road_test_3.sqldb")
    db3.store_scope(s2)

    df3 = pd.read_csv("road_test_2.csv.gz", index_col='experiment')
    with open("road_test_design_experiments.yaml", 'rt') as f:
        design_experiments2 = yaml.safe_load(f)
    data_regression.check(design_experiments2)

    db3.write_experiment_all(
        scope_name=s2.name,
        design_name=design_experiments2,
        source=0,
        xlm_df=df3,
    )
    assert sorted(db3.read_design_names(s.name)) == sorted(
        ['lhs', 'ref', 'uni'])

    # Experiment 111 belongs to both 'ref' and 'uni'.
    dx = db3.read_all_experiment_ids(scope.name, design_name='*', grouped=True)
    assert dx == {'lhs': '1-110', 'ref': '111', 'uni': '111-132'}

    uni3 = db3.read_experiment_all(scope.name, design_name='uni')
    dataframe_regression.check(pd.DataFrame(uni3), basename='test_database__uni')

    # ## Re-running Experiments

    # This section provides a short walkthrough of how to handle mistakes
    # in an EMAT database.  By "mistakes" we are referring to incorrect
    # values that have been written into the database by accident, generally
    # arising from core model runs that were misconfigured or suffered
    # non-fatal errors that caused the results to be invalid.
    #
    # One approach to handling such problems is to simply start over with a
    # brand new clean database file.  However, this may be inconvenient if
    # the database already includes a number of valid results, especially if
    # those valid results were expensive to generate.  It may also be desirable
    # to keep prior invalid results on hand, so as to easily recognize when
    # errors recur.
    #
    # We begin this example by populating our database with some more experimental data, by creating and
    # running a single design of experiments for the Road Test model, except these experiments will be
    # created with a misconfigured model (lane_width = 10.3, it should be 10), so the results will be bad.

    model.lane_width = 10.3
    oops = model.design_experiments(design_name='oops', random_seed=12345)
    model.run_experiments(oops)

    # We can review a dataframe of results as before, using the `read_experiment_all`
    # method.  This time we will add `with_run_ids=True`, which will add an extra
    # column to the index, showing a universally unique id attached to each row
    # of results.

    oops_result1 = db.read_experiment_all(scope.name, 'oops', with_run_ids=True)
    # The index is dropped before the regression check.
    dataframe_regression.check(
        pd.DataFrame(oops_result1).reset_index(drop=True),
        basename='test_database__oops_result1')

    # Some of these results are obviously problematic.  Increasing capacity cannot possibly
    # result in a negative travel time savings.  (Braess paradox doesn't apply here because
    # it's just one link, not a network.)  So those negative values are clearly wrong.  We
    # can fix the model so they won't be wrong, but by default the `run_experiments` method
    # won't actually re-run models when the results are already available in the database.
    # To solve this conundrum, we can mark the incorrect results as invalid, using a query
    # to pull out the rows that can be flagged as wrong.

    db.invalidate_experiment_runs(queries=['time_savings < 0'])

    # The `[73]` returned here indicates that 73 sets of results were invalidated by this command.
    # Now we can fix our model, and then use the `run_experiments` method to get new model runs for
    # the invalidated results.

    model.lane_width = 10
    oops_result2 = model.run_experiments(oops)
    dataframe_regression.check(
        pd.DataFrame(oops_result2).reset_index(drop=True),
        basename='test_database__oops_result2')

    # The re-run fixed the negative values, although it left in place the other
    # experimental runs in the database.  By the way we constructed this example,
    # we know those are wrong too, and it's evident in the apparent discontinuity
    # in the input flow graph, which we can zoom in on.

    # ax = oops_result2.plot.scatter(x='input_flow', y='time_savings', color='r')
    # ax.plot([109, 135], [0, 35], '--', color='y');

    # Those original results are bad too, and we want to invalidate them as well.
    # In addition to giving conditional queries to the `invalidate_experiment_runs`
    # method, we can also give a dataframe of results that have run ids attached,
    # and those unique ids will be used to find and invalidate results in the
    # database.  Here, we pass in the dataframe of all the results, which contains
    # all 110 runs, but only 37 runs are newly invalidated (73 were invalidated
    # previously).

    db.invalidate_experiment_runs(oops_result1)

    # Now when we run the experiments again, those 37 experiments are re-run.

    oops_result3 = model.run_experiments(oops)
    dataframe_regression.check(
        pd.DataFrame(oops_result3).reset_index(drop=True),
        basename='test_database__oops_result3')

    # ### Writing Out All Runs
    #
    # By default, the `read_experiment_all` method returns the most recent valid set of
    # performance measures for each experiment, but we can override this behavior to
    # ask for all run results, or all valid or invalid results.  This allows us to easily
    # write out data files containing all the results stored in the database.

    oops_all = db.read_experiment_all(scope.name, with_run_ids=True, runs='all')
    dataframe_regression.check(pd.DataFrame(oops_all).reset_index(drop=True), basename='test_database__oops_all')

    # If we want to mark the valid and invalid runs, we can read them
    # separately and attach a tag to the two dataframes.

    runs_1 = db.read_experiment_all(scope.name, with_run_ids=True, runs='valid')
    runs_1['is_valid'] = True
    runs_0 = db.read_experiment_all(scope.name, with_run_ids=True, runs='invalid')
    runs_0['is_valid'] = False
    all_runs = pd.concat([runs_1, runs_0])
    dataframe_regression.check(pd.DataFrame(all_runs).reset_index(drop=True), basename='test_database__all_runs')
def test_database_merging():
    """Check row counts before and after merging a second database.

    A fresh ``emat.SQLiteDB()`` is given the Road Test scope and two LHS
    designs, both of which are run with the core model.  A metamodel is
    created from the "lhs" design and used to run a third design, "lhs_2".
    Five "lhs_2" experiments are then re-run with the core model, after
    which reading the measures without naming a source is expected to raise
    ``ValueError``.  Finally the "lhs_2" parameters are written into the
    example road-test database, run there, and merged back into the first
    database; the asserted sizes around ``merge_database`` are the point of
    the test.
    """
    import emat
    from emat.experiment.experimental_design import design_experiments
    from emat.model.core_python import PythonCoreModel, Road_Capacity_Investment

    scope_name = "EMAT Road Test"

    def expect_size(frame, n_rows, n_cols=None):
        # Row count is always checked; column count only when supplied.
        assert len(frame) == n_rows
        if n_cols is not None:
            assert len(frame.columns) == n_cols

    # --- primary database holding the Road Test scope ---
    road_scope = emat.Scope(emat.package_file("model", "tests", "road_test.yaml"))
    main_db = emat.SQLiteDB()
    road_scope.store_scope(main_db)
    assert main_db.read_scope_names() == [scope_name]

    # Two designs: one with no explicit name, one named "lhs_large".
    design_experiments(
        road_scope,
        db=main_db,
        n_samples_per_factor=10,
        sampler="lhs",
    )
    design_experiments(
        road_scope,
        db=main_db,
        n_samples=500,
        sampler="lhs",
        design_name="lhs_large",
    )
    assert main_db.read_design_names(scope_name) == ["lhs", "lhs_large"]

    # --- run the core model on both designs ---
    core_model = PythonCoreModel(
        Road_Capacity_Investment, scope=road_scope, db=main_db
    )
    first_run = core_model.run_experiments(design_name="lhs")
    core_model.run_experiments(design_name="lhs_large")

    # What is read back for "lhs" must match what the run returned.
    pd.testing.assert_frame_equal(
        core_model.read_experiments(design_name="lhs"),
        first_run,
        check_like=True,
    )
    expect_size(core_model.read_experiment_parameters(design_name="lhs"), 110, 13)
    expect_size(core_model.read_experiment_measures(design_name="lhs"), 110, 7)

    # --- metamodel built from "lhs", used to run a new design ---
    meta = core_model.create_metamodel_from_design("lhs")
    assert meta.metamodel_id == 1
    assert isinstance(meta.function, emat.MetaModel)

    second_design = design_experiments(
        road_scope,
        db=main_db,
        n_samples_per_factor=10,
        sampler="lhs",
        random_seed=2,
    )
    expect_size(meta.run_experiments(second_design), 110, 20)
    assert main_db.read_design_names(None) == ["lhs", "lhs_2", "lhs_large"]

    def lhs2_measures(**options):
        # Measures stored in the primary database for the "lhs_2" design.
        return main_db.read_experiment_measures(None, "lhs_2", **options)

    expect_size(lhs2_measures(), 110, 7)
    assert main_db.read_experiment_measure_sources(None, "lhs_2") == [1]

    # --- re-run five of the "lhs_2" experiments with the core model ---
    core_model.allow_short_circuit = False
    expect_size(core_model.run_experiments(second_design.iloc[:5]), 5, 20)

    with pytest.raises(ValueError):
        # now there are two sources of some measures
        lhs2_measures()
    assert set(main_db.read_experiment_measure_sources(None, "lhs_2")) == {0, 1}
    for source_id, n_rows in ((0, 5), (1, 110)):
        expect_size(lhs2_measures(source=source_id), n_rows)

    # --- second database: copy the "lhs_2" design over and run it there ---
    import emat.examples

    _, other_db, other_model = emat.examples.road_test()

    # write the design for lhs_2 into a different database.
    # it ends up giving different experiment id's to these, which is fine.
    lhs2_params = main_db.read_experiment_parameters(None, "lhs_2")
    other_db.write_experiment_parameters(None, "lhs_2", lhs2_params)
    copied = other_db.read_experiment_parameters(None, "lhs_2")
    expect_size(copied, 110, 13)
    pd.testing.assert_frame_equal(
        second_design.reset_index(drop=True),
        copied.reset_index(drop=True),
        check_like=True,
    )
    other_model.run_experiments("lhs_2")

    # Before the merge: only the five core-model runs exist for source 0.
    expect_size(lhs2_measures(source=0), 5, 7)
    expect_size(lhs2_measures(runs="valid"), 115)

    main_db.merge_database(other_db)

    # After the merge: source 0 covers all 110 experiments.
    expect_size(lhs2_measures(source=0), 110, 7)
    expect_size(lhs2_measures(runs="valid"), 225)
def test_read_db_gz():
    """Open the gzipped Road Test example database and verify its contents.

    Opening a missing scope file or a missing ``.db.gz`` file must raise
    ``FileNotFoundError``.  If the ``roadtest.db.gz`` example file does not
    exist, it is generated first: a 110-sample "lhs" design is created and
    run into ``roadtest.db.tmp``, and that file is then gzipped.  The
    database is then opened and its repr, info string, stored scope and
    "lhs" experiment table are checked.
    """
    from emat.model.core_python import Road_Capacity_Investment

    road_test_scope_file = emat.package_file("model", "tests", "road_test.yaml")
    with pytest.raises(FileNotFoundError):
        emat.Scope(emat.package_file("nope.yaml"))
    s = emat.Scope(road_test_scope_file)
    with pytest.raises(FileNotFoundError):
        emat.SQLiteDB(emat.package_file("nope.db.gz"))

    if not os.path.exists(emat.package_file("examples", "roadtest.db.gz")):
        import gzip
        import shutil

        # Build the example database from scratch in a temporary file.
        db_w = emat.SQLiteDB(
            emat.package_file("examples", "roadtest.db.tmp"),
            initialize=True,
        )
        s.store_scope(db_w)
        s.design_experiments(
            n_samples=110,
            random_seed=1234,
            db=db_w,
            design_name="lhs",
        )
        m_w = emat.PythonCoreModel(Road_Capacity_Investment, scope=s, db=db_w)
        m_w.run_experiments(design_name="lhs", db=db_w)
        # Close the connection before reading the raw file back for compression.
        db_w.conn.close()

        # NOTE(review): roadtest.db.tmp is not removed after compression.
        with open(emat.package_file("examples", "roadtest.db.tmp"), "rb") as f_in:
            with gzip.open(
                emat.package_file("examples", "roadtest.db.gz"), "wb"
            ) as f_out:
                shutil.copyfileobj(f_in, f_out)

    db = emat.SQLiteDB(emat.package_file("examples", "roadtest.db.gz"))
    assert repr(db) == '<emat.SQLiteDB with scope "EMAT Road Test">'
    # The info string is expected to end in "roadtest.db", without ".gz".
    info = db.get_db_info()
    assert info.startswith("SQLite @ ")
    assert info.endswith("roadtest.db")

    assert db.read_scope_names() == ["EMAT Road Test"]

    # The scope read from the database must equal the one loaded from YAML,
    # both attribute by attribute and as a whole.
    s1 = db.read_scope("EMAT Road Test")
    assert type(s1) is type(s)
    for k in ("_x_list", "_l_list", "_c_list", "_m_list", "name", "desc"):
        assert getattr(s, k) == getattr(s1, k), k
    assert s == s1

    experiments = db.read_experiment_all("EMAT Road Test", "lhs")
    assert experiments.shape == (110, 20)
    assert list(experiments.columns) == [
        "free_flow_time",
        "initial_capacity",
        "alpha",
        "beta",
        "input_flow",
        "value_of_time",
        "unit_cost_expansion",
        "interest_rate",
        "yield_curve",
        "expand_capacity",
        "amortization_period",
        "debt_type",
        "interest_rate_lock",
        "no_build_travel_time",
        "build_travel_time",
        "time_savings",
        "value_of_time_savings",
        "net_benefits",
        "cost_of_capacity_expansion",
        "present_cost_expansion",
    ]

    # A core model attached to this database is expected to have no metamodel id.
    m = emat.PythonCoreModel(Road_Capacity_Investment, scope=s, db=db)
    assert m.metamodel_id is None