def evaluate(input_rows):
    """
    Build a material from table rows, evaluate it on the default graph and
    return the aggregated results as table rows.

    Args:
        input_rows: sequence of row mappings; each row's 'Value' entry is
            parsed with ``ureg.parse_expression``. Rows whose 'Value' is
            falsy are skipped. The row position selects the symbol through
            ``ROW_IDX_TO_SYMBOL_NAME``.

    Returns:
        (list<dict>) one dict per aggregated quantity with the keys
        'Property', 'Value' and 'Provenance' ('Provenance' is always None).
    """
    parsed = []
    for row_number, row in enumerate(input_rows):
        if not row['Value']:
            continue
        parsed.append(
            Quantity(symbol_type=ROW_IDX_TO_SYMBOL_NAME[row_number],
                     value=ureg.parse_expression(row['Value'])))

    material = Material()
    for entry in parsed:
        material.add_quantity(entry)

    evaluated = Graph().evaluate(material)
    output_quantities = evaluated.get_aggregated_quantities()
    print(output_quantities)

    output_rows = []
    for symbol, aggregated in output_quantities.items():
        output_rows.append({
            'Property': symbol.display_names[0],
            'Value': str(aggregated.value),
            'Provenance': None
        })
    return output_rows
def test_symbol_ancestry(self):
    """
    Tests the Symbol Ancestry algorithm on a non-cyclic graph.
    The canonical symbols and models (minus model6) are used for this test.

    Each structural property of the returned tree is asserted on its own so
    that a failure points at the exact node attribute that is wrong.
    """
    symbols = GraphTest.generate_canonical_symbols()
    models = GraphTest.generate_canonical_models()
    # NOTE(review): model6 is presumably what makes the canonical graph
    # cyclic; it is removed here to obtain the non-cyclic variant.
    del models['model6']
    g = Graph(symbol_types=symbols, models=models, composite_models=dict())

    head_msg = "Tree head not properly defined."
    branch_msg = "Tree branch improperly formed."

    # --- Ancestry of F: expected to be a single chain of two models. ---
    out1 = g.required_inputs_for_property(symbols['F'])
    head = out1.head
    self.assertIsNone(head.m, head_msg)
    self.assertIsNone(head.parent, head_msg)
    self.assertEqual(head.inputs, {symbols['F']}, head_msg)
    self.assertEqual(len(head.children), 1, head_msg)

    branch = head.children[0]
    self.assertEqual(branch.m, models['model3'], branch_msg)
    self.assertEqual(branch.inputs, {symbols['B']}, branch_msg)
    self.assertIs(branch.parent, head, branch_msg)
    self.assertEqual(len(branch.children), 1, branch_msg)

    leaf = branch.children[0]
    self.assertEqual(leaf.m, models['model1'], branch_msg)
    self.assertEqual(leaf.inputs, {symbols['A']}, branch_msg)
    self.assertIs(leaf.parent, branch, branch_msg)
    self.assertEqual(len(leaf.children), 0, branch_msg)

    # --- Ancestry of D: two alternative models directly under the head. ---
    out2 = g.required_inputs_for_property(symbols['D'])
    head = out2.head
    self.assertIsNone(head.m, head_msg)
    self.assertIsNone(head.parent, head_msg)
    self.assertEqual(head.inputs, {symbols['D']}, head_msg)
    self.assertEqual(len(head.children), 2, head_msg)

    # Children are looked up by model because their order is not asserted.
    by_model = {node.m: node for node in head.children}

    via_model4 = by_model[models['model4']]
    self.assertEqual(via_model4.inputs, {symbols['B'], symbols['C']},
                     branch_msg)
    self.assertIs(via_model4.parent, head, branch_msg)

    via_model5 = by_model[models['model5']]
    self.assertEqual(via_model5.inputs, {symbols['C'], symbols['G']},
                     branch_msg)
    self.assertIs(via_model5.parent, head, branch_msg)
    self.assertEqual(len(via_model5.children), 2, branch_msg)

    under_model4 = {node.m: node for node in via_model4.children}
    under_model5 = {node.m: node for node in via_model5.children}

    node = under_model4[models['model1']]
    self.assertEqual(node.inputs, {symbols['A']}, branch_msg)
    self.assertIs(node.parent, via_model4, branch_msg)
    self.assertEqual(node.children, [], branch_msg)

    # Both children of the model5 branch have the same shape: one child
    # node, itself childless, whose only input is A.
    expectations = (
        ('model1', {symbols['G'], symbols['A']}),
        ('model2', {symbols['C'], symbols['A']}),
    )
    for model_key, expected_inputs in expectations:
        node = under_model5[models[model_key]]
        self.assertEqual(node.inputs, expected_inputs, branch_msg)
        self.assertIs(node.parent, via_model5, branch_msg)
        self.assertEqual(len(node.children), 1, branch_msg)
        grandchild = node.children[0]
        self.assertIs(grandchild.parent, node, branch_msg)
        self.assertEqual(grandchild.children, [], branch_msg)
        self.assertEqual(grandchild.inputs, {symbols['A']}, branch_msg)
def test_apply_material_to_graph(self):
    """
    Evaluates the fixture material (``self.mat``) on a default graph and
    checks that more than 250 quantities come back.
    """
    evaluated = Graph().evaluate(self.mat)
    quantity_count = len(evaluated.get_quantities())
    # TODO:
    # For some reason Travis and this version are not commensurate
    # 257 != 263, should resolve this
    self.assertGreater(quantity_count, 250)
def test_get_path(self):
    """
    Tests the ability to generate all paths from one symbol to another.

    Uses the canonical symbols and models with model6 removed. The number
    of returned paths must match the expected number, and every returned
    path must be one of the expected paths.
    """
    symbols = GraphTest.generate_canonical_symbols()
    models = GraphTest.generate_canonical_models()
    del models['model6']
    g = Graph(symbol_types=symbols, models=models, composite_models=dict())

    def path(symbol_keys, model_keys):
        # Build an expected SymbolPath from symbol names and model names.
        return SymbolPath({symbols[key] for key in symbol_keys},
                          [models[key] for key in model_keys])

    paths_1 = g.get_paths(symbols['A'], symbols['F'])
    paths_2 = g.get_paths(symbols['A'], symbols['D'])

    ans_1 = [path(('A',), ('model1', 'model3'))]
    ans_2 = [
        path(('A', 'C'), ('model2', 'model5')),
        path(('A', 'G'), ('model1', 'model5')),
        path(('A',), ('model1', 'model4')),
        path(('A',), ('model1', 'model2', 'model5')),
        path(('A',), ('model2', 'model1', 'model5')),
    ]

    msg = "Incorrect paths generated."
    self.assertEqual(len(paths_1), len(ans_1), msg)
    self.assertEqual(len(paths_2), len(ans_2), msg)
    for found in paths_1:
        self.assertIn(found, ans_1, msg)
    for found in paths_2:
        self.assertIn(found, ans_2, msg)
def test_graph_setup(self):
    """
    Tests the outcome of constructing the canonical graph.

    Checks that the graph reports exactly the canonical symbols and models,
    and that every model input/output symbol is wired into the graph's
    ``_input_to_model`` / ``_output_to_model`` lookup tables.
    """
    symbols = GraphTest.generate_canonical_symbols()
    models = GraphTest.generate_canonical_models()
    g = Graph(models=models, symbol_types=symbols, composite_models=dict())

    st_c = set(symbols.values())
    st_g = g.get_symbol_types()
    m_c = {x.name: x for x in models.values()}
    m_g = g.get_models()
    self.assertEqual(
        st_c, st_g,
        'Canonical constructed graph does not have the right Symbol objects.')
    self.assertEqual(
        m_c, m_g,
        'Canonical constructed graph does not have the right Model objects.')

    for m in models.values():
        for input_set in m.input_sets:
            for symbol in input_set:
                input_msg = ("Canonical constructed graph does not have an "
                             "edge from input: {} to model: {}"
                             .format(symbol, m))
                self.assertIn(symbols[symbol], g._input_to_model.keys(),
                              input_msg)
                # NOTE(review): the table is indexed with the raw `symbol`
                # here but with `symbols[symbol]` above; kept as in the
                # original -- confirm both keys hash/compare alike.
                self.assertIn(m, g._input_to_model[symbol], input_msg)
        for output_set in m.output_sets:
            for symbol in output_set:
                # The original reused the "from input ... to model" text for
                # outputs; the message now describes the edge being checked.
                output_msg = ("Canonical constructed graph does not have an "
                              "edge from model: {} to output: {}"
                              .format(m, symbol))
                self.assertIn(symbols[symbol], g._output_to_model.keys(),
                              output_msg)
                self.assertIn(m, g._output_to_model[symbol], output_msg)
def evaluate(input_rows, data, aggregate):
    """
    Evaluate user-supplied quantities on the default graph and build the
    output components (graph view, line break, results table).

    Args:
        input_rows: sequence of row mappings; each row's 'Editable Value'
            is parsed with ``ureg.parse_expression``. Rows with a falsy
            'Editable Value' are skipped. The row position selects the
            symbol through ``ROW_IDX_TO_SYMBOL_NAME``.
        data: optional JSON string decoded with ``MontyDecoder``; the
            values of the decoded mapping are appended to the quantities.
        aggregate: if truthy, aggregated quantities are reported, otherwise
            every individual quantity is reported.

    Returns:
        (list) ``[output_graph, html.Br(), output_table]``.

    Raises:
        PreventUpdate: if neither the rows nor ``data`` yield any quantity.
    """
    quantities = [
        QuantityFactory.create_quantity(
            symbol_type=ROW_IDX_TO_SYMBOL_NAME[idx],
            value=ureg.parse_expression(row['Editable Value']))
        for idx, row in enumerate(input_rows) if row['Editable Value']
    ]

    if data:
        quantities += json.loads(data, cls=MontyDecoder).values()

    if not quantities:
        raise PreventUpdate

    material = Material()
    for quantity in quantities:
        material.add_quantity(quantity)

    graph = Graph()
    output_material = graph.evaluate(material)

    # Materialise once: the collection is walked twice below (table rows and
    # derived-name set), which would silently break on a one-shot iterator.
    if aggregate:
        output_quantities = list(
            output_material.get_aggregated_quantities().values())
    else:
        output_quantities = list(output_material.get_quantities())

    output_rows = [{
        'Property': quantity.symbol.display_names[0],
        'Value': quantity.pretty_string(sigfigs=3)
    } for quantity in output_quantities]

    output_table = dt.DataTable(id='output-table',
                                rows=output_rows,
                                editable=False)

    # TODO: clean up
    input_quantity_names = [q.symbol.name for q in quantities]
    derived_quantity_names = \
        {q.symbol.name for q in output_quantities} - set(input_quantity_names)
    material_graph_data = graph_conversion(
        graph.get_networkx_graph(),
        nodes_to_highlight_green=input_quantity_names,
        nodes_to_highlight_yellow=list(derived_quantity_names))

    # Work on copies so the shared AESTHETICS configuration is not modified
    # as a side effect of rendering this component.
    base_options = AESTHETICS['global_options']
    options = dict(base_options)
    options['edges'] = dict(base_options['edges'], color='#000000')

    output_graph = html.Div(GraphComponent(id='material-graph',
                                           graph=material_graph_data,
                                           options=options),
                            style={
                                'width': '100%',
                                'height': '400px'
                            })

    return [output_graph, html.Br(), output_table]
def setUp(self):
    """
    Loads the fitting test data, evaluates one single-quantity material per
    'band_gap' entry and stores the matching 'refractive_index' benchmarks.
    """
    csv_path = os.path.join(TEST_DIR, "fitting_test_data.csv")
    frame = pd.read_csv(csv_path)
    evaluator = Graph()

    band_gap_materials = []
    for band_gap in frame['band_gap']:
        quantity = QuantityFactory.create_quantity("band_gap", band_gap)
        band_gap_materials.append(Material([quantity]))

    evaluated = []
    for candidate in band_gap_materials:
        evaluated.append(evaluator.evaluate(candidate))
    self.evaluated = evaluated

    benchmarks = []
    for index_value in frame['refractive_index']:
        benchmarks.append({"refractive_index": index_value})
    self.benchmarks = benchmarks
def test_graph_conversion(self):
    """
    Converts the default graph and checks that the result is JSON
    serializable and contains an expected node label and edge.
    """
    networkx_graph = Graph().get_networkx_graph()
    converted = graph_conversion(networkx_graph)

    as_json = json.dumps(converted)
    self.assertIsNotNone(as_json)

    # Ensure that there are both nodes and proper edges
    node_labels = []
    for node in converted['nodes']:
        node_labels.append(node['label'])
    self.assertIn('Band gap', node_labels)

    expected_edge = {'from': 'band_gap', "to": "Is Metallic"}
    self.assertIn(expected_edge, converted['edges'])
def test_graph_conversion(self):
    """
    Converts the default graph and checks that the result is JSON
    serializable and that the flat element list contains an expected node
    label and edge (elements carry a 'group' and a 'data' entry).
    """
    networkx_graph = Graph().get_networkx_graph()
    converted = graph_conversion(networkx_graph)

    as_json = json.dumps(converted)
    self.assertIsNotNone(as_json)

    # Ensure that there are both nodes and proper edges
    node_labels = []
    for element in converted:
        if element['group'] == 'nodes':
            node_labels.append(element['data']['label'])
    self.assertIn('Band gap', node_labels)

    edge_payloads = []
    for element in converted:
        if element['group'] == 'edges':
            edge_payloads.append(element['data'])
    expected_edge = {'source': 'band_gap', "target": "Is Metallic"}
    self.assertIn(expected_edge, edge_payloads)
def test_model_add_remove(self):
    """
    Tests the outcome of adding and removing a model from the canonical
    graph: model6 is removed, the remaining models must survive, and model6
    is then added back and must reappear in the lookup tables.
    """
    symbols = GraphTest.generate_canonical_symbols()
    models = GraphTest.generate_canonical_models()
    g = Graph(models=models, symbol_types=symbols, composite_models=dict())

    m6 = models['model6']
    removed_msg = "Model was unsuccessfully removed from the graph."
    added_msg = "Model was unsuccessfully added to the graph."

    g.remove_models({m6.name: m6})
    self.assertFalse(m6 in g.get_models().values(), removed_msg)
    for linked_models in g._input_to_model.values():
        self.assertFalse(m6 in linked_models, removed_msg)
    for linked_models in g._output_to_model.values():
        self.assertFalse(m6 in linked_models, removed_msg)

    # Only the other canonical models are expected to remain in the graph.
    del models['model6']
    for remaining in models.values():
        self.assertTrue(remaining in g.get_models().values(),
                        "Too many models were removed.")

    g.update_models({'Model6': m6})
    self.assertTrue(m6 in g.get_models().values(), added_msg)
    for input_key in ('D', 'F'):
        self.assertTrue(m6 in g._input_to_model[symbols[input_key]],
                        added_msg)
    self.assertTrue(m6 in g._output_to_model[symbols['A']], added_msg)
def process_item(self, item):
    """
    Turn one materials document into a document of derived quantities.

    The mapped fields of ``item`` are attached to a Material as quantities,
    the material is evaluated on a graph (with the two dimensionality models
    removed), and the input plus newly derived quantities are serialized.

    Args:
        item: (dict) materials document; must contain 'task_id' and
            'pretty_formula'.

    Returns:
        sanitized (``jsanitize(..., strict=True)``) document with an
        'inputs' list, one sub-document per derived symbol, and the
        'task_id' / 'pretty_formula' of the source item.
    """
    # Define quantities corresponding to materials doc fields
    # Attach quantities to materials
    item = MontyDecoder().process_decoded(item)

    logger.info("Populating material for %s", item['task_id'])
    material = Material()
    for doc_key, symbol_name in self.materials_symbol_map.items():
        field_value = get(item, doc_key)
        # NOTE(review): falsy values (e.g. 0) are skipped by this check.
        if not field_value:
            continue
        material.add_quantity(Quantity(symbol_name, field_value))

    # Add custom things, e. g. computed entry
    entry = get_entry(item)
    material.add_quantity(Quantity("computed_entry", entry))
    material.add_quantity(
        Quantity("external_identifier_mp", item['task_id']))

    input_quantities = material.get_quantities()

    # Use graph to generate expanded quantity pool
    logger.info("Evaluating graph for %s", item['task_id'])
    graph = Graph()
    excluded_models = {
        model_name: DEFAULT_MODEL_DICT[model_name]
        for model_name in ("dimensionality_cheon", "dimensionality_gorai")
    }
    graph.remove_models(excluded_models)
    new_material = graph.evaluate(material)

    # Format document and return
    logger.info("Creating doc for %s", item['task_id'])
    doc = {"inputs": [given.as_dict() for given in input_quantities]}

    aggregated = new_material.get_aggregated_quantities()
    for symbol, summary in aggregated.items():
        members = new_material._symbol_to_quantity[symbol]
        # Only add new quantities
        is_single = len(members) == 1
        if is_single and list(members)[0] in input_quantities:
            continue
        member_dicts = [member.as_dict() for member in members]
        doc[symbol.name] = {
            "quantities": member_dicts,
            "mean": unumpy.nominal_values(summary.value).tolist(),
            "std_dev": unumpy.std_devs(summary.value).tolist(),
            "units": member_dicts[0]['units'],
            "title": summary._symbol_type.display_names[0]
        }

    doc.update({
        "task_id": item["task_id"],
        "pretty_formula": item["pretty_formula"]
    })
    return jsanitize(doc, strict=True)
def test_evaluate_cyclic(self):
    """
    Tests the evaluation algorithm on a cyclic graph.
    The canonical graph and the canonical material are used for this test.
    """
    symbols = GraphTest.generate_canonical_symbols()
    models = GraphTest.generate_canonical_models()
    material = GraphTest.generate_canonical_material(symbols)
    g = Graph(symbol_types=symbols, models=models, composite_models=dict())
    material_derived = g.evaluate(material)

    # (symbol key, value) pairs, grouped by how they are derived.
    expected_values = [
        # Starting
        ('A', 19), ('A', 23),
        # Derives -1 (M1)
        ('B', 38), ('B', 46),
        ('C', 57), ('C', 69),
        # Derives -2 (M3, M1)
        ('F', 266), ('F', 322),
        # Derives -2 (M4, M1)
        ('D', 23826), ('D', 28842), ('D', 34914),
        # Derives -1 (M2)
        ('G', 95), ('G', 115),
        # Derives -2 (M5, M1, M2)
        ('D', 70395), ('D', 85215), ('D', 103155),
    ]
    expected_quantities = [Quantity(symbols[key], value)
                           for key, value in expected_values]

    pristine = GraphTest.generate_canonical_material(symbols)
    self.assertTrue(material == pristine,
                    "evaluate() mutated the original material argument.")

    derived_quantities = material_derived.get_quantities()
    counts_match = len(expected_quantities) == len(derived_quantities)
    self.assertTrue(counts_match,
                    "Evaluate did not correctly derive outputs.")

    for expected in expected_quantities:
        same_symbol = material_derived._symbol_to_quantity[expected.symbol]
        self.assertTrue(expected in same_symbol,
                        "Evaluate failed to derive all outputs.")
        self.assertTrue(expected in derived_quantities)
def setUpClass(cls):
    """
    Registers the builtin models, evaluates one single-quantity material
    per 'Band Gap' entry of the data file and stores the matching
    'Refractive Index' benchmarks on the class.
    """
    add_builtin_models_to_registry()

    # This is the visible light dataset used in the propnet paper
    csv_path = os.path.join(TEST_DATA_DIR, "vis_bg_ri_data.csv")
    frame = pd.read_csv(csv_path)
    evaluator = Graph()

    band_gap_materials = []
    for band_gap in frame['Band Gap']:
        quantity = QuantityFactory.create_quantity("band_gap", band_gap)
        band_gap_materials.append(Material([quantity]))

    evaluated = []
    for candidate in band_gap_materials:
        evaluated.append(evaluator.evaluate(candidate))
    cls.evaluated = evaluated

    benchmarks = []
    for index_value in frame['Refractive Index']:
        benchmarks.append({"refractive_index": index_value})
    cls.benchmarks = benchmarks
def test_evaluate_single_material_degenerate_property(self):
    """
    Graph has one material on it: mat1
        mat1 has trivial degenerate properties relative permittivity and
        relative permeability
            2 experimental relative permittivity measurements
            2 experimental relative permeability measurements
    Determines if TestGraph1 is correctly evaluated using the evaluate
    method.
    We expect 4 refractive_index properties to be calculated as the
    following:
        sqrt(3), sqrt(5), sqrt(6), sqrt(10)
    """
    # Setup
    propnet = Graph()
    mat1 = Material()
    for symbol_name, value in (('relative_permeability', 1),
                               ('relative_permeability', 2),
                               ('relative_permittivity', 3),
                               ('relative_permittivity', 5)):
        mat1.add_quantity(Quantity(DEFAULT_SYMBOLS[symbol_name], value))

    mat1_derived = propnet.evaluate(mat1)

    # Expected outputs: the four inputs plus the four derived indices.
    s_outputs = [
        Quantity('relative_permeability', 1),
        Quantity('relative_permeability', 2),
        Quantity('relative_permittivity', 3),
        Quantity('relative_permittivity', 5),
        Quantity('refractive_index', 3**0.5),
        Quantity('refractive_index', 5**0.5),
        Quantity('refractive_index', 6**0.5),
        Quantity('refractive_index', 10**0.5),
    ]

    # Test
    for q_expected in s_outputs:
        candidates = mat1_derived._symbol_to_quantity[q_expected.symbol]
        # Compare with == explicitly (rather than `in`) so the match does
        # not depend on hashing of the derived quantities.
        found = any(q_derived == q_expected for q_derived in candidates)
        self.assertTrue(found, "Quantity missing from evaluate.")
def process_item(self, item):
    """
    Run correlation calculation on a pair of properties using the specified
    function.

    Args:
        item: (dict) input provided by get_items() (see get_items() for
            structure). Reads the keys 'x_name', 'y_name', 'x_data',
            'y_data' and 'func' (a ``(name, callable)`` pair).

    Returns:
        (tuple) six entries, in order:
            independent property (x-axis) name,
            dependent property (y-axis) name,
            correlation result: 0.0 when fewer than 2 data points are
                given, otherwise the value returned by the correlation
                function, or the exception instance if that call raised,
            correlation function name,
            number of data points used for correlation,
            shortest path length between the two properties on the propnet
                graph, taken over both directions (x->y and y->x). None if
                neither direction yields a length.
    """
    prop_x, prop_y = item['x_name'], item['y_name']
    data_x, data_y = item['x_data'], item['y_data']
    func_name, func = item['func']
    n_points = len(data_x)

    g = Graph()
    try:
        path_length_xy = g.get_degree_of_separation(prop_x, prop_y)
        path_length_yx = g.get_degree_of_separation(prop_y, prop_x)
    except ValueError:
        # This shouldn't happen...but just in case
        path_length_xy = None
        path_length_yx = None

    # Ignore directions without a path explicitly. Testing `is not None`
    # (instead of relying on truthiness) keeps a legitimate length of 0.
    known_lengths = [length
                     for length in (path_length_xy, path_length_yx)
                     if length is not None]
    path_length = min(known_lengths) if known_lengths else None

    if n_points < 2:
        result = 0.0
    else:
        try:
            result = func(data_x, data_y)
        except Exception as ex:
            # If correlation fails, catch the error, save it, and move on
            result = ex

    return prop_x, prop_y, result, func_name, n_points, path_length
def test_evaluate_constraints(self):
    """
    Tests the evaluation algorithm on a non-cyclic graph involving
    constraints.
    The canonical graph and the canonical material are used for this test.
    """
    constrained_model4 = EquationModel(name="model4",
                                       equations=["D=B*C*11"],
                                       constraints=["G==0"])

    symbols = GraphTest.generate_canonical_symbols()
    models = GraphTest.generate_canonical_models()
    models['model4'] = constrained_model4
    del models['model6']
    material = GraphTest.generate_canonical_material(symbols)
    g = Graph(symbol_types=symbols, models=models, composite_models=dict())
    material_derived = g.evaluate(material)

    # (symbol key, value) pairs of every quantity expected after evaluation.
    expected_values = [
        ('A', 19), ('A', 23),
        ('B', 38), ('B', 46),
        ('C', 57), ('C', 69),
        ('G', 95), ('G', 115),
        ('F', 266), ('F', 322),
        ('D', 70395), ('D', 85215), ('D', 103155),
    ]
    expected_quantities = [Quantity(symbols[key], value)
                           for key, value in expected_values]

    pristine = GraphTest.generate_canonical_material(symbols)
    self.assertTrue(material == pristine,
                    "evaluate() mutated the original material argument.")

    derived_quantities = material_derived.get_quantities()
    counts_match = len(expected_quantities) == len(derived_quantities)
    self.assertTrue(counts_match,
                    "Evaluate did not correctly derive outputs.")

    for expected in expected_quantities:
        same_symbol = material_derived._symbol_to_quantity[expected.symbol]
        self.assertTrue(expected in same_symbol,
                        "Evaluate failed to derive all outputs.")
        self.assertTrue(expected in derived_quantities)
def test_get_provenance_graph(self):
    """
    Evaluates a three-quantity material, draws the provenance graph of a
    derived 'vickers_hardness' quantity and checks its distance from the
    first input quantity in that provenance graph.
    """
    evaluator = Graph()
    inputs = [
        Quantity("bulk_modulus", 100),
        Quantity("shear_modulus", 50),
        Quantity("density", 8.96)
    ]
    evaluated = evaluator.evaluate(Material(inputs))

    # TODO: this should be tested more thoroughly
    hardness = list(evaluated['vickers_hardness'])[0]

    # Draw inside a scratch directory so that "out.png" is not left behind.
    with tempfile.ScratchDir('.'):
        hardness.draw_provenance_graph("out.png")

    provenance = hardness.get_provenance_graph()
    target = list(evaluated['vickers_hardness'])[0]
    distances = nx.shortest_path_length(provenance, inputs[0])
    self.assertEqual(distances[target], 4)
def test_get_path_constraint(self):
    """
    Tests the ability to generate all paths from one symbol to another with
    constraints.
    """
    constrained_model4 = EquationModel("model4", ['D=B*C*11'],
                                       constraints=["G==0"])
    symbols = GraphTest.generate_canonical_symbols()
    models = GraphTest.generate_canonical_models(constrain_model_4=True)
    models['model4'] = constrained_model4
    del models['model6']
    g = Graph(symbol_types=symbols, models=models, composite_models=dict())

    def path(symbol_keys, model_keys):
        # Build an expected SymbolPath from symbol names and model names.
        return SymbolPath({symbols[key] for key in symbol_keys},
                          [models[key] for key in model_keys])

    found_to_f = g.get_paths(symbols['A'], symbols['F'])
    found_to_d = g.get_paths(symbols['A'], symbols['D'])

    expected_to_f = [path(('A',), ('model1', 'model3'))]
    expected_to_d = [
        path(('A', 'C'), ('model2', 'model5')),
        path(('A', 'G'), ('model1', 'model5')),
        path(('A', 'C', 'B'), ('model2', 'model4')),
        path(('A', 'G'), ('model1', 'model4')),
        path(('A',), ('model1', 'model2', 'model5')),
        path(('A',), ('model2', 'model1', 'model5')),
        path(('A',), ('model1', 'model2', 'model4')),
        path(('A',), ('model2', 'model1', 'model4')),
    ]

    comparisons = ((found_to_f, expected_to_f), (found_to_d, expected_to_d))
    # Path counts are checked for both targets before any membership check.
    for found, expected in comparisons:
        self.assertTrue(len(found) == len(expected),
                        "Incorrect paths generated.")
    for found, expected in comparisons:
        for candidate in found:
            self.assertTrue(candidate in expected,
                            "Incorrect paths generated.")
def test_symbol_expansion_cyclic_constraints(self):
    """
    Tests the Symbol Expansion algorithm on a cyclic graph with constraints.
    The canonical symbols and models are used, with model4 replaced by a
    constrained variant.
    """
    model4 = EquationModel("model4", ['D=B*C*11'], constraints=["G==0"])
    symbols = GraphTest.generate_canonical_symbols()
    models = GraphTest.generate_canonical_models(constrain_model_4=True)
    models['model4'] = model4
    g = Graph(symbol_types=symbols, models=models, composite_models=dict())

    # (given symbols, symbols expected to be calculable from them)
    cases = [
        ({symbols['A']},
         {x for x in symbols.values() if x is not symbols['A']}),
        ({symbols['B']}, {symbols['F']}),
        ({symbols['C']}, set()),
        ({symbols['C'], symbols['G']}, {symbols['D']}),
        ({symbols['B'], symbols['C']}, {symbols['F']}),
    ]

    # All expansions are computed before any assertion runs.
    results = [g.calculable_properties(given) for given, _ in cases]

    for i, (result, (_, expected)) in enumerate(zip(results, cases)):
        self.assertEqual(result, expected,
                         "Symbol Expansion failed: test - " + str(i))
def test_symbol_expansion_cyclic(self):
    """
    Tests the Symbol Expansion algorithm on a cyclic graph.
    The canonical symbols and models are used for this test.
    """
    symbols = GraphTest.generate_canonical_symbols()
    models = GraphTest.generate_canonical_models()
    g = Graph(symbol_types=symbols, models=models, composite_models=dict())

    # (given symbols, symbols expected to be calculable from them)
    cases = [
        ({symbols['A']},
         {x for x in symbols.values() if x is not symbols['A']}),
        ({symbols['B']}, {symbols['F']}),
        ({symbols['C']}, set()),
        ({symbols['C'], symbols['G']}, {symbols['D']}),
        ({symbols['B'], symbols['C']},
         {x for x in symbols.values()
          if x is not symbols['B'] and x is not symbols['C']}),
    ]

    # All expansions are computed before any assertion runs.
    results = [g.calculable_properties(given) for given, _ in cases]

    for i, (result, (_, expected)) in enumerate(zip(results, cases)):
        # assertEqual reports both operands when the comparison fails.
        self.assertEqual(result, expected,
                         "Symbol Expansion failed: test - " + str(i))
def setUp(self):
    """
    Creates three modulus quantities (in gigapascals), an empty material
    and a default graph for use by the tests.
    """
    def gigapascal_quantity(symbol_name, magnitude):
        # Quantity of the named default symbol with the given GPa magnitude.
        return QuantityFactory.create_quantity(
            DEFAULT_SYMBOLS[symbol_name],
            ureg.Quantity.from_tuple([magnitude, [['gigapascals', 1]]]))

    # Create some test properties and a few base objects
    self.q1 = gigapascal_quantity('bulk_modulus', 200)
    self.q2 = gigapascal_quantity('shear_modulus', 100)
    self.q3 = gigapascal_quantity('bulk_modulus', 300)
    self.material = Material()
    self.graph = Graph()
def test_super_evaluate(self):
    """
    Tests the graph's composite material evaluation.
    """
    rester = MPRester()
    components = [rester.get_material_for_mpid(mpid)
                  for mpid in ("mp-13", "mp-24972")]
    composite = CompositeMaterial(components)

    evaluated = Graph().super_evaluate(composite)

    failure_msg = "Super Evaluate failed to derive expected outputs."
    derived = evaluated._symbol_to_quantity
    self.assertTrue('pilling_bedworth_ratio' in derived.keys(), failure_msg)
    ratio_count = len(evaluated._symbol_to_quantity['pilling_bedworth_ratio'])
    self.assertTrue(ratio_count > 0, failure_msg)
def test_super_evaluate(self):
    """
    Tests the graph's composite material evaluation.
    """
    rester = MPRester()

    first = rester.get_material_for_mpid("mp-13")
    # Temporary hack for problem with zero band-gap materials
    first.remove_symbol("band_gap_pbe")
    first.add_quantity(Quantity("band_gap", 0.0))

    second = rester.get_material_for_mpid("mp-24972")
    composite = CompositeMaterial([first, second])

    evaluated = Graph().super_evaluate(composite, allow_model_failure=False)

    failure_msg = "Super Evaluate failed to derive expected outputs."
    derived = evaluated._symbol_to_quantity
    self.assertTrue('pilling_bedworth_ratio' in derived.keys(), failure_msg)
    ratio_count = len(evaluated._symbol_to_quantity['pilling_bedworth_ratio'])
    self.assertTrue(ratio_count > 0, failure_msg)
def test_symbol_add_remove(self):
    """
    Tests the outcome of adding and removing a Symbol from the canonical
    graph: removing F must also drop the models that use it, and F can be
    added back afterwards.
    """
    symbols = GraphTest.generate_canonical_symbols()
    models = GraphTest.generate_canonical_models()
    g = Graph(models=models, symbol_types=symbols, composite_models=dict())

    symbol_f = symbols['F']
    removed_msg = "Symbol was not properly removed."

    g.remove_symbol_types({'F': symbol_f})
    self.assertFalse(symbol_f in g.get_symbol_types(), removed_msg)
    self.assertFalse(symbol_f in g._input_to_model.keys(), removed_msg)
    self.assertFalse(symbol_f in g._output_to_model.keys(), removed_msg)

    # model3 and model6 are the models expected to disappear along with F.
    for model_key in ('model3', 'model6'):
        self.assertFalse(
            models[model_key] in g.get_models().values(),
            "Removing symbol did not remove a model using that symbol.")

    g.update_symbol_types({'F': symbol_f})
    self.assertTrue(symbol_f in g.get_symbol_types(),
                    "Symbol was not properly added.")
def setUpClass(cls):
    """
    Registers the builtin symbols and creates three modulus quantities (in
    gigapascals) and a default graph on the class; ``cls.material`` starts
    out as None.
    """
    add_builtin_symbols_to_registry()

    def gigapascal_quantity(symbol_name, magnitude):
        # Quantity of the registered symbol with the given GPa magnitude.
        return QuantityFactory.create_quantity(
            Registry("symbols")[symbol_name],
            ureg.Quantity.from_tuple([magnitude, [['gigapascals', 1]]]))

    # Create some test properties and a few base objects
    cls.q1 = gigapascal_quantity('bulk_modulus', 200)
    cls.q2 = gigapascal_quantity('shear_modulus', 100)
    cls.q3 = gigapascal_quantity('bulk_modulus', 300)
    cls.material = None
    cls.graph = Graph()
def process_item(self, item):
    """
    Run correlation calculation on a pair of properties using the specified
    function.

    Args:
        item: (dict) input provided by get_items() (see get_items() for
            structure). Reads the keys 'x_name', 'y_name', 'x_data',
            'y_data' and 'func' (a ``(name, callable)`` pair).

    Returns:
        (tuple) six entries, in order:
            independent property (x-axis) name,
            dependent property (y-axis) name,
            correlation value (0.0 when fewer than 2 data points are given),
            correlation function name,
            number of data points used for correlation,
            degree of separation from the x-axis property to the y-axis
                property on the propnet graph, or None if the lookup raises
                ValueError. Only the x->y direction is queried.
    """
    x_name, y_name = item['x_name'], item['y_name']
    x_values, y_values = item['x_data'], item['y_data']
    correlation_name, correlate = item['func']
    point_count = len(x_values)

    graph = Graph()
    try:
        separation = graph.get_degree_of_separation(x_name, y_name)
    except ValueError:
        separation = None

    # A correlation needs at least two points; report 0.0 otherwise.
    value = correlate(x_values, y_values) if point_count >= 2 else 0.0

    return (x_name, y_name, value, correlation_name, point_count,
            separation)
def __init__(self, materials, propstore, materials_symbol_map=None,
             criteria=None, source_name="", include_deprecated=False,
             include_sandboxed=False, graph_parallel=False,
             max_graph_workers=None, graph_timeout=None,
             allow_child_process=False, **kwargs):
    """
    Args:
        materials (Store): store of materials properties
        materials_symbol_map (dict): mapping of keys in materials
            store docs to symbols. Defaults to ``MPRester.mapping``.
        propstore (Store): store of propnet properties
        criteria (dict): criteria for Mongodb find() query specifying
            criteria for records to process
        source_name (str): identifier for record source. An empty string
            is replaced by "Materials Project".
        include_deprecated (bool): True processes materials marked as
            deprecated via the "deprecated" field. False skips those
            materials. If an entry does not have the "deprecated" field,
            it will be processed. Note that False will create a logical
            "and" with any criteria specified in "criteria".
            Default: False
        include_sandboxed (bool): True processes materials regardless of
            their MP sandbox. Note that False will create a logical "and"
            with any criteria specified in "criteria". False restricts
            materials to the "core" sandbox.
            Default: False
        graph_parallel (bool): True runs the graph algorithm in parallel
            with the number of workers specified by max_graph_workers.
            Default: False (serial)
            Note: there will be no substantial speed-up from using a
            parallel runner with a parallel builder if there are
            long-running model evaluations that don't get timed out using
            the graph_timeout keyword.
        max_graph_workers (int): number of processes to spawn for parallel
            graph evaluation. May only be given together with
            graph_parallel=True. Note that graph evaluation speed-up tops
            out at 3-4 parallel processes. If the builder is run in a
            parallel maggma Runner, each will spawn max_graph_workers
            number of processes to evaluate. For 4 parallel graph
            processes running on 3 parallel runners, this will spawn:
            1 main runner process + 3 parallel runners
            + (3 parallel runners * 4 graph processes)
            = 16 total processes
        graph_timeout (int): number of seconds after which to timeout per
            property (available only on Unix-based systems).
            Default: None (no limit)
        allow_child_process (bool): If True, the user will be warned when
            graph_parallel is True and the builder is being run in a child
            process, usually indicating the builder is being run in a
            parallelized Runner, which is not recommended due to
            inefficiency in having to re-fork the graph processes with
            every new material. False suppresses this warning.
            NOTE(review): this constructor only stores the flag; confirm
            the described True/False behaviour against where it is read.
        **kwargs: kwargs for builder

    Raises:
        ValueError: if max_graph_workers is given while graph_parallel is
            False.
    """
    self.materials = materials
    self.propstore = propstore
    self.include_deprecated = include_deprecated
    self.include_sandboxed = include_sandboxed

    # Collect the individual query filters; more than one is AND-ed below.
    filters = []
    if criteria:
        filters.append(criteria)

    if not include_deprecated:
        # Keep documents that lack the field or have it set to False.
        deprecated_filter = {
            "$or": [{
                "deprecated": {
                    "$exists": False
                }
            }, {
                "deprecated": False
            }]
        }
        filters.append(deprecated_filter)

    if not include_sandboxed:
        sandboxed_filter = {'sbxn': 'core'}
        filters.append(sandboxed_filter)

    if len(filters) > 1:
        self.criteria = {'$and': filters}
    else:
        self.criteria = filters[0] if filters else None

    self.materials_symbol_map = materials_symbol_map \
        or MPRester.mapping

    if source_name == "":
        # Because this builder is not fully general, will keep this here
        self.source_name = "Materials Project"
    else:
        self.source_name = source_name

    self.graph_parallel = graph_parallel
    if not graph_parallel and max_graph_workers is not None:
        # The message names this constructor's own parameters so the
        # caller can tell which arguments conflict.
        raise ValueError("Cannot specify max_graph_workers with "
                         "graph_parallel=False")
    self.max_graph_workers = max_graph_workers

    self.graph_timeout = graph_timeout
    self.allow_child_process = allow_child_process
    self._graph_evaluator = Graph(parallel=graph_parallel,
                                  max_workers=max_graph_workers)

    # Project the mapped document keys plus the fixed set of fields below.
    props = list(self.materials_symbol_map.keys())
    props += [
        "task_id", "pretty_formula", "run_type", "is_hubbard",
        "pseudo_potential", "hubbards", "potcar_symbols", "oxide_type",
        "final_energy", "unit_cell_formula", "created_at", "deprecated",
        "sbxn"
    ]
    # De-duplicate; the resulting order is not significant here.
    props = list(set(props))

    super(PropnetBuilder, self).__init__(source=materials,
                                         target=propstore,
                                         query=self.criteria,
                                         ufn=self.process,
                                         projection=props,
                                         **kwargs)
class PropnetBuilder(MapBuilder):
    """
    Basic builder for running propnet derivations on various properties.

    Each document read from the materials store is converted into a
    Material, evaluated with a propnet Graph, and the derived quantities
    are written to the propnet store as a sanitized document.
    """

    def __init__(self, materials, propstore, materials_symbol_map=None,
                 criteria=None, source_name="", include_deprecated=False,
                 include_sandboxed=False, graph_parallel=False,
                 max_graph_workers=None, graph_timeout=None,
                 allow_child_process=False, **kwargs):
        """
        Args:
            materials (Store): store of materials properties
            propstore (Store): store of propnet properties
            materials_symbol_map (dict): mapping of keys in materials
                store docs to symbols. Falls back to MPRester.mapping when
                omitted or empty.
            criteria (dict): criteria for Mongodb find() query specifying
                criteria for records to process
            source_name (str): identifier for record source. An empty
                string (the default) is replaced by "Materials Project".
            include_deprecated (bool): True processes materials marked as
                deprecated via the "deprecated" field. False skips those
                materials. If an entry does not have the "deprecated" field,
                it will be processed. Note that False will create a logical
                "and" with any criteria specified in "criteria".
                Default: False
            include_sandboxed (bool): True processes materials regardless of
                their MP sandbox. False restricts materials to the "core"
                sandbox. Note that False will create a logical "and" with
                any criteria specified in "criteria". When True, the "sbxn"
                field of the source document is also copied to the output
                document. Default: False
            graph_parallel (bool): True runs the graph algorithm in parallel
                with the number of workers specified by max_graph_workers.
                Default: False (serial)
                Note: there will be no substantial speed-up from using a
                parallel runner with a parallel builder if there are
                long-running model evaluations that don't get timed out
                using the graph_timeout keyword.
            max_graph_workers (int): number of processes to spawn for
                parallel graph evaluation. Note that graph evaluation
                speed-up tops out at 3-4 parallel processes. If the builder
                is run in a parallel maggma Runner, each will spawn
                max_graph_workers number of processes to evaluate.
                For 4 parallel graph processes running on 3 parallel
                runners, this will spawn:
                1 main runner process + 3 parallel runners +
                (3 parallel runners * 4 graph processes) = 16 total processes
                Must be None when graph_parallel is False.
            graph_timeout (int): number of seconds after which to timeout
                per property (available only on Unix-based systems).
                Default: None (no limit)
            allow_child_process (bool): If False (default), the user will
                be warned when graph_parallel is True and the builder is
                being run in a child process, usually indicating the builder
                is being run in a parallelized Runner, which is not
                recommended due to inefficiency in having to re-fork the
                graph processes with every new material. True suppresses
                this warning.
            **kwargs: kwargs for builder

        Raises:
            ValueError: if max_graph_workers is specified while
                graph_parallel is False.
        """
        self.materials = materials
        self.propstore = propstore
        self.include_deprecated = include_deprecated
        self.include_sandboxed = include_sandboxed

        # Collect every active filter; they are AND-ed together below.
        filters = []
        if criteria:
            filters.append(criteria)
        if not include_deprecated:
            # Keep docs that either lack the field or are explicitly
            # marked as not deprecated.
            deprecated_filter = {
                "$or": [{"deprecated": {"$exists": False}},
                        {"deprecated": False}]
            }
            filters.append(deprecated_filter)
        if not include_sandboxed:
            sandboxed_filter = {'sbxn': 'core'}
            filters.append(sandboxed_filter)

        # A single filter is used as-is; "$and" is only needed for several.
        if len(filters) > 1:
            self.criteria = {'$and': filters}
        else:
            self.criteria = filters[0] if filters else None

        self.materials_symbol_map = materials_symbol_map \
            or MPRester.mapping
        if source_name == "":
            # Because this builder is not fully general, will keep this here
            self.source_name = "Materials Project"
        else:
            self.source_name = source_name

        self.graph_parallel = graph_parallel
        if not graph_parallel and max_graph_workers is not None:
            raise ValueError("Cannot specify max_workers with parallel=False")
        self.max_graph_workers = max_graph_workers
        self.graph_timeout = graph_timeout
        self.allow_child_process = allow_child_process
        self._graph_evaluator = Graph(parallel=graph_parallel,
                                      max_workers=max_graph_workers)

        # Project only the fields needed to build quantities, the computed
        # entry and the bookkeeping fields of the output document.
        props = list(self.materials_symbol_map.keys())
        props += ["task_id", "pretty_formula", "run_type", "is_hubbard",
                  "pseudo_potential", "hubbards", "potcar_symbols",
                  "oxide_type", "final_energy", "unit_cell_formula",
                  "created_at", "deprecated", "sbxn"]
        props = list(set(props))

        super(PropnetBuilder, self).__init__(source=materials,
                                             target=propstore,
                                             query=self.criteria,
                                             ufn=self.process,
                                             projection=props,
                                             **kwargs)

    def process(self, item):
        """
        Builds a Material from one materials-store document, evaluates the
        graph on it and formats the result as a propnet document.

        Args:
            item (dict): document from the materials store; must contain
                "task_id". Values are looked up with the keys of
                materials_symbol_map.

        Returns:
            (dict): sanitized document containing
                "inputs": the quantities the material was populated with,
                one sub-document per symbol name for which new quantities
                were derived (keys "quantities", and for aggregated symbols
                "mean", "std_dev", "units", "title"; "error" if the
                quantities could not be serialized),
                "task_id", "pretty_formula", "deprecated" and, if
                include_sandboxed is set, "sbxn".
        """
        if self.graph_parallel and not self.allow_child_process and \
                current_process().name != "MainProcess":
            logger.warning(
                "It appears derive_quantities() is running "
                "in a child process, possibly in a parallelized "
                "Runner.\nThis is not recommended and will deteriorate "
                "performance.")

        # Define quantities corresponding to materials doc fields
        # Attach quantities to materials
        item = MontyDecoder().process_decoded(item)

        logger.info("Populating material for %s", item['task_id'])
        material = Material()

        # All input quantities share one provenance record pointing back
        # at the source document.
        provenance = ProvenanceElement(
            source={"source": self.source_name,
                    "source_key": item['task_id'],
                    "date_created": item.get('created_at')})

        for mkey, property_name in self.materials_symbol_map.items():
            value = pydash.get(item, mkey)
            # NOTE(review): truthiness also skips legitimate falsy values
            # such as 0 or 0.0 -- confirm this is intended.
            if value:
                material.add_quantity(
                    QuantityFactory.create_quantity(
                        property_name, value,
                        units=Registry("units").get(property_name, None),
                        provenance=provenance))

        # Add custom things, e. g. computed entry
        computed_entry = get_entry(item)
        if computed_entry:
            material.add_quantity(
                QuantityFactory.create_quantity("computed_entry",
                                                computed_entry,
                                                provenance=provenance))
        else:
            logger.info("Unable to create computed entry for %s",
                        item['task_id'])
        material.add_quantity(
            QuantityFactory.create_quantity("external_identifier_mp",
                                            item['task_id'],
                                            provenance=provenance))

        # Captured before evaluation so derived quantities can be told
        # apart from inputs afterwards.
        input_quantities = material.symbol_quantities_dict

        # Use graph to generate expanded quantity pool
        logger.info("Evaluating graph for %s", item['task_id'])
        new_material = self._graph_evaluator.evaluate(
            material, timeout=self.graph_timeout)

        # Format document and return
        logger.info("Creating doc for %s", item['task_id'])
        # Gives the initial inputs that were used to derive properties of a
        # certain material.
        doc = {"inputs": [StorageQuantity.from_quantity(q)
                          for q in chain.from_iterable(
                              input_quantities.values())]}

        for symbol, quantities in new_material.symbol_quantities_dict.items():
            # If no new quantities of a given symbol were derived (i.e. if
            # the initial input quantity/ies is/are the only one/s listed in
            # the new material) then don't add that quantity to the propnet
            # entry document as a derived quantity.
            # NOTE(review): indexing assumes symbol_quantities_dict yields
            # an empty collection for symbols absent from the inputs
            # (presumably a defaultdict) -- verify.
            if len(quantities) == len(input_quantities[symbol]):
                continue
            sub_doc = {}
            try:
                # Write out all quantities as dicts including the
                # internal ID for provenance tracing
                qs = [jsanitize(StorageQuantity.from_quantity(q), strict=True)
                      for q in quantities]
            except AttributeError as ex:
                # Check to see if this is an error caused by an object
                # that is not JSON serializable. str(ex) is used instead of
                # ex.args[0] so an exception without (string) args cannot
                # raise a secondary error here.
                if "object has no attribute 'as_dict'" in str(ex):
                    # Write error to db and logger
                    errmsg = ("Quantity of Symbol '{}' is not "
                              "JSON serializable. Cannot write quantities "
                              "to database!").format(symbol.name)
                    logger.error(errmsg)
                    sub_doc['error'] = errmsg
                    qs = []
                else:
                    # If not, re-raise the error with its original traceback
                    raise
            sub_doc['quantities'] = qs
            doc[symbol.name] = sub_doc

        aggregated_quantities = new_material.get_aggregated_quantities()

        for symbol, quantity in aggregated_quantities.items():
            if symbol.name not in doc:
                # No new quantities were derived
                continue
            # Store mean and std dev for aggregated quantities
            sub_doc = {
                "mean": unumpy.nominal_values(quantity.value).tolist(),
                "std_dev": unumpy.std_devs(quantity.value).tolist(),
                "units": quantity.units.format_babel()
                if quantity.units else None,
                "title": quantity.symbol.display_names[0]
            }
            # Symbol Name -> Sub_Document, listing all Quantities of that type.
            doc[symbol.name].update(sub_doc)

        doc.update({"task_id": item["task_id"],
                    "pretty_formula": item.get("pretty_formula"),
                    "deprecated": item.get("deprecated", False)})

        if self.include_sandboxed:
            doc.update({'sbxn': item.get("sbxn", [])})

        return jsanitize(doc, strict=True)
def test_provenance(self):
    """
    Tests that quantities derived by the graph record their provenance.

    The canonical symbols, models and material are used, with model4
    replaced by a constrained variant and model6 removed. Input
    quantities of symbol A must carry no provenance; derived quantities
    of B, C, G and D must name the model that produced them and contain
    the expected input quantities.
    """
    model4 = EquationModel(name="model4", equations=["D=B*C*11"],
                           constraints=["G==0"])
    symbols = GraphTest.generate_canonical_symbols()
    models = GraphTest.generate_canonical_models()
    models['model4'] = model4
    del models['model6']
    material = GraphTest.generate_canonical_material(symbols)
    g = Graph(symbol_types=symbols, models=models, composite_models=dict())
    material_derived = g.evaluate(material)

    expected_quantities = [
        Quantity(symbols['A'], 19),
        Quantity(symbols['A'], 23),
        Quantity(symbols['B'], 38),
        Quantity(symbols['B'], 46),
        Quantity(symbols['C'], 57),
        Quantity(symbols['C'], 69),
        Quantity(symbols['G'], 95),
        Quantity(symbols['G'], 115),
        Quantity(symbols['F'], 266),
        Quantity(symbols['F'], 322),
        Quantity(symbols['D'], 70395),
        Quantity(symbols['D'], 85215),
        Quantity(symbols['D'], 103155)
    ]

    msg = "provenance improperly calculated"
    derived = material_derived._symbol_to_quantity

    # Quantities of A are the material's inputs, so nothing produced them.
    for q in derived[symbols['A']]:
        self.assertIsNone(q._provenance)

    # (symbol, value obtained from the first A quantity, producing model).
    # Any other value of that symbol is expected to stem from the second
    # A quantity.
    single_input_cases = [
        ('B', 38, 'model1'),
        ('C', 57, 'model1'),
        ('G', 95, 'model2'),
    ]
    for symbol_key, value_from_first, model_key in single_input_cases:
        for q in derived[symbols[symbol_key]]:
            if q.value == value_from_first:
                source = expected_quantities[0]
            else:
                source = expected_quantities[1]
            # Model names are compared by equality; identity (`is`) only
            # holds by accident of string interning.
            self.assertEqual(q._provenance.model, models[model_key].name,
                             msg)
            self.assertIn(source, q._provenance.inputs, msg)

    # Only the D quantity with value 70395 is checked here: it must come
    # from model5 with the C=57 and G=95 quantities as inputs.
    for q in derived[symbols['D']]:
        if q.value == 70395:
            self.assertEqual(q._provenance.model, models['model5'].name,
                             msg)
            self.assertIn(expected_quantities[4], q._provenance.inputs, msg)
            self.assertIn(expected_quantities[6], q._provenance.inputs, msg)
def test_derive_quantities(self):
    """
    Smoke test: deriving from, and evaluating, a single band gap quantity
    on a default Graph must complete without raising.
    """
    # Simple one quantity test
    band_gap = Quantity("band_gap", 3.2)
    evaluator = Graph()

    # derive_quantities must return something that unpacks into two values.
    derived, pool = evaluator.derive_quantities([band_gap])

    # The same single quantity wrapped in a Material goes through evaluate.
    single_quantity_material = Material([band_gap])
    evaluated = evaluator.evaluate(single_quantity_material)