def test_as_dict_as_json(self):
    """Compare as_dict() and jsanitize() output against the stored fixtures.

    Exercises, in order: the generated symbols, quantities with custom
    symbols, quantities with canonical symbols, a quantity carrying an
    uncertainty, and an object-valued quantity.
    """
    # Symbols first, to be sure they still serialize as expected
    for sym_name, symbol in self.generate_symbols().items():
        self.assertDictEqual(symbol.as_dict(),
                             self.custom_syms_as_dicts[sym_name])
        self.assertDictEqual(jsanitize(symbol, strict=True),
                             self.custom_symbols_json[sym_name])

    # Quantities attached to custom symbols
    for sym_key, quantity_list in self.quantities_custom_symbol.items():
        fixtures = zip(quantity_list,
                       self.sq_custom_sym_as_dicts[sym_key],
                       self.sq_custom_sym_json[sym_key])
        for quantity, want_dict, want_json in fixtures:
            stored = StorageQuantity.from_quantity(quantity)
            self.assertDictEqual(stored.as_dict(), want_dict)
            self.assertDictEqual(jsanitize(stored, strict=True), want_json)

    # Quantities attached to canonical symbols, computed from a real model.
    # The numeric value is compared approximately and removed before the
    # remaining keys are compared exactly.
    for sym_key, quantity_list in self.quantities_canonical_symbol.items():
        fixtures = zip(quantity_list,
                       self.sq_canonical_sym_as_dicts_no_value[sym_key],
                       self.sq_canonical_sym_json_no_value[sym_key])
        for quantity, want_dict, want_json in fixtures:
            stored = StorageQuantity.from_quantity(quantity)

            produced = stored.as_dict()
            self.assertTrue(
                np.isclose(quantity.magnitude, produced.pop('value')))
            self.assertDictEqual(produced, want_dict)

            produced = jsanitize(stored, strict=True)
            self.assertTrue(
                np.isclose(quantity.magnitude, produced.pop('value')))
            self.assertDictEqual(produced, want_json)

    # Quantity with uncertainty (calculated from mean), custom symbol
    uncertain = self.quantity_with_uncertainty
    stored = StorageQuantity.from_quantity(uncertain)

    produced = stored.as_dict()
    self.assertTrue(np.isclose(uncertain.magnitude, produced.pop('value')))
    self.assertTrue(np.isclose(uncertain.uncertainty.magnitude,
                               produced.pop('uncertainty')))
    self.assertDictEqual(self.sq_with_uncertainty_as_dict_no_numbers,
                         produced)

    produced = jsanitize(stored, strict=True)
    self.assertTrue(np.isclose(uncertain.magnitude, produced.pop('value')))
    self.assertTrue(np.isclose(uncertain.uncertainty.magnitude,
                               produced.pop('uncertainty')))
    self.assertDictEqual(self.sq_with_uncertainty_json_no_numbers, produced)

    # Quantity whose value is an object, custom symbol
    stored = StorageQuantity.from_quantity(self.object_quantity)
    self.assertDictEqual(self.sq_object_as_dict, stored.as_dict())
    self.assertDictEqual(self.sq_object_json, jsanitize(stored, strict=True))
def test_default_instantiation(self):
    """Check that argument-less construction leaves every field as None."""
    quantity_fields = ('_data_type', 'value', '_internal_id', 'provenance',
                       'symbol', 'tags', 'uncertainty', 'units')

    empty_store = ProvenanceStore()
    for field in ('inputs', 'model', 'source'):
        self.assertIsNone(getattr(empty_store, field))

    empty_store_quantity = ProvenanceStoreQuantity()
    for field in quantity_fields:
        self.assertIsNone(getattr(empty_store_quantity, field))
    self.assertFalse(empty_store_quantity.has_value())

    empty_storage_quantity = StorageQuantity()
    for field in quantity_fields:
        self.assertIsNone(getattr(empty_storage_quantity, field))
def process(self, item):
    """Derive quantities for one materials document and build its output doc.

    Args:
        item: materials document; it is decoded with MontyDecoder and then
            read through 'task_id', 'created_at', 'pretty_formula',
            'deprecated' and 'sbxn', plus the keys of
            ``self.materials_symbol_map``.

    Returns:
        The result of ``jsanitize(doc, strict=True)``, where ``doc`` holds
        the input quantities, one sub-document per symbol that gained
        derived quantities, and identifying fields copied from ``item``.

    Raises:
        AttributeError: re-raised from serialization when the message is
            not the "object has no attribute 'as_dict'" case.
    """
    in_child_process = current_process().name != "MainProcess"
    if self.graph_parallel and not self.allow_child_process \
            and in_child_process:
        logger.warning(
            "It appears derive_quantities() is running "
            "in a child process, possibly in a parallelized "
            "Runner.\nThis is not recommended and will deteriorate "
            "performance.")

    # Turn the raw document into quantities attached to a fresh material
    item = MontyDecoder().process_decoded(item)
    logger.info("Populating material for %s", item['task_id'])
    material = Material()

    provenance = ProvenanceElement(
        source={
            "source": self.source_name,
            "source_key": item['task_id'],
            "date_created": item.get('created_at')
        })

    for doc_path, symbol_name in self.materials_symbol_map.items():
        field_value = pydash.get(item, doc_path)
        if not field_value:
            continue
        material.add_quantity(
            QuantityFactory.create_quantity(
                symbol_name, field_value,
                units=Registry("units").get(symbol_name, None),
                provenance=provenance))

    # Custom additions, e.g. the computed entry
    computed_entry = get_entry(item)
    if not computed_entry:
        logger.info("Unable to create computed entry for {}".format(
            item['task_id']))
    else:
        material.add_quantity(
            QuantityFactory.create_quantity("computed_entry",
                                            computed_entry,
                                            provenance=provenance))
    material.add_quantity(
        QuantityFactory.create_quantity("external_identifier_mp",
                                        item['task_id'],
                                        provenance=provenance))

    input_quantities = material.symbol_quantities_dict

    # Expand the quantity pool by evaluating the graph
    logger.info("Evaluating graph for %s", item['task_id'])
    new_material = self._graph_evaluator.evaluate(
        material, timeout=self.graph_timeout)

    logger.info("Creating doc for %s", item['task_id'])

    # "inputs" records the initial quantities the derivation started from
    all_inputs = chain.from_iterable(input_quantities.values())
    doc = {"inputs": [StorageQuantity.from_quantity(q) for q in all_inputs]}

    for symbol, quantities in new_material.symbol_quantities_dict.items():
        # Same count as the inputs for this symbol means nothing new was
        # derived, so the symbol is left out of the derived sub-documents.
        if len(input_quantities[symbol]) == len(quantities):
            continue

        entry = {}
        try:
            # Serialize every quantity, internal ID included, so that
            # provenance can be traced later
            serialized = [
                jsanitize(StorageQuantity.from_quantity(q), strict=True)
                for q in quantities
            ]
        except AttributeError as ex:
            # Only the "not JSON serializable" flavour is handled here;
            # anything else propagates unchanged
            if "object has no attribute 'as_dict'" not in ex.args[0]:
                raise ex
            errmsg = "Quantity of Symbol '{}' is not ".format(symbol.name) + \
                     "JSON serializable. Cannot write quantities to database!"
            logger.error(errmsg)
            entry['error'] = errmsg
            serialized = []

        entry['quantities'] = serialized
        doc[symbol.name] = entry

    for symbol, aggregate in \
            new_material.get_aggregated_quantities().items():
        if symbol.name not in doc:
            # No new quantities were derived for this symbol
            continue
        # Attach mean / std dev of the aggregate to the symbol's sub-doc
        doc[symbol.name].update({
            "mean": unumpy.nominal_values(aggregate.value).tolist(),
            "std_dev": unumpy.std_devs(aggregate.value).tolist(),
            "units": aggregate.units.format_babel()
            if aggregate.units else None,
            "title": aggregate.symbol.display_names[0]
        })

    doc["task_id"] = item["task_id"]
    doc["pretty_formula"] = item.get("pretty_formula")
    doc["deprecated"] = item.get("deprecated", False)

    if self.include_sandboxed:
        doc['sbxn'] = item.get("sbxn", [])

    return jsanitize(doc, strict=True)
def test_value_lookup_to_quantity(self):
    """Check rebuilding quantities from their JSON form with a value lookup.

    Each quantity is serialized with jsanitize(), decoded with MontyDecoder
    and converted back through to_quantity() / reconstruct_quantity() using
    both ``self.get_lookup_dict()`` and ``self.lookup_fun``. The rebuilt
    quantities and their provenance trees are compared against the
    originals, then a series of failing lookups is exercised.
    """
    def rec_verify_lookup(p_lookup, p_original):
        # Walk both provenance trees in parallel, pairing inputs by
        # _internal_id and comparing their values.
        self.assertIsInstance(p_lookup, ProvenanceElement)
        for v in p_lookup.inputs or []:
            self.assertIsInstance(v, BaseQuantity)
            v_orig = [x for x in p_original.inputs if x._internal_id == v._internal_id]
            # Exactly one original input must share this internal ID
            self.assertEqual(len(v_orig), 1)
            v_orig = v_orig[0]
            self.assertIsNotNone(v.value)
            if isinstance(v, NumQuantity):
                self.assertTrue(np.isclose(v.value, v_orig.value))
                if v_orig.uncertainty:
                    self.assertTrue(np.isclose(v.uncertainty, v_orig.uncertainty))
            else:
                self.assertEqual(v.value, v_orig.value)
            rec_verify_lookup(v.provenance, v_orig.provenance)

    lookup_dict = self.get_lookup_dict()
    lookup_fun = self.lookup_fun
    quantities = \
        list(chain.from_iterable(self.quantities_custom_symbol.values())) + \
        list(chain.from_iterable(self.quantities_canonical_symbol.values())) + \
        [self.quantity_with_uncertainty, self.object_quantity]

    for q in quantities:
        json_dict = jsanitize(StorageQuantity.from_quantity(q), strict=True)
        sq_json = MontyDecoder().process_decoded(json_dict)
        # After decoding, provenance inputs are expected to carry no value
        if sq_json.provenance.inputs:
            for v in sq_json.provenance.inputs:
                self.assertIsNone(v.value)

        # Same conversion through four entry points
        q_json_dict = sq_json.to_quantity(lookup=lookup_dict)
        q_json_fun = sq_json.to_quantity(lookup=lookup_fun)
        q_json_reconstruct_dict = StorageQuantity.reconstruct_quantity(json_dict, lookup_dict)
        q_json_reconstruct_fun = StorageQuantity.reconstruct_quantity(json_dict, lookup_fun)

        for q_json in (q_json_dict, q_json_fun, q_json_reconstruct_dict, q_json_reconstruct_fun):
            self.assertIsInstance(q_json, type(q))
            if isinstance(q_json, NumQuantity):
                self.assertTrue(np.isclose(q_json.value, q.value))
                if q.uncertainty:
                    self.assertTrue(np.isclose(q_json.uncertainty, q.uncertainty))
            else:
                self.assertEqual(q_json.value, q.value)
            rec_verify_lookup(q_json.provenance, q.provenance)

        if q.provenance.inputs:
            with self.assertRaises(ValueError):
                # Needs lookup but doesn't get a lookup container
                sq_json.to_quantity()

            with self.assertRaises(ValueError):
                # Needs lookup but doesn't get a lookup container
                StorageQuantity.reconstruct_quantity(json_dict)

            with self.assertRaises(ValueError):
                sq_json.to_quantity(lookup=self.lookup_fun_missing_value)

            with self.assertRaises(ValueError):
                StorageQuantity.reconstruct_quantity(json_dict, lookup=self.lookup_fun_missing_value)

            with self.assertRaises(TypeError):
                sq_json.to_quantity(lookup=self.lookup_fun_incorrect_type)

            with self.assertRaises(TypeError):
                StorageQuantity.reconstruct_quantity(json_dict, lookup=self.lookup_fun_incorrect_type)

            # Temporarily drop the first input's entry from the dict lookup
            # so that key is missing; it is put back further down.
            key = q.provenance.inputs[0]._internal_id
            key_lookup = lookup_dict.pop(key)

            with self.assertRaises(ValueError):
                sq_json.to_quantity(lookup=lookup_dict)

            with self.assertRaises(ValueError):
                StorageQuantity.reconstruct_quantity(json_dict, lookup=lookup_dict)

            with self.assertRaises(ValueError):
                sq_json.to_quantity(lookup=self.lookup_fun_key_not_found)

            with self.assertRaises(ValueError):
                StorageQuantity.reconstruct_quantity(json_dict, lookup=self.lookup_fun_key_not_found)

            # Restore the entry so later iterations see the full lookup
            lookup_dict[key] = key_lookup

            # NOTE(review): the source had lost its indentation; these two
            # checks are placed inside the `if q.provenance.inputs:` branch
            # on the assumption that a lookup is only consulted when inputs
            # exist -- confirm against the intended nesting.
            with self.assertRaises(TypeError):
                sq_json.to_quantity(lookup='This is not a lookup')

            with self.assertRaises(TypeError):
                StorageQuantity.reconstruct_quantity(json_dict, lookup='This is not a lookup')
def test_from_dict_from_json(self):
    """Round-trip StorageQuantity through as_dict()/from_dict() and JSON.

    For each kind of quantity the stored form is rebuilt once with
    StorageQuantity.from_dict() and once by decoding the jsanitize()
    output with MontyDecoder; both results are compared field by field
    with the stored quantity and with the original provenance.
    """
    # Quantities with non-canonical (custom) symbols
    for source_q in chain.from_iterable(
            self.quantities_custom_symbol.values()):
        stored = StorageQuantity.from_quantity(source_q)

        rebuilt = StorageQuantity.from_dict(stored.as_dict())
        self.assertIsInstance(rebuilt, StorageQuantity)
        self.assertEqual(rebuilt._data_type, "NumQuantity")
        self.assertEqual(rebuilt.symbol, stored.symbol)
        self.assertTrue(np.isclose(rebuilt.value, stored.magnitude))
        self.assertEqual(rebuilt.units, stored.units)
        self.assertListEqual(rebuilt.tags, stored.tags)
        self.assertEqual(rebuilt, stored)
        self.rec_provenance_tree_check(rebuilt.provenance,
                                       source_q.provenance)

        decoded = MontyDecoder().process_decoded(
            jsanitize(stored, strict=True))
        self.assertIsInstance(decoded, StorageQuantity)
        self.assertEqual(decoded._data_type, "NumQuantity")
        self.assertEqual(decoded.symbol, stored.symbol)
        self.assertTrue(np.isclose(decoded.value, stored.magnitude))
        self.assertEqual(decoded.units, stored.units)
        self.assertListEqual(decoded.tags, stored.tags)
        self.assertEqual(decoded.provenance, source_q.provenance)
        self.assertEqual(decoded, stored)
        self.rec_provenance_tree_check(decoded.provenance,
                                       source_q.provenance,
                                       from_dict=True)

    # Quantities with canonical symbols
    for source_q in chain.from_iterable(
            self.quantities_canonical_symbol.values()):
        stored = StorageQuantity.from_quantity(source_q)

        rebuilt = StorageQuantity.from_dict(stored.as_dict())
        self.assertIsInstance(rebuilt, StorageQuantity)
        self.assertEqual(rebuilt._data_type, "NumQuantity")
        self.assertEqual(rebuilt.symbol, stored.symbol)
        self.assertTrue(np.isclose(rebuilt.value, stored.magnitude))
        self.assertEqual(rebuilt.units, stored.units)
        self.assertListEqual(rebuilt.tags, stored.tags)
        self.assertEqual(rebuilt, stored)
        self.rec_provenance_tree_check(rebuilt.provenance,
                                       source_q.provenance)

        decoded = MontyDecoder().process_decoded(
            jsanitize(stored, strict=True))
        self.assertIsInstance(decoded, StorageQuantity)
        self.assertEqual(decoded._data_type, "NumQuantity")
        self.assertEqual(decoded.symbol, stored.symbol)
        self.assertTrue(np.isclose(decoded.magnitude, stored.magnitude))
        self.assertEqual(decoded.units, stored.units)
        self.assertListEqual(decoded.tags, stored.tags)
        self.assertEqual(decoded.provenance, source_q.provenance)
        self.assertEqual(decoded, stored)
        self.rec_provenance_tree_check(decoded.provenance,
                                       source_q.provenance,
                                       from_dict=True)

    # Quantity with uncertainty, custom symbol
    source_q = self.quantity_with_uncertainty
    stored = StorageQuantity.from_quantity(source_q)

    rebuilt = StorageQuantity.from_dict(stored.as_dict())
    self.assertIsInstance(rebuilt, StorageQuantity)
    self.assertEqual(rebuilt._data_type, "NumQuantity")
    self.assertEqual(rebuilt.symbol, stored.symbol)
    self.assertTrue(np.isclose(rebuilt.value, stored.magnitude))
    self.assertEqual(rebuilt.units, stored.units)
    self.assertTrue(np.isclose(rebuilt.uncertainty, stored.uncertainty))
    self.assertListEqual(rebuilt.tags, stored.tags)
    self.assertEqual(rebuilt, stored)
    self.rec_provenance_tree_check(rebuilt.provenance, source_q.provenance)

    decoded = MontyDecoder().process_decoded(jsanitize(stored, strict=True))
    self.assertIsInstance(decoded, StorageQuantity)
    self.assertEqual(decoded._data_type, "NumQuantity")
    self.assertEqual(decoded.symbol, stored.symbol)
    self.assertTrue(np.isclose(decoded.magnitude, stored.magnitude))
    self.assertEqual(decoded.units, stored.units)
    self.assertTrue(np.isclose(decoded.uncertainty, stored.uncertainty))
    self.assertListEqual(decoded.tags, stored.tags)
    self.assertEqual(decoded.provenance, source_q.provenance)
    self.assertEqual(decoded, stored)
    self.rec_provenance_tree_check(decoded.provenance,
                                   source_q.provenance,
                                   from_dict=True)

    # Object-valued quantity
    source_q = self.object_quantity
    stored = StorageQuantity.from_quantity(source_q)

    rebuilt = StorageQuantity.from_dict(stored.as_dict())
    self.assertIsInstance(rebuilt, StorageQuantity)
    self.assertEqual(rebuilt._data_type, "ObjQuantity")
    self.assertEqual(rebuilt.symbol, stored.symbol)
    self.assertEqual(rebuilt.value, stored.value)
    self.assertListEqual(rebuilt.tags, stored.tags)
    self.assertEqual(rebuilt, stored)
    self.rec_provenance_tree_check(rebuilt.provenance, source_q.provenance)

    decoded = MontyDecoder().process_decoded(jsanitize(stored, strict=True))
    self.assertIsInstance(decoded, StorageQuantity)
    self.assertEqual(decoded._data_type, "ObjQuantity")
    self.assertEqual(decoded.symbol, stored.symbol)
    self.assertEqual(decoded.value, stored.value)
    self.assertListEqual(decoded.tags, stored.tags)
    self.assertEqual(decoded, stored)
    self.rec_provenance_tree_check(decoded.provenance,
                                   source_q.provenance,
                                   from_dict=True)
def test_storage_quantity_from_quantity(self):
    """Check StorageQuantity built from existing quantities.

    Every quantity is converted both with StorageQuantity.from_quantity()
    and with the StorageQuantity constructor. The result is compared with
    the source quantity for numeric quantities with custom symbols, a
    quantity with uncertainty and an object-valued quantity. Copying a
    StorageQuantity and rejecting a non-quantity argument are checked last.
    """
    for q in chain.from_iterable(self.quantities_custom_symbol.values()):
        storage_quantities = [StorageQuantity.from_quantity(q),
                              StorageQuantity(q)]
        for storage_quantity in storage_quantities:
            self.assertIsInstance(storage_quantity, StorageQuantity)
            self.assertEqual(storage_quantity._data_type, "NumQuantity")
            # This checks value equality
            self.assertEqual(storage_quantity.symbol, q.symbol)
            self.assertIsInstance(storage_quantity.value, int)
            # Compare the numbers explicitly: assertTrue(a, b) would treat
            # b as the failure message and never compare the two values.
            self.assertTrue(np.isclose(storage_quantity.value, q.magnitude))
            self.assertListEqual(storage_quantity.tags, q.tags)
            # This checks __eq__() and that __eq__() commutes
            self.assertEqual(storage_quantity, q)
            self.assertEqual(q, storage_quantity)
            # This checks types and values explicitly in provenance to
            # make sure everything was built correctly.
            # It is more robust than __eq__()
            self.rec_provenance_tree_check(storage_quantity.provenance,
                                           q.provenance)

    # Quantity carrying an uncertainty
    q = self.quantity_with_uncertainty
    storage_quantities_with_uncertainty = [StorageQuantity.from_quantity(q),
                                           StorageQuantity(q)]
    for storage_quantity_with_uncertainty in \
            storage_quantities_with_uncertainty:
        self.assertIsInstance(storage_quantity_with_uncertainty,
                              StorageQuantity)
        self.assertEqual(storage_quantity_with_uncertainty._data_type,
                         "NumQuantity")
        self.assertEqual(storage_quantity_with_uncertainty.symbol, q.symbol)
        self.assertIsInstance(storage_quantity_with_uncertainty.value, float)
        self.assertTrue(np.isclose(storage_quantity_with_uncertainty.value,
                                   q.value))
        self.assertListEqual(storage_quantity_with_uncertainty.tags, q.tags)
        self.assertIsNotNone(storage_quantity_with_uncertainty.uncertainty)
        self.assertIsInstance(storage_quantity_with_uncertainty.uncertainty,
                              ureg.Quantity)
        self.assertEqual(storage_quantity_with_uncertainty, q)

    # Test ObjQuantity coercion
    q = self.object_quantity
    storage_quantities_object = [StorageQuantity.from_quantity(q),
                                 StorageQuantity(q)]
    for storage_quantity_object in storage_quantities_object:
        self.assertIsInstance(storage_quantity_object, StorageQuantity)
        self.assertEqual(storage_quantity_object._data_type, "ObjQuantity")
        self.assertIsInstance(storage_quantity_object.value, str)
        self.assertEqual(storage_quantity_object.value, q.value)
        self.assertEqual(storage_quantity_object.symbol, q.symbol)
        self.assertListEqual(storage_quantity_object.tags, q.tags)
        self.assertIsNone(storage_quantity_object.units)
        self.assertEqual(storage_quantity_object, q)

    # Test copy of StorageQuantity with from_quantity: the copy must be a
    # distinct object that still compares equal
    q_in = StorageQuantity(q)
    q_out = StorageQuantity.from_quantity(q_in)
    self.assertIsNot(q_in, q_out)
    self.assertEqual(q_in, q_out)

    # Test fail with incorrect object type
    with self.assertRaises(TypeError):
        StorageQuantity('Incorrect type')

    with self.assertRaises(TypeError):
        StorageQuantity.from_quantity('Incorrect type')
def process_item(self, item):
    """Derive quantities for one materials document and build its output doc.

    Args:
        item: materials document; it is decoded with MontyDecoder and then
            read through 'task_id', 'created_at', 'pretty_formula' and the
            keys of ``self.materials_symbol_map``.

    Returns:
        The result of ``jsanitize(doc, strict=True)``, where ``doc`` holds
        the input quantities, one sub-document per aggregated symbol that
        is not skipped as an unchanged input, 'task_id' and
        'pretty_formula'.
    """
    # Turn the raw document into quantities attached to a fresh material
    item = MontyDecoder().process_decoded(item)
    logger.info("Populating material for %s", item['task_id'])
    material = Material()

    provenance = ProvenanceElement(
        source={
            "source": self.source_name,
            "source_key": item['task_id'],
            "date_created": item.get('created_at', "")
        })

    for doc_path, symbol_name in self.materials_symbol_map.items():
        field_value = get(item, doc_path)
        if not field_value:
            continue
        material.add_quantity(
            QuantityFactory.create_quantity(symbol_name, field_value,
                                            provenance=provenance))

    # Custom additions, e.g. the computed entry
    material.add_quantity(
        QuantityFactory.create_quantity("computed_entry", get_entry(item),
                                        provenance=provenance))
    material.add_quantity(
        QuantityFactory.create_quantity("external_identifier_mp",
                                        item['task_id'],
                                        provenance=provenance))

    input_quantities = material.get_quantities()

    # Expand the quantity pool by evaluating a graph without the two
    # dimensionality models
    logger.info("Evaluating graph for %s", item['task_id'])
    graph = Graph()
    excluded_models = {
        "dimensionality_cheon": DEFAULT_MODEL_DICT['dimensionality_cheon'],
        "dimensionality_gorai": DEFAULT_MODEL_DICT['dimensionality_gorai']
    }
    graph.remove_models(excluded_models)
    new_material = graph.evaluate(material)

    logger.info("Creating doc for %s", item['task_id'])

    # "inputs" records the initial quantities the derivation started from
    doc = {
        "inputs": [StorageQuantity.from_quantity(q)
                   for q in input_quantities]
    }

    aggregated = new_material.get_aggregated_quantities()
    for symbol, aggregate in aggregated.items():
        symbol_quantities = new_material._symbol_to_quantity[symbol]

        # Only add new quantities
        # TODO: Condition insufficiently general.
        #       Can end up with initial quantities added as "new quantities"
        is_single = len(symbol_quantities) == 1
        if is_single and list(symbol_quantities)[0] in input_quantities:
            continue

        # Every quantity of this symbol as a dict, internal ID included,
        # so that provenance can be traced later
        quantity_dicts = [StorageQuantity.from_quantity(q).as_dict()
                          for q in symbol_quantities]

        # Symbol name -> sub-document listing all quantities of that type
        doc[symbol.name] = {
            "quantities": quantity_dicts,
            "mean": unumpy.nominal_values(aggregate.value).tolist(),
            "std_dev": unumpy.std_devs(aggregate.value).tolist(),
            "units": aggregate.units.format_babel()
            if aggregate.units else None,
            "title": aggregate._symbol_type.display_names[0]
        }

    doc.update({
        "task_id": item["task_id"],
        "pretty_formula": item["pretty_formula"]
    })
    return jsanitize(doc, strict=True)