class DatabaseExplorer(object):
    """Explore producer/consumer links in a database and its dependencies."""

    def __init__(self, name):
        # Load the named database plus every database it depends on,
        # so links that cross database boundaries can be resolved.
        self.db = Database(name)
        self.data = self.db.load()
        for db in databases[name]["depends"]:
            self.data.update(Database(db).load())

    def uses_this_process(self, key, recursion=0):
        """Return the activities that consume ``key`` as an exchange input.

        With ``recursion > 0``, returns a dict mapping each consumer key to
        its own consumers, descending ``recursion`` levels; otherwise a flat
        list of consumer keys.
        """
        # Compute the consumer list once instead of once per branch.
        consumers = [
            k
            for k in self.data
            if key in [e["input"] for e in self.data[k].get("exchanges", [])]
        ]
        if recursion:
            return {k: self.uses_this_process(k, recursion - 1) for k in consumers}
        return consumers

    def provides_this_process(self, key, recursion=0):
        """Return the exchange inputs consumed by activity ``key``.

        With ``recursion > 0``, returns a dict mapping each input key to its
        own inputs, descending ``recursion`` levels; otherwise a list of
        ``(input_key, ())`` tuples.
        """
        inputs = [e["input"] for e in self.data[key].get("exchanges", [])]
        if recursion:
            return {k: self.provides_this_process(k, recursion - 1) for k in inputs}
        return [(k, ()) for k in inputs]
def add_unlinked_flows_to_biosphere_database(self, biosphere_name=None):
    """Create a biosphere flow for every unlinked biosphere exchange,
    write the new flows to the biosphere database, and re-link.

    Args:
        * *biosphere_name* (str, optional): Name of the biosphere database
          to add flows to. Defaults to ``config.biosphere``.

    Raises ``AssertionError`` if the biosphere database is not registered.
    """
    biosphere_name = biosphere_name or config.biosphere
    assert biosphere_name in databases, u"{} biosphere database not found".format(
        biosphere_name
    )

    bio = Database(biosphere_name)
    KEYS = {"name", "unit", "categories"}

    def reformat(exc):
        # Keep only the identifying fields, then mark the dataset as an
        # emission belonging to the biosphere database.
        # NOTE: no list() wrapper needed — dict views iterate directly.
        dct = {key: value for key, value in exc.items() if key in KEYS}
        dct.update(
            type="emission",
            exchanges=[],
            code=activity_hash(dct),
            database=biosphere_name,
        )
        return dct

    new_data = [
        reformat(exc)
        for ds in self.data
        for exc in ds.get("exchanges", [])
        if exc["type"] == "biosphere" and not exc.get("input")
    ]

    data = bio.load()
    # Dictionary eliminate duplicates
    data.update({(biosphere_name, activity_hash(exc)): exc for exc in new_data})
    bio.write(data)

    self.apply_strategy(
        functools.partial(
            link_iterable_by_fields,
            other=(
                obj
                for obj in Database(biosphere_name)
                if obj.get("type") == "emission"
            ),
            kind="biosphere",
        ),
    )
def add_missing_cfs(self):
    """Create biosphere flows for characterization factors lacking an ``input``.

    Each unlinked CF gets a fresh UUID ``code``; the resulting flows are
    written to the biosphere database (``self.biosphere_name``).
    """
    new_flows = []
    for method in self.data:
        for cf in method["exchanges"]:
            if "input" not in cf:
                # Mutates the CF in place so the method data keeps the code.
                cf["code"] = str(uuid.uuid4())
                new_flows.append(cf)

    # Generator instead of a throwaway list; dict keying also removes
    # duplicate flows that format to the same key.
    new_flows = recursive_str_to_unicode(
        dict(self._format_flow(cf) for cf in new_flows)
    )

    if new_flows:
        biosphere = Database(self.biosphere_name)
        biosphere_data = biosphere.load()
        biosphere_data.update(new_flows)
        biosphere.write(biosphere_data)
        print(u"Added {} new biosphere flows".format(len(new_flows)))
def write_database(self, data=None, delete_existing=True, backend=None,
                   activate_parameters=False, **kwargs):
    """Write data to a ``Database``.

    All arguments are optional, and are normally not specified.

    ``delete_existing`` affects both the existing database (it will be
    emptied prior to writing if True, which is the default), and, if
    ``activate_parameters`` is True, existing database and activity
    parameters. Database parameters will only be deleted if the import
    data specifies a new set of database parameters (i.e.
    ``database_parameters`` is not ``None``) - the same is true for
    activity parameters. If you need finer-grained control, please use
    the ``DatabaseParameter``, etc. objects directly.

    Args:
        * *data* (dict, optional): The data to write to the ``Database``.
          Default is ``self.data``.
        * *delete_existing* (bool, default ``True``): See above.
        * *activate_parameters* (bool, default ``False``). Instead of
          storing parameters in ``Activity`` and other proxy objects,
          create ``ActivityParameter`` and other parameter objects, and
          evaluate all variables and formulas.
        * *backend* (string, optional): Storage backend to use when
          creating ``Database``. Default is the default backend.

    Returns:
        ``Database`` instance.
    """
    data = self.data if data is None else data
    self.metadata.update(kwargs)

    if activate_parameters:
        # Comes before .write_database because we
        # need to remove `parameters` key
        activity_parameters = self._prepare_activity_parameters(
            data, delete_existing
        )

    # Hoisted: the set of database labels is needed both for the check
    # and for the error message — compute it once.
    found_databases = {o['database'] for o in data}
    if found_databases != {self.db_name}:
        error = "Activity database must be {}, but {} was also found".format(
            self.db_name, found_databases.difference({self.db_name})
        )
        raise WrongDatabase(error)

    if len({o['code'] for o in data}) < len(data):
        # Collect the names of every activity whose code collides.
        seen, duplicates = set(), []
        for o in data:
            if o['code'] in seen:
                duplicates.append(o['name'])
            else:
                seen.add(o['code'])
        error = "The following activities have non-unique codes: {}"
        raise NonuniqueCode(error.format(duplicates))

    # Re-key by (database, code) for storage.
    data = {(ds['database'], ds['code']): ds for ds in data}

    if self.db_name in databases:
        # TODO: Raise error if unlinked exchanges?
        db = Database(self.db_name)
        if delete_existing:
            existing = {}
        else:
            existing = db.load(as_dict=True)
    else:
        existing = {}

    if 'format' not in self.metadata:
        self.metadata['format'] = self.format

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        db = Database(self.db_name, backend=backend)
        db.register(**self.metadata)

    self.write_database_parameters(activate_parameters, delete_existing)

    existing.update(data)
    db.write(existing)

    if activate_parameters:
        self._write_activity_parameters(activity_parameters)

    print(u"Created database: {}".format(self.db_name))
    return db
class DatabaseHealthCheck(object):
    """Run a battery of sanity checks against the process datasets of one database."""

    def __init__(self, database):
        self.db = Database(database)
        # Restrict iteration to process datasets.
        self.db.filters = {"type": "process"}

    def check(self, graphs_dir=None):
        """Run every check; return all results in a single dict."""
        tg, tfn, bg, bfn = self.make_graphs(graphs_dir)
        aggregated = self.aggregated_processes()
        return {
            "tg": tg,
            "tfn": tfn,
            "bg": bg,
            "bfn": bfn,
            "pr": self.page_rank(),
            "ue": self.unique_exchanges(),
            "uncertainty": self.uncertainty_check(),
            "sp": aggregated["system_processes"],
            "me": aggregated["many_exchanges"],
            "nsp": self.no_self_production(),
            "mo": self.multioutput_processes(),
            "ob": self.ouroboros(),
        }

    def make_graphs(self, graphs_dir=None):
        """Render technosphere and biosphere matrix sparsity graphs as PNGs.

        Returns:
            (tech filepath, tech filename, bio filepath, bio filename).
        """
        lca = LCA({self.db.random(): 1})
        lca.lci()
        tech_filename = safe_filename(self.db.name) + ".technosphere.png"
        tech_filepath = os.path.join(graphs_dir or projects.output_dir, tech_filename)
        SparseMatrixGrapher(lca.technosphere_matrix).graph(tech_filepath, dpi=600)
        bio_filename = safe_filename(self.db.name) + ".biosphere.png"
        bio_filepath = os.path.join(graphs_dir or projects.output_dir, bio_filename)
        SparseMatrixGrapher(lca.biosphere_matrix).graph(bio_filepath, dpi=600)
        return tech_filepath, tech_filename, bio_filepath, bio_filename

    def page_rank(self):
        """PageRank scores for the database's process graph."""
        return PageRank(self.db).calculate()

    def unique_exchanges(self):
        """Count total and unique technosphere and biosphere exchanges.

        Returns:
            (num tech, num unique tech, num bio, num unique bio).
        """
        data = self.db.load()
        exchanges = [
            (exc["input"], exc["amount"], exc["type"])
            for ds in data.values()
            for exc in ds.get("exchanges", [])
            if exc["type"] in {"biosphere", "technosphere"}
        ]
        bio_exchanges = [obj for obj in exchanges if obj[2] == "biosphere"]
        tech_exchanges = [obj for obj in exchanges if obj[2] == "technosphere"]
        return (
            len(tech_exchanges),
            len(set(tech_exchanges)),
            len(bio_exchanges),
            len(set(bio_exchanges)),
        )

    def uncertainty_check(self):
        """Tally exchanges per uncertainty type and flag implausible parameters."""
        # TODO: Also report no (None) uncertainty
        data = self.db.load()
        results = {obj.id: {"total": 0, "bad": 0} for obj in uncertainty_choices}
        for ds in data.values():
            for exc in ds.get("exchanges", []):
                ut = exc.get("uncertainty type")
                if ut is None:
                    continue
                results[ut]["total"] += 1
                if ut == LognormalUncertainty.id:
                    # `loc` should be ln(|amount|) for lognormals.
                    right_amount = np.allclose(
                        np.log(np.abs(exc["amount"])), exc["loc"], rtol=1e-3
                    )
                    if not exc.get("scale") or not right_amount:
                        results[ut]["bad"] += 1
                elif ut == NormalUncertainty.id:
                    if not exc.get("scale") or abs(exc["amount"]) != exc["loc"]:
                        results[ut]["bad"] += 1
                elif ut in {TriangularUncertainty.id, UniformUncertainty.id}:
                    if exc["minimum"] >= exc["maximum"]:
                        results[ut]["bad"] += 1
        return results

    def multioutput_processes(self):
        """List processes with production exchanges pointing somewhere else.

        Returns:
            List of (key, count) for processes with at least one such exchange.
        """
        num_production_exchanges = [
            (
                key,
                len([
                    exc
                    # Default to [] so datasets without an "exchanges" key
                    # don't raise TypeError (fix: was ds.get("exchanges")).
                    for exc in ds.get("exchanges", [])
                    if exc["type"] == "production" and exc["input"] != key
                ]),
            )
            for key, ds in self.db.load().items()
        ]
        return [obj for obj in num_production_exchanges if obj[1]]

    def aggregated_processes(self, cutoff=500):
        """Find system processes (no technosphere inputs, many biosphere flows)
        and processes with an unusually large number of technosphere inputs.

        Args:
            * *cutoff* (int, default 500): Exchange count threshold.
        """
        num_exchanges = {
            key: {
                "technosphere": len([
                    exc
                    for exc in value.get("exchanges", [])
                    if exc["type"] == "technosphere"
                ]),
                "biosphere": len([
                    exc
                    for exc in value.get("exchanges", [])
                    if exc["type"] == "biosphere"
                ]),
            }
            for key, value in self.db.load().items()
            if value.get("type", "process") == "process"
        }
        system_processes = [
            (key, value["biosphere"])
            for key, value in num_exchanges.items()
            if value["technosphere"] == 0 and value["biosphere"] > cutoff
        ]
        many_exchanges = [
            (key, value["technosphere"])
            for key, value in num_exchanges.items()
            if value["technosphere"] > cutoff
        ]
        return {
            "system_processes": system_processes,
            "many_exchanges": many_exchanges,
        }

    def no_self_production(self):
        """Find processes that have production exchanges, none of which
        produce the process itself.

        Returns:
            A set of database keys.
        """
        def self_production(production_inputs, key):
            # No production exchanges at all is considered OK here.
            return not production_inputs or key in production_inputs

        return {
            key
            for key, value in self.db.load().items()
            if value.get("type", "process") == "process"
            and not self_production(
                {
                    exc["input"]
                    for exc in value.get("exchanges", [])
                    if exc["type"] == "production"
                },
                key,
            )
        }

    def ouroboros(self):
        """Find processes that consume their own reference products as inputs.

        Not necessarily an error, but should be examined carefully (see `Two
        potential points of confusion in LCA math
        <http://chris.mutel.org/too-confusing.html>`__).

        Returns:
            A set of database keys.
        """
        return {
            key
            for key, value in self.db.load().items()
            if any(
                exc
                for exc in value.get("exchanges", [])
                if exc["input"] == key and exc["type"] == "technosphere"
            )
        }