def extract(self):
    def extract_flow_data(o):
        ds = {
            "categories": (
                o.compartment.compartment.text,
                o.compartment.subcompartment.text,
            ),
            "code": o.get("id"),
            "CAS number": o.get("casNumber"),
            "name": o.name.text,
            "database": self.db_name,
            "exchanges": [],
            "unit": o.unitName.text,
        }
        ds["type"] = EMISSIONS_CATEGORIES.get(
            ds["categories"][0], ds["categories"][0]
        )
        return ds

    lci_dirpath = os.path.join(os.path.dirname(__file__), "..", "data", "lci")
    fp = os.path.join(lci_dirpath, "ecoinvent elementary flows 3.7.xml")
    root = objectify.parse(open(fp, encoding="utf-8")).getroot()
    flow_data = recursive_str_to_unicode(
        [extract_flow_data(ds) for ds in root.iterchildren()]
    )
    previous = os.path.join(lci_dirpath, "previous elementary flows.json")
    return flow_data + json.load(open(previous))
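# A minimal, self-contained sketch of the lxml.objectify pattern used above:
# parse an XML document, then read child elements' text and attributes via
# attribute access. The XML payload here is made up for illustration and only
# mirrors the structure the extractor expects.
from io import BytesIO
from lxml import objectify

xml = b"""<flows>
  <flow id="f1" casNumber="7732-18-5">
    <name>Water</name>
    <unitName>kg</unitName>
    <compartment>
      <compartment>water</compartment>
      <subcompartment>river</subcompartment>
    </compartment>
  </flow>
</flows>"""

root = objectify.parse(BytesIO(xml)).getroot()
for flow in root.iterchildren():
    print(
        flow.get("id"),
        flow.name.text,
        flow.unitName.text,
        (flow.compartment.compartment.text, flow.compartment.subcompartment.text),
    )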
def extract(cls, path, db_name, use_mp=True):
    data = []
    if os.path.isdir(path):
        filelist = [
            os.path.join(path, filename)
            for filename in os.listdir(path)
            if filename[-4:].lower() == ".xml"
            # Skip SimaPro-specific flow list
            and filename != "ElementaryFlows.xml"
        ]
    else:
        filelist = [path]

    if not filelist:
        raise OSError("Provided path doesn't appear to have any XML files")

    if sys.version_info < (3, 0):
        use_mp = False

    if use_mp:
        with multiprocessing.Pool(processes=multiprocessing.cpu_count()) as pool:
            print("Extracting XML data from {} datasets".format(len(filelist)))
            results = [
                pool.apply_async(
                    Ecospold1DataExtractor.process_file, args=(x, db_name)
                )
                for x in filelist
            ]
            data = [x for p in results for x in p.get() if x]
    else:
        pbar = pyprind.ProgBar(
            len(filelist), title="Extracting ecospold1 files:", monitor=True
        )
        data = []
        for filepath in filelist:
            for x in cls.process_file(filepath, db_name):
                if x:
                    data.append(x)
            # The original referenced an undefined ``filename`` here; use the
            # loop variable's basename so the progress label is valid.
            pbar.update(item_id=os.path.basename(filepath)[:15])
        print(pbar)

    if sys.version_info < (3, 0):
        print("Converting to unicode")
        return recursive_str_to_unicode(data)
    else:
        return data
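# Self-contained sketch of the Pool/apply_async fan-out used above. The worker
# must be a top-level function so it can be pickled by multiprocessing; results
# come back via AsyncResult.get(). The worker body and filenames are stand-ins
# for Ecospold1DataExtractor.process_file, not the real parser.
import multiprocessing

def process_file(filepath, db_name):
    # Stand-in: return a list of dataset dicts parsed from one file.
    return [{"file": filepath, "database": db_name}]

if __name__ == "__main__":
    filelist = ["a.xml", "b.xml", "c.xml"]
    with multiprocessing.Pool(processes=multiprocessing.cpu_count()) as pool:
        results = [
            pool.apply_async(process_file, args=(fp, "example_db"))
            for fp in filelist
        ]
        data = [x for r in results for x in r.get() if x]
    print(len(data), "datasets extracted")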
def delete_activity(self, activity):
    """Delete a flow from the database.

    Parameters
    ----------
    activity : str
        The flow to be deleted.
    """
    data = self.db.load()
    del data[activity]
    from bw2data.utils import recursive_str_to_unicode
    self.db.write(recursive_str_to_unicode(data))
    self.db.process()
    print("deleted activity flow: %s" % str(activity))
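# Runnable sketch of the same load -> delete -> write round-trip, shown against
# a plain dict stand-in for db.load()'s {key: dataset} mapping so it runs
# without bw2data. Note that ``del`` raises KeyError on a missing key, hence
# the guard; keys and datasets below are illustrative.
data = {
    ("example_db", "flow-a"): {"name": "flow A"},
    ("example_db", "flow-b"): {"name": "flow B"},
}
activity = ("example_db", "flow-a")
if activity in data:
    del data[activity]
print("deleted activity flow: %s" % str(activity))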
def extract(cls, dirpath, db_name, use_mp=True):
    assert os.path.exists(dirpath)
    if os.path.isdir(dirpath):
        filelist = [
            filename
            for filename in os.listdir(dirpath)
            if os.path.isfile(os.path.join(dirpath, filename))
            and filename.split(".")[-1].lower() == "spold"
        ]
    elif os.path.isfile(dirpath):
        filelist = [dirpath]
    else:
        raise OSError("Can't understand path {}".format(dirpath))

    if sys.version_info < (3, 0):
        use_mp = False

    if use_mp:
        with multiprocessing.Pool(processes=multiprocessing.cpu_count()) as pool:
            print("Extracting XML data from {} datasets".format(len(filelist)))
            results = [
                pool.apply_async(
                    Ecospold2DataExtractor.extract_activity,
                    args=(dirpath, x, db_name),
                )
                for x in filelist
            ]
            data = [p.get() for p in results]
    else:
        pbar = pyprind.ProgBar(
            len(filelist), title="Extracting ecospold2 files:", monitor=True
        )
        data = []
        for filename in filelist:
            data.append(cls.extract_activity(dirpath, filename, db_name))
            pbar.update(item_id=filename[:15])
        print(pbar)

    if sys.version_info < (3, 0):
        print("Converting to unicode")
        return recursive_str_to_unicode(data)
    else:
        return data
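# Sketch of the sequential pyprind progress pattern used in the non-mp branch
# above (assumes the pyprind package is installed; the items and the sleep are
# stand-ins for cls.extract_activity on real .spold files).
import time
import pyprind

items = ["a.spold", "b.spold", "c.spold"]
pbar = pyprind.ProgBar(len(items), title="Extracting:", monitor=True)
for name in items:
    time.sleep(0.1)  # stand-in for the per-file extraction work
    pbar.update(item_id=name[:15])
print(pbar)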
def add_missing_cfs(self):
    new_flows = []
    for method in self.data:
        for cf in method["exchanges"]:
            if "input" not in cf:
                cf["code"] = str(uuid.uuid4())
                new_flows.append(cf)

    new_flows = recursive_str_to_unicode(
        dict([self._format_flow(cf) for cf in new_flows])
    )

    if new_flows:
        biosphere = Database(self.biosphere_name)
        biosphere_data = biosphere.load()
        biosphere_data.update(new_flows)
        biosphere.write(biosphere_data)
        print(u"Added {} new biosphere flows".format(len(new_flows)))
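# Sketch of the "mint a code for unlinked CFs" step above: characterization
# factors lacking an "input" key get a fresh UUID code, then are keyed for a
# biosphere update. The _format_flow_sketch shape (a (key, dataset) tuple) is
# an assumption for illustration, not the exact bw2io implementation.
import uuid

def _format_flow_sketch(cf, biosphere_name="biosphere3"):
    return (biosphere_name, cf["code"]), {
        "name": cf.get("name"),
        "categories": cf.get("categories"),
        "type": "emission",
        "exchanges": [],
    }

cfs = [{"name": "Some emission", "categories": ("air",)}]
for cf in cfs:
    if "input" not in cf:
        cf["code"] = str(uuid.uuid4())
new_flows = dict(_format_flow_sketch(cf) for cf in cfs)
print(list(new_flows.keys())[0])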
def _to_unicode(data):
    if sys.version_info < (3, 0):
        return recursive_str_to_unicode(data)
    else:
        return data
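# Minimal sketch of what a recursive str -> unicode pass looks like
# (assumption: bw2data's recursive_str_to_unicode walks nested containers and
# decodes byte strings; this is an illustrative re-implementation, not the
# library's code).
def recursive_bytes_to_str(data, encoding="utf-8"):
    if isinstance(data, bytes):
        return data.decode(encoding)
    if isinstance(data, dict):
        return {
            recursive_bytes_to_str(k, encoding): recursive_bytes_to_str(v, encoding)
            for k, v in data.items()
        }
    if isinstance(data, (list, tuple)):
        return type(data)(recursive_bytes_to_str(x, encoding) for x in data)
    return data

print(recursive_bytes_to_str({b"name": [b"water", 1.0]}))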
def save_as_bw2_dataset(self, db_name="MP default", unit=None, location=None,
                        categories=[], save_aggregated_inventory=False):
    """Save simplified process to a database.

    Creates database if necessary; otherwise *adds* to existing database.
    Uses the ``unit`` and ``location`` of ``self.scaling_activities[0]``, if
    not otherwise provided. Assumes that one unit of the scaling activity is
    being produced.

    Args:
        * *db_name* (str): Name of Database
        * *unit* (str, optional): Unit of the simplified process
        * *location* (str, optional): Location of the simplified process
        * *categories* (list, optional): Category/ies of the scaling activity
        * *save_aggregated_inventory* (bool, optional): If False (default),
          save exchanges as scaling activities minus cuts; if True, save the
          aggregated inventory of all processes linked within the meta-process

    """
    db = Database(db_name)
    if db_name not in databases:
        db.register()
        data = {}
    else:
        data = db.load()

    # GATHER DATASET INFORMATION
    self.key = (unicode(db_name), unicode(uuid.uuid4().urn[9:]))
    activity = self.scaling_activities[0]
    metadata = Database(activity[0]).load()[activity]

    # Unit: if all scaling activities share one unit, use it; otherwise mark
    # as 'several'. (The original compared the list itself to 1, which is
    # always True; the length check below is the intended test.)
    if len(self.scaling_activities) > 1:
        units_set = set([
            Database(sa[0]).load()[sa].get(u'unit', '')
            for sa in self.scaling_activities
        ])
        if len(units_set) > 1:
            unit = 'several'  # several distinct units
        else:
            unit = units_set.pop()

    # EXCHANGES
    exchanges = []
    if not save_aggregated_inventory:
        # Save inventory as scaling activities minus cuts.
        # Scaling activities:
        for sa in self.scaling_activities:
            exchanges.append({
                "amount": self.demand[self.mapping[sa]],
                "input": sa,
                "type": "biosphere"
                if sa[0] in (u"biosphere", u"biosphere3")
                else "technosphere",
            })
        # Cuts:
        for cut in self.cuts:
            exchanges.append({
                "amount": -cut[3],
                "input": cut[0],
                "type": "biosphere"
                if cut[0] in (u"biosphere", u"biosphere3")
                else "technosphere",
            })
    else:
        # Save aggregated inventory of all processes in the chain.
        exchanges = [{
            "amount": exc[2],
            "input": exc[0],
            "type": "biosphere"
            if exc[0][0] in (u"biosphere", u"biosphere3")
            else "technosphere",
        } for exc in self.external_scaled_edges]

    # Production amount
    exchanges.append({
        # Output value unless several outputs, then 1.0
        "amount": self.outputs[0][2] if len(self.outputs) == 1 else 1.0,
        "input": self.key,
        "type": "production",
    })

    # WRITE DATASET INFORMATION
    data[self.key] = {
        "name": self.name,
        "unit": unit or metadata.get(u'unit', ''),
        "location": location or metadata.get(u'location', ''),
        "categories": categories,
        "type": "process",
        "exchanges": exchanges,
    }
    # TODO: Include uncertainty from original databases. Can't just scale
    # uncertainty parameters. Maybe solution is to use "dummy" processes
    # like we want to do to separate inputs of same flow in any case.
    # data = db.relabel_data(data, db_name)
    db.write(recursive_str_to_unicode(data))
    db.process()
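# Runnable sketch of the unit-resolution rule above: a single scaling activity
# keeps the provided unit (or the metadata fallback); several activities with
# differing units collapse to 'several'. Function name and values are
# illustrative, not part of the original API.
def resolve_unit(units, provided=None, fallback=""):
    if len(units) > 1:
        distinct = set(units)
        return "several" if len(distinct) > 1 else distinct.pop()
    return provided or fallback

print(resolve_unit(["kg", "MJ"]))                  # -> several
print(resolve_unit(["kg", "kg"]))                  # -> kg
print(resolve_unit(["kg"], provided=None, fallback="kg"))  # -> kg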