def parse_values(self, el, session):
    """
    Extract this class's property values from an etree element.

    Recurses into ``self.parent`` first so that inherited properties are
    collected before the ones defined on this level.

    :param el: etree element holding the serialized object.
    :param session: Database session (forwarded to the parent's
        ``parse_values``; not used directly here).
    :return: Tuple ``(argmap, insertables)`` where ``argmap`` maps property
        keys to parsed scalar values and ``insertables`` collects insert
        statements for many-to-many association tables.
    """
    if not self.parent:
        argmap = {}
        insertables = []
    else:
        # Inherited properties are parsed by the parent class first.
        argmap, insertables = self.parent.parse_values(el, session)
    props = [prop for prop in self.props if prop.used]
    for prop in props:
        value = prop.xpath(el)
        # prop.used is guaranteed by the filter above, so it is not re-tested.
        if prop.many_remote and value:
            # Many-to-many reference: fill the association table instead of
            # putting a value into argmap.
            # NOTE(review): assumes the element's first attribute holds its
            # identifier -- confirm against the serialization format.
            _id = [el.attrib.values()[0]]
            _remote_ids = []
            if len(set(value)) > 1:
                for raw_value in value:
                    _remote_ids = _remote_ids + [
                        v for v in raw_value.split("#") if len(v)
                    ]
            else:
                _remote_ids = [v for v in value[0].split("#") if len(v)]
            # Repeat the local id once per remote id to form (local, remote)
            # pairs for the association table.
            _ids = _id * len(_remote_ids)
            # Insert tuples in chunks of 400 elements max
            for chunk in chunks(list(zip(_ids, _remote_ids)), 400):
                _ins = prop.association_table.insert([{
                    f"{prop.domain.label}_id": local_id,
                    f"{prop.range.label}_id": remote_id
                } for (local_id, remote_id) in chunk])
                insertables.append(_ins)
        elif len(value) == 1 or len(set(value)) == 1:
            value = value[0]
            if isinstance(prop.range, CIMEnum):
                argmap[prop.key] = shorten_namespace(value, self.nsmap)
            else:
                try:
                    t = prop.mapped_datatype
                    if t == "Float":
                        argmap[prop.key] = float(value)
                    elif t == "Boolean":
                        argmap[prop.key] = value.lower() == "true"
                    elif t == "Integer":
                        argmap[prop.key] = int(value)
                    elif len([v for v in value.split("#") if v]) > 1:
                        # A single raw value containing several "#"-separated
                        # references cannot be resolved unambiguously.
                        # (The count reported is the number of references,
                        # not len(set(value)), which would count unique
                        # characters of the string.)
                        log.warning(
                            f"Ambiguous data values for {self.name}:{prop.key}: "
                            f"{len([v for v in value.split('#') if v])} values. "
                            f"(Skipped)")
                        # If reference doesn't resolve value is set to None
                        # (Validation has to catch missing obligatory values)
                    else:
                        argmap[prop.key] = value.replace("#", "")
                except ValueError:
                    # Datatype conversion failed; fall back to the raw
                    # reference string with "#" stripped.
                    argmap[prop.key] = value.replace("#", "")
        elif len(value) > 1:
            log.warning(
                f"Ambiguous data values for {self.name}:{prop.key}: "
                f"{len(set(value))} unique values. (Skipped)")
            # If reference doesn't resolve value is set to None (Validation
            # has to catch missing obligatory values)
    return argmap, insertables
def merge_sources(sources):
    """
    Merge the elements of several CIM sources into one nested dictionary.

    The first element seen for a uuid wins; children of later elements with
    the same uuid are appended to it.

    :param sources: Iterable of source objects exposing ``.tree``.
    :return: ``{classname: {uuid: element}}`` as a defaultdict of dicts.
    """
    d_ = defaultdict(dict)
    from lxml.etree import XPath
    xp = {
        "id": XPath("@rdf:ID", namespaces=get_nsmap(sources)),
        "about": XPath("@rdf:about", namespaces=get_nsmap(sources))
    }
    # Hoisted out of the loop: the namespace map does not change per element.
    nsmap = HDict(get_nsmap(sources))
    for source in sources:
        for element in source.tree.getroot():
            try:
                uuid = determine_uuid(element, xp)
                classname = shorten_namespace(element.tag, nsmap)
                if uuid not in d_[classname]:
                    # First occurrence of this object wins.
                    d_[classname][uuid] = element
                else:
                    # Duplicate uuid: merge by moving the new element's
                    # children into the first occurrence.
                    for sub in element:
                        d_[classname][uuid].append(sub)
            except ValueError:
                log.warning(f"Skipped element during merge: {element}.")
    return d_
def test_get_class_names_cim(dummy_nsmap):
    # A cim-namespace tag is shortened to the bare class name.
    tags = frozenset(
        ['{http://iec.ch/TC57/2013/CIM-schema-cim16#}StaticVarCompensator'])
    result = shorten_namespace(tags, dummy_nsmap)
    assert result == ["StaticVarCompensator"]
def test_get_class_names_entsoe(dummy_nsmap):
    # An entsoe-namespace tag is shortened with the "entsoe_" prefix.
    tags = frozenset(
        ['{http://entsoe.eu/CIM/SchemaExtension/3/1#}EnergySchedulingType'])
    result = shorten_namespace(tags, dummy_nsmap)
    assert result == ["entsoe_EnergySchedulingType"]
def test_get_class_names_md(dummy_nsmap):
    # A ModelDescription tag is shortened with the "md_" prefix.
    tags = frozenset(
        ["{http://iec.ch/TC57/61970-552/ModelDescription/1#}FullModel"])
    result = shorten_namespace(tags, dummy_nsmap)
    assert result == ["md_FullModel"]
def merge_sources(sources, model_schema=None):
    """
    Merge different sources of CIM datasets (usually the different profiles,
    but could also be multiple instances of the same profile when multiple
    datasets are merged via boundary datasets)

    :param sources: SourceInfo objects of the source files.
    :param model_schema: The schema used to deserialize the dataset.

    :return: A dictionary of the objects found in the dataset, keyed by
        classname and object uuid.
    """
    uuid2name = dict()
    uuid2data = dict()
    classname_list = defaultdict(set)
    from cimpyorm.auxiliary import XPath
    xp = {
        "id": XPath("@rdf:ID", namespaces=get_nsmap(sources)),
        "about": XPath("@rdf:about", namespaces=get_nsmap(sources))
    }
    # Hoisted out of the loop: the namespace map does not change per element.
    nsmap = HDict(get_nsmap(sources))
    for source in sources:
        for element in source.tree.getroot():
            try:
                uuid = determine_uuid(element, xp)
                classname = shorten_namespace(element.tag, nsmap)
                # Set the classname only when UUID is attribute
                try:
                    uuid = xp["id"](element)[0]
                    if uuid in uuid2name and uuid2name[uuid] != classname:
                        # If multiple objects of different class share the
                        # same uuid, raise an Error
                        raise ReferenceError(
                            f"uuid {uuid}={classname} already defined as "
                            f"{uuid2name[uuid]}")
                    uuid2name[uuid] = classname
                except IndexError:
                    # No rdf:ID attribute; element only references the object.
                    pass
                classname_list[uuid] |= {classname}
                if uuid not in uuid2data:
                    uuid2data[uuid] = element
                else:
                    # Duplicate uuid: merge by moving the new element's
                    # children into the first occurrence.
                    for sub in element:
                        uuid2data[uuid].append(sub)
            except ValueError:
                log.warning(f"Skipped element during merge: {element}.")
    # print warning in case uuid references use different classnames
    for uuid, name_set in classname_list.items():
        if len(name_set) > 1:
            log.warning(
                f"Ambiguous classnames for {uuid} of type "
                f"{uuid2name.get(uuid, None)} = {name_set}")
    # check that the class is the most specific one in the list
    if model_schema is not None:
        schema_classes = model_schema.get_classes()
        for uuid, classname in uuid2name.items():
            try:
                cls = schema_classes[classname]
            except KeyError:
                # Reconstructed from a garbled literal split mid-string: the
                # message is now a single, valid implicit concatenation.
                log.info(
                    f"Class {classname} is not included in schema. "
                    f"Objects of this class are not deserialized.")
            else:
                try:
                    if not all(
                            issubclass(cls, schema_classes[_cname])
                            for _cname in classname_list[uuid]):
                        raise ValueError(
                            f"Class {classname} is not most specific of "
                            f"{classname_list[uuid]}.")
                except KeyError as ex:
                    raise ReferenceError(
                        f"Malformed schema. Class-hierarchy-element is "
                        f"missing: {ex}.")
    # transform the data into output structure
    d_ = defaultdict(dict)
    for uuid, classname in uuid2name.items():
        d_[classname][uuid] = uuid2data[uuid]
    return d_
def insert(self, argmap, value):
    """
    Fill *argmap* with the name/namespace columns derived from an enum
    *value*; mutates *argmap* in place and returns None.
    """
    key = self.key
    ns_short = self.namespace.short
    argmap[f"{key}_name"] = value.split(".")[-1]
    argmap[f"{key}_namespace"] = ns_short
    shortened = shorten_namespace(value, self.nsmap)
    argmap[f"{key}_enum_name"] = shortened.split("_")[-1].split(".")[0]
    argmap[f"{key}_enum_namespace"] = ns_short