def lint(session, model):
    """
    Check a parsed CIM dataset for schema violations.

    Two checks run for every used, mandatory property of every class in the
    schema hierarchy:

    * ``Missing`` — instances where the mandatory property is ``None``.
    * ``Invalid`` — instances whose reference target does not exist (a
      foreign key pointing at a missing object or enum value).

    :param session: SQLAlchemy session on the parsed dataset.
    :param model: Model namespace holding the schema (``model.schema``).
    :return: pandas pivot table of violation counts indexed by
        (Type, Class, Total, Property).
    """
    events = []
    for CIM_class in tqdm(model.schema.class_hierarchy("dfs"),
                          desc="Linting...",  # fix: was an f-string with no placeholders
                          leave=True):
        query = session.query(CIM_class.class_)
        for prop in CIM_class.props:
            if not prop.optional and prop.used:
                total = query.count()
                # Mandatory-property check: count instances with a None value.
                objects = query.filter_by(**{prop.full_label: None}).count()
                if objects:
                    events.append({
                        "Class": CIM_class.label,
                        "Property": prop.full_label,
                        "Total": total,
                        "Type": "Missing",
                        "Violations": objects,
                        "Unique": None
                    })
                    log.debug(
                        f"Missing mandatory property {prop.full_label} for "
                        f"{objects} instances of type {CIM_class.label}.")
                if prop.range:
                    # Fix: initialise both names so a range that is neither a
                    # CIMClass nor a CIMEnum can no longer raise NameError in
                    # the else-clause below (previously unbound in that case).
                    col = None
                    validity = None
                    try:
                        if isinstance(prop.range, CIMClass):
                            # Referential check: FK values without a matching id.
                            col = getattr(CIM_class.class_, prop.full_label + "_id")
                            validity = session.query(col).except_(
                                session.query(prop.range.class_.id))
                        elif isinstance(prop.range, CIMEnum):
                            # Enum check: values without a matching enum name.
                            col = getattr(CIM_class.class_, prop.full_label + "_name")
                            validity = session.query(col).except_(
                                session.query(CIMEnumValue.name))
                    except AttributeError:
                        log.warning(
                            f"Couldn't determine validity of {prop.full_label} on "
                            f"{CIM_class.label}. The linter does not yet support "
                            f"many-to-many relationships.")
                        # ToDo: Association table errors are currently not caught
                    else:
                        if validity is None:
                            # Range type not covered by the checks above.
                            continue
                        count = validity.count()
                        # query.except() returns (None) if right hand side table is empty
                        if count > 1 or (count == 1 and tuple(
                                validity.one())[0] is not None):
                            non_unique = query.filter(
                                col.in_(val[0] for val in validity.all())).count()
                            events.append({
                                "Class": CIM_class.label,
                                "Property": prop.full_label,
                                "Total": total,
                                "Type": "Invalid",
                                "Violations": non_unique,
                                "Unique": count
                            })
    return pivot_table(DataFrame(events), values=["Violations", "Unique"],
                       index=["Type", "Class", "Total", "Property"])
def _merge_elements(self):
    """Collapse each category's per-name element lists into single objects.

    Every name in a category may have collected several raw schema elements;
    their descriptions are merged and fed to the category's element class to
    build one canonical instance. The category container is then frozen into
    a plain dict.
    """
    for category, name_map in self.Elements.items():
        log.debug(f"Merging {category}.")
        element_cls = self.Element_classes[category]  # hoisted: same class for the whole category
        for name, duplicates in name_map.items():
            merged = merge_descriptions([dup.description for dup in duplicates])
            name_map[name] = element_cls(merged)
        self.Elements[category] = dict(name_map)
def generate_relationship(self, nsmap=None):
    """
    Attach a SQLAlchemy relationship (and, for *-to-one, a foreign-key
    column) for this property to the ORM class of its owning CIM class.

    :param nsmap: Namespace map used to build this property's XPath query.
    :return: ``Map`` — always an empty dict as written (nothing is added to
        it before returning).  # NOTE(review): possibly vestigial — confirm callers ignore it.
    """
    var, query_base = self.name_query()
    attrs = {}
    Map = {}
    log.debug(f"Generating relationship for {var} on {self.name}")
    if self.many_remote:
        # Many-to-many: relationship via an association table, no local FK column.
        if self.inverse:
            # Non-"cim" namespaces get prefixed so backref names stay unique.
            br = self.inverse.label if self.namespace == "cim" else self.namespace + "_" + self.inverse.label
            tbl = self.generate_association_table()
            self.association_table = tbl
            attrs[var] = relationship(self.range.label, secondary=tbl, backref=br)
        else:
            tbl = self.generate_association_table()
            attrs[var] = relationship(self.range.label, secondary=tbl)
    else:
        # *-to-one: FK column "<var>_id" referencing the range class's id.
        attrs[f"{var}_id"] = Column(String(50), ForeignKey(f"{self.range.label}.id"), name=f"{var}_id")
        if self.inverse:
            br = self.inverse.label if self.namespace == "cim" else self.namespace+"_"+self.inverse.label
            attrs[var] = relationship(self.range.label, foreign_keys=attrs[f"{var}_id"], backref=br)
        else:
            attrs[var] = relationship(self.range.label, foreign_keys=attrs[f"{var}_id"])
    # NOTE(review): key/xpath appear to be set for both branches, yet no
    # "<var>_id" column exists in the many-remote case — confirm intended.
    self.key = f"{var}_id"
    self.xpath = XPath(query_base + "/@rdf:resource", namespaces=nsmap)
    # Mutate the already-generated ORM class in place.
    class_ = self.cls.class_
    for attr, attr_value in attrs.items():
        setattr(class_, attr, attr_value)
    return Map
def generate(self, nsmap):
    """
    Generate the ORM column(s)/relationship for this property and attach
    them to the owning CIM class's ORM class.

    Dispatches on the property's range: enum ranges get a FK column onto
    CIMEnumValue, class ranges delegate to ``generate_relationship``, and
    range-less properties become plain value columns typed from the mapped
    datatype.

    :param nsmap: Namespace map used to build this property's XPath query.
    """
    attrs = OrderedDict()
    dt = self.mapped_datatype
    if self.used:
        if isinstance(self.range, CIMEnum):
            # Enum-valued property: FK column "<var>_name" onto CIMEnumValue.
            var, query_base = self.name_query()
            attrs[f"{var}_name"] = Column(String(120), ForeignKey(CIMEnumValue.name), name=f"{var}_name")
            attrs[var] = relationship(CIMEnumValue, foreign_keys=attrs[f"{var}_name"])
            self.key = f"{var}_name"
            self.xpath = XPath(query_base + "/@rdf:resource", namespaces=nsmap)
        elif self.range:
            # Class-valued property: handled by the relationship generator.
            self.generate_relationship(nsmap)
        elif not self.range:
            # Plain value property: parse from element text.
            var, query_base = self.name_query()
            log.debug(f"Generating property for {var} on {self.name}")
            self.key = var
            self.xpath = XPath(query_base + "/text()", namespaces=nsmap)
            if dt:
                # Map the CIM datatype onto a SQLAlchemy column type.
                if dt == "String":
                    attrs[var] = Column(String(50), name=f"{var}")
                elif dt in ("Float", "Decimal"):
                    attrs[var] = Column(Float, name=f"{var}")
                elif dt == "Integer":
                    attrs[var] = Column(Integer, name=f"{var}")
                elif dt == "Boolean":
                    attrs[var] = Column(Boolean, name=f"{var}")
                else:
                    # Unknown datatype name: store as short string.
                    attrs[var] = Column(String(30), name=f"{var}")
            else:
                # Fallback to parsing as String(50)
                attrs[var] = Column(String(50), name=f"{var}")
    # Attach whatever was generated to the ORM class (no-op if unused).
    for attr, attr_value in attrs.items():
        setattr(self.cls.class_, attr, attr_value)
def test_merged_nsmaps(path):
    """The merged tree's root nsmap must equal the union of all source nsmaps."""
    expected = {}
    for source in parseable_files(path):
        expected.update(et.parse(source).getroot().nsmap)
    merged_root = merge(path).getroot()
    log.info(
        f"{len(expected.keys())} entries expected in nsmap. {len(merged_root.nsmap.keys())} found"
    )
    log.debug(f"Expected: {expected.keys()}")
    log.debug(f"Found: {merged_root.nsmap.keys()}")
    assert merged_root.nsmap == expected
def parse(dataset: Union[str, Path], backend: Engine = SQLite(), silence_tqdm: bool = False) -> Tuple[Session, Namespace]:
    """
    Parse a CIM snapshot into a database backend.

    Yields a database session to start querying on, together with the model
    namespace containing the generated CIM ORM classes; afterwards the data
    can be queried with SQLAlchemy query syntax.

    :param dataset: Path to the cim snapshot.
    :param backend: Database backend to be used (defaults to a SQLite on-disk
        database in the dataset location).
    :param silence_tqdm: Silence tqdm progress bars.

    :return: :class:`sqlalchemy.orm.session.Session`,
        :class:`argparse.Namespace`
    """
    from cimpyorm import Parser
    backend.update_path(dataset)
    # Start from a clean database, then connect.
    backend.drop()
    backend.reset()
    engine, session = backend.connect()

    # Register every source file of the snapshot with the database.
    from cimpyorm.Model.Source import SourceInfo
    source_infos = frozenset(SourceInfo(file) for file in Parser.get_files(dataset))
    session.add_all(source_infos)
    session.commit()

    # Build the schema-derived ORM classes and create their tables.
    schema = Schema(version=Parser.get_cim_version(source_infos), session=session)
    backend.generate_tables(schema)

    log.info("Parsing data.")
    objects = Parser.parse_entries(Parser.merge_sources(source_infos), schema,
                                   silence_tqdm=silence_tqdm)
    log.info(f"Passing {len(objects):,} objects to database.")
    session.bulk_save_objects(objects)
    session.flush()
    log.debug("Start commit.")
    session.commit()
    log.debug("Finished commit.")

    # Foreign key checks were deferred during loading; restore them for mysql.
    if engine.dialect.name == "mysql":
        log.debug("Enabling foreign key checks in mysql database.")
        session.execute("SET foreign_key_checks='ON'")
    log.info("Exit.")
    return session, schema.model
def init_type(self, base):
    """
    Initialize ORM type using the CIMClass object.

    Builds the class attributes (table name, id/discriminator columns,
    mapper args) and creates the ORM class dynamically via ``type``.

    :param base: Declarative base class used for classes without a parent.
    :return: None
    """
    log.debug(f"Initializing class {self.name}.")
    attrs = OrderedDict()
    attrs["__tablename__"] = self.name
    self.Map = dict()
    if self.parent:
        # Child class (joined-table inheritance): id is both PK and FK to the parent.
        attrs["id"] = Column(String(50), ForeignKey(f"{self.parent.name}.id", ondelete="CASCADE"), primary_key=True)
        log.debug(
            f"Created id column on {self.name} with FK on {self.parent.name}."
        )
        attrs["__mapper_args__"] = {"polymorphic_identity": self.name}
    else:  # Base class
        # Root class carries the polymorphic discriminator and source bookkeeping.
        attrs["type_"] = Column(String(50))
        attrs["_source_id"] = Column(Integer, ForeignKey("SourceInfo.id"))
        attrs["_source"] = relationship("SourceInfo", foreign_keys=attrs["_source_id"])
        attrs["id"] = Column(String(50), primary_key=True)
        log.debug(f"Created id column on {self.name} with no inheritance.")
        attrs["__mapper_args__"] = {
            "polymorphic_on": attrs["type_"],
            "polymorphic_identity": self.name
        }
    # Back-reference from the generated ORM class to this schema element.
    attrs["_schema_class"] = self
    if self.parent:
        self.class_ = type(self.name, (self.parent.class_, ), attrs)
    else:  # Base class
        # Root class inherits the Parseable mixin and the declarative base.
        self.class_ = type(self.name, (
            Parseable,
            base,
        ), attrs)
    log.debug(f"Defined class {self.name}.")
def __init__(self, session=None, version: str = "16"):
    """
    Initialize a Backend object, containing information about the schema
    elements.

    If the session already contains schema elements, they are loaded from
    the database; otherwise the merged RDFS schema is parsed and the
    elements are generated and persisted.

    :param session: Database session to use; a fresh in-memory backend is
        created when omitted.
    :param version: CIM schema version used to locate the RDFS files.
    :raises FileNotFoundError: If no RDFS schema for ``version`` is found.
    """
    self.g = None
    if not session:
        # No session supplied: fall back to a throwaway in-memory backend.
        backend = InMemory()
        backend.reset()
        session = backend.session
    rdfs_path = find_rdfs_path(version)
    if not rdfs_path:
        raise FileNotFoundError(
            "Failed to find schema file. Please provide one.")
    tree = merge(rdfs_path)
    log.info(f"Dynamic code generation.")
    if session.query(SchemaElement).count():
        # A schema is already present, so just load it instead of recreating
        self.session = session
        self.Element_classes = {
            c.__name__: c
            for c in [
                CIMPackage, CIMClass, CIMProp, CIMDT, CIMEnum, CIMEnumValue,
                CIMDTUnit, CIMDTValue, CIMDTMultiplier, CIMDTDenominatorUnit,
                CIMDTDenominatorMultiplier
            ]
        }
        # Reload persisted elements, keyed by category and element name.
        self.Elements = {
            c.__name__: {
                cim_class.name: cim_class
                for cim_class in session.query(c).all()
            }
            for c in self.Element_classes.values()
        }
        # NOTE(review): this branch sets neither self.file nor self.root —
        # confirm nothing downstream reads them in the reload path.
    else:
        self.session = session
        # ``merge`` may return a parsed tree or a file path.
        if isinstance(tree, type(et.ElementTree())):
            self.file = None
            self.root = tree.getroot()
        else:
            self.file = tree
            self.root = et.parse(tree).getroot()
        self.Element_classes = {
            c.__name__: c
            for c in [
                CIMPackage, CIMClass, CIMProp, CIMDT, CIMEnum, CIMEnumValue,
                CIMDTUnit, CIMDTValue, CIMDTMultiplier, CIMDTDenominatorUnit,
                CIMDTDenominatorMultiplier
            ]
        }
        # Collect raw elements per category before merging duplicates.
        self.Elements = {
            c.__name__: defaultdict(list)
            for c in self.Element_classes.values()
        }
        self._init_parser()
        self._generate()
        for _, Cat_Elements in self.Elements.items():
            self.session.add_all(list(Cat_Elements.values()))
        self.session.commit()
        log.debug(f"Backend generated")
        # Persist the namespace map of the freshly parsed schema.
        session.add(SchemaInfo(self.root.nsmap))
    self.init_model(session)
def _generate(self):
    """
    Classify every element of the merged RDFS tree into the schema element
    containers (``self.Elements``), then merge duplicates.

    Two passes: classes/datatypes/enums/packages first; properties and enum
    values are postponed until their domains/enums are known.
    """
    xp_type_res = XPath(f"rdf:type/@rdf:resource", namespaces=self.root.nsmap)
    xp_stype_res = XPath(f"cims:stereotype/@rdf:resource", namespaces=self.root.nsmap)
    xp_stype_txt = XPath(f"cims:stereotype/text()", namespaces=self.root.nsmap)
    postponed = []
    for element in self.root:
        type_res = xp_type_res(element)
        stype_res = xp_stype_res(element)
        stype_txt = xp_stype_txt(element)
        if Schema._isclass(type_res):
            # Stereotype decides whether a "class" is an enum, datatype, or class.
            if Schema._isenum(stype_res):
                obj = CIMEnum(element)
                self.Elements["CIMEnum"][obj.name].append(obj)
            elif Schema._isdt(stype_txt):
                obj = CIMDT(element)
                self.Elements["CIMDT"][obj.name].append(obj)
            else:
                obj = CIMClass(element)
                self.Elements["CIMClass"][obj.name].append(obj)
        elif Schema._isprop(type_res):
            # Properties need their domain classified first — handle in pass 2.
            postponed.append(element)
        elif Schema._ispackage(type_res):
            obj = CIMPackage(element)
            self.Elements["CIMPackage"][obj.name].append(obj)
        elif type_res:
            # Unrecognized typed element — likely an enum value; pass 2 decides.
            postponed.append(element)
        else:
            obj = SchemaElement(element)
            log.warning(f"Element skipped: {obj.name}")
    for element in postponed:
        type_res = xp_type_res(element)
        if Schema._isprop(type_res):
            obj = CIMProp(element)
            if obj._domain in self.Elements["CIMDT"].keys():
                # Datatype members are specialized by name suffix.
                if obj.name.endswith(".unit"):
                    obj = CIMDTUnit(element)
                    self.Elements["CIMDTUnit"][obj.name].append(obj)
                elif obj.name.endswith(".value"):
                    obj = CIMDTValue(element)
                    self.Elements["CIMDTValue"][obj.name].append(obj)
                elif obj.name.endswith(".multiplier"):
                    obj = CIMDTMultiplier(element)
                    self.Elements["CIMDTMultiplier"][obj.name].append(obj)
                elif obj.name.endswith(".denominatorUnit"):
                    obj = CIMDTDenominatorUnit(element)
                    self.Elements["CIMDTDenominatorUnit"][obj.name].append(
                        obj)
                elif obj.name.endswith(".denominatorMultiplier"):
                    obj = CIMDTDenominatorMultiplier(element)
                    self.Elements["CIMDTDenominatorMultiplier"][
                        obj.name].append(obj)
                else:
                    obj = CIMDTProperty(element)
                    # NOTE(review): "CIMDTProperty" is not among the
                    # Element_classes initialized in __init__ — verify this
                    # key exists in self.Elements before this branch runs.
                    self.Elements["CIMDTProperty"][obj.name].append(obj)
            else:
                self.Elements["CIMProp"][obj.name].append(obj)
            continue
        # Remaining postponed elements are assumed to be enum values.
        obj = CIMEnumValue(element)
        if obj._enum_name and obj._enum_name in self.Elements[
                "CIMEnum"].keys():
            self.Elements["CIMEnumValue"][obj.name].append(obj)
        else:
            log.debug(f"Failed to identify purpose for {type_res}")
    self._merge_elements()
    for key, value in self.Elements.items():
        if value:
            log.debug(f"Generated {len(value)} {key}.")
def session(self):
    """Return a session with MySQL foreign key checks switched off.

    Checks are deferred so bulk loading does not fail on insert ordering;
    the parser re-enables them once loading has finished.
    """
    db_session = super().session
    log.debug("Deferring foreign key checks in mysql database.")
    db_session.execute("SET foreign_key_checks='OFF'")
    return db_session