def refresh_mviews(self):
    """Refresh the ``resolved_symlinks`` materialized view.

    Runs a ``REFRESH MATERIALIZED VIEW ... WITH DATA`` statement on the
    shared session, logging before and after.
    """
    refresh_sql = """ REFRESH MATERIALIZED VIEW resolved_symlinks WITH DATA; """
    log.info("Refreshing the symlink materialized view.")
    session = State.get_db_session()
    session.execute(refresh_sql)
    log.info("Symlink materialized view refresh complete.")
def __exit__(self, exception_type, exception_value, traceback):
    """Finish the transaction when the ``with`` block ends.

    Commits the shared session when no exception value was passed in,
    otherwise rolls it back. Returns False so that any exception raised in
    the block keeps propagating.
    """
    if not exception_value:
        log.info('Committing transaction.')
        State.get_db_session().commit()
        log.info('Transaction committed.')
        return False

    log.error('Rolling back transaction.')
    State.get_db_session().rollback()
    return False
def _clear_existing(self):
    """Reset ``file_exists`` to False on this system's RpmDetail rows.

    The update is flushed and committed, the database is re-analyzed via
    ``self.analyze_database()``, and the affected row count is logged.
    """
    reset_stmt = (
        update(RpmDetail)
        .where(RpmDetail.system_id == self.system.system_id)
        .values(file_exists=False)
    )
    session = State.get_db_session()
    outcome = session.execute(reset_stmt)
    session.flush()
    session.commit()
    self.analyze_database()
    log.info(f"Cleared file_exists attribute for {outcome.rowcount} rows.")
def _mark_as_user_data(self, path_spec):
    """
    Mark a path, and everything beneath it, as user data.

    path_spec can be a file or a directory. FileDetail rows of the current
    system are selected when file_location matches path_spec itself or
    path_spec + '/%', i.e. roughly the regex:
        path_spec + r'|' + path_spec + r'/.*'

    Each matching row gets origin set to FileOrigin.UserData and is logged
    through self._log. All updates are committed together at the end.

    NOTE(review): path_spec is used as a SQL LIKE pattern, so any '%' or
    '_' it contains acts as a wildcard -- confirm callers expect that.
    """
    system: System = State.get_system()
    session: Session = State.get_db_session()

    # One query covers both the path itself and everything below it.
    matches: List[FileDetail] = session.query(FileDetail).filter(
        (FileDetail.system_id == system.system_id)
        & (FileDetail.file_location.like(path_spec)
           | FileDetail.file_location.like(path_spec + '/%'))).all()
    if not matches:
        # Nothing to update; leave the session's transaction untouched.
        return

    for f in matches:
        session.execute(
            update(FileDetail).values(
                origin=FileOrigin.UserData.name,
            ).where(FileDetail.file_detail_id == f.file_detail_id))
        self._log('Marked as UserData ' + f.file_location)

    # A single flush/commit for the batch: committing inside the loop
    # expired every loaded row and forced a reload on the next access.
    session.flush()
    session.commit()
def _mark_as_content(self, path_spec):
    """
    Link files to the current package and mark them as package content.

    path_spec can be a file or a directory. FileDetail rows of the current
    system are selected when file_location matches path_spec itself or
    path_spec + '/%', i.e. roughly the regex:
        path_spec + r'|' + path_spec + r'/.*'

    Each matching row gets origin set to FileOrigin.PackageContent and
    rpm_info_id set to self._rpm_info.rpm_info_id, and is logged through
    self._log. All updates are committed together at the end.

    NOTE(review): path_spec is used as a SQL LIKE pattern, so any '%' or
    '_' it contains acts as a wildcard -- confirm callers expect that.
    """
    system: System = State.get_system()
    session: Session = State.get_db_session()

    # One query covers both the path itself and everything below it.
    matches: List[FileDetail] = session.query(FileDetail).filter(
        (FileDetail.system_id == system.system_id)
        & (FileDetail.file_location.like(path_spec)
           | FileDetail.file_location.like(path_spec + '/%'))).all()
    if not matches:
        # Nothing to update; leave the session's transaction untouched.
        return

    # Loop-invariant: read the package id once instead of per row.
    rpm_info_id = self._rpm_info.rpm_info_id
    for f in matches:
        session.execute(
            update(FileDetail).values(
                origin=FileOrigin.PackageContent.name,
                rpm_info_id=rpm_info_id,
            ).where(FileDetail.file_detail_id == f.file_detail_id))
        self._log('Linked to package ' + self._package_name + ': ' +
                  f.file_location)
        self._log('Marked as PackageContent ' + f.file_location)

    # A single flush/commit for the batch: committing inside the loop
    # expired every loaded row and forced a reload on the next access.
    session.flush()
    session.commit()
def _get_files_to_create(
    self,
    origins: Tuple[str] = None,
    file_prefix: str = None,
) -> Iterable[Tuple[FileDetail, FileStorage]]:
    """Yield (FileDetail, FileStorage) pairs for this system's files.

    Only rows with file_type "F" are returned, ordered by file_location
    and fetched in batches of 10.

    :param origins: when given and non-empty, restrict to these origins.
    :param file_prefix: when given and non-empty, restrict to locations
        starting with this prefix.
    """
    session = State.get_db_session()

    criteria = [
        System.system_id == self.system.system_id,
        FileDetail.file_type == "F",
    ]
    if origins:
        criteria.append(FileDetail.origin.in_(origins))
    if file_prefix:
        criteria.append(FileDetail.file_location.startswith(file_prefix))

    rows = (
        session.query(FileDetail, FileStorage)
        .join(System)
        .join(FileDetailStorageLink)
        .join(FileStorage)
        .filter(*criteria)  # multiple criteria are AND-ed together
        .order_by(FileDetail.file_location)
        .yield_per(10)
    )
    yield from rows
def process_modified_files(self, modified_rpm_files: ResultProxy) -> int:
    """Flag every given rpm file record as changed.

    Existing changed flags are cleared first through
    ``self.file_difference.clear_changed_flags()``. Each record then gets
    ``file_changed = True`` and is added to the session, which is flushed
    (not committed) at the end.

    :return: the number of records flagged.
    """
    self.file_difference.clear_changed_flags()
    log.info("Flagging modified files")

    flagged = 0
    for flagged, changed_file in enumerate(modified_rpm_files, start=1):
        changed_file.file_changed = True
        State.get_db_session().add(changed_file)

    log.info(f"Flagged {flagged} files as modified.")
    State.get_db_session().flush()
    return flagged
def _flag_existing(self):
    """Flag linked rows on both sides of the rpm/file link table.

    First sets ``file_exists`` on this system's RpmDetail rows that have a
    RpmFileDetailLink entry, then sets origin to PackageInstalled on this
    system's FileDetail rows that have one. Row counts are logged; nothing
    is flushed or committed here.
    """
    session = State.get_db_session()

    rpm_linked = (
        (RpmDetail.system_id == self.system_id)
        & (RpmDetail.rpm_detail_id == RpmFileDetailLink.rpm_detail_id)
        & RpmFileDetailLink.file_detail_id.isnot(None)
    )
    rpm_outcome = session.execute(
        update(RpmDetail).where(rpm_linked).values(file_exists=True))
    log.info(f"{rpm_outcome.rowcount} files flagged as existing.")

    file_linked = (
        (FileDetail.system_id == self.system_id)
        & (FileDetail.file_detail_id == RpmFileDetailLink.file_detail_id)
        & RpmFileDetailLink.rpm_detail_id.isnot(None)
    )
    file_outcome = session.execute(
        update(FileDetail).where(file_linked).values(
            origin=FileOrigin.PackageInstalled.name))
    log.info(
        f"{file_outcome.rowcount} files flagged as PackageInstalled.")
def _get_user_directories_to_create(self):
    """Yield this system's directory FileDetail rows with a user origin.

    Rows have file_type "D" and an origin listed by
    ``get_user_content_names()``; they are ordered by file_location and
    fetched in batches of 10.
    """
    session = State.get_db_session()
    wanted = (
        (System.system_id == self.system.system_id)
        & (FileDetail.file_type == "D")
        & (FileDetail.origin.in_(get_user_content_names()))
    )
    rows = (
        session.query(FileDetail)
        .join(System)
        .filter(wanted)
        .order_by(FileDetail.file_location)
        .yield_per(10)
    )
    yield from rows
def test_path_modified(test_database):
    """path_modified() is truthy once file_changed is set for the path.

    The row is flipped with raw SQL inside the open transaction, which is
    always rolled back so the change cannot leak into other tests.
    """
    session = State.get_db_session()
    try:
        session.execute(""" update iac.rpm_detail set file_changed = TRUE where file_location = '/etc/profile' and system_id = 1; """)
        assert heuristic_utils.path_modified("/etc/profile")
    finally:
        # Previously skipped when the assertion failed.
        session.rollback()
def unknown_count() -> int:
    """
    Return how many FileDetail rows of the current system still have
    origin UnknownSource.
    """
    system: System = State.get_system()
    session: Session = State.get_db_session()
    unknown_rows = session.query(FileDetail).filter(
        FileDetail.system_id == system.system_id,
        FileDetail.origin == FileOrigin.UnknownSource.name,
    )
    return unknown_rows.count()
def _get_detail(self,
                rpm_detail: RpmDetail = None,
                symlink: ResolvedSymlinks = None,
                custom_path: str = None):
    """Return the FileDetail stored at the resolved path, or None.

    The path is ``symlink.resolved_path`` when a symlink is given,
    otherwise ``custom_path`` or, failing that, ``rpm_detail.file_location``.

    The lookup is restricted to the system of ``rpm_detail`` (or of
    ``symlink`` when no rpm_detail is given) whenever that id is known, so
    an identical path on another system can neither be returned nor make
    ``one_or_none()`` raise MultipleResultsFound.

    NOTE(review): other code in this file reads
    ``ResolvedSymlinks.resolved_location``; confirm ``resolved_path`` also
    exists on that model.
    """
    if symlink:
        path = symlink.resolved_path
    else:
        path = custom_path or rpm_detail.file_location

    owner = rpm_detail if rpm_detail is not None else symlink
    system_id = getattr(owner, "system_id", None)

    query = State.get_db_session().query(FileDetail).filter(
        FileDetail.file_location == path)
    if system_id is not None:
        query = query.filter(FileDetail.system_id == system_id)
    return query.one_or_none()
def path_exists(path_spec: str) -> bool:
    """
    Return True when the current system has a FileDetail row whose
    file_location equals path_spec, else False.
    """
    system: System = State.get_system()
    session: Session = State.get_db_session()
    match: FileDetail = session.query(FileDetail).filter(
        FileDetail.system_id == system.system_id,
        FileDetail.file_location == path_spec,
    ).one_or_none()
    if match is None:
        return False
    return True
def _run_update(self):
    """Link rpm rows to file rows that share the same path.

    Inserts a RpmFileDetailLink row for every (RpmDetail, FileDetail) pair
    of this system with an identical file_location that is not linked yet.
    The insert is flushed and committed and the database re-analyzed.

    :return: the number of links inserted.
    """
    session = State.get_db_session()
    rd: RpmDetail = aliased(RpmDetail)
    fd: FileDetail = aliased(FileDetail)
    lk: RpmFileDetailLink = aliased(RpmFileDetailLink)

    # Anti-join: the outer join to the link table plus "link id IS NULL"
    # keeps only pairs that have no link row yet.
    query = session.query(
        rd.rpm_detail_id, fd.file_detail_id).join(
            fd, (rd.system_id == fd.system_id) &
            (rd.file_location == fd.file_location)).outerjoin(
                lk, (lk.file_detail_id == fd.file_detail_id) &
                (lk.rpm_detail_id == rd.rpm_detail_id)).filter(
                    rd.system_id == self.system.system_id,
                    lk.rpm_file_detail_link_id.is_(None))
    insert_dml = insert(RpmFileDetailLink).from_select([
        rd.rpm_detail_id,
        fd.file_detail_id,
    ], query)

    result = session.execute(insert_dml)
    log.debug(f"{result.rowcount} files linked.")
    session.flush()
    session.commit()
    self.analyze_database()
    return result.rowcount
def _run_link_match_update(self):
    """Link rpm rows to the files their symlinked paths resolve to.

    For this system, joins RpmDetail to ResolvedSymlinks on file_location
    and then to the FileDetail at the symlink's resolved_location, and
    inserts a RpmFileDetailLink row for every pair not linked yet. The
    insert is flushed and committed and the database re-analyzed.

    :return: the number of links inserted.
    """
    session = State.get_db_session()
    rpm: RpmDetail = aliased(RpmDetail)
    target: FileDetail = aliased(FileDetail)
    link: RpmFileDetailLink = aliased(RpmFileDetailLink)

    rpm_is_symlink = (
        (ResolvedSymlinks.system_id == rpm.system_id)
        & (ResolvedSymlinks.file_location == rpm.file_location)
    )
    target_is_resolved = (
        (ResolvedSymlinks.system_id == target.system_id)
        & (target.file_location == ResolvedSymlinks.resolved_location)
    )
    already_linked = (
        (link.file_detail_id == target.file_detail_id)
        & (link.rpm_detail_id == rpm.rpm_detail_id)
    )

    unlinked_pairs = (
        session.query(rpm.rpm_detail_id, target.file_detail_id)
        .join(ResolvedSymlinks, rpm_is_symlink)
        .join(target, target_is_resolved)
        .outerjoin(link, already_linked)
        .filter(
            rpm.system_id == self.system_id,
            link.rpm_file_detail_link_id.is_(None),
        )
    )
    statement = insert(RpmFileDetailLink).from_select(
        [rpm.rpm_detail_id, target.file_detail_id],
        unlinked_pairs,
    )

    outcome = session.execute(statement)
    session.flush()
    session.commit()
    self.analyze_database()
    return outcome.rowcount
def _map_rpm_to_file(self, rpm_detail: RpmDetail = None):
    """Find the FileDetail that an rpm file record refers to.

    Tries, in order:
      1. a FileDetail at the rpm's own file_location,
      2. a ResolvedSymlinks row for that exact location,
      3. every directory symlink whose file_location prefixes the rpm
         path, substituting the symlink's resolved_location.

    Symlink lookups are restricted to the rpm's system whenever
    ``rpm_detail.system_id`` is set, so symlinks of other systems are not
    considered and cannot make ``one_or_none()`` raise
    MultipleResultsFound.

    :raises ValueError: when no FileDetail can be found.
    """
    # happy path, no symlinks
    file_detail = self._get_detail(rpm_detail)
    if file_detail:
        return file_detail

    rpm_path = rpm_detail.file_location
    symlinks = State.get_db_session().query(ResolvedSymlinks)
    if rpm_detail.system_id is not None:
        symlinks = symlinks.filter(
            ResolvedSymlinks.system_id == rpm_detail.system_id)

    # try for a direct symlink match
    symlink = symlinks.filter(
        ResolvedSymlinks.file_location == rpm_path).one_or_none()
    if symlink:
        file_detail = self._get_detail(rpm_detail=rpm_detail,
                                       symlink=symlink)
        if file_detail:
            return file_detail

    # no easy match here. brute force by directory
    # NOTE(review): the prefix test has no path-separator boundary, so
    # '/usr/lib' also prefixes '/usr/lib64/...' -- confirm that is intended.
    directories = symlinks.filter(
        ResolvedSymlinks.target_type == 'D').all()
    for direct in directories:
        if rpm_path.startswith(direct.file_location):
            path = (
                f'{direct.resolved_location}'
                f'{rpm_path[len(direct.file_location):]}')
            file_detail = self._get_detail(
                rpm_detail=rpm_detail,
                custom_path=path,
            )
            if file_detail:
                return file_detail

    # TODO: What to do here? :)
    raise ValueError('cant find file details for rpm_id '
                     f'{rpm_detail.rpm_info}. {rpm_detail.file_location}')
def origin_unknown(path_spec: str) -> bool:
    """
    Return True when the current system has a FileDetail row at path_spec
    whose origin is UnknownSource, else False.
    """
    system: System = State.get_system()
    session: Session = State.get_db_session()
    match: FileDetail = session.query(FileDetail).filter(
        FileDetail.system_id == system.system_id,
        FileDetail.file_location == path_spec,
        FileDetail.origin == FileOrigin.UnknownSource.name,
    ).one_or_none()
    if match is None:
        return False
    return True
def store_system_info(self, **kwargs):
    """Create or update the System row named ``kwargs["name"]``.

    Required keys: ``name``, ``remote_hostname``, ``kernel_version``.
    Optional keys: ``hostname`` (defaults to name), ``username`` (defaults
    to ``os.getlogin()``), ``key_file``, ``use_tty`` (False), ``port``
    (22), ``distro``, ``major``, ``minor``, ``revision``.

    The row is added to the session and flushed, not committed.

    :return: the System instance.
    :raises KeyError: when a required key is missing.
    """
    session = State.get_db_session()
    try:
        system = session.query(System).filter(
            System.name == kwargs["name"]).one()
    except NoResultFound:
        system = System(name=kwargs["name"])

    system.hostname = kwargs.get("hostname", kwargs["name"])
    # Only fall back to os.getlogin() when no username was supplied: it
    # raises OSError in processes without a controlling terminal.
    system.username = (kwargs["username"]
                       if "username" in kwargs else os.getlogin())
    system.key_file = kwargs.get("key_file")
    system.use_tty = kwargs.get("use_tty", False)
    system.port = kwargs.get("port", 22)
    # These assignments used to end in a trailing comma, which stored
    # 1-tuples instead of the plain values.
    system.remote_name = kwargs["remote_hostname"]
    system.kernel_version = kwargs["kernel_version"]
    system.os_distro = kwargs.get("distro")
    system.os_major_ver = kwargs.get("major")
    system.os_minor_ver = kwargs.get("minor")
    system.os_revision = kwargs.get("revision")

    session.add(system)
    session.flush()
    return system
def fetch_file_detail(rpm_detail: RpmDetail) -> FileDetail:
    """Return the first FileDetail linked to the given rpm record.

    The lookup joins FileDetail through RpmFileDetailLink to RpmDetail and
    is limited to the current system.

    :raises ValueError: when no linked FileDetail exists.
    """
    db_session = State.get_db_session()
    system_id = State.get_system().system_id

    linked = (
        db_session.query(FileDetail)
        .join(RpmFileDetailLink)
        .join(RpmDetail)
        .filter(
            RpmDetail.system_id == system_id,
            RpmDetail.rpm_detail_id == rpm_detail.rpm_detail_id,
        )
        .all()
    )
    if not linked:
        raise ValueError("FileDetail cannot be loaded.")
    return linked[0]
def package_installed(package_name: str) -> List[RpmInfo]:
    """
    Look up a package by name on the current system.

    Returns every RpmInfo row of the current system whose name equals
    package_name exactly; the list is empty when nothing matches.
    """
    system: System = State.get_system()
    session: Session = State.get_db_session()
    by_name = session.query(RpmInfo).filter(
        RpmInfo.system_id == system.system_id,
        RpmInfo.name == package_name,
    )
    return by_name.all()
def get_directory_contents(path_spec: str) -> List[FileDetail]:
    """Return the direct children of a directory on the current system.

    A row qualifies when its file_location starts with path_spec (with a
    trailing separator added if missing) and the remainder after that
    prefix contains no further separator.
    """
    system: System = State.get_system()
    session: Session = State.get_db_session()

    if path_spec.endswith(os.path.sep):
        prefix = path_spec
    else:
        prefix = path_spec + os.path.sep
    pattern = prefix + "%"

    # SQL substr() is 1-based, so the remainder starts one past the prefix.
    remainder = func.substr(FileDetail.file_location, len(prefix) + 1)
    children: List[FileDetail] = session.query(FileDetail).filter(
        (FileDetail.system_id == system.system_id)
        & (FileDetail.file_location.like(pattern))
        & (func.strpos(remainder, os.path.sep) == 0)
    ).all()
    return children
def path_modified(path_spec: str) -> bool:
    """
    Check if a source system file was modified after package installation.

    path_spec is a full pathname on the source file system. It is looked
    up in the current system's RpmDetail rows; the result is True when any
    matching row has file_changed set.

    A path may be recorded for more than one package, so all matching rows
    are considered instead of requiring exactly one (one_or_none() raised
    MultipleResultsFound in that case). An unset file_changed counts as
    False.

    Raises RpmFileNotFound when no RpmDetail row exists for the path.
    """
    system: System = State.get_system()
    session: Session = State.get_db_session()
    rpm_details: List[RpmDetail] = session.query(RpmDetail).filter(
        (RpmDetail.system_id == system.system_id)
        & (RpmDetail.file_location == path_spec)).all()
    if not rpm_details:
        raise RpmFileNotFound("Unable to locate file in RpmDetails")
    return any(detail.file_changed for detail in rpm_details)
def _run(self):
    """Flag unclassified yum configuration files as user data.

    Walks the direct contents of "/etc/yum" and "/etc/yum.repos.d"; every
    record whose origin is still UnknownSource is switched to UserData and
    added to the session, which is committed at the end.
    """
    session = State.get_db_session()
    yum_directories = ("/etc/yum", "/etc/yum.repos.d")

    for directory in yum_directories:
        for entry in hutils.get_directory_contents(path_spec=directory):
            log.debug(
                f"Yum configuration file name {entry.file_location}.")
            if entry.origin != FileOrigin.UnknownSource.name:
                continue
            log.debug(f"Flagging {entry.file_location} as UserData.")
            entry.origin = FileOrigin.UserData.name
            session.add(entry)

    session.commit()
def _add_packages(self):
    """Append one install task per package of this system to self._yaml.

    Packages are read in installation_date order, in batches of 10. Each
    task carries a name and the value of ``self._get_install`` for the
    package name.
    """
    log.debug("entering _add_packages")
    session = State.get_db_session()

    # gpg-pubkey entries are not real packages, so they are left out:
    # https://unix.stackexchange.com/questions/190203/what-are-gpg-pubkey-packages
    real_packages = (
        session.query(RpmInfo)
        .join(System)
        .filter(
            (System.system_id == self.system.system_id)
            & (RpmInfo.name != "gpg-pubkey")
        )
        .order_by(RpmInfo.installation_date)
        .yield_per(10)
    )
    for pkg in real_packages:
        self._yaml.append(
            OrderedDict(
                name=f"Install {pkg.name}",
                package=self._get_install(pkg.name),
            ))

    log.debug("exiting _add_packages")
def main():
    """Prompt for a path and print the origin of each matching file.

    The system name comes from the parsed command-line arguments. The
    path is used as a LIKE pattern; a trailing "%" wildcard is appended
    only when the path ends with the path separator.
    """
    system_name = utils.os.GetArguments().parse().name
    print('Path to analyze:')
    target = input()
    print('OK. Examining ' + target + ' on ' + system_name)

    system: System = State.get_system(name=system_name)
    session: Session = State.get_db_session()

    pattern = (target + "%") if target.endswith(os.path.sep) else target
    matches: List[FileDetail] = session.query(FileDetail).filter(
        FileDetail.system_id == system.system_id,
        FileDetail.file_location.like(pattern),
    ).all()

    for entry in matches:
        print(entry.file_location + ' is ' + entry.origin)
def _run_directory_match_update(self):
    """Link rpm rows to files reached through a symlinked directory.

    For this system, an RpmDetail path that starts with the file_location
    of a directory symlink (target_type "D") is rewritten by replacing
    that prefix with the symlink's resolved_location. When a FileDetail
    exists at the rewritten path, is not of file_type "S" and is not
    linked yet, a RpmFileDetailLink row is inserted. The insert is flushed
    and committed and the database re-analyzed.

    :return: the number of links inserted.
    """
    session = State.get_db_session()
    rpm: RpmDetail = aliased(RpmDetail)
    target: FileDetail = aliased(FileDetail)
    link: RpmFileDetailLink = aliased(RpmFileDetailLink)

    symlink_len = func.length(ResolvedSymlinks.file_location)

    # The rpm path is strictly longer than the symlink path and begins
    # with it.
    rpm_under_symlink = (
        (rpm.system_id == ResolvedSymlinks.system_id)
        & (ResolvedSymlinks.target_type == "D")
        & (func.length(rpm.file_location) > symlink_len)
        & (ResolvedSymlinks.file_location == func.substr(
            rpm.file_location, 1, symlink_len))
    )
    # Resolved directory followed by the part of the rpm path that comes
    # after the symlink prefix.
    rewritten_path = ResolvedSymlinks.resolved_location + func.substr(
        rpm.file_location, symlink_len + 1)
    target_at_rewritten_path = (
        (target.system_id == ResolvedSymlinks.system_id)
        & (target.file_location == rewritten_path)
    )
    already_linked = (
        (link.file_detail_id == target.file_detail_id)
        & (link.rpm_detail_id == rpm.rpm_detail_id)
    )
    wanted = (
        (rpm.system_id == self.system_id)
        & link.rpm_file_detail_link_id.is_(None)
        & (func.coalesce(target.file_type, "") != "S")
    )

    unlinked_pairs = (
        session.query(rpm.rpm_detail_id, target.file_detail_id)
        .join(ResolvedSymlinks, rpm_under_symlink)
        .join(target, target_at_rewritten_path)
        .outerjoin(link, already_linked)
        .filter(wanted)
        .distinct()
    )
    statement = insert(RpmFileDetailLink).from_select(
        [rpm.rpm_detail_id, target.file_detail_id],
        unlinked_pairs,
    )

    outcome = session.execute(statement)
    session.flush()
    session.commit()
    self.analyze_database()
    return outcome.rowcount
def __set_session(self):
    """Store the session obtained from ``State`` on ``self._db_session``."""
    current_session = State.get_db_session()
    self._db_session = current_session
def store_packages(self, **kwargs):
    """Replace this system's package records with freshly collected data.

    Existing RpmFileDetailLink, RpmDetail and RpmInfo rows of the system
    are deleted and committed first. ``kwargs["pkg_data"]`` is then parsed
    through ``self._convert_results`` into one row per packaged file. A
    RpmInfo row is created per distinct package and the file rows are
    bulk-inserted as RpmDetail in batches of 50000.

    Field values equal to '(none)' are stored as None. Unparseable or
    missing numeric/date fields are stored as None as well.
    """
    pkg_data = kwargs.get("pkg_data")
    rpms = {}
    files = 0
    session = State.get_db_session()
    log.info("Storing packages...")

    # Remove any existing records
    fdl = alias(RpmFileDetailLink)
    rd = alias(RpmDetail)
    delete_fdl = delete(fdl).where(
        exists(
            select(
                [1]).where(rd.c.system_id == self.system.system_id).where(
                    rd.c.rpm_detail_id == fdl.c.rpm_detail_id)))
    log.info(f"Pruned {session.execute(delete_fdl).rowcount} links.")

    session.query(RpmDetail).filter(
        RpmDetail.system == self.system).delete()
    log.info("Pruned existing RpmDetail records.")

    session.query(RpmInfo).filter(RpmInfo.system == self.system).delete()
    log.info("Pruned existing RpmInfo records.")

    session.flush()
    session.commit()

    fieldnames = (
        'package_name',
        'version',
        'release',
        'architecture',
        'installation_tid',
        'installation_date',
        'file_name',
        'file_size',
        'digest',
        'file_class',
        'flag',
        'source_rpm',
        'rpm_name',
    )

    objects = []
    for row in self._convert_results(file_iter=pkg_data,
                                     fieldnames=fieldnames):
        file = {
            key: value if value != '(none)' else None
            for key, value in row.items()
        }

        # The release is part of the key: two installed builds of one
        # package can share name, version and architecture (e.g. several
        # kernels) and must not be merged into a single RpmInfo.
        rpm_key = '+'.join([
            file['package_name'] or 'none',
            file['version'] or 'none',
            file['release'] or 'none',
            file['architecture'] or 'none',
        ])

        rpm = rpms.get(rpm_key, None)
        if not rpm:
            try:
                installation_tid = int(
                    (file['installation_tid'] or '').strip())
            except ValueError:
                installation_tid = None

            try:
                installation_date = parser.parse(file['installation_date'])
            except (ValueError, TypeError, OverflowError):
                # TypeError covers a missing ('(none)') date, which is
                # passed to the parser as None.
                installation_date = None

            rpm = RpmInfo(
                name=file['package_name'],
                version=file['version'],
                release=file['release'],
                filename=file['rpm_name'],
                architecture=file['architecture'],
                installation_tid=installation_tid,
                installation_date=installation_date,
                system_id=self.system.system_id,
            )
            session.add(rpm)
            # Flush now so rpm.rpm_info_id is populated for the file rows.
            session.flush()
            rpms[rpm_key] = rpm

        try:
            file_size = int((file['file_size'] or '').strip())
        except ValueError:
            file_size = None

        objects.append({
            "rpm_info_id": rpm.rpm_info_id,
            "file_location": file['file_name'],
            "file_size": file_size,
            "digest": file['digest'] or None,
            "file_info": file['file_class'],
            "file_flag": file['flag'],
            "system_id": self.system.system_id,
            "file_changed": None,
        })

        files += 1
        if files % 50000 == 0:
            # Insert in batches to bound the size of the pending list.
            log.info(f"{files}")
            session.bulk_insert_mappings(RpmDetail, objects)
            session.flush()
            objects.clear()

    if objects:
        session.bulk_insert_mappings(RpmDetail, objects)
        objects.clear()

    session.flush()
    session.commit()
    log.info('..done')
def analyze_database(self):
    """Run ``ANALYZE;`` on the shared database session."""
    session = State.get_db_session()
    session.execute("ANALYZE;")
def store_files(self, **kwargs):
    """Replace this system's file records with freshly collected data.

    Existing RpmFileDetailLink rows, file storage links and FileDetail
    rows of the system are deleted and committed first.
    ``kwargs["file_iter"]`` is then parsed through
    ``self._convert_results`` and bulk-inserted as FileDetail rows in
    batches of 50000.

    Paths under /dev/, /tmp/ and /proc/, and *.log files under /var/log/,
    get origin EphemeralContent; everything else starts as UnknownSource.
    """
    file_iter = kwargs.get("file_iter")
    stored = 0
    log.info("Storing files...")

    # Remove any existing records
    link_tbl = alias(RpmFileDetailLink)
    detail_tbl = alias(FileDetail)
    prune_links = delete(link_tbl).where(
        exists(
            select([1]).where(
                detail_tbl.c.system_id == self.system.system_id).where(
                    detail_tbl.c.file_detail_id ==
                    link_tbl.c.file_detail_id)))
    log.info(
        f"Pruned {State.get_db_session().execute(prune_links).rowcount} links."
    )

    # delete FileStorage links
    FileDifference(system=self.system).clear_system_file_storage()

    State.get_db_session().query(FileDetail).filter(
        FileDetail.system == self.system).delete()
    log.info("Pruned existing FileDetails.")

    State.get_db_session().flush()
    State.get_db_session().commit()

    ephemeral_roots = ("/dev/", "/tmp/", "/proc/")
    pending = []
    for entry in self._convert_results(file_iter=file_iter):
        location = entry.get("path", "")

        is_log_file = (location.startswith("/var/log/")
                       and location.endswith(".log"))
        if location.startswith(ephemeral_roots) or is_log_file:
            origin = FileOrigin.EphemeralContent
        else:
            origin = FileOrigin.UnknownSource

        pending.append({
            "system_id": self.system.system_id,
            "file_location": location or None,
            "file_type": entry['type'],
            "owner_uid": entry['uid'],
            "owner_gid": entry['gid'],
            "owner_name": entry['user'] or None,
            "owner_group": entry['group'] or None,
            "file_mode": entry['mode'] or None,
            "file_target": entry['target'] or None,
            "target_type": entry['target_type'] or None,
            "md5_digest": entry['md5'] or None,
            "sha256_digest": entry['sha256'] or None,
            "file_info": entry['info'] or None,
            "file_perm_mode": entry['perm'] or None,
            "origin": origin.name,
        })

        stored += 1
        if stored % 50000 == 0:
            # Insert in batches to bound the size of the pending list.
            log.info(f"{stored}")
            State.get_db_session().bulk_insert_mappings(
                FileDetail, pending)
            State.get_db_session().flush()
            pending.clear()

    if pending:
        State.get_db_session().bulk_insert_mappings(FileDetail, pending)
        pending.clear()

    State.get_db_session().flush()
    State.get_db_session().commit()
    log.info('..done')