Exemplo n.º 1
0
    def refresh_mviews(self):
        log.info("Refreshing the symlink materialized view.")

        State.get_db_session().execute("""
REFRESH MATERIALIZED VIEW resolved_symlinks WITH DATA;
            """)

        log.info("Symlink materialized view refresh complete.")
Exemplo n.º 2
0
    def __exit__(self, exception_type, exception_value, traceback):
        """
        Finish the transaction when the ``with`` block ends.

        Rolls the session back if the block raised, otherwise commits it.

        exception_type is the class of the exception raised inside the
        block, exception_value the exception instance and traceback its
        traceback; all three are None when the block completed normally.

        Always returns False, so an exception raised inside the block is
        propagated to the caller rather than suppressed.
        """
        # Decide on the exception *type*: it is None only when the block
        # finished cleanly.  Testing the truthiness of the value would
        # wrongly commit if the exception instance happens to be falsy
        # (e.g. it defines __bool__ or __len__).
        if exception_type is not None:
            log.error('Rolling back transaction.')
            State.get_db_session().rollback()
        else:
            log.info('Committing transaction.')
            State.get_db_session().commit()
            log.info('Transaction committed.')

        return False
Exemplo n.º 3
0
    def _clear_existing(self):
        """
        Reset ``file_exists`` to False on every RpmDetail row of this system.

        The update is flushed and committed, ``analyze_database`` is run and
        the number of affected rows is logged.
        """
        reset_flags = (
            update(RpmDetail)
            .where(RpmDetail.system_id == self.system.system_id)
            .values(file_exists=False)
        )

        session = State.get_db_session()
        res = session.execute(reset_flags)

        session.flush()
        session.commit()
        self.analyze_database()

        log.info(f"Cleared file_exists attribute for {res.rowcount} rows.")
Exemplo n.º 4
0
    def _mark_as_user_data(self, path_spec):
        """
        Flag ``path_spec`` and everything beneath it as user data.

        Two LIKE lookups are run for the current system: one against
        ``path_spec`` itself and one against ``path_spec + '/%'``.  Every
        matching FileDetail gets its origin set to FileOrigin.UserData and
        the change is flushed and committed one row at a time.

        NOTE(review): because LIKE is used, any ``%`` or ``_`` inside
        ``path_spec`` acts as a wildcard -- confirm callers expect that.
        """
        system: System = State.get_system()
        session: Session = State.get_db_session()

        # The path itself first, then its descendants.
        matches: List[FileDetail] = []
        for pattern in (path_spec, path_spec + '/%'):
            matches.extend(
                session.query(FileDetail).filter(
                    FileDetail.system_id == system.system_id,
                    FileDetail.file_location.like(pattern),
                ).all())

        for detail in matches:
            set_origin = update(FileDetail).values(
                origin=FileOrigin.UserData.name).where(
                    FileDetail.file_detail_id == detail.file_detail_id)
            session.execute(set_origin)
            session.flush()
            session.commit()
            self._log('Marked as UserData ' + detail.file_location)
Exemplo n.º 5
0
    def _mark_as_content(self, path_spec):
        """
        Attach ``path_spec`` and everything beneath it to the current package.

        Two LIKE lookups are run for the current system: one against
        ``path_spec`` itself and one against ``path_spec + '/%'``.  Every
        matching FileDetail gets its origin set to FileOrigin.PackageContent
        and its rpm_info_id set to ``self._rpm_info.rpm_info_id``; the change
        is flushed and committed one row at a time.

        NOTE(review): because LIKE is used, any ``%`` or ``_`` inside
        ``path_spec`` acts as a wildcard -- confirm callers expect that.
        """
        system: System = State.get_system()
        session: Session = State.get_db_session()

        # The path itself first, then its descendants.
        matches: List[FileDetail] = []
        for pattern in (path_spec, path_spec + '/%'):
            matches.extend(
                session.query(FileDetail).filter(
                    FileDetail.system_id == system.system_id,
                    FileDetail.file_location.like(pattern),
                ).all())

        for detail in matches:
            link_to_package = update(FileDetail).values(
                origin=FileOrigin.PackageContent.name,
                rpm_info_id=self._rpm_info.rpm_info_id).where(
                    FileDetail.file_detail_id == detail.file_detail_id)
            session.execute(link_to_package)
            session.flush()
            session.commit()
            self._log('Linked to package ' + self._package_name + ': ' +
                      detail.file_location)
            self._log('Marked as PackageContent ' + detail.file_location)
Exemplo n.º 6
0
    def _get_files_to_create(
        self,
        origins: Tuple[str] = None,
        file_prefix: str = None,
    ) -> Iterable[Tuple[FileDetail, FileStorage]]:
        """
        Yield (FileDetail, FileStorage) pairs for this system's files.

        Only rows with file_type "F" are considered.  When ``origins`` is
        given the rows are limited to those origins; when ``file_prefix`` is
        given they are limited to locations starting with that prefix.
        Results are ordered by file_location and fetched 10 rows at a time.
        """
        criteria = [
            System.system_id == self.system.system_id,
            FileDetail.file_type == "F",
        ]
        if origins:
            criteria.append(FileDetail.origin.in_(origins))
        if file_prefix:
            criteria.append(FileDetail.file_location.startswith(file_prefix))

        session = State.get_db_session()
        pairs = (
            session.query(FileDetail, FileStorage)
            .join(System)
            .join(FileDetailStorageLink)
            .join(FileStorage)
            .filter(*criteria)
            .order_by(FileDetail.file_location)
            .yield_per(10)
        )

        yield from pairs
Exemplo n.º 7
0
    def process_modified_files(self, modified_rpm_files: ResultProxy) -> int:
        """
        Set ``file_changed`` on every record in ``modified_rpm_files``.

        Previously set flags are cleared through ``self.file_difference``
        first.  Each record is added to the session, which is flushed (not
        committed) at the end.

        Returns the number of records flagged.
        """
        self.file_difference.clear_changed_flags()

        log.info("Flagging modified files")

        session = State.get_db_session()

        count = 0
        for count, rpm_file in enumerate(modified_rpm_files, start=1):
            rpm_file.file_changed = True
            session.add(rpm_file)

        log.info(f"Flagged {count} files as modified.")

        session.flush()

        return count
Exemplo n.º 8
0
    def _flag_existing(self):
        """
        Mark linked RPM and file records of this system.

        RpmDetail rows that have a RpmFileDetailLink get file_exists=True;
        FileDetail rows that have a RpmFileDetailLink get the origin
        FileOrigin.PackageInstalled.  The row count of each update is logged.
        """
        session = State.get_db_session()

        # "!= None" is deliberate: SQLAlchemy turns it into IS NOT NULL.
        rpm_has_link = (
            (RpmDetail.system_id == self.system_id)
            & (RpmDetail.rpm_detail_id == RpmFileDetailLink.rpm_detail_id)
            & (RpmFileDetailLink.file_detail_id != None))
        flag_rpm = update(RpmDetail).values(file_exists=True).where(
            rpm_has_link)

        rpm_outcome = session.execute(flag_rpm)
        log.info(f"{rpm_outcome.rowcount} files flagged as existing.")

        file_has_link = (
            (FileDetail.system_id == self.system_id)
            & (FileDetail.file_detail_id == RpmFileDetailLink.file_detail_id)
            & (RpmFileDetailLink.rpm_detail_id != None))
        flag_file = update(FileDetail).values(
            origin=FileOrigin.PackageInstalled.name).where(file_has_link)

        file_outcome = session.execute(flag_file)
        log.info(
            f"{file_outcome.rowcount} files flagged as PackageInstalled.")
Exemplo n.º 9
0
    def _get_user_directories_to_create(self):
        """
        Yield this system's FileDetail rows of file_type "D" whose origin is
        one of ``get_user_content_names()``, ordered by file_location and
        fetched 10 rows at a time.
        """
        session = State.get_db_session()

        user_directories = (
            session.query(FileDetail)
            .join(System)
            .filter(
                System.system_id == self.system.system_id,
                FileDetail.file_type == "D",
                FileDetail.origin.in_(get_user_content_names()),
            )
            .order_by(FileDetail.file_location)
            .yield_per(10)
        )

        yield from user_directories
Exemplo n.º 10
0
def test_path_modified(test_database):
    """
    ``path_modified`` reports a true value once file_changed is set.

    The rpm_detail row for /etc/profile is updated inside the current
    transaction and is always rolled back, so a failing assertion cannot
    leak the modified row into tests that run afterwards.
    """
    session = State.get_db_session()

    try:
        session.execute("""
update iac.rpm_detail set file_changed = TRUE
where file_location = '/etc/profile'
  and system_id = 1;
        """)

        assert heuristic_utils.path_modified("/etc/profile")
    finally:
        # Runs on success and on failure alike.
        session.rollback()
Exemplo n.º 11
0
def unknown_count() -> int:
    """
    Return how many FileDetail rows of the current system still carry the
    origin FileOrigin.UnknownSource.
    """
    system: System = State.get_system()
    session: Session = State.get_db_session()

    unknown_files = session.query(FileDetail).filter(
        FileDetail.system_id == system.system_id,
        FileDetail.origin == FileOrigin.UnknownSource.name,
    )

    return unknown_files.count()
Exemplo n.º 12
0
    def _get_detail(self,
                    rpm_detail: RpmDetail = None,
                    symlink: ResolvedSymlinks = None,
                    custom_path: str = None):
        """
        Look up the FileDetail stored at a path derived from the arguments.

        Path precedence: ``symlink.resolved_path`` when a symlink is given,
        otherwise ``custom_path`` when set, otherwise
        ``rpm_detail.file_location``.

        Returns the result of ``one_or_none()`` for that path, i.e. None when
        nothing matches.

        NOTE(review): other code in this file reads ``resolved_location``
        from ResolvedSymlinks; confirm ``resolved_path`` really exists.
        NOTE(review): the lookup is not restricted to a system_id -- verify
        that file_location is unique across systems.
        """
        if symlink:
            path = symlink.resolved_path
        elif custom_path:
            path = custom_path
        else:
            path = rpm_detail.file_location

        lookup = State.get_db_session().query(FileDetail).filter(
            FileDetail.file_location == path)

        return lookup.one_or_none()
Exemplo n.º 13
0
def path_exists(path_spec: str) -> bool:
    """
    Tell whether the current system has a FileDetail row whose
    file_location equals ``path_spec``.
    """
    system: System = State.get_system()
    session: Session = State.get_db_session()

    match: FileDetail = session.query(FileDetail).filter(
        FileDetail.system_id == system.system_id,
        FileDetail.file_location == path_spec,
    ).one_or_none()

    if match is None:
        return False
    return True
Exemplo n.º 14
0
    def _run_update(self):
        """
        Link RpmDetail rows to FileDetail rows that share system and path.

        Selects every (rpm_detail_id, file_detail_id) pair of this system
        whose file_location values are equal and that has no
        RpmFileDetailLink yet, and inserts those pairs into
        RpmFileDetailLink.  The insert is flushed and committed and
        ``analyze_database`` is run.

        Returns the row count reported for the insert.
        """
        rd: RpmDetail = aliased(RpmDetail)
        fd: FileDetail = aliased(FileDetail)
        lk: RpmFileDetailLink = aliased(RpmFileDetailLink)

        session = State.get_db_session()

        same_file = ((rd.system_id == fd.system_id)
                     & (rd.file_location == fd.file_location))
        existing_link = ((lk.file_detail_id == fd.file_detail_id)
                         & (lk.rpm_detail_id == rd.rpm_detail_id))

        unlinked_pairs = session.query(
            rd.rpm_detail_id,
            fd.file_detail_id,
        ).join(fd, same_file).outerjoin(lk, existing_link).filter(
            rd.system_id == self.system.system_id,
            # "== None" is deliberate: SQLAlchemy turns it into IS NULL, so
            # only pairs without a link survive the outer join.
            lk.rpm_file_detail_link_id == None,
        )

        insert_dml = insert(RpmFileDetailLink).from_select(
            [rd.rpm_detail_id, fd.file_detail_id], unlinked_pairs)

        result = session.execute(insert_dml)
        log.debug(f"{result.rowcount} files linked.")
        session.flush()
        session.commit()
        self.analyze_database()

        return result.rowcount
Exemplo n.º 15
0
    def _run_link_match_update(self):
        """
        Link RpmDetail rows to FileDetail rows through resolved symlinks.

        An RpmDetail whose file_location equals a ResolvedSymlinks
        file_location is paired with the FileDetail found at that row's
        resolved_location (same system).  Pairs without a RpmFileDetailLink
        are inserted into RpmFileDetailLink; the insert is flushed and
        committed and ``analyze_database`` is run.

        Returns the row count reported for the insert.
        """
        rd: RpmDetail = aliased(RpmDetail)
        fd: FileDetail = aliased(FileDetail)
        lk: RpmFileDetailLink = aliased(RpmFileDetailLink)

        session = State.get_db_session()

        rpm_is_symlink = (
            (ResolvedSymlinks.system_id == rd.system_id)
            & (ResolvedSymlinks.file_location == rd.file_location))
        file_is_target = (
            (ResolvedSymlinks.system_id == fd.system_id)
            & (fd.file_location == ResolvedSymlinks.resolved_location))
        existing_link = ((lk.file_detail_id == fd.file_detail_id)
                         & (lk.rpm_detail_id == rd.rpm_detail_id))

        unlinked_pairs = session.query(
            rd.rpm_detail_id,
            fd.file_detail_id,
        ).join(ResolvedSymlinks, rpm_is_symlink).join(
            fd, file_is_target).outerjoin(lk, existing_link).filter(
                rd.system_id == self.system_id,
                # "== None" is deliberate: SQLAlchemy turns it into IS NULL.
                lk.rpm_file_detail_link_id == None,
            )

        insert_dml = insert(RpmFileDetailLink).from_select(
            [rd.rpm_detail_id, fd.file_detail_id], unlinked_pairs)

        result = session.execute(insert_dml)
        session.flush()
        session.commit()
        self.analyze_database()
        return result.rowcount
Exemplo n.º 16
0
    def _map_rpm_to_file(self, rpm_detail: RpmDetail = None):
        """
        Find the FileDetail that corresponds to ``rpm_detail``.

        Three strategies are tried in order and the first hit is returned:

        1. ``self._get_detail`` on the path recorded by the RPM;
        2. a ResolvedSymlinks row with exactly that file_location, followed
           through ``self._get_detail``;
        3. every ResolvedSymlinks row with target_type 'D' whose
           file_location is a prefix of the RPM path, with that prefix
           replaced by the row's resolved_location.

        Raises ValueError when no strategy yields a FileDetail.
        """
        # 1. Happy path: no symlinks involved.
        found = self._get_detail(rpm_detail)
        if found:
            return found

        session = State.get_db_session()
        rpm_path = rpm_detail.file_location

        # 2. The RPM path is itself a known symlink.
        link = session.query(ResolvedSymlinks).filter(
            ResolvedSymlinks.file_location == rpm_path).one_or_none()
        if link:
            found = self._get_detail(rpm_detail=rpm_detail, symlink=link)
            if found:
                return found

        # 3. No easy match: brute force over symlinked directories.
        dir_links = session.query(ResolvedSymlinks).filter(
            ResolvedSymlinks.target_type == 'D').all()

        for dir_link in dir_links:
            if not rpm_path.startswith(dir_link.file_location):
                continue

            remainder = rpm_path[len(dir_link.file_location):]
            found = self._get_detail(
                rpm_detail=rpm_detail,
                custom_path=f'{dir_link.resolved_location}{remainder}',
            )
            if found:
                return found

        # TODO: What to do here? :)
        raise ValueError('cant find file details for rpm_id '
                         f'{rpm_detail.rpm_info}. {rpm_detail.file_location}')
Exemplo n.º 17
0
def origin_unknown(path_spec: str) -> bool:
    """
    Tell whether the current system has a FileDetail row at ``path_spec``
    whose origin is FileOrigin.UnknownSource.

    Returns False both when the origin differs and when no row exists for
    the path.
    """
    system: System = State.get_system()
    session: Session = State.get_db_session()

    match: FileDetail = session.query(FileDetail).filter(
        FileDetail.system_id == system.system_id,
        FileDetail.file_location == path_spec,
        FileDetail.origin == FileOrigin.UnknownSource.name,
    ).one_or_none()

    if match is None:
        return False
    return True
Exemplo n.º 18
0
    def store_system_info(self, **kwargs):
        """
        Create or update the System record identified by ``kwargs["name"]``.

        Required keys: name, remote_hostname, kernel_version.
        Optional keys: hostname (defaults to name), username (defaults to
        ``os.getlogin()``), key_file, use_tty (defaults to False), port
        (defaults to 22), distro, major, minor, revision.

        The record is added to the session and flushed, not committed.

        Returns the System instance.
        Raises KeyError when a required key is missing.
        """
        name = kwargs["name"]
        session = State.get_db_session()

        try:
            system = session.query(System).filter(System.name == name).one()
        except NoResultFound:
            system = System(name=name)

        system.hostname = kwargs.get("hostname", name)
        # Ask the OS for the login name only when the caller supplied none;
        # a dict.get() default would call os.getlogin() unconditionally.
        system.username = (kwargs["username"]
                           if "username" in kwargs else os.getlogin())
        system.key_file = kwargs.get("key_file")
        system.use_tty = kwargs.get("use_tty", False)
        system.port = kwargs.get("port", 22)
        # No trailing commas below: "attr = value," would store a 1-tuple
        # instead of the value itself.
        system.remote_name = kwargs["remote_hostname"]
        system.kernel_version = kwargs["kernel_version"]
        system.os_distro = kwargs.get("distro")
        system.os_major_ver = kwargs.get("major")
        system.os_minor_ver = kwargs.get("minor")
        system.os_revision = kwargs.get("revision")

        session.add(system)
        session.flush()

        return system
Exemplo n.º 19
0
    def fetch_file_detail(rpm_detail: RpmDetail) -> FileDetail:
        """
        Return a FileDetail linked to ``rpm_detail`` on the current system.

        When several FileDetail rows are linked, the first one returned by
        the query is used.

        Raises ValueError when no linked FileDetail exists.
        """
        db_session = State.get_db_session()
        system_id = State.get_system().system_id

        linked = db_session.query(FileDetail).join(RpmFileDetailLink).join(
            RpmDetail).filter(
                RpmDetail.system_id == system_id,
                RpmDetail.rpm_detail_id == rpm_detail.rpm_detail_id,
            ).all()

        if not linked:
            raise ValueError("FileDetail cannot be loaded.")

        return linked[0]
Exemplo n.º 20
0
def package_installed(package_name: str) -> List[RpmInfo]:
    """
    Look up the RpmInfo records of a package on the source system.

    The name is compared for exact equality with RpmInfo.name, restricted
    to the current system.

    Returns the list of matching RpmInfo objects; the list is empty when
    nothing matches.
    """
    system: System = State.get_system()
    session: Session = State.get_db_session()

    by_name = session.query(RpmInfo).filter(
        RpmInfo.system_id == system.system_id,
        RpmInfo.name == package_name,
    )

    return by_name.all()
Exemplo n.º 21
0
def get_directory_contents(path_spec: str) -> List[FileDetail]:
    """
    List the current system's FileDetail rows located inside ``path_spec``.

    A trailing path separator is appended to ``path_spec`` when missing.
    Rows are kept only when the part of file_location after that directory
    prefix contains no further separator.

    NOTE(review): LIKE is used for the prefix match, so ``%`` or ``_`` in
    ``path_spec`` act as wildcards.
    """
    system: System = State.get_system()
    session: Session = State.get_db_session()

    directory = path_spec
    if not directory.endswith(os.path.sep):
        directory += os.path.sep
    pattern = directory + "%"

    # Position handed to substr(): the length of the directory prefix plus
    # one, which is the same number as len(pattern).
    child_start = len(directory) + 1
    remainder = func.substr(FileDetail.file_location, child_start)

    # NOTE(review): relies on strpos() returning 0 when the separator is
    # absent (PostgreSQL semantics, presumably).
    children: List[FileDetail] = session.query(FileDetail).filter(
        (FileDetail.system_id == system.system_id)
        & (FileDetail.file_location.like(pattern))
        & (func.strpos(remainder, os.path.sep) == 0)).all()

    return children
Exemplo n.º 22
0
def path_modified(path_spec: str) -> bool:
    """
    Report the ``file_changed`` flag recorded for a packaged file.

    path_spec is a full pathname on the source file system.  It is looked
    up in RpmDetail for the current system and that row's file_changed
    value is returned as stored.

    Raises RpmFileNotFound when no RpmDetail row matches the path.
    """
    system: System = State.get_system()
    session: Session = State.get_db_session()

    rpm_detail: RpmDetail = session.query(RpmDetail).filter(
        RpmDetail.system_id == system.system_id,
        RpmDetail.file_location == path_spec,
    ).one_or_none()

    if rpm_detail:
        return rpm_detail.file_changed

    raise RpmFileNotFound("Unable to locate file in RpmDetails")
Exemplo n.º 23
0
    def _run(self):
        """
        Flag yum configuration files of unknown origin as user data.

        Every entry returned by ``hutils.get_directory_contents`` for
        /etc/yum and /etc/yum.repos.d whose origin is
        FileOrigin.UnknownSource is switched to FileOrigin.UserData.  One
        commit is issued after both directories were processed.
        """
        session = State.get_db_session()
        yum_directories = ("/etc/yum", "/etc/yum.repos.d")

        for directory in yum_directories:
            for record in hutils.get_directory_contents(path_spec=directory):

                log.debug(
                    f"Yum configuration file name {record.file_location}.")

                # Leave files that already have a known origin untouched.
                if record.origin != FileOrigin.UnknownSource.name:
                    continue

                log.debug(f"Flagging {record.file_location} as UserData.")
                record.origin = FileOrigin.UserData.name
                session.add(record)

        session.commit()
Exemplo n.º 24
0
    def _add_packages(self):
        """
        Append one install task per package of this system to ``self._yaml``.

        Packages are read from RpmInfo ordered by installation_date, 10 rows
        at a time.  Each task is an OrderedDict with a ``name`` and a
        ``package`` entry, the latter produced by ``self._get_install``.
        """
        log.debug("entering _add_packages")

        session = State.get_db_session()

        # gpg-pubkey entries are not real packages, so they are skipped:
        # https://unix.stackexchange.com/questions/190203/what-are-gpg-pubkey-packages
        installed = (
            session.query(RpmInfo)
            .join(System)
            .filter(
                System.system_id == self.system.system_id,
                not_(RpmInfo.name == "gpg-pubkey"),
            )
            .order_by(RpmInfo.installation_date)
            .yield_per(10)
        )

        for package in installed:
            install_task = OrderedDict(
                name=f"Install {package.name}",
                package=self._get_install(package.name),
            )
            self._yaml.append(install_task)

        log.debug("exiting _add_packages")
Exemplo n.º 25
0
def main():
    """
    Interactively print the recorded origin of files matching a path.

    The system name comes from the parsed command-line arguments and the
    path is read from standard input.  A path ending in the path separator
    lists everything below it; any other path is matched as typed (with
    LIKE semantics).
    """
    system_name = utils.os.GetArguments().parse().name

    print('Path to analyze:')
    path = input()
    print('OK. Examining ' + path + ' on ' + system_name)

    system: System = State.get_system(name=system_name)
    session: Session = State.get_db_session()

    # Only a trailing separator turns the path into a prefix search.
    pattern = path + "%" if path.endswith(os.path.sep) else path

    matches: List[FileDetail] = session.query(FileDetail).filter(
        FileDetail.system_id == system.system_id,
        FileDetail.file_location.like(pattern),
    ).all()

    for detail in matches:
        print(detail.file_location + ' is ' + detail.origin)
Exemplo n.º 26
0
    def _run_directory_match_update(self):
        """
        Link RpmDetail rows to FileDetail rows through symlinked directories.

        An RpmDetail path that starts with the file_location of a
        ResolvedSymlinks row of target_type "D" is rewritten by replacing
        that prefix with the row's resolved_location; the FileDetail found at
        the rewritten path (same system) is paired with the RpmDetail.  Pairs
        that have no RpmFileDetailLink yet are inserted into
        RpmFileDetailLink, then the session is flushed and committed and
        ``analyze_database`` is run.

        Returns the row count reported for the insert.

        NOTE(review): the prefix comparison is purely textual and does not
        require a path separator after the prefix -- confirm that a symlink
        such as /a/lib cannot wrongly match /a/lib64/... .
        """

        rd: RpmDetail = aliased(RpmDetail)
        fd: FileDetail = aliased(FileDetail)
        lk: RpmFileDetailLink = aliased(RpmFileDetailLink)

        # Query shape, in order:
        #   join ResolvedSymlinks: same system, target_type "D", RPM path
        #       strictly longer than the symlink path and starting with it;
        #   join fd: same system, location == resolved_location + the part
        #       of the RPM path that follows the symlink prefix;
        #   outer join lk + "IS NULL" filter: keep only pairs not yet linked;
        #   file_type "S" rows are excluded (presumably symlinks -- confirm);
        #   distinct(): collapse pairs produced by more than one symlink row.
        query = State.get_db_session().query(
            rd.rpm_detail_id, fd.file_detail_id).join(
                ResolvedSymlinks, (rd.system_id == ResolvedSymlinks.system_id)
                & (ResolvedSymlinks.target_type == "D") &
                (func.length(rd.file_location) > func.length(
                    ResolvedSymlinks.file_location)) &
                (ResolvedSymlinks.file_location == func.substr(
                    rd.file_location, 1,
                    func.length(ResolvedSymlinks.file_location)))).join(
                        fd, (fd.system_id == ResolvedSymlinks.system_id) &
                        (fd.file_location
                         == (ResolvedSymlinks.resolved_location + func.substr(
                             rd.file_location,
                             func.length(ResolvedSymlinks.file_location) + 1)))
                    ).outerjoin(lk, (lk.file_detail_id == fd.file_detail_id) &
                                (lk.rpm_detail_id == rd.rpm_detail_id)).filter(
                                    (rd.system_id == self.system_id)
                                    & (lk.rpm_file_detail_link_id == None)
                                    & (func.coalesce(fd.file_type, "") != "S")
                                ).distinct()

        # INSERT ... SELECT: the selected id pairs become the new link rows.
        insert_dml = insert(RpmFileDetailLink).from_select([
            rd.rpm_detail_id,
            fd.file_detail_id,
        ], query)

        result = State.get_db_session().execute(insert_dml)
        State.get_db_session().flush()
        State.get_db_session().commit()
        self.analyze_database()
        return result.rowcount
Exemplo n.º 27
0
 def __set_session(self):
     """
     Store the session obtained from the global State on this instance as
     ``_db_session``.
     """
     session = State.get_db_session()
     self._db_session = session
Exemplo n.º 28
0
    def store_packages(self, **kwargs):
        """
        Replace the stored RPM inventory of this system.

        Existing RpmFileDetailLink, RpmDetail and RpmInfo rows of the system
        are deleted and committed first.  ``pkg_data`` (keyword argument) is
        then turned into rows by ``self._convert_results``: one RpmInfo is
        created per distinct package key and one RpmDetail mapping per row,
        bulk-inserted in batches and committed at the end.

        Fields whose text is '(none)' are stored as NULL, as are numeric
        fields and installation dates that are missing or cannot be parsed.
        """

        def _to_int(value):
            # Missing, blank or non-numeric text becomes NULL.
            if not value:
                return None
            try:
                return int(value)
            except ValueError:
                return None

        def _to_datetime(value):
            # A missing date must never reach parser.parse(): only
            # ValueError is handled there, so passing None would abort the
            # whole import.
            if not value:
                return None
            try:
                return parser.parse(value)
            except ValueError:
                return None

        pkg_data = kwargs.get("pkg_data")
        session = State.get_db_session()
        batch_size = 50000

        log.info("Storing packages...")

        # Remove any existing records
        fdl = alias(RpmFileDetailLink)
        rd = alias(RpmDetail)

        delete_fdl = delete(fdl).where(
            exists(
                select(
                    [1]).where(rd.c.system_id == self.system.system_id).where(
                        rd.c.rpm_detail_id == fdl.c.rpm_detail_id)))

        # Run the DELETE as its own statement rather than inside the log
        # message, so the side effect is visible when reading the code.
        pruned_links = session.execute(delete_fdl).rowcount
        log.info(f"Pruned {pruned_links} links.")

        session.query(RpmDetail).filter(
            RpmDetail.system == self.system).delete()
        log.info("Pruned existing RpmDetail records.")

        session.query(RpmInfo).filter(
            RpmInfo.system == self.system).delete()
        log.info("Pruned existing RpmInfo records.")

        session.flush()
        session.commit()

        fieldnames = (
            'package_name',
            'version',
            'release',
            'architecture',
            'installation_tid',
            'installation_date',
            'file_name',
            'file_size',
            'digest',
            'file_class',
            'flag',
            'source_rpm',
            'rpm_name',
        )

        rpms = {}
        objects = []
        files = 0

        for row in self._convert_results(file_iter=pkg_data,
                                         fieldnames=fieldnames):

            # '(none)' marks an absent value in the input.
            record = {
                key: (None if value == '(none)' else value)
                for key, value in row.items()
            }

            # NOTE(review): the key ignores 'release', so two installed
            # builds of the same name/version/architecture share a single
            # RpmInfo -- confirm this is intended.
            rpm_key = '+'.join([
                record['package_name'] or 'none',
                record['version'] or 'none',
                record['architecture'] or 'none',
            ])

            rpm = rpms.get(rpm_key)

            if rpm is None:
                rpm = RpmInfo(
                    name=record['package_name'],
                    version=record['version'],
                    release=record['release'],
                    filename=record['rpm_name'],
                    architecture=record['architecture'],
                    installation_tid=_to_int(record['installation_tid']),
                    installation_date=_to_datetime(
                        record['installation_date']),
                    system_id=self.system.system_id,
                )

                session.add(rpm)
                # Flushed before rpm.rpm_info_id is read below (presumably
                # this is what assigns the generated key).
                session.flush()

                rpms[rpm_key] = rpm

            objects.append({
                "rpm_info_id": rpm.rpm_info_id,
                "file_location": record['file_name'],
                "file_size": _to_int(record['file_size']),
                "digest": record['digest'] or None,
                "file_info": record['file_class'],
                "file_flag": record['flag'],
                "system_id": self.system.system_id,
                "file_changed": None,
            })

            files += 1

            # Write in batches so the pending list stays bounded.
            if files % batch_size == 0:
                log.info(f"{files}")
                session.bulk_insert_mappings(RpmDetail, objects)
                session.flush()
                objects.clear()

        if objects:
            session.bulk_insert_mappings(RpmDetail, objects)

        objects.clear()
        session.flush()
        session.commit()

        log.info('..done')
Exemplo n.º 29
0
 def analyze_database(self):
     """
     Execute a bare ``ANALYZE;`` statement on the current database session.
     """
     session = State.get_db_session()
     session.execute("ANALYZE;")
Exemplo n.º 30
0
    def store_files(self, **kwargs):
        """
        Replace the stored file inventory of this system.

        Existing RpmFileDetailLink rows, file-storage links and FileDetail
        rows of the system are deleted and committed first.  ``file_iter``
        (keyword argument) is then turned into dicts by
        ``self._convert_results`` and bulk-inserted as FileDetail rows in
        batches of 50000, followed by a final commit.

        Paths under /dev/, /tmp/ and /proc/, and *.log files under
        /var/log/, get the origin FileOrigin.EphemeralContent; everything
        else starts as FileOrigin.UnknownSource.
        """
        file_iter = kwargs.get("file_iter")
        session = State.get_db_session()

        log.info("Storing files...")

        # Remove any existing records
        fdl = alias(RpmFileDetailLink)
        fd = alias(FileDetail)

        delete_fdl = delete(fdl).where(
            exists(
                select(
                    [1]).where(fd.c.system_id == self.system.system_id).where(
                        fd.c.file_detail_id == fdl.c.file_detail_id)))

        pruned_links = session.execute(delete_fdl).rowcount
        log.info(f"Pruned {pruned_links} links.")

        # delete FileStorage links
        FileDifference(system=self.system).clear_system_file_storage()

        session.query(FileDetail).filter(
            FileDetail.system == self.system).delete()

        log.info("Pruned existing FileDetails.")

        session.flush()
        session.commit()

        ephemeral_roots = ("/dev/", "/tmp/", "/proc/")
        pending = []
        files = 0

        for file_dict in self._convert_results(file_iter=file_iter):
            file_path = file_dict.get("path", "")

            is_ephemeral = (
                file_path.startswith(ephemeral_roots)
                or (file_path.startswith("/var/log/")
                    and file_path.endswith(".log")))
            if is_ephemeral:
                src = FileOrigin.EphemeralContent
            else:
                src = FileOrigin.UnknownSource

            pending.append({
                "system_id": self.system.system_id,
                "file_location": file_path or None,
                "file_type": file_dict['type'],
                "owner_uid": file_dict['uid'],
                "owner_gid": file_dict['gid'],
                "owner_name": file_dict['user'] or None,
                "owner_group": file_dict['group'] or None,
                "file_mode": file_dict['mode'] or None,
                "file_target": file_dict['target'] or None,
                "target_type": file_dict['target_type'] or None,
                "md5_digest": file_dict['md5'] or None,
                "sha256_digest": file_dict['sha256'] or None,
                "file_info": file_dict['info'] or None,
                "file_perm_mode": file_dict['perm'] or None,
                "origin": src.name,
            })

            files += 1

            # Write in batches so the pending list stays bounded.
            if files % 50000 == 0:
                log.info(f"{files}")
                session.bulk_insert_mappings(FileDetail, pending)
                session.flush()
                pending.clear()

        if pending:
            session.bulk_insert_mappings(FileDetail, pending)

        pending.clear()
        session.flush()
        session.commit()

        log.info('..done')