def CollectStatsFromFiles(self, file_list, catalog_file, force_unpack=False):
  """Collects statistics for a list of svr4 package stream files.

  Args:
    file_list: Paths of the package stream files to process.
    catalog_file: The catalog file the packages belong to.
    force_unpack: If True, packages are re-unpacked even when cached
        statistics already exist.

  Returns:
    A list of collected per-package statistics, in catalog order.

  Raises:
    PackageError: If the package list turns out to be empty.
  """
  shown = file_list if len(file_list) <= 5 else file_list[:5] + ["...more..."]
  self.logger.debug("Processing: %s, please be patient", shown)
  stats_list = StatsListFromCatalog(file_list, catalog_file, self.debug)
  total_packages = len(stats_list)
  if not total_packages:
    raise PackageError("The length of package list is zero.")
  # Consume the list from the tail with pop() so that each stats object is
  # garbage collected as soon as it is processed; its destructor (__del__)
  # removes the temporary directory from disk, which keeps disk usage
  # bounded over a whole catalog.  Reversing first keeps the results in
  # the original catalog order.
  stats_list.reverse()
  self.logger.info("Juicing the svr4 package stream files...")
  if self.debug:
    pbar = mute_progressbar.MuteProgressBar()
  else:
    pbar = progressbar.ProgressBar()
    pbar.maxval = total_packages
    pbar.start()
  data_list = []
  done = 0
  while stats_list:
    stats = stats_list.pop()
    stats.CollectStats(force=force_unpack)
    data_list.append(stats.GetAllStats())
    done += 1
    pbar.update(done)
  pbar.finish()
  return data_list
def _ImportFiles(self, data, include_prefixes=None, show_progress=False): logging.debug("_ImportFiles()") osrel = data["osrel"] arch = data["arch"] contents = data["contents"] catalog = checkpkg_lib.Catalog() srv4_files_to_catalog = set() # The progressbar library doesn't like handling larger numbers # It displays up to 99% if we feed it a maxval in the range of hundreds of # thousands. progressbar_divisor = int(len(contents) / 1000) if progressbar_divisor < 1: progressbar_divisor = 1 update_period = 1L count = itertools.count() if show_progress: pbar = progressbar.ProgressBar() else: pbar = mute_progressbar.MuteProgressBar() pbar.maxval = len(contents) / progressbar_divisor pbar.start() cleaned_pkgs = set() for d in contents: i = count.next() if not i % update_period and (i / progressbar_divisor) <= pbar.maxval: pbar.update(i / progressbar_divisor) for pkgname in d["pkgnames"]: pkgname = self.SanitizeInstallContentsPkgname(pkgname) # If a package is a packge of our own, # it should not be imported that way; own packages should be # only managed by adding them to specific catalogs. skip_pkgname = False for prefix in common_constants.OWN_PKGNAME_PREFIXES: if pkgname.startswith(prefix): skip_pkgname = True break # Prefix whilelist - whitelisted prefixes win. if include_prefixes: for prefix_to_include in include_prefixes: if pkgname.startswith(prefix_to_include): skip_pkgname = False break if skip_pkgname: continue # We'll create one file instance for each package try: sqo_srv4 = self._GetFakeSrv4(pkgname, osrel, arch) except sqlobject.main.SQLObjectNotFound, e: print d raise if sqo_srv4 not in cleaned_pkgs: sqo_srv4.RemoveAllCswFiles() cleaned_pkgs.add(sqo_srv4) sqo_pkginst = self._GetPkginst(pkgname) f_path, f_basename = os.path.split(d["path"]) # This is really slow (one run ~1h), but works. # To speed it up, raw SQL + cursor.executemany() could be used, but # there's a incompatibility between MySQL and sqlite drivers: # MySQL: INSERT ... 
VALUES (%s, %s, %s); # sqlite: INSERT ... VALUES (?, ?, ?); # For now, using the sqlobject ORM which is slow, but at least # handles compatibility issues. csw_file = m.CswFile(pkginst=sqo_pkginst, line=d["line"], path=f_path, basename=f_basename, srv4_file=sqo_srv4) srv4_files_to_catalog.add(sqo_srv4)
def CollectStatsFromCatalogEntries(self, catalog_entries, force_unpack=False):
  """Collects package statistics via the collect_pkg_metadata.py helper.

  Spawns the helper script once per catalog entry and gathers the md5 sums
  it reports back.

  Args:
    catalog_entries: Dicts carrying at least the 'file_basename',
        'pkg_path' and 'md5sum' keys, one per package.
    force_unpack: If True, the helper is told to re-unpack the package.

  Returns:
    A list of md5 sums of collected statistics.

  Raises:
    PackageError: If the entry list is empty, or if the md5 sum reported
        by the helper does not match the one recorded in the catalog.
  """
  preview = [x['file_basename'] for x in catalog_entries]
  if len(preview) > 5:
    preview = preview[:5] + ["...more..."]
  self.logger.debug("Processing: %s, please be patient", preview)
  total_packages = len(catalog_entries)
  if not total_packages:
    raise PackageError("The length of package list is zero.")
  self.logger.info("Juicing the svr4 package stream files...")
  if self.debug:
    pbar = mute_progressbar.MuteProgressBar()
  else:
    pbar = progressbar.ProgressBar(widgets=[
        progressbar.widgets.Percentage(), ' ',
        progressbar.widgets.ETA(), ' ',
        progressbar.widgets.Bar()])
  pbar.maxval = total_packages
  pbar.start()
  # The helper script lives next to this module.
  base_dir, _ = os.path.split(__file__)
  metadata_script = os.path.join(base_dir, "collect_pkg_metadata.py")
  md5_sum_list = []
  for idx, catalog_entry in enumerate(catalog_entries, 1):
    cmd = [metadata_script]
    stderr_file = subprocess.PIPE
    if self.debug:
      cmd.append('--debug')
      # In debug mode the helper's stderr passes straight through.
      # NOTE(review): in the non-debug path stderr goes to a PIPE; assumes
      # shell.ShellCommand drains it -- confirm, or a chatty helper could
      # block on a full pipe buffer.
      stderr_file = None
    if force_unpack:
      cmd.append('--force-unpack')
    cmd.extend(['--input', catalog_entry['pkg_path']])
    ret_code, stdout, stderr = shell.ShellCommand(
        cmd, allow_error=False, stderr=stderr_file)
    try:
      data_back = cjson.decode(stdout)
    except cjson.DecodeError:
      logging.fatal('Could not deserialize %r', stdout)
      raise
    if data_back['md5_sum'] != catalog_entry['md5sum']:
      msg = ('Unexpected file content: on disk (or in catalog) the file '
             '%r (%r) has MD5 sum %r but it turned out to be %r as '
             'seen by collect_pkg_metadata.py. '
             'We cannot continue, because we have no '
             'access to the data we are asked to examine. '
             'This can happen when you run mgar on intel and sparc in '
             'parallel, and you have some arch=all packages in the '
             'package set. This error will not happen if you run '
             'mgar platforms.'
             % (catalog_entry['file_basename'],
                catalog_entry['pkg_path'],
                catalog_entry['md5sum'],
                data_back['md5_sum']))
      raise PackageError(msg)
    md5_sum_list.append(data_back['md5_sum'])
    pbar.update(idx)
  pbar.finish()
  return md5_sum_list
def GetProgressBar(self):
  """Returns a progress bar object appropriate for the current settings.

  A real progress bar is handed out only when progress display is
  requested and debug mode is off; otherwise a no-op stand-in is
  returned so callers can use the same update/finish calls either way.
  """
  wants_real_bar = self.show_progress and not self.debug
  if wants_real_bar:
    return progressbar.ProgressBar()
  return mute_progressbar.MuteProgressBar()
def _GetPbar(self, show_progress):
  """Returns a real progress bar when show_progress is true, a mute one otherwise."""
  return (progressbar.ProgressBar()
          if show_progress
          else mute_progressbar.MuteProgressBar())