def test_get_manager(self):

    # stdout is attached to a tty
    with redirect_output('stdout', self.tty.stdout):
        self.assertTrue(sys.stdout.isatty())
        manager = enlighten.get_manager(unit='knights')
        self.assertIsInstance(manager, enlighten.Manager)
        self.assertTrue('unit' in manager.defaults)

    # stdout is not attached to a tty
    with redirect_output('stdout', OUTPUT):
        self.assertFalse(sys.stdout.isatty())
        manager = enlighten.get_manager(unit='knights')
        self.assertIsInstance(manager, enlighten.Manager)
        self.assertTrue('unit' in manager.defaults)
def call_xml_cleaner(args):
    '''
    Takes in a list of files and args and generates new files.
    '''
    list_of_file_paths = get_file_directories(args)
    dir_info = directory_info(args)
    root_name = dir_info.get_root_name()

    manager = enlighten.get_manager()
    enterprise = manager.counter(total=len(list_of_file_paths),
                                 desc='Tidying files:', unit='files')

    name = get_subfolder_name(list_of_file_paths[0], root_name)
    for path in list_of_file_paths:
        name = get_subfolder_name(path, root_name)
        filename = '.\\data\\04 - clean\\' + name + '.txt'
        list_to_create = []
        currCenter = manager.counter(total=get_file_length(path),
                                     unit='lines', leave=False)
        with open(path, encoding='utf-8') as file:
            for line in file:
                list_to_create.append(clean_text_from_xml(line))
                if len(list_to_create) > 1000:
                    create_file(list_to_create, filename, mode='a')
                    list_to_create = []
                currCenter.update()
        if list_to_create:
            create_file(list_to_create, filename, mode='a')
        currCenter.close()
        enterprise.update()
    enterprise.close()
def query_data(self):
    t = Timer("Querying USPTO: {}".format(self.query_json))
    count_patents = self.query_sounding()
    count_to_collect = self.limit if self.limit is not None and self.limit < count_patents else count_patents
    pages = math.ceil(count_to_collect / self.per_page)
    logger.info("Collecting {}/{} docs in {} page{}".format(
        count_to_collect, count_patents, pages, "s" if pages > 1 else ""))

    manager = enlighten.get_manager()
    ticker = manager.counter(total=pages, desc='Ticks', unit='ticks')

    for i in range(pages):
        if Config.ENV_NAME != "local":
            logger.info("{}/{}".format(i, pages))
        page_df = self.query_paginated(i + 1, self.per_page)
        if self.df is None:
            self.df = page_df
        else:
            self.df = self.df.append(page_df, ignore_index=True)
        ticker.update()
    ticker.close()

    self.handle_external()
    t.log()
    logger.info("Collected {} edges".format(self.df.shape[0]))
def downloadData(param: Param, download: bool = True):
    '''Download user data (if {download} is True) to json files, merge them into
    a flat pandas.DataFrame, and write it to disk.'''
    logging.info(f"{param.filePath().name.replace('.','|')}")
    if download:
        subMethod = param.splitMethod(lower=True)
        for f in param.filePath(glob='*json'):
            f.unlink()
        pbarManager = enlighten.get_manager()
        with pbarManager.counter(unit='page', leave=False) as pbar:
            while param.page <= param.nPages:
                fileName = param.filePath(ext=f'.{param.page:04d}.json')
                response = getReq(param=param, pbarManager=pbarManager, collapse=False)
                param.page = int(response.get(subMethod).get('@attr').get('page'))
                param.nPages = int(response.get(subMethod).get('@attr').get('totalPages'))
                pbar.total = param.nPages  # update total without resetting time elapsed (https://stackoverflow.com/a/58961015/13019084)
                pbar.update()
                param.filePath().parent.mkdir(exist_ok=True)
                with open(file=fileName, mode='w') as jsonF:
                    json.dump(obj=response, fp=jsonF)
                param.page += 1
                time.sleep(param.sleep)
        pbarManager.stop()
    DF = loadJSON(param)
    df = flattenDF(param=param, DF=DF, writeToDisk=True)
    if param.splitMethod() in ['TopArtists', 'TopAlbums', 'TopTracks']:
        writeCSV(param=param, df=df)
def convert_video_progress_bar(source: str, dest: str, manager=None):
    if manager is None:
        manager = enlighten.get_manager()
    name = source.rsplit(os.path.sep, 1)[-1]
    if get_bitdepth(source).is_10bit:
        args = CONVERT_COMMAND_10Bits.format(source=source, dest=dest)
    else:
        args = CONVERT_COMMAND.format(source=source, dest=dest)
    proc = expect.spawn(args, encoding='utf-8')
    pbar = None
    try:
        proc.expect(pattern_duration)
        total = sum(map(lambda x: float(x[1]) * 60 ** x[0],
                        enumerate(reversed(proc.match.groups()[0].strip().split(':')))))
        cont = 0
        pbar = manager.counter(total=100, desc=name, unit='%',
                               bar_format=BAR_FMT, counter_format=COUNTER_FMT)
        while True:
            proc.expect(pattern_progress)
            progress = sum(map(lambda x: float(x[1]) * 60 ** x[0],
                               enumerate(reversed(proc.match.groups()[0].strip().split(':')))))
            percent = progress / total * 100
            pbar.update(percent - cont)
            cont = percent
    except expect.EOF:
        pass
    finally:
        if pbar is not None:
            pbar.close()
    proc.expect(expect.EOF)
    res = proc.before
    res += proc.read()
    exitstatus = proc.wait()
    if exitstatus:
        raise ffmpeg.Error('ffmpeg', '', res)
class CountTrackable(BaseTrackable):

    _manager = enlighten.get_manager()

    def __init__(self, name: str, total: int, *, parent: BaseTrackable = None):
        super().__init__(name, parent=parent)
        self._total = total
        self._pbar = self._manager.counter(desc=name, total=total)

    @classmethod
    def reset_all(cls):
        cls._manager = enlighten.get_manager()

    @property
    def total(self):
        return self._total

    def update(self):
        self._pbar.update()
        if self._total is not None and self._pbar.count > self._total:
            raise PBarOutOfBound(f'Progress bar ran out of bound.')
        for trackable in self.children:
            trackable.reset()

    def reset(self):
        self._pbar.start = time.time()
        self._pbar.count = 0
        self._pbar.refresh()

    @property
    def value(self):
        return self._pbar.count
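# A self-contained sketch (not from the original source) of the reset pattern the
# class above relies on: rewinding an enlighten counter by zeroing its count and
# start time, then refreshing, instead of creating a fresh counter per epoch.
import time
import enlighten

manager = enlighten.get_manager()
pbar = manager.counter(total=5, desc='epoch')
for _ in range(5):
    pbar.update()
# rewind for the next epoch
pbar.count = 0
pbar.start = time.time()
pbar.refresh()
manager.stop()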
def __init__(self, graph=None) -> None:
    super().__init__(graph)
    self.__display_manager = enlighten.get_manager()
    self.__status_bar: enlighten.StatusBar = None
    self.__all_nodes = set()
    self.__running = set()
    self.__completed = set()
def _insideEllipses(self):
    self._memoryUsage()
    for inst in self.list_inst:
        print('Computing ellipses for %s sample' % inst.label)
        manager = enlighten.get_manager()
        pbar = manager.counter(total=len(self.sigmas) * inst.size,
                               desc='Progress', unit='Events %s' % inst.label)
        list_inside = []
        for idx, sigma in enumerate(self.sigmas):
            print('....... Counting ellipse with sigma = %0.1f' % sigma)
            inside = np.zeros(inst.size)
            for i in range(inst.size):
                masspoint = inst.data.iloc[i, :][['jj_M', 'lljj_M']]
                check = self.window.isInWindow(self.center, sigma, masspoint)
                if check:
                    inside[i] = 1
                pbar.update()
            inside = pd.DataFrame(inside)
            inside.columns = [self.sigmas_columns[idx]]
            list_inside.append(inside)
            self._memoryUsage()
        print('....... Concatenating the results into a single DF')
        manager.stop()
        for inside in list_inside:
            inst.data = pd.concat([inst.data, inside], axis=1)
        self._memoryUsage()
def __init__(self):
    self.CS_LOW = '['
    self.CS_HIGH = ']'
    self.MISO_BEGIN = '('
    self.MISO_END = ')'
    self.SECTOR_SIZE = 4 * KB
    self.MEM_PATTERN_LABELS = [
        "Timestamp(ms)",
        "Operation",
        "Address",
        "Value"
    ]
    self._clock = 0
    self._company = None
    self._chip_size = 0
    self._def_address_size = 0
    self._address_size = 0
    self._model = None
    self._frequency = None
    self._clock_width = None
    self._op_codes = {}
    self._config_done = False
    self.progressbar = enlighten.get_manager()
def __init__(self):
    super().__init__()
    # Initialize the status bars for the header and the targets.
    self.manager = enlighten.get_manager()
    self.status_header = None
    self.status_target = {}
def main():
    """ Main function """

    with enlighten.get_manager() as manager:
        process_files(manager)
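# Hedged sketch of a process_files(manager) that would fit the context-manager
# pattern above; the function body is assumed, not from the original source.
# When the with-block exits, the manager's __exit__ calls stop() automatically.
import random
import time
import enlighten

def process_files(manager, total=20):
    pbar = manager.counter(total=total, desc='Files', unit='files')
    for _ in range(total):
        time.sleep(random.uniform(0.01, 0.05))  # simulated work per file
        pbar.update()

with enlighten.get_manager() as manager:
    process_files(manager)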
def main():
    """ Main function """

    with enlighten.get_manager() as manager:
        status = manager.status_bar(status_format=u'Enlighten{fill}Stage: {demo}{fill}{elapsed}',
                                    color='bold_underline_bright_white_on_lightslategray',
                                    justify=enlighten.Justify.CENTER, demo='Initializing',
                                    autorefresh=True, min_delta=0.5)
        docs = manager.term.link('https://python-enlighten.readthedocs.io/en/stable/examples.html',
                                 'Read the Docs')
        manager.status_bar(' More examples on %s! ' % docs,
                           position=1, fill='-',
                           justify=enlighten.Justify.CENTER)

        initialize(manager, 15)
        status.update(demo='Loading')
        load(manager, 40)
        status.update(demo='Testing')
        run_tests(manager, 20)
        status.update(demo='Downloading')
        download(manager, 2.0 * 2 ** 20)
        status.update(demo='File Processing')
        process_files(manager)
def main():
    global MANAGER
    logging.basicConfig(level=logging.INFO)
    first_pass_parser, git_repo_group = Auditor._build_first_pass_parser()
    first_pass_args, extras = first_pass_parser.parse_known_args()
    first_pass_args_dict = vars(first_pass_args)
    with _DB(**first_pass_args_dict) as db:
        repo_reader = _RepoReader(db, **first_pass_args_dict)
        jira_reader = _JiraReader(db, **first_pass_args_dict)
        second_pass_parser = Auditor._build_second_pass_parser(
            repo_reader, first_pass_parser, git_repo_group)
        second_pass_args = second_pass_parser.parse_args(extras, first_pass_args)
        second_pass_args_dict = vars(second_pass_args)
        auditor = Auditor(repo_reader, jira_reader, db, **second_pass_args_dict)
        with enlighten.get_manager() as MANAGER:
            if second_pass_args.populate_from_git:
                auditor.populate_db_from_git()
            if second_pass_args.populate_from_jira:
                auditor.populate_db_from_jira()
            if second_pass_args.report_new_for_release_line:
                release_line = second_pass_args.report_new_for_release_line
                auditor.report_new_for_release_line(release_line)
            if second_pass_args.report_new_for_release_branch:
                release_branch = second_pass_args.report_new_for_release_branch
                auditor.report_new_for_release_branch(release_branch)
def add_duplicates(df_endog, bin_size_weeks):
    start_date = df_endog["patent_date"].min()
    t_from_start = ((df_endog["patent_date"] - start_date) / bin_size_weeks).astype(int)
    df_endog["t"] = df_endog["t"] + t_from_start

    # iterate through rows where next t is zero - so iterating through last entry in each series
    data = []
    ind = []

    # a mask where true if row after is less than row before
    mask = (df_endog["t"].shift(-1) < df_endog["t"])

    manager = enlighten.get_manager()
    ticker = manager.counter(
        total=df_endog[mask].shape[0],
        desc='Patent Samples Transformed',
        unit='patents'
    )

    for row in df_endog[mask][["log(knowledge_forward_cites)", "t", "patent_date"]].itertuples():
        index, k, t, date = row
        # append the last k entry as many times as necessary to reach the present
        for i in range(int(df_endog["t"].max()) - int(t)):
            data.append((k, t + 1 + i, date))
            ind.append(index)
        ticker.update()
    ticker.close()

    to_add = pd.DataFrame(data, index=ind,
                          columns=["log(knowledge_forward_cites)", "t", "patent_date"])
    df_endog = df_endog.append(to_add)
    return df_endog
def folder(path: Path = typer.Option(default='.', exists=True, file_okay=True,
                                     dir_okay=True, readable=True, resolve_path=True)):
    """
    Convert all videos in a folder
    """
    videos = []
    for dir, folders, files in os.walk(path):
        base_dir = Path(dir)
        for file in files:
            file = base_dir / file
            guess = filetype.guess(str(file))
            if guess and 'video' in guess.mime:
                videos.append(file)
    manager = enlighten.get_manager()
    pbar = manager.counter(total=len(videos), desc='Video', unit='videos')
    for video in videos:
        typer.secho(f'Processing: {video}')
        if get_codec(str(video)) != 'hevc':
            new_path = convertion_path(video)
            convert_video(str(video), str(new_path))
            os.remove(str(video))
            shutil.move(new_path, str(video))
        pbar.update()
def __init__(self, config_filename: str, selected_analysis_options: params.SelectedAnalysisOptions,
             manager_task_name: str, **kwargs: Any):
    self.config_filename = config_filename
    self.selected_analysis_options = selected_analysis_options
    self.task_name = manager_task_name

    # Retrieve YAML config for manager configuration
    # NOTE: We don't store the overridden selected_analysis_options because in principle they depend
    #       on the selected task. In practice, such options are unlikely to vary between the manager
    #       and the analysis tasks. However, the validation cannot handle the overridden options
    #       (because the leading hadron bias enum is converted into the object). So we just use
    #       the overridden options when formatting the output prefix (where they are required to
    #       determine the right path), and then pass the non-overridden values to the analysis objects.
    self.config, overridden_selected_analysis_options = analysis_config.read_config_using_selected_options(
        task_name=self.task_name,
        config_filename=self.config_filename,
        selected_analysis_options=self.selected_analysis_options)
    # Determine the formatting options needed for the output prefix
    formatting_options = analysis_config.determine_formatting_options(
        task_name=self.task_name, config=self.config,
        selected_analysis_options=overridden_selected_analysis_options)

    # Additional helper variables
    self.task_config = self.config[self.task_name]
    self.output_info = analysis_objects.PlottingOutputWrapper(
        # Format to ensure that the selected analysis options are filled in.
        output_prefix=self.config["outputPrefix"].format(**formatting_options),
        printing_extensions=self.config["printingExtensions"],
    )

    # Monitor the progress of the analysis.
    self._progress_manager = enlighten.get_manager()
def main():
    """ Main function """

    with enlighten.get_manager() as manager:
        multiprocess_systems(manager, random.randint(*SYSTEMS))
def build_nltk_wrappers():
    imports = _walk(nltk)
    imports += _walk(nltk.cluster)
    imports += _walk(gensim.models)
    # imports += _walk(nltk.chunk.named_entity)
    imports += _walk(nltk.tag)

    manager = enlighten.get_manager()
    counter = manager.counter(total=len(imports), unit="classes")

    path = Path(__file__).parent / "_generated.py"
    imports = set(imports)

    with open(path, "w") as fp:
        fp.write(
            textwrap.dedent(f"""
            # AUTOGENERATED ON {datetime.datetime.now()}
            ## DO NOT MODIFY THIS FILE MANUALLY

            from autogoal.grammar import Continuous, Discrete, Categorical, Boolean
            from autogoal.contrib.nltk._builder import NltkStemmer, NltkTokenizer, NltkLemmatizer, NltkTagger, NltkTrainedTagger
            from autogoal.kb import *
            from autogoal.utils import nice_repr
            from numpy import inf, nan
            """))

        for cls in imports:
            counter.update()
            _write_class(cls, fp)

    black.reformat_one(path, True, black.WriteBack.YES, black.FileMode(), black.Report())

    counter.close()
    manager.stop()
def combine_games_for_season(self):
    result = audit_report_season_prompt(self.app.audit_report)
    if result.failure:
        return result
    self.scrape_year = result.value
    self.pbar_manager = enlighten.get_manager()
    self.init_progress_bars(game_date=self.all_dates_in_season[0])
    subprocess.run(["clear"])
    for game_date in self.all_dates_in_season:
        if self.every_eligible_game_is_combined():
            num_days_remaining = self.get_number_of_days_remaining()
            self.update_progress_bars(game_date)
            self.date_progress_bar.update(num_days_remaining)
            LOGGER.info(f"Processed all eligible games for MLB {self.scrape_year}.")
            time.sleep(1.5)
            break
        game_ids = self.date_game_id_map.get(game_date, None)
        if not game_ids:
            self.update_progress_bars(game_date)
            self.date_progress_bar.update()
            time.sleep(0.75)
            continue
        result = self.combine_selected_games(game_date, game_ids)
        if result.failure:
            return result
        self.date_progress_bar.update()
    self.close_progress_bars()
    return Result.Ok()
def build_sklearn_wrappers():
    imports = _walk(sklearn)

    manager = enlighten.get_manager()
    counter = manager.counter(total=len(imports), unit="classes")

    path = Path(__file__).parent / "_generated.py"

    with open(path, "w") as fp:
        fp.write(
            textwrap.dedent(
                f"""
                # AUTOGENERATED ON {datetime.datetime.now()}
                ## DO NOT MODIFY THIS FILE MANUALLY

                from numpy import inf, nan

                from autogoal.grammar import Continuous, Discrete, Categorical, Boolean
                from autogoal.contrib.sklearn._builder import SklearnEstimator, SklearnTransformer
                from autogoal.kb import *
                """
            )
        )

        for cls in imports:
            counter.update()
            _write_class(cls, fp)

    black.reformat_one(
        path, True, black.WriteBack.YES, black.FileMode(), black.Report()
    )

    counter.close()
    manager.stop()
def folder(path: Path = typer.Argument(default='.', exists=True, file_okay=True,
                                       dir_okay=True, readable=True, resolve_path=True),
           ignore_extension: str = typer.Option(default=None),
           ignore_path: Path = typer.Option(default=None, exists=True, file_okay=True,
                                            dir_okay=True, readable=True, resolve_path=True)):
    """
    Convert all videos and audios in a folder
    """
    videos, audios = get_videos_and_audios(path, ignore_extension, ignore_path)
    manager = enlighten.get_manager()
    errors_files = []
    pbar = manager.counter(total=len(videos) + len(audios), desc='Files', unit='files')
    errors_files, pbar = process_files(videos, False, manager, errors_files, pbar)
    errors_files, pbar = process_files(audios, True, manager, errors_files, pbar)
    if errors_files:
        typer.secho('These videos could not be processed:', fg=RED)
        typer.secho(str(errors_files), fg=RED)
def select_images(df, operetta_folder, method="copy"):
    render_path = o.ensure_dir(os.path.join(operetta_folder, 'out', 'render'))

    manager = enlighten.get_manager()
    bar = manager.counter(total=len(df), desc='Progress', unit='files')
    for ix, r in df.iterrows():
        destination_folder = o.ensure_dir(
            os.path.join(operetta_folder, 'out', 'selected-images',
                         '%s@%s' % (r["Cell Type"], r["Cell Count"]),
                         r["Compound"]))

        # name, original_path = o.ConfiguredChannels.filename_of_render(r, render_path)
        name = 'r%d-c%d-f%d-p%s-i%d.jpg' % (r["row"], r["col"], r["fid"], str(r["p"]), r["zid"])
        original_path = os.path.join(render_path, name)
        destination_path = o.ensure_dir(os.path.join(destination_folder, name))
        try:
            if method == "link":
                logger.debug('linking %s to %s' % (name, destination_folder))
                os.symlink(original_path, destination_path, False)
            elif method == "copy":
                logger.debug('copying %s to %s' % (name, destination_folder))
                copyfile(original_path, destination_path)
            elif method == "move":
                logger.debug('moving %s to %s' % (name, destination_folder))
                os.rename(original_path, destination_path)
            bar.update()
        except Exception as e:
            logger.warning('no render for %s' % original_path)
            logger.warning(e)
            # traceback.print_stack()
    manager.stop()
def main():
    """ Main function """

    manager = enlighten.get_manager()
    run_tests(manager, 100)
    load(manager, 80)
def __init__(self, freeze_info):
    self.manager = enlighten.get_manager()
    self.counter = self.manager.counter(total=100, color='red', bar_format=bar_format)
    self.failure_counter = self.counter
    self.success_counter = self.counter.add_subcounter('cyan')
    freeze_info.add_hook('page_frozen', self.update_bar)
    freeze_info.add_hook('page_failed', self.update_bar)
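# Hedged, self-contained illustration of the subcounter pattern used above (the
# original update_bar hook is not shown, and the names here are illustrative):
# the cyan subcounter advances for successes, a plain update on the parent
# counter advances the red remainder for failures, and both share one bar.
import random
import enlighten

manager = enlighten.get_manager()
counter = manager.counter(total=100, color='red')
success = counter.add_subcounter('cyan')
for _ in range(100):
    if random.random() < 0.9:
        success.update()   # cyan portion grows for successful pages
    else:
        counter.update()   # remainder stays red for failed pages
manager.stop()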
def main():
    """ Main function """

    manager = enlighten.get_manager()
    process_files(manager)
    manager.stop()  # Clears all temporary counters and progress bars
def fetch_ib_contract_details(self):
    contracts_db = ContractsDatabase()
    columns = ['contract_id', 'contract_type_from_listing', 'broker_symbol',
               'exchange', 'currency']
    filters = {'primary_exchange': "NULL"}
    contracts = contracts_db.get_contracts(filters=filters, return_columns=columns)
    logging.info(f"Found {len(contracts)} contracts with missing IB details in master listing.")

    if len(contracts) == 0:
        return

    # Set up progress bar
    manager = enlighten.get_manager()
    pbar = manager.counter(total=len(contracts), desc="Contracts", unit="contracts")

    exiter = GracefulExiter()
    tws = Tws()
    ib_details_db = IbDetailsDatabase()

    tws.connect()
    logging.info("Connected to TWS.")

    try:
        for contract in contracts:
            # Check for abort conditions
            if exiter.exit() or tws.has_error():
                logging.info("Aborting fetching of IB details.")
                break

            contract_details = tws.download_contract_details(
                contract_type_from_listing=contract['contract_type_from_listing'],
                broker_symbol=contract['broker_symbol'],
                exchange=contract['exchange'],
                currency=contract['currency'])

            if contract_details is not None:
                ib_details_db.insert_ib_details(
                    contract_id=contract['contract_id'],
                    contract_type_from_details=contract_details.stockType,
                    primary_exchange=contract_details.contract.primaryExchange,
                    industry=contract_details.industry,
                    category=contract_details.category,
                    subcategory=contract_details.subcategory)

            pbar.update()
    finally:
        tws.disconnect()
        logging.info("Disconnected from TWS.")
def __init__(self, iterable):
    try:
        total = len(iterable)
    except (TypeError, AttributeError):
        total = None

    self.iterable = iterable
    self.manager = enlighten.get_manager()
    self.pbar = self.manager.counter(total=total)
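# Hedged sketch of how such an iterable wrapper usually completes the pattern
# above (assumed, not from the original source; the class name is illustrative):
# yield each item, advance the counter, and clean up when iteration ends.
import enlighten

class ProgressIter:
    def __init__(self, iterable):
        try:
            total = len(iterable)
        except (TypeError, AttributeError):
            total = None
        self.iterable = iterable
        self.manager = enlighten.get_manager()
        self.pbar = self.manager.counter(total=total)

    def __iter__(self):
        try:
            for item in self.iterable:
                yield item
                self.pbar.update()
        finally:
            self.pbar.close()
            self.manager.stop()

# usage: for row in ProgressIter(range(100)): ...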
def process_files():
    """
    Process a random number of files on a random number of systems across multiple data centers
    """

    # Start with a manager
    manager = enlighten.get_manager()

    # Simulated preparation
    prep = manager.counter(total=SPLINES, desc='Reticulating:', unit='splines')
    for num in range(SPLINES):  # pylint: disable=unused-variable
        time.sleep(random.uniform(0.1, 0.5))  # Random processing time
        prep.update()
    prep.close()

    # Get a top level progress bar
    enterprise = manager.counter(total=DATACENTERS, desc='Processing:', unit='datacenters')

    # Iterate through data centers
    for dnum in range(1, DATACENTERS + 1):
        systems = random.randint(*SYSTEMS)  # Random number of systems
        # Get a child progress bar. leave is False so it can be replaced
        currCenter = manager.counter(total=systems, desc='  Datacenter %d:' % dnum,
                                     unit='systems', leave=False)

        # Iterate through systems
        for snum in range(1, systems + 1):

            # Has no total, so will act as counter. Leave is False
            system = manager.counter(desc='    System %d:' % snum, unit='files', leave=False)
            files = random.randint(*FILES)  # Random file count

            # Iterate through files
            for fnum in range(files):  # pylint: disable=unused-variable
                system.update()  # Update count
                time.sleep(random.uniform(0.0001, 0.0005))  # Random processing time

            system.close()  # Close counter so it gets removed
            # Log status
            LOGGER.info('Updated %d files on System %d in Datacenter %d', files, snum, dnum)
            currCenter.update()  # Update count

        currCenter.close()  # Close counter so it gets removed
        enterprise.update()  # Update count

    enterprise.close()  # Close counter, won't be removed but does a refresh

    manager.stop()  # Clears all temporary counters and progress bars
def __init__(self):
    self.__yt = None
    self.__choice = 0
    self.__mp3_mode = False
    self.__directory = self.__get_default_download_path()
    self.__vids = {}  # list of videos to download
    self.__thread_pool = []
    self.__parallel_download = False
    self.__manager = enlighten.get_manager()
    self.__ticks = {}
def begin(self, generations, pop_size):
    self.manager = enlighten.get_manager()
    self.pop_counter = self.manager.counter(total=pop_size, unit="evals", leave=True,
                                            desc="Current Gen")
    self.total_counter = self.manager.counter(total=generations * pop_size, unit="evals",
                                              leave=True, desc="Best: 0.000")
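# Hedged, self-contained sketch of how the two counters above are typically
# driven (assumed; the matching update logic is not part of the original): one
# tick per evaluation on both bars, rewinding the per-generation bar between
# generations rather than recreating it.
import time
import enlighten

manager = enlighten.get_manager()
generations, pop_size = 3, 10
pop_counter = manager.counter(total=pop_size, unit="evals", leave=True, desc="Current Gen")
total_counter = manager.counter(total=generations * pop_size, unit="evals", leave=True, desc="Best: 0.000")
for gen in range(generations):
    for _ in range(pop_size):
        time.sleep(0.01)  # stand-in for one fitness evaluation
        pop_counter.update()
        total_counter.update()
    # rewind the per-generation bar for the next population
    pop_counter.count = 0
    pop_counter.start = time.time()
    pop_counter.refresh()
manager.stop()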
def fsck_mirror(name, config, verbose=False, force=False, repack_only=False,
                conn_only=False, repack_all_quick=False, repack_all_full=False):
    global logger
    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)

    # noinspection PyTypeChecker
    em = enlighten.get_manager(series=' -=#')

    if 'log' in config.keys():
        ch = logging.FileHandler(config['log'])
        formatter = logging.Formatter(
            "[%(process)d] %(asctime)s - %(levelname)s - %(message)s")
        ch.setFormatter(formatter)
        loglevel = logging.INFO
        if 'loglevel' in config.keys():
            if config['loglevel'] == 'debug':
                loglevel = logging.DEBUG
        ch.setLevel(loglevel)
        logger.addHandler(ch)

    ch = logging.StreamHandler()
    formatter = logging.Formatter('%(message)s')
    ch.setFormatter(formatter)
    if verbose:
        ch.setLevel(logging.INFO)
    else:
        ch.setLevel(logging.CRITICAL)
        em.enabled = False
    logger.addHandler(ch)

    # push it into grokmirror to override the default logger
    grokmirror.logger = logger

    if conn_only or repack_all_quick or repack_all_full:
        force = True

    logger.info('Running grok-fsck for [%s]', name)

    # Lock the tree to make sure we only run one instance
    logger.debug('Attempting to obtain lock on %s', config['lock'])
    flockh = open(config['lock'], 'w')
    try:
        lockf(flockh, LOCK_EX | LOCK_NB)
    except IOError:
        logger.info('Could not obtain exclusive lock on %s', config['lock'])
        logger.info('Assuming another process is running.')
        return 0

    manifest = grokmirror.read_manifest(config['manifest'])

    if os.path.exists(config['statusfile']):
        logger.info('Reading status from %s', config['statusfile'])
        stfh = open(config['statusfile'], 'rb')
        # noinspection PyBroadException
        try:
            # Format of the status file:
            #  {
            #    '/full/path/to/repository': {
            #      'lastcheck': 'YYYY-MM-DD' or 'never',
            #      'nextcheck': 'YYYY-MM-DD',
            #      'lastrepack': 'YYYY-MM-DD',
            #      'fingerprint': 'sha-1',
            #      's_elapsed': seconds,
            #      'quick_repack_count': times,
            #    },
            #    ...
            #  }
            status = json.loads(stfh.read().decode('utf-8'))
        except:
            # Huai le!
            logger.critical('Failed to parse %s', config['statusfile'])
            lockf(flockh, LOCK_UN)
            flockh.close()
            return 1
    else:
        status = {}

    if 'frequency' in config:
        frequency = int(config['frequency'])
    else:
        frequency = 30

    today = datetime.datetime.today()
    todayiso = today.strftime('%F')

    if force:
        # Use randomization for next check, again
        checkdelay = random.randint(1, frequency)
    else:
        checkdelay = frequency

    # Go through the manifest and compare with status
    # noinspection PyTypeChecker
    e_find = em.counter(total=len(manifest), desc='Discovering:', unit='repos', leave=False)
    for gitdir in list(manifest):
        e_find.update()
        fullpath = os.path.join(config['toplevel'], gitdir.lstrip('/'))
        if fullpath not in status.keys():
            # Newly added repository
            if not force:
                # Randomize next check between now and frequency
                delay = random.randint(0, frequency)
                nextdate = today + datetime.timedelta(days=delay)
                nextcheck = nextdate.strftime('%F')
            else:
                nextcheck = todayiso

            status[fullpath] = {
                'lastcheck': 'never',
                'nextcheck': nextcheck,
            }
            logger.info('%s:', fullpath)
            logger.info('    added : next check on %s', nextcheck)

    e_find.close()

    # record newly found repos in the status file
    logger.debug('Updating status file in %s', config['statusfile'])
    with open(config['statusfile'], 'wb') as stfh:
        stfh.write(json.dumps(status, indent=2).encode('utf-8'))

    # Go through status and find all repos that need work done on them.
    # This is a dictionary that contains:
    #   full_path_to_repo:
    #     repack: 0, 1, 2 (0-no, 1-needs quick repack, 2-needs full repack)
    #     fsck: 0/1
    to_process = {}

    total_checked = 0
    total_elapsed = 0

    # noinspection PyTypeChecker
    e_cmp = em.counter(total=len(status), desc='Analyzing:', unit='repos', leave=False)
    for fullpath in list(status):
        e_cmp.update()

        # Check to make sure it's still in the manifest
        gitdir = fullpath.replace(config['toplevel'], '', 1)
        gitdir = '/' + gitdir.lstrip('/')
        if gitdir not in manifest.keys():
            del status[fullpath]
            logger.debug('%s is gone, no longer in manifest', gitdir)
            continue

        needs_repack = needs_prune = needs_fsck = 0

        obj_info = get_repo_obj_info(fullpath)
        try:
            packs = int(obj_info['packs'])
            count_loose = int(obj_info['count'])
        except KeyError:
            logger.warning('Unable to count objects in %s, skipping' % fullpath)
            continue

        schedcheck = datetime.datetime.strptime(status[fullpath]['nextcheck'], '%Y-%m-%d')
        nextcheck = today + datetime.timedelta(days=checkdelay)

        if 'repack' not in config.keys() or config['repack'] != 'yes':
            # don't look at me if you turned off repack
            logger.debug('Not repacking because repack=no in config')
            needs_repack = 0
        elif repack_all_full and (count_loose > 0 or packs > 1):
            logger.debug('needs_repack=2 due to repack_all_full')
            needs_repack = 2
        elif repack_all_quick and count_loose > 0:
            logger.debug('needs_repack=1 due to repack_all_quick')
            needs_repack = 1
        elif conn_only:
            # don't do any repacks if we're running forced connectivity checks, unless
            # you specifically passed --repack-all-foo
            logger.debug('needs_repack=0 due to --conn-only')
            needs_repack = 0
        else:
            # for now, hardcode the maximum loose objects and packs
            # TODO: we can probably set this in git config values?
            #       I don't think this makes sense as a global setting, because
            #       optimal values will depend on the size of the repo as a whole
            max_loose_objects = 1200
            max_packs = 20
            pc_loose_objects = 10
            pc_loose_size = 10

            # first, compare against max values:
            if packs >= max_packs:
                logger.debug('Triggering full repack of %s because packs > %s',
                             fullpath, max_packs)
                needs_repack = 2
            elif count_loose >= max_loose_objects:
                logger.debug('Triggering quick repack of %s because loose objects > %s',
                             fullpath, max_loose_objects)
                needs_repack = 1
            else:
                # is the number of loose objects or their size more than 10% of
                # the overall total?
                in_pack = int(obj_info['in-pack'])
                size_loose = int(obj_info['size'])
                size_pack = int(obj_info['size-pack'])
                total_obj = count_loose + in_pack
                total_size = size_loose + size_pack

                # set some arbitrary "worth bothering" limits so we don't
                # continuously repack tiny repos.
                if total_obj > 500 and count_loose / total_obj * 100 >= pc_loose_objects:
                    logger.debug('Triggering repack of %s because loose objects > %s%% of total',
                                 fullpath, pc_loose_objects)
                    needs_repack = 1
                elif total_size > 1024 and size_loose / total_size * 100 >= pc_loose_size:
                    logger.debug('Triggering repack of %s because loose size > %s%% of total',
                                 fullpath, pc_loose_size)
                    needs_repack = 1

        if needs_repack > 0 and check_precious_objects(fullpath):
            # if we have preciousObjects, then we only repack based on the same
            # schedule as fsck.
            logger.debug('preciousObjects is set')
            # for repos with preciousObjects, we use the fsck schedule for repacking
            if schedcheck <= today:
                logger.debug('Time for a full periodic repack of a preciousObjects repo')
                status[fullpath]['nextcheck'] = nextcheck.strftime('%F')
                needs_repack = 2
            else:
                logger.debug('Not repacking preciousObjects repo outside of schedule')
                needs_repack = 0

        # Do we need to fsck it?
        if not (repack_all_quick or repack_all_full or repack_only):
            if schedcheck <= today or force:
                status[fullpath]['nextcheck'] = nextcheck.strftime('%F')
                needs_fsck = 1

        if needs_repack or needs_fsck or needs_prune:
            # emit a warning if we find garbage in a repo
            # we do it here so we don't spam people nightly on every cron run,
            # but only do it when a repo needs actual work done on it
            if obj_info['garbage'] != '0':
                logger.warning('%s:\n\tcontains %s garbage files (garbage-size: %s KiB)',
                               fullpath, obj_info['garbage'], obj_info['size-garbage'])

            to_process[fullpath] = {
                'repack': needs_repack,
                'prune': needs_prune,
                'fsck': needs_fsck,
            }

    e_cmp.close()

    if not len(to_process):
        logger.info('No repos need attention.')
        em.stop()
        return

    logger.info('Processing %s repositories', len(to_process))

    # noinspection PyTypeChecker
    run = em.counter(total=len(to_process), desc='Processing:', unit='repos', leave=False)
    for fullpath, needs in to_process.items():
        logger.info('%s:', fullpath)

        # Calculate elapsed seconds
        run.refresh()
        startt = time.time()

        # Wait till the repo is available and lock it for the duration of checks,
        # otherwise there may be false-positives if a mirrored repo is updated
        # in the middle of fsck or repack.
        grokmirror.lock_repo(fullpath, nonblocking=False)

        if needs['repack']:
            if run_git_repack(fullpath, config, needs['repack']):
                status[fullpath]['lastrepack'] = todayiso
                if needs['repack'] > 1:
                    status[fullpath]['lastfullrepack'] = todayiso
            else:
                logger.warning('Repacking %s was unsuccessful, not running fsck.', fullpath)
                grokmirror.unlock_repo(fullpath)
                continue

        if needs['prune']:
            run_git_prune(fullpath, config)

        if needs['fsck']:
            run_git_fsck(fullpath, config, conn_only)

        endt = time.time()

        status[fullpath]['lastcheck'] = todayiso
        status[fullpath]['s_elapsed'] = int(endt - startt)

        logger.info('     done : %ss, next check on %s',
                    status[fullpath]['s_elapsed'], status[fullpath]['nextcheck'])

        run.update()

        # We're done with the repo now
        grokmirror.unlock_repo(fullpath)
        total_checked += 1
        total_elapsed += time.time() - startt

        # Write status file after each check, so if the process dies, we won't
        # have to recheck all the repos we've already checked
        logger.debug('Updating status file in %s', config['statusfile'])
        with open(config['statusfile'], 'wb') as stfh:
            stfh.write(json.dumps(status, indent=2).encode('utf-8'))

    run.close()
    em.stop()

    logger.info('Processed %s repos in %0.2fs', total_checked, total_elapsed)

    with open(config['statusfile'], 'wb') as stfh:
        stfh.write(json.dumps(status, indent=2).encode('utf-8'))

    lockf(flockh, LOCK_UN)
    flockh.close()
from __future__ import print_function, absolute_import

import enlighten

progress_manager = enlighten.get_manager()
active_counters = []


class Progress(object):
    def __init__(self, total, desc='', leave=False):
        self.counter = progress_manager.counter(total=total, desc=desc, leave=leave)
        active_counters.append(self.counter)

    def __iter__(self):
        return self

    def __next__(self):
        raise NotImplementedError()

    def close(self):
        self.counter.close()
        active_counters.remove(self.counter)
        if len(active_counters) == 0:
            progress_manager.stop()

    def __enter__(self):
        return self

    def __exit__(self, exception_type, exception_value, traceback):
        self.close()


class RangeProgress(Progress):
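# The body of RangeProgress is not included above. A hedged guess at how such a
# subclass might be written (illustrative only, not the original code): iterate
# over a range and advance the counter on every __next__.
#
# class RangeProgress(Progress):
#     def __init__(self, start, stop, step=1, desc='', leave=False):
#         super().__init__(total=len(range(start, stop, step)), desc=desc, leave=leave)
#         self._it = iter(range(start, stop, step))
#
#     def __next__(self):
#         try:
#             value = next(self._it)
#         except StopIteration:
#             self.close()
#             raise
#         self.counter.update()
#         return value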
def grok_manifest(manifile, toplevel, args=None, logfile=None, usenow=False,
                  check_export_ok=False, purge=False, remove=False,
                  pretty=False, ignore=None, wait=False, verbose=False):
    if args is None:
        args = []
    if ignore is None:
        ignore = []

    logger.setLevel(logging.DEBUG)

    # noinspection PyTypeChecker
    em = enlighten.get_manager(series=' -=#')

    ch = logging.StreamHandler()
    formatter = logging.Formatter('%(message)s')
    ch.setFormatter(formatter)
    if verbose:
        ch.setLevel(logging.INFO)
    else:
        ch.setLevel(logging.CRITICAL)
        em.enabled = False
    logger.addHandler(ch)

    if logfile is not None:
        ch = logging.FileHandler(logfile)
        formatter = logging.Formatter(
            "[%(process)d] %(asctime)s - %(levelname)s - %(message)s")
        ch.setFormatter(formatter)
        ch.setLevel(logging.DEBUG)
        logger.addHandler(ch)

    # push our logger into grokmirror to override the default
    grokmirror.logger = logger

    grokmirror.manifest_lock(manifile)
    manifest = grokmirror.read_manifest(manifile, wait=wait)

    # If manifest is empty, don't use current timestamp
    if not len(manifest.keys()):
        usenow = False

    if remove and len(args):
        # Remove the repos as required, write new manifest and exit
        for fullpath in args:
            repo = fullpath.replace(toplevel, '', 1)
            if repo in manifest.keys():
                del manifest[repo]
                logger.info('Repository %s removed from manifest', repo)
            else:
                logger.info('Repository %s not in manifest', repo)

        # XXX: need to add logic to make sure we don't break the world
        #      by removing a repository used as a reference for others
        #      also make sure we clean up any dangling symlinks
        grokmirror.write_manifest(manifile, manifest, pretty=pretty)
        grokmirror.manifest_unlock(manifile)
        return 0

    gitdirs = []

    if purge or not len(args) or not len(manifest.keys()):
        # We automatically purge when we do a full tree walk
        gitdirs = grokmirror.find_all_gitdirs(toplevel, ignore=ignore)
        purge_manifest(manifest, toplevel, gitdirs)

    if len(manifest) and len(args):
        # limit ourselves to passed dirs only when there is something
        # in the manifest. This precaution makes sure we regenerate the
        # whole file when there is nothing in it or it can't be parsed.
        gitdirs = args
        # Don't draw a progress bar for a single repo
        em.enabled = False

    symlinks = []

    # noinspection PyTypeChecker
    run = em.counter(total=len(gitdirs), desc='Processing:', unit='repos', leave=False)
    for gitdir in gitdirs:
        run.update()
        # check to make sure this gitdir is ok to export
        if (check_export_ok and
                not os.path.exists(os.path.join(gitdir, 'git-daemon-export-ok'))):
            # is it currently in the manifest?
            repo = gitdir.replace(toplevel, '', 1)
            if repo in list(manifest):
                logger.info('Repository %s is no longer exported, '
                            'removing from manifest', repo)
                del manifest[repo]

            # XXX: need to add logic to make sure we don't break the world
            #      by removing a repository used as a reference for others
            #      also make sure we clean up any dangling symlinks
            continue

        if os.path.islink(gitdir):
            symlinks.append(gitdir)
        else:
            update_manifest(manifest, toplevel, gitdir, usenow)

    logger.info('Updated %s records in %0.2fs', len(gitdirs), run.elapsed)
    run.close()
    em.stop()

    if len(symlinks):
        set_symlinks(manifest, toplevel, symlinks)

    grokmirror.write_manifest(manifile, manifest, pretty=pretty)
    grokmirror.manifest_unlock(manifile)
def pull_mirror(name, config, verbose=False, force=False, nomtime=False,
                verify=False, verify_subpath='*', noreuse=False,
                purge=False, pretty=False, forcepurge=False):
    global logger
    global lock_fails

    # noinspection PyTypeChecker
    em = enlighten.get_manager(series=' -=#')

    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)

    if 'log' in config.keys():
        ch = logging.FileHandler(config['log'])
        formatter = logging.Formatter(
            "[%(process)d] %(asctime)s - %(levelname)s - %(message)s")
        ch.setFormatter(formatter)
        loglevel = logging.INFO
        if 'loglevel' in config.keys():
            if config['loglevel'] == 'debug':
                loglevel = logging.DEBUG
        ch.setLevel(loglevel)
        logger.addHandler(ch)

    ch = logging.StreamHandler()
    formatter = logging.Formatter('%(message)s')
    ch.setFormatter(formatter)
    if verbose:
        ch.setLevel(logging.INFO)
    else:
        ch.setLevel(logging.CRITICAL)
        em.enabled = False
    logger.addHandler(ch)

    # push it into grokmirror to override the default logger
    grokmirror.logger = logger

    logger.info('Checking [%s]', name)
    mymanifest = config['mymanifest']

    if verify:
        logger.info('Verifying mirror against %s', config['manifest'])
        nomtime = True

    if config['manifest'].find('file:///') == 0:
        manifile = config['manifest'].replace('file://', '')
        if not os.path.exists(manifile):
            logger.critical('Remote manifest not found in %s! Quitting!',
                            config['manifest'])
            return 1

        fstat = os.stat(manifile)
        last_modified = fstat[8]
        logger.debug('mtime on %s is: %s', manifile, fstat[8])

        if os.path.exists(config['mymanifest']):
            fstat = os.stat(config['mymanifest'])
            my_last_modified = fstat[8]
            logger.debug('Our last-modified is: %s', my_last_modified)
            if not (force or nomtime) and last_modified <= my_last_modified:
                logger.info('Manifest file unchanged. Quitting.')
                return 0

        logger.info('Reading new manifest from %s', manifile)
        manifest = grokmirror.read_manifest(manifile)
        # Don't accept empty manifests -- that indicates something is wrong
        if not len(manifest.keys()):
            logger.warning('Remote manifest empty or unparseable! Quitting.')
            return 1
    else:
        # Load it from remote host using http and header magic
        logger.info('Fetching remote manifest from %s', config['manifest'])

        # Do we have username:password@ in the URL?
        chunks = urlparse(config['manifest'])
        if chunks.netloc.find('@') > 0:
            logger.debug('Taking username/password from the URL for basic auth')
            (upass, netloc) = chunks.netloc.split('@')
            if upass.find(':') > 0:
                (username, password) = upass.split(':')
            else:
                username = upass
                password = ''

            manifesturl = config['manifest'].replace(chunks.netloc, netloc)
            logger.debug('manifesturl=%s', manifesturl)
            request = urllib_request.Request(manifesturl)

            password_mgr = urllib_request.HTTPPasswordMgrWithDefaultRealm()
            password_mgr.add_password(None, manifesturl, username, password)
            auth_handler = urllib_request.HTTPBasicAuthHandler(password_mgr)
            opener = urllib_request.build_opener(auth_handler)
        else:
            request = urllib_request.Request(config['manifest'])
            opener = urllib_request.build_opener()

        # Find out if we need to run at all first
        if not (force or nomtime) and os.path.exists(mymanifest):
            fstat = os.stat(mymanifest)
            mtime = fstat[8]
            logger.debug('mtime on %s is: %s', mymanifest, mtime)
            my_last_modified = time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                             time.gmtime(mtime))
            logger.debug('Our last-modified is: %s', my_last_modified)
            request.add_header('If-Modified-Since', my_last_modified)

        try:
            ufh = opener.open(request, timeout=30)
        except HTTPError as ex:
            if ex.code == 304:
                logger.info('Server says we have the latest manifest. Quitting.')
                return 0
            logger.warning('Could not fetch %s', config['manifest'])
            logger.warning('Server returned: %s', ex)
            return 1
        except (URLError, ssl.SSLError, ssl.CertificateError) as ex:
            logger.warning('Could not fetch %s', config['manifest'])
            logger.warning('Error was: %s', ex)
            return 1

        last_modified = ufh.headers.get('Last-Modified')
        last_modified = time.strptime(last_modified, '%a, %d %b %Y %H:%M:%S %Z')
        last_modified = calendar.timegm(last_modified)

        # We don't use read_manifest for the remote manifest, as it can be
        # anything, really. For now, blindly open it with gzipfile if it ends
        # with .gz. XXX: some http servers will auto-deflate such files.
        try:
            if config['manifest'].find('.gz') > 0:
                fh = gzip.GzipFile(fileobj=BytesIO(ufh.read()))
            else:
                fh = ufh

            jdata = fh.read().decode('utf-8')
            fh.close()

            manifest = anyjson.deserialize(jdata)
        except Exception as ex:
            logger.warning('Failed to parse %s', config['manifest'])
            logger.warning('Error was: %s', ex)
            return 1

    mymanifest = grokmirror.read_manifest(mymanifest)

    culled = cull_manifest(manifest, config)

    to_clone = []
    to_pull = []
    existing = []

    toplevel = config['toplevel']
    if not os.access(toplevel, os.W_OK):
        logger.critical('Toplevel %s does not exist or is not writable', toplevel)
        sys.exit(1)

    if 'pull_threads' in config.keys():
        pull_threads = int(config['pull_threads'])
        if pull_threads < 1:
            logger.info('pull_threads is less than 1, forcing to 1')
            pull_threads = 1
    else:
        # be conservative
        logger.info('pull_threads is not set, consider setting it')
        pull_threads = 5

    # noinspection PyTypeChecker
    e_cmp = em.counter(total=len(culled), desc='Comparing:', unit='repos', leave=False)

    for gitdir in list(culled):
        fullpath = os.path.join(toplevel, gitdir.lstrip('/'))
        e_cmp.update()

        # fingerprints were added in later versions, so deal if the upstream
        # manifest doesn't have a fingerprint
        if 'fingerprint' not in culled[gitdir]:
            culled[gitdir]['fingerprint'] = None

        # Attempt to lock the repo
        try:
            grokmirror.lock_repo(fullpath, nonblocking=True)
        except IOError:
            logger.info('Could not lock %s, skipping', gitdir)
            lock_fails.append(gitdir)
            # Force the fingerprint to what we have in mymanifest, if we have it.
            culled[gitdir]['fingerprint'] = None
            if gitdir in mymanifest and 'fingerprint' in mymanifest[gitdir]:
                culled[gitdir]['fingerprint'] = mymanifest[gitdir]['fingerprint']
            if len(lock_fails) >= pull_threads:
                logger.info('Too many repositories locked (%s). Exiting.', len(lock_fails))
                return 0
            continue

        if verify:
            if culled[gitdir]['fingerprint'] is None:
                logger.debug('No fingerprint for %s, not verifying', gitdir)
                grokmirror.unlock_repo(fullpath)
                continue

            if not fnmatch.fnmatch(gitdir, verify_subpath):
                grokmirror.unlock_repo(fullpath)
                continue

            logger.debug('Verifying %s', gitdir)
            if not os.path.exists(fullpath):
                verify_fails.append(gitdir)
                logger.info('Verify: %s ABSENT', gitdir)
                grokmirror.unlock_repo(fullpath)
                continue

            my_fingerprint = grokmirror.get_repo_fingerprint(toplevel, gitdir, force=force)
            if my_fingerprint == culled[gitdir]['fingerprint']:
                logger.info('Verify: %s OK', gitdir)
            else:
                logger.critical('Verify: %s FAILED', gitdir)
                verify_fails.append(gitdir)

            grokmirror.unlock_repo(fullpath)
            continue

        # Is the directory in place?
        if os.path.exists(fullpath):
            # Did grok-fsck request to reclone it?
            rfile = os.path.join(fullpath, 'grokmirror.reclone')
            if os.path.exists(rfile):
                logger.info('Reclone requested for %s:', gitdir)
                with open(rfile, 'r') as rfh:
                    reason = rfh.read()
                    logger.info('  %s', reason)

                to_clone.append(gitdir)
                grokmirror.unlock_repo(fullpath)
                continue

            # Fix owner and description, if necessary
            if gitdir in mymanifest.keys():
                # This code is hurky and needs to be cleaned up
                desc = culled[gitdir].get('description')
                owner = culled[gitdir].get('owner')
                ref = None
                if config['ignore_repo_references'] != 'yes':
                    ref = culled[gitdir].get('reference')

                # dirty hack to force on-disk owner/description checks
                # when we're called with -n, in case our manifest
                # differs from what is on disk for owner/description/alternates
                myref = None
                if nomtime:
                    mydesc = None
                    myowner = None
                else:
                    mydesc = mymanifest[gitdir].get('description')
                    myowner = mymanifest[gitdir].get('owner')

                    if config['ignore_repo_references'] != 'yes':
                        myref = mymanifest[gitdir].get('reference')

                    if myowner is None:
                        myowner = config['default_owner']

                if owner is None:
                    owner = config['default_owner']

                if desc != mydesc or owner != myowner or ref != myref:
                    # we can do this right away without waiting
                    set_repo_params(toplevel, gitdir, owner, desc, ref)
            else:
                # It exists on disk, but not in my manifest?
                if noreuse:
                    logger.critical('Found existing git repo in %s', fullpath)
                    logger.critical('But you asked NOT to reuse repos')
                    logger.critical('Skipping %s', gitdir)
                    grokmirror.unlock_repo(fullpath)
                    continue

                logger.info('Setting new origin for %s', gitdir)
                fix_remotes(gitdir, toplevel, config['site'])
                to_pull.append(gitdir)
                grokmirror.unlock_repo(fullpath)
                continue

            # fingerprints were added late, so if we don't have them
            # in the remote manifest, fall back on using timestamps
            changed = False
            if culled[gitdir]['fingerprint'] is not None:
                logger.debug('Will use fingerprints to compare %s', gitdir)
                my_fingerprint = grokmirror.get_repo_fingerprint(toplevel, gitdir, force=force)
                if my_fingerprint != culled[gitdir]['fingerprint']:
                    logger.debug('No fingerprint match, will pull %s', gitdir)
                    changed = True
                else:
                    logger.debug('Fingerprints match, skipping %s', gitdir)
            else:
                logger.debug('Will use timestamps to compare %s', gitdir)
                if force:
                    logger.debug('Will force-pull %s', gitdir)
                    changed = True
                    # set timestamp to 0 as well
                    grokmirror.set_repo_timestamp(toplevel, gitdir, 0)
                else:
                    ts = grokmirror.get_repo_timestamp(toplevel, gitdir)
                    if ts < culled[gitdir]['modified']:
                        changed = True

            if changed:
                to_pull.append(gitdir)
                grokmirror.unlock_repo(fullpath)
                continue
            else:
                logger.debug('Repo %s unchanged', gitdir)
                # if we don't have a fingerprint for it, add it now
                if culled[gitdir]['fingerprint'] is None:
                    fpr = grokmirror.get_repo_fingerprint(toplevel, gitdir)
                    culled[gitdir]['fingerprint'] = fpr
                existing.append(gitdir)
                grokmirror.unlock_repo(fullpath)
                continue
        else:
            # Newly incoming repo
            to_clone.append(gitdir)
            grokmirror.unlock_repo(fullpath)
            continue

        # If we got here, something is odd.
        # noinspection PyUnreachableCode
        logger.critical('Could not figure out what to do with %s', gitdir)
        grokmirror.unlock_repo(fullpath)

    logger.info('Compared new manifest against %s repositories in %0.2fs',
                len(culled), e_cmp.elapsed)
    e_cmp.close()

    if verify:
        if len(verify_fails):
            logger.critical('%s repos failed to verify', len(verify_fails))
            return 1
        else:
            logger.info('Verification successful')
            return 0

    hookscript = config['post_update_hook']

    if len(to_pull):
        if len(lock_fails) > 0:
            pull_threads -= len(lock_fails)

        # Don't spin up more threads than we need
        if pull_threads > len(to_pull):
            pull_threads = len(to_pull)

        # exit if we're ever at 0 pull_threads. Shouldn't happen, but some extra
        # precaution doesn't hurt
        if pull_threads <= 0:
            logger.info('Too many repositories locked. Exiting.')
            return 0

        logger.info('Will use %d threads to pull repos', pull_threads)

        # noinspection PyTypeChecker
        e_pull = em.counter(total=len(to_pull), desc='Updating :', unit='repos', leave=False)
        logger.info('Updating %s repos from %s', len(to_pull), config['site'])
        in_queue = Queue()
        out_queue = Queue()

        for gitdir in to_pull:
            in_queue.put((gitdir, culled[gitdir]['fingerprint'],
                          culled[gitdir]['modified']))

        for i in range(pull_threads):
            logger.debug('Spun up thread %s', i)
            t = PullerThread(in_queue, out_queue, config, i, e_pull)
            t.setDaemon(True)
            t.start()

        # wait till it's all done
        in_queue.join()
        logger.info('All threads finished.')

        while not out_queue.empty():
            # see if any of it failed
            (gitdir, my_fingerprint, status) = out_queue.get()
            # We always record our fingerprint in our manifest
            culled[gitdir]['fingerprint'] = my_fingerprint
            if not status:
                # To make sure we check this again during next run,
                # fudge the manifest accordingly.
                logger.debug('Will recheck %s during next run', gitdir)
                culled[gitdir] = mymanifest[gitdir]
                # this is rather hackish, but effective
                last_modified -= 1

        logger.info('Updates completed in %0.2fs', e_pull.elapsed)
        e_pull.close()
    else:
        logger.info('No repositories need updating')

    # how many lockfiles have we seen?
    # If there are more lock_fails than there are
    # pull_threads configured, we skip cloning out of caution
    if len(to_clone) and len(lock_fails) > pull_threads:
        logger.info('Too many repositories locked. Skipping cloning new repos.')
        to_clone = []

    if len(to_clone):
        # noinspection PyTypeChecker
        e_clone = em.counter(total=len(to_clone), desc='Cloning :', unit='repos', leave=False)
        logger.info('Cloning %s repos from %s', len(to_clone), config['site'])
        # we use "existing" to track which repos can be used as references
        existing.extend(to_pull)

        to_clone_sorted = []
        clone_order(to_clone, manifest, to_clone_sorted, existing)

        for gitdir in to_clone_sorted:
            e_clone.refresh()

            fullpath = os.path.join(toplevel, gitdir.lstrip('/'))

            # Did grok-fsck request to reclone it?
            rfile = os.path.join(fullpath, 'grokmirror.reclone')
            if os.path.exists(rfile):
                logger.debug('Removing %s for reclone', gitdir)
                shutil.move(fullpath, '%s.reclone' % fullpath)
                shutil.rmtree('%s.reclone' % fullpath)

            # Do we still need to clone it, or has another process
            # already done this for us?
            ts = grokmirror.get_repo_timestamp(toplevel, gitdir)
            if ts > 0:
                logger.debug('Looks like %s already cloned, skipping', gitdir)
                continue

            try:
                grokmirror.lock_repo(fullpath, nonblocking=True)
            except IOError:
                logger.info('Could not lock %s, skipping', gitdir)
                lock_fails.append(gitdir)
                e_clone.update()
                continue

            reference = None
            if config['ignore_repo_references'] != 'yes':
                reference = culled[gitdir]['reference']

            if reference is not None and reference in existing:
                # Make sure we can lock the reference repo
                refrepo = os.path.join(toplevel, reference.lstrip('/'))
                try:
                    grokmirror.lock_repo(refrepo, nonblocking=True)
                    success = clone_repo(toplevel, gitdir, config['site'],
                                         reference=reference)
                    grokmirror.unlock_repo(refrepo)
                except IOError:
                    logger.info('Cannot lock reference repo %s, skipping %s',
                                reference, gitdir)
                    if reference not in lock_fails:
                        lock_fails.append(reference)

                    grokmirror.unlock_repo(fullpath)
                    e_clone.update()
                    continue
            else:
                success = clone_repo(toplevel, gitdir, config['site'])

            # check dir to make sure cloning succeeded and then add to existing
            if os.path.exists(fullpath) and success:
                logger.debug('Cloning of %s succeeded, adding to existing', gitdir)
                existing.append(gitdir)

                desc = culled[gitdir].get('description')
                owner = culled[gitdir].get('owner')
                ref = culled[gitdir].get('reference')

                if owner is None:
                    owner = config['default_owner']

                set_repo_params(toplevel, gitdir, owner, desc, ref)
                set_agefile(toplevel, gitdir, culled[gitdir]['modified'])
                my_fingerprint = grokmirror.set_repo_fingerprint(toplevel, gitdir)
                culled[gitdir]['fingerprint'] = my_fingerprint
                run_post_update_hook(hookscript, toplevel, gitdir)
            else:
                logger.warning('Was not able to clone %s', gitdir)
                # Remove it from our manifest so we can try re-cloning
                # next time grok-pull runs
                del culled[gitdir]
                git_fails.append(gitdir)

            grokmirror.unlock_repo(fullpath)
            e_clone.update()

        logger.info('Clones completed in %0.2fs', e_clone.elapsed)
        e_clone.close()
    else:
        logger.info('No repositories need cloning')

    # loop through all entries and find any symlinks we need to set
    # We also collect all symlinks to do purging correctly
    symlinks = []

    for gitdir in culled.keys():
        if 'symlinks' in culled[gitdir].keys():
            source = os.path.join(config['toplevel'], gitdir.lstrip('/'))
            for symlink in culled[gitdir]['symlinks']:
                if symlink not in symlinks:
                    symlinks.append(symlink)
                target = os.path.join(config['toplevel'], symlink.lstrip('/'))

                if os.path.exists(source):
                    if os.path.islink(target):
                        # are you pointing to where we need you?
                        if os.path.realpath(target) != source:
                            # Remove symlink and recreate below
                            logger.debug('Removed existing wrong symlink %s', target)
                            os.unlink(target)
                    elif os.path.exists(target):
                        logger.warning('Deleted repo %s, because it is now'
                                       ' a symlink to %s' % (target, source))
                        shutil.rmtree(target)

                    # Here we re-check if we still need to do anything
                    if not os.path.exists(target):
                        logger.info('Symlinking %s -> %s', target, source)
                        # Make sure the leading dirs are in place
                        if not os.path.exists(os.path.dirname(target)):
                            os.makedirs(os.path.dirname(target))
                        os.symlink(source, target)

    manifile = config['mymanifest']
    grokmirror.manifest_lock(manifile)

    # Is the local manifest newer than last_modified? That would indicate
    # that another process has run and "culled" is no longer the latest info
    if os.path.exists(manifile):
        fstat = os.stat(manifile)
        if fstat[8] > last_modified:
            logger.info('Local manifest is newer, not saving.')
            grokmirror.manifest_unlock(manifile)
            return 0

    if purge:
        to_purge = []
        found_repos = 0
        for founddir in grokmirror.find_all_gitdirs(config['toplevel']):
            gitdir = founddir.replace(config['toplevel'], '')
            found_repos += 1

            if gitdir not in culled.keys() and gitdir not in symlinks:
                to_purge.append(founddir)

        if len(to_purge):
            # Purge-protection engage
            try:
                purge_limit = int(config['purgeprotect'])
                assert 1 <= purge_limit <= 99
            except (ValueError, AssertionError):
                logger.critical('Warning: "%s" is not valid for purgeprotect.',
                                config['purgeprotect'])
                logger.critical('Please set to a number between 1 and 99.')
                logger.critical('Defaulting to purgeprotect=5.')
                purge_limit = 5

            purge_pc = len(to_purge) * 100 / found_repos
            logger.debug('purgeprotect=%s', purge_limit)
            logger.debug('purge percentage=%s', purge_pc)

            if not forcepurge and purge_pc >= purge_limit:
                logger.critical('Refusing to purge %s repos (%s%%)',
                                len(to_purge), purge_pc)
                logger.critical('Set purgeprotect to a higher percentage, or'
                                ' override with --force-purge.')
                logger.info('Not saving local manifest')
                return 1
            else:
                # noinspection PyTypeChecker
                e_purge = em.counter(total=len(to_purge), desc='Purging :',
                                     unit='repos', leave=False)
                for founddir in to_purge:
                    e_purge.refresh()
                    if os.path.islink(founddir):
                        logger.info('Removing unreferenced symlink %s', gitdir)
                        os.unlink(founddir)
                    else:
                        # is anything using us for alternates?
                        gitdir = '/' + os.path.relpath(founddir, toplevel).lstrip('/')
                        if grokmirror.is_alt_repo(toplevel, gitdir):
                            logger.info('Not purging %s because it is used by '
                                        'other repos via alternates', founddir)
                        else:
                            try:
                                logger.info('Purging %s', founddir)
                                grokmirror.lock_repo(founddir, nonblocking=True)
                                shutil.rmtree(founddir)
                            except IOError:
                                lock_fails.append(gitdir)
                                logger.info('%s is locked, not purging', gitdir)
                    e_purge.update()

                logger.info('Purging completed in %0.2fs', e_purge.elapsed)
                e_purge.close()
        else:
            logger.info('No repositories need purging')

    # Done with progress bars
    em.stop()

    # Go through all repos in culled and get the latest local timestamps.
    for gitdir in culled:
        ts = grokmirror.get_repo_timestamp(toplevel, gitdir)
        culled[gitdir]['modified'] = ts

    # If there were any lock failures, we fudge last_modified to always
    # be older than the server, which will force the next grokmirror run.
    if len(lock_fails):
        logger.info('%s repos could not be locked. Forcing next run.', len(lock_fails))
        last_modified -= 1
    elif len(git_fails):
        logger.info('%s repos failed. Forcing next run.', len(git_fails))
        last_modified -= 1

    # Once we're done, save culled as our new manifest
    grokmirror.write_manifest(manifile, culled, mtime=last_modified, pretty=pretty)

    grokmirror.manifest_unlock(manifile)

    # write out projects.list, if asked to
    write_projects_list(culled, config)

    return 127