def bake(self, ctx): if not self.page_ref.exists: logger.debug( "No page found at '%s', skipping %s archives." % (self.page_ref, self.source_name)) return logger.debug("Baking %s archives...", self.source_name) with format_timed_scope(logger, 'gathered archive years', level=logging.DEBUG, colored=False): all_years, dirty_years = self._buildDirtyYears(ctx) with format_timed_scope(logger, "baked %d %s archives." % (len(dirty_years), self.source_name)): self._bakeDirtyYears(ctx, all_years, dirty_years)
def _bakeRealmPages(self, record, pool, realm, factories):
    def _handler(res):
        entry = record.getCurrentEntry(res['path'], res['taxonomy_info'])
        entry.subs = res['sub_entries']
        if res['errors']:
            entry.errors += res['errors']
            self._logErrors(res['path'], res['errors'])
        if entry.has_any_error:
            record.current.success = False
        if entry.subs and entry.was_any_sub_baked:
            record.current.baked_count[realm] += 1

    logger.debug("Baking %d realm pages..." % len(factories))
    with format_timed_scope(logger,
                            "baked %d pages" % len(factories),
                            level=logging.DEBUG, colored=False,
                            timer_env=self.app.env,
                            timer_category='BakeJob'):
        jobs = []
        for fac in factories:
            job = self._makeBakeJob(record, fac)
            if job is not None:
                jobs.append(job)

        ar = pool.queueJobs(jobs, handler=_handler)
        ar.wait()

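# Note: the queueing pattern above recurs in the loading, rendering, and
# asset-processing code below: build a list of job dicts, hand them to the
# worker pool with `pool.queueJobs(jobs, handler=_handler)`, let the
# `_handler` callback fold each worker result back into the bake record,
# and block on `ar.wait()` until every job has been handled.
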
def _loadRealmPages(self, record, pool, factories):
    def _handler(res):
        # Create the record entry for this page.
        # This will also update the `dirty_source_names` for the record
        # as we add page files whose last modification times are later
        # than the last bake.
        record_entry = BakeRecordEntry(res['source_name'], res['path'])
        record_entry.config = res['config']
        if res['errors']:
            record_entry.errors += res['errors']
            record.current.success = False
            self._logErrors(res['path'], res['errors'])
        record.addEntry(record_entry)

    logger.debug("Loading %d realm pages..." % len(factories))
    with format_timed_scope(logger,
                            "loaded %d pages" % len(factories),
                            level=logging.DEBUG, colored=False,
                            timer_env=self.app.env,
                            timer_category='LoadJob'):
        jobs = []
        for fac in factories:
            job = {'type': JOB_LOAD, 'job': save_factory(fac)}
            jobs.append(job)

        ar = pool.queueJobs(jobs, handler=_handler)
        ar.wait()

def _bakeTaxonomies(self, record, pool):
    logger.debug("Baking taxonomy pages...")
    with format_timed_scope(logger, 'built taxonomy buckets',
                            level=logging.DEBUG, colored=False):
        buckets = self._buildTaxonomyBuckets(record)

    start_time = time.perf_counter()
    page_count = self._bakeTaxonomyBuckets(record, pool, buckets)
    logger.info(format_timed(start_time,
                             "baked %d taxonomy pages." % page_count))

def _renderRealmPages(self, record, pool, factories):
    def _handler(res):
        entry = record.getCurrentEntry(res['path'])
        if res['errors']:
            entry.errors += res['errors']
            record.current.success = False
            self._logErrors(res['path'], res['errors'])

    logger.debug("Rendering %d realm pages..." % len(factories))
    with format_timed_scope(logger,
                            "prepared %d pages" % len(factories),
                            level=logging.DEBUG, colored=False,
                            timer_env=self.app.env,
                            timer_category='RenderFirstSubJob'):
        jobs = []
        for fac in factories:
            record_entry = record.getCurrentEntry(fac.path)
            if record_entry.errors:
                logger.debug("Ignoring %s because it had previous "
                             "errors." % fac.ref_spec)
                continue

            # Make sure the source and the route exist for this page,
            # otherwise we add errors to the record entry and we'll skip
            # this page for the rest of the bake.
            source = self.app.getSource(fac.source.name)
            if source is None:
                record_entry.errors.append(
                    "Can't get source for page: %s" % fac.ref_spec)
                logger.error(record_entry.errors[-1])
                continue

            route = self.app.getRoute(fac.source.name, fac.metadata,
                                      skip_taxonomies=True)
            if route is None:
                record_entry.errors.append(
                    "Can't get route for page: %s" % fac.ref_spec)
                logger.error(record_entry.errors[-1])
                continue

            # All good, queue the job.
            job = {'type': JOB_RENDER_FIRST, 'job': save_factory(fac)}
            jobs.append(job)

        ar = pool.queueJobs(jobs, handler=_handler)
        ar.wait()

def bake(self, ctx): if not self.page_ref.exists: logger.debug( "No page found at '%s', skipping taxonomy '%s'." % (self.page_ref, self.taxonomy.name)) return logger.debug("Baking %s pages...", self.taxonomy.name) with format_timed_scope(logger, 'gathered taxonomy terms', level=logging.DEBUG, colored=False): all_terms, dirty_terms = self._buildDirtyTaxonomyTerms(ctx) start_time = time.perf_counter() page_count = self._bakeTaxonomyTerms(ctx, all_terms, dirty_terms) if page_count > 0: logger.info(format_timed( start_time, "baked %d %s pages for %s." % ( page_count, self.taxonomy.term_name, self.source_name)))
def _save_bake_records(records, records_path, *, rotate_previous):
    if rotate_previous:
        records_dir, records_fn = os.path.split(records_path)
        records_id, _ = os.path.splitext(records_fn)
        for i in range(8, -1, -1):
            suffix = '' if i == 0 else '.%d' % i
            records_path_i = os.path.join(
                records_dir,
                '%s%s.records' % (records_id, suffix))
            if os.path.exists(records_path_i):
                records_path_next = os.path.join(
                    records_dir,
                    '%s.%s.records' % (records_id, i + 1))
                if os.path.exists(records_path_next):
                    os.remove(records_path_next)
                os.rename(records_path_i, records_path_next)

    with format_timed_scope(logger, "saved bake records.",
                            level=logging.DEBUG, colored=False):
        records.save(records_path)

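# With rotate_previous=True, repeated bakes keep a short history of record
# files next to the current one. For a file named, say, 'out.records' (an
# illustrative name, not one used by the code), the directory ends up with:
#   out.records, out.1.records, out.2.records, ... up to out.9.records
# where each save shifts every existing file one slot up and the oldest
# file (out.9.records) is dropped before being overwritten.
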
def run(self, src_dir_or_file=None, *,
        delete=True, previous_record=None, save_record=True):
    start_time = time.perf_counter()

    # Get the list of processors for this run.
    processors = self.app.plugin_loader.getProcessors()
    if self.enabled_processors is not None:
        logger.debug("Filtering processors to: %s" %
                     self.enabled_processors)
        processors = get_filtered_processors(processors,
                                             self.enabled_processors)
    if self.additional_processors_factories is not None:
        logger.debug("Adding %s additional processors." %
                     len(self.additional_processors_factories))
        for proc_fac in self.additional_processors_factories:
            proc = proc_fac()
            self.app.env.registerTimer(proc.__class__.__name__,
                                       raise_if_registered=False)
            proc.initialize(self.app)
            processors.append(proc)

    # Invoke pre-processors.
    pipeline_ctx = PipelineContext(-1, self.app, self.out_dir,
                                   self.tmp_dir, self.force)
    for proc in processors:
        proc.onPipelineStart(pipeline_ctx)

    # Pre-processors can define additional ignore patterns.
    self.ignore_patterns += make_re(
        pipeline_ctx._additional_ignore_patterns)

    # Create the pipeline record.
    record = TransitionalProcessorPipelineRecord()
    record_cache = self.app.cache.getCache('proc')
    record_name = (
        hashlib.md5(self.out_dir.encode('utf8')).hexdigest() +
        '.record')
    if previous_record:
        record.setPrevious(previous_record)
    elif not self.force and record_cache.has(record_name):
        with format_timed_scope(logger, 'loaded previous bake record',
                                level=logging.DEBUG, colored=False):
            record.loadPrevious(record_cache.getCachePath(record_name))
    logger.debug("Got %d entries in process record." %
                 len(record.previous.entries))
    record.current.success = True
    record.current.processed_count = 0

    # Work!
    def _handler(res):
        entry = record.getCurrentEntry(res.path)
        assert entry is not None
        entry.flags = res.flags
        entry.proc_tree = res.proc_tree
        entry.rel_outputs = res.rel_outputs
        if entry.flags & FLAG_PROCESSED:
            record.current.processed_count += 1
        if res.errors:
            entry.errors += res.errors
            record.current.success = False

            rel_path = os.path.relpath(res.path, self.app.root_dir)
            logger.error("Errors found in %s:" % rel_path)
            for e in entry.errors:
                logger.error(" " + e)

    jobs = []
    self._process(src_dir_or_file, record, jobs)
    pool = self._createWorkerPool()
    ar = pool.queueJobs(jobs, handler=_handler)
    ar.wait()

    # Shutdown the workers and get timing information from them.
    reports = pool.close()
    record.current.timers = {}
    for i in range(len(reports)):
        timers = reports[i]
        if timers is None:
            continue

        worker_name = 'PipelineWorker_%d' % i
        record.current.timers[worker_name] = {}
        for name, val in timers['data'].items():
            main_val = record.current.timers.setdefault(name, 0)
            record.current.timers[name] = main_val + val
            record.current.timers[worker_name][name] = val

    # Invoke post-processors.
    pipeline_ctx.record = record.current
    for proc in processors:
        proc.onPipelineEnd(pipeline_ctx)

    # Handle deletions.
    if delete:
        for path, reason in record.getDeletions():
            logger.debug("Removing '%s': %s" % (path, reason))
            try:
                os.remove(path)
            except FileNotFoundError:
                pass
            logger.info('[delete] %s' % path)

    # Finalize the process record.
    record.current.process_time = time.time()
    record.current.out_dir = self.out_dir
    record.collapseRecords()

    # Save the process record.
    if save_record:
        with format_timed_scope(logger, 'saved bake record',
                                level=logging.DEBUG, colored=False):
            record.saveCurrent(record_cache.getCachePath(record_name))

    logger.info(format_timed(
        start_time,
        "processed %d assets." % record.current.processed_count))

    return record.detach()

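# The merged `record.current.timers` dict built above ends up shaped roughly
# like this (timer names and values are illustrative only):
#   {
#       'Copy': 1.4,                          # total, summed over workers
#       'PipelineWorker_0': {'Copy': 0.8},    # per-worker breakdown
#       'PipelineWorker_1': {'Copy': 0.6},
#   }
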
def bake(self): logger.debug(" Bake Output: %s" % self.out_dir) logger.debug(" Root URL: %s" % self.app.config.get('site/root')) # Get into bake mode. start_time = time.perf_counter() self.app.config.set('baker/is_baking', True) self.app.env.base_asset_url_format = '%uri%' # Make sure the output directory exists. if not os.path.isdir(self.out_dir): os.makedirs(self.out_dir, 0o755) # Load/create the bake record. record = TransitionalBakeRecord() record_cache = self.app.cache.getCache('baker') record_id = hashlib.md5(self.out_dir.encode('utf8')).hexdigest() record_name = record_id + '.record' previous_record_path = None if not self.force and record_cache.has(record_name): with format_timed_scope(logger, "loaded previous bake record", level=logging.DEBUG, colored=False): previous_record_path = record_cache.getCachePath(record_name) record.loadPrevious(previous_record_path) record.current.success = True # Figure out if we need to clean the cache because important things # have changed. is_cache_valid = self._handleCacheValidity(record) if not is_cache_valid: previous_record_path = None # Pre-create all caches. for cache_name in ['app', 'baker', 'pages', 'renders']: self.app.cache.getCache(cache_name) # Gather all sources by realm -- we're going to bake each realm # separately so we can handle "overriding" (i.e. one realm overrides # another realm's pages, like the user realm overriding the theme # realm). sources_by_realm = {} for source in self.app.sources: srclist = sources_by_realm.setdefault(source.realm, []) srclist.append(source) # Create the worker processes. pool = self._createWorkerPool(previous_record_path) # Bake the realms. realm_list = [REALM_USER, REALM_THEME] for realm in realm_list: srclist = sources_by_realm.get(realm) if srclist is not None: self._bakeRealm(record, pool, realm, srclist) # Bake taxonomies. self._bakeTaxonomies(record, pool) # All done with the workers. Close the pool and get timing reports. reports = pool.close() record.current.timers = {} for i in range(len(reports)): timers = reports[i] if timers is None: continue worker_name = 'BakeWorker_%d' % i record.current.timers[worker_name] = {} for name, val in timers['data'].items(): main_val = record.current.timers.setdefault(name, 0) record.current.timers[name] = main_val + val record.current.timers[worker_name][name] = val # Delete files from the output. self._handleDeletetions(record) # Backup previous records. for i in range(8, -1, -1): suffix = '' if i == 0 else '.%d' % i record_path = record_cache.getCachePath('%s%s.record' % (record_id, suffix)) if os.path.exists(record_path): record_path_next = record_cache.getCachePath( '%s.%s.record' % (record_id, i + 1)) if os.path.exists(record_path_next): os.remove(record_path_next) os.rename(record_path, record_path_next) # Save the bake record. with format_timed_scope(logger, "saved bake record.", level=logging.DEBUG, colored=False): record.current.bake_time = time.time() record.current.out_dir = self.out_dir record.saveCurrent(record_cache.getCachePath(record_name)) # All done. self.app.config.set('baker/is_baking', False) logger.debug(format_timed(start_time, 'done baking')) return record.detach()
def bake(self):
    start_time = time.perf_counter()

    # Setup baker.
    logger.debug(" Bake Output: %s" % self.out_dir)
    logger.debug(" Root URL: %s" % self.app.config.get('site/root'))

    # Get into bake mode.
    self.app.config.set('baker/is_baking', True)
    self.app.config.set('site/asset_url_format', '%page_uri%/%filename%')

    stats = self.app.env.stats
    stats.registerTimer('LoadSourceContents', raise_if_registered=False)
    stats.registerTimer('CacheTemplates', raise_if_registered=False)

    # Make sure the output directory exists.
    if not os.path.isdir(self.out_dir):
        os.makedirs(self.out_dir, 0o755)

    # Load/create the bake records.
    records_path = get_bake_records_path(
        self.app, self.out_dir)
    if not self.force and os.path.isfile(records_path):
        with format_timed_scope(logger, "loaded previous bake records",
                                level=logging.DEBUG, colored=False):
            previous_records = load_records(records_path)
    else:
        previous_records = MultiRecord()
    current_records = MultiRecord()

    # Figure out if we need to clean the cache because important things
    # have changed.
    is_cache_valid = self._handleCacheValidity(previous_records,
                                               current_records)
    if not is_cache_valid:
        previous_records = MultiRecord()

    # Create the bake records history which tracks what's up-to-date
    # or not since last time we baked to the given output folder.
    record_histories = MultiRecordHistory(
        previous_records, current_records)

    # Pre-create all caches.
    for cache_name in ['app', 'baker', 'pages', 'renders']:
        self.app.cache.getCache(cache_name)

    # Create the pipelines.
    ppmngr = self._createPipelineManager(record_histories)

    # Done with all the setup, let's start the actual work.
    logger.info(format_timed(start_time, "setup baker"))

    # Load all sources, pre-cache templates.
    load_start_time = time.perf_counter()
    self._populateTemplateCaches()
    logger.info(format_timed(load_start_time, "cache templates"))

    # Create the worker processes.
    pool_userdata = _PoolUserData(self, ppmngr)
    pool = self._createWorkerPool(records_path, pool_userdata)

    # Bake the realms.
    self._bakeRealms(pool, ppmngr, record_histories)

    # Handle deletions, collapse records, etc.
    ppmngr.postJobRun()
    ppmngr.deleteStaleOutputs()
    ppmngr.collapseRecords(self.keep_unused_records)

    # All done with the workers. Close the pool and get reports.
    pool_stats = pool.close()
    current_records.stats = _merge_execution_stats(stats, *pool_stats)

    # Shutdown the pipelines.
    ppmngr.shutdownPipelines()

    # Backup previous records, save the current ones.
    current_records.bake_time = time.time()
    current_records.out_dir = self.out_dir
    _save_bake_records(current_records, records_path,
                       rotate_previous=self.rotate_bake_records)

    # All done.
    self.app.config.set('baker/is_baking', False)
    logger.debug(format_timed(start_time, 'done baking'))

    return current_records
def bake(self): logger.debug(" Bake Output: %s" % self.out_dir) logger.debug(" Root URL: %s" % self.app.config.get('site/root')) # Get into bake mode. start_time = time.perf_counter() self.app.config.set('baker/is_baking', True) self.app.env.base_asset_url_format = '%uri%' # Make sure the output directory exists. if not os.path.isdir(self.out_dir): os.makedirs(self.out_dir, 0o755) # Load/create the bake record. record = TransitionalBakeRecord() record_cache = self.app.cache.getCache('baker') record_id = hashlib.md5(self.out_dir.encode('utf8')).hexdigest() record_name = record_id + '.record' previous_record_path = None if not self.force and record_cache.has(record_name): with format_timed_scope(logger, "loaded previous bake record", level=logging.DEBUG, colored=False): previous_record_path = record_cache.getCachePath(record_name) record.loadPrevious(previous_record_path) record.current.success = True # Figure out if we need to clean the cache because important things # have changed. is_cache_valid = self._handleCacheValidity(record) if not is_cache_valid: previous_record_path = None # Pre-create all caches. for cache_name in ['app', 'baker', 'pages', 'renders']: self.app.cache.getCache(cache_name) # Gather all sources by realm -- we're going to bake each realm # separately so we can handle "overriding" (i.e. one realm overrides # another realm's pages, like the user realm overriding the theme # realm). sources_by_realm = {} for source in self.app.sources: srclist = sources_by_realm.setdefault(source.realm, []) srclist.append(source) # Create the worker processes. pool = self._createWorkerPool(previous_record_path) # Bake the realms. realm_list = [REALM_USER, REALM_THEME] for realm in realm_list: srclist = sources_by_realm.get(realm) if srclist is not None: self._bakeRealm(record, pool, realm, srclist) # Bake taxonomies. self._bakeTaxonomies(record, pool) # All done with the workers. Close the pool and get timing reports. reports = pool.close() record.current.timers = {} for i in range(len(reports)): timers = reports[i] if timers is None: continue worker_name = 'BakeWorker_%d' % i record.current.timers[worker_name] = {} for name, val in timers['data'].items(): main_val = record.current.timers.setdefault(name, 0) record.current.timers[name] = main_val + val record.current.timers[worker_name][name] = val # Delete files from the output. self._handleDeletetions(record) # Backup previous records. for i in range(8, -1, -1): suffix = '' if i == 0 else '.%d' % i record_path = record_cache.getCachePath( '%s%s.record' % (record_id, suffix)) if os.path.exists(record_path): record_path_next = record_cache.getCachePath( '%s.%s.record' % (record_id, i + 1)) if os.path.exists(record_path_next): os.remove(record_path_next) os.rename(record_path, record_path_next) # Save the bake record. with format_timed_scope(logger, "saved bake record.", level=logging.DEBUG, colored=False): record.current.bake_time = time.time() record.current.out_dir = self.out_dir record.saveCurrent(record_cache.getCachePath(record_name)) # All done. self.app.config.set('baker/is_baking', False) logger.debug(format_timed(start_time, 'done baking')) return record.detach()