# Module-level imports assumed by this method:
import concurrent.futures
from pathlib import Path


def feed_event(self, event):
    futures = []
    with concurrent.futures.ProcessPoolExecutor(max_workers=8) as executor:
        ###################################################################
        # This needs to run first, as other tasks will need to write into #
        # the resulting folders.                                          #
        ###################################################################

        # Depends on folder: 'static/'
        if event & STATIC_FOLDER:
            create_site_structure(static_path=STATIC_PATH)
            print_progress(text='Create _site')

        ###################################################################
        # We then reload the data in memory before generating the site.   #
        ###################################################################

        # Depends on folder: 'data/'
        if self.data_source is None or event & DATA_FOLDER:
            # Class through which all data can be accessed. Cache it on
            # `self` so the `is None` check above actually short-circuits
            # reloading (the original assigned a local, leaving the name
            # unbound when the cached instance should have been reused).
            self.data_source = DataSource()
            print_progress(text='Load data sources')
        data_source = self.data_source

        # Depends on: 'blog/'
        if self.blog_posts is None or event & BLOG_FOLDER:
            self.blog_posts = load_blog_posts()
            print_progress(text='Load blog posts')

        ###################################################################
        # Once the site structure has been created and the data has been  #
        # refreshed, we can build all parts of the site in parallel,      #
        # since there are no dependencies between them.                   #
        ###################################################################

        # Depends on: 'templates/', 'data/'
        if event & DATA_FOLDER or event & TEMPLATES_FOLDER:
            print_progress(text='Generate error pages')
            copy_custom_error_pages(data=data_source)

        def batched_job(inp, batch_fn, batch_size, message):
            """Submit `batch_fn` once per `batch_size` slice of `inp`,
            printing progress as the batches complete."""
            batches = []
            input_size = len(inp)
            for batch in [inp[i:i + batch_size]
                          for i in range(0, input_size, batch_size)]:
                submission = executor.submit(batch_fn, batch=batch)
                batches.append(submission)
                futures.append(submission)

            for i, _ in enumerate(concurrent.futures.as_completed(batches)):
                print_progress(
                    text=f"{message} "
                         f"{min((i + 1) * batch_size, input_size)}/{input_size}")

            return batches

        # Explorer: depends on 'data/'
        if event & DATA_FOLDER or event & STATIC_FOLDER:
            futures.append(executor.submit(build_explorer))

        # Depends on: 'data/', 'blog/', 'templates/'
        if event & DATA_FOLDER or event & BLOG_FOLDER or event & TEMPLATES_FOLDER:
            futures.append(
                executor.submit(generate_sitemap, blog_posts=self.blog_posts))

        # Depends on: 'data/', 'templates/'
        if event & DATA_FOLDER or event & TEMPLATES_FOLDER:
            # Home
            build_home(data=data_source)
            build_privacy_policy(data=data_source)

            # Trackers
            trackers = [id for id, _ in data_source.trackers.iter()]
            batched_job(trackers, build_tracker_page_batch, 150,
                        "Generate tracker pages")
            build_trackers_list(data=data_source)

            # Websites
            websites = list(
                enumerate([id for id, _ in data_source.sites.iter()]))
            batched_job(websites, build_website_pages_batch, 400,
                        "Generate website pages")
            build_website_list(data=data_source)

            # Companies
            build_company_reach_chart_page(data=data_source)

        # Depends on: 'data/', 'blog/', 'templates/'
        if event & DATA_FOLDER or event & BLOG_FOLDER or event & TEMPLATES_FOLDER:
            futures.append(
                executor.submit(build_blogpost_pages,
                                blog_posts=self.blog_posts))
            futures.append(
                executor.submit(build_rss_feeds, blog_posts=self.blog_posts))
            build_blogpost_list(data=data_source, blog_posts=self.blog_posts)

        if event & DATA_FOLDER:
            build_tracker_db()

            trackers = [id for id, _ in data_source.trackers.iter()]
            data_dir = Path('_site/data/trackers/global')
            if not data_dir.exists():
                data_dir.mkdir(parents=True)
            batched_job(trackers, build_tracker_api_batch, 150,
                        "Generate Tracker API pages")

            site_data_dir = Path('_site/data/sites/global')
            if not site_data_dir.exists():
                site_data_dir.mkdir(parents=True)
            sites = [id for id, _ in data_source.sites.iter()]
            batched_job(sites, build_website_api_batch, 400,
                        "Generate Website API pages")

        # TODO: uncomment when company profiles are ready
        # if args['site'] or args['companies']:
        #     company_process = Process(target=build_company_pages,
        #                               args=(data_source,))
        #     company_process.start()

        # Wait for all jobs to finish.
        concurrent.futures.wait(futures)

        # Getting the `result` of each future (although none is expected)
        # re-raises any exception that happened in the child processes;
        # without this, failures would be silently ignored.
        for future in futures:
            future.result()

    print('Done')
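
# ---------------------------------------------------------------------------
# The `batched_job` closure above is the core of this version: it chunks its
# input, submits one task per chunk to the process pool, and prints progress
# as chunks complete. Batching matters with a ProcessPoolExecutor because
# every submission pays pickling and inter-process overhead, so one task per
# tracker or website page would spend more time shipping arguments than
# rendering pages. Below is a minimal, self-contained sketch of the same
# pattern; `run_batched`, `square_batch` and the example inputs are
# illustrative placeholders, not names from the original code.
# ---------------------------------------------------------------------------
import concurrent.futures


def square_batch(batch):
    # Placeholder worker: processes one batch of inputs. It must live at
    # module level so the process pool can pickle it.
    return [x * x for x in batch]


def run_batched(executor, items, fn, batch_size, message):
    total = len(items)
    # Chunk the input into consecutive slices of `batch_size`.
    batches = [items[i:i + batch_size] for i in range(0, total, batch_size)]
    submitted = [executor.submit(fn, batch) for batch in batches]
    # as_completed yields futures as they finish, so progress can be
    # reported while the pool is still working.
    for i, future in enumerate(concurrent.futures.as_completed(submitted)):
        future.result()  # re-raises any exception from the worker
        print(f"{message} {min((i + 1) * batch_size, total)}/{total}")
    return submitted


if __name__ == '__main__':
    with concurrent.futures.ProcessPoolExecutor(max_workers=4) as executor:
        run_batched(executor, list(range(1000)), square_batch,
                    batch_size=100, message="Squared")

# A second variant of feed_event, built on a ThreadPoolExecutor with one
# submitted task per page-building function, follows.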
# Module-level import assumed by this method:
import concurrent.futures


def feed_event(self, event):
    futures = []
    with concurrent.futures.ThreadPoolExecutor() as executor:
        ###################################################################
        # This needs to run first, as other tasks will need to write into #
        # the resulting folders.                                          #
        ###################################################################

        # Depends on folder: 'static/'
        if event & STATIC_FOLDER:
            create_site_structure(static_path=STATIC_PATH)
            print_progress(text='Create _site')

        ###################################################################
        # We then reload the data in memory before generating the site.   #
        ###################################################################

        # Depends on folder: 'data/'
        if self.data_source is None or event & DATA_FOLDER:
            # Class through which all data can be accessed. Cache it on
            # `self` so the `is None` check above can reuse the instance
            # (the original assigned a local, leaving the name unbound
            # when the cached instance should have been reused).
            self.data_source = DataSource()
            print_progress(text='Load data sources')
        data_source = self.data_source

        # Depends on: 'blog/'
        if self.blog_posts is None or event & BLOG_FOLDER:
            self.blog_posts = load_blog_posts()
            print_progress(text='Load blog posts')

        ###################################################################
        # Once the site structure has been created and the data has been  #
        # refreshed, we can build all parts of the site in parallel,      #
        # since there are no dependencies between them.                   #
        ###################################################################

        # Depends on: 'templates/', 'data/'
        if event & DATA_FOLDER or event & TEMPLATES_FOLDER:
            print_progress(text='Generate error pages')
            copy_custom_error_pages(data=data_source)

        # Depends on: 'data/', 'templates/'
        if event & DATA_FOLDER or event & TEMPLATES_FOLDER:
            # Home
            futures.append(executor.submit(build_home, data=data_source))

            # Trackers
            futures.append(
                executor.submit(build_trackers_list, data=data_source))
            futures.append(
                executor.submit(build_tracker_pages, data=data_source))

            # Websites
            futures.append(
                executor.submit(build_website_list, data=data_source))
            futures.append(
                executor.submit(build_website_pages, data=data_source))

        # Depends on: 'data/', 'blog/', 'templates/'
        if event & DATA_FOLDER or event & BLOG_FOLDER or event & TEMPLATES_FOLDER:
            futures.append(
                executor.submit(build_blogpost_list,
                                data=data_source,
                                blog_posts=self.blog_posts))
            futures.append(
                executor.submit(build_blogpost_pages,
                                data=data_source,
                                blog_posts=self.blog_posts))

        # Depends on: 'data/', 'blog/', 'templates/'
        if event & DATA_FOLDER or event & BLOG_FOLDER or event & TEMPLATES_FOLDER:
            futures.append(
                executor.submit(generate_sitemap,
                                data=data_source,
                                blog_posts=self.blog_posts))

        # TODO: uncomment when company profiles are ready
        # if args['site'] or args['companies']:
        #     company_process = Process(target=build_company_pages,
        #                               args=(data_source,))
        #     company_process.start()

        # Wait for all jobs to finish.
        concurrent.futures.wait(futures)

        # Getting the `result` of each future (although none is expected)
        # re-raises any exception raised in the worker threads; without
        # this, failures would be silently ignored.
        for future in futures:
            future.result()

    print('Done')
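
# ---------------------------------------------------------------------------
# Both variants dispatch on `event` with bitwise tests such as
# `event & DATA_FOLDER`, which only works if the folder constants are
# distinct bit flags that a file watcher can OR together into a single
# event. A sketch of how such flags could be declared; the values below
# are assumptions, not taken from the source.
# ---------------------------------------------------------------------------
STATIC_FOLDER = 1 << 0     # 'static/' changed
DATA_FOLDER = 1 << 1       # 'data/' changed
BLOG_FOLDER = 1 << 2       # 'blog/' changed
TEMPLATES_FOLDER = 1 << 3  # 'templates/' changed

# A watcher can combine several flags into one event:
event = DATA_FOLDER | TEMPLATES_FOLDER
assert event & DATA_FOLDER
assert not event & BLOG_FOLDER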