def process_sessions_real(coordinators, updates_directory, index_filename,
                          pickle_root, result_pickle_root, num_workers=None):
    """Process every session in an updates index and post-process the result.

    Seeds a SessionContextManager with the built-in persistent state plus
    each coordinator's declared persistent/ephemeral state, builds one job
    per session found in the index, runs the jobs (serially in-process when
    num_workers == 0, otherwise on a multiprocessing Pool), merges each
    session's pickled context into a single global context, and finally
    hands the global context to every coordinator.

    Args:
        coordinators: iterable of coordinator objects exposing
            persistent_state / ephemeral_state dicts mapping
            name -> (init_func, merge_func), plus create_processor(session)
            and finished_processing(global_context).
        updates_directory: directory holding the raw update files.
        index_filename: path to the UpdatesIndex database.
        pickle_root: directory where per-session contexts are pickled.
        result_pickle_root: directory for per-session result pickles.
        num_workers: 0 to process serially in this process; None for the
            Pool's default worker count; otherwise the number of workers.
    """
    # NOTE(review): the pool is created up front, before the (potentially
    # large) per-session argument list is built — presumably so forked
    # workers carry minimal state. Creation point preserved as-is.
    pool = None
    if num_workers != 0:
        pool = Pool(processes=num_workers)

    session_context_manager = SessionContextManager()
    session_context_manager.declare_persistent_state(
        'filenames_processed', set, None)
    session_context_manager.declare_persistent_state(
        'last_sequence_number_processed', return_negative_one, None)
    for coordinator in coordinators:
        # .items() (not the py2-only .iteritems()) keeps this 2/3 compatible;
        # each dict is iterated exactly once, so behavior is unchanged.
        for name, (init_func, merge_func) \
                in coordinator.persistent_state.items():
            session_context_manager.declare_persistent_state(
                name, init_func, merge_func)
        for name, (init_func, merge_func) \
                in coordinator.ephemeral_state.items():
            session_context_manager.declare_ephemeral_state(
                name, init_func, merge_func)

    print('Preparing processors')
    process_args = []
    index = UpdatesIndex(index_filename)
    for session in index.sessions:
        processors = [coordinator.create_processor(session)
                      for coordinator in coordinators]
        update_files = index.session_data(session)
        process_args.append((session,
                             session_context_manager,
                             pickle_root,
                             result_pickle_root,
                             processors,
                             update_files,
                             updates_directory))

    print('Processing sessions')
    global_context = GlobalContext()

    def merge_result(pickle_path):
        # Fold one session's pickled context into the global context, then
        # drop the reference promptly to bound peak memory usage.
        session_context = session_context_manager.load_context(pickle_path)
        session_context_manager.merge_contexts(session_context, global_context)
        del session_context

    if pool is None:
        # Serial mode (num_workers == 0): run each job in this process.
        for args in process_args:
            merge_result(process_session_wrapper(args))
    else:
        # Always reap the workers, even if loading/merging a context raises
        # mid-iteration (the original leaked the pool in that case).
        try:
            results = pool.imap_unordered(process_session_wrapper,
                                          process_args)
            for pickle_path in results:
                merge_result(pickle_path)
        finally:
            pool.close()
            pool.join()

    print('Post-processing')
    for coordinator in coordinators:
        coordinator.finished_processing(global_context)