# Group paragraphs into dict of chapters def group_by_chapter(p): return p.part, p.chapter quotes = {} for key, grp in itertools.groupby(paragraphs, key=group_by_chapter): quotes[key] = list(grp) # Collecting statistics logger.info('Total %d chapters' % len(quotes.keys())) logger.debug('with chapters: %s ' % ', '.join(map(str, sorted(quotes.keys())))) logger.debug('Total paragraphs: %d' % len(paragraphs)) out_pickle_pth = './parsed_1984.pkl' logger.info('Exported to pickle %s' % out_pickle_pth) with open(out_pickle_pth, 'wb') as f: pickle.dump(quotes, f) logger.info('Program ends') if __name__ == '__main__': from utils import colorify_log_handler ch = logging.StreamHandler() colorify_log_handler(ch) root_logger = logging.getLogger() root_logger.setLevel(logging.DEBUG) root_logger.addHandler(ch) main()
# NOTE(review): the first three statements below are the tail of an async
# coroutine (presumably `quote_many`) whose `def` lies before this chunk;
# `futures`, `t_start` and `t_end` are bound earlier in that coroutine.
    # Report total wall-clock time, then collect each future's result.
    logger.info('All coroutines complete in {:.2f} seconds'.format(
        (t_end - t_start).total_seconds()
    ))
    quotes = [fut.result() for fut in futures]
    return quotes


def main():
    """Drive the event loop to fetch quotes, then write them to disk."""
    # Run until quote_many completes. Arguments are presumably: total number
    # of quotes (2000), max concurrent connections (100), scheduling step
    # (20) — semantics live in quote_many, not visible here; TODO confirm.
    loop = asyncio.get_event_loop()
    quotes = loop.run_until_complete(
        quote_many(2000, conn_limit=100, step=20)
    )
    loop.close()

    # Dump one quote per line to a plain-text file.
    out_pth = 'quotes.txt'
    logger.info('Write quotes to %s' % out_pth)
    with open(out_pth, 'w') as f:
        for quote in quotes:
            print(quote, file=f)
    logger.info('Done. Program ends.')


if __name__ == '__main__':
    # Configure root logging with a colorized stream handler before running.
    ch = logging.StreamHandler()
    colorify_log_handler(ch, time_fmt='%H:%M:%S')
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.DEBUG)
    root_logger.addHandler(ch)
    logger.info(PY35_GREETINGS)
    main()