def run_all_worker(): try: # Producer is on !!! url_jobs = mp.JoinableQueue() topic_results = mp.JoinableQueue() article_results = mp.JoinableQueue() for _ in range(6): # different process, different speed parse_article_proc = mp.Process(target=wxarticle_generator, args=(url_jobs, article_results)) parse_article_proc.daemon = True parse_article_proc.start() write_topic_proc = mp.Process(target=topic_db_writer, args=(topic_results, )) write_topic_proc.daemon = True write_topic_proc.start() for _ in range(6): # 4 processes to write article info into db write_article_proc = mp.Process(target=article_db_writer, args=(article_results, )) write_article_proc.daemon = True write_article_proc.start() try: seven_days_ago = (dt.today() - timedelta(6)).strftime("%Y-%m-%d") cp = mp.current_process() print dt.now().strftime( "%Y-%m-%d %H:%M:%S"), "Run All Word Process pid is %d" % ( cp.pid) conn = connect_database() if not conn: return False list_of_kw = read_topics_from_db(conn.cursor(), seven_days_ago)[::-1] wxurl_generator(list_of_kw, url_jobs, topic_results) topic_results.join() article_results.join() url_jobs.join() # if url_jobs.empty(): # print "-"*20, "url_jobs is empty ..." # if topic_results.empty(): # print "-"*20, "topic_results is empty ..." # if article_results.empty(): # print "-"*20, "article_results is empty ..." except mdb.OperationalError as e: traceback.print_exc() print dt.now().strftime("%Y-%m-%d %H:%M:%S") except Exception as e: traceback.print_exc() finally: conn.close() return True except Exception as e: traceback.print_exc() print dt.now().strftime( "%Y-%m-%d %H:%M:%S"), "Exception raise in Rn all Work" except KeyboardInterrupt: print dt.now().strftime( "%Y-%m-%d %H:%M:%S" ), "Interrupted by you and quit in force, but save the results"
def test_parse_baidu_results(): try: conn = connect_database() if not conn: return False list_of_kw = read_topics_from_db(conn) for kw in list_of_kw: for dr in DATE_ERANGES: # do 3 times search, look whether solve the problem: day>week>month baidu_result = parse_baidu_search_page(kw, dr) print baidu_result['data']['search_url'], baidu_result['data'][ 'hit_num'] except Exception as e: traceback.print_exc() finally: conn.close()
def run_all_worker(concurrency=5): try: # Producer is on !!! conn = connect_database() list_of_kw = read_topics_from_db(conn)[:100] pool = MPool(concurrency) # Processes pool pool.map(wxurl_generator, list_of_kw) # Keep up generate keywords pool.close() pool.join() # why join except KeyboardInterrupt: print "Interrupted by you and quit in force, but save the results" # Consummer followes wxarticle_generator() article_db_writer() topic_db_writer()
def add_topic_jobs(target, start_date): todo = 0 try: conn = connect_database() if not conn: return False list_of_kw = read_topics_from_db(conn.cursor(), start_date) for kw in list_of_kw: todo += 1 target.put(kw) except mdb.OperationalError as e: traceback.print_exc() print dt.now().strftime("%Y-%m-%d %H:%M:%S") except Exception as e: traceback.print_exc() finally: conn.close() return todo
def add_topic_jobs(target): todo = 0 one_week_ago = (dt.today() - timedelta(6)).strftime("%Y-%m-%d") try: conn = connect_database() if not conn: return False list_of_kw = read_topics_from_db(conn, one_week_ago) for kw in list_of_kw: todo += 1 target.put(kw) except mdb.OperationalError as e: traceback.print_exc() print dt.now().strftime("%Y-%m-%d %H:%M:%S") except Exception as e: traceback.print_exc() finally: conn.close() return todo