def main():
    # global qtimes, qerr
    usage = "<program> times_to_extend_url_list processes iterations"
    if len(sys.argv) < 4:
        print("Not enough arguments. " + usage)
        return
    times_to_extend_url_list = int(sys.argv[1])
    processes = int(sys.argv[2])
    iterations = int(sys.argv[3])

    urls = nextend(settings.URLS, times_to_extend_url_list)
    random.shuffle(urls)
    n = len(urls)
    timeout_secs = max(n / 10, 60)

    logger = dry.logger.setup_log_size_rotating(
        "log/urlclient_gevent_pool.log", logname="urlclientgeventpool")
    logger.info(
        "START|times_to_extend_url_list:{i}|processes:{p}|timeout:{t}|urls:{n}|type:gevent".format(
            i=times_to_extend_url_list, p=processes, t=timeout_secs, n=n))

    pool = Pool(max(processes, 4))
    qurltime = queue.Queue()  # note: unused in this listing
    elapsed_time = []
    for i in range(iterations):
        start = time.time()
        try:
            jobs = [pool.spawn(load_url, url, logger, logstatus=True) for url in urls]
            # jobs = [pool.spawn(load_url, url, logger, timeout=60, logstatus=True) for url in urls]
            pool.join(timeout=timeout_secs)
        except Exception as e:
            logger.error(e)
        elapsed_time.append(time.time() - start)

    elapsed_time_str = ",".join(str(t) for t in elapsed_time)
    print("g,{u},{np},{et}".format(u=len(urls), np=processes, et=elapsed_time_str))
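# nextend() and load_url() are defined elsewhere in the project; the
# sketches below are assumptions about their behavior, not the actual
# implementations. nextend() is taken to repeat a list n times, and
# load_url() to fetch a URL with urllib and optionally log the status.
# The gevent client passes timeout= while the futures client further
# down passes timeout_secs=, so the real signature may differ; this
# sketch accepts both. Under gevent, urllib only yields cooperatively
# if gevent.monkey.patch_all() was applied at startup.

def nextend(items, n):
    """Return a new list with items repeated n times (assumed helper)."""
    out = []
    for _ in range(n):
        out.extend(items)
    return out

def load_url(url, logger, timeout=60, timeout_secs=None, logstatus=False):
    """Fetch url, optionally log the HTTP status, return the body (sketch)."""
    import urllib.request
    try:
        with urllib.request.urlopen(url, timeout=timeout_secs or timeout) as resp:
            body = resp.read()
            if logstatus:
                logger.info("{u}|{s}".format(u=url, s=resp.getcode()))
            return body
    except Exception as e:
        logger.error("{u}|{e}".format(u=url, e=e))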
def main():
    start = time.time()
    logger = dry.logger.setup_log_size_rotating(
        "log/bigfile_futures_threadpool.log", logname="bigfilefuturesthreads")
    logger.info("START")
    elapsed_time = []

    sfile = settings.BIG_FILE
    fsize = os.path.getsize(sfile)
    with open(sfile, "r") as fh:
        # A list of tuples (chunk_start, chunk_size)
        chunks = size_chunks(fh, fsize, num_chunks=settings.BIGFILE_FUTURES_CHUNKS)
    pattern = re.compile(settings.TARGET_USERNAME)

    # One file handle per worker: threads sharing a single handle would
    # interleave seeks and reads.
    file_handles = [open(sfile, "r") for _ in chunks]
    try:
        with futures.ThreadPoolExecutor(max_workers=settings.BIGFILE_FUTURES_CHUNKS) as executor:
            future_to_chunk = dict(
                (executor.submit(find_noq, file_handles[i], chunks[i], pattern), "")
                for i in range(len(chunks)))
            recsmatch = 0
            try:
                for future in futures.as_completed(future_to_chunk, timeout=60):
                    recsmatch += future.result()
            except Exception as e:
                # traceback.print_exc(file=sys.stdout)
                logger.error("recsmatch={m} e={e}".format(m=recsmatch, e=e))
                return
    finally:
        for handle in file_handles:
            handle.close()

    elapsed_time.append(time.time() - start)
    elapsed_time_str = ",".join(str(t) for t in elapsed_time)
    print("{r}".format(r=recsmatch))
    logger.info("STOP|elapsedtime:{et}|recsmatch:{r}".format(et=elapsed_time_str, r=recsmatch))
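# size_chunks() and find_noq() are project helpers not shown in this
# section; below is a hedged sketch of what the calls above assume.
# Offsets are computed with len(line) on text-mode reads, so the sketch
# assumes single-byte-encoded data where character offsets equal byte
# offsets.

def size_chunks(fh, fsize, num_chunks=10):
    """Split fh into roughly equal (chunk_start, chunk_size) tuples,
    aligned to line boundaries (assumed behavior)."""
    approx = max(fsize // num_chunks, 1)
    chunks = []
    start, size = 0, 0
    for line in fh:
        size += len(line)
        if size >= approx:
            chunks.append((start, size))
            start, size = start + size, 0
    if size:
        chunks.append((start, size))  # remainder after the last boundary
    return chunks

def find_noq(fh, chunk, pattern):
    """Count lines within one chunk that match pattern (assumed behavior)."""
    chunk_start, chunk_size = chunk
    fh.seek(chunk_start)
    text = fh.read(chunk_size)
    return sum(1 for line in text.splitlines() if pattern.search(line))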
def main():
    usage = "urlclient_futures t|p times_to_extend_url_list processes iterations"
    if len(sys.argv) < 5:
        print("Not enough arguments. " + usage)
        return
    if sys.argv[1].lower() == "t":
        process_type = "threads"
    elif sys.argv[1].lower() == "p":
        process_type = "mp"
    else:
        print("Must specify t or p for thread or process as the first argument.")
        print(usage)
        return
    times_to_extend_url_list = int(sys.argv[2])
    processes = int(sys.argv[3])
    iterations = int(sys.argv[4])

    urls = nextend(settings.URLS, times_to_extend_url_list)
    n = len(urls)
    timeout_secs = max(n * 3, 60)

    logger = dry.logger.setup_log_size_rotating(
        "log/urlclient_futures.log", logname="urlclientfutures")
    logger.info(
        "times_to_extend_url_list={i} processes={p} timeout={t} n={n} type={y}".format(
            i=times_to_extend_url_list, p=processes, t=timeout_secs, n=n, y=process_type))

    elapsed_time = []
    for i in range(iterations):
        j = 0
        url = None
        future_to_url = {}
        start = time.time()
        if process_type == "threads":
            with futures.ThreadPoolExecutor(max_workers=processes) as executor:
                future_to_url = dict(
                    (executor.submit(load_url, url, logger, timeout_secs=30, logstatus=True), url)
                    for url in urls)
        else:
            # urls = urls[:9]
            try:
                with futures.ProcessPoolExecutor(max_workers=processes) as executor:
                    future_to_url = dict(
                        (executor.submit(load_url_mp, url, timeout_secs=10), url)
                        for url in urls)
            except Exception as e:
                print(e)
                # logger.error(e)
        # Note: leaving the "with" blocks above already waits for every
        # submitted future, so the loop below mainly tallies results.
        try:
            for future in futures.as_completed(future_to_url, timeout=timeout_secs):
                j += 1
                url = future_to_url[future]
        except Exception as e:
            # traceback.print_exc(file=sys.stdout)
            logger.error("j={j} url={u} e={e}".format(j=j, e=e, u=url))
        elapsed_time.append(time.time() - start)

    elapsed_time_str = ",".join(str(t) for t in elapsed_time)
    print("{pt},{u},{np},{et}".format(
        pt=process_type, u=len(urls), np=processes, et=elapsed_time_str))
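# load_url_mp() is not shown in this section; the sketch below is an
# assumption. Unlike load_url(), it takes no logger: everything handed
# to a ProcessPoolExecutor must be picklable, and logger objects (open
# file handles, thread locks) are not, which is presumably why the
# process variant fetches quietly and returns its result instead.

def load_url_mp(url, timeout_secs=10):
    """Fetch url in a worker process; return (url, status_or_error) (sketch)."""
    import urllib.request
    try:
        with urllib.request.urlopen(url, timeout=timeout_secs) as resp:
            resp.read()
            return (url, resp.getcode())
    except Exception as e:
        return (url, str(e))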