def main():
    """Start an HTTP fetch whose series context also carries a Redis URL.

    Command line: ``<http URL> <redis URL>``. With any other argument count a
    usage line is printed and the process exits with status 1.

    Both URLs are normalized so that a bare ``host/path`` gets a default
    scheme, then stored on a ``Context`` object attached to the series. What
    ``http_callback`` and ``series_callback`` do with that context is defined
    outside this function.
    """
    if len(sys.argv) != 3:
        print("USAGE: {} <http URL> <redis URL>".format(sys.argv[0]))
        sys.exit(1)

    context = Context()
    context.success = False

    # A scheme-less URL defaults to plain http://, the same default the other
    # wget-style entry points use. Forcing https:// would break hosts that
    # only speak HTTP, while sites that upgrade to HTTPS are still reachable
    # through the redirects the task is allowed to follow.
    url = sys.argv[1]
    if not url.lower().startswith(("http://", "https://")):
        url = "http://" + url
    context.http_url = url

    url = sys.argv[2]
    if not url.lower().startswith(("redis://", "rediss://")):
        url = "redis://" + url
    context.redis_url = url

    # Positional arguments kept as in the original call; in pywf they are
    # presumably (url, redirect_max, retry_max, callback) - confirm against
    # the binding's signature.
    task = wf.create_http_task(context.http_url, 3, 2, http_callback)
    req = task.get_req()
    req.add_header_pair("Accept", "*/*")
    req.add_header_pair("User-Agent", "Wget/1.14 (linux-gnu)")
    req.add_header_pair("Connection", "close")

    # Limit the http response to size 20M
    task.get_resp().set_size_limit(20 * 1024 * 1024)
    # no more than 30 seconds receiving http response
    task.set_receive_timeout(30 * 1000)

    series = wf.create_series_work(task, series_callback)
    series.set_context(context)
    series.start()
    wf.wait_finish()
def process(t):
    """Forward the request carried by task ``t`` through a new HTTP task.

    A fresh ``Context`` recording the requested URI, the originating task and
    the request's keep-alive flag is attached to the series that owns ``t``.
    The incoming request is then moved into a newly created HTTP task, which
    is appended to that same series.
    """
    incoming = t.get_req()
    owner_series = wf.series_of(t)

    state = Context()
    state.url = incoming.get_request_uri()
    state.proxy_task = t
    owner_series.set_context(state)
    # Read the flag now: the request is moved into the outgoing task below.
    state.is_keep_alive = incoming.is_keep_alive()

    outgoing_task = wf.create_http_task(
        incoming.get_request_uri(), 0, 0, http_callback
    )
    outgoing_req = outgoing_task.get_req()

    # Replace the incoming request's URI with the one the new task computed
    # for itself, then move the whole request into the outgoing task.
    incoming.set_request_uri(outgoing_req.get_request_uri())
    incoming.move_to(outgoing_req)

    # Cap the response at 200 * 1024 * 1024 bytes.
    outgoing_task.get_resp().set_size_limit(200 * 1024 * 1024)

    owner_series << outgoing_task
def main():
    """Fetch the single URL given on the command line with an HTTP task.

    Command line: ``<http URL>``. With any other argument count a usage line
    is printed and the process exits with status 1. A URL without an
    ``http://`` or ``https://`` prefix (case-insensitive) gets ``http://``
    prepended. The result is delivered to ``wget_callback``.
    """
    argv = sys.argv
    if len(argv) != 2:
        print("USAGE: {} <http URL>".format(argv[0]))
        sys.exit(1)

    target = argv[1]
    has_scheme = (
        target[:7].lower() == "http://" or target[:8].lower() == "https://"
    )
    if not has_scheme:
        target = "http://" + target

    task = wf.create_http_task(
        target, redirect_max=4, retry_max=2, callback=wget_callback
    )

    # Request headers, added in the same order as before.
    request = task.get_req()
    headers = (
        ("Accept", "*/*"),
        ("User-Agent", "Wget/1.14 (linux-gnu)"),
        ("Connection", "close"),
    )
    for name, value in headers:
        request.add_header_pair(name, value)

    task.start()
    wf.wait_finish()
def main():
    """Fetch every URL given on the command line as one parallel work.

    Each command-line argument becomes its own HTTP task wrapped in a
    single-task series; all series are added to one parallel work whose
    completion is reported to ``parallel_callback``. A URL without an
    ``http://`` or ``https://`` prefix (case-insensitive) gets ``http://``
    prepended. Each series carries a ``Context`` whose ``url`` attribute is
    the normalized URL.
    """
    parallel_work = wf.create_parallel_work(parallel_callback)

    for url in sys.argv[1:]:
        if not url.lower().startswith(("http://", "https://")):
            url = "http://" + url

        task = wf.create_http_task(
            url, redirect_max=4, retry_max=2, callback=http_callback
        )
        req = task.get_req()
        req.add_header_pair("Accept", "*/*")
        req.add_header_pair("User-Agent", "Wget/1.14 (linux-gnu)")
        req.add_header_pair("Connection", "close")

        # Attach the normalized URL to the series through its context.
        ctx = Context()
        ctx.url = url
        series = wf.create_series_work(task, None)
        series.set_context(ctx)
        parallel_work.add_series(series)

    wf.start_series_work(parallel_work, None)
    # Use wait_finish(), the same blocking call the other entry points use;
    # the previous wf.wait() does not match that API.
    wf.wait_finish()