def crawl_handler_perf_test():
    """Performance driver: replay every URL from normalize_urls.tsv through
    the crawl_request handler pipeline.

    Each URL is wrapped in a fixed-shape crawl-request message (priority 1,
    depth 2, baidu.com as root/parent) and dispatched synchronously via
    HandlerRepository.process, so wall-clock time measures handler throughput.
    """
    candidate_urls = misc.load_file("normalize_urls.tsv")
    for candidate in candidate_urls:
        # Fresh message per URL — handlers may mutate or retain the dict.
        request = dict(
            url=candidate,
            source="parsed",
            crawl_priority=1,
            root_url="http://www.baidu.com/",
            parent_url="http://www.baidu.com/",
            crawl_depth=2,
        )
        ccrawler.handler.handler.HandlerRepository.process("crawl_request", request)
def valid_crawl_url_perf_test():
    """Performance driver: run CrawlUrlHelper.valid_crawl_url over every URL
    in ./normalize_urls.tsv.

    Calls the validator with no context (second argument None); results are
    discarded — only the validation throughput is of interest.
    """
    for candidate in misc.load_file("./normalize_urls.tsv"):
        CrawlUrlHelper.valid_crawl_url(candidate, None)