Example #1
 def test_fill_disallow_urls_from_robot(self):
     with patch.object(requests, 'get') as mock_get:
         with open('fake_robots.txt', 'r') as fake_robots_txt:
             mock_get.return_value = FakeResponse()
             mock_get.return_value.text = fake_robots_txt.read()
             test_crawler = Crawler(
                 'https://a/',
                 [''], {})
             test_crawler.fill_disallow_urls(URL('https://a/'))
             test_crawler.close()
             self.assertEqual({re.compile('https://a/b.+', re.IGNORECASE)},
                              test_crawler.disallow_urls)
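Example #1 relies on a FakeResponse test double and a fake_robots.txt fixture that are not shown in the snippet. A minimal sketch of what such a stub might look like, assuming the crawler only needs the response's text attribute (the status_code field is an extra assumption):

# Hypothetical stand-in for the FakeResponse object patched over requests.get;
# the test only sets and reads .text, so status_code is an assumed extra.
class FakeResponse:
    def __init__(self, text='', status_code=200):
        self.text = text
        self.status_code = status_code

# fake_robots.txt presumably contains a rule such as "Disallow: /b", which the
# crawler compiles into the expected re.compile('https://a/b.+', re.IGNORECASE).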
Example #2
 def test_searcher_with_seen_urls(self):
     with patch.object(Crawler, 'get_html') as mock_get_html:
         mock_get_html.return_value = '<a href=http://scala-lang.org></a>' \
                                      '<a href=https://scala11.html></a>'
         with patch.object(Crawler, 'write_html') as mock_write_html:
             mock_write_html.return_value = None
             test_crawler = Crawler(
                 'https://docs.scala-lang.org/ru/tour/tour-of-scala.html',
                 ['scala'], {}, 2)
             test_crawler.seen_urls.add(Page(URL('http://scala-lang.org')))
             test_result = test_crawler.crawl()
             test_crawler.close()
             assert 'http://scala-lang.org' not in test_result
Example #3
 def test_crawler_zero_result(self):
     with patch.object(Crawler, 'get_html') as mock_get_html:
         mock_get_html.return_value = '<a href=https://scala1.html></a>'
         with patch.object(Crawler, 'write_html') as mock_write_html:
             mock_write_html.return_value = None
             test_crawler = Crawler(
                 'https://docs.scala-lang.org/ru/tour/tour-of-scala.html',
                 ['dog'],
                 {},
                 2)
             test_result = test_crawler.crawl()
             test_crawler.close()
             self.assertEqual(test_result, set())
Example #4
 def test_update_parents(self):
     with patch.object(Crawler, 'get_html') as mock_get_html:
         mock_get_html.return_value = '<a href=http://a/c/></a>' \
                                      '<a href=http://a/b/></a>'
         with patch.object(Crawler, 'write_html') as mock_write_html:
             mock_write_html.return_value = None
             test_crawler = Crawler(
                 'http://a',
                 [''], {}, max_urls_count=3)
             test_result = test_crawler.crawl()
             test_crawler.close()
             for page in test_result:
                 if page.parent:
                     self.assertEqual(page.parent,
                                      Page(URL('http://a')))
Example #5
def crawler_sqlmap(entry_url,
                   depth=-1,
                   level=1,
                   threads=2,
                   timeout=30,
                   checkhost=True):
    """启动sqlmap扫描的入口函数。

    :param entry_url: 扫描网站的入口地址
    :param depth: 网页爬虫爬取页面深度,-1则表示不设置深度,默认-1
    :param level: sqlmap扫描测试等级:1-5(默认为1),等级越高使用的测试样例越多,结果越精确,时间也越长
    :param threads: sqlmap多线程扫描设置(默认为2)
    :param timeout: sqlmap扫描超时时间(默认30s)
    :param checkhost: 检查爬取链接是否属于同一域
    :return: 返回值为四元组(ret, url, simple, content)
            ret: 执行结果, False为失败, True为成功
            url: 扫描目标地址
            simple: 解析content抽取重要数据生成的报告,字典类型
            content: sqlmap返回的完整报告,字典类型
            若执行结果为False,那么把扫描错误信息存在扫描关键结果(simple)这个位置
    """
    settings = Setting(handle=False)
    settings.depth = depth
    settings.nocheckhost = not checkhost
    settings.level = level
    settings.threads = threads
    settings.timeout = timeout

    sqlmap, crawler = None, None
    try:
        sqlmap, ip, port = start_sqlmap()
        # The Crawler must be created after sqlmap has started so that it can
        # pick up sqlmap's port number correctly.
        crawler = Crawler(BASE_DIR, ip, port, entry_url, setting=settings)
        crawler.run()
        cont, simple = crawler.raw_report()
        return True, entry_url, simple, cont
    except Exception:
        err = traceback.format_exc()
        logger.error(err)
        return False, entry_url, err, {}
    finally:
        if crawler: crawler.close()
        if sqlmap: sqlmap.terminate()
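A minimal usage sketch for the 4-tuple documented above; the target URL and the way the summary is consumed are assumptions, not taken from the original snippet:

# Hypothetical caller: unpack (ret, url, simple, content) and branch on the flag.
ret, url, simple, content = crawler_sqlmap(
    'http://testphp.vulnweb.com/',  # assumed example target
    depth=2,
    level=1,
    threads=2,
    timeout=30,
    checkhost=True)
if ret:
    # simple holds the parsed summary, content the full sqlmap report (both dicts)
    print('scan finished for', url, '->', simple)
else:
    # on failure the traceback text ends up in the simple slot
    print('scan failed for', url, ':', simple)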
Example #6
 def test_searcher_with_result(self):
     with patch.object(Crawler, 'get_html') as mock_get_html:
         mock_get_html.return_value = '<a href=https://scala1.html></a>' \
                                      '<a href=https://scala2.html></a>' \
                                      '<a href=https://scala3.html></a>' \
                                      '<a href=https://scala4.html></a>' \
                                      '<a href=https://scala5.html></a>' \
                                      '<a href=https://scala6.html></a>' \
                                      '<a href=https://scala7.html></a>' \
                                      '<a href=https://scala8.html></a>' \
                                      '<a href=https://scala9.html></a>' \
                                      '<a href=https://scala10.html></a>' \
                                      '<a href=https://scala11.html></a>'
         with patch.object(Crawler, 'write_html') as mock_write_html:
             mock_write_html.return_value = None
             test_crawler = Crawler(
                 'https://docs.scala-lang.org/ru/tour/tour-of-scala.html',
                 ['scala'], {})
             test_result = test_crawler.crawl()
             test_crawler.close()
             self.assertEqual(len(test_result), 10)
Example #7
                                  default='pages',
                                  help='Directory for downloaded pages')
    arguments_parser.add_argument('-g', action='store_true', help='Show graph')
    arguments_parser.add_argument('-w',
                                  action='store_true',
                                  help='Save found pages')
    args = arguments_parser.parse_args()
    white_domains = []
    for domain in args.wildcard:
        if domain.startswith('*'):
            white_domains.append(re.compile(fr'[^.]+.{domain[1::]}'))
        else:
            white_domains.append(domain)
    if args.start_url[-1] == '/':
        url = args.start_url[:-1]
    else:
        url = args.start_url
    crawler = Crawler(url, args.request, white_domains, args.d, args.f, args.w)
    try:
        result = crawler.crawl()
        if args.g:
            show_graph(result)
        for link in result:
            print(link)
        print('Program is completed')
        plt.show()
    except KeyboardInterrupt:
        print('Program is completed')
    finally:
        crawler.close()
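Example #7 begins mid-way through the parser setup, so the definitions of start_url, request, wildcard, -d and -f are missing; the visible default='pages' tail presumably closes one of them. A hypothetical reconstruction of that missing preamble, with only the argument names taken from the fragment and every type, default and help text assumed:

import argparse

# Assumed reconstruction of the truncated parser setup; the names come from the
# args.* accesses in the fragment, everything else (types, defaults, help) is guessed.
arguments_parser = argparse.ArgumentParser(description='Simple web crawler')
arguments_parser.add_argument('start_url', help='URL to start crawling from')
arguments_parser.add_argument('request', nargs='+', help='Search terms to look for')
arguments_parser.add_argument('--wildcard', nargs='*', default=[],
                              help='Allowed domains, e.g. *.scala-lang.org')
arguments_parser.add_argument('-d', type=int, default=10,
                              help='Maximum number of pages to crawl')
arguments_parser.add_argument('-f',
                              default='pages',
                              help='Directory for downloaded pages')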