Exemplos de mirror em Python

Linguagem de programação: Python

Espaço para nome / nome do pacote: core.mirror

Método / Função: mirror

Exemplos em hotexamples.com: 2

mirror em Python - 2 exemplos encontrados. Esses são os exemplos do mundo real mais bem avaliados de core.mirror.mirror em Python extraídos de projetos de código aberto. Você pode avaliar os exemplos para nos ajudar a melhorar a qualidade deles.

Exemplo n.º 1

0

Exibir arquivo

def extractor(url):
    """Extract details from the response body.

    Fetches ``url`` through ``requester``, optionally mirrors the response,
    sorts every link matched by ``rhref`` into the ``internal`` or
    ``external`` collection, and then runs the optional intel, JavaScript,
    custom-regex and key extraction passes over the same response.

    All configuration and result collections are names defined outside this
    function; nothing is returned.

    Args:
        url: Address of the page to fetch and analyse.
    """
    response = requester(url, main_url, delay, cook, headers, timeout,
                         host, proxies, user_agents, failed, processed)
    if clone:
        mirror(url, response)

    for found in rhref.findall(response):
        # Remove everything after a "#" to deal with in-page anchors
        link = found[1].replace('\'', '').replace('"', '').split('#')[0]
        # Skip anything that should not be crawled
        if not is_link(link, processed, files):
            continue

        if link[:4] == 'http':
            # Absolute URL: internal only when it lives under the target URL
            if link.startswith(main_url):
                verb('Internal page', link)
                internal.add(link)
            else:
                verb('External page', link)
                external.add(link)
        elif link[:2] == '//':
            # Protocol-relative URL ("//host/path")
            if link.split('/')[2].startswith(host):
                verb('Internal page', link)
                # ``link`` already starts with "//", so only the scheme and a
                # colon are prepended; joining with '://' produced a malformed
                # "scheme:////host/..." URL.
                # NOTE(review): ``schema`` is defined elsewhere; trailing ':'
                # and '/' are stripped so both "http" and "http:" work.
                # Confirm against its definition.
                internal.add(schema.rstrip(':/') + ':' + link)
            else:
                verb('External page', link)
                external.add(link)
        elif link[:1] == '/':
            # Link with a leading slash: appended to the current URL's base
            verb('Internal page', link)
            internal.add(remove_file(url) + link)
        else:
            # Bare relative link: join with exactly one "/" separator.
            # (A leading "/" on ``link`` is impossible here; that case is
            # handled by the branch above.)
            verb('Internal page', link)
            usable_url = remove_file(url)
            if usable_url.endswith('/'):
                internal.add(usable_url + link)
            else:
                internal.add(usable_url + '/' + link)

    if not only_urls:
        intel_extractor(url, response)
        js_extractor(response)
    if args.regex and not supress_regex:
        regxy(args.regex, response, supress_regex, custom)
    if api:
        # Record only matches whose entropy score reaches the threshold of 4
        for match in rentropy.findall(response):
            if entropy(match) >= 4:
                verb('Key', match)
                keys.add(url + ': ' + match)

Exemplo n.º 2

0

Exibir arquivo

Arquivo: photon.py Projeto: security-geeks/Photon

def extractor(url):
    """Extract details from the response body.

    Fetches ``url`` through ``requester``, optionally mirrors the response,
    sorts every link matched by ``rhref`` into the ``internal`` or
    ``external`` collection, and then runs the optional intel, JavaScript,
    custom-regex and key extraction passes over the same response.

    All configuration and result collections are names defined outside this
    function; nothing is returned.

    Args:
        url: Address of the page to fetch and analyse.
    """
    response = requester(url, main_url, delay, cook, headers, timeout,
                         host, proxies, user_agents, failed, processed)
    if clone:
        mirror(url, response)

    for found in rhref.findall(response):
        # Remove everything after a "#" to deal with in-page anchors
        link = found[1].replace('\'', '').replace('"', '').split('#')[0]
        # Skip anything that should not be crawled
        if not is_link(link, processed, files):
            continue

        if link[:4] == 'http':
            # Absolute URL: internal only when it lives under the target URL
            if link.startswith(main_url):
                verb('Internal page', link)
                internal.add(link)
            else:
                verb('External page', link)
                external.add(link)
        elif link[:2] == '//':
            # Protocol-relative URL ("//host/path")
            if link.split('/')[2].startswith(host):
                verb('Internal page', link)
                # ``link`` already starts with "//", so only the scheme and a
                # colon are prepended; joining with '://' produced a malformed
                # "scheme:////host/..." URL.
                # NOTE(review): ``schema`` is defined elsewhere; trailing ':'
                # and '/' are stripped so both "http" and "http:" work.
                # Confirm against its definition.
                internal.add(schema.rstrip(':/') + ':' + link)
            else:
                verb('External page', link)
                external.add(link)
        elif link[:1] == '/':
            # Link with a leading slash: appended to the current URL's base
            verb('Internal page', link)
            internal.add(remove_file(url) + link)
        else:
            # Bare relative link: join with exactly one "/" separator.
            # (A leading "/" on ``link`` is impossible here; that case is
            # handled by the branch above.)
            verb('Internal page', link)
            usable_url = remove_file(url)
            if usable_url.endswith('/'):
                internal.add(usable_url + link)
            else:
                internal.add(usable_url + '/' + link)

    if not only_urls:
        intel_extractor(url, response)
        js_extractor(response)
    if args.regex and not supress_regex:
        regxy(args.regex, response, supress_regex, custom)
    if api:
        # Record only matches whose entropy score reaches the threshold of 4
        for match in rentropy.findall(response):
            if entropy(match) >= 4:
                verb('Key', match)
                keys.add(url + ': ' + match)