Example #1
from urllib.parse import urljoin


def extract_async(url):
    # fetch_async, canonicalize, same_domain, and URL_EXPR are helpers
    # defined elsewhere in the crawler; a sketch of them follows below
    data = yield from fetch_async(url)
    found_urls = set()
    for match in URL_EXPR.finditer(data):
        found = canonicalize(match.group('url'))
        if same_domain(url, found):
            found_urls.add(urljoin(url, found))
    return url, data, sorted(found_urls)
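Example #1 leans on several helpers the snippet doesn't show. Here is a minimal sketch of plausible implementations in the same pre-3.7 generator-coroutine style; the URL_EXPR pattern, the normalization rules, and the thread-pool fetch are all assumptions, not the original code:

import asyncio
import re
from urllib.parse import urlsplit, urlunsplit
from urllib.request import urlopen

URL_EXPR = re.compile(r'href="(?P<url>[^"]+)"')  # assumed link pattern

def canonicalize(url):
    # Assumed normalization: lowercase scheme and host, drop the fragment
    parts = urlsplit(url)
    return urlunsplit((parts.scheme.lower(), parts.netloc.lower(),
                       parts.path or '/', parts.query, ''))

def same_domain(base, other):
    # Relative links carry no host, so count them as same-domain
    other_host = urlsplit(other).netloc
    return not other_host or other_host == urlsplit(base).netloc

@asyncio.coroutine  # matches the yield-from style here; removed in Python 3.11
def fetch_async(url):
    # Offload the blocking download to the default executor thread pool
    loop = asyncio.get_event_loop()
    data = yield from loop.run_in_executor(
        None, lambda: urlopen(url).read().decode('utf-8', 'replace'))
    return data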
Example #2
from queue import Queue
from threading import Thread


def crawl_parallel(start_url, max_depth):
    fetch_queue = Queue()  # holds (crawl_depth, url) work items
    fetch_queue.put((0, canonicalize(start_url)))

    seen_urls, result = set(), []
    # consumer is the worker loop, defined elsewhere; sketched below
    func = lambda: consumer(fetch_queue, max_depth, seen_urls, result)
    for _ in range(3):
        Thread(target=func, daemon=True).start()

    fetch_queue.join()  # block until every queued URL has been processed
    return result
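The consumer worker isn't shown in Example #2. One plausible implementation follows; fetch() and extract() are hypothetical helpers (a blocking download and a link parser), and the real version may differ:

def consumer(fetch_queue, max_depth, seen_urls, result):
    while True:
        depth, url = fetch_queue.get()
        try:
            if url in seen_urls:
                continue
            seen_urls.add(url)  # set ops are atomic under the GIL
            data = fetch(url)                     # assumed blocking download
            result.append((depth, url, data))
            if depth < max_depth:
                for found in extract(url, data):  # assumed link parser
                    fetch_queue.put((depth + 1, found))
        finally:
            fetch_queue.task_done()  # lets fetch_queue.join() return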
Example #3
def crawl_async(start_url, max_depth):
    seen_urls = set()
    to_fetch = [canonicalize(start_url)]
    results = []
    for depth in range(max_depth + 1):
        # Fetch the whole frontier for this depth concurrently;
        # extract_multi_async is sketched below
        batch = yield from extract_multi_async(to_fetch, seen_urls)
        to_fetch = []
        for url, data, found_urls in batch:
            results.append((depth, url, data))
            to_fetch.extend(found_urls)

    return results
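extract_multi_async isn't shown either. A plausible version fans extract_async from Example #1 out over the unseen URLs and gathers the results concurrently; this is one possible shape, not the original:

@asyncio.coroutine  # assumed, matching the generator-coroutine style above
def extract_multi_async(to_fetch, seen_urls):
    futures = []
    for url in to_fetch:
        if url not in seen_urls:
            seen_urls.add(url)
            futures.append(extract_async(url))
    # gather runs the fetches concurrently and preserves input order
    batch = yield from asyncio.gather(*futures)
    return batch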
Example #4
import asyncio
from sys import argv


def main():
    url = canonicalize(argv[1])

    # Bridge the gap between sync and async (pre-Python-3.7 loop API)
    future = asyncio.Task(extract_async(url))
    loop = asyncio.get_event_loop()
    loop.run_until_complete(future)
    loop.close()

    _, data, found_urls = future.result()  # re-raises the task's exception, if any
    print('%s is %d bytes, %d urls:\n%s' %
          (url, len(data), len(found_urls), '\n'.join(found_urls)))
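For comparison, on Python 3.7+ and with extract_async rewritten as an async def coroutine, the same sync-to-async bridge reduces to a single asyncio.run call; a sketch under those assumptions, not the original code:

import asyncio
from sys import argv

def main():
    url = canonicalize(argv[1])
    # asyncio.run creates a fresh loop, runs the task, and closes the loop
    _, data, found_urls = asyncio.run(extract_async(url))
    print('%s is %d bytes, %d urls:\n%s' %
          (url, len(data), len(found_urls), '\n'.join(found_urls)))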
Example #5
from queue import Queue
from threading import Thread


def parallel_wordcount(start_url, max_depth, word_length):
    fetch_queue = Queue()  # holds (crawl_depth, url) work items
    fetch_queue.put((0, canonicalize(start_url)))
    count_queue = Queue()  # holds (url, data) pages awaiting counting

    seen_urls = set()
    # fetcher downloads pages and feeds count_queue; sketched below
    func = lambda: fetcher(fetch_queue, max_depth, seen_urls, count_queue)
    for _ in range(3):
        Thread(target=func, daemon=True).start()

    result = []
    # counter tallies words of the requested length; sketched below
    func = lambda: counter(count_queue, word_length, result)
    for _ in range(3):
        Thread(target=func, daemon=True).start()

    fetch_queue.join()   # wait until every page has been fetched...
    count_queue.join()   # ...and every fetched page has been counted
    return result
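The fetcher and counter workers are defined elsewhere. A hypothetical pair that matches the queue protocol above; fetch() and extract() are the same assumed helpers as in the consumer sketch:

def fetcher(fetch_queue, max_depth, seen_urls, count_queue):
    while True:
        depth, url = fetch_queue.get()
        try:
            if url in seen_urls:
                continue
            seen_urls.add(url)
            data = fetch(url)                     # assumed blocking download
            count_queue.put((url, data))          # hand the page to the counters
            if depth < max_depth:
                for found in extract(url, data):  # assumed link parser
                    fetch_queue.put((depth + 1, found))
        finally:
            fetch_queue.task_done()

def counter(count_queue, word_length, result):
    while True:
        url, data = count_queue.get()
        try:
            count = sum(1 for word in data.split()
                        if len(word) == word_length)
            result.append((url, count))
        finally:
            count_queue.task_done()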