def work(self):
    """ :inheritdoc: """
    job = self.jobs.reserve_job(self.queue)

    if job is False:
        return False

    try:
        url = Url.find(job.payload['url_id'])

        if not url or not can_crawl_url(url):
            self.jobs.clear_job(job)
            return False

        response = self.fetch(url)

        doc = Document.from_response(response, url)
        doc.purge_docs_for_url(url)
        doc.insert()

        if doc.can_index:
            doc.discover_urls()
            doc.discover_excerpts()
            doc.discover_images()

        # Schedule the job to be repeated after some period of time
        recrawl_at = datetime.now() + self.repeat_delta
        self.jobs.reschedule_job(job, recrawl_at)
    except Exception:
        # Release the job back onto the queue so another worker can retry it
        self.jobs.release_job(job)
        print("Releasing job %d because an exception occurred" % job.id)
        raise
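work() delegates the politeness check to can_crawl_url(), whose body isn't shown. A minimal sketch of what such a check could look like, assuming it consults the site's robots.txt (the fail-closed behavior and the Url.geturl() call are assumptions, not the post's actual implementation):

from urllib.parse import urljoin
from urllib.robotparser import RobotFileParser


def can_crawl_url(url, user_agent='*'):
    # Fetch and parse the site's robots.txt for this URL's host.
    parser = RobotFileParser()
    parser.set_url(urljoin(url.geturl(), '/robots.txt'))

    try:
        parser.read()
    except OSError:
        # If robots.txt is unreachable, err on the side of not crawling.
        return False

    return parser.can_fetch(user_agent, url.geturl())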
def discover_urls(self):
    """ Discover URLs in the document and save them in the database. """
    allowed_domains = get_allowed_domains()

    def is_allowed(u):
        return u.domain() in allowed_domains or u.domain() == ''

    insert_count = 0
    cursor = self.db.cursor()

    for link in self.soup.find_all('a'):
        href = link.get('href')

        # Skip anchors that have no href attribute
        if not href:
            continue

        url = Url(url=href, base=self.url)

        if is_allowed(url):
            url.insert_bare(cursor)
            insert_count += 1

    if insert_count > 0:
        print("Discovered %d new URLs" % insert_count)

    self.db.commit()
    cursor.close()
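Each accepted URL is handed an open cursor via insert_bare(), which isn't shown. A minimal sketch of what it could do, assuming a SQLite backend with a unique constraint on the url column (the table and column names are placeholders, not the post's actual schema):

def insert_bare(self, cursor):
    # Assumed schema: CREATE TABLE urls (url TEXT UNIQUE, ...).
    # INSERT OR IGNORE silently skips URLs discovered earlier, so
    # re-crawling a page doesn't pile up duplicates.
    cursor.execute(
        'INSERT OR IGNORE INTO urls (url) VALUES (?)',
        (self.geturl(),)
    )

Reusing one cursor for the whole loop and committing once at the end of discover_urls() keeps the writes cheap compared to a commit per link.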
def test_default_url_parsing():
    location = 'https://syntaxleiden.nl/foo'
    u = Url(url=location)

    assert_equal(u.geturl(), location)
def test_relative_url_parsing_with_scheme():
    location = '/foo'
    u = Url(url=location, base='https://syntaxleiden.nl')

    assert_equal(u.geturl(), 'https://syntaxleiden.nl/foo')
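Both tests hinge on the Url constructor resolving a relative path against an optional base. A minimal sketch of the parsing side of that class, built on urllib.parse; the real class also persists itself to the database, so only the behavior the tests pin down is reconstructed here:

from urllib.parse import urljoin, urlparse


class Url:
    def __init__(self, url, base=None):
        # Resolve relative URLs against the base, as in the second test.
        self.parsed = urlparse(urljoin(base, url) if base else url)

    def geturl(self):
        return self.parsed.geturl()

    def domain(self):
        # An unresolved relative URL has an empty netloc, which is what
        # the u.domain() == '' branch in is_allowed() above relies on.
        return self.parsed.netloc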
def add_url():
    if request.method == 'POST':
        url = Url(url=request.form['url'])

        return render_template('add_url.html', added=url.insert(), url=url)
    else:
        return render_template('add_url.html', added=None)
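The snippet leaves out the route decorator. Assuming the handler is registered at a hypothetical '/add-url' path accepting GET and POST, it could be exercised with Flask's built-in test client (the module name and path below are placeholders):

from app import app  # hypothetical module exposing the Flask app


def test_add_url_form():
    client = app.test_client()

    # GET renders the empty form (added=None).
    assert client.get('/add-url').status_code == 200

    # POST submits a URL; the template reports whether the insert succeeded.
    response = client.post('/add-url', data={'url': 'https://syntaxleiden.nl/'})
    assert response.status_code == 200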