Exemplo n.º 1
0
    def set_status_code_for_crawled_url(self, url: Url, status_code: int):
        """Record the HTTP status code on a crawled url's document.

        Issues an update through ``self.es`` against ``Index.CRAWLED`` for the
        document whose id is ``url.hash()``, sending only the ``status_code``
        field in the ``doc`` body.

        Args:
            url: Url whose document receives the status code
            status_code: the status code of the http request to the url
        """
        update = self.es.update
        crawled_index = Index.CRAWLED
        doc_id = url.hash()
        fields = {'status_code': status_code}
        update(
            index=crawled_index,
            doc_type='url',
            id=doc_id,
            # Passed through unchanged; presumably the client retries the
            # update on version conflicts -- confirm against the client docs.
            retry_on_conflict=3,
            body={'doc': fields},
        )
Exemplo n.º 2
0
    def lock_crawled_url(self, url: Url, refresh_rate: Type[RefreshRate]):
        """Lock a crawled url.

        Writes lock information for the given refresh rate onto the url's
        document in ``Index.CRAWLED`` (document id is ``url.hash()``).

        Args:
            url: Url to lock
            refresh_rate: Refresh rate class to use (Hourly, Daily, etc.).
                ``lock_format()`` is called on the class itself, while
                ``lock()`` is called on a newly created instance of it.
        """
        update = self.es.update
        crawled_index = Index.CRAWLED
        doc_id = url.hash()
        lock_fields = {
            'lock_format': refresh_rate.lock_format(),
            'lock_value': refresh_rate().lock(),
        }
        update(
            index=crawled_index,
            doc_type='url',
            id=doc_id,
            # Passed through unchanged; presumably the client retries the
            # update on version conflicts -- confirm against the client docs.
            retry_on_conflict=3,
            body={'doc': lock_fields},
        )