def set_status_code_for_crawled_url(self, url: Url, status_code: int):
    """Record the HTTP status code on a crawled url document.

    Issues an update against the ``Index.CRAWLED`` index for the document
    whose id is ``url.hash()``, writing ``status_code`` into its
    ``status_code`` field. The request is sent with
    ``retry_on_conflict=3``.

    Args:
        url: Url whose crawled document should be updated.
        status_code: Status code of the http request made to the url.
    """
    # Only the status_code field is sent; it goes under the 'doc' key.
    partial_doc = {'status_code': status_code}
    self.es.update(
        index=Index.CRAWLED,
        doc_type='url',
        id=url.hash(),
        retry_on_conflict=3,
        body={'doc': partial_doc},
    )
def lock_crawled_url(self, url: Url, refresh_rate: Type[RefreshRate]):
    """Lock a crawled url.

    Places a lock on the crawled url document for the given refresh rate
    by updating its ``lock_format`` and ``lock_value`` fields in the
    ``Index.CRAWLED`` index. The request is sent with
    ``retry_on_conflict=3``.

    Args:
        url: Url to lock; ``url.hash()`` is used as the document id.
        refresh_rate: Refresh rate class to use (Hourly, Daily, etc.).
            ``lock_format()`` is called on the class itself, while
            ``lock()`` is called on a newly created instance.
    """
    # Resolve the call target, index and document id first, then the lock
    # fields, so everything is evaluated in the same order as one inline call.
    send_update = self.es.update
    crawled_index = Index.CRAWLED
    doc_id = url.hash()
    lock_fields = {
        'lock_format': refresh_rate.lock_format(),
        'lock_value': refresh_rate().lock(),
    }
    send_update(
        index=crawled_index,
        doc_type='url',
        id=doc_id,
        retry_on_conflict=3,
        body={'doc': lock_fields},
    )