def flush(self):
    """Delete all run-time data generated by this crawler.

    Removes this crawler's rows from every backing store, commits the
    transaction, then notifies listeners via the flush signal.
    """
    # Each of these models keys its rows on the crawler name.
    for model in (Tag, Event, Result):
        model.delete(self.name)
    session.commit()
    signals.crawler_flush.send(self)
def flush(self):
    """Delete all run-time data generated by this crawler.

    Clears the task queue, per-crawler tags/events, and the recorded
    crawler state and run history.
    """
    # Same order as before: queue first, then records, then run state.
    cleanup_steps = (
        Queue.flush,
        Tag.delete,
        Event.delete,
        CrawlerState.flush,
        CrawlerRun.flush,
    )
    for step in cleanup_steps:
        step(self)
def skip_incremental(self, *criteria):
    """Perform an incremental check on a set of criteria.

    This can be used to execute a part of a crawler only once per an
    interval (which is specified by the ``expire`` setting). If the
    operation has already been performed (and should thus be skipped),
    this will return ``True``. If the operation needs to be executed,
    the returned value will be ``False``.
    """
    if not self.incremental:
        return False

    # this is pure convenience, and will probably backfire at some point.
    # FIX: spread the criteria — the sibling implementation calls
    # make_key(*criteria); passing the tuple as one argument produced a
    # differently-derived key.
    key = make_key(*criteria)
    if key is None:
        return False

    # this is used to re-run parts of a scrape after a certain interval,
    # e.g. half a year, or a year
    since = None
    if self.crawler.expire > 0:
        # FIX: the cut-off must lie *in the past*. The original built a
        # negative timedelta and subtracted it, which produced a timestamp
        # in the future — no tag could ever match, so expiry never skipped.
        # NOTE(review): assumes Tag.exists(since=...) matches tags newer
        # than `since` — confirm against the Tag model.
        since = datetime.utcnow() - timedelta(days=self.crawler.expire)

    if Tag.exists(self.crawler, key, since=since):
        return True
    # Record that this work unit has now been performed.
    self.set_tag(key, None)
    return False
def skip_incremental(self, *criteria):
    """Perform an incremental check on a set of criteria.

    This can be used to execute a part of a crawler only once per an
    interval (which is specified by the ``expire`` setting). If the
    operation has already been performed (and should thus be skipped),
    this will return ``True``. If the operation needs to be executed,
    the returned value will be ``False``.
    """
    if not self.incremental:
        return False

    # this is pure convenience, and will probably backfire at some point.
    key = make_key(*criteria)
    if key is None:
        return False

    # Already done before? Then the caller should skip this step.
    if Tag.exists(self.crawler, key):
        return True

    # First time: mark the step as performed so future runs skip it.
    self.set_tag(key, None)
    return False
def flush(self):
    """Delete all run-time data generated by this crawler."""
    name = self.name
    # Both models key their rows on the crawler name.
    Tag.delete(name)
    Operation.delete(name)
    session.commit()
def check_tag(self, key):
    """Return whether a tag under ``key`` exists for this crawler."""
    exists = Tag.exists(self.crawler, key)
    return exists
def get_tag(self, key):
    """Look up the tag record stored for this crawler under ``key``."""
    tag = Tag.find(self.crawler, key)
    return tag
def set_tag(self, key, value):
    """Persist ``value`` under ``key`` for this crawler."""
    saved = Tag.save(self.crawler, key, value)
    return saved
def get_tag(self, key):
    """Return the value stored under ``key``, or None when absent."""
    tag = Tag.find(self.crawler, key)
    if tag is None:
        return None
    return tag.value