Пример #1
0
 def flush(self):
     """Remove the run-time records stored under this crawler's name.

     Deletes the ``Tag``, ``Event`` and ``Result`` entries keyed by
     ``self.name`` (in that order), commits the session and finally sends
     the ``crawler_flush`` signal with this crawler as the sender.
     """
     for model in (Tag, Event, Result):
         model.delete(self.name)
     session.commit()
     # Notify listeners only after the deletions have been committed.
     signals.crawler_flush.send(self)
Пример #2
0
 def flush(self):
     """Remove the run-time data kept for this crawler.

     Calls, in order, ``Queue.flush``, ``Tag.delete``, ``Event.delete``,
     ``CrawlerState.flush`` and ``CrawlerRun.flush``, each with this
     crawler as its only argument.
     """
     cleanup_steps = (
         Queue.flush,
         Tag.delete,
         Event.delete,
         CrawlerState.flush,
         CrawlerRun.flush,
     )
     for step in cleanup_steps:
         step(self)
Пример #3
0
    def skip_incremental(self, *criteria):
        """Perform an incremental check on a set of criteria.

        This can be used to execute a part of a crawler only once per an
        interval (which is specified by the ``expire`` setting). If the
        operation has already been performed (and should thus be skipped),
        this will return ``True``. If the operation needs to be executed,
        the returned value will be ``False``, and a tag is stored under the
        derived key so that a later call can detect the operation.

        :param criteria: values combined via ``make_key`` into the tag key.
        :returns: ``True`` to skip the operation, ``False`` to execute it.
        """
        if not self.incremental:
            return False

        # this is pure convenience, and will probably backfire at some point.
        key = make_key(criteria)
        if key is None:
            return False

        # this is used to re-run parts of a scrape after a certain interval,
        # e.g. half a year, or a year
        since = None
        if self.crawler.expire > 0:
            # The cutoff must lie ``expire`` days in the PAST. The previous
            # code subtracted a negative timedelta, which pushed the cutoff
            # into the future.
            # NOTE(review): presumably ``Tag.exists`` only counts tags saved
            # at or after ``since`` - confirm against the Tag model.
            since = datetime.utcnow() - timedelta(days=self.crawler.expire)

        if Tag.exists(self.crawler, key, since=since):
            return True
        self.set_tag(key, None)
        return False
Пример #4
0
    def skip_incremental(self, *criteria):
        """Tell the caller whether an operation may be skipped.

        Only active when ``self.incremental`` is truthy. The criteria are
        turned into a key with ``make_key``; if a tag for that key already
        exists for this crawler the operation counts as done and ``True``
        is returned. Otherwise a tag (with value ``None``) is stored for the
        key and ``False`` is returned so the operation is executed.

        ``False`` is also returned when incremental mode is off or when no
        key could be derived from the criteria.
        """
        if self.incremental:
            # this is pure convenience, and will probably backfire at some point.
            tag_key = make_key(*criteria)
            if tag_key is not None:
                if Tag.exists(self.crawler, tag_key):
                    return True
                # Record the operation so a later call finds the tag.
                self.set_tag(tag_key, None)
        return False
Пример #5
0
 def flush(self):
     """Remove the run-time records stored under this crawler's name.

     Deletes the ``Tag`` and ``Operation`` entries keyed by ``self.name``
     (in that order) and then commits the session.
     """
     for model in (Tag, Operation):
         model.delete(self.name)
     session.commit()
Пример #6
0
 def check_tag(self, key):
     """Return the result of ``Tag.exists`` for ``key`` on this crawler."""
     found = Tag.exists(self.crawler, key)
     return found
Пример #7
0
 def get_tag(self, key):
     """Return whatever ``Tag.find`` yields for ``key`` on this crawler."""
     found = Tag.find(self.crawler, key)
     return found
Пример #8
0
 def set_tag(self, key, value):
     """Store ``value`` under ``key`` for this crawler via ``Tag.save``.

     The return value of ``Tag.save`` is passed through unchanged.
     """
     saved = Tag.save(self.crawler, key, value)
     return saved
Пример #9
0
 def get_tag(self, key):
     """Return the ``value`` of the tag stored under ``key``, if any.

     Looks the tag up with ``Tag.find`` for this crawler; when nothing is
     found (``None``), ``None`` is returned.
     """
     found = Tag.find(self.crawler, key)
     if found is None:
         return None
     return found.value