def bytes_received(self, data, request, spider):
    """Add the size of ``data`` to a running total and stop the download at the limit.

    The handler does nothing (returns ``None`` without counting) when any of
    the skip conditions below holds. Otherwise ``len(data)`` is added to
    ``self.total_bytes_received``, and once that total reaches
    ``self.max_bytes`` a ``StopDownload(fail=False)`` is raised.

    NOTE(review): the signature matches Scrapy's ``bytes_received`` signal, so
    this is presumably connected as a handler for it -- confirm at the place
    where the signal is connected.

    :param data: the received chunk; only its length is used
    :param request: the request being downloaded; only ``callback`` is read
    :param spider: the running spider; ``pluck``, ``dont_truncate``,
        ``root_path`` and ``unflatten`` are read
    :raises StopDownload: with ``fail=False`` once the running total is at
        least ``self.max_bytes``
    """
    # Only truncate when plucking, and when truncation has not been disabled.
    if not spider.pluck or spider.dont_truncate:
        return

    # We only limit bytes received for final requests (i.e. where the callback is the default `parse` method).
    if request.callback:
        return

    # ijson will parse the value at `root_path`, which can go to the end of the file.
    # https://github.com/ICRAR/ijson/issues/43
    if spider.root_path:
        return

    # XLSX files must be read in full.
    if spider.unflatten:
        return

    self.total_bytes_received += len(data)
    if self.total_bytes_received < self.max_bytes:
        return
    raise StopDownload(fail=False)
def headers_received(self, headers, body_length, request, spider):
    """Store the received headers in ``self.meta`` and abort the download.

    ``headers`` is saved under the ``"headers_received"`` key, then
    ``StopDownload(fail=True)`` is raised unconditionally. The
    ``body_length``, ``request`` and ``spider`` arguments are not used.

    :raises StopDownload: always, with ``fail=True``
    """
    recorded = self.meta
    recorded["headers_received"] = headers
    abort = StopDownload(fail=True)
    raise abort
def bytes_received(self, data, request, spider):
    """Store the received chunk in ``self.meta`` and abort the download.

    ``data`` is saved under the ``"bytes_received"`` key (replacing any value
    already stored there), then ``StopDownload(fail=True)`` is raised
    unconditionally. The ``request`` and ``spider`` arguments are not used.

    :raises StopDownload: always, with ``fail=True``
    """
    recorded = self.meta
    recorded["bytes_received"] = data
    abort = StopDownload(fail=True)
    raise abort
def headers_received(self, headers, body_length, request, spider):
    """Run the parent ``headers_received`` handler, then stop the download.

    All four arguments are forwarded positionally and unchanged to the parent
    implementation. If that call returns, ``StopDownload(fail=False)`` is
    raised.

    NOTE(review): the parent class is not visible from here. If the parent
    handler itself raises, the ``raise`` below is never reached -- confirm
    which behaviour is intended.

    :raises StopDownload: with ``fail=False`` once the parent handler returns
    """
    handler_args = (headers, body_length, request, spider)
    super().headers_received(*handler_args)
    stop = StopDownload(fail=False)
    raise stop
def bytes_received(self, data, request, spider):
    """Run the parent ``bytes_received`` handler, then stop the download.

    All three arguments are forwarded positionally and unchanged to the parent
    implementation. If that call returns, ``StopDownload(fail=False)`` is
    raised.

    NOTE(review): the parent class is not visible from here. If the parent
    handler itself raises, the ``raise`` below is never reached -- confirm
    which behaviour is intended.

    :raises StopDownload: with ``fail=False`` once the parent handler returns
    """
    handler_args = (data, request, spider)
    super().bytes_received(*handler_args)
    stop = StopDownload(fail=False)
    raise stop