Exemplo n.º 1
0
    def fetch(self):
        """Lazily download the response body and return its local path.

        Resolution order:

        1. If a local path is already known, return it unchanged.
        2. If a content hash is known, load the file from ``storage`` into
           the context's work path.
        3. If a live response is available, stream it to a file in the work
           path, archive that file and record the resulting content hash.

        Returns:
            The local file path, or ``None`` when there is neither a cached
            path, a content hash, nor a response to read from.
        """
        if self._file_path is not None:
            return self._file_path

        temp_path = self.context.work_path
        if self._content_hash is not None:
            self._file_path = storage.load_file(self._content_hash,
                                                temp_path=temp_path)
            return self._file_path

        if self.response is None:
            return self._file_path

        if self.content_type == 'text/html':
            download_path = html_filename(self.url, temp_path)
        else:
            download_path = file_filename(self.url, temp_path)

        digest = sha1()
        with open(download_path, 'wb') as fh:
            for chunk in self.response.iter_content(chunk_size=8192):
                digest.update(chunk)
                fh.write(chunk)

        # Publish the path only once the body has been written completely.
        # Setting it before the download would make the early-return guard
        # above hand out a truncated file after a failed transfer.
        self._file_path = download_path
        self._remove_file = True
        self._content_hash = storage.archive_file(
            self._file_path, content_hash=digest.hexdigest())
        if self.http.cache and self.ok:
            # NOTE(review): retrieved_at is assigned after serialize() runs;
            # if serialize() includes it, the cached tag will not carry the
            # new timestamp -- confirm this ordering is intended.
            self.context.set_tag(self.request_id, self.serialize())
        self.retrieved_at = datetime.utcnow().isoformat()
        return self._file_path
Exemplo n.º 2
0
    def _stream_content(self):
        """Download the response body to a local file and return its path.

        When there is no live response, the file is loaded from ``storage``
        using the known content hash. Otherwise the response is streamed
        into a fresh temporary file, which is then archived; the resulting
        content hash is stored on the instance.

        Returns:
            The path of the local file holding the content.

        Raises:
            Any exception raised while streaming or writing the body. The
            temporary file is removed before the exception propagates.
        """
        if self.response is None:
            self._file_path = storage.load_file(self._content_hash)
            return self._file_path

        fd, download_path = tempfile.mkstemp()
        digest = sha1()
        try:
            # Write through the descriptor mkstemp() already opened instead
            # of closing it and re-opening the path.
            with os.fdopen(fd, 'wb') as fh:
                for chunk in self.response.iter_content(chunk_size=8192):
                    digest.update(chunk)
                    fh.write(chunk)
        except BaseException:
            # The download failed part-way: drop the partial temp file so it
            # neither leaks nor gets mistaken for a complete download.
            try:
                os.remove(download_path)
            except OSError:
                pass
            raise

        # Only a fully written file is published on the instance.
        self._file_path = download_path
        self._remove_file = True
        self._content_hash = storage.archive_file(
            self._file_path, content_hash=digest.hexdigest())

        if self.http.cache and self.ok:
            self.context.set_tag(self.request_id, self.serialize())
        return self._file_path
Exemplo n.º 3
0
 def store_file(self, file_path, content_hash=None):
     """Archive a file in permanent storage so other stages can see it.

     ``file_path`` is the file to archive; ``content_hash`` is optional and
     is forwarded unchanged to ``storage.archive_file``, whose result is
     returned.
     """
     archived = storage.archive_file(file_path, content_hash=content_hash)
     return archived