Exemplo n.º 1
0
    def start(self):
        """
        Open the streamed download and record how many bytes to expect.

        The request is sent through ``self.session`` and its HTTP status is
        checked. When that fails with an error ``retry_import`` accepts,
        ``self.resume()`` is asked to re-establish the download; any other
        error propagates to the caller.

        On return ``self.total_size`` holds the expected size in bytes and
        ``self.started`` is True.
        """
        try:
            self.response = self.session.get(
                self.source, stream=True, timeout=self.timeout
            )
            self.response.raise_for_status()
        except Exception as err:
            if not retry_import(err):
                raise
            # Retryable failure: let resume() re-issue the request, then
            # honour a cancellation that may have arrived while it waited.
            self.resume()
            if self.cancel_check():
                self._kill_gracefully()

        headers = self.response.headers
        try:
            size = int(headers["content-length"])
        except KeyError:
            # A compressed file saved on Google Cloud Storage has no
            # content-length header, but X-Goog-Stored-Content-Length
            # can be used in its place.
            stored_length = headers.get("X-Goog-Stored-Content-Length")
            if stored_length:
                size = int(stored_length)
            else:
                # File compressed through nginx: neither header is present,
                # so measure the response content itself.
                size = len(self.response.content)
        self.total_size = size

        self.started = True
Exemplo n.º 2
0
    def resume(self):
        """
        Wait, then re-open the download stream, retrying until it succeeds.

        Every attempt first waits 30 seconds, polling ``self.cancel_check()``
        once per second; if cancellation is seen the method returns without
        issuing a request. Otherwise the source is requested again:

        * If an earlier response exists and advertised both ``accept-ranges``
          and ``content-length``, a ``Range`` header starting at
          ``self.transferred_size`` is sent so the download can continue
          where it stopped.
        * If the server answers such a request with 206, the destination
          file is left as is and new data is appended. In every other case
          the destination file is truncated and ``self.transferred_size`` is
          reset to 0, because the body starts from the first byte again.

        On success ``self.response`` and ``self._content_iterator`` refer to
        the new stream.

        Raises:
            Exception: the error raised by the request when ``retry_import``
                reports that it should not be retried.
        """
        # Loop rather than recurse so that a long outage cannot exhaust the
        # interpreter's recursion limit.
        while True:
            logger.info("Waiting 30s before retrying import: {}".format(
                self.source))
            for _ in range(30):
                if self.cancel_check():
                    logger.info("Canceling import: {}".format(self.source))
                    return
                sleep(1)

            try:
                request_kwargs = {"stream": True, "timeout": self.timeout}
                range_requested = False

                # When internet connection is lost at the beginning of
                # start(), self.response does not get an assigned value
                if hasattr(self, "response"):
                    previous = self.response
                    # Work on a copy so the earlier request is not mutated,
                    # and drop any Range left over from a previous resume:
                    # a stale offset must never be sent with a full restart.
                    resume_headers = previous.request.headers.copy()
                    resume_headers.pop("Range", None)

                    # Use Accept-Ranges and Content-Length header to check
                    # if range requests are supported. For example, range
                    # requests are not supported on compressed files
                    range_requested = bool(
                        previous.headers.get("accept-ranges")
                        and previous.headers.get("content-length"))
                    if range_requested:
                        resume_headers["Range"] = "bytes={}-".format(
                            self.transferred_size)
                    request_kwargs["headers"] = resume_headers

                self.response = self.session.get(self.source,
                                                 **request_kwargs)
                self.response.raise_for_status()
                self._content_iterator = self.response.iter_content(
                    self.block_size)

                # Only a 206 reply to a Range request continues from the
                # requested offset. Anything else delivers the file from
                # byte 0, so the partial content already written must be
                # discarded and the byte counter restarted with it.
                if not (range_requested
                        and self.response.status_code == 206):
                    self.dest_file_obj.seek(0)
                    self.dest_file_obj.truncate()
                    self.transferred_size = 0
                return
            except Exception as e:
                logger.error("Error reading download stream: {}".format(e))
                if not retry_import(e):
                    raise
Exemplo n.º 3
0
    def next(self):
        """
        Return the next chunk of the download, resuming after retryable errors.

        Before each attempt ``self.cancel_check()`` is consulted and, when it
        reports cancellation, ``self._kill_gracefully()`` is called. The chunk
        itself comes from the parent class's ``next()``. If that raises an
        error ``retry_import`` accepts, the error is logged, ``self.resume()``
        is called and the read is attempted again; any other exception
        propagates unchanged.

        ``self.transferred_size`` grows by the length of every chunk returned.
        """
        # Loop rather than recurse so repeated failures do not deepen the
        # call stack.
        while True:
            if self.cancel_check():
                self._kill_gracefully()

            try:
                chunk = super(FileDownload, self).next()
                # Count the bytes actually received, not the nominal block
                # size: resume() uses this value as the offset of its Range
                # request, and a chunk may be shorter than block_size.
                self.transferred_size = self.transferred_size + len(chunk)
                return chunk
            except Exception as e:
                if not retry_import(e):
                    raise

                logger.error("Error reading download stream: {}".format(e))
                self.resume()
Exemplo n.º 4
0
    def _start_file_transfer(self, f, filetransfer, overall_progress_update):
        """
        Run ``filetransfer`` to completion while reporting progress.

        Returns a two-item tuple:
            * (True, FILE_TRANSFERRED) - every chunk was consumed and the
              checksum of the destination matched ``f.id``.
            * (True, FILE_SKIPPED) - the checksum did not match (the
              destination is removed), or the transfer raised an error that
              ``retry_import`` does not consider retryable.
            * (True, FILE_NOT_TRANSFERRED) - cancellation was detected while
              chunks were being consumed.
            * (False, FILE_NOT_TRANSFERRED) - a retryable error was raised;
              progress is rolled back and the transfer needs to be retried.
        """
        try:
            # Snapshot the overall progress so it can be restored on retry
            saved_value = self.progresstrackers[0].progress
            saved_state = self.progresstrackers[0].get_progress()

            with filetransfer, self.start_progress(
                    total=filetransfer.total_size) as report_file_progress:
                for chunk in filetransfer:
                    if self.is_cancelled():
                        filetransfer.cancel()
                        return True, FILE_NOT_TRANSFERRED
                    chunk_len = len(chunk)
                    overall_progress_update(chunk_len)
                    report_file_progress(chunk_len)

                # The size recorded in the database may differ from what the
                # transfer reported (for instance a local file that was
                # replaced or corrupted, which is caught just below). Add the
                # difference so the overall progress stays consistent.
                size_gap = f.file_size - filetransfer.total_size
                overall_progress_update(size_gap)

                # A checksum that differs from the local file id stored in
                # the database means the destination is corrupted, whether
                # at the origin or during import, so the file is dropped.
                if not compare_checksums(filetransfer.dest, f.id):
                    reason = "File {} is corrupted.".format(
                        filetransfer.source)
                    logger.error(
                        "An error occurred during content import: {}".format(
                            reason))
                    os.remove(filetransfer.dest)
                    return True, FILE_SKIPPED

            return True, FILE_TRANSFERRED

        except Exception as error:
            logger.error(
                "An error occurred during content import: {}".format(error))

            if not retry_import(error, skip_404=True):
                # Not retryable: account for the whole file and move on
                overall_progress_update(f.file_size)
                return True, FILE_SKIPPED

            # Put the progress back where it was before this attempt so the
            # progress bar cannot end up above 100% after the retry
            self.progresstrackers[0].progress = saved_value
            self.progresstrackers[0].update_callback(
                saved_state.progress_fraction, saved_state)

            logger.info(
                "Waiting for 30 seconds before retrying import: {}\n".format(
                    filetransfer.source))
            sleep(30)
            return False, FILE_NOT_TRANSFERRED