Пример #1
0
 def cover_file(self):
     if self.cover_url:
         filename, extension = util.filename_from_url(self.cover_url)
         if not filename:
             filename = hashlib.sha512(self.cover_url.encode('utf-8')).hexdigest()
         return os.path.join(self.save_dir, filename)
     return None
Пример #2
0
    def _get_enclosure_url(self, episode_dict):
        url = episode_dict.get('link')
        if url is not None:
            base, extension = util.filename_from_url(url)
            if util.file_type_by_extension(extension) in ('audio', 'video'):
                logger.debug('Using link for enclosure URL: %s', url)
                return url

        return None
Пример #3
0
    def from_podcastparser_entry(cls, entry, channel):
        episode = cls(channel)
        episode.guid = entry['guid']
        episode.title = entry['title']
        episode.link = entry['link']
        episode.description = entry['description']
        if entry.get('description_html'):
            episode.description_html = entry['description_html']
        # TODO: This really should be handled in podcastparser and not here.
        elif util.is_html(entry['description']):
            episode.description_html = entry['description']
            episode.description = util.remove_html_tags(entry['description'])

        episode.total_time = entry['total_time']
        episode.published = entry['published']
        episode.payment_url = entry['payment_url']

        audio_available = any(enclosure['mime_type'].startswith('audio/') for enclosure in entry['enclosures'])
        video_available = any(enclosure['mime_type'].startswith('video/') for enclosure in entry['enclosures'])

        for enclosure in entry['enclosures']:
            episode.mime_type = enclosure['mime_type']

            # Skip images in feeds if audio or video is available (bug 979)
            # This must (and does) also look in Media RSS enclosures (bug 1430)
            if episode.mime_type.startswith('image/') and (audio_available or video_available):
                continue

            # If we have audio or video available later on, skip
            # 'application/octet-stream' data types (fixes Linux Outlaws)
            if episode.mime_type == 'application/octet-stream' and (audio_available or video_available):
                continue

            episode.url = util.normalize_feed_url(enclosure['url'])
            if not episode.url:
                continue

            episode.file_size = enclosure['file_size']
            return episode

        # Brute-force detection of the episode link
        episode.url = util.normalize_feed_url(entry['link'])
        if not episode.url:
            return None

        if any(mod.is_video_link(episode.url) for mod in (youtube, vimeo, escapist_videos)):
            return episode

        # Check if we can resolve this link to a audio/video file
        filename, extension = util.filename_from_url(episode.url)
        file_type = util.file_type_by_extension(extension)

        # The link points to a audio or video file - use it!
        if file_type is not None:
            return episode

        return None
Пример #4
0
    def _get_enclosure_url(self, episode_dict):
        url = episode_dict.get('link')
        if url is not None:
            base, extension = util.filename_from_url(url)
            if util.file_type_by_extension(extension) in ('audio', 'video'):
                logger.debug('Using link for enclosure URL: %s', url)
                return url

        return None
Пример #5
0
 def extension(self, may_call_local_filename=True):
     filename, ext = util.filename_from_url(self.url)
     if may_call_local_filename:
         filename = self.local_filename(create=False)
         if filename is not None:
             filename, ext = os.path.splitext(filename)
     # if we can't detect the extension from the url fallback on the mimetype
     if ext == '' or util.file_type_by_extension(ext) is None:
         ext = util.extension_from_mimetype(self.mime_type)
     return ext
Пример #6
0
 def extension(self, may_call_local_filename=True):
     filename, ext = util.filename_from_url(self.url)
     if may_call_local_filename:
         filename = self.local_filename(create=False)
         if filename is not None:
             filename, ext = os.path.splitext(filename)
     # if we can't detect the extension from the url fallback on the mimetype
     if ext == "" or util.file_type_by_extension(ext) is None:
         ext = util.extension_from_mimetype(self.mime_type)
     return ext
Пример #7
0
    def from_podcastparser_entry(cls, entry, channel):
        episode = cls(channel)
        episode.guid = entry["guid"]
        episode.title = entry["title"]
        episode.link = entry["link"]
        episode.description = entry["description"]
        episode.total_time = entry["total_time"]
        episode.published = entry["published"]
        episode.payment_url = entry["payment_url"]

        audio_available = any(enclosure["mime_type"].startswith("audio/") for enclosure in entry["enclosures"])
        video_available = any(enclosure["mime_type"].startswith("video/") for enclosure in entry["enclosures"])

        for enclosure in entry["enclosures"]:
            episode.mime_type = enclosure["mime_type"]

            # Skip images in feeds if audio or video is available (bug 979)
            # This must (and does) also look in Media RSS enclosures (bug 1430)
            if episode.mime_type.startswith("image/") and (audio_available or video_available):
                continue

            # If we have audio or video available later on, skip
            # 'application/octet-stream' data types (fixes Linux Outlaws)
            if episode.mime_type == "application/octet-stream" and (audio_available or video_available):
                continue

            episode.url = util.normalize_feed_url(enclosure["url"])
            if not episode.url:
                continue

            episode.file_size = enclosure["file_size"]
            return episode

        # Brute-force detection of the episode link
        episode.url = util.normalize_feed_url(entry["link"])
        if not episode.url:
            return None

        if any(mod.is_video_link(episode.url) for mod in (youtube, vimeo, escapist_videos)):
            return episode

        # Check if we can resolve this link to a audio/video file
        filename, extension = util.filename_from_url(episode.url)
        file_type = util.file_type_by_extension(extension)

        # The link points to a audio or video file - use it!
        if file_type is not None:
            return episode

        return None
Пример #8
0
            if new_mimetype != old_mimetype or util.wrong_extension(ext):
                logger.info('Updating mime type: %s => %s', old_mimetype, new_mimetype)
                old_extension = self.__episode.extension()
                self.__episode.mime_type = new_mimetype
                new_extension = self.__episode.extension()

                # If the desired filename extension changed due to the new
                # mimetype, we force an update of the local filename to fix the
                # extension.
                if old_extension != new_extension or util.wrong_extension(ext):
                    self.filename = self.__episode.local_filename(create=True, force_update=True)

            # In some cases, the redirect of a URL causes the real filename to
            # be revealed in the final URL (e.g. http://gpodder.org/bug/1423)
            if real_url != url and not util.is_known_redirecter(real_url):
                realname, realext = util.filename_from_url(real_url)

                # Only update from redirect if the redirected-to filename has
                # a proper extension (this is needed for e.g. YouTube)
                if not util.wrong_extension(realext):
                    real_filename = ''.join((realname, realext))
                    self.filename = self.__episode.local_filename(create=True,
                            force_update=True, template=real_filename)
                    logger.info('Download was redirected (%s). New filename: %s',
                            real_url, os.path.basename(self.filename))

            # Look at the Content-disposition header; use if if available
            disposition_filename = get_header_param(headers, \
                    'filename', 'content-disposition')

            # Some servers do send the content-disposition header, but provide
Пример #9
0
    def local_filename(self, create, force_update=False, check_only=False, template=None, return_wanted_filename=False):
        """Get (and possibly generate) the local saving filename

        Pass create=True if you want this function to generate a
        new filename if none exists. You only want to do this when
        planning to create/download the file after calling this function.

        Normally, you should pass create=False. This will only
        create a filename when the file already exists from a previous
        version of gPodder (where we used md5 filenames). If the file
        does not exist (and the filename also does not exist), this
        function will return None.

        If you pass force_update=True to this function, it will try to
        find a new (better) filename and move the current file if this
        is the case. This is useful if (during the download) you get
        more information about the file, e.g. the mimetype and you want
        to include this information in the file name generation process.

        If check_only=True is passed to this function, it will never try
        to rename the file, even if would be a good idea. Use this if you
        only want to check if a file exists.

        If "template" is specified, it should be a filename that is to
        be used as a template for generating the "real" filename.

        The generated filename is stored in the database for future access.

        If return_wanted_filename is True, the filename will not be written to
        the database, but simply returned by this function (for use by the
        "import external downloads" feature).
        """
        if self.download_filename is None and (check_only or not create):
            return None

        ext = self.extension(may_call_local_filename=False).encode("utf-8", "ignore")

        if not check_only and (force_update or not self.download_filename):
            # Avoid and catch gPodder bug 1440 and similar situations
            if template == "":
                logger.warn("Empty template. Report this podcast URL %s", self.channel.url)
                template = None

            # Try to find a new filename for the current file
            if template is not None:
                # If template is specified, trust the template's extension
                episode_filename, ext = os.path.splitext(template)
            else:
                episode_filename, _ = util.filename_from_url(self.url)
            fn_template = util.sanitize_filename(episode_filename, self.MAX_FILENAME_LENGTH)

            if "redirect" in fn_template and template is None:
                # This looks like a redirection URL - force URL resolving!
                logger.warn("Looks like a redirection to me: %s", self.url)
                url = util.get_real_url(self.channel.authenticate_url(self.url))
                logger.info("Redirection resolved to: %s", url)
                episode_filename, _ = util.filename_from_url(url)
                fn_template = util.sanitize_filename(episode_filename, self.MAX_FILENAME_LENGTH)

            # Use title for YouTube, Vimeo and Soundcloud downloads
            if (
                youtube.is_video_link(self.url)
                or vimeo.is_video_link(self.url)
                or escapist_videos.is_video_link(self.url)
                or fn_template == "stream"
            ):
                sanitized = util.sanitize_filename(self.title, self.MAX_FILENAME_LENGTH)
                if sanitized:
                    fn_template = sanitized

            # If the basename is empty, use the md5 hexdigest of the URL
            if not fn_template or fn_template.startswith("redirect."):
                logger.error("Report this feed: Podcast %s, episode %s", self.channel.url, self.url)
                fn_template = hashlib.md5(self.url).hexdigest()

            # Find a unique filename for this episode
            wanted_filename = self.find_unique_file_name(fn_template, ext)

            if return_wanted_filename:
                # return the calculated filename without updating the database
                return wanted_filename

            # The old file exists, but we have decided to want a different filename
            if self.download_filename and wanted_filename != self.download_filename:
                # there might be an old download folder crawling around - move it!
                new_file_name = os.path.join(self.channel.save_dir, wanted_filename)
                old_file_name = os.path.join(self.channel.save_dir, self.download_filename)
                if os.path.exists(old_file_name) and not os.path.exists(new_file_name):
                    logger.info("Renaming %s => %s", old_file_name, new_file_name)
                    os.rename(old_file_name, new_file_name)
                elif force_update and not os.path.exists(old_file_name):
                    # When we call force_update, the file might not yet exist when we
                    # call it from the downloading code before saving the file
                    logger.info("Choosing new filename: %s", new_file_name)
                else:
                    logger.warn("%s exists or %s does not", new_file_name, old_file_name)
                logger.info('Updating filename of %s to "%s".', self.url, wanted_filename)
            elif self.download_filename is None:
                logger.info("Setting download filename: %s", wanted_filename)
            self.download_filename = wanted_filename
            self.save()

        return os.path.join(
            util.sanitize_encoding(self.channel.save_dir), util.sanitize_encoding(self.download_filename)
        )
Пример #10
0
                            new_mimetype)
                old_extension = self.__episode.extension()
                self.__episode.mime_type = new_mimetype
                new_extension = self.__episode.extension()

                # If the desired filename extension changed due to the new
                # mimetype, we force an update of the local filename to fix the
                # extension.
                if old_extension != new_extension or util.wrong_extension(ext):
                    self.filename = self.__episode.local_filename(
                        create=True, force_update=True)

            # In some cases, the redirect of a URL causes the real filename to
            # be revealed in the final URL (e.g. http://gpodder.org/bug/1423)
            if real_url != url and not util.is_known_redirecter(real_url):
                realname, realext = util.filename_from_url(real_url)

                # Only update from redirect if the redirected-to filename has
                # a proper extension (this is needed for e.g. YouTube)
                if not util.wrong_extension(realext):
                    real_filename = ''.join((realname, realext))
                    self.filename = self.__episode.local_filename(
                        create=True, force_update=True, template=real_filename)
                    logger.info(
                        'Download was redirected (%s). New filename: %s',
                        real_url, os.path.basename(self.filename))

            # Look at the Content-disposition header; use if if available
            disposition_filename = get_header_param(headers, \
                    'filename', 'content-disposition')
Пример #11
0
    def run(self):
        # Speed calculation (re-)starts here
        self.__start_time = 0
        self.__start_blocks = 0

        # If the download has already been cancelled, skip it
        if self.status == DownloadTask.CANCELLED:
            util.delete_file(self.tempname)
            self.progress = 0.0
            self.speed = 0.0
            self.recycle()
            return False

        # We only start this download if its status is "downloading"
        if self.status != DownloadTask.DOWNLOADING:
            return False

        # We are downloading this file right now
        self.status = DownloadTask.DOWNLOADING
        self._notification_shown = False

        # Restore a reference to this task in the episode
        # when running a recycled task following a pause or failed
        # see #649
        if not self.episode.download_task:
            self.episode.download_task = self

        url = self.__episode.url
        try:
            if url == '':
                raise DownloadNoURLException()

            if self.downloader:
                downloader = self.downloader.custom_downloader(
                    self._config, self.episode)
            else:
                downloader = registry.custom_downloader.resolve(
                    self._config, None, self.episode)

            if downloader:
                logger.info('Downloading %s with %s', url, downloader)
            else:
                downloader = DefaultDownloader.custom_downloader(
                    self._config, self.episode)

            headers, real_url = downloader.retrieve_resume(
                self.tempname, self.status_updated)

            new_mimetype = headers.get('content-type',
                                       self.__episode.mime_type)
            old_mimetype = self.__episode.mime_type
            _basename, ext = os.path.splitext(self.filename)
            if new_mimetype != old_mimetype or util.wrong_extension(ext):
                logger.info('Updating mime type: %s => %s', old_mimetype,
                            new_mimetype)
                old_extension = self.__episode.extension()
                self.__episode.mime_type = new_mimetype
                # don't call local_filename because we'll get the old download name
                new_extension = self.__episode.extension(
                    may_call_local_filename=False)

                # If the desired filename extension changed due to the new
                # mimetype, we force an update of the local filename to fix the
                # extension.
                if old_extension != new_extension or util.wrong_extension(ext):
                    self.filename = self.__episode.local_filename(
                        create=True, force_update=True)

            # In some cases, the redirect of a URL causes the real filename to
            # be revealed in the final URL (e.g. http://gpodder.org/bug/1423)
            if real_url != url and not util.is_known_redirecter(real_url):
                realname, realext = util.filename_from_url(real_url)

                # Only update from redirect if the redirected-to filename has
                # a proper extension (this is needed for e.g. YouTube)
                if not util.wrong_extension(realext):
                    real_filename = ''.join((realname, realext))
                    self.filename = self.__episode.local_filename(
                        create=True, force_update=True, template=real_filename)
                    logger.info(
                        'Download was redirected (%s). New filename: %s',
                        real_url, os.path.basename(self.filename))

            # Look at the Content-disposition header; use if if available
            disposition_filename = util.get_header_param(
                headers, 'filename', 'content-disposition')

            # Some servers do send the content-disposition header, but provide
            # an empty filename, resulting in an empty string here (bug 1440)
            if disposition_filename is not None and disposition_filename != '':
                # The server specifies a download filename - try to use it
                # filename_from_url to remove query string; see #591
                fn, ext = util.filename_from_url(disposition_filename)
                logger.debug(
                    "converting disposition filename '%s' to local filename '%s%s'",
                    disposition_filename, fn, ext)
                disposition_filename = fn + ext
                self.filename = self.__episode.local_filename(
                    create=True,
                    force_update=True,
                    template=disposition_filename)
                new_mimetype, encoding = mimetypes.guess_type(self.filename)
                if new_mimetype is not None:
                    logger.info('Using content-disposition mimetype: %s',
                                new_mimetype)
                    self.__episode.mime_type = new_mimetype

            # Re-evaluate filename and tempname to take care of podcast renames
            # while downloads are running (which will change both file names)
            self.filename = self.__episode.local_filename(create=False)
            self.tempname = os.path.join(os.path.dirname(self.filename),
                                         os.path.basename(self.tempname))
            shutil.move(self.tempname, self.filename)

            # Model- and database-related updates after a download has finished
            self.__episode.on_downloaded(self.filename)
        except DownloadCancelledException:
            logger.info('Download has been cancelled/paused: %s', self)
            if self.status == DownloadTask.CANCELLED:
                util.delete_file(self.tempname)
                self.progress = 0.0
                self.speed = 0.0
        except DownloadNoURLException:
            self.status = DownloadTask.FAILED
            self.error_message = _('Episode has no URL to download')
        except urllib.error.ContentTooShortError as ctse:
            self.status = DownloadTask.FAILED
            self.error_message = _('Missing content from server')
        except ConnectionError as ce:
            # special case request exception
            self.status = DownloadTask.FAILED
            logger.error('Download failed: %s', str(ce), exc_info=True)
            d = {'host': ce.args[0].pool.host, 'port': ce.args[0].pool.port}
            self.error_message = _(
                "Couldn't connect to server %(host)s:%(port)s" % d)
        except RequestException as re:
            # extract MaxRetryError to shorten the exception message
            if isinstance(re.args[0], MaxRetryError):
                re = re.args[0]
            logger.error('%s while downloading "%s"',
                         str(re),
                         self.__episode.title,
                         exc_info=True)
            self.status = DownloadTask.FAILED
            d = {'error': str(re)}
            self.error_message = _('Request Error: %(error)s') % d
        except IOError as ioe:
            logger.error('%s while downloading "%s": %s',
                         ioe.strerror,
                         self.__episode.title,
                         ioe.filename,
                         exc_info=True)
            self.status = DownloadTask.FAILED
            d = {'error': ioe.strerror, 'filename': ioe.filename}
            self.error_message = _('I/O Error: %(error)s: %(filename)s') % d
        except gPodderDownloadHTTPError as gdhe:
            logger.error('HTTP %s while downloading "%s": %s',
                         gdhe.error_code,
                         self.__episode.title,
                         gdhe.error_message,
                         exc_info=True)
            self.status = DownloadTask.FAILED
            d = {'code': gdhe.error_code, 'message': gdhe.error_message}
            self.error_message = _('HTTP Error %(code)s: %(message)s') % d
        except Exception as e:
            self.status = DownloadTask.FAILED
            logger.error('Download failed: %s', str(e), exc_info=True)
            self.error_message = _('Error: %s') % (str(e), )

        if self.status == DownloadTask.DOWNLOADING:
            # Everything went well - we're done
            self.status = DownloadTask.DONE
            if self.total_size <= 0:
                self.total_size = util.calculate_size(self.filename)
                logger.info('Total size updated to %d', self.total_size)
            self.progress = 1.0
            gpodder.user_extensions.on_episode_downloaded(self.__episode)
            return True

        self.speed = 0.0

        # We finished, but not successfully (at least not really)
        return False
Пример #12
0
    def run(self):
        # Speed calculation (re-)starts here
        self.__start_time = 0
        self.__start_blocks = 0

        # If the download has already been cancelled, skip it
        if self.status == DownloadTask.CANCELLED:
            util.delete_file(self.tempname)
            self.progress = 0.0
            self.speed = 0.0
            return False

        # We only start this download if its status is "downloading"
        if self.status != DownloadTask.DOWNLOADING:
            return False

        # We are downloading this file right now
        self.status = DownloadTask.DOWNLOADING
        self._notification_shown = False

        # Restore a reference to this task in the episode
        # when running a recycled task following a pause or failed
        # see #649
        if not self.episode.download_task:
            self.episode.download_task = self

        try:

            custom_downloader = registry.custom_downloader.resolve(
                self._config, None, self.episode)

            url = self.__episode.url
            if custom_downloader:
                logger.info('Downloading %s with %s', url, custom_downloader)
                headers, real_url = custom_downloader.retrieve_resume(
                    self.tempname, reporthook=self.status_updated)
            else:
                # Resolve URL and start downloading the episode
                res = registry.download_url.resolve(self._config, None,
                                                    self.episode)
                if res:
                    url = res
                if url == self.__episode.url:
                    # don't modify custom urls (#635 - vimeo breaks if * is unescaped)
                    url = url.strip()
                    url = util.iri_to_url(url)

                logger.info("Downloading %s", url)
                downloader = DownloadURLOpener(self.__episode.channel)

                # HTTP Status codes for which we retry the download
                retry_codes = (408, 418, 504, 598, 599)
                max_retries = max(0, self._config.auto.retries)

                # Retry the download on timeout (bug 1013)
                for retry in range(max_retries + 1):
                    if retry > 0:
                        logger.info('Retrying download of %s (%d)', url, retry)
                        time.sleep(1)

                    try:
                        headers, real_url = downloader.retrieve_resume(
                            url, self.tempname, reporthook=self.status_updated)
                        # If we arrive here, the download was successful
                        break
                    except urllib.error.ContentTooShortError as ctse:
                        if retry < max_retries:
                            logger.info('Content too short: %s - will retry.',
                                        url)
                            continue
                        raise
                    except socket.timeout as tmout:
                        if retry < max_retries:
                            logger.info('Socket timeout: %s - will retry.',
                                        url)
                            continue
                        raise
                    except gPodderDownloadHTTPError as http:
                        if retry < max_retries and http.error_code in retry_codes:
                            logger.info('HTTP error %d: %s - will retry.',
                                        http.error_code, url)
                            continue
                        raise

            new_mimetype = headers.get('content-type',
                                       self.__episode.mime_type)
            old_mimetype = self.__episode.mime_type
            _basename, ext = os.path.splitext(self.filename)
            if new_mimetype != old_mimetype or util.wrong_extension(ext):
                logger.info('Updating mime type: %s => %s', old_mimetype,
                            new_mimetype)
                old_extension = self.__episode.extension()
                self.__episode.mime_type = new_mimetype
                new_extension = self.__episode.extension()

                # If the desired filename extension changed due to the new
                # mimetype, we force an update of the local filename to fix the
                # extension.
                if old_extension != new_extension or util.wrong_extension(ext):
                    self.filename = self.__episode.local_filename(
                        create=True, force_update=True)

            # In some cases, the redirect of a URL causes the real filename to
            # be revealed in the final URL (e.g. http://gpodder.org/bug/1423)
            if real_url != url and not util.is_known_redirecter(real_url):
                realname, realext = util.filename_from_url(real_url)

                # Only update from redirect if the redirected-to filename has
                # a proper extension (this is needed for e.g. YouTube)
                if not util.wrong_extension(realext):
                    real_filename = ''.join((realname, realext))
                    self.filename = self.__episode.local_filename(
                        create=True, force_update=True, template=real_filename)
                    logger.info(
                        'Download was redirected (%s). New filename: %s',
                        real_url, os.path.basename(self.filename))

            # Look at the Content-disposition header; use if if available
            disposition_filename = get_header_param(headers, 'filename',
                                                    'content-disposition')

            # Some servers do send the content-disposition header, but provide
            # an empty filename, resulting in an empty string here (bug 1440)
            if disposition_filename is not None and disposition_filename != '':
                # The server specifies a download filename - try to use it
                # filename_from_url to remove query string; see #591
                fn, ext = util.filename_from_url(disposition_filename)
                logger.debug(
                    "converting disposition filename '%s' to local filename '%s%s'",
                    disposition_filename, fn, ext)
                disposition_filename = fn + ext
                self.filename = self.__episode.local_filename(
                    create=True,
                    force_update=True,
                    template=disposition_filename)
                new_mimetype, encoding = mimetypes.guess_type(self.filename)
                if new_mimetype is not None:
                    logger.info('Using content-disposition mimetype: %s',
                                new_mimetype)
                    self.__episode.mime_type = new_mimetype

            # Re-evaluate filename and tempname to take care of podcast renames
            # while downloads are running (which will change both file names)
            self.filename = self.__episode.local_filename(create=False)
            self.tempname = os.path.join(os.path.dirname(self.filename),
                                         os.path.basename(self.tempname))
            shutil.move(self.tempname, self.filename)

            # Model- and database-related updates after a download has finished
            self.__episode.on_downloaded(self.filename)
        except DownloadCancelledException:
            logger.info('Download has been cancelled/paused: %s', self)
            if self.status == DownloadTask.CANCELLED:
                util.delete_file(self.tempname)
                self.progress = 0.0
                self.speed = 0.0
        except urllib.error.ContentTooShortError as ctse:
            self.status = DownloadTask.FAILED
            self.error_message = _('Missing content from server')
        except IOError as ioe:
            logger.error('%s while downloading "%s": %s',
                         ioe.strerror,
                         self.__episode.title,
                         ioe.filename,
                         exc_info=True)
            self.status = DownloadTask.FAILED
            d = {'error': ioe.strerror, 'filename': ioe.filename}
            self.error_message = _('I/O Error: %(error)s: %(filename)s') % d
        except gPodderDownloadHTTPError as gdhe:
            logger.error('HTTP %s while downloading "%s": %s',
                         gdhe.error_code,
                         self.__episode.title,
                         gdhe.error_message,
                         exc_info=True)
            self.status = DownloadTask.FAILED
            d = {'code': gdhe.error_code, 'message': gdhe.error_message}
            self.error_message = _('HTTP Error %(code)s: %(message)s') % d
        except Exception as e:
            self.status = DownloadTask.FAILED
            logger.error('Download failed: %s', str(e), exc_info=True)
            self.error_message = _('Error: %s') % (str(e), )

        if self.status == DownloadTask.DOWNLOADING:
            # Everything went well - we're done
            self.status = DownloadTask.DONE
            if self.total_size <= 0:
                self.total_size = util.calculate_size(self.filename)
                logger.info('Total size updated to %d', self.total_size)
            self.progress = 1.0
            gpodder.user_extensions.on_episode_downloaded(self.__episode)
            return True

        self.speed = 0.0

        # We finished, but not successfully (at least not really)
        return False
Пример #13
0
    def from_feedparser_entry(cls, entry, channel):
        episode = cls(channel)
        episode.guid = entry.get('id', '')

        # Replace multi-space and newlines with single space (Maemo bug 11173)
        episode.title = re.sub('\s+', ' ', entry.get('title', ''))
        episode.link = entry.get('link', '')
        if 'content' in entry and len(entry['content']) and \
                entry['content'][0].get('type', '') == 'text/html':
            episode.description = entry['content'][0].value
        else:
            episode.description = entry.get('summary', '')

        # Fallback to subtitle if summary is not available
        if not episode.description:
            episode.description = entry.get('subtitle', '')

        try:
            total_time = 0

            # Parse iTunes-specific podcast duration metadata
            itunes_duration = entry.get('itunes_duration', '')
            if itunes_duration:
                total_time = util.parse_time(itunes_duration)

            # Parse time from YouTube descriptions if it's a YouTube feed
            if youtube.is_youtube_guid(episode.guid):
                result = re.search(r'Time:<[^>]*>\n<[^>]*>([:0-9]*)<',
                        episode.description)
                if result:
                    youtube_duration = result.group(1)
                    total_time = util.parse_time(youtube_duration)

            episode.total_time = total_time
        except:
            pass

        episode.published = feedcore.get_pubdate(entry)

        enclosures = entry.get('enclosures', [])
        media_rss_content = entry.get('media_content', [])
        audio_available = any(e.get('type', '').startswith('audio/') \
                for e in enclosures + media_rss_content)
        video_available = any(e.get('type', '').startswith('video/') \
                for e in enclosures + media_rss_content)

        # XXX: Make it possible for hooks/extensions to override this by
        # giving them a list of enclosures and the "self" object (podcast)
        # and letting them sort and/or filter the list of enclosures to
        # get the desired enclosure picked by the algorithm below.
        filter_and_sort_enclosures = lambda x: x

        # read the flattr auto-url, if exists
        payment_info = [link['href'] for link in entry.get('links', [])
            if link['rel'] == 'payment']
        if payment_info:
            episode.payment_url = sorted(payment_info, key=get_payment_priority)[0]

        # Enclosures
        for e in filter_and_sort_enclosures(enclosures):
            episode.mime_type = e.get('type', 'application/octet-stream')
            if episode.mime_type == '':
                # See Maemo bug 10036
                logger.warn('Fixing empty mimetype in ugly feed')
                episode.mime_type = 'application/octet-stream'

            if '/' not in episode.mime_type:
                continue

            # Skip images in feeds if audio or video is available (bug 979)
            # This must (and does) also look in Media RSS enclosures (bug 1430)
            if episode.mime_type.startswith('image/') and \
                    (audio_available or video_available):
                continue

            # If we have audio or video available later on, skip
            # 'application/octet-stream' data types (fixes Linux Outlaws)
            if episode.mime_type == 'application/octet-stream' and \
                    (audio_available or video_available):
                continue

            episode.url = util.normalize_feed_url(e.get('href', ''))
            if not episode.url:
                continue

            try:
                episode.file_size = int(e.length) or -1
            except:
                episode.file_size = -1

            return episode

        # Media RSS content
        for m in filter_and_sort_enclosures(media_rss_content):
            episode.mime_type = m.get('type', 'application/octet-stream')
            if '/' not in episode.mime_type:
                continue

            # Skip images in Media RSS if we have audio/video (bug 1444)
            if episode.mime_type.startswith('image/') and \
                    (audio_available or video_available):
                continue

            episode.url = util.normalize_feed_url(m.get('url', ''))
            if not episode.url:
                continue

            try:
                episode.file_size = int(m.get('filesize', 0)) or -1
            except:
                episode.file_size = -1

            try:
                episode.total_time = int(m.get('duration', 0)) or 0
            except:
                episode.total_time = 0

            return episode

        # Brute-force detection of any links
        for l in entry.get('links', ()):
            episode.url = util.normalize_feed_url(l.get('href', ''))
            if not episode.url:
                continue

            if (youtube.is_video_link(episode.url) or \
                    vimeo.is_video_link(episode.url)):
                return episode

            # Check if we can resolve this link to a audio/video file
            filename, extension = util.filename_from_url(episode.url)
            file_type = util.file_type_by_extension(extension)
            if file_type is None and hasattr(l, 'type'):
                extension = util.extension_from_mimetype(l.type)
                file_type = util.file_type_by_extension(extension)

            # The link points to a audio or video file - use it!
            if file_type is not None:
                return episode

        return None
Пример #14
0
    def run(self):
        # Speed calculation (re-)starts here
        self.__start_time = 0
        self.__start_blocks = 0

        # If the download has already been cancelled, skip it
        if self.status == DownloadTask.CANCELLED:
            util.delete_file(self.tempname)
            self.progress = 0.0
            self.speed = 0.0
            return False

        # We only start this download if its status is "queued"
        if self.status != DownloadTask.QUEUED:
            return False

        # We are downloading this file right now
        self.status = DownloadTask.DOWNLOADING
        self._notification_shown = False

        try:
            # Resolve URL and start downloading the episode
            url = youtube.get_real_download_url(self.__episode.url, \
                    self._config.youtube_preferred_fmt_id)
            downloader =  DownloadURLOpener(self.__episode.channel)
            headers, real_url = downloader.retrieve_resume(url, \
                    self.tempname, reporthook=self.status_updated)

            new_mimetype = headers.get('content-type', self.__episode.mime_type)
            old_mimetype = self.__episode.mime_type
            _basename, ext = os.path.splitext(self.filename)
            if new_mimetype != old_mimetype or util.wrong_extension(ext):
                logger.info('Updating mime type: %s => %s', old_mimetype, new_mimetype)
                old_extension = self.__episode.extension()
                self.__episode.mime_type = new_mimetype
                new_extension = self.__episode.extension()

                # If the desired filename extension changed due to the new
                # mimetype, we force an update of the local filename to fix the
                # extension.
                if old_extension != new_extension or util.wrong_extension(ext):
                    self.filename = self.__episode.local_filename(create=True, force_update=True)

            # In some cases, the redirect of a URL causes the real filename to
            # be revealed in the final URL (e.g. http://gpodder.org/bug/1423)
            if real_url != url and not util.is_known_redirecter(real_url):
                realname, realext = util.filename_from_url(real_url)

                # Only update from redirect if the redirected-to filename has
                # a proper extension (this is needed for e.g. YouTube)
                if not util.wrong_extension(realext):
                    real_filename = ''.join((realname, realext))
                    self.filename = self.__episode.local_filename(create=True,
                            force_update=True, template=real_filename)
                    logger.info('Download was redirected (%s). New filename: %s',
                            real_url, os.path.basename(self.filename))

            # Look at the Content-disposition header; use if if available
            disposition_filename = get_header_param(headers, \
                    'filename', 'content-disposition')

            # Some servers do send the content-disposition header, but provide
            # an empty filename, resulting in an empty string here (bug 1440)
            if disposition_filename is not None and disposition_filename != '':
                # The server specifies a download filename - try to use it
                disposition_filename = os.path.basename(disposition_filename)
                self.filename = self.__episode.local_filename(create=True, \
                        force_update=True, template=disposition_filename)
                new_mimetype, encoding = mimetypes.guess_type(self.filename)
                if new_mimetype is not None:
                    logger.info('Using content-disposition mimetype: %s',
                            new_mimetype)
                    self.__episode.set_mimetype(new_mimetype, commit=True)

            # Re-evaluate filename and tempname to take care of podcast renames
            # while downloads are running (which will change both file names)
            self.filename = self.__episode.local_filename(create=False)
            self.tempname = os.path.join(os.path.dirname(self.filename),
                    os.path.basename(self.tempname))
            shutil.move(self.tempname, self.filename)

            # Model- and database-related updates after a download has finished
            self.__episode.on_downloaded(self.filename)
        except DownloadCancelledException:
            logger.info('Download has been cancelled/paused: %s', self)
            if self.status == DownloadTask.CANCELLED:
                util.delete_file(self.tempname)
                self.progress = 0.0
                self.speed = 0.0
        except urllib.ContentTooShortError, ctse:
            self.status = DownloadTask.FAILED
            self.error_message = _('Missing content from server')
Пример #15
0
    def run(self):
        # Speed calculation (re-)starts here
        self.__start_time = 0
        self.__start_blocks = 0

        # If the download has already been cancelled, skip it
        if self.status == DownloadTask.CANCELLED:
            util.delete_file(self.tempname)
            self.progress = 0.0
            self.speed = 0.0
            return False

        # We only start this download if its status is "queued"
        if self.status != DownloadTask.QUEUED:
            return False

        # We are downloading this file right now
        self.status = DownloadTask.DOWNLOADING
        self._notification_shown = False

        try:
            # Resolve URL and start downloading the episode
            url = registry.download_url.resolve(self.__episode, self.url, self._config)

            downloader = DownloadURLOpener(self.__episode.podcast)

            # HTTP Status codes for which we retry the download
            retry_codes = (408, 418, 504, 598, 599)
            max_retries = max(0, self._config.auto.retries)

            # Retry the download on timeout (bug 1013)
            for retry in range(max_retries + 1):
                if retry > 0:
                    logger.info('Retrying download of %s (%d)', url, retry)
                    time.sleep(1)

                try:
                    headers, real_url = downloader.retrieve_resume(url, self.tempname,
                                                                   reporthook=self.status_updated)
                    # If we arrive here, the download was successful
                    break
                except urllib.error.ContentTooShortError as ctse:
                    if retry < max_retries:
                        logger.info('Content too short: %s - will retry.', url)
                        continue
                    raise
                except socket.timeout as tmout:
                    if retry < max_retries:
                        logger.info('Socket timeout: %s - will retry.', url)
                        continue
                    raise
                except gPodderDownloadHTTPError as http:
                    if retry < max_retries and http.error_code in retry_codes:
                        logger.info('HTTP error %d: %s - will retry.', http.error_code, url)
                        continue
                    raise

            new_mimetype = headers.get('content-type', self.__episode.mime_type)
            old_mimetype = self.__episode.mime_type
            _basename, ext = os.path.splitext(self.filename)
            if new_mimetype != old_mimetype or util.wrong_extension(ext):
                logger.info('Updating mime type: %s => %s', old_mimetype, new_mimetype)
                old_extension = self.__episode.extension()
                self.__episode.mime_type = new_mimetype
                new_extension = self.__episode.extension()

                # If the desired filename extension changed due to the new
                # mimetype, we force an update of the local filename to fix the
                # extension.
                if old_extension != new_extension or util.wrong_extension(ext):
                    self.filename = self.__episode.local_filename(create=True, force_update=True)

            # In some cases, the redirect of a URL causes the real filename to
            # be revealed in the final URL (e.g. http://gpodder.org/bug/1423)
            if real_url != url:
                realname, realext = util.filename_from_url(real_url)

                # Only update from redirect if the redirected-to filename has
                # a proper extension (this is needed for e.g. YouTube)
                if not util.wrong_extension(realext):
                    real_filename = ''.join((realname, realext))
                    self.filename = self.__episode.local_filename(create=True, force_update=True,
                                                                  template=real_filename)
                    logger.info('Download was redirected (%s). New filename: %s', real_url,
                                os.path.basename(self.filename))

            # Look at the Content-disposition header; use if if available
            disposition_filename = get_header_param(headers, 'filename', 'content-disposition')

            # Some servers do send the content-disposition header, but provide
            # an empty filename, resulting in an empty string here (bug 1440)
            if disposition_filename is not None and disposition_filename != '':
                # The server specifies a download filename - try to use it
                disposition_filename = os.path.basename(disposition_filename)
                self.filename = self.__episode.local_filename(create=True, force_update=True,
                                                              template=disposition_filename)
                new_mimetype, encoding = mimetypes.guess_type(self.filename)
                if new_mimetype is not None:
                    logger.info('Using content-disposition mimetype: %s', new_mimetype)
                    self.__episode.mime_type = new_mimetype

            # Re-evaluate filename and tempname to take care of podcast renames
            # while downloads are running (which will change both file names)
            self.filename = self.__episode.local_filename(create=False)
            self.tempname = os.path.join(os.path.dirname(self.filename),
                                         os.path.basename(self.tempname))
            shutil.move(self.tempname, self.filename)

            # Model- and database-related updates after a download has finished
            self.__episode.on_downloaded(self.filename)
        except DownloadCancelledException:
            logger.info('Download has been cancelled/paused: %s', self)
            if self.status == DownloadTask.CANCELLED:
                util.delete_file(self.tempname)
                self.progress = 0.0
                self.speed = 0.0
        except urllib.error.ContentTooShortError as ctse:
            self.status = DownloadTask.FAILED
            self.error_message = 'Missing content from server'
        except IOError as ioe:
            logger.error('%s while downloading "%s": %s', ioe.strerror,
                         self.__episode.title, ioe.filename, exc_info=True)
            self.status = DownloadTask.FAILED
            d = {'error': ioe.strerror, 'filename': ioe.filename}
            self.error_message = 'I/O Error: %(error)s: %(filename)s' % d
        except gPodderDownloadHTTPError as gdhe:
            logger.error('HTTP %s while downloading "%s": %s',
                         gdhe.error_code, self.__episode.title, gdhe.error_message, exc_info=True)
            self.status = DownloadTask.FAILED
            d = {'code': gdhe.error_code, 'message': gdhe.error_message}
            self.error_message = 'HTTP Error %(code)s: %(message)s' % d
        except Exception as e:
            self.status = DownloadTask.FAILED
            logger.error('Download failed: %s', str(e), exc_info=True)
            self.error_message = 'Error: %s' % (str(e),)

        if self.status == DownloadTask.DOWNLOADING:
            # Everything went well - we're done
            self.status = DownloadTask.DONE
            if self.total_size <= 0:
                self.total_size = util.calculate_size(self.filename)
                logger.info('Total size updated to %d', self.total_size)
            self.progress = 1.0
            registry.after_download.call_each(self.__episode)
            return True

        self.speed = 0.0

        # We finished, but not successfully (at least not really)
        return False
Пример #16
0
    def local_filename(self,
                       create,
                       force_update=False,
                       check_only=False,
                       template=None,
                       return_wanted_filename=False):
        """Get (and possibly generate) the local saving filename

        Pass create=True if you want this function to generate a
        new filename if none exists. You only want to do this when
        planning to create/download the file after calling this function.

        Normally, you should pass create=False. This will only
        create a filename when the file already exists from a previous
        version of gPodder (where we used md5 filenames). If the file
        does not exist (and the filename also does not exist), this
        function will return None.

        If you pass force_update=True to this function, it will try to
        find a new (better) filename and move the current file if this
        is the case. This is useful if (during the download) you get
        more information about the file, e.g. the mimetype and you want
        to include this information in the file name generation process.

        If check_only=True is passed to this function, it will never try
        to rename the file, even if would be a good idea. Use this if you
        only want to check if a file exists.

        If "template" is specified, it should be a filename that is to
        be used as a template for generating the "real" filename.

        The generated filename is stored in the database for future access.

        If return_wanted_filename is True, the filename will not be written to
        the database, but simply returned by this function (for use by the
        "import external downloads" feature).
        """
        if self.download_filename is None and (check_only or not create):
            return None

        ext = self.extension(may_call_local_filename=False).encode(
            'utf-8', 'ignore')

        if not check_only and (force_update or not self.download_filename):
            # Avoid and catch gPodder bug 1440 and similar situations
            if template == '':
                logger.warn('Empty template. Report this podcast URL %s',
                            self.channel.url)
                template = None

            # Try to find a new filename for the current file
            if template is not None:
                # If template is specified, trust the template's extension
                episode_filename, ext = os.path.splitext(template)
            else:
                episode_filename, _ = util.filename_from_url(self.url)
            fn_template = util.sanitize_filename(episode_filename,
                                                 self.MAX_FILENAME_LENGTH)

            if 'redirect' in fn_template and template is None:
                # This looks like a redirection URL - force URL resolving!
                logger.warn('Looks like a redirection to me: %s', self.url)
                url = util.get_real_url(self.channel.authenticate_url(
                    self.url))
                logger.info('Redirection resolved to: %s', url)
                episode_filename, _ = util.filename_from_url(url)
                fn_template = util.sanitize_filename(episode_filename,
                                                     self.MAX_FILENAME_LENGTH)

            # Use title for YouTube, Vimeo and Soundcloud downloads
            if (youtube.is_video_link(self.url)
                    or vimeo.is_video_link(self.url)
                    or escapist_videos.is_video_link(self.url)
                    or fn_template == 'stream'):
                sanitized = util.sanitize_filename(self.title,
                                                   self.MAX_FILENAME_LENGTH)
                if sanitized:
                    fn_template = sanitized

            # If the basename is empty, use the md5 hexdigest of the URL
            if not fn_template or fn_template.startswith('redirect.'):
                logger.error('Report this feed: Podcast %s, episode %s',
                             self.channel.url, self.url)
                fn_template = hashlib.md5(self.url).hexdigest()

            # Find a unique filename for this episode
            wanted_filename = self.find_unique_file_name(fn_template, ext)

            if return_wanted_filename:
                # return the calculated filename without updating the database
                return wanted_filename

            # The old file exists, but we have decided to want a different filename
            if self.download_filename and wanted_filename != self.download_filename:
                # there might be an old download folder crawling around - move it!
                new_file_name = os.path.join(self.channel.save_dir,
                                             wanted_filename)
                old_file_name = os.path.join(self.channel.save_dir,
                                             self.download_filename)
                if os.path.exists(
                        old_file_name) and not os.path.exists(new_file_name):
                    logger.info('Renaming %s => %s', old_file_name,
                                new_file_name)
                    os.rename(old_file_name, new_file_name)
                elif force_update and not os.path.exists(old_file_name):
                    # When we call force_update, the file might not yet exist when we
                    # call it from the downloading code before saving the file
                    logger.info('Choosing new filename: %s', new_file_name)
                else:
                    logger.warn('%s exists or %s does not', new_file_name,
                                old_file_name)
                logger.info('Updating filename of %s to "%s".', self.url,
                            wanted_filename)
            elif self.download_filename is None:
                logger.info('Setting download filename: %s', wanted_filename)
            self.download_filename = wanted_filename
            self.save()

        return os.path.join(util.sanitize_encoding(self.channel.save_dir),
                            util.sanitize_encoding(self.download_filename))
Пример #17
0
    def run(self):
        # Speed calculation (re-)starts here
        self.__start_time = 0
        self.__start_blocks = 0

        # If the download has already been cancelled, skip it
        if self.status == DownloadTask.CANCELLED:
            util.delete_file(self.tempname)
            self.progress = 0.0
            self.speed = 0.0
            return False

        # We only start this download if its status is "downloading"
        if self.status != DownloadTask.DOWNLOADING:
            return False

        # We are downloading this file right now
        self.status = DownloadTask.DOWNLOADING
        self._notification_shown = False

        try:
            # Resolve URL and start downloading the episode
            fmt_ids = youtube.get_fmt_ids(self._config.youtube)
            url = youtube.get_real_download_url(self.__episode.url, fmt_ids)
            url = vimeo.get_real_download_url(url,
                                              self._config.vimeo.fileformat)
            url = escapist_videos.get_real_download_url(url)
            url = url.strip()

            # Properly escapes Unicode characters in the URL path section
            # TODO: Explore if this should also handle the domain
            # Based on: http://stackoverflow.com/a/18269491/1072626
            # In response to issue: https://github.com/gpodder/gpodder/issues/232
            def iri_to_url(url):
                url = urllib.parse.urlsplit(url)
                url = list(url)
                # First unquote to avoid escaping quoted content
                url[2] = urllib.parse.unquote(url[2])
                url[2] = urllib.parse.quote(url[2])
                url = urllib.parse.urlunsplit(url)
                return url

            url = iri_to_url(url)

            downloader = DownloadURLOpener(self.__episode.channel)

            # HTTP Status codes for which we retry the download
            retry_codes = (408, 418, 504, 598, 599)
            max_retries = max(0, self._config.auto.retries)

            # Retry the download on timeout (bug 1013)
            for retry in range(max_retries + 1):
                if retry > 0:
                    logger.info('Retrying download of %s (%d)', url, retry)
                    time.sleep(1)

                try:
                    headers, real_url = downloader.retrieve_resume(
                        url, self.tempname, reporthook=self.status_updated)
                    # If we arrive here, the download was successful
                    break
                except urllib.error.ContentTooShortError as ctse:
                    if retry < max_retries:
                        logger.info('Content too short: %s - will retry.', url)
                        continue
                    raise
                except socket.timeout as tmout:
                    if retry < max_retries:
                        logger.info('Socket timeout: %s - will retry.', url)
                        continue
                    raise
                except gPodderDownloadHTTPError as http:
                    if retry < max_retries and http.error_code in retry_codes:
                        logger.info('HTTP error %d: %s - will retry.',
                                    http.error_code, url)
                        continue
                    raise

            new_mimetype = headers.get('content-type',
                                       self.__episode.mime_type)
            old_mimetype = self.__episode.mime_type
            _basename, ext = os.path.splitext(self.filename)
            if new_mimetype != old_mimetype or util.wrong_extension(ext):
                logger.info('Updating mime type: %s => %s', old_mimetype,
                            new_mimetype)
                old_extension = self.__episode.extension()
                self.__episode.mime_type = new_mimetype
                new_extension = self.__episode.extension()

                # If the desired filename extension changed due to the new
                # mimetype, we force an update of the local filename to fix the
                # extension.
                if old_extension != new_extension or util.wrong_extension(ext):
                    self.filename = self.__episode.local_filename(
                        create=True, force_update=True)

            # In some cases, the redirect of a URL causes the real filename to
            # be revealed in the final URL (e.g. http://gpodder.org/bug/1423)
            if real_url != url and not util.is_known_redirecter(real_url):
                realname, realext = util.filename_from_url(real_url)

                # Only update from redirect if the redirected-to filename has
                # a proper extension (this is needed for e.g. YouTube)
                if not util.wrong_extension(realext):
                    real_filename = ''.join((realname, realext))
                    self.filename = self.__episode.local_filename(
                        create=True, force_update=True, template=real_filename)
                    logger.info(
                        'Download was redirected (%s). New filename: %s',
                        real_url, os.path.basename(self.filename))

            # Look at the Content-disposition header; use if if available
            disposition_filename = get_header_param(headers, 'filename',
                                                    'content-disposition')

            if disposition_filename is not None:
                try:
                    disposition_filename.decode('ascii')
                except:
                    logger.warn(
                        'Content-disposition header contains non-ASCII characters - ignoring'
                    )
                    disposition_filename = None

            # Some servers do send the content-disposition header, but provide
            # an empty filename, resulting in an empty string here (bug 1440)
            if disposition_filename is not None and disposition_filename != '':
                # The server specifies a download filename - try to use it
                disposition_filename = os.path.basename(disposition_filename)
                self.filename = self.__episode.local_filename(create=True, \
                        force_update=True, template=disposition_filename)
                new_mimetype, encoding = mimetypes.guess_type(self.filename)
                if new_mimetype is not None:
                    logger.info('Using content-disposition mimetype: %s',
                                new_mimetype)
                    self.__episode.mime_type = new_mimetype

            # Re-evaluate filename and tempname to take care of podcast renames
            # while downloads are running (which will change both file names)
            self.filename = self.__episode.local_filename(create=False)
            self.tempname = os.path.join(os.path.dirname(self.filename),
                                         os.path.basename(self.tempname))
            shutil.move(self.tempname, self.filename)

            # Model- and database-related updates after a download has finished
            self.__episode.on_downloaded(self.filename)
        except DownloadCancelledException:
            logger.info('Download has been cancelled/paused: %s', self)
            if self.status == DownloadTask.CANCELLED:
                util.delete_file(self.tempname)
                self.progress = 0.0
                self.speed = 0.0
        except urllib.error.ContentTooShortError as ctse:
            self.status = DownloadTask.FAILED
            self.error_message = _('Missing content from server')
        except IOError as ioe:
            logger.error('%s while downloading "%s": %s',
                         ioe.strerror,
                         self.__episode.title,
                         ioe.filename,
                         exc_info=True)
            self.status = DownloadTask.FAILED
            d = {'error': ioe.strerror, 'filename': ioe.filename}
            self.error_message = _('I/O Error: %(error)s: %(filename)s') % d
        except gPodderDownloadHTTPError as gdhe:
            logger.error('HTTP %s while downloading "%s": %s',
                         gdhe.error_code,
                         self.__episode.title,
                         gdhe.error_message,
                         exc_info=True)
            self.status = DownloadTask.FAILED
            d = {'code': gdhe.error_code, 'message': gdhe.error_message}
            self.error_message = _('HTTP Error %(code)s: %(message)s') % d
        except Exception as e:
            self.status = DownloadTask.FAILED
            logger.error('Download failed: %s', str(e), exc_info=True)
            self.error_message = _('Error: %s') % (str(e), )

        if self.status == DownloadTask.DOWNLOADING:
            # Everything went well - we're done
            self.status = DownloadTask.DONE
            if self.total_size <= 0:
                self.total_size = util.calculate_size(self.filename)
                logger.info('Total size updated to %d', self.total_size)
            self.progress = 1.0
            gpodder.user_extensions.on_episode_downloaded(self.__episode)
            return True

        self.speed = 0.0

        # We finished, but not successfully (at least not really)
        return False
Пример #18
0
    def local_filename(self, create, force_update=False, check_only=False,
            template=None):
        """Get (and possibly generate) the local saving filename

        Pass create=True if you want this function to generate a
        new filename if none exists. You only want to do this when
        planning to create/download the file after calling this function.

        Normally, you should pass create=False. This will only
        create a filename when the file already exists from a previous
        version of gPodder (where we used md5 filenames). If the file
        does not exist (and the filename also does not exist), this
        function will return None.

        If you pass force_update=True to this function, it will try to
        find a new (better) filename and move the current file if this
        is the case. This is useful if (during the download) you get
        more information about the file, e.g. the mimetype and you want
        to include this information in the file name generation process.

        If check_only=True is passed to this function, it will never try
        to rename the file, even if would be a good idea. Use this if you
        only want to check if a file exists.

        If "template" is specified, it should be a filename that is to
        be used as a template for generating the "real" filename.

        The generated filename is stored in the database for future access.
        """
        ext = self.extension(may_call_local_filename=False).encode('utf-8', 'ignore')

        # For compatibility with already-downloaded episodes, we
        # have to know md5 filenames if they are downloaded already
        urldigest = hashlib.md5(self.url).hexdigest()

        if not create and self.filename is None:
            urldigest_filename = os.path.join(self.channel.save_dir, urldigest+ext)
            if os.path.exists(urldigest_filename):
                # The file exists, so set it up in our database
                log('Recovering pre-0.15.0 file: %s', urldigest_filename, sender=self)
                self.filename = urldigest+ext
                self.auto_filename = 1
                self.save()
                return urldigest_filename
            return None

        # We only want to check if the file exists, so don't try to
        # rename the file, even if it would be reasonable. See also:
        # http://bugs.gpodder.org/attachment.cgi?id=236
        if check_only:
            if self.filename is None:
                return None
            else:
                return os.path.join(self.channel.save_dir, self.filename)

        if self.filename is None or force_update or (self.auto_filename and self.filename == urldigest+ext):
            # Try to find a new filename for the current file
            if template is not None:
                # If template is specified, trust the template's extension
                episode_filename, ext = os.path.splitext(template)
            else:
                episode_filename, extension_UNUSED = util.filename_from_url(self.url)
            fn_template = util.sanitize_filename(episode_filename, self.MAX_FILENAME_LENGTH)

            if 'redirect' in fn_template and template is None:
                # This looks like a redirection URL - force URL resolving!
                log('Looks like a redirection to me: %s', self.url, sender=self)
                url = util.get_real_url(self.channel.authenticate_url(self.url))
                log('Redirection resolved to: %s', url, sender=self)
                (episode_filename, extension_UNUSED) = util.filename_from_url(url)
                fn_template = util.sanitize_filename(episode_filename, self.MAX_FILENAME_LENGTH)

            # Use the video title for YouTube downloads
            for yt_url in ('http://youtube.com/', 'http://www.youtube.com/'):
                if self.url.startswith(yt_url):
                    fn_template = util.sanitize_filename(os.path.basename(self.title), self.MAX_FILENAME_LENGTH)

            # If the basename is empty, use the md5 hexdigest of the URL
            if len(fn_template) == 0 or fn_template.startswith('redirect.'):
                log('Report to bugs.gpodder.org: Podcast at %s with episode URL: %s', self.channel.url, self.url, sender=self)
                fn_template = urldigest

            # Find a unique filename for this episode
            wanted_filename = self.find_unique_file_name(self.url, fn_template, ext)

            # We populate the filename field the first time - does the old file still exist?
            if self.filename is None and os.path.exists(os.path.join(self.channel.save_dir, urldigest+ext)):
                log('Found pre-0.15.0 downloaded file: %s', urldigest, sender=self)
                self.filename = urldigest+ext

            # The old file exists, but we have decided to want a different filename
            if self.filename is not None and wanted_filename != self.filename:
                # there might be an old download folder crawling around - move it!
                new_file_name = os.path.join(self.channel.save_dir, wanted_filename)
                old_file_name = os.path.join(self.channel.save_dir, self.filename)
                if os.path.exists(old_file_name) and not os.path.exists(new_file_name):
                    log('Renaming %s => %s', old_file_name, new_file_name, sender=self)
                    os.rename(old_file_name, new_file_name)
                elif force_update and not os.path.exists(old_file_name):
                    # When we call force_update, the file might not yet exist when we
                    # call it from the downloading code before saving the file
                    log('Choosing new filename: %s', new_file_name, sender=self)
                else:
                    log('Warning: %s exists or %s does not.', new_file_name, old_file_name, sender=self)
                log('Updating filename of %s to "%s".', self.url, wanted_filename, sender=self)
            elif self.filename is None:
                log('Setting filename to "%s".', wanted_filename, sender=self)
            else:
                log('Should update filename. Stays the same (%s). Good!', \
                        wanted_filename, sender=self)
            self.filename = wanted_filename
            self.save()
            self.db.commit()

        return os.path.join(self.channel.save_dir, self.filename)
Пример #19
0
    def from_feedparser_entry(entry, channel):
        episode = PodcastEpisode(channel)

        episode.title = entry.get('title', '')
        episode.link = entry.get('link', '')
        episode.description = entry.get('summary', '')

        # Fallback to subtitle if summary is not available0
        if not episode.description:
            episode.description = entry.get('subtitle', '')

        episode.guid = entry.get('id', '')
        if entry.get('updated_parsed', None):
            episode.pubDate = rfc822.mktime_tz(entry.updated_parsed+(0,))

        # Enclosures
        for e in entry.get('enclosures', ()):
            episode.mimetype = e.get('type', 'application/octet-stream')
            if '/' not in episode.mimetype:
                continue

            episode.url = util.normalize_feed_url(e.get('href', ''))
            if not episode.url:
                continue

            try:
                episode.length = int(e.length) or -1
            except:
                episode.length = -1

            return episode

        # Media RSS content
        for m in entry.get('media_content', ()):
            episode.mimetype = m.get('type', 'application/octet-stream')
            if '/' not in episode.mimetype:
                continue

            episode.url = util.normalize_feed_url(m.get('url', ''))
            if not episode.url:
                continue

            try:
                episode.length = int(m.fileSize) or -1
            except:
                episode.length = -1

            return episode

        # Brute-force detection of any links
        for l in entry.get('links', ()):
            episode.url = util.normalize_feed_url(l.get('href', ''))
            if not episode.url:
                continue

            if youtube.is_video_link(episode.url):
                return episode

            # Check if we can resolve this link to a audio/video file
            filename, extension = util.filename_from_url(episode.url)
            file_type = util.file_type_by_extension(extension)
            if file_type is None and hasattr(l, 'type'):
                extension = util.extension_from_mimetype(l.type)
                file_type = util.file_type_by_extension(extension)

            # The link points to a audio or video file - use it!
            if file_type is not None:
                return episode

        # Scan MP3 links in description text
        mp3s = re.compile(r'http://[^"]*\.mp3')
        for content in entry.get('content', ()):
            html = content.value
            for match in mp3s.finditer(html):
                episode.url = match.group(0)
                return episode

        return None
Пример #20
0
    def from_feedparser_entry(entry, channel, mimetype_prefs=''):
        episode = PodcastEpisode(channel)
        #log("from_feedparser_entry(%s)" % entry.get('title',''))
        # Replace multi-space and newlines with single space (Maemo bug 11173)
        episode.title = re.sub('\s+', ' ', entry.get('title', ''))
        episode.link = entry.get('link', '')
        #print("summary=%s" % entry.summary)
        if 'content' in entry and len(entry['content']) and \
                entry['content'][0].get('type', '') == 'text/html':
            episode.description = entry['content'][0].value
        else:
            episode.description = entry.get('summary', '')

        try:
            # Parse iTunes-specific podcast duration metadata
            total_time = util.parse_time(entry.get('itunes_duration', ''))
            episode.total_time = total_time
        except:
            pass

        # Fallback to subtitle if summary is not available0
        if not episode.description:
            episode.description = entry.get('subtitle', '')
        #print("episode %s description=%s" % (episode.title,episode.description))
        episode.guid = entry.get('id', '')
        if entry.get('updated_parsed', None):
            episode.pubDate = rfc822.mktime_tz(entry.updated_parsed+(0,))

        enclosures = entry.get('enclosures', [])
        media_rss_content = entry.get('media_content', [])
        audio_available = any(e.get('type', '').startswith('audio/') \
                for e in enclosures + media_rss_content)
        video_available = any(e.get('type', '').startswith('video/') \
                for e in enclosures + media_rss_content)

        # Create the list of preferred mime types
        mimetype_prefs = mimetype_prefs.split(',')

        def calculate_preference_value(enclosure):
            """Calculate preference value of an enclosure

            This is based on mime types and allows users to prefer
            certain mime types over others (e.g. MP3 over AAC, ...)
            """
            mimetype = enclosure.get('type', None)
            try:
                # If the mime type is found, return its (zero-based) index
                return mimetype_prefs.index(mimetype)
            except ValueError:
                # If it is not found, assume it comes after all listed items
                return len(mimetype_prefs)

        # Enclosures
        for e in sorted(enclosures, key=calculate_preference_value):
            episode.mimetype = e.get('type', 'application/octet-stream')
            if episode.mimetype == '':
                # See Maemo bug 10036
                log('Fixing empty mimetype in ugly feed', sender=episode)
                episode.mimetype = 'application/octet-stream'

            if '/' not in episode.mimetype:
                continue

            # Skip images in feeds if audio or video is available (bug 979)
            # This must (and does) also look in Media RSS enclosures (bug 1430)
            if episode.mimetype.startswith('image/') and \
                    (audio_available or video_available):
                continue

            episode.url = util.normalize_feed_url(e.get('href', ''))
            if not episode.url:
                continue

            try:
                episode.length = int(e.length) or -1
            except:
                episode.length = -1

            return episode

        # Media RSS content
        for m in sorted(media_rss_content, key=calculate_preference_value):
            episode.mimetype = m.get('type', 'application/octet-stream')
            if '/' not in episode.mimetype:
                continue

            # Skip images in Media RSS if we have audio/video (bug 1444)
            if episode.mimetype.startswith('image/') and \
                    (audio_available or video_available):
                continue

            episode.url = util.normalize_feed_url(m.get('url', ''))
            if not episode.url:
                continue

            try:
                episode.length = int(m.fileSize) or -1
            except:
                episode.length = -1

            return episode

        # Brute-force detection of any links
        for l in entry.get('links', ()):
            episode.url = util.normalize_feed_url(l.get('href', ''))
            if not episode.url:
                continue

            if youtube.is_video_link(episode.url):
                return episode

            # Check if we can resolve this link to a audio/video file
            filename, extension = util.filename_from_url(episode.url)
            file_type = util.file_type_by_extension(extension)
            if file_type is None and hasattr(l, 'type'):
                extension = util.extension_from_mimetype(l.type)
                file_type = util.file_type_by_extension(extension)

            # The link points to a audio or video file - use it!
            if file_type is not None:
                return episode

        # Scan MP3 links in description text
        mp3s = re.compile(r'http://[^"]*\.mp3')
        for content in entry.get('content', ()):
            html = content.value
            for match in mp3s.finditer(html):
                episode.url = match.group(0)
                return episode
               
        #don't return None : for non-podcast channels
        episode.state = gpodder.STATE_NORMAL
        episode.url = ''
        return episode