def downloadOpdsCatalog(self, gui, opdsCatalogUrl):
    """Download an OPDS catalog, following its pagination links.

    The feed at ``opdsCatalogUrl`` is parsed with ``feedparser`` and its
    entries are converted with ``self.makeMetadataFromParsedOpds``; the
    result replaces ``self.books``.  Each further page reported by
    ``self.findNextUrl`` is then fetched and its converted entries are
    concatenated onto ``self.books``.  After every page
    ``self.filterBooks()`` is called and pending Qt events are processed.

    :param gui: accepted for interface compatibility; not used here.
    :param opdsCatalogUrl: URL of the first page of the catalog.
    :return: ``None``; the result is stored in ``self.books``.
    """
    # Parenthesised single-argument form prints the same text on Python 2
    # and Python 3.
    print("downloading catalog: %s" % opdsCatalogUrl)

    # Remember every page already requested so that a catalog whose "next"
    # link points back to an earlier page cannot loop forever.
    visitedUrls = set([opdsCatalogUrl])

    currentFeed = feedparser.parse(opdsCatalogUrl)
    self.books = self.makeMetadataFromParsedOpds(currentFeed.entries)
    while True:
        self.filterBooks()
        # Let Qt handle queued events between page downloads.
        QCoreApplication.processEvents()

        nextUrl = self.findNextUrl(currentFeed.feed)
        if nextUrl is None or nextUrl in visitedUrls:
            break
        visitedUrls.add(nextUrl)

        currentFeed = feedparser.parse(nextUrl)
        # Rebind rather than extend in place, as the original did, so any
        # outside reference to the previous list is left untouched.
        self.books = self.books + self.makeMetadataFromParsedOpds(currentFeed.entries)
def downloadOpdsRootCatalog(self, gui, opdsUrl, displayDialogOnErrors):
    """Fetch the root OPDS feed and collect the catalogs it lists.

    :param gui: passed as the first argument to ``error_dialog`` on failure.
    :param opdsUrl: URL of the OPDS root feed.
    :param displayDialogOnErrors: forwarded as the last argument of
        ``error_dialog``.
    :return: ``(firstTitle, catalogEntries)``.  ``catalogEntries`` maps each
        entry title to the ``href`` of that entry's first link; entries
        without links are omitted.  ``firstTitle`` is the title of the first
        entry (``None`` for an empty feed).  When feedparser reports a
        ``bozo_exception`` the result is ``(None, {})``.

    Side effect: ``self.serverHeader`` is set to the ``server`` header of the
    response, or to ``''`` when that header is not available.
    """
    feed = feedparser.parse(opdsUrl)
    if 'bozo_exception' in feed:
        exception = feed['bozo_exception']
        message = 'Failed opening the OPDS URL ' + opdsUrl + ': '
        reason = ''
        if hasattr(exception, 'reason'):
            reason = str(exception.reason)
        error_dialog(gui, _('Failed opening the OPDS URL'), message, reason, displayDialogOnErrors)
        return (None, {})

    # Not every response carries a Server header (and a parse result may
    # have no headers at all); fall back to '' instead of raising.
    headers = feed.get('headers') or {}
    self.serverHeader = headers.get('server', '')

    # Parenthesised single-argument print works on Python 2 and Python 3.
    print("serverHeader: %s" % self.serverHeader)
    print("feed.entries: %s" % feed.entries)

    catalogEntries = {}
    firstTitle = None
    for entry in feed.entries:
        title = entry.get('title', 'No title')
        # NOTE(review): firstTitle is taken from the first entry even when
        # that entry has no link, so it may be absent from catalogEntries.
        if firstTitle is None:
            firstTitle = title
        links = entry.get('links', [])
        firstLink = next(iter(links), None)
        if firstLink is not None:
            print("firstLink: %s" % firstLink)
            # Entries sharing a title overwrite each other; the last wins.
            catalogEntries[title] = firstLink.href
    return (firstTitle, catalogEntries)
def downloadOpdsRootCatalog(self, gui, opdsUrl, displayDialogOnErrors):
    """Download the OPDS root feed and return the catalogs it advertises.

    :param gui: handed to ``error_dialog`` as its first argument if the
        feed cannot be opened.
    :param opdsUrl: address of the OPDS root feed.
    :param displayDialogOnErrors: passed through as the final argument of
        ``error_dialog``.
    :return: a ``(firstTitle, catalogEntries)`` tuple.  ``catalogEntries``
        maps entry titles to the ``href`` of each entry's first link
        (entries with no links are skipped); ``firstTitle`` is the title of
        the first entry, or ``None`` if the feed has no entries.  If
        feedparser recorded a ``bozo_exception``, ``(None, {})`` is returned.

    Also stores the response's ``server`` header in ``self.serverHeader``
    (``""`` when the header is missing).
    """
    feed = feedparser.parse(opdsUrl)
    if "bozo_exception" in feed:
        exception = feed["bozo_exception"]
        message = "Failed opening the OPDS URL " + opdsUrl + ": "
        reason = ""
        if hasattr(exception, "reason"):
            reason = str(exception.reason)
        error_dialog(gui, _("Failed opening the OPDS URL"), message, reason, displayDialogOnErrors)
        return (None, {})

    # A missing Server header (or a result without any headers) must not
    # abort the download with KeyError/AttributeError.
    responseHeaders = feed.get("headers") or {}
    self.serverHeader = responseHeaders.get("server", "")
    print("serverHeader: %s" % self.serverHeader)
    print("feed.entries: %s" % feed.entries)

    catalogEntries = {}
    firstTitle = None
    for entry in feed.entries:
        title = entry.get("title", "No title")
        # NOTE(review): the first entry's title is remembered even when the
        # entry has no link and therefore no key in catalogEntries.
        if firstTitle is None:
            firstTitle = title
        firstLink = next(iter(entry.get("links", [])), None)
        if firstLink is None:
            continue
        print("firstLink: %s" % firstLink)
        # Duplicate titles overwrite earlier entries; the last one wins.
        catalogEntries[title] = firstLink.href
    return (firstTitle, catalogEntries)
def downloadOpdsRootCatalog(self, gui, opdsUrl, displayDialogOnErrors):
    """Parse the OPDS root feed at ``opdsUrl`` and list its catalogs.

    :param gui: first argument given to ``error_dialog`` when opening the
        feed fails.
    :param opdsUrl: URL of the OPDS root feed.
    :param displayDialogOnErrors: last argument given to ``error_dialog``.
    :return: ``(firstTitle, catalogEntries)`` where ``catalogEntries`` is a
        dict from entry title to the ``href`` of the entry's first link
        (link-less entries are left out) and ``firstTitle`` is the first
        entry's title, or ``None`` when there are no entries.  Returns
        ``(None, {})`` if the parsed feed contains a ``bozo_exception``.

    ``self.serverHeader`` is updated with the response's ``server`` header;
    it becomes ``''`` if no such header was received.
    """
    feed = feedparser.parse(opdsUrl)
    if 'bozo_exception' in feed:
        exception = feed['bozo_exception']
        message = 'Failed opening the OPDS URL ' + opdsUrl + ': '
        # Only some exceptions (e.g. URL errors) expose a ``reason``.
        reason = str(exception.reason) if hasattr(exception, 'reason') else ''
        error_dialog(gui, _('Failed opening the OPDS URL'), message, reason, displayDialogOnErrors)
        return (None, {})

    # Tolerate responses without a Server header, and parse results that
    # carry no headers at all, rather than raising.
    headers = feed.get('headers') or {}
    self.serverHeader = headers.get('server', '')

    # print(...) with one pre-formatted argument is valid on Python 2 and 3.
    print("serverHeader: %s" % self.serverHeader)
    print("feed.entries: %s" % feed.entries)

    catalogEntries = {}
    firstTitle = None
    for entry in feed.entries:
        title = entry.get('title', 'No title')
        # NOTE(review): firstTitle may name an entry that has no link and
        # is therefore missing from catalogEntries.
        if firstTitle is None:
            firstTitle = title
        links = entry.get('links', [])
        firstLink = next(iter(links), None)
        if firstLink is not None:
            print("firstLink: %s" % firstLink)
            # Later entries with the same title replace earlier ones.
            catalogEntries[title] = firstLink.href
    return (firstTitle, catalogEntries)
def feed_from_xml(raw_xml, title=None, oldest_article=7,
                  max_articles_per_feed=100,
                  get_article_url=lambda item: item.get('link', None),
                  log=default_log):
    """Build a ``Feed`` from raw feed XML.

    :param raw_xml: the feed document, as bytes or as text.
    :param title: forwarded to ``Feed.populate_from_feed``.
    :param oldest_article: forwarded to ``Feed.populate_from_feed``
        (presumably an age limit in days -- confirm against ``Feed``).
    :param max_articles_per_feed: forwarded to ``Feed.populate_from_feed``.
    :param get_article_url: callable given to the ``Feed`` constructor; the
        default returns the item's ``'link'`` value or ``None``.
    :param log: logger given to the ``Feed`` constructor.
    :return: the populated ``Feed`` instance.
    """
    from calibre.web.feeds.feedparser import parse
    # Handle unclosed escaped entities. They trip up feedparser and HBR for one
    # generates them
    # The pattern type must match the input type: on Python 3 a bytes pattern
    # raises TypeError when applied to text, so pick the matching variant.
    if isinstance(raw_xml, type(u'')):
        raw_xml = re.sub(r'(&#\d+)([^0-9;])', r'\1;\2', raw_xml)
    else:
        raw_xml = re.sub(br'(&#\d+)([^0-9;])', br'\1;\2', raw_xml)
    feed = parse(raw_xml)
    pfeed = Feed(get_article_url=get_article_url, log=log)
    pfeed.populate_from_feed(feed, title=title,
                             oldest_article=oldest_article,
                             max_articles_per_feed=max_articles_per_feed)
    return pfeed
def feed_from_xml(raw_xml, title=None, oldest_article=7,
                  max_articles_per_feed=100,
                  get_article_url=lambda item: item.get('link', None),
                  log=default_log):
    """Parse raw feed XML and return it as a populated ``Feed``.

    :param raw_xml: the feed document; both text and bytes are accepted.
    :param title: passed on to ``Feed.populate_from_feed``.
    :param oldest_article: passed on to ``Feed.populate_from_feed``
        (presumably a maximum age in days -- confirm against ``Feed``).
    :param max_articles_per_feed: passed on to ``Feed.populate_from_feed``.
    :param get_article_url: callable handed to the ``Feed`` constructor; by
        default it looks up ``'link'`` on the item and yields ``None`` when
        absent.
    :param log: logger handed to the ``Feed`` constructor.
    :return: the ``Feed`` object after ``populate_from_feed`` has run.
    """
    from calibre.web.feeds.feedparser import parse
    # Handle unclosed escaped entities. They trip up feedparser and HBR for one
    # generates them
    # A text pattern cannot be applied to bytes on Python 3 (TypeError), so
    # choose the pattern/replacement pair that matches the input type.
    if isinstance(raw_xml, type(u'')):
        pattern, replacement = r'(&#\d+)([^0-9;])', r'\1;\2'
    else:
        pattern, replacement = br'(&#\d+)([^0-9;])', br'\1;\2'
    raw_xml = re.sub(pattern, replacement, raw_xml)
    feed = parse(raw_xml)
    pfeed = Feed(get_article_url=get_article_url, log=log)
    pfeed.populate_from_feed(feed, title=title,
                             oldest_article=oldest_article,
                             max_articles_per_feed=max_articles_per_feed)
    return pfeed