def get_book_list(server):
    """Ask the server for a list of books.

    Floss Manual TWikis keep such a list at
    /bin/view/TWiki/WebLeftBarWebsList?skin=text but it needs a bit of
    processing.

    If BOOK_LIST_CACHE is non-zero, the book list won't be re-fetched
    within that many seconds; it will be read from disk instead.
    """
    if config.BOOK_LIST_CACHE:
        cache_name = os.path.join(config.CACHE_DIR, '%s.booklist' % server)
        if (os.path.exists(cache_name) and
            os.stat(cache_name).st_mtime + config.BOOK_LIST_CACHE > time.time()):
            f = open(cache_name)
            s = f.read()
            f.close()
            return s.split()

    url = config.CHAPTER_URL % (server, 'TWiki', 'WebLeftBarWebsList')
    #url = 'http://%s/bin/view/TWiki/WebLeftBarWebsList?skin=text' % server
    #XXX should use lxml
    log('getting booklist: %s' % url)
    s = url_fetch(url)

    items = sorted(x for x in re.findall(r'/bin/view/([\w/]+)/WebHome', s)
                   if x not in config.IGNORABLE_TWIKI_BOOKS)
    if config.BOOK_LIST_CACHE:
        f = open(cache_name, 'w')
        f.write('\n'.join(items))
        f.close()
    return items
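# Illustrative usage sketch, not part of the original module: the server
# name below is hypothetical, and config, log and url_fetch are assumed to
# be set up as in the code above.
def _example_print_books(server='en.flossmanuals.net'):
    books = get_book_list(server)
    for name in books:
        log('book: %s' % name)
    return books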
def get_chapter_html(self, chapter, wrapped=False):
    url = config.CHAPTER_URL % (self.server, self.book, chapter)
    log('getting chapter: %s' % url)
    html = url_fetch(url)
    if wrapped:
        html = CHAPTER_TEMPLATE % {
            'title': '%s: %s' % (self.book, chapter),
            'text': html,
            'dir': self.dir
        }
    return html
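# Illustrative sketch, not from the original source: fetch one chapter of a
# book object wrapped in CHAPTER_TEMPLATE and write it to disk.  The chapter
# name and output path are placeholders.
def _example_save_chapter(book, chapter, path):
    html = book.get_chapter_html(chapter, wrapped=True)
    f = open(path, 'w')
    f.write(html)
    f.close()
    return path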
def fetch_if_necessary(self, url, target=None, use_cache=True):
    if url in self._fetched:
        return self._fetched[url]
    if target is None:
        target = url_to_filename(url, self.prefix)
    if use_cache and os.path.exists(self.cache_dir + target):
        log("used cache for %s" % target)
        return target
    try:
        data = url_fetch(url)
    except HTTPError, e:
        # if it is missing, assume it will be missing every time
        # afterwards; otherwise you can get into endless waiting
        self._fetched[url] = None
        log("Wanting '%s', got error %s" % (url, e))
        return None
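# Assumption, not shown in the excerpt above: fetch_if_necessary() breaks
# off inside the error handler, so the successful path is missing.  Judging
# from the cache check earlier in the method, it would plausibly write
# `data` to self.cache_dir + target, remember the result in self._fetched,
# and return target -- roughly:
#
#     f = open(self.cache_dir + target, 'w')
#     f.write(data)
#     f.close()
#     self._fetched[url] = target
#     return target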