# Download the online documentation pages.
# NOTE(review): `url`, `p`, `title` and `template` are defined outside this
# fragment; presumably `p` is the current page name in a loop -- confirm.
# Single-argument print() emits the same text on Python 2 and Python 3.
print("Retrieving %s" % (url + p))
html = URL(url + p).download(cached=False)

# Parse the actual documentation, we don't need the website header, footer, navigation, search.
# Keep only the source of the first "node-type-page" element inside #content-area.
html = Document(html).by_id("content-area").by_class("node-type-page")[0].source
html = strip_javascript(html)
# Remove the remaining site chrome; each pair is (start marker, end marker).
for start_marker, end_marker in (
        ('<div id="navbar">', '/#navbar -->'),
        ('<div id="sidebar-right">', '/#sidebar-right -->'),
        ('<div id="footer">', '/#footer -->'),
        ('<a class="twitter-share-button"', '</a>')):
    html = strip_between(start_marker, end_marker, html)

# Link to local pages and images.
# Link to online media.
# Order matters: the online rewrites must run before the generic offline
# rewrite below, which would otherwise turn these links into local .html files.
html = html.replace('href="/pages/MBSP"', 'href="%sMBSP"' % url)  # MBSP docs (online)
for online_prefix in ("pattern-examples", "using-", "modeling-"):  # examples (online)
    html = re.sub(r'href="/pages/(%s.*?)"' % online_prefix, r'href="%s\1"' % url, html)
# Every remaining /pages/ link becomes a local file; the page name ends at the
# first '#' (fragment) or '"' (end of attribute). The character class is [#"]:
# a '|' inside [...] is a literal pipe, not alternation.
html = re.sub(r'href="/pages/(.*?)([#"])', r'href="\1.html\2', html)  # pages (offline)
html = html.replace('src="/media/', 'src="../g/')  # images (offline)
html = html.replace('src="/sites/all/themes/clips/g/', 'src="../g/')  # images (offline)
# Downloads stay online: drop "pages/" from url and append "media/".
html = html.replace('href="/media/', 'href="%smedia/' % url.replace("pages/", ""))  # downloads (online)

# Apply the simplified template + set page titles.
# The template takes five values: page name, page URL (twice), title, body HTML.
html = template % (p, url + p, url + p, title, html)

# Generate offline HTML file.
# The with-block closes the file even if write() raises.
with codecs.open(os.path.join("html", "%s.html" % p), "w", encoding="utf-8") as f:
    f.write(html)

# Create index.html (which simply redirects to pattern.html).