def download(self):
    # open the partial file in append mode and resume from its current size
    f = open(self.tempname, "ab")
    size = os.fstat(f.fileno())[stat.ST_SIZE]
    response = requests.get(self.href, stream=True,
                            headers={"Range": "bytes=%u-" % size})
    remaining = int(response.headers["Content-Length"])
    r = response.raw
    while True:
        data = r.read(4096)
        remaining -= len(data)
        if not data:
            break
        f.write(data)
    f.flush()
    os.fsync(f.fileno())
    f.close()

    if remaining > 0:
        # download terminated early, retry
        fileset.remove(self.name)
        return

    if not self.verify():
        # download corrupt, delete and retry
        msg("WARN: verify failed for %s" % self.name)
        os.unlink(self.tempname)
        fileset.remove(self.name)
        return

    common.rename(self.tempname, self.name)
    common.mkro(self.name)

    if "Last-Modified" in response.headers:
        mtime = calendar.timegm(time.strptime(
            response.headers["Last-Modified"],
            "%a, %d %b %Y %H:%M:%S %Z"))
        os.utime(self.name, (mtime, mtime))
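# A minimal, self-contained sketch of the HTTP Range-resume pattern the
# method above relies on. resume_download() and its parameters are
# illustrative, not part of this codebase; note that a server which ignores
# the Range header returns 200 with the full body, so real callers should
# also check response.status_code == 206 before appending.
import os
import requests

def resume_download(url, path, chunk_size=4096):
    with open(path, "ab") as f:        # append mode keeps any partial file
        offset = f.tell()
        headers = {"Range": "bytes=%u-" % offset} if offset else {}
        response = requests.get(url, stream=True, headers=headers)
        response.raise_for_status()
        for chunk in response.iter_content(chunk_size):
            f.write(chunk)
        f.flush()
        os.fsync(f.fileno())           # make sure the bytes hit disk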
def download(item, db, tries):
    if item["href"] in db:
        path = db.get(item["href"])
    else:
        f = common.retrieve_m(config["clearspace-root"] + item["href"],
                              tries=tries)
        doc = WikiDoc(f.read())
        f.close()
        path = doc.path + "/" + doc.filename

        if want(path):
            skip = False
            if os.path.exists(path):
                st = os.stat(path)
                if st.st_mtime == doc.mtime:
                    skip = True

            if not skip:
                common.mkdirs(doc.path)
                common.retrieve(config["clearspace-root"] + doc.filehref,
                                path, force=True, tries=tries)
                common.mkro(path)
                os.utime(path, (doc.mtime, doc.mtime))

    updatedbs(db, keep, item["href"], path)
def download(url, path):
    with lock:
        if path in files:
            return
        files.add(path)

    if os.path.exists(path):
        r = tls.s.head(url)
        mtime = common.parse_last_modified(r.headers["Last-Modified"])
        if os.path.getmtime(path) == mtime and \
           os.path.getsize(path) == int(r.headers["Content-Length"]):
            return

    common.mkdirs(os.path.dirname(path))
    log(url + " -> " + path)

    r = tls.s.get(url, stream=True)
    temppath = common.mktemppath(path)
    with open(temppath, "wb") as f:
        for data in r.iter_content(4096):
            f.write(data)
        f.flush()
        os.fsync(f.fileno())

    mtime = common.parse_last_modified(r.headers["Last-Modified"])
    os.utime(temppath, (mtime, mtime))
    common.mkro(temppath)
    common.rename(temppath, path)
def download(url, path, mtime):
    with lock:
        if path in files:
            return
        files.add(path)

    if os.path.exists(path) and os.path.getmtime(path) == mtime:
        return

    common.mkdirs(os.path.dirname(path))
    log(url + " -> " + path)

    r = get(url, stream=True)
    p = os.path.split(path)
    temppath = os.path.join(p[0], "." + p[1])
    with open(temppath, "wb") as f:
        for data in r.iter_content(4096):
            f.write(data)
        f.flush()
        os.fsync(f.fileno())

    os.utime(temppath, (mtime, mtime))
    common.mkro(temppath)
    common.rename(temppath, path)
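# Hedged sketch of the hidden-tempfile-then-rename idiom used above: the
# download lands in ".name" next to the target, and only a completed file is
# renamed into place, so readers never observe a truncated download.
# atomic_write() is illustrative; os.replace is the stdlib analogue of what
# common.rename is assumed to do here.
import os

def atomic_write(path, data):
    p = os.path.split(path)
    temppath = os.path.join(p[0] or ".", "." + p[1])
    with open(temppath, "wb") as f:
        f.write(data)
        f.flush()
        os.fsync(f.fileno())
    os.replace(temppath, path)  # atomic within one filesystem on POSIX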
def download(url, dest):
    if url.startswith("data:"):
        return
    if fileset.tas(dest):
        return

    common.mkdirs(os.path.split(dest)[0])

    try:
        common.retrieve(url, dest)
        if args["type"] == "html-single" and dest.endswith(".html"):
            get_deps_html(url, dest)
        if args["type"] == "html-single" and dest.endswith(".css"):
            get_deps_css(url, dest)
        common.mkro(dest)
    except urllib.error.HTTPError as e:
        if e.code == 403 or e.code == 404:
            warn("WARNING: %s on %s, continuing..." % (e, url))
        else:
            raise
def download(url, dest, username, password):
    pm = urllib2.HTTPPasswordMgrWithDefaultRealm()
    pm.add_password(None, url, username, password)
    opener = urllib2.build_opener(urllib2.HTTPBasicAuthHandler(pm))

    common.mkdirs(os.path.split(dest)[0])
    common.retrieve(url, dest, opener=opener, tries=10, force=True)
    common.mkro(dest)
def download(url, dest, username, password):
    pm = urllib.request.HTTPPasswordMgrWithDefaultRealm()
    pm.add_password(None, url, username, password)
    opener = urllib.request.build_opener(
        urllib.request.HTTPBasicAuthHandler(pm))

    common.mkdirs(os.path.split(dest)[0])
    common.retrieve(url, dest, opener=opener, tries=10, force=True)
    common.mkro(dest)
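# Self-contained sketch of the basic-auth opener construction shared by the
# two variants above, using only the standard library (common.retrieve is
# repo-specific and presumably just passes the opener through to its fetch
# loop). open_with_basic_auth() is an illustrative name.
import urllib.request

def open_with_basic_auth(url, username, password):
    pm = urllib.request.HTTPPasswordMgrWithDefaultRealm()
    pm.add_password(None, url, username, password)
    opener = urllib.request.build_opener(
        urllib.request.HTTPBasicAuthHandler(pm))
    return opener.open(url)  # caller closes the returned response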
def download_item(item, extension, tries=1):
    dstfile = os.path.join(item.type_, item.pageurl.split("/")[-1]) + extension
    common.mkdirs(item.type_)

    try:
        print("\r[%u]" % item.number, end="", file=sys.stderr)
        common.retrieve(item.dlurl, dstfile, tries=tries)
        common.mkro(dstfile)
    except urllib.error.HTTPError as e:
        warn("can't download item at %s (#%u, %s, %s) (%s), continuing..." %
             (item.dlurl, item.number, item.title, item.type_, e))
def sync(query, keep):
    xml = common.retrieve_m(
        config["gsa-url"] + "?client=internal&output=xml&num=1000&filter=0&q=" + query,
        tries=10)
    xml = lxml.etree.parse(xml)

    if int(xml.xpath("//M/text()")[0]) == 1000:
        raise Exception("search returned too many results")

    for result in xml.xpath("//U/text()"):
        dest = result.split("//")[1]
        dest = dest.replace("~", "")
        common.mkdirs(os.path.split(dest)[0])
        common.retrieve(result, dest, tries=10)
        common.mkro(dest)
        keep.add(dest)
def extract(path):
    if config["attachments-enabled"] != "1":
        return

    print("Extracting attachments from %s..." % path, file=sys.stderr)

    mbox = mailbox.mbox(config["lists-base"] + "/" + path)
    for msg in mbox.keys():
        index = 0
        for part in mbox[msg].walk():
            fn = part.get_filename()
            typ = part.get_content_type()
            # only named, non-inline parts that are not signatures or
            # similar machine-generated types count as attachments
            if fn is not None \
                    and not mailindex.decode(part.get("Content-Disposition", "inline")).startswith("inline") \
                    and typ not in ('application/pgp-signature',
                                    'application/pkcs7-signature',
                                    'application/x-pkcs7-signature',
                                    'image/x-icon',
                                    'message/external-body',
                                    'message/rfc822',
                                    'text/calendar',
                                    'text/x-vcard'):
                p = config["attachments-base"] + "/" + path
                try:
                    fn = cleanfilename(fn)
                    if config["attachments-odponly"] != "1" or \
                            fn.lower().endswith(".odp") or \
                            typ.lower().startswith("application/vnd.oasis.opendocument.presentation"):
                        common.mkdirs(p)
                        p += "/%03u-%03u-%s" % (msg, index, fn)
                        if not os.path.exists(p):
                            # write to a temp file, fsync, then rename into place
                            temppath = common.mktemppath(p)
                            f = open(temppath, "wb")
                            f.write(part.get_payload(decode=True))
                            f.flush()
                            os.fsync(f.fileno())
                            f.close()
                            common.rename(temppath, p)
                            common.mkro(p)
                except UnicodeEncodeError:
                    pass
            index += 1
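# Reduced sketch of the mbox walk performed by extract() above, showing just
# the part-selection logic: every MIME leaf is visited and only named,
# non-inline parts are treated as attachments. list_attachments() is
# illustrative and omits the repo-specific decoding, filtering, and config.
import mailbox

def list_attachments(mbox_path):
    for key, msg in mailbox.mbox(mbox_path).items():
        for part in msg.walk():
            fn = part.get_filename()
            disp = part.get("Content-Disposition", "inline")
            if fn is not None and not disp.startswith("inline"):
                yield key, fn, part.get_content_type()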
    return bytes == "%PDF"

if __name__ == "__main__":
    global config
    config = common.load_config()

    print >>sys.stderr, "Utility needs update since relaunch of www.redhat.com, feel free to submit patches..."
    sys.exit(1)

    common.mkdirs(config["references-base"])
    os.chdir(config["references-base"])
    lock = common.Lock(".lock")

    common.retrieve("http://www.redhat.com/customersuccess/", "index.html")
    common.mkro("index.html")

    toc = lxml.html.soupparser.parse("index.html").getroot()
    for url in toc.xpath("//a[substring-after(@href, '.') = 'pdf']/../../.."):
        url = copy.deepcopy(url)
        title = url.xpath("//h4//a/text()")[0].replace("/", "_")
        href = url.xpath("//a[substring-after(@href, '.') = 'pdf']/@href")[0]
        print >>sys.stderr, title

        f = common.retrieve_tmpfile("http://www.redhat.com" + href)
        # a few links on /customersuccess are currently broken HTML files
        if ispdf(f):
            common.sendfile_disk(f, title + ".pdf")
            common.mkro(title + ".pdf")
        f.close()
query = " ".join(map(quote, args["querystring"])) maildb = mailindex.MailDB(args["base"] + "/.index") common.mkdirs(os.path.split(config["lgrep-mailbox"])[0]) common.unlink(config["lgrep-mailbox"]) mbox = open(config["lgrep-mailbox"], "w") for row in maildb.search(query): f = open(os.sep.join((args["base"], row["path"]))) f.seek(row["offset"]) mbox.write(f.read(row["length"])) f.close() mbox.close() common.mkro(config["lgrep-mailbox"]) maildb.close() execpath = execpath.replace("%filename", os.path.split(config["lgrep-mailbox"])[1]) execpath = execpath.replace("%path", config["lgrep-mailbox"]) execpath = execpath.split(" ") try: os.execvp(execpath[0], execpath) except OSError: print >>sys.stderr, "Failed to exec \"%s\", please edit $HOME/.satools." % path sys.exit(1)
        try:
            f = common.retrieve_tmpfile(url + "/" + href, credentials)
        except urllib2.HTTPError, e:
            if e.code == 403:
                print >>sys.stderr, "WARNING: %s, continuing..." % e
                warnings += 1
                continue
            raise

        if isgzip(f):
            g = gzip.GzipFile(fileobj = f, mode = "r")
            common.sendfile_disk(g, path)
            g.close()
        else:
            common.sendfile_disk(f, path)
        f.close()
        common.mkro(path)

        mailindex.index(".", _list, path)
        attachments.extract(path)
        thunderbird.link(path)

        if not (tm.tm_year == now.tm_year and tm.tm_mon == now.tm_mon):
            db.add(path)

    with open(".sync-done", "w") as f:
        pass

    if warnings:
        print >>sys.stderr, "WARNING: %u warnings occurred." % warnings
            raise

        if isgzip(f):
            try:
                g = gzip.GzipFile(fileobj=f, mode="r")
                common.sendfile_disk(g, path)
                g.close()
            except Exception as e:
                print("WARNING: %s, continuing..." % e, file=sys.stderr)
                warnings += 1
                continue
        else:
            common.sendfile_disk(f, path)
        f.close()
        common.mkro(path)

        mailindex.index(".", _list, path)
        attachments.extract(path)
        thunderbird.link(path)

        if not (tm.tm_year == now.tm_year and tm.tm_mon == now.tm_mon):
            db.add(path)

    with open(".sync-done", "w") as f:
        pass

    if warnings:
        print("WARNING: %u warnings occurred." % warnings, file=sys.stderr)
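# isgzip() is not shown in these excerpts; a plausible minimal implementation
# peeks at the two-byte gzip magic number and restores the file position,
# which is all GzipFile needs to take over from there.
def isgzip(f):
    pos = f.tell()
    magic = f.read(2)
    f.seek(pos)
    return magic == b"\x1f\x8b"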
def download(opener, href, dest):
    common.mkdirs(os.path.split(dest)[0])
    common.retrieve(href, dest, opener=opener, tries=10)
    common.mkro(dest)
def download(url, dest):
    if not os.path.exists(dest):
        common.retrieve(url, dest)
        common.mkro(dest)
        return True
    return False
query = " ".join(args["querystring"]) maildb = mailindex.MailDB(args["base"] + "/.index") common.mkdirs(os.path.split(config["lgrep-mailbox"])[0]) common.unlink(config["lgrep-mailbox"]) mbox = open(config["lgrep-mailbox"], "wb") for row in maildb.search(query): f = open(os.sep.join((args["base"], row["path"])), "rb") f.seek(row["offset"]) mbox.write(f.read(row["length"])) f.close() mbox.close() common.mkro(config["lgrep-mailbox"]) maildb.close() execpath = execpath.replace("%filename", os.path.split(config["lgrep-mailbox"])[1]) execpath = execpath.replace("%path", config["lgrep-mailbox"]) execpath = execpath.split(" ") try: os.execvp(execpath[0], execpath) except OSError: print("Failed to exec \"%s\", please edit $HOME/.satools." % path, file=sys.stderr) sys.exit(1)
{ "xhtml" : "http://www.w3.org/1999/xhtml" }) if __name__ == "__main__": warnings = 0 global config config = common.load_config() args = parse_args() common.mkdirs(config["product-docs-base"]) os.chdir(config["product-docs-base"]) lock = common.Lock(".lock") urlbase = "http://docs.redhat.com/docs/%(locale)s/" % args common.retrieve(urlbase + "toc.html", "toc.html") common.mkro("toc.html") toc = lxml.etree.parse("toc.html").getroot() for url in xpath(toc, "//xhtml:a[@class='type' and text()='%(type)s']/@href" % args): url = url[2:] # trim leading ./ path = url[:url.index("/%(type)s/" % args)].replace("_", " ") common.mkdirs(path) path = path + "/" + url.split("/")[-1] try: common.retrieve(urlbase + url, path) except urllib2.HTTPError, e: if e.code == 403: print >>sys.stderr, "WARNING: %s on %s, continuing..." % (e, urlbase + url) warnings += 1 continue