Example #1
    def download(self):
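        # Open the partial temp file in append mode and resume the transfer
        # from its current size with an HTTP Range request.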
        f = open(self.tempname, "ab")
        size = os.fstat(f.fileno())[stat.ST_SIZE]
        response = requests.get(self.href, stream = True,
                                headers = {"Range": "bytes=%u-" % size})
        remaining = int(response.headers["Content-Length"])
        r = response.raw
        while True:
            data = r.read(4096)
            remaining -= len(data)
            if data == "": break
            f.write(data)
        f.flush()
        os.fsync(f.fileno())
        f.close()

        if remaining > 0:
            # download terminated early, retry
            fileset.remove(self.name)
            return

        if not self.verify():
            # download corrupt, delete and retry
            msg("WARN: verify failed for %s" % self.name)
            os.unlink(self.tempname)
            fileset.remove(self.name)
            return

        common.rename(self.tempname, self.name)
        common.mkro(self.name)

        if "Last-Modified" in response.headers:
            mtime = calendar.timegm(time.strptime(response.headers["Last-Modified"],
                                                  "%a, %d %b %Y %H:%M:%S %Z"))
            os.utime(self.name, (mtime, mtime))
Example #2
def download(item, db, tries):
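    # Reuse the locally recorded path for hrefs already in db; otherwise fetch
    # the wiki page and parse it to learn the file's path and name.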
    if item["href"] in db:
        path = db.get(item["href"])

    else:
        f = common.retrieve_m(config["clearspace-root"] + item["href"], tries=tries)
        doc = WikiDoc(f.read())
        f.close()

        path = doc.path + "/" + doc.filename

        if want(path):
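            # Skip the download when a local copy already exists with the
            # document's mtime; otherwise fetch it, mark it read-only and
            # stamp it with that mtime.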
            skip = False
            if os.path.exists(path):
                st = os.stat(path)
                if st.st_mtime == doc.mtime:
                    skip = True

            if not skip:
                common.mkdirs(doc.path)
                common.retrieve(config["clearspace-root"] + doc.filehref, path, force=True, tries=tries)
                common.mkro(path)
                os.utime(path, (doc.mtime, doc.mtime))

    updatedbs(db, keep, item["href"], path)
Example #3
def download(url, path):
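    # A lock-protected set ensures each path is downloaded by at most one
    # thread.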
    with lock:
        if path in files:
            return
        files.add(path)

    if os.path.exists(path):
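        # Ask the server for the file's metadata; if the local mtime and size
        # match Last-Modified and Content-Length, there is nothing to do.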
        r = tls.s.head(url)
        mtime = common.parse_last_modified(r.headers["Last-Modified"])

        if os.path.getmtime(path) == mtime and \
           os.path.getsize(path) == int(r.headers["Content-Length"]):
            return

    common.mkdirs(os.path.dirname(path))

    log(url + " -> " + path)
    r = tls.s.get(url, stream=True)

    temppath = common.mktemppath(path)

    with open(temppath, "wb") as f:
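        # Stream the body in 4 KiB chunks and fsync before closing so the data
        # is on disk before the rename below.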
        for data in r.iter_content(4096):
            f.write(data)

        f.flush()
        os.fsync(f.fileno())

    mtime = common.parse_last_modified(r.headers["Last-Modified"])
    os.utime(temppath, (mtime, mtime))
    common.mkro(temppath)
    common.rename(temppath, path)
Example #4
def download(url, path, mtime):
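  # Claim the path in the shared set so only one worker downloads it, and skip
  # files whose mtime already matches the expected value.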
  with lock:
    if path in files:
      return
    files.add(path)

  if os.path.exists(path) and os.path.getmtime(path) == mtime:
    return

  common.mkdirs(os.path.dirname(path))

  log(url + " -> " + path)
  r = get(url, stream = True)

  p = os.path.split(path)
  temppath = os.path.join(p[0], "." + p[1])

  with open(temppath, "wb") as f:
    for data in r.iter_content(4096):
      f.write(data)

    f.flush()
    os.fsync(f.fileno())

  os.utime(temppath, (mtime, mtime))
  common.mkro(temppath)
  common.rename(temppath, path)
Example #5
def download(url, path, mtime):
    with lock:
        if path in files:
            return
        files.add(path)

    if os.path.exists(path) and os.path.getmtime(path) == mtime:
        return

    common.mkdirs(os.path.dirname(path))

    log(url + " -> " + path)
    r = get(url, stream=True)

    p = os.path.split(path)
    temppath = os.path.join(p[0], "." + p[1])

    with open(temppath, "wb") as f:
        for data in r.iter_content(4096):
            f.write(data)

        f.flush()
        os.fsync(f.fileno())

    os.utime(temppath, (mtime, mtime))
    common.mkro(temppath)
    common.rename(temppath, path)
Example #6
def download(url, dest):
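    # data: URIs embed their content, so there is nothing to fetch; fileset.tas
    # appears to test-and-set the destination so each file is handled only once.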
    if url.startswith("data:"):
        return

    if fileset.tas(dest):
        return

    common.mkdirs(os.path.split(dest)[0])

    try:
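        # Fetch the page; for single-page HTML output, also pull in the
        # resources its HTML and CSS reference, then mark the result read-only.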
        common.retrieve(url, dest)

        if args["type"] == "html-single" and dest.endswith(".html"):
            get_deps_html(url, dest)

        if args["type"] == "html-single" and dest.endswith(".css"):
            get_deps_css(url, dest)

        common.mkro(dest)
       
    except urllib.error.HTTPError as e:
        if e.code == 403 or e.code == 404:
            warn("WARNING: %s on %s, continuing..." % (e, url))
        else:
            raise
Example #7
def download(url, path):
    with lock:
        if path in files:
            return
        files.add(path)

    if os.path.exists(path):
        r = tls.s.head(url)
        mtime = common.parse_last_modified(r.headers["Last-Modified"])

        if os.path.getmtime(path) == mtime and \
           os.path.getsize(path) == int(r.headers["Content-Length"]):
            return

    common.mkdirs(os.path.dirname(path))

    log(url + " -> " + path)
    r = tls.s.get(url, stream = True)

    temppath = common.mktemppath(path)

    with open(temppath, "wb") as f:
        for data in r.iter_content(4096):
            f.write(data)

        f.flush()
        os.fsync(f.fileno())

    mtime = common.parse_last_modified(r.headers["Last-Modified"])
    os.utime(temppath, (mtime, mtime))
    common.mkro(temppath)
    common.rename(temppath, path)
Example #8
def download(url, dest, username, password):
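    # Set up an opener that answers HTTP basic-auth challenges for this URL
    # with the supplied credentials.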
    pm = urllib2.HTTPPasswordMgrWithDefaultRealm()
    pm.add_password(None, url, username, password)
    opener = urllib2.build_opener(urllib2.HTTPBasicAuthHandler(pm))

    common.mkdirs(os.path.split(dest)[0])
    common.retrieve(url, dest, opener = opener, tries = 10, force = True)
    common.mkro(dest)
Example #9
def download(url, dest, username, password):
    pm = urllib.request.HTTPPasswordMgrWithDefaultRealm()
    pm.add_password(None, url, username, password)
    opener = urllib.request.build_opener(
        urllib.request.HTTPBasicAuthHandler(pm))

    common.mkdirs(os.path.split(dest)[0])
    common.retrieve(url, dest, opener=opener, tries=10, force=True)
    common.mkro(dest)
Example #10
def download_item(item, extension, tries = 1):
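    # Store the item under its type directory, named after the last component
    # of its page URL; HTTP errors are reported as warnings rather than
    # aborting the run.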
    dstfile = os.path.join(item.type_, item.pageurl.split("/")[-1]) + extension

    common.mkdirs(item.type_)
    try:
        print("\r[%u]" % item.number, end = "", file = sys.stderr)
        common.retrieve(item.dlurl, dstfile, tries = tries)
        common.mkro(dstfile)
    except urllib.error.HTTPError as e:
        warn("can't download item at %s (#%u, %s, %s) (%s), continuing..." % \
                 (item.dlurl, item.number, item.title, item.type_, e))
Example #11
def download_item(item, extension, tries=1):
    dstfile = os.path.join(item.type_, item.pageurl.split("/")[-1]) + extension

    common.mkdirs(item.type_)
    try:
        print("\r[%u]" % item.number, end="", file=sys.stderr)
        common.retrieve(item.dlurl, dstfile, tries=tries)
        common.mkro(dstfile)
    except urllib.error.HTTPError as e:
        warn("can't download item at %s (#%u, %s, %s) (%s), continuing..." % \
                 (item.dlurl, item.number, item.title, item.type_, e))
Example #12
def sync(query, keep):
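    # Query the search appliance for up to 1000 results; hitting exactly 1000
    # suggests the result set was truncated, so fail loudly.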
    xml = common.retrieve_m(config["gsa-url"] + "?client=internal&output=xml&num=1000&filter=0&q=" + query, tries = 10)
    xml = lxml.etree.parse(xml)

    if int(xml.xpath("//M/text()")[0]) == 1000:
        raise Exception("search returned too many results")

    for result in xml.xpath("//U/text()"):
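        # Mirror each result to a local path derived from the URL (scheme
        # stripped, "~" removed) and record it in keep.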
        dest = result.split("//")[1]
        dest = dest.replace("~", "")
        common.mkdirs(os.path.split(dest)[0])
        common.retrieve(result, dest, tries = 10)
        common.mkro(dest)
        keep.add(dest)
Example #13
def extract(path):
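    # Walk every part of every message in the mbox and save attachments,
    # skipping inline parts and signature/calendar/vcard-style content types.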
    if config["attachments-enabled"] != "1":
        return

    print("Extracting attachments from %s..." % path, file = sys.stderr)

    mbox = mailbox.mbox(config["lists-base"] + "/" + path)

    for msg in mbox.keys():
        index = 0
        for part in mbox[msg].walk():
            fn = part.get_filename()
            typ = part.get_content_type()
            if fn is not None \
                    and not mailindex.decode(part.get("Content-Disposition", "inline")).startswith("inline") \
                    and typ not in \
                    ('application/pgp-signature', 'application/pkcs7-signature',
                     'application/x-pkcs7-signature', 'image/x-icon',
                     'message/external-body', 'message/rfc822', 'text/calendar',
                     'text/x-vcard'):

                p = config["attachments-base"] + "/" + path

                try:
                    fn = cleanfilename(fn)

                    if config["attachments-odponly"] != "1" or \
                            fn.lower().endswith(".odp") or \
                            typ.lower().startswith("application/vnd.oasis.opendocument.presentation"):
                        common.mkdirs(p)
                        p += "/%03u-%03u-%s" % (msg, index, fn)

                        if not os.path.exists(p):
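                            # Write the decoded payload to a temp file, fsync
                            # it, then rename it into place so the attachment
                            # appears atomically.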
                            temppath = common.mktemppath(p)
                        
                            f = open(temppath, "wb")
                            f.write(part.get_payload(decode = True))

                            f.flush()
                            os.fsync(f.fileno())
                            f.close()
                
                            common.rename(temppath, p)
                            common.mkro(p)

                except UnicodeEncodeError:
                    pass

            index += 1
Example #14
def sync(query, keep):
    xml = common.retrieve_m(
        config["gsa-url"] +
        "?client=internal&output=xml&num=1000&filter=0&q=" + query,
        tries=10)
    xml = lxml.etree.parse(xml)

    if int(xml.xpath("//M/text()")[0]) == 1000:
        raise Exception("search returned too many results")

    for result in xml.xpath("//U/text()"):
        dest = result.split("//")[1]
        dest = dest.replace("~", "")
        common.mkdirs(os.path.split(dest)[0])
        common.retrieve(result, dest, tries=10)
        common.mkro(dest)
        keep.add(dest)
    return bytes == "%PDF"

if __name__ == "__main__":
    global config
    config = common.load_config()

    print >>sys.stderr, "Utility needs update since relaunch of www.redhat.com, feel free to submit patches..."
    sys.exit(1)

    common.mkdirs(config["references-base"])
    os.chdir(config["references-base"])

    lock = common.Lock(".lock")

    common.retrieve("http://www.redhat.com/customersuccess/", "index.html")
    common.mkro("index.html")

    toc = lxml.html.soupparser.parse("index.html").getroot()
    for url in toc.xpath("//a[substring-after(@href, '.') = 'pdf']/../../.."):
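        # Each matched element contains both the title used for the local
        # filename and the link to the PDF.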
        url = copy.deepcopy(url)
        title = url.xpath("//h4//a/text()")[0].replace("/", "_")
        href = url.xpath("//a[substring-after(@href, '.') = 'pdf']/@href")[0]

        print >>sys.stderr, title
        f = common.retrieve_tmpfile("http://www.redhat.com" + href)
        if ispdf(f):
            # a few links on /customersuccess are currently broken HTML files
            common.sendfile_disk(f, title + ".pdf")
            common.mkro(title + ".pdf")
        f.close()
Example #16
    query = " ".join(map(quote, args["querystring"]))

    maildb = mailindex.MailDB(args["base"] + "/.index")

    common.mkdirs(os.path.split(config["lgrep-mailbox"])[0])
    common.unlink(config["lgrep-mailbox"])
    mbox = open(config["lgrep-mailbox"], "w")

    for row in maildb.search(query):
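        # Copy each matching message verbatim by seeking to its recorded
        # offset in the source mbox and reading the recorded number of bytes.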
        f = open(os.sep.join((args["base"], row["path"])))
        f.seek(row["offset"])
        mbox.write(f.read(row["length"]))
        f.close()

    mbox.close()
    common.mkro(config["lgrep-mailbox"])

    maildb.close()

    execpath = execpath.replace("%filename", os.path.split(config["lgrep-mailbox"])[1])
    execpath = execpath.replace("%path", config["lgrep-mailbox"])
    execpath = execpath.split(" ")
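    # Replace this process with the configured viewer, whose command line has
    # the %filename/%path placeholders already substituted.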

    try:
        os.execvp(execpath[0], execpath)

    except OSError:
        print >>sys.stderr, "Failed to exec \"%s\", please edit $HOME/.satools." % path
        sys.exit(1)
Example #17
                    f = common.retrieve_tmpfile(url + "/" + href, credentials)
                except urllib2.HTTPError, e:
                    if e.code == 403:
                        print >>sys.stderr, "WARNING: %s, continuing..." % e
                        warnings += 1
                        continue
                    raise
                    
                if isgzip(f):
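                    # The fetched archive is gzip-compressed, so decompress it
                    # on the fly while writing it to disk.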
                    g = gzip.GzipFile(fileobj = f, mode = "r")
                    common.sendfile_disk(g, path)
                    g.close()
                else:
                    common.sendfile_disk(f, path)
                f.close()
                
                common.mkro(path)
                mailindex.index(".", _list, path)
                attachments.extract(path)

            thunderbird.link(path)

            if not (tm.tm_year == now.tm_year and tm.tm_mon == now.tm_mon):
                db.add(path)

    with open(".sync-done", "w") as f:
        pass

    if warnings:
        print >>sys.stderr, "WARNING: %u warnings occurred." % warnings
Example #18
                    raise

                if isgzip(f):
                    try:
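                        # Decompress while writing; if decompression fails,
                        # log a warning and skip this archive instead of
                        # aborting the sync.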
                        g = gzip.GzipFile(fileobj=f, mode="r")
                        common.sendfile_disk(g, path)
                        g.close()
                    except Exception as e:
                        print("WARNING: %s, continuing..." % e,
                              file=sys.stderr)
                        warnings += 1
                        continue
                else:
                    common.sendfile_disk(f, path)
                f.close()

                common.mkro(path)
                mailindex.index(".", _list, path)
                attachments.extract(path)

            thunderbird.link(path)

            if not (tm.tm_year == now.tm_year and tm.tm_mon == now.tm_mon):
                db.add(path)

    with open(".sync-done", "w") as f:
        pass

    if warnings:
        print("WARNING: %u warnings occurred." % warnings, file=sys.stderr)
Example #19
def download(opener, href, dest):
    common.mkdirs(os.path.split(dest)[0])
    common.retrieve(href, dest, opener = opener, tries = 10)
    common.mkro(dest)
Example #20
def download(url, dest):
    if not os.path.exists(dest):
        common.retrieve(url, dest)
        common.mkro(dest)
        return True
    return False
Example #21
    query = " ".join(args["querystring"])

    maildb = mailindex.MailDB(args["base"] + "/.index")

    common.mkdirs(os.path.split(config["lgrep-mailbox"])[0])
    common.unlink(config["lgrep-mailbox"])
    mbox = open(config["lgrep-mailbox"], "wb")

    for row in maildb.search(query):
        f = open(os.sep.join((args["base"], row["path"])), "rb")
        f.seek(row["offset"])
        mbox.write(f.read(row["length"]))
        f.close()

    mbox.close()
    common.mkro(config["lgrep-mailbox"])

    maildb.close()

    execpath = execpath.replace("%filename",
                                os.path.split(config["lgrep-mailbox"])[1])
    execpath = execpath.replace("%path", config["lgrep-mailbox"])
    execpath = execpath.split(" ")

    try:
        os.execvp(execpath[0], execpath)

    except OSError:
        print("Failed to exec \"%s\", please edit $HOME/.satools." % path,
              file=sys.stderr)
        sys.exit(1)
                      { "xhtml" : "http://www.w3.org/1999/xhtml" })

if __name__ == "__main__":
    warnings = 0
    global config
    config = common.load_config()
    args = parse_args()

    common.mkdirs(config["product-docs-base"])
    os.chdir(config["product-docs-base"])

    lock = common.Lock(".lock")

    urlbase = "http://docs.redhat.com/docs/%(locale)s/" % args
    common.retrieve(urlbase + "toc.html", "toc.html")
    common.mkro("toc.html")

    toc = lxml.etree.parse("toc.html").getroot()
    for url in xpath(toc, "//xhtml:a[@class='type' and text()='%(type)s']/@href" % args):
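        # Derive the local directory from the part of the URL before the
        # document type (underscores become spaces) and keep the document's
        # own filename.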
        url = url[2:] # trim leading ./
        path = url[:url.index("/%(type)s/" % args)].replace("_", " ")
        common.mkdirs(path)
        path = path + "/" + url.split("/")[-1]

        try:
            common.retrieve(urlbase + url, path)
        except urllib2.HTTPError, e:
            if e.code == 403:
                print >>sys.stderr, "WARNING: %s on %s, continuing..." % (e, urlbase + url)
                warnings += 1
                continue