Exemplo n.º 1
0
        index = common.retrieve_m(url, credentials)
        index_xml = lxml.html.parse(index).getroot()
        index.close()

        for href in index_xml.xpath("//a[substring-after(@href, '.') = 'txt.gz']/@href"):
            tm = time.strptime(href, "%Y-%B.txt.gz")
            path = "%s/%04u/%02u" % (_list, tm.tm_year, tm.tm_mon)

            if tm.tm_year < int(config["lists-start-year"]):
                break

            if not path in db or not os.path.isfile(path):
                common.mkdirs(os.path.split(path)[0])
                try:
                    f = common.retrieve_tmpfile(url + "/" + href, credentials)
                except urllib2.HTTPError, e:
                    if e.code == 403:
                        print >>sys.stderr, "WARNING: %s, continuing..." % e
                        warnings += 1
                        continue
                    raise
                    
                if isgzip(f):
                    g = gzip.GzipFile(fileobj = f, mode = "r")
                    common.sendfile_disk(g, path)
                    g.close()
                else:
                    common.sendfile_disk(f, path)
                f.close()
                
Exemplo n.º 2
0
        index = common.retrieve_m(url, credentials)
        index_xml = lxml.html.parse(index).getroot()
        index.close()

        for href in index_xml.xpath("//a[substring-after(@href, '.') = 'txt.gz']/@href"):
            tm = time.strptime(href, "%Y-%B.txt.gz")
            path = "%s/%04u/%02u" % (_list, tm.tm_year, tm.tm_mon)

            if tm.tm_year < int(config["lists-start-year"]):
                break

            if not path in db or not os.path.isfile(path):
                common.mkdirs(os.path.split(path)[0])
                req = urllib.request.Request(url + "/" + href, credentials, {"Accept-Encoding": "gzip"})
                try:
                    f = common.retrieve_tmpfile(req)
                except urllib.error.HTTPError as e:
                    if e.code == 403:
                        print("WARNING: %s, continuing..." % e, file = sys.stderr)
                        warnings += 1
                        continue
                    raise
                    
                if isgzip(f):
                    try:
                        g = gzip.GzipFile(fileobj = f, mode = "r")
                        common.sendfile_disk(g, path)
                        g.close()
                    except Exception as e:
                        print("WARNING: %s, continuing..." % e, file = sys.stderr)
                        warnings += 1
Exemplo n.º 3
0
        index.close()

        for href in index_xml.xpath(
                "//a[substring-after(@href, '.') = 'txt.gz']/@href"):
            tm = time.strptime(href, "%Y-%B.txt.gz")
            path = "%s/%04u/%02u" % (_list, tm.tm_year, tm.tm_mon)

            if tm.tm_year < int(config["lists-start-year"]):
                break

            if not path in db or not os.path.isfile(path):
                common.mkdirs(os.path.split(path)[0])
                req = urllib.request.Request(url + "/" + href, credentials,
                                             {"Accept-Encoding": "gzip"})
                try:
                    f = common.retrieve_tmpfile(req)
                except urllib.error.HTTPError as e:
                    if e.code == 403:
                        print("WARNING: %s, continuing..." % e,
                              file=sys.stderr)
                        warnings += 1
                        continue
                    raise

                if isgzip(f):
                    try:
                        g = gzip.GzipFile(fileobj=f, mode="r")
                        common.sendfile_disk(g, path)
                        g.close()
                    except Exception as e:
                        print("WARNING: %s, continuing..." % e,
    return bytes == "%PDF"

if __name__ == "__main__":
    # Script entry point: download the PDF documents linked from the Red Hat
    # "customersuccess" page into the directory named by
    # config["references-base"].
    #
    # A plain assignment at module level already binds a module global, so no
    # `global` statement is required for `config`.
    config = common.load_config()

    # The scraper is deliberately disabled: the script reports that it needs
    # updating and exits with status 1, so nothing below this point runs until
    # these two lines are removed.
    print >>sys.stderr, "Utility needs update since relaunch of www.redhat.com, feel free to submit patches..."
    sys.exit(1)

    # Work inside the references directory; all paths below are relative to it.
    common.mkdirs(config["references-base"])
    os.chdir(config["references-base"])

    # NOTE(review): presumably guards against concurrent runs for as long as
    # `lock` stays referenced -- confirm against common.Lock.
    lock = common.Lock(".lock")

    # Fetch the table-of-contents page and keep a local copy of it.
    # NOTE(review): mkro presumably marks the file read-only -- confirm.
    common.retrieve("http://www.redhat.com/customersuccess/", "index.html")
    common.mkro("index.html")

    toc = lxml.html.soupparser.parse("index.html").getroot()
    # Select the great-grandparent element of every anchor whose href, after
    # its first '.', is exactly "pdf"; each such element describes one document.
    for url in toc.xpath("//a[substring-after(@href, '.') = 'pdf']/../../.."):
        # NOTE(review): the copy presumably detaches the element from the page
        # so that the absolute "//" queries below only see this entry -- confirm.
        url = copy.deepcopy(url)
        # '/' is replaced because the title is used as a file name.
        title = url.xpath("//h4//a/text()")[0].replace("/", "_")
        href = url.xpath("//a[substring-after(@href, '.') = 'pdf']/@href")[0]

        print >>sys.stderr, title
        f = common.retrieve_tmpfile("http://www.redhat.com" + href)
        try:
            if ispdf(f):
                # a few links on /customersuccess are currently broken HTML files
                common.sendfile_disk(f, title + ".pdf")
                common.mkro(title + ".pdf")
        finally:
            # Always release the temporary download, even when the PDF check
            # or the copy to disk raises.
            f.close()