Exemplo n.º 1
0
def download(item, db, tries):
    """Fetch the document behind ``item["href"]`` unless *db* already knows it.

    When the href is a key of *db*, the stored path is reused and nothing is
    downloaded.  Otherwise the wiki page is retrieved, parsed with
    ``WikiDoc`` and, if ``want(path)`` accepts the resulting path, the
    attached file is downloaded whenever no local copy with the same
    modification time exists.

    In both cases ``updatedbs`` is called with the href and the path.
    ``keep`` and ``config`` are module-level names that are not defined in
    this block.

    Args:
        item: mapping with at least an ``"href"`` key.
        db: object supporting ``in`` and ``.get`` keyed by href.
        tries: forwarded as ``tries=`` to the ``common`` retrieval helpers.
    """
    href = item["href"]

    if href in db:
        path = db.get(href)

    else:
        f = common.retrieve_m(config["clearspace-root"] + href, tries=tries)
        try:
            doc = WikiDoc(f.read())
        finally:
            # Release the handle even when reading or parsing raises.
            f.close()

        path = doc.path + "/" + doc.filename

        if want(path):
            # A local file whose mtime equals the document's mtime is
            # treated as already downloaded.
            up_to_date = (os.path.exists(path)
                          and os.stat(path).st_mtime == doc.mtime)

            if not up_to_date:
                common.mkdirs(doc.path)
                common.retrieve(config["clearspace-root"] + doc.filehref,
                                path, force=True, tries=tries)
                common.mkro(path)
                # Stamp the file with the document's mtime so the equality
                # check above succeeds on the next run.
                os.utime(path, (doc.mtime, doc.mtime))

    updatedbs(db, keep, href, path)
Exemplo n.º 2
0
def download(url, dest):
    """Fetch *url* into *dest*, following dependencies for single-page HTML.

    Nothing happens for ``data:`` URLs or when ``fileset.tas(dest)`` is
    truthy.  When ``args["type"]`` is ``"html-single"``, ``.html`` and
    ``.css`` files are additionally passed to ``get_deps_html`` /
    ``get_deps_css``.  HTTP 403 and 404 errors are reported through
    ``warn`` and swallowed; every other ``HTTPError`` is re-raised.
    """
    if url.startswith("data:") or fileset.tas(dest):
        return

    parent_dir = os.path.dirname(dest)
    common.mkdirs(parent_dir)

    try:
        common.retrieve(url, dest)

        if args["type"] == "html-single":
            if dest.endswith(".html"):
                get_deps_html(url, dest)
            elif dest.endswith(".css"):
                get_deps_css(url, dest)

        common.mkro(dest)

    except urllib.error.HTTPError as e:
        if e.code not in (403, 404):
            raise
        warn("WARNING: %s on %s, continuing..." % (e, url))
Exemplo n.º 3
0
def download(url, dest, username, password):
    """Download *url* to *dest* with HTTP basic authentication.

    The credentials are registered for *url* on a default-realm password
    manager; the resulting opener is handed to ``common.retrieve`` together
    with ``tries=10`` and ``force=True``.  The parent directory of *dest* is
    created first and *dest* is finally passed to ``common.mkro``.
    """
    credentials = urllib2.HTTPPasswordMgrWithDefaultRealm()
    credentials.add_password(None, url, username, password)
    auth_handler = urllib2.HTTPBasicAuthHandler(credentials)
    opener = urllib2.build_opener(auth_handler)

    target_dir = os.path.dirname(dest)
    common.mkdirs(target_dir)
    common.retrieve(url, dest, opener=opener, tries=10, force=True)
    common.mkro(dest)
Exemplo n.º 4
0
def download(url, dest, username, password):
    """Download *url* to *dest* with HTTP basic authentication.

    A default-realm password manager holding *username* / *password* for
    *url* backs the opener given to ``common.retrieve`` (``tries=10``,
    ``force=True``).  The directory part of *dest* is created beforehand and
    *dest* is passed to ``common.mkro`` afterwards.
    """
    password_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
    password_mgr.add_password(None, url, username, password)
    basic_auth = urllib.request.HTTPBasicAuthHandler(password_mgr)
    opener = urllib.request.build_opener(basic_auth)

    dest_dir = os.path.dirname(dest)
    common.mkdirs(dest_dir)
    common.retrieve(url, dest, opener=opener, tries=10, force=True)
    common.mkro(dest)
Exemplo n.º 5
0
def download_item(item, extension, tries = 1):
    """Download ``item.dlurl`` into the directory named after ``item.type_``.

    The local file name is the last path component of ``item.pageurl`` plus
    *extension*.  A progress marker with the item number is written to
    stderr.  An ``HTTPError`` is reported through ``warn`` and does not
    propagate.
    """
    basename = item.pageurl.rsplit("/", 1)[-1]
    target = os.path.join(item.type_, basename) + extension

    common.mkdirs(item.type_)
    try:
        progress = "\r[%u]" % item.number
        print(progress, end="", file=sys.stderr)
        common.retrieve(item.dlurl, target, tries=tries)
        common.mkro(target)
    except urllib.error.HTTPError as e:
        details = (item.dlurl, item.number, item.title, item.type_, e)
        warn("can't download item at %s (#%u, %s, %s) (%s), continuing..."
             % details)
Exemplo n.º 6
0
def download_item(item, extension, tries=1):
    """Store the file at ``item.dlurl`` under the ``item.type_`` directory.

    The destination name combines the final segment of ``item.pageurl`` with
    *extension*.  The item number is echoed to stderr as a progress marker.
    If the download raises ``HTTPError`` the failure is passed to ``warn``
    and the function returns normally.
    """
    page_name = item.pageurl.rsplit("/", 1)[-1]
    local_file = os.path.join(item.type_, page_name) + extension

    common.mkdirs(item.type_)
    try:
        marker = "\r[%u]" % item.number
        print(marker, end="", file=sys.stderr)
        common.retrieve(item.dlurl, local_file, tries=tries)
        common.mkro(local_file)
    except urllib.error.HTTPError as e:
        context = (item.dlurl, item.number, item.title, item.type_, e)
        warn("can't download item at %s (#%u, %s, %s) (%s), continuing..."
             % context)
Exemplo n.º 7
0
def sync(query, keep):
    """Download every search hit for *query* and record its path in *keep*.

    The search endpoint at ``config["gsa-url"]`` is asked for up to 1000
    results as XML.  If the ``M`` element reports exactly 1000, an
    ``Exception`` is raised.  Each ``U`` entry is saved under the path
    obtained by taking the text after the first ``//`` and removing ``~``
    characters; that path is then added to *keep*.
    """
    search_url = (config["gsa-url"]
                  + "?client=internal&output=xml&num=1000&filter=0&q="
                  + query)
    response = common.retrieve_m(search_url, tries=10)
    tree = lxml.etree.parse(response)

    reported_total = int(tree.xpath("//M/text()")[0])
    if reported_total == 1000:
        raise Exception("search returned too many results")

    for hit_url in tree.xpath("//U/text()"):
        local_path = hit_url.split("//")[1].replace("~", "")
        common.mkdirs(os.path.dirname(local_path))
        common.retrieve(hit_url, local_path, tries=10)
        common.mkro(local_path)
        keep.add(local_path)
        keep.add(dest)
Exemplo n.º 8
0
def sync(query, keep):
    """Mirror the search results for *query* locally, tracking them in *keep*.

    Up to 1000 results are requested as XML from ``config["gsa-url"]``.  A
    reported count (``M`` element) of exactly 1000 raises ``Exception``.
    Every result URL (``U`` element) is stored at the path formed by the
    text following its first ``//`` with ``~`` characters removed, and that
    path is added to *keep*.
    """
    base = config["gsa-url"]
    params = "?client=internal&output=xml&num=1000&filter=0&q=" + query
    listing = lxml.etree.parse(common.retrieve_m(base + params, tries=10))

    match_count = listing.xpath("//M/text()")[0]
    if int(match_count) == 1000:
        raise Exception("search returned too many results")

    for source in listing.xpath("//U/text()"):
        without_scheme = source.split("//")[1]
        target = without_scheme.replace("~", "")
        target_dir = os.path.dirname(target)
        common.mkdirs(target_dir)
        common.retrieve(source, target, tries=10)
        common.mkro(target)
        keep.add(target)
Exemplo n.º 9
0
def save(args):
    """Download a JNLP launcher and its recording, then rewrite it for local use.

    Steps, as performed by the code below:

    1. ``login(args)`` is called when the URL contains ``/internal/``.
    2. URLs matching ``/mr.jnlp?`` or ``/p.jnlp?`` are first resolved
       through ``getjnlpurl``.
    3. The ``psid`` query parameter names the recording file; the launcher
       is saved as ``<psid>.jnlp``.
    4. The argument following ``-play`` is downloaded to the recording file
       and replaced by a ``file://`` URL under ``config["elluminate-base"]``.
    5. ``fetchjars`` is called, the ``codebase`` attribute is pointed at the
       local ``JARS`` directory, and the modified XML is written back.

    The launcher file name is printed on success.

    Args:
        args: object with a ``url`` attribute; also handed to ``login``.

    Raises:
        SystemExit: with status 1 when the downloaded launcher is not
            well-formed XML (the file is deleted first).
        ValueError: if the URL contains no ``?``.
        KeyError: if the query string has no ``psid`` parameter.
    """
    jnlpurl = args.url

    if re.search("/internal/", jnlpurl):
        login(args)

    # Raw string: "\?" is an invalid escape in a normal literal, and the dot
    # before "jnlp" is meant literally.
    if re.search(r"/(mr|p)\.jnlp\?", jnlpurl):
        jnlpurl = getjnlpurl(jnlpurl)

    query = jnlpurl[jnlpurl.index("?") + 1:]
    vcrfile = urllib.parse.parse_qs(query)["psid"][0]
    jnlpfile = vcrfile + ".jnlp"

    common.retrieve(jnlpurl, jnlpfile, force=True)

    try:
        xml = lxml.etree.parse(jnlpfile).getroot()
    except lxml.etree.XMLSyntaxError:
        # Whatever was downloaded is not a launcher; do not leave it behind.
        os.unlink(jnlpfile)
        print("%s: couldn't retrieve jnlp: credentials incorrect?" % ap.prog)
        sys.exit(1)

    # Walk adjacent <argument> pairs; only the first "-play" is rewritten.
    xmlargs = xml.xpath("//argument")
    for flag, value in zip(xmlargs, xmlargs[1:]):
        if flag.text == "-play":
            common.retrieve(value.text, vcrfile)
            value.text = "file://" + config["elluminate-base"] + "/" + vcrfile
            break

    fetchjars(xml)

    xml.set("codebase", "file://" + config["elluminate-base"] + "/" + JARS)

    # tostring() returns bytes here, hence binary mode; the context manager
    # closes the file even if serialisation or the write fails.
    with open(jnlpfile, "wb") as f:
        f.write(lxml.etree.tostring(xml, xml_declaration=True))

    print(jnlpfile)
Exemplo n.º 10
0
def save(args):
    """Download a JNLP launcher and its recording, then rewrite it for local use.

    Python 2 variant (print statements, ``urlparse`` module).  ``args`` must
    provide a ``url`` attribute and is also handed to ``login``.  Exits the
    process with status 1 when the downloaded launcher is not well-formed
    XML.  Prints the launcher file name on success.
    """
    jnlpurl = args.url

    # URLs containing /internal/ trigger login(args) first.
    if re.search("/internal/", jnlpurl):
        login(args)

    # URLs matching this pattern are resolved through getjnlpurl().
    # NOTE(review): the "." is unescaped, so it matches any character.
    if re.search("/(mr|p).jnlp\?", jnlpurl):
        jnlpurl = getjnlpurl(jnlpurl)

    # The "psid" query parameter names both the recording and the launcher.
    vcrfile = urlparse.parse_qs(jnlpurl[jnlpurl.index("?") + 1:])["psid"][0]
    jnlpfile = vcrfile + ".jnlp"

    common.retrieve(jnlpurl, jnlpfile, force = True)

    try:
        xml = lxml.etree.parse(jnlpfile).getroot()
    except lxml.etree.XMLSyntaxError:
        # The download is not parseable XML: remove it and give up.
        os.unlink(jnlpfile)
        print "%s: couldn't retrieve jnlp: credentials incorrect?" % ap.prog
        sys.exit(1)
        
    # Find the first "-play" argument; the argument after it is downloaded
    # to vcrfile and replaced by a file:// URL under config["elluminate-base"].
    xmlargs = xml.xpath("//argument")
    for i in range(len(xmlargs) - 1):
        if xmlargs[i].text == "-play":
            common.retrieve(xmlargs[i + 1].text, vcrfile)
            xmlargs[i + 1].text = "file://" + config["elluminate-base"] + "/" + vcrfile
            break

    fetchjars(xml)

    # Point the codebase attribute at the JARS directory under the local base.
    xml.set("codebase", "file://" + config["elluminate-base"] + "/" + JARS)

    # Overwrite the launcher with the modified XML.
    # NOTE(review): the file is not closed if tostring() or write() raises.
    f = open(jnlpfile, "w")
    f.write(lxml.etree.tostring(xml, xml_declaration = True))
    f.close()

    print jnlpfile
    return elem.xpath(path, namespaces = 
                      { "xhtml" : "http://www.w3.org/1999/xhtml" })

# Script entry point (Python 2 syntax): download toc.html and then every
# document linked from it whose link text equals args["type"].
if __name__ == "__main__":
    # Counts the HTTP 403 responses encountered in the loop below.
    warnings = 0
    # This statement already runs at module scope.
    global config
    config = common.load_config()
    args = parse_args()

    # All paths below are relative to config["product-docs-base"].
    common.mkdirs(config["product-docs-base"])
    os.chdir(config["product-docs-base"])

    # presumably guards against concurrent runs — confirm in common.Lock
    lock = common.Lock(".lock")

    # args is used as a mapping here; it must provide "locale" and "type".
    urlbase = "http://docs.redhat.com/docs/%(locale)s/" % args
    common.retrieve(urlbase + "toc.html", "toc.html")
    common.mkro("toc.html")

    toc = lxml.etree.parse("toc.html").getroot()
    for url in xpath(toc, "//xhtml:a[@class='type' and text()='%(type)s']/@href" % args):
        url = url[2:] # trim leading ./
        # Directory: the URL part before "/<type>/", underscores as spaces.
        path = url[:url.index("/%(type)s/" % args)].replace("_", " ")
        common.mkdirs(path)
        # File name: the last URL component.
        path = path + "/" + url.split("/")[-1]

        try:
            common.retrieve(urlbase + url, path)
        except urllib2.HTTPError, e:
            # NOTE(review): as visible here, HTTPError codes other than 403
            # are silently ignored — confirm this is intended.
            if e.code == 403:
                print >>sys.stderr, "WARNING: %s on %s, continuing..." % (e, urlbase + url)
                warnings += 1
Exemplo n.º 12
0
def fetchjars(xml):
    """Retrieve every ``href`` found in *xml* into the ``JARS`` directory.

    The hrefs are processed in sorted order; each is fetched relative to the
    ``codebase`` attribute of *xml* and stored under the same relative name
    below ``JARS``.
    """
    hrefs = list(xml.xpath("//@href"))
    hrefs.sort()
    for ref in hrefs:
        remote = "/".join((xml.get("codebase"), ref))
        local = "/".join((JARS, ref))
        common.retrieve(remote, local)
Exemplo n.º 13
0
def download(opener, href, dest):
    """Fetch *href* to *dest* through *opener*.

    The parent directory of *dest* is created first.  The download is
    requested with ``tries=10`` and the result is passed to ``common.mkro``.
    """
    parent = os.path.dirname(dest)
    common.mkdirs(parent)
    common.retrieve(href, dest, opener=opener, tries=10)
    common.mkro(dest)
                                      tries = tries)
                doc = WikiDoc(f.read())
                f.close()

                path = doc.path + "/" + doc.filename
                
                if want(path):
                    skip = False
                    if os.path.exists(path):
                        st = os.stat(path)
                        if st.st_mtime == doc.mtime:
                            skip = True

                    if not skip:
                        common.mkdirs(doc.path)
                        common.retrieve(config["clearspace-root"] + doc.filehref,
                                        path, force = True, tries = tries)
                        common.mkro(path)
                        os.utime(path, (doc.mtime, doc.mtime))

            updatedbs(db, keep, item["href"], path)

        if len(index.items) != step:
            break

    for dirpath, dirnames, filenames in os.walk(".", topdown = False):
        # remove local files which are no longer found in clearspace
        for f in filenames:
            path = os.path.relpath(dirpath, ".") + "/" + f
            if not path.startswith("./.") and path not in keep:
                os.unlink(path)
    return bytes == "%PDF"

# Script entry point (Python 2 syntax).  As written it prints a notice and
# exits with status 1, so nothing after the sys.exit(1) call runs.
if __name__ == "__main__":
    # This statement already runs at module scope.
    global config
    config = common.load_config()

    print >>sys.stderr, "Utility needs update since relaunch of www.redhat.com, feel free to submit patches..."
    sys.exit(1)

    # ---- unreachable from here on because of the sys.exit(1) above ----

    # All paths below are relative to config["references-base"].
    common.mkdirs(config["references-base"])
    os.chdir(config["references-base"])

    # presumably guards against concurrent runs — confirm in common.Lock
    lock = common.Lock(".lock")

    common.retrieve("http://www.redhat.com/customersuccess/", "index.html")
    common.mkro("index.html")

    toc = lxml.html.soupparser.parse("index.html").getroot()
    # Select the element three levels above each link to a PDF.
    for url in toc.xpath("//a[substring-after(@href, '.') = 'pdf']/../../.."):
        # NOTE(review): the copy presumably detaches the element so the "//"
        # queries below only see this subtree — confirm.
        url = copy.deepcopy(url)
        # "/" is replaced in the title because it becomes a file name.
        title = url.xpath("//h4//a/text()")[0].replace("/", "_")
        href = url.xpath("//a[substring-after(@href, '.') = 'pdf']/@href")[0]

        print >>sys.stderr, title
        f = common.retrieve_tmpfile("http://www.redhat.com" + href)
        if ispdf(f):
            # a few links on /customersuccess are currently broken HTML files
            common.sendfile_disk(f, title + ".pdf")
            common.mkro(title + ".pdf")
        f.close()
Exemplo n.º 16
0
def fetchjars(xml):
    """Download each ``href`` referenced in *xml* below the ``JARS`` directory.

    References are handled in sorted order.  The source of each download is
    the ``codebase`` attribute of *xml* joined with the reference; the
    destination is ``JARS`` joined with the same reference.
    """
    references = list(xml.xpath("//@href"))
    references.sort()
    for ref in references:
        source = "/".join((xml.get("codebase"), ref))
        destination = "/".join((JARS, ref))
        common.retrieve(source, destination)
Exemplo n.º 17
0
def download(url, dest):
    """Fetch *url* to *dest* unless *dest* already exists.

    Returns:
        ``True`` when the file was downloaded (and passed to
        ``common.mkro``), ``False`` when *dest* was already present and
        nothing was done.
    """
    if os.path.exists(dest):
        return False

    common.retrieve(url, dest)
    common.mkro(dest)
    return True