Пример #1
0
def download(db, grabber, url, downType):
    """Download the XML data for the page named by *url* and add its items to *db*.

    The page name is the filename component of *url* with its extension
    removed.  The page's data URL is obtained from ``RAIUrls.getPageDataUrl``
    and fetched through ``Utils.download`` into ``<Config.pageFolder>/<page>.xml``.
    Every ``content`` element found directly under the XML root is wrapped in
    an ``Elem`` with a freshly allocated PID and passed to ``Utils.addToDB``.

    Args:
        db: Database handle, forwarded to ``Utils.getNewPID`` and
            ``Utils.addToDB``.
        grabber: Forwarded to ``Utils.download`` and to each ``Elem``.
        url: URL whose filename (minus extension) identifies the page.
        downType: Download mode, forwarded unchanged to ``Utils.download``.

    Returns:
        None.
    """
    page = os.path.splitext(Utils.httpFilename(url))[0]
    dataUrl = RAIUrls.getPageDataUrl(page)
    localFilename = os.path.join(Config.pageFolder, page + ".xml")

    f = Utils.download(grabber, None, dataUrl, localFilename, downType, "utf-8")
    # Release the handle as soon as the content is in memory, even if the
    # read fails; previously the file object was never closed.
    # NOTE(review): assumes Utils.download returns a file-like object that
    # provides close() -- confirm against its implementation.
    try:
        s = f.read().strip()
    finally:
        f.close()

    # NOTE(review): the original comment here said ElementTree prefers byte
    # strings over unicode, but no encoding step is visible in this function;
    # verify what removeInvalidXMLCharacters returns.
    s = Utils.removeInvalidXMLCharacters(s)
    root = ElementTree.fromstring(s)

    for child in root.findall("content"):
        # A new PID is requested per element, before the element is stored.
        pid = Utils.getNewPID(db, None)
        it = Elem(pid, grabber, child)
        Utils.addToDB(db, it)