Exemplo n.º 1
0
def build_link_map(directory):
    """Build a LinkMap of page titles to HTML files found under *directory*.

    Walks *directory* recursively, reads every ``*.html`` file and looks for
    an inline ``<script>`` block that calls ``mw.config.set`` and mentions
    ``wgPageName`` (presumably the configuration block MediaWiki emits for
    each page). The value of ``wgPageName`` becomes the link title and the
    file's path relative to *directory* becomes the link target.

    Files that contain no such script block, or whose block has no
    ``"wgPageName":"..."`` entry, are skipped silently.

    Args:
        directory: Root directory to scan for HTML files.

    Returns:
        A ``LinkMap`` on which ``add_link(title, target)`` has been called
        once for every HTML file that yielded a page name.

    Raises:
        OSError: If an HTML file cannot be opened or read.
        UnicodeDecodeError: If an HTML file is not valid UTF-8.
    """
    # Patterns are compiled once, outside the per-file loop. They are raw
    # strings so that ``\.`` and ``\s`` reach the regex engine untouched
    # instead of being treated as (invalid) string escape sequences.
    #
    # NOTE: the parenthesis after ``set`` is a regex capture group, not a
    # literal "(". The literal parentheses of the JavaScript call are consumed
    # by ``[^<]*`` and therefore end up inside group 1.
    config_re = re.compile(
        r'<script>[^<]*mw\.config\.set([^<]*wgPageName[^<]*)</script>')
    whitespace_re = re.compile(r'\s+')
    page_name_re = re.compile(r'"wgPageName":"([^"]*)"')

    # find all html files
    html_files = [
        os.path.join(root, filename)
        for root, _dirnames, filenames in os.walk(directory)
        for filename in fnmatch.filter(filenames, '*.html')
    ]

    link_map = LinkMap()
    # Loop-invariant: every target is expressed relative to this base.
    base_dir = os.path.abspath(directory)

    for fn in html_files:
        # The context manager closes the file even if reading/decoding fails;
        # the explicit encoding keeps decoding independent of the locale.
        with open(fn, "r", encoding="utf-8") as f:
            text = f.read()

        m = config_re.search(text)
        if not m:
            continue

        # Strip all whitespace so the key/value pattern below matches no
        # matter how the configuration object is formatted. This also removes
        # any whitespace that appears inside the page name itself.
        config = whitespace_re.sub('', m.group(1))
        m = page_name_re.search(config)
        if not m:
            continue

        title = m.group(1)

        target = os.path.relpath(os.path.abspath(fn), base_dir)
        link_map.add_link(title, target)
    return link_map