def print_python_code(top, validate_with_vim=False):
    """Write ``invalid_unicode.py`` from the eggs found under *top*.

    For every path yielded by ``iter_invalid_unicode(top)``, the egg's
    ``EGG-INFO/PKG-INFO`` member is read and decoded with the encoding
    registered for that filename in ``ALTERNATIVE_ENCODING`` (``latin1``
    when there is none). The decoded text is emitted as a unicode string
    assignment named ``<NAME>_<VERSION>``, upper-cased with dots replaced
    by underscores. The assignments are written sorted by variable name,
    followed by a pretty-printed mapping
    ``{egg filename: {sha256: variable name}}`` passed through
    ``FormatCode``.

    :param top: value handed to ``iter_invalid_unicode``.
    :param validate_with_vim: when true, open Vim once per non-ASCII
        character of each decoded PKG-INFO, with the cursor placed on that
        character, so the chosen encoding can be checked by eye.
    :raises ValueError: if an egg filename does not contain at least two
        ``-`` separators.

    The output file is created in the current working directory.
    """
    # Maps generated variable name -> source text of its assignment.
    pkg_infos = {}
    # Maps egg filename -> {sha256 of the egg: generated variable name}.
    egg_to_pkg_info = collections.defaultdict(dict)

    for path in iter_invalid_unicode(top):
        print(path)
        filename = os.path.basename(path)

        with zipfile2.ZipFile(path) as egg:
            data = egg.read("EGG-INFO/PKG-INFO")

        sha256 = compute_sha256(path)

        # The third component is not used, but the three-way unpacking is
        # kept so that an unexpected filename still fails loudly.
        name, version, _build = filename.split("-", 2)

        varname = "{}_{}".format(
            name.replace(".", "_").upper(), version.replace(".", "_").upper()
        )
        egg_to_pkg_info[filename][sha256] = varname

        encoding = ALTERNATIVE_ENCODING.get(filename, "latin1")
        content = data.decode(encoding)

        if validate_with_vim:
            encoded = content.encode("utf8")
            candidates = [i for i, v in enumerate(content) if ord(v) >= 128]

            for pos in candidates:
                # Vim's `go` expects a 1-based byte offset, whereas `pos` is
                # a 0-based character index: convert through the UTF-8
                # length of everything that precedes the character.
                byte_offset = len(content[:pos].encode("utf8")) + 1
                with tempfile.NamedTemporaryFile() as tmp:
                    tmp.write(encoded)
                    tmp.flush()
                    cmd = ["vim", "+normal {}go".format(byte_offset), tmp.name]
                    print(path)
                    p = subprocess.Popen(cmd)
                    p.communicate()

        # NOTE(review): content is embedded verbatim, so a PKG-INFO holding
        # a triple double quote or a backslash would not round-trip through
        # the generated literal.
        pkg_info = u"\n".join((u'\n{} = u"""'.format(varname), content, u'"""\n'))
        pkg_infos[varname] = pkg_info

    py_content = [pkg_infos[k] for k in sorted(pkg_infos)]

    buf = StringIO()
    pprint.pprint(dict(egg_to_pkg_info), buf)

    output, _ignored = FormatCode(buf.getvalue())
    py_content.append(output)

    # Mode must be "w", not "wt": codecs.open() appends "b" to the mode when
    # an encoding is given, and Python 3's open() rejects the resulting
    # "wtb" with ValueError.
    with codecs.open("invalid_unicode.py", "w", encoding="utf8") as out:
        out.write(u"\n".join(py_content))
def build_list(top):
    """Collect the eggs under *top* whose name is listed in ``names``.

    Walks *top* recursively. A file is considered when it ends with
    ``.egg`` and its lower-cased name without extension is in ``names``.
    Eggs for which ``EggMetadata.from_egg`` raises ``zipfile.BadZipfile``
    are left out; every other considered egg is recorded.

    :param top: root directory to walk.
    :returns: a plain dict ``{egg filename: {checksum: "py27"}}`` where the
        checksum is the value returned by ``compute_sha256``.
    """
    found = collections.defaultdict(dict)

    for dirpath, _subdirs, filenames in os.walk(top):
        for filename in filenames:
            if not filename.endswith(".egg"):
                continue
            stem, _ext = os.path.splitext(filename)
            if stem.lower() not in names:
                continue

            egg_path = os.path.join(dirpath, filename)
            try:
                # Used purely as a probe: the parsed metadata is discarded.
                EggMetadata.from_egg(egg_path)
            except zipfile.BadZipfile:
                # Not a readable zip archive, so it is not recorded.
                continue

            found[filename][compute_sha256(egg_path)] = "py27"

    return dict(found)
def build_list(top):
    """Collect the eggs under *top* for which ``may_be_invalid`` gives a tag.

    Walks *top* recursively, printing each visited directory and a
    per-directory file counter to stderr. For each ``.egg`` file,
    ``may_be_invalid`` is called on its normalized absolute path; when the
    result is not None it is stored under the egg's basename and checksum.
    Any exception raised while handling an egg is reported on stderr and
    that egg is skipped.

    :param top: root directory to walk.
    :returns: a plain dict ``{egg basename: {checksum: python tag}}`` where
        the checksum is the value returned by ``compute_sha256``.
    """
    flagged = collections.defaultdict(dict)

    for dirpath, _subdirs, filenames in os.walk(top):
        print(dirpath, file=sys.stderr)
        total = len(filenames)

        for index, filename in enumerate(filenames):
            # Progress counter, rewritten in place every 100 files.
            if index % 100 == 0:
                print("{}/{}".format(index, total), end="\r", file=sys.stderr)
                sys.stderr.flush()

            if not filename.endswith(".egg"):
                continue

            joined = os.path.join(dirpath, filename)
            egg_path = os.path.normpath(os.path.abspath(joined))

            try:
                tag = may_be_invalid(egg_path)
                if tag is not None:
                    egg_name = os.path.basename(egg_path)
                    # Compute the checksum before touching the mapping so a
                    # failure here leaves no empty entry behind.
                    digest = compute_sha256(egg_path)
                    flagged[egg_name][digest] = tag
            except OkonomiyakiError as exc:
                template = "Okonomiyaki error parsing {!r} ({!r})"
                print(template.format(egg_path, str(exc)), file=sys.stderr)
            except Exception as exc:
                template = "Okonomiyaki bug parsing {!r} ({!r})"
                print(template.format(egg_path, str(exc)), file=sys.stderr)

    return dict(flagged)