def print_python_code(top, validate_with_vim=False):
    """Write ``invalid_unicode.py`` from the eggs reported under ``top``.

    For every path yielded by ``iter_invalid_unicode(top)``, the egg's
    ``EGG-INFO/PKG-INFO`` member is read and decoded, using the per-filename
    entry of ``ALTERNATIVE_ENCODING`` when there is one and ``latin1``
    otherwise. Each decoded text is emitted as a unicode triple-quoted
    string assigned to a variable named ``<NAME>_<VERSION>`` (upper-cased,
    dots replaced by underscores). These assignments, sorted by variable
    name, are followed by a pretty-printed ``{egg filename: {sha256:
    variable name}}`` mapping passed through ``FormatCode``. The result is
    written UTF-8 encoded to ``invalid_unicode.py`` in the current working
    directory.

    :param top: passed unchanged to ``iter_invalid_unicode``.
    :param validate_with_vim: when true, vim is launched once per non-ASCII
        character of each decoded PKG-INFO, with the cursor on that
        character, so the chosen encoding can be checked by eye.
    :raises ValueError: if an egg filename has fewer than three
        ``-``-separated parts.
    """
    pkg_infos = {}
    # egg filename -> {sha256 of the egg: name of the generated variable}.
    # NOTE(review): an earlier comment called this "the blacklist" -- confirm
    # against the consumer of invalid_unicode.py.
    egg_to_pkg_info = collections.defaultdict(dict)

    for path in iter_invalid_unicode(top):
        print(path)
        filename = os.path.basename(path)
        with zipfile2.ZipFile(path) as egg:
            data = egg.read("EGG-INFO/PKG-INFO")

        sha256 = compute_sha256(path)
        # The third part is unused, but the three-way unpack is kept so that a
        # filename with fewer than three parts still fails loudly.
        name, version, _build = filename.split("-", 2)
        varname = "{}_{}".format(
            name.replace(".", "_").upper(), version.replace(".", "_").upper()
        )
        egg_to_pkg_info[filename][sha256] = varname

        encoding = ALTERNATIVE_ENCODING.get(filename, "latin1")
        content = data.decode(encoding)

        if validate_with_vim:
            encoded = content.encode("utf8")
            candidates = [i for i, v in enumerate(content) if ord(v) >= 128]
            for pos in candidates:
                # vim's "{count}go" takes a 1-based *byte* offset, whereas
                # ``pos`` is a 0-based *character* index. The temp file is
                # UTF-8, so any non-ASCII character before ``pos`` takes more
                # than one byte; convert explicitly.
                byte_offset = len(content[:pos].encode("utf8")) + 1
                with tempfile.NamedTemporaryFile() as tmp:
                    tmp.write(encoded)
                    tmp.flush()
                    cmd = ["vim", "+normal {}go".format(byte_offset), tmp.name]
                    print(path)
                    p = subprocess.Popen(cmd)
                    p.communicate()

        pkg_info = u"\n".join(
            (u'\n{} = u"""'.format(varname), content, u'"""\n')
        )
        pkg_infos[varname] = pkg_info

    py_content = [pkg_infos[k] for k in sorted(pkg_infos)]

    buf = StringIO()
    pprint.pprint(dict(egg_to_pkg_info), buf)
    # Only the first element of the pair returned by FormatCode is used.
    output, _ignored = FormatCode(buf.getvalue())
    py_content.append(output)

    output = u"\n".join(py_content)
    with codecs.open("invalid_unicode.py", "wt", encoding="utf8") as fp:
        fp.write(output)
def build_list(top):
    """Collect the eggs under ``top`` whose lowercased stem is in ``names``.

    The tree rooted at ``top`` is walked with ``os.walk``. A file is kept
    when its name ends in ``.egg``, its extension-less lowercased name is a
    member of the module-level ``names``, and ``EggMetadata.from_egg`` can
    open it without raising ``zipfile.BadZipfile``.

    Returns a plain dict ``{egg filename: {sha256 of the egg: "py27"}}``.
    """
    found = collections.defaultdict(dict)

    for dirpath, _subdirs, filenames in os.walk(top):
        for filename in filenames:
            if not filename.endswith(".egg"):
                continue

            stem = os.path.splitext(filename)[0].lower()
            if stem not in names:
                continue

            egg_path = os.path.join(dirpath, filename)
            try:
                # Called only to check that the egg can be parsed; the
                # resulting metadata object itself is not needed.
                EggMetadata.from_egg(egg_path)
            except zipfile.BadZipfile:
                # Not a readable zip archive: leave it out of the result.
                continue

            found[filename][compute_sha256(egg_path)] = "py27"

    return dict(found)
def build_list(top):
    """Collect the eggs under ``top`` for which ``may_be_invalid`` has a tag.

    The tree rooted at ``top`` is walked with ``os.walk``. Each visited
    directory is printed to stderr, together with an ``i/total`` progress
    marker every 100 files. For every ``*.egg`` file,
    ``may_be_invalid(path)`` is called on the normalised absolute path; when
    it returns something other than ``None``, that value is recorded under
    the egg's basename and sha256.

    Any exception raised while handling an egg is reported on stderr and
    the egg is skipped. ``OkonomiyakiError`` is reported as an "error",
    anything else as a "bug".

    Returns a plain dict ``{egg basename: {sha256: python tag}}``.
    """
    collected = collections.defaultdict(dict)

    for dirpath, _subdirs, filenames in os.walk(top):
        print(dirpath, file=sys.stderr)
        total = len(filenames)

        for index, filename in enumerate(filenames):
            if index % 100 == 0:
                progress = "{}/{}".format(index, total)
                # "\r" keeps the progress marker on a single terminal line.
                print(progress, end="\r", file=sys.stderr)
                sys.stderr.flush()

            if not filename.endswith(".egg"):
                continue

            egg_path = os.path.normpath(
                os.path.abspath(os.path.join(dirpath, filename))
            )
            try:
                python_tag = may_be_invalid(egg_path)
                if python_tag is not None:
                    egg_name = os.path.basename(egg_path)
                    # Compute the digest before touching the defaultdict so
                    # that a failure here leaves no empty entry behind.
                    digest = compute_sha256(egg_path)
                    collected[egg_name][digest] = python_tag
            except OkonomiyakiError as exc:
                template = "Okonomiyaki error parsing {!r} ({!r})"
                print(template.format(egg_path, str(exc)), file=sys.stderr)
            except Exception as exc:
                template = "Okonomiyaki bug parsing {!r} ({!r})"
                print(template.format(egg_path, str(exc)), file=sys.stderr)

    return dict(collected)