def parse(url_or_path, encoding=None, handler_class=DrillHandler):
    """
    :param url_or_path: A file-like object, a filesystem path, a URL, or a string containing XML
    :rtype: :class:`XmlElement`
    """
    handler = handler_class()
    parser = expat.ParserCreate(encoding)
    parser.buffer_text = 1
    parser.StartElementHandler = handler.start_element
    parser.EndElementHandler = handler.end_element
    parser.CharacterDataHandler = handler.characters
    if isinstance(url_or_path, basestring):
        if '://' in url_or_path[:20]:
            # Looks like a URL; fetch and stream it into the parser.
            with contextlib.closing(url_lib.urlopen(url_or_path)) as f:
                parser.ParseFile(f)
        elif url_or_path[:100].strip().startswith('<'):
            # Looks like literal XML; expat wants bytes, so encode unicode first.
            if isinstance(url_or_path, unicode):
                if encoding is None:
                    encoding = 'utf-8'
                url_or_path = url_or_path.encode(encoding)
            parser.Parse(url_or_path, True)
        else:
            # Otherwise treat it as a filesystem path.
            with open(url_or_path, 'rb') as f:
                parser.ParseFile(f)
    elif PY3 and isinstance(url_or_path, bytes):
        parser.ParseFile(bytes_io(url_or_path))
    else:
        parser.ParseFile(url_or_path)
    return handler.root
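# A minimal usage sketch for parse(). The XML literal, file name, and URL
# below are hypothetical examples; the return value is whatever XmlElement
# tree the DrillHandler built.
root = parse('<books><book id="1">Drill</book></books>')   # literal XML string
# root = parse('books.xml')                                # filesystem path
# root = parse('http://example.com/books.xml')             # URL ('://' in the prefix)
# root = parse(open('books.xml', 'rb'))                    # file-like object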
def xml(self, **kwargs):
    """
    Returns an XML representation of this node (including descendants).
    This method automatically creates an :class:`XmlWriter` instance
    internally to handle the writing.

    :param **kwargs: Any named arguments are passed along to the
        :class:`XmlWriter` constructor
    """
    s = bytes_io()
    writer = XmlWriter(s, **kwargs)
    self.write(writer)
    return s.getvalue()
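# A minimal round-trip sketch: parse a document, then serialize it again.
# Which keyword arguments XmlWriter accepts (e.g. pretty-printing options)
# is not defined here, so none are passed.
root = parse('<a><b>text</b></a>')
raw = root.xml()   # bytes, since the writer targets a bytes_io buffer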
def get_pure_py_file_map(t, platform, dependencies):
    info = json.loads(t.extractfile('info/index.json').read().decode('utf-8'))
    try:
        paths = json.loads(t.extractfile('info/paths.json').read().decode('utf-8'))
        _check_paths_version(paths)
    except KeyError:
        paths = None
    source_plat = info['platform']
    source_type = 'unix' if source_plat in {'osx', 'linux'} else 'win'
    dest_plat, dest_arch = platform.split('-')
    dest_type = 'unix' if dest_plat in {'osx', 'linux'} else 'win'
    files = t.extractfile('info/files').read().decode('utf-8').splitlines()

    if source_type == 'unix' and dest_type == 'win':
        mapping = path_mapping_unix_windows
    elif source_type == 'win' and dest_type == 'unix':
        mapping = path_mapping_windows_unix
    else:
        mapping = path_mapping_identity

    newinfo = info.copy()
    newinfo['platform'] = dest_plat
    newinfo['arch'] = 'x86_64' if dest_arch == '64' else 'x86'
    newinfo['subdir'] = platform
    if dependencies is not None:
        newinfo['depends'] = dependencies

    pythons = list(filter(None, [pyver_re.match(p) for p in info['depends']]))
    pythons = list(set(p.group(1) for p in pythons))
    if 'python' in info['depends'] and not pythons:
        # No Python version dependency was specified.
        # Only a problem when converting from Windows to Unix, since the
        # Python version is part of the folder structure on Unix.
        if source_type == 'win' and dest_type == 'unix':
            raise RuntimeError("Python dependency must be explicit when "
                               "converting from a Windows package to a Linux package")
    if len(pythons) > 1:
        raise RuntimeError("Found more than one versioned Python dependency "
                           "in package %s" % t.name)
    elif len(pythons) == 0:
        # not a Python package
        mapping = []
    else:
        pyver = pythons[0]
        mapping = [(re.compile(i[0].format(pyver=pyver)),
                    i[1].format(pyver=pyver)) for i in mapping]

    members = t.getmembers()
    file_map = {}
    paths_mapping_dict = {}  # keep track of what we change in files
    pathmember = None
    # is None when info/has_prefix does not exist
    has_prefix_files = None
    if 'info/has_prefix' in t.getnames():
        has_prefix_files = t.extractfile('info/has_prefix').read().decode()
    if has_prefix_files:
        fieldnames = ['prefix', 'type', 'path']
        try:
            csv_dialect = csv.Sniffer().sniff(has_prefix_files)
        except csv.Error:
            # Should be a tab-delimited file.  Not completely sure whether
            # text editors may have replaced tabs with spaces, though.
            csv_dialect = csv.excel_tab
        csv_dialect.lineterminator = '\n'
        for attr in ('delimiter', 'quotechar'):
            if PY3 and hasattr(getattr(csv_dialect, attr), 'decode'):
                setattr(csv_dialect, attr, getattr(csv_dialect, attr).decode())
            elif not PY3 and hasattr(getattr(csv_dialect, attr), 'encode'):
                setattr(csv_dialect, attr, getattr(csv_dialect, attr).encode())
        has_prefix_files = csv.DictReader(has_prefix_files.splitlines(),
                                          fieldnames=fieldnames,
                                          dialect=csv_dialect)
        # convenience: store list of dictionaries as map by path
        has_prefix_files = {d['path']: d for d in has_prefix_files}

    for member in members:
        # Update metadata
        if member.path == 'info/index.json':
            newmember = tarfile.TarInfo('info/index.json')
            if PY3:
                newbytes = bytes(json.dumps(newinfo), 'utf-8')
            else:
                newbytes = json.dumps(newinfo)
            newmember.size = len(newbytes)
            file_map['info/index.json'] = (newmember, bytes_io(newbytes))
            continue
        elif member.path == 'info/files':
            # We have to do this at the end when we have all the files
            filemember = deepcopy(member)
            continue
        elif member.path == 'info/paths.json':
            pathmember = deepcopy(member)
            continue

        # Move paths
        oldpath = member.path
        append_new_path_to_has_prefix = False
        if has_prefix_files and oldpath in has_prefix_files:
            append_new_path_to_has_prefix = True

        for old, new in mapping:
            newpath = old.sub(new, oldpath)
            if newpath != oldpath:
                newmember = deepcopy(member)
                newmember.path = newpath
                assert member.path == oldpath
                file_map[oldpath] = None
                file_map[newpath] = newmember
                loc = files.index(oldpath)
                files[loc] = newpath
                paths_mapping_dict[oldpath] = newpath
                if append_new_path_to_has_prefix:
                    has_prefix_files[oldpath]['path'] = newpath
                break
        else:
            file_map[oldpath] = member

        # Make Windows-compatible entry points
        if source_type == 'unix' and dest_type == 'win':
            old = path_mapping_bat_proxy[0]
            for new in path_mapping_bat_proxy[1]:
                match = old.match(oldpath)
                if match:
                    newpath = old.sub(new, oldpath)
                    if newpath.endswith('-script'):
                        if match.group(2):
                            newpath = newpath + match.group(2)
                        else:
                            newpath = newpath + '.py'
                    if newpath != oldpath:
                        newmember = tarfile.TarInfo(newpath)
                        if newpath.endswith('.bat'):
                            if PY3:
                                data = bytes(BAT_PROXY.replace('\n', '\r\n'), 'ascii')
                            else:
                                data = BAT_PROXY.replace('\n', '\r\n')
                        else:
                            data = t.extractfile(member).read()
                        if append_new_path_to_has_prefix:
                            has_prefix_files[oldpath]['path'] = newpath
                        newmember.size = len(data)
                        file_map[newpath] = newmember, bytes_io(data)
                        files.append(newpath)
                        found_path = [p for p in paths['paths'] if p['_path'] == oldpath]
                        assert len(found_path) == 1
                        newdict = copy(found_path[0])
                        newdict['_path'] = newpath
                        paths['paths'].append(newdict)

    # Change paths.json the same way that we changed files
    if paths:
        updated_paths = _update_paths(paths, paths_mapping_dict)
        paths = json.dumps(updated_paths, sort_keys=True, indent=4,
                           separators=(',', ': '))

    files = list(set(files))
    files = '\n'.join(sorted(files)) + '\n'
    if PY3:
        files = bytes(files, 'utf-8')
        if paths:
            paths = bytes(paths, 'utf-8')
    filemember.size = len(files)
    file_map['info/files'] = filemember, bytes_io(files)
    if pathmember:
        pathmember.size = len(paths)
        file_map['info/paths.json'] = pathmember, bytes_io(paths)

    if has_prefix_files:
        output = StringIO()
        writer = csv.DictWriter(output, fieldnames=fieldnames, dialect=csv_dialect)
        writer.writerows(has_prefix_files.values())
        member = t.getmember('info/has_prefix')
        output_val = output.getvalue()
        if hasattr(output_val, 'encode'):
            output_val = output_val.encode()
        member.size = len(output_val)
        file_map['info/has_prefix'] = member, bytes_io(output_val)

    return file_map
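# A minimal sketch of consuming the returned file_map, assuming a source
# TarFile `t` and a hypothetical output path. Values are None (drop the
# entry), a TarInfo (copy the content from the source archive), or a
# (TarInfo, file-like) pair (write the replacement content).
import tarfile

def write_converted(t, file_map, out_path):
    with tarfile.open(out_path, 'w:bz2') as out:
        for path in sorted(file_map):
            entry = file_map[path]
            if entry is None:
                continue  # path was renamed away or intentionally dropped
            if isinstance(entry, tuple):
                member, fileobj = entry
                out.addfile(member, fileobj)
            else:
                out.addfile(entry, t.extractfile(entry))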
def get_pure_py_file_map(t, platform):
    info = json.loads(t.extractfile('info/index.json').read().decode('utf-8'))
    source_plat = info['platform']
    source_type = 'unix' if source_plat in {'osx', 'linux'} else 'win'
    dest_plat, dest_arch = platform.split('-')
    dest_type = 'unix' if dest_plat in {'osx', 'linux'} else 'win'
    files = t.extractfile('info/files').read().decode('utf-8')

    if source_type == 'unix' and dest_type == 'win':
        mapping = path_mapping_unix_windows
    elif source_type == 'win' and dest_type == 'unix':
        mapping = path_mapping_windows_unix
    else:
        mapping = path_mapping_identity

    newinfo = info.copy()
    newinfo['platform'] = dest_plat
    newinfo['arch'] = 'x86_64' if dest_arch == '64' else 'x86'
    newinfo['subdir'] = platform

    pythons = list(filter(None, [pyver_re.match(p) for p in info['depends']]))
    if len(pythons) > 1:
        raise RuntimeError("Found more than one Python dependency in package %s" % t.name)
    elif len(pythons) == 0:
        # not a Python package
        mapping = []
    elif pythons[0].group(1):
        pyver = pythons[0].group(1)
        mapping = [(re.compile(i[0].format(pyver=pyver)),
                    i[1].format(pyver=pyver)) for i in mapping]
    else:
        # No Python version dependency was specified.
        # Only a problem when converting from Windows to Unix, since the
        # Python version is part of the folder structure on Unix.
        if source_type == 'win' and dest_type == 'unix':
            raise RuntimeError("Python dependency must be explicit when "
                               "converting from a Windows package to a Linux package")

    members = t.getmembers()
    file_map = {}

    for member in members:
        # Update metadata
        if member.path == 'info/index.json':
            newmember = tarfile.TarInfo('info/index.json')
            if PY3:
                newbytes = bytes(json.dumps(newinfo), 'utf-8')
            else:
                newbytes = json.dumps(newinfo)
            newmember.size = len(newbytes)
            file_map['info/index.json'] = (newmember, bytes_io(newbytes))
            continue
        elif member.path == 'info/files':
            # We have to do this at the end when we have all the files
            filemember = deepcopy(member)
            continue
        elif member.path == 'info/has_prefix':
            if source_type == 'unix' and dest_type == 'win':
                # has_prefix is not needed on Windows
                file_map['info/has_prefix'] = None

        # Move paths
        oldpath = member.path
        for old, new in mapping:
            newpath = old.sub(new, oldpath)
            if oldpath in file_map:
                # Already been handled
                break
            if newpath != oldpath:
                newmember = deepcopy(member)
                newmember.path = newpath
                assert member.path == oldpath
                file_map[oldpath] = None
                file_map[newpath] = newmember
                files = files.replace(oldpath, newpath)
                break
        else:
            file_map[oldpath] = member

        # Make Windows-compatible entry points
        batseen = set()
        if source_type == 'unix' and dest_type == 'win':
            for old, new in path_mapping_bat_proxy:
                newpath = old.sub(new, oldpath)
                if oldpath in batseen:
                    break
                if newpath != oldpath:
                    newmember = tarfile.TarInfo(newpath)
                    if PY3:
                        data = bytes(BAT_PROXY.replace('\n', '\r\n'), 'ascii')
                    else:
                        data = BAT_PROXY.replace('\n', '\r\n')
                    newmember.size = len(data)
                    file_map[newpath] = newmember, bytes_io(data)
                    batseen.add(oldpath)
                    files = files + newpath + "\n"

    files = '\n'.join(sorted(files.splitlines())) + '\n'
    if PY3:
        files = bytes(files, 'utf-8')
    filemember.size = len(files)
    file_map['info/files'] = filemember, bytes_io(files)

    return file_map
        elif url_or_path[:100].strip().startswith('<'):
            if isinstance(url_or_path, unicode):
                if encoding is None:
                    encoding = 'utf-8'
                url_or_path = url_or_path.encode(encoding)
            parser.Parse(url_or_path, True)
        else:
            f = open(url_or_path, 'rb')
            try:
                parser.ParseFile(f)
            finally:
                f.close()
    elif PY3 and isinstance(url_or_path, bytes):
        parser.ParseFile(bytes_io(url_or_path))
    else:
        parser.ParseFile(url_or_path)
    return handler.root


class DrillElementIterator(object):
    READ_CHUNK_SIZE = 16384

    def __init__(self, filelike, parser):
        self.filelike = filelike
        self.parser = parser
        self.elements = []

    def add(self, element):
        self.elements.append(element)
def get_pure_py_file_map(t, platform):
    info = json.loads(t.extractfile('info/index.json').read().decode('utf-8'))
    try:
        paths = json.loads(t.extractfile('info/paths.json').read().decode('utf-8'))
        _check_paths_version(paths)
    except KeyError:
        paths = None
    source_plat = info['platform']
    source_type = 'unix' if source_plat in {'osx', 'linux'} else 'win'
    dest_plat, dest_arch = platform.split('-')
    dest_type = 'unix' if dest_plat in {'osx', 'linux'} else 'win'
    files = t.extractfile('info/files').read().decode('utf-8').splitlines()

    if source_type == 'unix' and dest_type == 'win':
        mapping = path_mapping_unix_windows
    elif source_type == 'win' and dest_type == 'unix':
        mapping = path_mapping_windows_unix
    else:
        mapping = path_mapping_identity

    newinfo = info.copy()
    newinfo['platform'] = dest_plat
    newinfo['arch'] = 'x86_64' if dest_arch == '64' else 'x86'
    newinfo['subdir'] = platform

    pythons = list(filter(None, [pyver_re.match(p) for p in info['depends']]))
    pythons = list(set(p.group(1) for p in pythons))
    if 'python' in info['depends'] and not pythons:
        # No Python version dependency was specified.
        # Only a problem when converting from Windows to Unix, since the
        # Python version is part of the folder structure on Unix.
        if source_type == 'win' and dest_type == 'unix':
            raise RuntimeError("Python dependency must be explicit when "
                               "converting from a Windows package to a Linux package")
    if len(pythons) > 1:
        raise RuntimeError("Found more than one versioned Python dependency "
                           "in package %s" % t.name)
    elif len(pythons) == 0:
        # not a Python package
        mapping = []
    else:
        pyver = pythons[0]
        mapping = [(re.compile(i[0].format(pyver=pyver)),
                    i[1].format(pyver=pyver)) for i in mapping]

    members = t.getmembers()
    file_map = {}
    paths_mapping_dict = {}  # keep track of what we change in files
    pathmember = None
    # is None when info/has_prefix does not exist
    has_prefix_files = None
    if 'info/has_prefix' in t.getnames():
        has_prefix_files = t.extractfile('info/has_prefix').read().decode()
    if has_prefix_files:
        fieldnames = ['prefix', 'type', 'path']
        try:
            csv_dialect = csv.Sniffer().sniff(has_prefix_files)
        except csv.Error:
            # Should be a tab-delimited file.  Not completely sure whether
            # text editors may have replaced tabs with spaces, though.
            csv_dialect = csv.excel_tab
        csv_dialect.lineterminator = '\n'
        for attr in ('delimiter', 'quotechar'):
            if PY3 and hasattr(getattr(csv_dialect, attr), 'decode'):
                setattr(csv_dialect, attr, getattr(csv_dialect, attr).decode())
            elif not PY3 and hasattr(getattr(csv_dialect, attr), 'encode'):
                setattr(csv_dialect, attr, getattr(csv_dialect, attr).encode())
        has_prefix_files = csv.DictReader(has_prefix_files.splitlines(),
                                          fieldnames=fieldnames,
                                          dialect=csv_dialect)
        # convenience: store list of dictionaries as map by path
        has_prefix_files = {d['path']: d for d in has_prefix_files}

    for member in members:
        # Update metadata
        if member.path == 'info/index.json':
            newmember = tarfile.TarInfo('info/index.json')
            if PY3:
                newbytes = bytes(json.dumps(newinfo), 'utf-8')
            else:
                newbytes = json.dumps(newinfo)
            newmember.size = len(newbytes)
            file_map['info/index.json'] = (newmember, bytes_io(newbytes))
            continue
        elif member.path == 'info/files':
            # We have to do this at the end when we have all the files
            filemember = deepcopy(member)
            continue
        elif member.path == 'info/paths.json':
            pathmember = deepcopy(member)
            continue

        # Move paths
        oldpath = member.path
        append_new_path_to_has_prefix = False
        if has_prefix_files and oldpath in has_prefix_files:
            append_new_path_to_has_prefix = True

        for old, new in mapping:
            newpath = old.sub(new, oldpath)
            if newpath != oldpath:
                newmember = deepcopy(member)
                newmember.path = newpath
                assert member.path == oldpath
                file_map[oldpath] = None
                file_map[newpath] = newmember
                loc = files.index(oldpath)
                files[loc] = newpath
                paths_mapping_dict[oldpath] = newpath
                if append_new_path_to_has_prefix:
                    has_prefix_files[oldpath]['path'] = newpath
                break
        else:
            file_map[oldpath] = member

        # Make Windows-compatible entry points
        if source_type == 'unix' and dest_type == 'win':
            old = path_mapping_bat_proxy[0]
            for new in path_mapping_bat_proxy[1]:
                match = old.match(oldpath)
                if match:
                    newpath = old.sub(new, oldpath)
                    if newpath.endswith('-script'):
                        if match.group(2):
                            newpath = newpath + match.group(2)
                        else:
                            newpath = newpath + '.py'
                    if newpath != oldpath:
                        newmember = tarfile.TarInfo(newpath)
                        if newpath.endswith('.bat'):
                            if PY3:
                                data = bytes(BAT_PROXY.replace('\n', '\r\n'), 'ascii')
                            else:
                                data = BAT_PROXY.replace('\n', '\r\n')
                        else:
                            data = t.extractfile(member).read()
                        if append_new_path_to_has_prefix:
                            has_prefix_files[oldpath]['path'] = newpath
                        newmember.size = len(data)
                        file_map[newpath] = newmember, bytes_io(data)
                        files.append(newpath)
                        found_path = [p for p in paths['paths'] if p['_path'] == oldpath]
                        assert len(found_path) == 1
                        newdict = copy(found_path[0])
                        newdict['_path'] = newpath
                        paths['paths'].append(newdict)

    # Change paths.json the same way that we changed files
    if paths:
        updated_paths = _update_paths(paths, paths_mapping_dict)
        paths = json.dumps(updated_paths, sort_keys=True, indent=4,
                           separators=(',', ': '))

    files = list(set(files))
    files = '\n'.join(sorted(files)) + '\n'
    if PY3:
        files = bytes(files, 'utf-8')
        if paths:
            paths = bytes(paths, 'utf-8')
    filemember.size = len(files)
    file_map['info/files'] = filemember, bytes_io(files)
    if pathmember:
        pathmember.size = len(paths)
        file_map['info/paths.json'] = pathmember, bytes_io(paths)

    if has_prefix_files:
        output = StringIO()
        writer = csv.DictWriter(output, fieldnames=fieldnames, dialect=csv_dialect)
        writer.writerows(has_prefix_files.values())
        member = t.getmember('info/has_prefix')
        output_val = output.getvalue()
        if hasattr(output_val, 'encode'):
            output_val = output_val.encode()
        member.size = len(output_val)
        file_map['info/has_prefix'] = member, bytes_io(output_val)

    return file_map