def unpackFrom(tar_file_path, to_directory):
    # first unpack into a sibling directory of the specified directory, and
    # then move it into place.

    # we expect our tarballs to contain a single top-level directory. We strip
    # off this name as we extract to minimise the path length
    into_parent_dir = os.path.dirname(to_directory)
    fsutils.mkDirP(into_parent_dir)
    temp_directory = tempfile.mkdtemp(dir=into_parent_dir)
    with tarfile.open(tar_file_path) as tf:
        strip_dirname = ''
        # get the extraction directory name from the first part of the
        # extraction paths: it should be the same for all members of
        # the archive
        for m in tf.getmembers():
            split_path = fsutils.fullySplitPath(m.name)
            logger.debug('process member: %s %s', m.name, split_path)
            if os.path.isabs(m.name) or '..' in split_path:
                raise ValueError('archive uses invalid paths')
            if not strip_dirname:
                if len(split_path) != 1 or not len(split_path[0]):
                    raise ValueError('archive does not appear to contain a single module')
                strip_dirname = split_path[0]
                continue
            else:
                if split_path[0] != strip_dirname:
                    raise ValueError('archive does not appear to contain a single module')
            m.name = os.path.join(*split_path[1:])
            tf.extract(m, path=temp_directory)
    shutil.move(temp_directory, to_directory)
    logger.debug('extraction complete %s', to_directory)

def unpackFromCache(cache_key, to_directory):
    ''' If the specified cache key exists, unpack the tarball into the
        specified directory, otherwise raise KeyError.
    '''
    if cache_key is None:
        raise KeyError('"None" is never in cache')
    cache_dir = folders.cacheDirectory()
    fsutils.mkDirP(cache_dir)
    path = os.path.join(cache_dir, cache_key)
    logger.debug('attempt to unpack from cache %s -> %s', path, to_directory)
    try:
        unpackFrom(path, to_directory)
        try:
            shutil.copy(path + '.json', os.path.join(to_directory, '.yotta_origin.json'))
        except IOError as e:
            if e.errno == errno.ENOENT:
                pass
            else:
                raise
        cache_logger.debug('unpacked %s from cache into %s', cache_key, to_directory)
        return
    except IOError as e:
        if e.errno == errno.ENOENT:
            cache_logger.debug('%s not in cache', cache_key)
            raise KeyError('not in cache')

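# A hedged usage sketch, not part of the original module: it illustrates the
# KeyError contract documented above, falling back to a fresh download on a
# cache miss. '_installFromCacheOrDownload' and 'fetch_stream' (a callable
# returning a download stream) are hypothetical names; downloadToCache and
# unpackFromCache are the functions defined in this file.
def _installFromCacheOrDownload(cache_key, to_directory, fetch_stream):
    try:
        # fast path: the tarball is already in the local cache
        unpackFromCache(cache_key, to_directory)
    except KeyError:
        # slow path: download into the cache, then unpack from it
        downloadToCache(fetch_stream(), cache_key=cache_key)
        unpackFromCache(cache_key, to_directory)
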
def write(self, filename=None):
    if filename is None:
        filename, data = self._firstConfig()
    elif filename in self.configs:
        data = self.configs[filename]
    else:
        raise ValueError('No such file.')
    dirname = os.path.dirname(filename)
    fsutils.mkDirP(dirname)
    ordered_json.dump(filename, data)

def downloadToCache(stream, hashinfo={}, cache_key=None):
    ''' Download the specified stream to a temporary cache directory, and
        return (path to the downloaded file, cache key).
        If cache_key is None, then a cache key will be generated and returned,
        but you will probably want to remove the cache file yourself (this is
        safe).
    '''
    hash_name = None
    hash_value = None
    m = None
    if len(hashinfo):
        # check for hashes in preferred order. Currently this is just sha256
        # (which the registry uses). Initial investigations suggest that github
        # doesn't return a header with the hash of the file being downloaded.
        for h in ('sha256',):
            if h in hashinfo:
                hash_name = h
                hash_value = hashinfo[h]
                m = getattr(hashlib, h)()
                break
        if not hash_name:
            logger.warning('could not find supported hash type in %s', hashinfo)
    if cache_key is None:
        cache_key = '%032x' % random.getrandbits(256)
    cache_dir = folders.cacheDirectory()
    fsutils.mkDirP(cache_dir)
    download_fname = os.path.join(cache_dir, cache_key)
    with _openExclusively(download_fname) as f:
        f.seek(0)
        for chunk in stream.iter_content(1024):
            f.write(chunk)
            if hash_name:
                m.update(chunk)
        if hash_name:
            calculated_hash = m.hexdigest()
            logger.debug(
                'calculated %s hash: %s check against: %s' % (hash_name, calculated_hash, hash_value)
            )
            if hash_value and (hash_value != calculated_hash):
                raise Exception('Hash verification failed.')
        logger.debug('wrote tarfile of size: %s to %s', f.tell(), download_fname)
        f.truncate()
    return (download_fname, cache_key)

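# A hedged example, not from the original source: it assumes the 'requests'
# library (whose streaming Response objects provide the iter_content() used
# above), and tarball_url/expected_sha256 are caller-supplied placeholders.
# This variant of downloadToCache returns a (path, cache_key) tuple, so the
# downloaded file can be handed straight to unpackFrom().
import requests

def _downloadAndUnpack(tarball_url, expected_sha256, to_directory):
    response = requests.get(tarball_url, stream=True)  # stream=True enables iter_content()
    response.raise_for_status()
    downloaded_path, cache_key = downloadToCache(response, {'sha256': expected_sha256})
    unpackFrom(downloaded_path, to_directory)
    return cache_key
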
def pruneCache():
    ''' Prune the cache '''
    cache_dir = folders.cacheDirectory()
    def fullpath(f):
        return os.path.join(cache_dir, f)
    # ensure cache exists
    fsutils.mkDirP(cache_dir)
    for f in sorted(
            [f for f in os.listdir(cache_dir) if os.path.isfile(fullpath(f))],
            key=lambda f: os.stat(fullpath(f)).st_mtime
        )[Max_Cached_Modules:]:
        cache_logger.debug('cleaning up cache file %s', f)
        fsutils.rmF(fullpath(f))
    cache_logger.debug('cache pruned to %s items', Max_Cached_Modules)

def write(self, filename=None):
    if filename is None:
        filename, data = self._firstConfig()
    elif filename in self.configs:
        data = self.configs[filename]
    else:
        raise ValueError('No such file.')
    dirname = os.path.normpath(os.path.dirname(filename))
    logging.debug('write settings to "%s" (will ensure directory "%s" exists)', filename, dirname)
    try:
        fsutils.mkDirP(dirname)
        ordered_json.dump(filename, data)
    except OSError as e:
        logging.error(
            'Failed to save user settings to %s/%s, please check that the path exists and is writable.',
            dirname, filename
        )

def unpackFromCache(cache_key, to_directory):
    ''' If the specified cache key exists, unpack the tarball into the
        specified directory, otherwise raise KeyError.
    '''
    if cache_key is None:
        raise KeyError('"None" is never in cache')
    cache_dir = folders.cacheDirectory()
    fsutils.mkDirP(cache_dir)
    path = os.path.join(cache_dir, cache_key)
    try:
        unpackFrom(path, to_directory)
        cache_logger.debug('unpacked %s from cache into %s', cache_key, to_directory)
        return
    except IOError as e:
        if e.errno == errno.ENOENT:
            cache_logger.debug('%s not in cache', cache_key)
            raise KeyError('not in cache')

def generateSubDirList(self, builddir, dirname, source_files, component, all_subdirs, immediate_dependencies, executable_name):
    logger.debug('generate CMakeLists.txt for directory: %s' % os.path.join(component.path, dirname))
    link_dependencies = [x for x in immediate_dependencies]
    fname = os.path.join(builddir, dirname, 'CMakeLists.txt')
    # if the directory name is 'test', then generate multiple
    # independent executable targets:
    if dirname == 'test':
        tests = []
        for f in source_files:
            object_name = component.getName() + '-' + os.path.basename(os.path.splitext(str(f))[0]).lower()
            tests.append([[str(f)], object_name, [f.lang]])
        # link tests against the main executable
        link_dependencies.append(component.getName())
        file_contents = str(Cheetah.Template.Template(Test_CMakeLists_Template, searchList=[{
            'source_directory': os.path.join(component.path, dirname),
            'tests': tests,
            'link_dependencies': link_dependencies
        }]))
    elif dirname == 'source' or executable_name:
        if executable_name:
            object_name = executable_name
            executable = True
        else:
            object_name = component.getName()
            executable = False
        # if we're building the main library, or an executable for this
        # component, then we should link against all the other directories
        # containing cmakelists:
        link_dependencies += [x for x in all_subdirs if x not in ('source', 'test', dirname)]
        file_contents = str(Cheetah.Template.Template(Subdir_CMakeLists_Template, searchList=[{
            'source_directory': os.path.join(component.path, dirname),
            'executable': executable,
            'file_names': [str(f) for f in source_files],
            'object_name': object_name,
            'link_dependencies': link_dependencies,
            'languages': set(f.lang for f in source_files)
        }]))
    else:
        raise Exception('auto CMakeLists for non-source/test directories is not supported')
    fsutils.mkDirP(os.path.join(builddir, dirname))
    self.writeIfDifferent(fname, file_contents)

def pruneCache():
    ''' Prune the cache '''
    cache_dir = folders.cacheDirectory()
    def fullpath(f):
        return os.path.join(cache_dir, f)
    # ensure cache exists
    fsutils.mkDirP(cache_dir)
    max_cached_modules = getMaxCachedModules()
    for f in sorted(
            [f for f in os.listdir(cache_dir)
                if os.path.isfile(fullpath(f)) and not f.endswith('.json')],
            key=lambda f: os.stat(fullpath(f)).st_mtime,
            reverse=True
        )[max_cached_modules:]:
        cache_logger.debug('cleaning up cache file %s', f)
        removeFromCache(f)
    cache_logger.debug('cache pruned to %s items', max_cached_modules)

def _writeFile(self, path, contents):
    dirname = os.path.dirname(path)
    fsutils.mkDirP(dirname)
    self.writeIfDifferent(path, contents)

def unpackTarballStream(stream, into_directory, hash=(None, None)):
    ''' Unpack a response stream that contains a tarball into a directory '''
    hash_name = hash[0]
    hash_value = hash[1]
    if hash_name:
        m = getattr(hashlib, hash_name)()
    into_parent_dir = os.path.dirname(into_directory)
    fsutils.mkDirP(into_parent_dir)
    temp_directory = tempfile.mkdtemp(dir=into_parent_dir)
    download_fname = os.path.join(temp_directory, 'download.tar.gz')
    # remove any partially downloaded file: TODO: checksumming & caching of
    # downloaded components in some central place
    fsutils.rmF(download_fname)
    # create the archive exclusively, we don't want someone else maliciously
    # overwriting our tar archive with something that unpacks to an absolute
    # path when we might be running sudo'd
    try:
        with _openExclusively(download_fname) as f:
            f.seek(0)
            for chunk in stream.iter_content(1024):
                f.write(chunk)
                if hash_name:
                    m.update(chunk)
            if hash_name:
                calculated_hash = m.hexdigest()
                logger.debug(
                    'calculated %s hash: %s check against: %s' % (hash_name, calculated_hash, hash_value)
                )
                if hash_value and (hash_value != calculated_hash):
                    raise Exception('Hash verification failed.')
            logger.debug('wrote tarfile of size: %s to %s', f.tell(), download_fname)
            f.truncate()
            logger.debug('got file, extract into %s (for %s)', temp_directory, into_directory)
            # head back to the start of the file and untar (without closing the
            # file)
            f.seek(0)
            f.flush()
            os.fsync(f)
            with tarfile.open(fileobj=f) as tf:
                extracted_dirname = ''
                # get the extraction directory name from the first part of the
                # extraction paths: it should be the same for all members of
                # the archive
                for m in tf.getmembers():
                    split_path = fsutils.fullySplitPath(m.name)
                    if len(split_path) > 1:
                        if extracted_dirname:
                            if split_path[0] != extracted_dirname:
                                raise ValueError('archive does not appear to contain a single module')
                        else:
                            extracted_dirname = split_path[0]
                tf.extractall(path=temp_directory)
        # move the directory we extracted stuff into to where we actually want it
        # to be
        fsutils.rmRf(into_directory)
        shutil.move(os.path.join(temp_directory, extracted_dirname), into_directory)
    finally:
        fsutils.rmF(download_fname)
        fsutils.rmRf(temp_directory)
    logger.debug('extraction complete %s', into_directory)

def downloadToCache(stream, hashinfo={}, cache_key=None, origin_info=dict()):
    ''' Download the specified stream to a temporary cache directory, and
        return a cache key that can be used to access/remove the file.
        If cache_key is None, then a cache key will be generated and returned.
        You will probably want to use removeFromCache(cache_key) to remove it.
    '''
    hash_name = None
    hash_value = None
    m = None
    if len(hashinfo):
        # check for hashes in preferred order. Currently this is just sha256
        # (which the registry uses). Initial investigations suggest that github
        # doesn't return a header with the hash of the file being downloaded.
        for h in ('sha256',):
            if h in hashinfo:
                hash_name = h
                hash_value = hashinfo[h]
                m = getattr(hashlib, h)()
                break
        if not hash_name:
            logger.warning('could not find supported hash type in %s', hashinfo)
    if cache_key is None:
        cache_key = '%032x' % random.getrandbits(256)
    cache_dir = folders.cacheDirectory()
    fsutils.mkDirP(cache_dir)
    cache_as = os.path.join(cache_dir, cache_key)
    file_size = 0
    (download_file, download_fname) = tempfile.mkstemp(dir=cache_dir)
    with os.fdopen(download_file, 'wb') as f:
        f.seek(0)
        for chunk in stream.iter_content(4096):
            f.write(chunk)
            if hash_name:
                m.update(chunk)
        if hash_name:
            calculated_hash = m.hexdigest()
            logger.debug(
                'calculated %s hash: %s check against: %s' % (hash_name, calculated_hash, hash_value)
            )
            if hash_value and (hash_value != calculated_hash):
                raise Exception('Hash verification failed.')
        file_size = f.tell()
        logger.debug('wrote tarfile of size: %s to %s', file_size, download_fname)
        f.truncate()
    try:
        os.rename(download_fname, cache_as)
        extended_origin_info = {
            'hash': hashinfo,
            'size': file_size
        }
        extended_origin_info.update(origin_info)
        ordered_json.dump(cache_as + '.json', extended_origin_info)
    except OSError as e:
        if e.errno == errno.ENOENT:
            # if we failed, it's because the file already exists (probably
            # because another process got there first), so just rm our
            # temporary file and continue
            cache_logger.debug('another process downloaded %s first', cache_key)
            fsutils.rmF(download_fname)
        else:
            raise
    return cache_key

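# A hedged sketch of the newer API above, not part of the original source: it
# assumes the 'requests' library, and the URL/version fields are illustrative.
# The returned cache key can later be resolved with unpackFromCache(), and the
# '<cache key>.json' sidecar written above is what unpackFromCache() copies
# into the unpacked module as .yotta_origin.json.
import requests

def _cacheModuleTarball(tarball_url, expected_sha256, version):
    response = requests.get(tarball_url, stream=True)
    response.raise_for_status()
    return downloadToCache(
        response,
        hashinfo={'sha256': expected_sha256},
        origin_info={'url': tarball_url, 'version': version}
    )
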
def generate(self, builddir, component, active_dependencies, immediate_dependencies, all_dependencies, toplevel):
    ''' active_dependencies is the dictionary of components that need to be
        built for this component, but will not already have been built for
        another component.
    '''
    include_own_dir = string.Template(
        'include_directories("$path")\n'
    ).substitute(path=component.path)

    include_root_dirs = ''
    include_sys_dirs = ''
    include_other_dirs = ''
    objc_flags_set = {}
    objc_flags = []
    for name, c in all_dependencies.items():
        include_root_dirs += string.Template(
            'include_directories("$path")\n'
        ).substitute(path=c.path)
        dep_sys_include_dirs = c.getExtraSysIncludes()
        for d in dep_sys_include_dirs:
            include_sys_dirs += string.Template(
                'include_directories(SYSTEM "$path")\n'
            ).substitute(path=os.path.join(c.path, d))
        dep_extra_include_dirs = c.getExtraIncludes()
        for d in dep_extra_include_dirs:
            include_other_dirs += string.Template(
                'include_directories("$path")\n'
            ).substitute(path=os.path.join(c.path, d))
    for name, c in list(all_dependencies.items()) + [(component.getName(), component)]:
        dep_extra_objc_flags = c.getExtraObjcFlags()
        # Try to warn Geraint when flags are clobbered. This will probably
        # miss some obscure flag forms, but it tries pretty hard
        for f in dep_extra_objc_flags:
            flag_name = None
            if len(f.split('=')) == 2:
                flag_name = f.split('=')[0]
            elif f.startswith('-fno-'):
                flag_name = f[5:]
            elif f.startswith('-fno'):
                flag_name = f[4:]
            elif f.startswith('-f'):
                flag_name = f[2:]
            if flag_name is not None:
                if flag_name in objc_flags_set and objc_flags_set[flag_name] != name:
                    logger.warning(
                        'component %s Objective-C flag "%s" clobbers a value earlier set by component %s' % (
                            name, f, objc_flags_set[flag_name]
                        ))
                objc_flags_set[flag_name] = name
                objc_flags.append(f)
    set_objc_flags = ' '.join(objc_flags)

    add_depend_subdirs = ''
    for name, c in active_dependencies.items():
        add_depend_subdirs += string.Template(
            'add_subdirectory("$working_dir/$component_name")\n'
        ).substitute(
            working_dir=builddir,
            component_name=name
        )

    binary_subdirs = {os.path.normpath(x): y for x, y in component.getBinaries().items()}
    manual_subdirs = []
    autogen_subdirs = []
    for f in os.listdir(component.path):
        if f in Ignore_Subdirs or f.startswith('.') or f.startswith('_'):
            continue
        if os.path.isfile(os.path.join(component.path, f, 'CMakeLists.txt')):
            self.checkStandardSourceDir(f, component)
            # if the subdirectory has a CMakeLists.txt in it, then use that
            manual_subdirs.append(f)
        elif f in ('source', 'test') or os.path.normpath(f) in binary_subdirs:
            # otherwise, if the directory has source files, generate a
            # CMakeLists in the corresponding temporary directory, and add
            # that.
            # For now we only do this for the source and test directories -
            # in theory we could do others
            sources = self.containsSourceFiles(os.path.join(component.path, f))
            if sources:
                autogen_subdirs.append((f, sources))
        elif f.lower() in ('source', 'src', 'test'):
            self.checkStandardSourceDir(f, component)

    add_own_subdirs = ''
    for f in manual_subdirs:
        if os.path.isfile(os.path.join(component.path, f, 'CMakeLists.txt')):
            add_own_subdirs += string.Template(
                '''add_subdirectory(
    "$component_source_dir/$subdir_name"
    "$working_dir/$subdir_name"
)
'''
            ).substitute(
                component_source_dir=component.path,
                working_dir=builddir,
                subdir_name=f
            )

    # names of all directories at this level with stuff in: used to figure
    # out what to link automatically
    all_subdirs = manual_subdirs + [x[0] for x in autogen_subdirs]
    for f, source_files in autogen_subdirs:
        if f in binary_subdirs:
            exe_name = binary_subdirs[f]
        else:
            exe_name = None
        self.generateSubDirList(builddir, f, source_files, component, all_subdirs, immediate_dependencies, exe_name)
        add_own_subdirs += string.Template(
            '''add_subdirectory(
    "$working_dir/$subdir_name"
    "$working_dir/$subdir_name"
)
'''
        ).substitute(
            working_dir=builddir,
            subdir_name=f
        )

    def sanitizeTarget(t):
        return t.replace('-', '_').upper()

    target_definitions = '-DTARGET=' + sanitizeTarget(self.target.getName()) + ' '
    set_targets_like = 'set(TARGET_LIKE_' + sanitizeTarget(self.target.getName()) + ' TRUE)\n'
    for target in self.target.dependencyResolutionOrder():
        if '*' not in target:
            target_definitions += '-DTARGET_LIKE_' + sanitizeTarget(target) + ' '
            set_targets_like += 'set(TARGET_LIKE_' + sanitizeTarget(target) + ' TRUE)\n'

    file_contents = str(Cheetah.Template.Template(CMakeLists_Template, searchList=[{
        "toplevel": toplevel,
        "target_name": self.target.getName(),
        "set_targets_like": set_targets_like,
        "toolchain_file": self.target.getToolchainFile(),
        "component_name": component.getName(),
        "include_own_dir": include_own_dir,
        "include_root_dirs": include_root_dirs,
        "include_sys_dirs": include_sys_dirs,
        "include_other_dirs": include_other_dirs,
        "set_objc_flags": set_objc_flags,
        "add_depend_subdirs": add_depend_subdirs,
        "add_own_subdirs": add_own_subdirs,
        "yotta_target_definitions": target_definitions,
        "component_version": component.getVersion()
    }]))
    fsutils.mkDirP(builddir)
    fname = os.path.join(builddir, 'CMakeLists.txt')
    self.writeIfDifferent(fname, file_contents)

def unpackTarballStream(stream, into_directory, hash=(None, None)):
    ''' Unpack a stream-like object that contains a tarball into a directory '''
    hash_name = hash[0]
    hash_value = hash[1]
    if hash_name:
        m = getattr(hashlib, hash_name)()
    into_parent_dir = os.path.dirname(into_directory)
    fsutils.mkDirP(into_parent_dir)
    temp_directory = tempfile.mkdtemp(dir=into_parent_dir)
    download_fname = os.path.join(temp_directory, 'download.tar.gz')
    # remove any partially downloaded file: TODO: checksumming & caching of
    # downloaded components in some central place
    fsutils.rmF(download_fname)
    # create the archive exclusively, we don't want someone else maliciously
    # overwriting our tar archive with something that unpacks to an absolute
    # path when we might be running sudo'd
    try:
        fd = os.open(download_fname, os.O_CREAT | os.O_EXCL | os.O_RDWR | getattr(os, "O_BINARY", 0))
        with os.fdopen(fd, 'rb+') as f:
            f.seek(0)
            for chunk in stream.iter_content(1024):
                f.write(chunk)
                if hash_name:
                    m.update(chunk)
            if hash_name:
                calculated_hash = m.hexdigest()
                logging.debug('calculated hash: %s check against: %s' % (calculated_hash, hash_value))
                if hash_value and (hash_value != calculated_hash):
                    raise Exception('Hash verification failed.')
            f.truncate()
            logging.debug('got file, extract into %s (for %s)', temp_directory, into_directory)
            # head back to the start of the file and untar (without closing the
            # file)
            f.seek(0)
            f.flush()
            os.fsync(f)
            with tarfile.open(fileobj=f) as tf:
                to_extract = []
                # modify members to change where they extract to!
                for m in tf.getmembers():
                    split_path = fsutils.fullySplitPath(m.name)
                    if len(split_path) > 1:
                        m.name = os.path.join(*(split_path[1:]))
                        to_extract.append(m)
                tf.extractall(path=temp_directory, members=to_extract)
        # remove the temporary download file, maybe in the future we will cache
        # these somewhere
        fsutils.rmRf(os.path.join(into_directory, 'download.tar.gz'))
        # move the directory we extracted stuff into to where we actually want it
        # to be
        fsutils.rmRf(into_directory)
        shutil.move(temp_directory, into_directory)
    finally:
        fsutils.rmRf(temp_directory)
    logging.debug('extraction complete %s', into_directory)

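# A hedged usage sketch, not part of the original source: it assumes the
# 'requests' library, and url/sha256 are caller-supplied placeholders. This is
# the direct, cache-less path: stream a tarball, verify it when a hash is
# known, and unpack it with the top-level directory stripped.
import requests

def _fetchComponent(url, into_directory, sha256=None):
    response = requests.get(url, stream=True)
    response.raise_for_status()
    unpackTarballStream(response, into_directory, hash=('sha256', sha256) if sha256 else (None, None))
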