def publish(self, registry=None):
    ''' Publish to the appropriate registry, return a description of any
        errors that occurred, or None if successful.
        No VCS tagging is performed.
    '''
    if (registry is None) or (registry == registry_access.Registry_Base_URL):
        if 'private' in self.description and self.description['private']:
            return "this %s is private and cannot be published" % (
                self.description_filename.split('.')[0]
            )
    upload_archive = os.path.join(self.path, 'upload.tar.gz')
    fsutils.rmF(upload_archive)
    fd = os.open(
        upload_archive,
        os.O_CREAT | os.O_EXCL | os.O_RDWR | getattr(os, "O_BINARY", 0)
    )
    with os.fdopen(fd, 'rb+') as tar_file:
        tar_file.truncate()
        self.generateTarball(tar_file)
        tar_file.seek(0)
        with self.findAndOpenReadme() as readme_file_wrapper:
            if not readme_file_wrapper:
                logger.warning("no readme.md file detected")
            with open(self.getDescriptionFile(), 'r') as description_file:
                return registry_access.publish(
                    self.getRegistryNamespace(),
                    self.getName(),
                    self.getVersion(),
                    description_file,
                    tar_file,
                    readme_file_wrapper.file,
                    readme_file_wrapper.extension().lower(),
                    registry=registry
                )

def publish(self):
    ''' Publish to the appropriate registry, return a description of any
        errors that occurred, or None if successful.
        No VCS tagging is performed.
    '''
    upload_archive = os.path.join(self.path, 'upload.tar.gz')
    fsutils.rmF(upload_archive)
    # O_BINARY is needed on Windows so the tar data isn't newline-mangled
    fd = os.open(
        upload_archive,
        os.O_CREAT | os.O_EXCL | os.O_RDWR | getattr(os, "O_BINARY", 0)
    )
    with os.fdopen(fd, 'rb+') as tar_file:
        tar_file.truncate()
        self.generateTarball(tar_file)
        tar_file.seek(0)
        with self.findAndOpenReadme() as readme_file_wrapper:
            if not readme_file_wrapper:
                logger.warning("no readme.md file detected")
            with open(self.getDescriptionFile(), 'r') as description_file:
                return registry_access.publish(
                    self.getRegistryNamespace(),
                    self.getName(),
                    self.getVersion(),
                    description_file,
                    tar_file,
                    readme_file_wrapper.file,
                    readme_file_wrapper.extension().lower()
                )

def publish(self):
    ''' Publish to the appropriate registry, return a description of any
        errors that occurred, or None if successful.
        No VCS tagging is performed.
    '''
    if 'private' in self.description and self.description['private']:
        return "this %s is private and cannot be published" % (
            self.description_filename.split('.')[0]
        )
    upload_archive = os.path.join(self.path, 'upload.tar.gz')
    fsutils.rmF(upload_archive)
    fd = os.open(
        upload_archive,
        os.O_CREAT | os.O_EXCL | os.O_RDWR | getattr(os, "O_BINARY", 0)
    )
    with os.fdopen(fd, 'rb+') as tar_file:
        tar_file.truncate()
        self.generateTarball(tar_file)
        tar_file.seek(0)
        with self.findAndOpenReadme() as readme_file_wrapper:
            if not readme_file_wrapper:
                logger.warning("no readme.md file detected")
            with open(self.getDescriptionFile(), 'r') as description_file:
                return registry_access.publish(
                    self.getRegistryNamespace(),
                    self.getName(),
                    self.getVersion(),
                    description_file,
                    tar_file,
                    readme_file_wrapper.file,
                    readme_file_wrapper.extension().lower()
                )

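# A minimal usage sketch for the publish() variants above. 'component' is a
# hypothetical stand-in for whatever object provides publish(); the contract
# is that a non-None return value is an error description and None means
# success, so callers report the string rather than catching an exception.
def publishAndReport(component):
    error = component.publish()
    if error is not None:
        logger.error('publish failed: %s', error)
        return False
    return True
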
def unpackTarballStream(stream, into_directory, hash={}, cache_key=None):
    ''' Unpack a requests response stream that contains a tarball into a
        directory. If a hash is provided, then it will be used as a cache
        key (for future requests you can try to retrieve the key value from
        the cache first, before making the request).
    '''
    # remember whether the caller supplied a cache key: downloadToCache
    # generates and returns one otherwise, which would make the check below
    # dead code if we tested cache_key after the call
    cache_key_supplied = cache_key is not None
    download_fname, cache_key = downloadToCache(stream, hash, cache_key)
    unpackFromCache(cache_key, into_directory)
    # if we didn't provide a cache key, there's no point in storing the cache
    if not cache_key_supplied:
        fsutils.rmF(download_fname)

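# A sketch of driving the cache-based unpackTarballStream() above from an
# HTTP download. This assumes the 'requests' library; the URL and digest
# arguments are illustrative placeholders, not values from this codebase.
def fetchAndUnpack(url, into_directory, sha256_digest=None):
    import requests
    # stream=True defers reading the body, so iter_content() can consume it
    # in chunks as the download helpers above expect
    response = requests.get(url, stream=True)
    response.raise_for_status()
    unpackTarballStream(
        response,
        into_directory,
        hash=({'sha256': sha256_digest} if sha256_digest else {})
    )
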
def pruneCache():
    ''' Prune the cache directory down to at most Max_Cached_Modules files,
        discarding the least recently modified files first.
    '''
    cache_dir = folders.cacheDirectory()
    def fullpath(f):
        return os.path.join(cache_dir, f)
    # ensure cache exists
    fsutils.mkDirP(cache_dir)
    # sort newest-first, so everything beyond the first Max_Cached_Modules
    # entries is the oldest, and is removed
    for f in sorted(
            [f for f in os.listdir(cache_dir) if os.path.isfile(fullpath(f))],
            key=lambda f: os.stat(fullpath(f)).st_mtime,
            reverse=True
        )[Max_Cached_Modules:]:
        cache_logger.debug('cleaning up cache file %s', f)
        fsutils.rmF(fullpath(f))
    cache_logger.debug('cache pruned to %s items', Max_Cached_Modules)

def unpackFrom(tar_file_path, to_directory):
    # first unpack into a sibling directory of the specified directory, and
    # then move it into place.

    # we expect our tarballs to contain a single top-level directory. We
    # strip off this name as we extract to minimise the path length
    into_parent_dir = os.path.dirname(to_directory)
    fsutils.mkDirP(into_parent_dir)
    temp_directory = tempfile.mkdtemp(dir=into_parent_dir)
    try:
        with tarfile.open(tar_file_path) as tf:
            strip_dirname = ''
            # get the extraction directory name from the first part of the
            # extraction paths: it should be the same for all members of
            # the archive
            for m in tf.getmembers():
                split_path = fsutils.fullySplitPath(m.name)
                logger.debug('process member: %s %s', m.name, split_path)
                if os.path.isabs(m.name) or '..' in split_path:
                    raise ValueError('archive uses invalid paths')
                if not strip_dirname:
                    if len(split_path) != 1 or not len(split_path[0]):
                        raise ValueError(
                            'archive does not appear to contain a single module'
                        )
                    strip_dirname = split_path[0]
                    continue
                else:
                    if split_path[0] != strip_dirname:
                        raise ValueError(
                            'archive does not appear to contain a single module'
                        )
                    m.name = os.path.join(*split_path[1:])
                    tf.extract(m, path=temp_directory)
        # make sure the destination directory doesn't exist:
        fsutils.rmRf(to_directory)
        shutil.move(temp_directory, to_directory)
        temp_directory = None
        logger.debug('extraction complete %s', to_directory)
    except IOError as e:
        if e.errno != errno.ENOENT:
            logger.error('failed to extract tarfile %s', e)
            fsutils.rmF(tar_file_path)
        raise
    finally:
        if temp_directory is not None:
            # if anything has failed, cleanup
            fsutils.rmRf(temp_directory)

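# The checks in unpackFrom() rely on fsutils.fullySplitPath() splitting a
# member name into all of its path components. The real implementation
# lives in fsutils; this is a minimal illustrative sketch of the behaviour
# those checks assume:
def fullySplitPath(path):
    # repeatedly apply os.path.split, accumulating components in reverse:
    # 'a/b/c' -> ['a', 'b', 'c'], and 'a/../b' -> ['a', '..', 'b'], which
    # is what makes the '..' membership test above work
    components = []
    while True:
        path, tail = os.path.split(path)
        if tail:
            components.insert(0, tail)
        else:
            if path:
                # an absolute path leaves a root component behind
                components.insert(0, path)
            break
    return components
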
def publish(self, registry=None):
    ''' Publish to the appropriate registry, return a description of any
        errors that occurred, or None if successful.
        No VCS tagging is performed.
    '''
    if (registry is None) or (registry == registry_access.Registry_Base_URL):
        if 'private' in self.description and self.description['private']:
            return "this %s is private and cannot be published" % (
                self.description_filename.split('.')[0]
            )
    upload_archive = os.path.join(self.path, 'upload.tar.gz')
    fsutils.rmF(upload_archive)
    fd = os.open(
        upload_archive,
        os.O_CREAT | os.O_EXCL | os.O_RDWR | getattr(os, "O_BINARY", 0)
    )
    with os.fdopen(fd, 'rb+') as tar_file:
        tar_file.truncate()
        self.generateTarball(tar_file)
        logger.debug('generated tar file of length %s', tar_file.tell())
        tar_file.seek(0)
        # calculate the hash of the file before we upload it:
        shasum = hashlib.sha256()
        while True:
            chunk = tar_file.read(1000)
            if not chunk:
                break
            shasum.update(chunk)
        logger.debug('generated tar file has hash %s', shasum.hexdigest())
        tar_file.seek(0)
        with self.findAndOpenReadme() as readme_file_wrapper:
            if not readme_file_wrapper:
                logger.warning("no readme.md file detected")
            with open(self.getDescriptionFile(), 'r') as description_file:
                return registry_access.publish(
                    self.getRegistryNamespace(),
                    self.getName(),
                    self.getVersion(),
                    description_file,
                    tar_file,
                    readme_file_wrapper.file,
                    readme_file_wrapper.extension().lower(),
                    registry=registry
                )

def downloadToCache(stream, hashinfo={}, cache_key=None, origin_info=dict()):
    ''' Download the specified stream to a temporary cache directory, and
        return a cache key that can be used to access/remove the file.
        If cache_key is None, then a cache key will be generated and
        returned. You will probably want to use removeFromCache(cache_key)
        to remove it.
    '''
    hash_name = None
    hash_value = None
    m = None
    if len(hashinfo):
        # check for hashes in preferred order. Currently this is just
        # sha256 (which the registry uses). Initial investigations suggest
        # that github doesn't return a header with the hash of the file
        # being downloaded.
        for h in ('sha256',):
            if h in hashinfo:
                hash_name = h
                hash_value = hashinfo[h]
                m = getattr(hashlib, h)()
                break
        if not hash_name:
            logger.warning('could not find supported hash type in %s', hashinfo)
    if cache_key is None:
        cache_key = '%032x' % random.getrandbits(256)

    cache_dir = folders.cacheDirectory()
    fsutils.mkDirP(cache_dir)
    cache_as = os.path.join(cache_dir, cache_key)
    file_size = 0

    (download_file, download_fname) = tempfile.mkstemp(dir=cache_dir)
    with os.fdopen(download_file, 'wb') as f:
        f.seek(0)
        for chunk in stream.iter_content(4096):
            f.write(chunk)
            if hash_name:
                m.update(chunk)
        if hash_name:
            calculated_hash = m.hexdigest()
            logger.debug(
                'calculated %s hash: %s check against: %s' %
                (hash_name, calculated_hash, hash_value)
            )
            if hash_value and (hash_value != calculated_hash):
                raise Exception('Hash verification failed.')
        file_size = f.tell()
        logger.debug('wrote tarfile of size: %s to %s', file_size, download_fname)
        f.truncate()

    try:
        os.rename(download_fname, cache_as)
        extended_origin_info = {
            'hash': hashinfo,
            'size': file_size
        }
        extended_origin_info.update(origin_info)
        ordered_json.dump(cache_as + '.json', extended_origin_info)
    except OSError as e:
        if e.errno == errno.ENOENT:
            # if we failed, it's because the file already exists (probably
            # because another process got there first), so just rm our
            # temporary file and continue
            cache_logger.debug('another process downloaded %s first', cache_key)
            fsutils.rmF(download_fname)
        else:
            raise

    return cache_key

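# A small illustrative wrapper (hypothetical, not part of this codebase)
# tying the cache primitives together: download a requests response through
# the cache, and hand back the cached file's path plus the key needed for
# removeFromCache() later.
def cachedDownload(response, expected_sha256=None):
    hashinfo = {'sha256': expected_sha256} if expected_sha256 else {}
    cache_key = downloadToCache(response, hashinfo)
    return os.path.join(folders.cacheDirectory(), cache_key), cache_key
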
def removeFromCache(cache_key):
    f = os.path.join(folders.cacheDirectory(), cache_key)
    fsutils.rmF(f)
    # remove any metadata too, if it exists
    fsutils.rmF(f + '.json')

def unpackTarballStream(stream, into_directory, hash=(None, None)):
    ''' Unpack a stream-like object that contains a tarball into a directory
    '''
    hash_name = hash[0]
    hash_value = hash[1]
    if hash_name:
        m = getattr(hashlib, hash_name)()

    into_parent_dir = os.path.dirname(into_directory)
    fsutils.mkDirP(into_parent_dir)
    temp_directory = tempfile.mkdtemp(dir=into_parent_dir)
    download_fname = os.path.join(temp_directory, 'download.tar.gz')
    # remove any partially downloaded file: TODO: checksumming & caching of
    # downloaded components in some central place
    fsutils.rmF(download_fname)
    # create the archive exclusively, we don't want someone else maliciously
    # overwriting our tar archive with something that unpacks to an absolute
    # path when we might be running sudo'd
    try:
        fd = os.open(
            download_fname,
            os.O_CREAT | os.O_EXCL | os.O_RDWR | getattr(os, "O_BINARY", 0)
        )
        with os.fdopen(fd, 'rb+') as f:
            f.seek(0)
            for chunk in stream.iter_content(1024):
                f.write(chunk)
                if hash_name:
                    m.update(chunk)
            if hash_name:
                calculated_hash = m.hexdigest()
                logging.debug(
                    'calculated hash: %s check against: %s' %
                    (calculated_hash, hash_value)
                )
                if hash_value and (hash_value != calculated_hash):
                    raise Exception('Hash verification failed.')
            f.truncate()
            logging.debug(
                'got file, extract into %s (for %s)', temp_directory, into_directory
            )
            # head back to the start of the file and untar (without closing
            # the file)
            f.seek(0)
            f.flush()
            os.fsync(f)
            with tarfile.open(fileobj=f) as tf:
                to_extract = []
                # modify members to change where they extract to!
                for member in tf.getmembers():
                    split_path = fsutils.fullySplitPath(member.name)
                    if len(split_path) > 1:
                        member.name = os.path.join(*(split_path[1:]))
                        to_extract.append(member)
                tf.extractall(path=temp_directory, members=to_extract)
        # remove the temporary download file (it lives in temp_directory, so
        # it must go before the move below, or it would be shipped with the
        # module); maybe in the future we will cache these somewhere
        fsutils.rmF(download_fname)
        # move the directory we extracted stuff into to where we actually
        # want it to be
        fsutils.rmRf(into_directory)
        shutil.move(temp_directory, into_directory)
    finally:
        fsutils.rmRf(temp_directory)
    logging.debug('extraction complete %s', into_directory)

def unpackTarballStream(stream, into_directory, hash=(None, None)):
    ''' Unpack a requests response stream that contains a tarball into a
        directory
    '''
    hash_name = hash[0]
    hash_value = hash[1]
    if hash_name:
        m = getattr(hashlib, hash_name)()

    into_parent_dir = os.path.dirname(into_directory)
    fsutils.mkDirP(into_parent_dir)
    temp_directory = tempfile.mkdtemp(dir=into_parent_dir)
    download_fname = os.path.join(temp_directory, 'download.tar.gz')
    # remove any partially downloaded file: TODO: checksumming & caching of
    # downloaded components in some central place
    fsutils.rmF(download_fname)
    # create the archive exclusively, we don't want someone else maliciously
    # overwriting our tar archive with something that unpacks to an absolute
    # path when we might be running sudo'd
    try:
        with _openExclusively(download_fname) as f:
            f.seek(0)
            for chunk in stream.iter_content(1024):
                f.write(chunk)
                if hash_name:
                    m.update(chunk)
            if hash_name:
                calculated_hash = m.hexdigest()
                logger.debug(
                    'calculated %s hash: %s check against: %s' %
                    (hash_name, calculated_hash, hash_value)
                )
                if hash_value and (hash_value != calculated_hash):
                    raise Exception('Hash verification failed.')
            logger.debug('wrote tarfile of size: %s to %s', f.tell(), download_fname)
            f.truncate()
            logger.debug(
                'got file, extract into %s (for %s)', temp_directory, into_directory
            )
            # head back to the start of the file and untar (without closing
            # the file)
            f.seek(0)
            f.flush()
            os.fsync(f)
            with tarfile.open(fileobj=f) as tf:
                extracted_dirname = ''
                # get the extraction directory name from the first part of
                # the extraction paths: it should be the same for all
                # members of the archive
                for member in tf.getmembers():
                    split_path = fsutils.fullySplitPath(member.name)
                    if len(split_path) > 1:
                        if extracted_dirname:
                            if split_path[0] != extracted_dirname:
                                raise ValueError(
                                    'archive does not appear to contain a single module'
                                )
                        else:
                            extracted_dirname = split_path[0]
                tf.extractall(path=temp_directory)
        # move the directory we extracted stuff into to where we actually
        # want it to be
        fsutils.rmRf(into_directory)
        shutil.move(os.path.join(temp_directory, extracted_dirname), into_directory)
    finally:
        fsutils.rmF(download_fname)
        fsutils.rmRf(temp_directory)
    logger.debug('extraction complete %s', into_directory)

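# Note that, unlike unpackFrom() above, this variant does not reject
# absolute or '..'-containing member names before extractall(). A sketch of
# the same guard unpackFrom() applies, usable as a pre-extraction check
# (illustrative only; not part of the original code):
def _checkMemberPaths(members):
    # raise if any member would extract outside the extraction directory
    for member in members:
        split_path = fsutils.fullySplitPath(member.name)
        if os.path.isabs(member.name) or '..' in split_path:
            raise ValueError('archive uses invalid paths')
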