def zip(path):
    '''
    Return the path to a zip archive holding the contents of the file or
    directory at |path|.
    '''
    source = path_util.normalize(path)
    path_util.check_isvalid(source, 'zip_directory')
    # Stage a private copy of the target so the archive has a stable layout.
    staging = tempfile.mkdtemp()
    staged_copy = os.path.join(staging, ZIP_SUBPATH)
    path_util.copy(source, staged_copy)
    # make_archive does NOT handle the single-file case cleanly, so zip
    # plain files by hand and let make_archive deal with directories.
    if not os.path.isdir(staged_copy):
        zip_path = staging + '.zip'
        with ZipFile(zip_path, 'w') as archive:
            archive.write(staged_copy, ZIP_SUBPATH)
    else:
        zip_path = shutil.make_archive(
            base_name=staging,
            base_dir=ZIP_SUBPATH,
            root_dir=staging,
            format='zip',
        )
    # Drop the staging copy; only the archive survives.
    path_util.remove(staging)
    return zip_path
def start_bundle(self, bundle, bundle_store, parent_dict, username):
    """
    Start a bundle in the background.

    Returns None if this machine is already running a bundle; otherwise
    stages the bundle's dependencies into a fresh temp directory, launches
    the bundle's command through a small shell driver script, and returns a
    dict with the bundle, its temp directory, and a job handle (the child
    process's pid as a string).
    """
    # Only one bundle may run on this machine at a time.
    if self.bundle != None: return None
    temp_dir = canonicalize.get_current_location(bundle_store, bundle.uuid)
    path_util.make_directory(temp_dir)
    # We don't follow symlinks (for consistency with remote
    # machine, where it is more secure, so people can't make us
    # copy random files on the system). Of course in local mode,
    # if some of those symlinks are absolute, the run can
    # read/write those locations. But we're not sandboxed, so
    # anything could happen. The dependencies are copied, so in
    # practice, this is not a bit worry.
    pairs = bundle.get_dependency_paths(bundle_store, parent_dict, temp_dir)
    print >> sys.stderr, "LocalMachine.start_bundle: copying dependencies of %s to %s" % (bundle.uuid, temp_dir)
    for (source, target) in pairs:
        path_util.copy(source, target, follow_symlinks=False)
    # Driver script: cd into the bundle dir and run the command, capturing
    # stdout/stderr into files alongside the outputs.
    script_file = temp_dir + ".sh"
    with open(script_file, "w") as f:
        f.write("cd %s &&\n" % temp_dir)
        f.write("(%s) > stdout 2>stderr\n" % bundle.command)
    # Use stdbuf (if it exists) to turn off buffering so we get real-time feedback.
    if os.path.exists("/usr/bin/stdbuf"):
        process = subprocess.Popen("/usr/bin/stdbuf -o0 bash " + script_file, shell=True)
    else:
        process = subprocess.Popen("bash " + script_file, shell=True)
    # Record the running bundle so later start_bundle calls bail out early.
    self.bundle = bundle
    self.temp_dir = temp_dir
    self.process = process
    return {"bundle": bundle, "temp_dir": temp_dir, "job_handle": str(process.pid)}
def zip(path, follow_symlinks, exclude_names=None, file_name=None):
    '''
    Take a path (or list of paths) to a file or directory and return a pair
    (zip_path, sub_path): the path to a zip archive containing its contents
    and the name of the entry inside the archive.

    |follow_symlinks|: whether to resolve symlinks when copying/zipping.
    |exclude_names|: names to skip while copying (default: none).
    |file_name|: name to use for the archive entry (default: ZIP_SUBPATH).

    Raises UsageError if the external `zip` command fails.
    '''
    # Fix: the default used to be a shared mutable list (exclude_names=[]).
    if exclude_names is None:
        exclude_names = []
    # Validate every input path up front.
    if isinstance(path, list):
        for p in path:
            absolute_path = path_util.normalize(p)
            path_util.check_isvalid(absolute_path, 'zip_directory')
    else:
        absolute_path = path_util.normalize(path)
        path_util.check_isvalid(absolute_path, 'zip_directory')

    # Add proper name for the entry inside the archive.
    sub_path = file_name if file_name else ZIP_SUBPATH

    # Recursively copy the directory into a temp directory.
    temp_path = tempfile.mkdtemp()
    temp_subpath = os.path.join(temp_path, sub_path)
    # TODO: this is inefficient; do the zipping from the original source
    # directly.
    if isinstance(path, list):
        os.mkdir(temp_subpath)
        for p in path:
            absolute_path = path_util.normalize(p)
            path_util.copy(absolute_path, os.path.join(temp_subpath, os.path.basename(p)),
                           follow_symlinks=follow_symlinks, exclude_names=exclude_names)
    else:
        absolute_path = path_util.normalize(path)
        path_util.copy(absolute_path, temp_subpath,
                       follow_symlinks=follow_symlinks, exclude_names=exclude_names)

    # Shelling out to `zip` preserves file permissions, which the Python
    # zipfile/shutil approaches do not; this only works where `zip` exists
    # (Linux).
    zip_path = temp_path + '.zip'
    opts = '-qr'
    if not follow_symlinks:
        opts += ' --symlinks'
    # HACK: paths are interpolated unquoted into a shell command. temp_path
    # comes from mkdtemp, but sub_path derives from |file_name| — callers
    # must not pass untrusted or space-containing names here.
    if os.system("cd %s && zip %s %s %s" % (temp_path, opts, zip_path, sub_path)) != 0:
        raise UsageError('zip failed')
    # Clean up the temporary directory and return the zip file's path.
    path_util.remove(temp_path)
    return zip_path, sub_path
def rm_partition(self, partition): """ Deletes the given disk from the bundle store, and if it is not the last partition, it redistributes the bundles from that partition across the remaining partitions. """ # Transfer all of the files to their correct locations. if self.__get_num_partitions() == 1: """ Prevent foot-shooting """ print >> sys.stderr, "Error, cannot remove last partition. If you really wish to delete CodaLab, please run the following command:" print >> sys.stderr, " rm -rf %s" % self.codalab_home return relocations = dict() partition_abs_path = os.path.join(self.partitions, partition) old_mdata = os.path.join(partition_abs_path, MultiDiskBundleStore.DATA_SUBDIRECTORY) old_mtemp = os.path.join(partition_abs_path, MultiDiskBundleStore.TEMP_SUBDIRECTORY) try: print partition_abs_path path_util.check_isvalid(partition_abs_path, 'rm-partition') except: print >> sys.stderr, "Partition with name '%s' does not exist. Run `cl ls-partitions` to see a list of mounted partitions." % partition sys.exit(1) # Reset the ring to distribute across remaining partitions self.ring.remove_node(partition) bundles_to_move = reduce(lambda dirs, files: dirs + files, path_util.ls(old_mdata)) for bundle in bundles_to_move: new_partition = self.ring.get_node(bundle) relocations[bundle] = os.path.join(self.partitions, new_partition) # Copy all bundles off of the old partition to temp directories on the new partition for bundle, partition in relocations.iteritems(): # temporary directory on the partition temp_dir = os.path.join(partition, MultiDiskBundleStore.TEMP_SUBDIRECTORY) from_path = os.path.join(old_mdata, bundle) to_path = os.path.join(temp_dir, 'stage-%s' % bundle) path_util.copy(from_path, to_path) # Now that each bundle is on the proper partition, move each from the staging area to the # production mdata/ subdirectory on its partition. 
for bundle, partition in relocations.iteritems(): temp_dir = os.path.join(partition, MultiDiskBundleStore.TEMP_SUBDIRECTORY) from_path = os.path.join(temp_dir, 'stage-%s' % bundle) to_path = os.path.join(partition, MultiDiskBundleStore.DATA_SUBDIRECTORY, bundle) path_util.rename(from_path, to_path) # Remove data from partition and unlink from CodaLab print >> sys.stderr, "Cleaning bundles off of partition..." path_util.remove(old_mdata) path_util.remove(old_mtemp) print >> sys.stderr, "Unlinking partition %s from CodaLab deployment..." % partition path_util.remove(partition_abs_path) print >> sys.stderr, "Partition removed successfully from bundle store pool"
def _make_bundle(self, bundle):
    """
    Materialize a bundle on disk from its dependencies, then mark it READY
    (or FAILED, with the failure message recorded in its metadata).

    Layout: if there is exactly one dependency keyed at the bundle root, it
    is copied directly to the bundle path; otherwise a directory is created
    with one entry per dependency key.
    """
    try:
        bundle_location = self._bundle_store.get_bundle_location(
            bundle.uuid)
        path = os.path.normpath(bundle_location)
        deps = []  # list of (dependency_path, child_path) pairs to copy
        for dep in bundle.dependencies:
            parent_bundle_path = os.path.normpath(
                self._bundle_store.get_bundle_location(dep.parent_uuid))
            dependency_path = os.path.normpath(
                os.path.join(parent_bundle_path, dep.parent_path))
            # Reject paths that escape the parent bundle (e.g. via '..') or
            # that do not exist (dangling symlinks pass via islink).
            if not dependency_path.startswith(parent_bundle_path) or (
                    not os.path.islink(dependency_path)
                    and not os.path.exists(dependency_path)):
                raise Exception('Invalid dependency %s' % (path_util.safe_join(
                    dep.parent_uuid, dep.parent_path)))
            child_path = os.path.normpath(
                os.path.join(path, dep.child_path))
            # Likewise, the child key must not escape the bundle directory.
            if not child_path.startswith(path):
                raise Exception('Invalid key for dependency: %s' %
                                (dep.child_path))
            deps.append((dependency_path, child_path))
        # Start from a clean slate at the bundle location.
        remove_path(path)
        if len(deps) == 1 and deps[0][1] == path:
            # Single dependency mapped to the bundle root: copy it directly.
            path_util.copy(deps[0][0], path, follow_symlinks=False)
        else:
            os.mkdir(path)
            for dependency_path, child_path in deps:
                path_util.copy(dependency_path, child_path,
                               follow_symlinks=False)
        self._model.update_disk_metadata(bundle, bundle_location,
                                         enforce_disk_quota=True)
        logger.info('Finished making bundle %s', bundle.uuid)
        self._model.update_bundle(bundle, {'state': State.READY})
    except Exception as e:
        # Any failure (validation or I/O) marks the bundle FAILED.
        logger.info('Failing bundle %s: %s', bundle.uuid, str(e))
        self._model.update_bundle(bundle, {
            'state': State.FAILED,
            'metadata': {
                'failure_message': str(e)
            }
        })
    finally:
        # Always remove the uuid from the in-flight set, success or failure.
        with self._make_uuids_lock:
            self._make_uuids.remove(bundle.uuid)
def add_partition(self, target, new_partition_name):
    """
    MultiDiskBundleStore specific method. Add a new partition to the bundle
    store. The "target" is actually a symlink to the target directory, which
    the user has configured as the mountpoint for some desired partition.

    First, all bundles that are to be relocated onto the new partition are
    copied to a temp location to be resilient against failures. After the
    copy is performed, the bundles are subsequently moved to the new
    partition, and finally the original copy of the bundles are deleted from
    their old locations
    """
    target = os.path.abspath(target)
    new_partition_location = os.path.join(self.partitions, new_partition_name)
    mtemp = os.path.join(target, MultiDiskBundleStore.TEMP_SUBDIRECTORY)
    try:
        path_util.make_directory(mtemp)
    except:
        # Cannot stage onto the new disk; abort before touching the ring.
        print >> sys.stderr, "Could not make directory %s on partition %s, aborting" % (mtemp, target)
        sys.exit(1)
    self.ring.add_node(new_partition_name)  # Add the node to the partition locations
    delete_on_success = []  # Paths to bundles that will be deleted after the copy finishes successfully
    print >> sys.stderr, "Marking bundles for placement on new partition %s (might take a while)" % new_partition_name
    # For each bundle in the bundle store, check to see if any hash to the
    # new partition. If so move them over
    partitions, _ = path_util.ls(self.partitions)
    for partition in partitions:
        partition_abs_path = os.path.join(self.partitions, partition, MultiDiskBundleStore.DATA_SUBDIRECTORY)
        # Flatten (dirs, files) from ls into one list of bundle names.
        bundles = reduce(lambda dirs, files: dirs + files, path_util.ls(partition_abs_path))
        for bundle in bundles:
            correct_partition = self.ring.get_node(bundle)
            if correct_partition != partition:
                # Reposition the node to the correct partition
                from_path = os.path.join(self.partitions, partition, MultiDiskBundleStore.DATA_SUBDIRECTORY, bundle)
                to_path = os.path.join(mtemp, bundle)
                print >> sys.stderr, "copying %s to %s" % (from_path, to_path)
                path_util.copy(from_path, to_path)
                delete_on_success += [from_path]
    print >> sys.stderr, "Adding new partition as %s..." % new_partition_location
    path_util.soft_link(target, new_partition_location)
    # Atomically move the temp location to the new partition's mdata
    new_mdata = os.path.join(new_partition_location, MultiDiskBundleStore.DATA_SUBDIRECTORY)
    new_mtemp = os.path.join(new_partition_location, MultiDiskBundleStore.TEMP_SUBDIRECTORY)
    path_util.rename(new_mtemp, new_mdata)
    path_util.make_directory(new_mtemp)
    # Go through and purge all of the originals at this time
    print >> sys.stderr, "Cleaning up drives..."
    for to_delete in delete_on_success:
        path_util.remove(to_delete)
    print >> sys.stderr, "Successfully added partition '%s' to the pool." % new_partition_name
def install_dependencies(self, bundle_store, parent_dict, dest_path, copy):
    '''
    Materialize this bundle's dependencies under dest_path, either as copies
    or as symlinks. Cleaning up dest_path afterwards is the caller's job.
    '''
    precondition(os.path.isabs(dest_path), '%s is a relative path!' % (dest_path,))
    dependency_pairs = self.get_dependency_paths(
        bundle_store, parent_dict, dest_path, relative_symlinks=not copy)
    for source, destination in dependency_pairs:
        # A leftover from a previous install must be cleared first
        # (this happens when we are reinstalling).
        if os.path.exists(destination):
            path_util.remove(destination)
        if not copy:
            os.symlink(source, destination)
        else:
            # Copy, but do not chase further symlinks inside the source.
            path_util.copy(source, destination, follow_symlinks=False)
def _make_bundle(self, bundle):
    """
    Materialize a bundle on disk from its dependencies, then mark it READY
    (or FAILED, with the failure message recorded in its metadata).

    Layout: if there is exactly one dependency keyed at the bundle root, it
    is copied directly to the bundle path; otherwise a directory is created
    with one entry per dependency key.
    """
    try:
        path = os.path.normpath(self._bundle_store.get_bundle_location(bundle.uuid))
        deps = []  # list of (dependency_path, child_path) pairs to copy
        for dep in bundle.dependencies:
            parent_bundle_path = os.path.normpath(
                self._bundle_store.get_bundle_location(dep.parent_uuid)
            )
            dependency_path = os.path.normpath(
                os.path.join(parent_bundle_path, dep.parent_path)
            )
            # Reject paths that escape the parent bundle (e.g. via '..') or
            # that do not exist (dangling symlinks pass via islink).
            if not dependency_path.startswith(parent_bundle_path) or (
                not os.path.islink(dependency_path) and not os.path.exists(dependency_path)
            ):
                raise Exception(
                    'Invalid dependency %s'
                    % (path_util.safe_join(dep.parent_uuid, dep.parent_path))
                )
            child_path = os.path.normpath(os.path.join(path, dep.child_path))
            # Likewise, the child key must not escape the bundle directory.
            if not child_path.startswith(path):
                raise Exception('Invalid key for dependency: %s' % (dep.child_path))
            deps.append((dependency_path, child_path))
        # Start from a clean slate at the bundle location.
        remove_path(path)
        if len(deps) == 1 and deps[0][1] == path:
            # Single dependency mapped to the bundle root: copy it directly.
            path_util.copy(deps[0][0], path, follow_symlinks=False)
        else:
            os.mkdir(path)
            for dependency_path, child_path in deps:
                path_util.copy(dependency_path, child_path, follow_symlinks=False)
        self._upload_manager.update_metadata_and_save(bundle, enforce_disk_quota=True)
        logger.info('Finished making bundle %s', bundle.uuid)
        self._model.update_bundle(bundle, {'state': State.READY})
    except Exception as e:
        # Any failure (validation or I/O) marks the bundle FAILED.
        logger.info('Failing bundle %s: %s', bundle.uuid, str(e))
        self._model.update_bundle(
            bundle, {'state': State.FAILED, 'metadata': {'failure_message': str(e)}}
        )
    finally:
        # Always remove the uuid from the in-flight set, success or failure.
        with self._make_uuids_lock:
            self._make_uuids.remove(bundle.uuid)
def zip(path, follow_symlinks, exclude_patterns, file_name):
    '''
    Take a path to a file or directory |path| (or a list of such paths) and
    return the path to a zip archive containing its contents.
    |file_name| is what the zip archive contains.
    |follow_symlinks|: whether to resolve symlinks when copying/zipping.
    |exclude_patterns|: patterns to skip while copying (e.g., *.o).
    Raises UsageError if the external `zip` command fails.
    '''
    # Validate every input path up front.
    if isinstance(path, list):
        for p in path:
            absolute_path = path_util.normalize(p)
            path_util.check_isvalid(absolute_path, 'zip_directory')
    else:
        absolute_path = path_util.normalize(path)
        path_util.check_isvalid(absolute_path, 'zip_directory')
    # Recursively copy the directory into a temp directory.
    temp_path = tempfile.mkdtemp()
    temp_subpath = os.path.join(temp_path, file_name)
    print_util.open_line('Copying %s to %s' % (path, temp_subpath))
    if isinstance(path, list):
        # Multiple sources: the archive entry is a directory holding each one.
        os.mkdir(temp_subpath)
        for p in path:
            absolute_path = path_util.normalize(p)
            path_util.copy(absolute_path, os.path.join(temp_subpath, os.path.basename(p)), follow_symlinks=follow_symlinks, exclude_patterns=exclude_patterns)
    else:
        absolute_path = path_util.normalize(path)
        path_util.copy(absolute_path, temp_subpath, follow_symlinks=follow_symlinks, exclude_patterns=exclude_patterns)
    print_util.clear_line()
    # Shell out to `zip` (preserves permissions, unlike zipfile/shutil).
    zip_path = temp_path + '.zip'
    opts = '-qr'
    if not follow_symlinks:
        # Store symlinks as links rather than resolving them.
        opts += ' --symlinks'
    print_util.open_line('Zipping to %s' % zip_path)
    # NOTE(review): paths are interpolated unquoted into a shell command;
    # temp_path comes from mkdtemp, but file_name must be a trusted,
    # space-free name — confirm against callers.
    if os.system("cd %s && zip %s %s %s" % (temp_path, opts, zip_path, file_name)) != 0:
        raise UsageError('zip failed')
    # Clean up the staging copy; only the archive survives.
    path_util.remove(temp_path)
    return zip_path
def start_bundle(self, bundle, bundle_store, parent_dict, username):
    '''
    Start a bundle in the background.

    Returns None if this machine is already running a bundle; otherwise
    stages the bundle's dependencies into a fresh temp directory, launches
    the bundle's command through a small shell driver script, and returns a
    dict with the bundle, its temp directory, and a job handle (the child
    process's pid as a string).
    '''
    # Only one bundle may run on this machine at a time.
    if self.bundle != None: return None
    temp_dir = canonicalize.get_current_location(bundle_store, bundle.uuid)
    path_util.make_directory(temp_dir)
    # We don't follow symlinks (for consistency with remote
    # machine, where it is more secure, so people can't make us
    # copy random files on the system). Of course in local mode,
    # if some of those symlinks are absolute, the run can
    # read/write those locations. But we're not sandboxed, so
    # anything could happen. The dependencies are copied, so in
    # practice, this is not a bit worry.
    pairs = bundle.get_dependency_paths(bundle_store, parent_dict, temp_dir)
    print >> sys.stderr, 'LocalMachine.start_bundle: copying dependencies of %s to %s' % (
        bundle.uuid, temp_dir)
    for (source, target) in pairs:
        path_util.copy(source, target, follow_symlinks=False)
    # Driver script: cd into the bundle dir and run the command, capturing
    # stdout/stderr into files alongside the outputs.
    script_file = temp_dir + '.sh'
    with open(script_file, 'w') as f:
        f.write("cd %s &&\n" % temp_dir)
        f.write('(%s) > stdout 2>stderr\n' % bundle.command)
    # Use stdbuf (if it exists) to turn off buffering so we get real-time feedback.
    if os.path.exists('/usr/bin/stdbuf'):
        process = subprocess.Popen("/usr/bin/stdbuf -o0 bash " + script_file, shell=True)
    else:
        process = subprocess.Popen("bash " + script_file, shell=True)
    # Record the running bundle so later start_bundle calls bail out early.
    self.bundle = bundle
    self.temp_dir = temp_dir
    self.process = process
    return {
        'bundle': bundle,
        'temp_dir': temp_dir,
        'job_handle': str(process.pid)
    }
def install_dependencies(self, bundle_store, parent_dict, dest_path, copy):
    '''
    Place this bundle's dependencies inside the directory at dest_path via
    copy or symlink; the caller owns (and must clean up) that directory.
    '''
    precondition(os.path.isabs(dest_path), '%s is a relative path!' % (dest_path, ))
    pairs = self.get_dependency_paths(bundle_store, parent_dict, dest_path,
                                      relative_symlinks=not copy)
    for pair in pairs:
        dep_target, install_path = pair
        # Reinstalls leave stale entries behind; drop them before linking.
        if os.path.exists(install_path):
            path_util.remove(install_path)
        # Either copy (without chasing nested symlinks) or symlink.
        if copy:
            path_util.copy(dep_target, install_path, follow_symlinks=False)
        else:
            os.symlink(dep_target, install_path)
def upload(self, path, allow_symlinks=False):
    '''
    Copy the contents of the directory at path into the data subdirectory,
    in a subfolder named by a hash of the contents of the new data directory.
    Return a (data_hash, metadata) pair, where the metadata is a dict mapping
    keys to precomputed statistics about the new data directory.
    '''
    absolute_path = path_util.normalize(path)
    path_util.check_isvalid(absolute_path, 'upload')
    # Recursively copy the directory into a new BundleStore temp directory.
    temp_directory = uuid.uuid4().hex
    temp_path = os.path.join(self.temp, temp_directory)
    path_util.copy(absolute_path, temp_path)
    # Multiplex between uploading a directory and uploading a file here.
    # All other path_util calls will use these lists of directories and files.
    if os.path.isdir(temp_path):
        dirs_and_files = path_util.recursive_ls(temp_path)
    else:
        dirs_and_files = ([], [temp_path])
    if not allow_symlinks:
        # Reject any symlinks in the upload unless the caller opted in.
        path_util.check_for_symlinks(temp_path, dirs_and_files)
    path_util.set_permissions(temp_path, 0o755, dirs_and_files)
    # Hash the contents of the temporary directory, and then if there is no
    # data with this hash value, move this directory into the data directory.
    data_hash = '0x%s' % (path_util.hash_directory(temp_path, dirs_and_files),)
    data_size = path_util.get_size(temp_path, dirs_and_files)
    final_path = os.path.join(self.data, data_hash)
    final_path_exists = False
    try:
        # utime doubles as an existence probe and a freshness touch: it
        # raises ENOENT when no bundle with this hash has been stored yet.
        os.utime(final_path, None)
        final_path_exists = True
    except OSError, e:
        if e.errno == errno.ENOENT:
            # First upload with this hash: move the staged copy into place.
            os.rename(temp_path, final_path)
        else:
            raise
    # NOTE(review): data_size and final_path_exists are unused in the visible
    # body — the function presumably continues (cleanup and the documented
    # (data_hash, metadata) return) beyond this excerpt; confirm against the
    # full source.
def upload(self, sources, follow_symlinks, exclude_patterns, git, unpack, remove_sources):
    '''
    |sources|: specifies the locations of the contents to upload. Each element is either a URL or a local path.
    |follow_symlinks|: for local path(s), whether to follow (resolve) symlinks
    |exclude_patterns|: for local path(s), don't upload these patterns (e.g., *.o)
    |git|: for URL, whether |source| is a git repo to clone.
    |unpack|: for each source in |sources|, whether to unpack it if it's an archive.
    |remove_sources|: remove |sources|.

    If |sources| contains one source, then the bundle contents will be that source.
    Otherwise, the bundle contents will be a directory with each of the sources.
    Exceptions:
    - If |git|, then each source is replaced with the result of running 'git clone |source|'
    - If |unpack| is True or a source is an archive (zip, tar.gz, etc.), then unpack the source.

    Install the contents of the directory at |source| into
    DATA_SUBDIRECTORY in a subdirectory named by a hash of the contents.

    Return a (data_hash, metadata) pair, where the metadata is a dict mapping
    keys to precomputed statistics about the new data directory.
    '''
    to_delete = []  # paths removed at the end, after a successful store

    # Create temporary directory as a staging area and put everything there.
    temp_path = tempfile.mkdtemp('-bundle_store_upload')
    temp_subpaths = []
    for source in sources:
        # Where to save |source| to (might change this value if we unpack).
        temp_subpath = os.path.join(temp_path, os.path.basename(source))
        if remove_sources:
            to_delete.append(source)
        source_unpack = unpack and zip_util.path_is_archive(source)
        if path_util.path_is_url(source):
            # Download the URL.
            print_util.open_line('BundleStore.upload: downloading %s to %s' % (source, temp_path))
            if git:
                file_util.git_clone(source, temp_subpath)
            else:
                file_util.download_url(source, temp_subpath, print_status=True)
            if source_unpack:
                # Replace the downloaded archive with its unpacked contents.
                zip_util.unpack(temp_subpath, zip_util.strip_archive_ext(temp_subpath))
                path_util.remove(temp_subpath)
                temp_subpath = zip_util.strip_archive_ext(temp_subpath)
            print_util.clear_line()
        else:
            # Copy the local path.
            source_path = path_util.normalize(source)
            path_util.check_isvalid(source_path, 'upload')
            # Recursively copy the directory into a new BundleStore temp directory.
            print_util.open_line('BundleStore.upload: %s => %s' % (source_path, temp_subpath))
            if source_unpack:
                zip_util.unpack(source_path, zip_util.strip_archive_ext(temp_subpath))
                temp_subpath = zip_util.strip_archive_ext(temp_subpath)
            else:
                if remove_sources:
                    # Move rather than copy since the source is going away anyway.
                    path_util.rename(source_path, temp_subpath)
                else:
                    path_util.copy(source_path, temp_subpath, follow_symlinks=follow_symlinks, exclude_patterns=exclude_patterns)
            print_util.clear_line()
        temp_subpaths.append(temp_subpath)

    # If exactly one source, then upload that directly.
    if len(temp_subpaths) == 1:
        to_delete.append(temp_path)
        temp_path = temp_subpaths[0]

    # Multiplex between uploading a directory and uploading a file here.
    # All other path_util calls will use these lists of directories and files.
    if os.path.isdir(temp_path):
        dirs_and_files = path_util.recursive_ls(temp_path)
    else:
        dirs_and_files = ([], [temp_path])

    # Hash the contents of the temporary directory, and then if there is no
    # data with this hash value, move this directory into the data directory.
    print_util.open_line('BundleStore.upload: hashing %s' % temp_path)
    data_hash = '0x%s' % (path_util.hash_directory(temp_path, dirs_and_files),)
    print_util.clear_line()
    print_util.open_line('BundleStore.upload: computing size of %s' % temp_path)
    data_size = path_util.get_size(temp_path, dirs_and_files)
    print_util.clear_line()
    final_path = os.path.join(self.data, data_hash)
    if os.path.exists(final_path):
        # Already exists, just delete it
        path_util.remove(temp_path)
    else:
        print >>sys.stderr, 'BundleStore.upload: moving %s to %s' % (temp_path, final_path)
        path_util.rename(temp_path, final_path)

    # Delete paths.
    for path in to_delete:
        if os.path.exists(path):
            path_util.remove(path)

    # After this operation there should always be a directory at the final path.
    assert(os.path.lexists(final_path)), 'Uploaded to %s failed!' % (final_path,)
    return (data_hash, {'data_size': data_size})
def _make_bundle(self, bundle):
    """
    Materialize a bundle on disk from its dependencies, then mark it READY
    (or FAILED, with the failure message and traceback in its metadata).

    Linked bundles (those with a link_url in metadata) use the link target
    as their location instead of the bundle store path.
    """
    try:
        bundle_link_url = getattr(bundle.metadata, "link_url", None)
        # Prefer the linked location when present.
        bundle_location = bundle_link_url or self._bundle_store.get_bundle_location(
            bundle.uuid)
        path = os.path.normpath(bundle_location)
        deps = []  # list of (dependency_path, child_path) pairs to copy
        # Batch-fetch link_url metadata for all parents in one model call.
        parent_bundle_link_urls = self._model.get_bundle_metadata(
            [dep.parent_uuid for dep in bundle.dependencies], "link_url")
        for dep in bundle.dependencies:
            parent_bundle_link_url = parent_bundle_link_urls.get(
                dep.parent_uuid)
            try:
                parent_bundle_path = parent_bundle_link_url or os.path.normpath(
                    self._bundle_store.get_bundle_location(
                        dep.parent_uuid))
            except NotFoundError:
                # Parent bundle's location is unknown: invalid dependency.
                raise Exception('Invalid dependency %s' % (path_util.safe_join(
                    dep.parent_uuid, dep.parent_path)))
            # TODO(Ashwin): make this logic non-fs specific.
            dependency_path = os.path.normpath(
                os.path.join(parent_bundle_path, dep.parent_path))
            # Reject paths that escape the parent bundle (e.g. via '..') or
            # that do not exist (dangling symlinks pass via islink).
            if not dependency_path.startswith(parent_bundle_path) or (
                    not os.path.islink(dependency_path)
                    and not os.path.exists(dependency_path)):
                raise Exception('Invalid dependency %s' % (path_util.safe_join(
                    dep.parent_uuid, dep.parent_path)))
            child_path = os.path.normpath(
                os.path.join(path, dep.child_path))
            # Likewise, the child key must not escape the bundle directory.
            if not child_path.startswith(path):
                raise Exception('Invalid key for dependency: %s' %
                                (dep.child_path))
            deps.append((dependency_path, child_path))
        # Start from a clean slate at the bundle location.
        remove_path(path)
        if len(deps) == 1 and deps[0][1] == path:
            # Single dependency mapped to the bundle root: copy it directly.
            path_util.copy(deps[0][0], path, follow_symlinks=False)
        else:
            os.mkdir(path)
            for dependency_path, child_path in deps:
                path_util.copy(dependency_path, child_path,
                               follow_symlinks=False)
        # TODO(Ashwin): fix
        self._model.update_disk_metadata(bundle, bundle_location,
                                         enforce_disk_quota=True)
        logger.info('Finished making bundle %s', bundle.uuid)
        self._model.update_bundle(bundle, {'state': State.READY})
    except Exception as e:
        # Any failure (validation or I/O) marks the bundle FAILED, keeping
        # the traceback for debugging.
        logger.info('Failing bundle %s: %s', bundle.uuid, str(e))
        self._model.update_bundle(
            bundle,
            {
                'state': State.FAILED,
                'metadata': {
                    'failure_message': str(e),
                    'error_traceback': traceback.format_exc(),
                },
            },
        )
    finally:
        # Always remove the uuid from the in-flight set, success or failure.
        with self._make_uuids_lock:
            self._make_uuids.remove(bundle.uuid)
def upload_to_bundle_store(self, bundle, sources, follow_symlinks, exclude_patterns, remove_sources, git, unpack, simplify_archives):
    """
    Uploads contents for the given bundle to the bundle store.

    |sources|: specifies the locations of the contents to upload. Each element is
               either a URL, a local path or a tuple (filename, file-like object).
    |follow_symlinks|: for local path(s), whether to follow (resolve) symlinks,
                       but only if remove_sources is False.
    |exclude_patterns|: for local path(s), don't upload these patterns (e.g., *.o),
                        but only if remove_sources is False.
    |remove_sources|: for local path(s), whether |sources| should be removed
    |git|: for URLs, whether |source| is a git repo to clone.
    |unpack|: for each source in |sources|, whether to unpack it if it's an archive.
    |simplify_archives|: whether to simplify unpacked archives so that if they
                         contain a single file, the final path is just that file,
                         not a directory containing that file.

    If |sources| contains one source, then the bundle contents will be that source.
    Otherwise, the bundle contents will be a directory with each of the sources.
    Exceptions:
    - If |git|, then each source is replaced with the result of running 'git clone |source|'
    - If |unpack| is True or a source is an archive (zip, tar.gz, etc.), then unpack the source.
    """
    bundle_path = self._bundle_store.get_bundle_location(bundle.uuid)
    try:
        path_util.make_directory(bundle_path)
        # Note that for uploads with a single source, the directory
        # structure is simplified at the end.
        for source in sources:
            # Classify the source: URL, local path, or (filename, fileobj).
            is_url, is_local_path, is_fileobj, filename = self._interpret_source(source)
            source_output_path = os.path.join(bundle_path, filename)
            if is_url:
                if git:
                    source_output_path = file_util.strip_git_ext(source_output_path)
                    file_util.git_clone(source, source_output_path)
                else:
                    file_util.download_url(source, source_output_path)
                    if unpack and self._can_unpack_file(source_output_path):
                        # Downloaded temp archive is always removed after unpacking.
                        self._unpack_file(
                            source_output_path, zip_util.strip_archive_ext(source_output_path),
                            remove_source=True, simplify_archive=simplify_archives)
            elif is_local_path:
                source_path = path_util.normalize(source)
                path_util.check_isvalid(source_path, 'upload')
                if unpack and self._can_unpack_file(source_path):
                    self._unpack_file(
                        source_path, zip_util.strip_archive_ext(source_output_path),
                        remove_source=remove_sources, simplify_archive=simplify_archives)
                elif remove_sources:
                    # Move rather than copy since the source is going away anyway.
                    path_util.rename(source_path, source_output_path)
                else:
                    path_util.copy(source_path, source_output_path, follow_symlinks=follow_symlinks, exclude_patterns=exclude_patterns)
            elif is_fileobj:
                if unpack and zip_util.path_is_archive(filename):
                    self._unpack_fileobj(
                        source[0], source[1],
                        zip_util.strip_archive_ext(source_output_path),
                        simplify_archive=simplify_archives)
                else:
                    # Plain stream: write it straight to disk.
                    with open(source_output_path, 'wb') as out:
                        shutil.copyfileobj(source[1], out)
        if len(sources) == 1:
            # Single source: collapse the wrapping directory.
            self._simplify_directory(bundle_path)
    except:
        # Best-effort cleanup of the partial upload, then re-raise.
        if os.path.exists(bundle_path):
            path_util.remove(bundle_path)
        raise
def upload_to_bundle_store(
    self,
    bundle,
    sources,
    follow_symlinks,
    exclude_patterns,
    remove_sources,
    git,
    unpack,
    simplify_archives,
):
    """
    Uploads contents for the given bundle to the bundle store.

    |sources|: specifies the locations of the contents to upload. Each element is
               either a URL, a local path or a tuple (filename, binary file-like object).
    |follow_symlinks|: for local path(s), whether to follow (resolve) symlinks,
                       but only if remove_sources is False.
    |exclude_patterns|: for local path(s), don't upload these patterns (e.g., *.o),
                        but only if remove_sources is False.
    |remove_sources|: for local path(s), whether |sources| should be removed
    |git|: for URLs, whether |source| is a git repo to clone.
    |unpack|: for each source in |sources|, whether to unpack it if it's an archive.
    |simplify_archives|: whether to simplify unpacked archives so that if they
                         contain a single file, the final path is just that file,
                         not a directory containing that file.

    If |sources| contains one source, then the bundle contents will be that source.
    Otherwise, the bundle contents will be a directory with each of the sources.
    Exceptions:
    - If |git|, then each source is replaced with the result of running 'git clone |source|'
    - If |unpack| is True or a source is an archive (zip, tar.gz, etc.), then unpack the source.
    """
    # Always apply the server-wide default exclusions on top of the caller's.
    exclude_patterns = (
        self._default_exclude_patterns + exclude_patterns
        if exclude_patterns
        else self._default_exclude_patterns
    )
    bundle_link_url = getattr(bundle.metadata, "link_url", None)
    if bundle_link_url:
        # Don't do anything for linked bundles.
        return
    bundle_path = self._bundle_store.get_bundle_location(bundle.uuid)
    try:
        path_util.make_directory(bundle_path)
        # Note that for uploads with a single source, the directory
        # structure is simplified at the end.
        for source in sources:
            # Classify the source: URL, local path, or (filename, fileobj).
            is_url, is_local_path, is_fileobj, filename = self._interpret_source(
                source)
            source_output_path = os.path.join(bundle_path, filename)
            if is_url:
                if git:
                    source_output_path = file_util.strip_git_ext(
                        source_output_path)
                    file_util.git_clone(source, source_output_path)
                else:
                    file_util.download_url(source, source_output_path)
                    if unpack and self._can_unpack_file(
                            source_output_path):
                        # Downloaded temp archive is removed after unpacking.
                        self._unpack_file(
                            source_output_path,
                            zip_util.strip_archive_ext(source_output_path),
                            remove_source=True,
                            simplify_archive=simplify_archives,
                        )
            elif is_local_path:
                source_path = path_util.normalize(source)
                path_util.check_isvalid(source_path, 'upload')
                if unpack and self._can_unpack_file(source_path):
                    self._unpack_file(
                        source_path,
                        zip_util.strip_archive_ext(source_output_path),
                        remove_source=remove_sources,
                        simplify_archive=simplify_archives,
                    )
                elif remove_sources:
                    # Move rather than copy since the source is going away anyway.
                    path_util.rename(source_path, source_output_path)
                else:
                    path_util.copy(
                        source_path,
                        source_output_path,
                        follow_symlinks=follow_symlinks,
                        exclude_patterns=exclude_patterns,
                    )
            elif is_fileobj:
                if unpack and zip_util.path_is_archive(filename):
                    self._unpack_fileobj(
                        source[0],
                        source[1],
                        zip_util.strip_archive_ext(source_output_path),
                        simplify_archive=simplify_archives,
                    )
                else:
                    # Plain stream: write it straight to disk.
                    with open(source_output_path, 'wb') as out:
                        shutil.copyfileobj(source[1], out)
        if len(sources) == 1:
            # Single source: collapse the wrapping directory.
            self._simplify_directory(bundle_path)
    except:
        # Best-effort cleanup of the partial upload, then re-raise.
        if os.path.exists(bundle_path):
            path_util.remove(bundle_path)
        raise
def add_partition(self, target, new_partition_name):
    """
    MultiDiskBundleStore specific method. Add a new partition to the bundle store. The "target" is actually a symlink to
    the target directory, which the user has configured as the mountpoint for some desired partition.

    First, all bundles that are to be relocated onto the new partition are copied to a temp location to be resilient
    against failures. After the copy is performed, the bundles are subsequently moved to the new partition, and finally
    the original copy of the bundles are deleted from their old locations
    """
    target = os.path.abspath(target)
    new_partition_location = os.path.join(self.partitions, new_partition_name)

    # Stage everything in a temp subdirectory ON the new disk first, so a
    # crash mid-copy never leaves the live data directory half-populated.
    mtemp = os.path.join(target, MultiDiskBundleStore.TEMP_SUBDIRECTORY)

    try:
        path_util.make_directory(mtemp)
    except:
        print >> sys.stderr, "Could not make directory %s on partition %s, aborting" % (mtemp, target)
        sys.exit(1)

    self.ring.add_node(new_partition_name)  # Add the node to the partition locations
    delete_on_success = []  # Paths to bundles that will be deleted after the copy finishes successfully

    print >> sys.stderr, "Marking bundles for placement on new partition %s (might take a while)" % new_partition_name
    # For each bundle in the bundle store, check to see if any hash to the new partition. If so move them over
    partitions, _ = path_util.ls(self.partitions)
    for partition in partitions:
        partition_abs_path = os.path.join(self.partitions, partition, MultiDiskBundleStore.DATA_SUBDIRECTORY)
        # path_util.ls returns a (dirs, files) pair; concatenate both into one list.
        bundles = reduce(lambda dirs, files: dirs + files, path_util.ls(partition_abs_path))
        for bundle in bundles:
            # The consistent-hash ring decides which partition each bundle belongs on.
            correct_partition = self.ring.get_node(bundle)
            if correct_partition != partition:
                # Reposition the node to the correct partition
                from_path = os.path.join(self.partitions, partition, MultiDiskBundleStore.DATA_SUBDIRECTORY, bundle)
                to_path = os.path.join(mtemp, bundle)
                print >> sys.stderr, "copying %s to %s" % (from_path, to_path)
                path_util.copy(from_path, to_path)
                delete_on_success += [from_path]

    print >> sys.stderr, "Adding new partition as %s..." % new_partition_location
    path_util.soft_link(target, new_partition_location)

    # Atomically move the temp location to the new partition's mdata
    new_mdata = os.path.join(new_partition_location, MultiDiskBundleStore.DATA_SUBDIRECTORY)
    new_mtemp = os.path.join(new_partition_location, MultiDiskBundleStore.TEMP_SUBDIRECTORY)
    path_util.rename(new_mtemp, new_mdata)
    path_util.make_directory(new_mtemp)

    # Go through and purge all of the originals at this time
    print >> sys.stderr, "Cleaning up drives..."
    for to_delete in delete_on_success:
        path_util.remove(to_delete)

    print >> sys.stderr, "Successfully added partition '%s' to the pool." % new_partition_name
def upload(self, path, follow_symlinks, exclude_patterns):
    '''
    Copy the contents of the directory at |path| into the data subdirectory,
    in a subfolder named by a hash of the contents of the new data directory.
    If |path| is in a temporary directory, then we just move it.

    |path| may also be a list of paths (uploaded as one directory) or a URL.

    Return a (data_hash, metadata) pair, where the metadata is a dict mapping
    keys to precomputed statistics about the new data directory.
    '''
    # Create temporary directory as a staging area.
    # If |path| is already temporary, then we use that directly
    # (with the understanding that |path| will be moved)
    if not isinstance(path, list) and os.path.realpath(path).startswith(
            os.path.realpath(self.temp)):
        temp_path = path
    else:
        temp_path = os.path.join(self.temp, uuid.uuid4().hex)

    if not isinstance(path, list) and path_util.path_is_url(path):
        # Have to be careful. Want to make sure if we're fetching a URL
        # that points to a file, we are allowing this.
        if path.startswith('file://'):
            path_suffix = path[7:]
            if os.path.islink(path_suffix):
                raise UsageError('Not allowed to upload symlink %s' % path_suffix)
            if not any(path_suffix.startswith(f) for f in self.direct_upload_paths):
                raise UsageError(
                    'Not allowed to upload %s (only %s allowed)' %
                    (path_suffix, self.direct_upload_paths))

        # Download |path| if it is a URL.
        print >> sys.stderr, 'BundleStore.upload: downloading %s to %s' % (path, temp_path)
        file_util.download_url(path, temp_path, print_status=True)
    elif path != temp_path:
        # Copy |path| into the temp_path.
        if isinstance(path, list):
            absolute_path = [path_util.normalize(p) for p in path]
            for p in absolute_path:
                path_util.check_isvalid(p, 'upload')
        else:
            absolute_path = path_util.normalize(path)
            path_util.check_isvalid(absolute_path, 'upload')

        # Recursively copy the directory into a new BundleStore temp directory.
        print_util.open_line('BundleStore.upload: copying %s to %s' % (absolute_path, temp_path))
        path_util.copy(absolute_path, temp_path, follow_symlinks=follow_symlinks, exclude_patterns=exclude_patterns)
        print_util.clear_line()

    # Multiplex between uploading a directory and uploading a file here.
    # All other path_util calls will use these lists of directories and files.
    if os.path.isdir(temp_path):
        dirs_and_files = path_util.recursive_ls(temp_path)
    else:
        dirs_and_files = ([], [temp_path])

    # Hash the contents of the temporary directory, and then if there is no
    # data with this hash value, move this directory into the data directory.
    print_util.open_line('BundleStore.upload: hashing %s' % temp_path)
    data_hash = '0x%s' % (path_util.hash_directory(temp_path, dirs_and_files), )
    print_util.clear_line()
    print_util.open_line('BundleStore.upload: computing size of %s' % temp_path)
    data_size = path_util.get_size(temp_path, dirs_and_files)
    print_util.clear_line()

    final_path = os.path.join(self.data, data_hash)
    final_path_exists = False
    try:
        # If data_hash already exists, then we don't need to move it over.
        # os.utime doubles as an existence probe (EAFP) and freshens the mtime
        # of the deduplicated entry.
        os.utime(final_path, None)
        final_path_exists = True
    except OSError, e:
        if e.errno == errno.ENOENT:
            print >> sys.stderr, 'BundleStore.upload: moving %s to %s' % (temp_path, final_path)
            path_util.rename(temp_path, final_path)
        else:
            raise
def start_bundle(self, bundle, bundle_store, parent_dict, username):
    '''
    Sets up all the temporary files and then dispatches the job.
    username: the username of the owner of the bundle
    Returns the bundle information.
    '''
    # Create a temporary directory
    temp_dir = canonicalize.get_current_location(bundle_store, bundle.uuid)
    temp_dir = os.path.realpath(temp_dir)  # Follow symlinks
    path_util.make_directory(temp_dir)

    # Copy all the dependencies to that temporary directory.
    pairs = bundle.get_dependency_paths(bundle_store, parent_dict, temp_dir)
    print >>sys.stderr, 'RemoteMachine.start_bundle: copying dependencies of %s to %s' % (bundle.uuid, temp_dir)
    for (source, target) in pairs:
        path_util.copy(source, target, follow_symlinks=False)

    # Set defaults for the dispatcher: each request_* falls back to the
    # machine-level default when the bundle metadata doesn't specify one.
    docker_image = self.default_docker_image
    if bundle.metadata.request_docker_image:
        docker_image = bundle.metadata.request_docker_image
    request_time = self.default_request_time
    if bundle.metadata.request_time:
        request_time = bundle.metadata.request_time
    request_memory = self.default_request_memory
    if bundle.metadata.request_memory:
        request_memory = bundle.metadata.request_memory
    request_cpus = self.default_request_cpus
    if bundle.metadata.request_cpus:
        request_cpus = bundle.metadata.request_cpus
    request_gpus = self.default_request_gpus
    if bundle.metadata.request_gpus:
        request_gpus = bundle.metadata.request_gpus
    request_queue = self.default_request_queue
    if bundle.metadata.request_queue:
        request_queue = bundle.metadata.request_queue
    request_priority = self.default_request_priority
    if bundle.metadata.request_priority:
        request_priority = bundle.metadata.request_priority

    script_file = temp_dir + '.sh'  # main entry point
    ptr_temp_dir = '$temp_dir'
    # 1) If no argument to script_file, use the temp_dir (e.g., Torque, master/worker share file system).
    # 2) If argument is 'use_script_for_temp_dir', use the script to determine temp_dir (e.g., qsub, no master/worker do not share file system).
    set_temp_dir_header = 'if [ -z "$1" ]; then temp_dir=' + temp_dir + '; else temp_dir=`readlink -f $0 | sed -e \'s/\\.sh$//\'`; fi\n'

    # Write the command to be executed to a script.
    if docker_image:
        internal_script_file = temp_dir + '-internal.sh'  # run inside the docker container
        # These paths depend on $temp_dir, an environment variable which will be set (referenced inside script_file)
        ptr_container_file = ptr_temp_dir + '.cid'  # contains the docker container id
        ptr_action_file = ptr_temp_dir + '.action'  # send actions to the container (e.g., kill)
        ptr_status_dir = ptr_temp_dir + '.status'  # receive information from the container (e.g., memory)
        ptr_script_file = ptr_temp_dir + '.sh'  # main entry point
        ptr_internal_script_file = ptr_temp_dir + '-internal.sh'  # run inside the docker container
        # Names of file inside the docker container
        docker_temp_dir = bundle.uuid
        docker_internal_script_file = bundle.uuid + '-internal.sh'

        # 1) script_file starts the docker container and runs internal_script_file in docker.
        # --rm removes the docker container once the job terminates (note that this makes things slow)
        # -v mounts the internal and user scripts and the temp directory
        # Trap SIGTERM and forward it to docker.
        with open(script_file, 'w') as f:
            f.write(set_temp_dir_header)

            # Monitor CPU/memory/disk.
            # Emits a shell snippet that copies |source_template % arg| to
            # |target| only when both files exist.
            def copy_if_exists(source_template, arg, target):
                source = source_template % arg
                # -f because target might be read-only
                return 'if [ -e %s ] && [ -e %s ]; then cp -f %s %s; fi' % (arg, source, source, target)

            monitor_commands = [
                # Report on status (memory, cpu, etc.)
                'mkdir -p %s' % ptr_status_dir,
                'if [ -e /cgroup ]; then cgroup=/cgroup; else cgroup=/sys/fs/cgroup; fi',  # find where cgroup is
                copy_if_exists('$cgroup/cpuacct/docker/$(cat %s)/cpuacct.stat', ptr_container_file, ptr_status_dir),
                copy_if_exists('$cgroup/memory/docker/$(cat %s)/memory.usage_in_bytes', ptr_container_file, ptr_status_dir),
                copy_if_exists('$cgroup/blkio/docker/$(cat %s)/blkio.throttle.io_service_bytes', ptr_container_file, ptr_status_dir),
                # Respond to kill action
                '[ -e %s ] && [ "$(cat %s)" == "kill" ] && docker kill $(cat %s) && rm %s' % (ptr_action_file, ptr_action_file, ptr_container_file, ptr_action_file),
                # Sleep
                'sleep 1',
            ]
            # Background monitor loop: runs until temp_dir disappears.
            f.write('while [ -e %s ]; do\n %s\ndone &\n' % (ptr_temp_dir, '\n '.join(monitor_commands)))

            # Tell docker to constrain resources (memory).
            # Note: limiting memory is not always supported. See:
            # http://programster.blogspot.com/2014/09/docker-implementing-container-memory.html
            resource_args = ''
            if bundle.metadata.request_memory:
                resource_args += ' -m %s' % int(formatting.parse_size(bundle.metadata.request_memory))
            # TODO: would constrain --cpuset=0, but difficult because don't know the CPU ids
            f.write("docker run%s --rm --cidfile %s -u %s -v %s:/%s -v %s:/%s %s bash %s & wait $!\n" % (
                resource_args, ptr_container_file, os.geteuid(),
                ptr_temp_dir, docker_temp_dir,
                ptr_internal_script_file, docker_internal_script_file,
                docker_image, docker_internal_script_file))

        # 2) internal_script_file runs the actual command inside the docker container
        with open(internal_script_file, 'w') as f:
            # Make sure I have a username
            # NOTE(review): this rebinds the |username| parameter, so the
            # --username sent to the dispatcher below is the local user, not
            # the bundle owner — confirm this is intended.
            username = pwd.getpwuid(os.getuid())[0]  # do this because os.getlogin() doesn't always work
            f.write("echo %s::%s:%s::/:/bin/bash >> /etc/passwd\n" % (username, os.geteuid(), os.getgid()))
            # Do this because .bashrc isn't sourced automatically (even with --login, though it works with docker -t -i, strange...)
            f.write(". .bashrc || exit 1\n")
            # Go into the temp directory
            f.write("cd %s &&\n" % docker_temp_dir)
            # Run the actual command
            f.write('(%s) > stdout 2>stderr\n' % bundle.command)
    else:
        # Just run the command regularly without docker
        with open(script_file, 'w') as f:
            f.write(set_temp_dir_header)
            f.write("cd %s &&\n" % ptr_temp_dir)
            f.write('(%s) > stdout 2>stderr\n' % bundle.command)

    # Determine resources to request
    resource_args = []
    if request_time:
        resource_args.extend(['--request_time', formatting.parse_duration(request_time)])
    if request_memory:
        resource_args.extend(['--request_memory', formatting.parse_size(request_memory)])
    if request_cpus:
        resource_args.extend(['--request_cpus', request_cpus])
    if request_gpus:
        resource_args.extend(['--request_gpus', request_gpus])
    if request_queue:
        resource_args.extend(['--request_queue', request_queue])
    if request_priority:
        resource_args.extend(['--request_priority', request_priority])
    if username:
        resource_args.extend(['--username', username])

    # Start the command
    args = self.dispatch_command.split() + ['start'] + map(str, resource_args) + [script_file]
    if self.verbose >= 1: print '=== start_bundle(): running %s' % args
    result = json.loads(self.run_command_get_stdout(args))
    if self.verbose >= 1: print '=== start_bundle(): got %s' % result

    # Return the information about the job.
    return {
        'bundle': bundle,
        'temp_dir': temp_dir,
        'job_handle': result['handle'],
        'docker_image': docker_image,
    }
def start_bundle(self, bundle, bundle_store, parent_dict, username): ''' Sets up all the temporary files and then dispatches the job. username: the username of the owner of the bundle Returns the bundle information. ''' # Create a temporary directory temp_dir = canonicalize.get_current_location(bundle_store, bundle.uuid) temp_dir = os.path.realpath(temp_dir) # Follow symlinks path_util.make_directory(temp_dir) # Copy all the dependencies to that temporary directory. pairs = bundle.get_dependency_paths(bundle_store, parent_dict, temp_dir) print >>sys.stderr, 'RemoteMachine.start_bundle: copying dependencies of %s to %s' % (bundle.uuid, temp_dir) for (source, target) in pairs: path_util.copy(source, target, follow_symlinks=False) # Set docker image docker_image = self.default_docker_image if bundle.metadata.request_docker_image: docker_image = bundle.metadata.request_docker_image # Write the command to be executed to a script. if docker_image: container_file = temp_dir + '.cid' # contains the docker container id action_file = temp_dir + '.action' # send actions to the container (e.g., kill) status_dir = temp_dir + '.status' # receive information from the container (e.g., memory) script_file = temp_dir + '.sh' # main entry point internal_script_file = temp_dir + '-internal.sh' # run inside the docker container # Names of file inside the docker container docker_temp_dir = bundle.uuid docker_internal_script_file = bundle.uuid + '-internal.sh' # 1) script_file starts the docker container and runs internal_script_file in docker. # --rm removes the docker container once the job terminates (note that this makes things slow) # -v mounts the internal and user scripts and the temp directory # Trap SIGTERM and forward it to docker. 
with open(script_file, 'w') as f: # trap doesn't quite work reliably with Torque, so don't use it #f.write('trap \'echo Killing docker container $(cat %s); docker kill $(cat %s); echo Killed: $?; exit 143\' TERM\n' % (container_file, container_file)) # Inspect doesn't tell us a lot, so don't use it #f.write('while [ -e %s ]; do docker inspect $(cat %s) > %s; sleep 1; done &\n' % (temp_dir, container_file, status_dir)) # Monitor CPU/memory/disk monitor_commands = [ # Report on status 'mkdir -p %s' % status_dir, 'if [ -e /cgroup ]; then cgroup=/cgroup; else cgroup=/sys/fs/cgroup; fi', # find where cgroup is 'cp -f $cgroup/cpuacct/docker/$(cat %s)/cpuacct.stat %s' % (container_file, status_dir), 'cp -f $cgroup/memory/docker/$(cat %s)/memory.usage_in_bytes %s' % (container_file, status_dir), 'cp -f $cgroup/blkio/docker/$(cat %s)/blkio.throttle.io_service_bytes %s' % (container_file, status_dir), # Respond to actions '[ -e %s ] && [ "$(cat %s)" == "kill" ] && docker kill $(cat %s) && rm %s' % (action_file, action_file, container_file, action_file), ] f.write('while [ -e %s ]; do %s; sleep 1; done &\n' % (temp_dir, '; '. 
join(monitor_commands))) # Constrain resources resource_args = '' if bundle.metadata.request_memory: resource_args += ' -m %s' % int(formatting.parse_size(bundle.metadata.request_memory)) # TODO: would constrain --cpuset=0, but difficult because don't know the CPU ids f.write("docker run%s --rm --cidfile %s -u %s -v %s:/%s -v %s:/%s %s bash %s & wait $!\n" % ( resource_args, container_file, os.geteuid(), temp_dir, docker_temp_dir, internal_script_file, docker_internal_script_file, docker_image, docker_internal_script_file)) # 2) internal_script_file runs the actual command inside the docker container with open(internal_script_file, 'w') as f: # Make sure I have a username f.write("echo %s::%s:%s::/:/bin/bash >> /etc/passwd\n" % (os.getlogin(), os.geteuid(), os.getgid())) # Do this because .bashrc isn't sourced automatically (even with --login, though it works with docker -t -i, strange...) f.write(". .bashrc || exit 1\n") # Go into the temp directory f.write("cd %s &&\n" % docker_temp_dir) # Run the actual command f.write('(%s) > stdout 2>stderr\n' % bundle.command) else: # Just run the command regularly without docker script_file = temp_dir + '.sh' with open(script_file, 'w') as f: f.write("cd %s &&\n" % temp_dir) f.write('(%s) > stdout 2>stderr\n' % bundle.command) # Determine resources to request resource_args = [] if bundle.metadata.request_time: resource_args.extend(['--request_time', formatting.parse_duration(bundle.metadata.request_time)]) if bundle.metadata.request_memory: resource_args.extend(['--request_memory', formatting.parse_size(bundle.metadata.request_memory)]) if bundle.metadata.request_cpus: resource_args.extend(['--request_cpus', bundle.metadata.request_cpus]) if bundle.metadata.request_gpus: resource_args.extend(['--request_gpus', bundle.metadata.request_gpus]) if bundle.metadata.request_queue: resource_args.extend(['--request_queue', bundle.metadata.request_queue]) if username: resource_args.extend(['--username', username]) # Start the command 
args = self.dispatch_command.split() + ['start'] + map(str, resource_args) + [script_file] if self.verbose >= 1: print '=== start_bundle(): running %s' % args result = json.loads(self.run_command_get_stdout(args)) if self.verbose >= 1: print '=== start_bundle(): got %s' % result # Return the information about the job. return { 'bundle': bundle, 'temp_dir': temp_dir, 'job_handle': result['handle'], 'docker_image': docker_image, }
def upload(self, path, follow_symlinks):
    """
    Copy the contents of the directory at |path| into the data subdirectory,
    in a subfolder named by a hash of the contents of the new data directory.
    If |path| is in a temporary directory, then we just move it.

    |path| may also be a list of paths (uploaded as one directory) or a URL.

    Return a (data_hash, metadata) pair, where the metadata is a dict mapping
    keys to precomputed statistics about the new data directory.
    """
    # Create temporary directory as a staging area.
    # If |path| is already temporary, then we use that directly
    # (with the understanding that |path| will be moved)
    if not isinstance(path, list) and os.path.realpath(path).startswith(os.path.realpath(self.temp)):
        temp_path = path
    else:
        temp_path = os.path.join(self.temp, uuid.uuid4().hex)

    if not isinstance(path, list) and path_util.path_is_url(path):
        # Have to be careful. Want to make sure if we're fetching a URL
        # that points to a file, we are allowing this.
        if path.startswith("file://"):
            path_suffix = path[7:]
            if os.path.islink(path_suffix):
                raise UsageError("Not allowed to upload symlink %s" % path_suffix)
            if not any(path_suffix.startswith(f) for f in self.direct_upload_paths):
                raise UsageError(
                    "Not allowed to upload %s (only %s allowed)" % (path_suffix, self.direct_upload_paths)
                )

        # Download |path| if it is a URL.
        print >>sys.stderr, "BundleStore.upload: downloading %s to %s" % (path, temp_path)
        file_util.download_url(path, temp_path, print_status=True)
    elif path != temp_path:
        # Copy |path| into the temp_path.
        if isinstance(path, list):
            absolute_path = [path_util.normalize(p) for p in path]
            for p in absolute_path:
                path_util.check_isvalid(p, "upload")
        else:
            absolute_path = path_util.normalize(path)
            path_util.check_isvalid(absolute_path, "upload")

        # Recursively copy the directory into a new BundleStore temp directory.
        print >>sys.stderr, "BundleStore.upload: copying %s to %s" % (absolute_path, temp_path)
        path_util.copy(absolute_path, temp_path, follow_symlinks=follow_symlinks)

    # Multiplex between uploading a directory and uploading a file here.
    # All other path_util calls will use these lists of directories and files.
    if os.path.isdir(temp_path):
        dirs_and_files = path_util.recursive_ls(temp_path)
    else:
        dirs_and_files = ([], [temp_path])

    # Hash the contents of the temporary directory, and then if there is no
    # data with this hash value, move this directory into the data directory.
    print >>sys.stderr, "BundleStore.upload: hashing %s" % (temp_path)
    data_hash = "0x%s" % (path_util.hash_directory(temp_path, dirs_and_files),)
    data_size = path_util.get_size(temp_path, dirs_and_files)
    final_path = os.path.join(self.data, data_hash)
    final_path_exists = False

    try:
        # If data_hash already exists, then we don't need to move it over.
        # os.utime doubles as an existence probe (EAFP) and freshens the mtime
        # of the deduplicated entry.
        os.utime(final_path, None)
        final_path_exists = True
    except OSError, e:
        if e.errno == errno.ENOENT:
            print >>sys.stderr, "BundleStore.upload: moving %s to %s" % (temp_path, final_path)
            path_util.rename(temp_path, final_path)
        else:
            raise
def upload(self, sources, follow_symlinks, exclude_patterns, git, unpack, remove_sources, uuid):
    """
    |sources|: specifies the locations of the contents to upload. Each element is either a URL or a local path.
    |follow_symlinks|: for local path(s), whether to follow (resolve) symlinks
    |exclude_patterns|: for local path(s), don't upload these patterns (e.g., *.o)
    |git|: for URL, whether |source| is a git repo to clone.
    |unpack|: for each source in |sources|, whether to unpack it if it's an archive.
    |remove_sources|: remove |sources|.

    If |sources| contains one source, then the bundle contents will be that source.
    Otherwise, the bundle contents will be a directory with each of the sources.
    Exceptions:
    - If |git|, then each source is replaced with the result of running 'git clone |source|'
    - If |unpack| is True or a source is an archive (zip, tar.gz, etc.), then unpack the source.

    Install the contents of the directory at |source| into
    DATA_SUBDIRECTORY in a subdirectory named by a hash of the contents.

    Return a (data_hash, metadata) pair, where the metadata is a dict mapping
    keys to precomputed statistics about the new data directory.
    """
    # NOTE: the |uuid| parameter shadows the stdlib uuid module within this
    # function; it is the bundle's uuid string.
    to_delete = []

    # If just a single file, set the final path to be equal to that file
    single_path = len(sources) == 1

    # Determine which disk this will go on (consistent-hash ring keyed by uuid)
    disk_choice = self.ring.get_node(uuid)

    final_path = os.path.join(self.partitions, disk_choice, self.DATA_SUBDIRECTORY, uuid)
    if os.path.exists(final_path):
        raise UsageError('Path %s already present in bundle store' % final_path)
    # Only make if not there
    elif not single_path:
        path_util.make_directory(final_path)

    # Paths to resources
    subpaths = []

    for source in sources:
        # Where to save |source| to (might change this value if we unpack).
        if not single_path:
            subpath = os.path.join(final_path, os.path.basename(source))
        else:
            subpath = final_path

        if remove_sources:
            to_delete.append(source)
        source_unpack = unpack and zip_util.path_is_archive(source)

        if source_unpack and single_path:
            # Load the file into the bundle store under the given path
            subpath += zip_util.get_archive_ext(source)

        if path_util.path_is_url(source):
            # Download the URL.
            print_util.open_line('BundleStore.upload: downloading %s to %s' % (source, subpath))
            if git:
                file_util.git_clone(source, subpath)
            else:
                file_util.download_url(source, subpath, print_status=True)
                # Unpack the downloaded archive, then drop the archive itself.
                if source_unpack:
                    zip_util.unpack(subpath, zip_util.strip_archive_ext(subpath))
                    path_util.remove(subpath)
                    subpath = zip_util.strip_archive_ext(subpath)
            print_util.clear_line()
        else:
            # Copy the local path.
            source_path = path_util.normalize(source)
            path_util.check_isvalid(source_path, 'upload')

            # Recursively copy the directory into the BundleStore
            print_util.open_line('BundleStore.upload: %s => %s' % (source_path, subpath))
            if source_unpack:
                zip_util.unpack(source_path, zip_util.strip_archive_ext(subpath))
                subpath = zip_util.strip_archive_ext(subpath)
            else:
                if remove_sources:
                    # Move instead of copy when the caller relinquishes the source.
                    path_util.rename(source_path, subpath)
                else:
                    path_util.copy(source_path, subpath, follow_symlinks=follow_symlinks, exclude_patterns=exclude_patterns)
            print_util.clear_line()
        subpaths.append(subpath)

    dirs_and_files = None
    if os.path.isdir(final_path):
        dirs_and_files = path_util.recursive_ls(final_path)
    else:
        dirs_and_files = [], [final_path]

    # Hash the contents of the bundle directory. Update the data_hash attribute
    # for the bundle
    print_util.open_line('BundleStore.upload: hashing %s' % final_path)
    data_hash = '0x%s' % (path_util.hash_directory(final_path, dirs_and_files))
    print_util.clear_line()
    print_util.open_line('BundleStore.upload: computing size of %s' % final_path)
    data_size = path_util.get_size(final_path, dirs_and_files)
    print_util.clear_line()

    # Delete paths (only after the copy/hash succeeded).
    for path in to_delete:
        if os.path.exists(path):
            path_util.remove(path)

    # After this operation there should always be a directory at the final path.
    assert (os.path.lexists(final_path)), 'Uploaded to %s failed!' % (final_path,)
    return (data_hash, {'data_size': data_size})
def start_bundle(self, bundle, bundle_store, parent_dict, username):
    '''
    Sets up all the temporary files and then dispatches the job.
    username: the username of the owner of the bundle
    Returns the bundle information.
    '''
    # Create a temporary directory
    temp_dir = canonicalize.get_current_location(bundle_store, bundle.uuid)
    temp_dir = os.path.realpath(temp_dir)  # Follow symlinks
    path_util.make_directory(temp_dir)

    # Copy all the dependencies to that temporary directory.
    pairs = bundle.get_dependency_paths(bundle_store, parent_dict, temp_dir)
    print >>sys.stderr, 'RemoteMachine.start_bundle: copying dependencies of %s to %s' % (bundle.uuid, temp_dir)
    for (source, target) in pairs:
        path_util.copy(source, target, follow_symlinks=False)

    # Set defaults for the dispatcher.
    docker_image = bundle.metadata.request_docker_image or self.default_docker_image

    # Parse |request_string| using |to_value|, but don't exceed |max_value|.
    def parse_and_min(to_value, request_string, default_value, max_value):
        # Use default if request value doesn't exist
        if request_string:
            request_value = to_value(request_string)
        else:
            request_value = default_value
        # Cap at max_value when both are set; None only when neither is set.
        if request_value and max_value:
            return int(min(request_value, max_value))
        elif request_value:
            return int(request_value)
        elif max_value:
            return int(max_value)
        else:
            return None

    request_time = parse_and_min(formatting.parse_duration, bundle.metadata.request_time, self.default_request_time, self.max_request_time)
    request_memory = parse_and_min(formatting.parse_size, bundle.metadata.request_memory, self.default_request_memory, self.max_request_memory)
    request_disk = parse_and_min(formatting.parse_size, bundle.metadata.request_disk, self.default_request_disk, self.max_request_disk)
    request_cpus = bundle.metadata.request_cpus or self.default_request_cpus
    request_gpus = bundle.metadata.request_gpus or self.default_request_gpus
    request_queue = bundle.metadata.request_queue or self.default_request_queue
    request_priority = bundle.metadata.request_priority or self.default_request_priority
    request_network = bundle.metadata.request_network or self.default_request_network

    script_file = temp_dir + '.sh'  # main entry point
    ptr_temp_dir = '$temp_dir'
    # 1) If no argument to script_file, use the temp_dir (e.g., Torque, master/worker share file system).
    # 2) If argument is 'use_script_for_temp_dir', use the script to determine temp_dir (e.g., qsub, no master/worker do not share file system).
    set_temp_dir_header = 'if [ -z "$1" ]; then temp_dir=' + temp_dir + '; else temp_dir=`readlink -f $0 | sed -e \'s/\\.sh$//\'`; fi\n'

    # Write the command to be executed to a script.
    internal_script_file = temp_dir + '-internal.sh'  # run inside the docker container
    # These paths depend on $temp_dir, an environment variable which will be set (referenced inside script_file)
    ptr_container_file = ptr_temp_dir + '.cid'  # contains the docker container id
    ptr_action_file = ptr_temp_dir + '.action'  # send actions to the container (e.g., kill)
    ptr_status_dir = ptr_temp_dir + '.status'  # receive information from the container (e.g., memory)
    ptr_script_file = ptr_temp_dir + '.sh'  # main entry point
    ptr_internal_script_file = ptr_temp_dir + '-internal.sh'  # run inside the docker container
    # Names of file inside the docker container
    docker_temp_dir = '/' + bundle.uuid
    docker_internal_script_file = '/' + bundle.uuid + '-internal.sh'

    # 1) script_file starts the docker container and runs internal_script_file in docker.
    # --rm removes the docker container once the job terminates (note that this makes things slow)
    # -v mounts the internal and user scripts and the temp directory
    # Trap SIGTERM and forward it to docker.
    with open(script_file, 'w') as f:
        f.write(set_temp_dir_header)

        # Monitor CPU/memory/disk
        # Used to copy status about the docker container.
        def copy_if_exists(source_template, arg, target):
            source = source_template % arg
            # -f because target might be read-only
            return 'if [ -e %s ] && [ -e %s ]; then cp -f %s %s; fi' % (arg, source, source, target)

        # Emits a shell snippet that extracts field |col| from the
        # SEPARATOR-delimited action file at |path|.
        def get_field(path, col):
            return 'cat %s | cut -f%s -d\'%s\'' % (path, col, BundleAction.SEPARATOR)

        monitor_commands = [
            # Report on status (memory, cpu, etc.)
            'mkdir -p %s' % ptr_status_dir,
            'if [ -e /cgroup ]; then cgroup=/cgroup; else cgroup=/sys/fs/cgroup; fi',  # find where cgroup is
            copy_if_exists('$cgroup/cpuacct/docker/$(cat %s)/cpuacct.stat', ptr_container_file, ptr_status_dir),
            copy_if_exists('$cgroup/memory/docker/$(cat %s)/memory.usage_in_bytes', ptr_container_file, ptr_status_dir),
            copy_if_exists('$cgroup/blkio/docker/$(cat %s)/blkio.throttle.io_service_bytes', ptr_container_file, ptr_status_dir),
            # Enforce memory limits
            '[ -e "%s/memory.usage_in_bytes" ] && mem=$(cat %s/memory.usage_in_bytes)' % (ptr_status_dir, ptr_status_dir),
            'echo "memory: $mem (max %s)"' % request_memory,
            'if [ -n "$mem" ] && [ "$mem" -gt "%s" ]; then echo "[CodaLab] Memory limit exceeded: $mem > %s, terminating." >> %s/stderr; docker kill $(cat %s); break; fi' % \
                (request_memory, request_memory, ptr_temp_dir, ptr_container_file),
            # Enforce disk limits
            'disk=$(du -sb %s | cut -f1)' % ptr_temp_dir,
            'echo "disk: $disk (max %s)"' % request_disk,
            'if [ -n "$disk" ] && [ "$disk" -gt "%s" ]; then echo "[CodaLab] Disk limit exceeded: $disk > %s, terminating." >> %s/stderr; docker kill $(cat %s); break; fi' % \
                (request_disk, request_disk, ptr_temp_dir, ptr_container_file),
            # Execute "kill"
            'if [ -e %s ] && [ "$(cat %s)" == "kill" ]; then echo "[CodaLab] Received kill command, terminating." >> %s/stderr; docker kill $(cat %s); rm %s; break; fi' % \
                (ptr_action_file, ptr_action_file, ptr_temp_dir, ptr_container_file, ptr_action_file),
            # Execute "write <subpath> <contents>"
            'if [ -e %s ] && [ "$(%s)" == "write" ]; then echo Writing...; %s > %s/$(%s); rm %s; fi' % \
                (ptr_action_file, get_field(ptr_action_file, 1), get_field(ptr_action_file, '3-'), ptr_temp_dir, get_field(ptr_action_file, 2), ptr_action_file),
            # Sleep
            'sleep 1',
        ]
        # Background monitor loop: runs until temp_dir disappears.
        f.write('while [ -e %s ]; do\n %s\ndone &\n' % (ptr_temp_dir, '\n '.join(monitor_commands)))

        resource_args = ''
        # Limiting memory in docker is not (always) supported. So we rely on bash (see above).
        # http://programster.blogspot.com/2014/09/docker-implementing-container-memory.html
        #if request_memory:
        #    resource_args += ' -m %s' % int(formatting.parse_size(request_memory))
        # TODO: would constrain --cpuset=0, but difficult because don't know the CPU ids

        # Attach all GPUs if any. Note that only the 64-bit version of
        # libcuda.so is picked up.
        f.write('devices=$(/bin/ls /dev/nvidia* 2>/dev/null)\n')
        f.write('if [ -n "$devices" ]; then devices=$(for d in $devices; do echo --device $d:$d; done); fi\n')
        f.write('libcuda=$(/sbin/ldconfig -p 2>/dev/null | grep "libcuda.so$" | grep "x86-64" | head -n 1 | cut -d " " -f 4)\n')
        f.write('if [ -n "$libcuda" ]; then libcuda=" -v $libcuda:/usr/lib/x86_64-linux-gnu/libcuda.so:ro"; fi\n')
        resource_args += ' $devices$libcuda'

        # Enable network?
        if not request_network:
            resource_args += ' --net=none'

        f.write("docker run%s --rm --cidfile %s -u %s -v %s:%s -v %s:%s -e HOME=%s %s bash %s >%s/stdout 2>%s/stderr & wait $!\n" % (
            resource_args, ptr_container_file, os.geteuid(),
            ptr_temp_dir, docker_temp_dir,
            ptr_internal_script_file, docker_internal_script_file,
            docker_temp_dir, docker_image, docker_internal_script_file,
            ptr_temp_dir, ptr_temp_dir))

    # 2) internal_script_file runs the actual command inside the docker container
    with open(internal_script_file, 'w') as f:
        # Make sure I have a username
        # NOTE(review): this rebinds the |username| parameter, so the
        # --username sent to the dispatcher below is the local user, not the
        # bundle owner — confirm this is intended.
        username = pwd.getpwuid(os.getuid())[0]  # do this because os.getlogin() doesn't always work
        f.write("[ -w /etc/passwd ] && echo %s::%s:%s::/:/bin/bash >> /etc/passwd\n" % (username, os.geteuid(), os.getgid()))
        # Do this because .bashrc isn't sourced automatically (even with --login, though it works with docker -t -i, strange...)
        f.write("[ -e .bashrc ] && . .bashrc\n")
        # Go into the temp directory
        f.write("cd %s &&\n" % docker_temp_dir)
        # Run the actual command
        f.write('(%s) >>stdout 2>>stderr\n' % bundle.command)

    # Determine resources to request
    resource_args = []
    if request_time:
        resource_args.extend(['--request-time', request_time])
    if request_memory:
        resource_args.extend(['--request-memory', request_memory])
    if request_disk:
        resource_args.extend(['--request-disk', request_disk])
    if request_cpus:
        resource_args.extend(['--request-cpus', request_cpus])
    if request_gpus:
        resource_args.extend(['--request-gpus', request_gpus])
    if request_queue:
        resource_args.extend(['--request-queue', request_queue])
    if request_priority:
        resource_args.extend(['--request-priority', request_priority])
    if username:
        resource_args.extend(['--username', username])

    # Start the command
    args = self.dispatch_command.split() + ['start'] + map(str, resource_args) + [script_file]
    if self.verbose >= 1: print '=== start_bundle(): running %s' % args
    result = json.loads(self.run_command_get_stdout(args))
    if self.verbose >= 1: print '=== start_bundle(): got %s' % result
    if not result['handle']:
        raise SystemError('Starting bundle failed')

    # Return the information about the job.
    return {
        'bundle': bundle,
        'temp_dir': temp_dir,
        'job_handle': result['handle'],
        'docker_image': docker_image,
        'request_time': str(request_time) if request_time else None,
        'request_memory': str(request_memory) if request_memory else None,
        'request_disk': str(request_disk) if request_disk else None,
        'request_cpus': request_cpus,
        'request_gpus': request_gpus,
        'request_queue': request_queue,
        'request_priority': request_priority,
        'request_network': request_network,
    }
def start_bundle(self, bundle, bundle_store, parent_dict, username):
    '''
    Set up all the temporary files for a run bundle and dispatch the job.

    bundle: the run bundle to execute; supplies uuid, metadata (resource
        requests, docker image) and the shell command to run.
    bundle_store: used to resolve the bundle's on-disk location and the
        paths of its dependencies.
    parent_dict: passed through to bundle.get_dependency_paths to resolve
        the (source, target) dependency pairs.
    username: the username of the owner of the bundle; forwarded to the
        dispatcher via --username.

    Returns a dict describing the started job:
    {'bundle', 'temp_dir', 'job_handle', 'docker_image'}.
    '''
    # Create a temporary directory (the bundle's canonical location).
    temp_dir = canonicalize.get_current_location(bundle_store, bundle.uuid)
    temp_dir = os.path.realpath(temp_dir)  # Follow symlinks
    path_util.make_directory(temp_dir)

    # Copy all the dependencies to that temporary directory.
    # follow_symlinks=False: dependencies are copied as-is, symlinks preserved.
    pairs = bundle.get_dependency_paths(bundle_store, parent_dict, temp_dir)
    print >> sys.stderr, 'RemoteMachine.start_bundle: copying dependencies of %s to %s' % (bundle.uuid, temp_dir)
    for (source, target) in pairs:
        path_util.copy(source, target, follow_symlinks=False)

    # Set defaults for the dispatcher; each bundle.metadata.request_* field,
    # when set, overrides the machine-level default.
    docker_image = self.default_docker_image
    if bundle.metadata.request_docker_image:
        docker_image = bundle.metadata.request_docker_image
    request_time = self.default_request_time
    if bundle.metadata.request_time:
        request_time = bundle.metadata.request_time
    request_memory = self.default_request_memory
    if bundle.metadata.request_memory:
        request_memory = bundle.metadata.request_memory
    request_cpus = self.default_request_cpus
    if bundle.metadata.request_cpus:
        request_cpus = bundle.metadata.request_cpus
    request_gpus = self.default_request_gpus
    if bundle.metadata.request_gpus:
        request_gpus = bundle.metadata.request_gpus
    request_queue = self.default_request_queue
    if bundle.metadata.request_queue:
        request_queue = bundle.metadata.request_queue
    request_priority = self.default_request_priority
    if bundle.metadata.request_priority:
        request_priority = bundle.metadata.request_priority

    script_file = temp_dir + '.sh'  # main entry point
    # ptr_* paths are written into the generated shell scripts and are
    # evaluated there against the shell variable $temp_dir, not in Python.
    ptr_temp_dir = '$temp_dir'
    # 1) If no argument to script_file, use the temp_dir (e.g., Torque, master/worker share file system).
    # 2) If argument is 'use_script_for_temp_dir', use the script to determine temp_dir (e.g., qsub, no master/worker do not share file system).
    set_temp_dir_header = 'if [ -z "$1" ]; then temp_dir=' + temp_dir + '; else temp_dir=`readlink -f $0 | sed -e \'s/\\.sh$//\'`; fi\n'

    # Write the command to be executed to a script.
    if docker_image:
        internal_script_file = temp_dir + '-internal.sh'  # run inside the docker container
        # These paths depend on $temp_dir, an environment variable which will be set (referenced inside script_file)
        ptr_container_file = ptr_temp_dir + '.cid'  # contains the docker container id
        ptr_action_file = ptr_temp_dir + '.action'  # send actions to the container (e.g., kill)
        ptr_status_dir = ptr_temp_dir + '.status'  # receive information from the container (e.g., memory)
        ptr_script_file = ptr_temp_dir + '.sh'  # main entry point
        ptr_internal_script_file = ptr_temp_dir + '-internal.sh'  # run inside the docker container
        # Names of file inside the docker container
        docker_temp_dir = bundle.uuid
        docker_internal_script_file = bundle.uuid + '-internal.sh'

        # 1) script_file starts the docker container and runs internal_script_file in docker.
        # --rm removes the docker container once the job terminates (note that this makes things slow)
        # -v mounts the internal and user scripts and the temp directory
        # Trap SIGTERM and forward it to docker.
        with open(script_file, 'w') as f:
            f.write(set_temp_dir_header)

            # Monitor CPU/memory/disk
            def copy_if_exists(source_template, arg, target):
                # Emits a shell snippet (not executed here) that copies the
                # cgroup accounting file into the status dir when present.
                source = source_template % arg
                # -f because target might be read-only
                return 'if [ -e %s ] && [ -e %s ]; then cp -f %s %s; fi' % (arg, source, source, target)

            monitor_commands = [
                # Report on status (memory, cpu, etc.)
                'mkdir -p %s' % ptr_status_dir,
                'if [ -e /cgroup ]; then cgroup=/cgroup; else cgroup=/sys/fs/cgroup; fi',  # find where cgroup is
                copy_if_exists('$cgroup/cpuacct/docker/$(cat %s)/cpuacct.stat', ptr_container_file, ptr_status_dir),
                copy_if_exists('$cgroup/memory/docker/$(cat %s)/memory.usage_in_bytes', ptr_container_file, ptr_status_dir),
                copy_if_exists('$cgroup/blkio/docker/$(cat %s)/blkio.throttle.io_service_bytes', ptr_container_file, ptr_status_dir),
                # Respond to kill action
                '[ -e %s ] && [ "$(cat %s)" == "kill" ] && docker kill $(cat %s) && rm %s' % (ptr_action_file, ptr_action_file, ptr_container_file, ptr_action_file),
                # Sleep
                'sleep 1',
            ]
            # Background monitor loop: runs once per second for as long as
            # the temp directory exists.
            f.write('while [ -e %s ]; do\n %s\ndone &\n' % (ptr_temp_dir, '\n '.join(monitor_commands)))

            # Tell docker to constrain resources (memory).
            # Note: limiting memory is not always supported. See:
            # http://programster.blogspot.com/2014/09/docker-implementing-container-memory.html
            resource_args = ''
            if bundle.metadata.request_memory:
                resource_args += ' -m %s' % int(formatting.parse_size(bundle.metadata.request_memory))
            # TODO: would constrain --cpuset=0, but difficult because don't know the CPU ids
            # 'wait $!' (rather than running docker in the foreground) lets
            # the shell deliver signals while docker runs.
            f.write("docker run%s --rm --cidfile %s -u %s -v %s:/%s -v %s:/%s %s bash %s >%s/stdout 2>%s/stderr & wait $!\n" % (resource_args, ptr_container_file, os.geteuid(), ptr_temp_dir, docker_temp_dir, ptr_internal_script_file, docker_internal_script_file, docker_image, docker_internal_script_file, ptr_temp_dir, ptr_temp_dir))

        # 2) internal_script_file runs the actual command inside the docker container
        with open(internal_script_file, 'w') as f:
            # Make sure I have a username
            username = pwd.getpwuid(os.getuid())[0]  # do this because os.getlogin() doesn't always work
            # NOTE(review): this append is unguarded — if the container's
            # /etc/passwd is not writable the line fails; confirm whether a
            # '[ -w /etc/passwd ] &&' guard is needed here.
            f.write("echo %s::%s:%s::/:/bin/bash >> /etc/passwd\n" % (username, os.geteuid(), os.getgid()))
            # Do this because .bashrc isn't sourced automatically (even with --login, though it works with docker -t -i, strange...)
            # NOTE(review): '. .bashrc || exit 1' aborts the whole job when
            # .bashrc is absent in the image — confirm that is intended
            # (an existence guard would make this best-effort instead).
            f.write(". .bashrc || exit 1\n")
            # Go into the temp directory
            f.write("cd %s &&\n" % docker_temp_dir)
            # Run the actual command
            f.write('(%s) >>stdout 2>>stderr\n' % bundle.command)
    else:
        # Just run the command regularly without docker
        with open(script_file, 'w') as f:
            f.write(set_temp_dir_header)
            f.write("cd %s &&\n" % ptr_temp_dir)
            f.write('(%s) >stdout 2>stderr\n' % bundle.command)

    # Determine resources to request (CLI flags for the dispatch command).
    resource_args = []
    if request_time:
        resource_args.extend(['--request_time', formatting.parse_duration(request_time)])
    if request_memory:
        resource_args.extend(['--request_memory', formatting.parse_size(request_memory)])
    if request_cpus:
        resource_args.extend(['--request_cpus', request_cpus])
    if request_gpus:
        resource_args.extend(['--request_gpus', request_gpus])
    if request_queue:
        resource_args.extend(['--request_queue', request_queue])
    if request_priority:
        resource_args.extend(['--request_priority', request_priority])
    if username:
        resource_args.extend(['--username', username])

    # Start the command: '<dispatch_command> start <flags> <script_file>',
    # expecting a JSON object with the job handle on stdout.
    args = self.dispatch_command.split() + ['start'] + map(str, resource_args) + [script_file]
    if self.verbose >= 1:
        print '=== start_bundle(): running %s' % args
    result = json.loads(self.run_command_get_stdout(args))
    if self.verbose >= 1:
        print '=== start_bundle(): got %s' % result

    # Return the information about the job.
    return {
        'bundle': bundle,
        'temp_dir': temp_dir,
        'job_handle': result['handle'],
        'docker_image': docker_image,
    }