def zip(path, follow_symlinks, exclude_names=None, file_name=None):
    '''
    Take a path to a file or directory and return the path to a zip archive
    containing its contents.

    |path| may be a single path or a list of paths; each is normalized and
    validated before being staged.  |file_name| overrides the name of the
    entry inside the archive (defaults to ZIP_SUBPATH).
    Returns a (zip_path, sub_path) pair.
    '''
    # Fix: the original default was a mutable list ([]), which is shared
    # across calls; use None as the sentinel instead.
    if exclude_names is None:
        exclude_names = []
    if isinstance(path, list):
        for p in path:
            absolute_path = path_util.normalize(p)
            path_util.check_isvalid(absolute_path, 'zip_directory')
    else:
        absolute_path = path_util.normalize(path)
        path_util.check_isvalid(absolute_path, 'zip_directory')
    # Add proper name
    if file_name:
        sub_path = file_name
    else:
        sub_path = ZIP_SUBPATH
    # Recursively copy the directory into a temp directory.
    # TODO: this is inefficient; do the zipping from the original source
    # directly.
    temp_path = tempfile.mkdtemp()
    temp_subpath = os.path.join(temp_path, sub_path)
    if isinstance(path, list):
        os.mkdir(temp_subpath)
        for p in path:
            absolute_path = path_util.normalize(p)
            path_util.copy(absolute_path, os.path.join(temp_subpath, os.path.basename(p)), follow_symlinks=follow_symlinks, exclude_names=exclude_names)
    else:
        absolute_path = path_util.normalize(path)
        path_util.copy(absolute_path, temp_subpath, follow_symlinks=follow_symlinks, exclude_names=exclude_names)
    # TODO: These methods of zipping don't preserve permissions, so using a
    # system call for now (only works in Linux).
    zip_path = temp_path + '.zip'
    opts = '-qr'
    if not follow_symlinks:
        opts += ' --symlinks'
    # Fix: quote the interpolated paths so names containing spaces do not
    # break the shell command.  NOTE(review): os.system with interpolated
    # strings is still injection-prone; subprocess with an argument list
    # would be safer.
    if os.system('cd "%s" && zip %s "%s" "%s"' % (temp_path, opts, zip_path, sub_path)) != 0:
        raise UsageError('zip failed')
    # Clean up the temporary directory and return the zip file's path.
    path_util.remove(temp_path)
    return zip_path, sub_path
def test_normalize(self):
    """Check that normalize yields the expected absolute path and is idempotent."""
    cases = [
        ('~', os.path.expanduser('~')),
        (os.curdir, os.getcwd()),
        (os.pardir, os.path.abspath(os.path.join(os.getcwd(), os.pardir))),
    ]
    for test_path, expected in cases:
        result = path_util.normalize(test_path)
        self.assertTrue(os.path.isabs(result))
        self.assertEqual(result, expected)
        # Idempotency: an absolute path should be a fixed point of normalize.
        self.assertEqual(path_util.normalize(result), result)
def test_normalize(self):
    """Verify path_util.normalize returns absolute paths and leaves them unchanged."""
    for raw, want in (
        ('~', os.path.expanduser('~')),
        (os.curdir, os.getcwd()),
        (os.pardir, os.path.abspath(os.path.join(os.getcwd(), os.pardir))),
    ):
        got = path_util.normalize(raw)
        self.assertTrue(os.path.isabs(got))
        self.assertEqual(got, want)
        # Normalizing an already-normalized path should be a no-op.
        self.assertEqual(path_util.normalize(got), got)
def codalab_home(self):
    from codalab.lib import path_util
    # Default to ~/.codalab; the directory is created if it does not exist.
    # In the future, allow customization.
    home = path_util.normalize("~/.codalab")
    path_util.make_directory(home)
    return home
def zip(path):
    '''
    Return the path to a zip archive containing the contents of the file or
    directory at |path|.
    '''
    absolute_path = path_util.normalize(path)
    path_util.check_isvalid(absolute_path, 'zip_directory')
    # Stage a recursive copy of the tree inside a fresh temp directory.
    staging_root = tempfile.mkdtemp()
    staging_subpath = os.path.join(staging_root, ZIP_SUBPATH)
    path_util.copy(absolute_path, staging_subpath)
    # make_archive does NOT handle the single-file case cleanly, so branch
    # on whether we staged a directory or a file.
    if os.path.isdir(staging_subpath):
        zip_path = shutil.make_archive(
            base_name=staging_root,
            base_dir=ZIP_SUBPATH,
            root_dir=staging_root,
            format='zip',
        )
    else:
        zip_path = staging_root + '.zip'
        with ZipFile(zip_path, 'w') as zip_file:
            zip_file.write(staging_subpath, ZIP_SUBPATH)
    # Remove the staging directory and hand back the archive path.
    path_util.remove(staging_root)
    return zip_path
def do_upload_command(self, argv, parser):
    '''
    Parse and execute the 'upload' CLI command: register arguments, validate
    the local path and bundle type, collect any missing metadata, then upload
    the bundle to the current worksheet via self.client.
    '''
    worksheet_uuid = self.env_model.get_current_worksheet()
    help_text = 'bundle_type: [%s]' % ('|'.join(sorted(UPLOADED_TYPES)))
    parser.add_argument('bundle_type', help=help_text)
    parser.add_argument('path', help='path of the directory to upload')
    # Add metadata arguments for UploadedBundle and all of its subclasses.
    metadata_keys = set()
    metadata_util.add_arguments(UploadedBundle, metadata_keys, parser)
    for bundle_type in UPLOADED_TYPES:
        bundle_subclass = get_bundle_subclass(bundle_type)
        metadata_util.add_arguments(bundle_subclass, metadata_keys, parser)
    metadata_util.add_auto_argument(parser)
    args = parser.parse_args(argv)
    # Check that the upload path exists before doing anything expensive.
    path_util.check_isvalid(path_util.normalize(args.path), 'upload')
    # Pull out the upload bundle type from the arguments and validate it.
    if args.bundle_type not in UPLOADED_TYPES:
        raise UsageError('Invalid bundle type %s (options: [%s])' % (
            args.bundle_type, '|'.join(sorted(UPLOADED_TYPES)),
        ))
    bundle_subclass = get_bundle_subclass(args.bundle_type)
    metadata = metadata_util.request_missing_data(bundle_subclass, args)
    # Type-check the bundle metadata BEFORE uploading the bundle data.
    # This optimization will avoid file copies on failed bundle creations.
    bundle_subclass.construct(data_hash='', metadata=metadata).validate()
    print self.client.upload(args.bundle_type, args.path, metadata, worksheet_uuid)
def codalab_home(self):
    from codalab.lib import path_util
    # Honor the CODALAB_HOME environment variable, falling back to the
    # per-user default ~/.codalab; create the directory if needed.
    home = path_util.normalize(os.getenv('CODALAB_HOME', '~/.codalab'))
    path_util.make_directory(home)
    return home
def setUpClass(cls):
    # Build a throwaway bundle store, model, and client for this test class,
    # rooted under the user's home directory.
    cls.test_root = path_util.normalize("~/.codalab_tests")
    path_util.make_directory(cls.test_root)
    cls.bundle_store = BundleStore(cls.test_root)
    cls.model = SQLiteModel(cls.test_root)
    mock_users = [
        User('root', 0),
        User('user1', 1),
        User('user2', 2),
        User('user4', 4),
    ]
    cls.auth_handler = MockAuthHandler(mock_users)
    cls.client = LocalBundleClient('local', cls.bundle_store, cls.model, cls.auth_handler)
def __init__(self, codalab_home):
    '''
    codalab_home: root directory of the store; data/ is where bundles are
    actually stored, temp/ holds scratch files.
    '''
    root = path_util.normalize(codalab_home)
    self.codalab_home = root
    self.data = os.path.join(root, self.DATA_SUBDIRECTORY)
    self.temp = os.path.join(root, self.TEMP_SUBDIRECTORY)
    self.make_directories()
def codalab_home(self):
    from codalab.lib import path_util
    # Resolve the CodaLab home directory: CODALAB_HOME if set, otherwise
    # ~/.codalab.  Ensure it exists before returning.
    location = os.getenv('CODALAB_HOME', '~/.codalab')
    location = path_util.normalize(location)
    path_util.make_directory(location)
    return location
def get_default_name(bundle_subclass, args):
    '''Derive a default bundle name from the upload path or make target.'''
    # Uploads are named after the file or directory being uploaded.
    if hasattr(args, 'path'):
        return os.path.basename(path_util.normalize(args.path))
    # A make bundle with a single direct target inherits that target's name.
    if bundle_subclass is MakeBundle:
        if len(args.target) == 1 and ':' not in args.target[0]:
            return os.path.basename(args.target[0])
    return MetadataDefaults.get_anonymous_name(bundle_subclass)
def setUpClass(cls):
    # Stand up an isolated store + SQLite-backed model for the test class.
    cls.test_root = path_util.normalize("~/.codalab_tests")
    path_util.make_directory(cls.test_root)
    cls.bundle_store = BundleStore(cls.test_root)
    db_url = "sqlite:///{}".format(os.path.join(cls.test_root, 'bundle.db'))
    cls.model = SQLiteModel(db_url, {})
    cls.model.root_user_id = '0'
    mock_users = [
        User('root', '0'),
        User('user1', '1'),
        User('user2', '2'),
        User('user4', '4'),
    ]
    cls.auth_handler = MockAuthHandler(mock_users)
    cls.client = LocalBundleClient('local', cls.bundle_store, cls.model, cls.auth_handler, verbose=1)
def zip(path, follow_symlinks, exclude_patterns, file_name):
    '''
    Take a path to a file or directory |path| and return the path to a zip
    archive containing its contents.  |file_name| is the name of the entry
    inside the archive.  |path| may also be a list of paths, in which case
    the archive entry is a directory containing each of them.
    '''
    if isinstance(path, list):
        for p in path:
            absolute_path = path_util.normalize(p)
            path_util.check_isvalid(absolute_path, 'zip_directory')
    else:
        absolute_path = path_util.normalize(path)
        path_util.check_isvalid(absolute_path, 'zip_directory')
    # Recursively copy the directory into a temp directory.
    temp_path = tempfile.mkdtemp()
    temp_subpath = os.path.join(temp_path, file_name)
    print_util.open_line('Copying %s to %s' % (path, temp_subpath))
    if isinstance(path, list):
        os.mkdir(temp_subpath)
        for p in path:
            absolute_path = path_util.normalize(p)
            path_util.copy(absolute_path, os.path.join(temp_subpath, os.path.basename(p)), follow_symlinks=follow_symlinks, exclude_patterns=exclude_patterns)
    else:
        absolute_path = path_util.normalize(path)
        path_util.copy(absolute_path, temp_subpath, follow_symlinks=follow_symlinks, exclude_patterns=exclude_patterns)
    print_util.clear_line()
    zip_path = temp_path + '.zip'
    opts = '-qr'
    if not follow_symlinks:
        opts += ' --symlinks'
    print_util.open_line('Zipping to %s' % zip_path)
    # Fix: quote the interpolated paths so names containing spaces (or other
    # shell metacharacters) don't break the command.  NOTE(review): consider
    # subprocess with an argument list instead of os.system.
    if os.system('cd "%s" && zip %s "%s" "%s"' % (temp_path, opts, zip_path, file_name)) != 0:
        raise UsageError('zip failed')
    path_util.remove(temp_path)
    return zip_path
def __init__(self, codalab_home, direct_upload_paths):
    """
    codalab_home: store root; data/ holds the bundles, temp/ is scratch space.
    direct_upload_paths: file://... uploads are accepted only from these paths.
    """
    home = path_util.normalize(codalab_home)
    self.codalab_home = home
    self.direct_upload_paths = direct_upload_paths
    self.data = os.path.join(home, self.DATA_SUBDIRECTORY)
    self.temp = os.path.join(home, self.TEMP_SUBDIRECTORY)
    self.make_directories()
def __init__(self, codalab_home, direct_upload_paths):
    '''
    codalab_home: store root; bundles live under data/, scratch under temp/.
    direct_upload_paths: whitelist of paths for file://... uploads.
    '''
    self.codalab_home = path_util.normalize(codalab_home)
    self.direct_upload_paths = direct_upload_paths
    # Derive the standard subdirectory layout from the normalized root.
    for attr, sub in (('data', self.DATA_SUBDIRECTORY), ('temp', self.TEMP_SUBDIRECTORY)):
        setattr(self, attr, os.path.join(self.codalab_home, sub))
    self.make_directories()
def __init__(self, codalab_home):
    '''
    Set up a multi-disk bundle store rooted at |codalab_home|, discover the
    partition nodes, and initialize the LRU cache.
    '''
    self.codalab_home = path_util.normalize(codalab_home)
    self.partitions = os.path.join(self.codalab_home, 'partitions')
    self.mtemp = os.path.join(self.codalab_home, MultiDiskBundleStore.MISC_TEMP_SUBDIRECTORY)
    # Perform initialization first to ensure that directories will be populated
    super(MultiDiskBundleStore, self).__init__()
    nodes, _ = path_util.ls(self.partitions)
    self.nodes = nodes
    self.lru_cache = OrderedDict()
    # Fix: the base-class __init__ was accidentally invoked a second time at
    # the end of this method, redundantly re-running its setup; the duplicate
    # call has been removed.
def __init__(self, codalab_home):
    '''Create the partitions directory and guarantee at least one partition.'''
    self.codalab_home = path_util.normalize(codalab_home)
    self.partitions = os.path.join(self.codalab_home, 'partitions')
    path_util.make_directory(self.partitions)
    self.refresh_partitions()
    # A freshly-created store has no partitions yet; add a default one so
    # the store is always usable.
    if self.__get_num_partitions() == 0:
        self.add_partition(None, 'default')
    self.lru_cache = OrderedDict()
def codalab_home(self):
    from codalab.lib import path_util
    # CODALAB_HOME overrides the default ~/.codalab location.
    home = path_util.normalize(os.getenv('CODALAB_HOME', '~/.codalab'))
    path_util.make_directory(home)
    # Global setting!  Point Python's temp directory at the bundle store's
    # temp subdirectory, since the default /tmp generally doesn't have
    # enough space.
    tempfile.tempdir = os.path.join(home, BundleStore.TEMP_SUBDIRECTORY)
    return home
def codalab_home(self):
    from codalab.lib import path_util
    # Resolve the home directory (CODALAB_HOME env var, else ~/.codalab)
    # and make sure it exists.
    location = os.getenv('CODALAB_HOME', '~/.codalab')
    location = path_util.normalize(location)
    path_util.make_directory(location)
    # Global setting!  Redirect tempfile to the bundle store's temp
    # subdirectory because the default /tmp is often too small.
    tempfile.tempdir = os.path.join(location, BundleStore.TEMP_SUBDIRECTORY)
    return location
def get_default_description(bundle_subclass, args):
    '''Return a human-readable default description for a new bundle.'''
    if bundle_subclass.BUNDLE_TYPE in UPLOADED_TYPES:
        # Describe uploads by their normalized source path.
        return 'Upload %s' % (path_util.normalize(args.path),)
    if bundle_subclass is MakeBundle:
        return 'Package %s' % (', '.join(args.target))
    if bundle_subclass is RunBundle:
        return 'Run {program} on {input}: {command}'.format(
            program=args.program_target,
            input=args.input_target,
            command=repr(args.command),
        )
    return ''
def setUpClass(cls):
    # Fresh multi-disk store plus a SQLite-backed model with generous quotas.
    cls.test_root = path_util.normalize("~/.codalab_tests")
    path_util.make_directory(cls.test_root)
    cls.bundle_store = MultiDiskBundleStore(cls.test_root)
    db_url = "sqlite:///{}".format(os.path.join(cls.test_root, 'bundle.db'))
    cls.model = SQLiteModel(db_url, {'time_quota': 1e12, 'disk_quota': 1e12})
    cls.model.root_user_id = '0'
    users = [User('root', '0'), User('user1', '1'), User('user2', '2'), User('user4', '4')]
    cls.auth_handler = MockAuthHandler(users)
    # Register every mock user in the model as verified.
    for user in users:
        cls.model.add_user(user.name, user.name + '@codalab.org', '', user_id=user.unique_id, is_verified=True)
    cls.client = LocalBundleClient('local', cls.bundle_store, cls.model, None, None, None, cls.auth_handler, verbose=1)
def __init__(self, codalab_home):
    '''
    Set up a multi-disk bundle store rooted at |codalab_home| and build the
    hash ring used to assign bundles to partition nodes.
    '''
    self.codalab_home = path_util.normalize(codalab_home)
    self.partitions = os.path.join(self.codalab_home, 'partitions')
    self.mtemp = os.path.join(self.codalab_home, MultiDiskBundleStore.MISC_TEMP_SUBDIRECTORY)
    # Perform initialization first to ensure that directories will be populated
    super(MultiDiskBundleStore, self).__init__()
    nodes, _ = path_util.ls(self.partitions)
    self.ring = HashRing(nodes)
    # Fix: the base-class __init__ was accidentally invoked a second time at
    # the end of this method, redundantly re-running its setup; the duplicate
    # call has been removed.
def zip(path, follow_symlinks, exclude_patterns, file_name):
    '''
    Take a path to a file or directory |path| and return the path to a zip
    archive containing its contents.  |file_name| is the name of the entry
    inside the archive.  |path| may also be a list of paths, in which case
    the archive entry is a directory containing each of them.
    '''
    if isinstance(path, list):
        for p in path:
            absolute_path = path_util.normalize(p)
            path_util.check_isvalid(absolute_path, 'zip_directory')
    else:
        absolute_path = path_util.normalize(path)
        path_util.check_isvalid(absolute_path, 'zip_directory')
    # Recursively copy the directory into a temp directory.
    temp_path = tempfile.mkdtemp()
    temp_subpath = os.path.join(temp_path, file_name)
    print_util.open_line('Copying %s to %s' % (path, temp_subpath))
    if isinstance(path, list):
        os.mkdir(temp_subpath)
        for p in path:
            absolute_path = path_util.normalize(p)
            path_util.copy(absolute_path, os.path.join(temp_subpath, os.path.basename(p)), follow_symlinks=follow_symlinks, exclude_patterns=exclude_patterns)
    else:
        absolute_path = path_util.normalize(path)
        path_util.copy(absolute_path, temp_subpath, follow_symlinks=follow_symlinks, exclude_patterns=exclude_patterns)
    print_util.clear_line()
    zip_path = temp_path + '.zip'
    opts = '-qr'
    if not follow_symlinks:
        opts += ' --symlinks'
    print_util.open_line('Zipping to %s' % zip_path)
    # Fix: quote the interpolated paths so names containing spaces (or other
    # shell metacharacters) don't break the command.  NOTE(review): consider
    # subprocess with an argument list instead of os.system.
    if os.system('cd "%s" && zip %s "%s" "%s"' % (temp_path, opts, zip_path, file_name)) != 0:
        raise UsageError('zip failed')
    path_util.remove(temp_path)
    return zip_path
def get_default_name(bundle_subclass, args):
    '''Derive a default bundle name from the command-line arguments.'''
    if issubclass(bundle_subclass, UploadedBundle):
        # Join the basenames of every uploaded path with dashes.
        basenames = [os.path.basename(path_util.normalize(p)) for p in args.path]
        return spec_util.create_default_name(None, '-'.join(basenames))
    if bundle_subclass is MakeBundle:
        if len(args.target_spec) == 1 and ':' not in args.target_spec[0]:  # direct link
            return os.path.basename(args.target_spec[0])
        # multiple targets
        return spec_util.create_default_name(bundle_subclass.BUNDLE_TYPE, str(' '.join(args.target_spec)))
    if bundle_subclass is RunBundle:
        return spec_util.create_default_name(bundle_subclass.BUNDLE_TYPE, args.command)
    raise UsageError("Unhandled class: %s" % bundle_subclass)
def setUpClass(cls):
    # Throwaway store (no direct-upload paths) + model for this test class.
    cls.test_root = path_util.normalize("~/.codalab_tests")
    path_util.make_directory(cls.test_root)
    cls.bundle_store = BundleStore(cls.test_root, [])
    cls.model = SQLiteModel(cls.test_root)
    cls.model.root_user_id = '0'
    mock_users = [
        User('root', '0'),
        User('user1', '1'),
        User('user2', '2'),
        User('user4', '4'),
    ]
    cls.auth_handler = MockAuthHandler(mock_users)
    cls.client = LocalBundleClient('local', cls.bundle_store, cls.model, cls.auth_handler, verbose=1)
def get_default_name(bundle_subclass, args):
    '''Pick a sensible default name based on the bundle subclass and args.'''
    if issubclass(bundle_subclass, UploadedBundle):
        # Uploads: dash-joined basenames of all normalized source paths.
        parts = []
        for p in args.path:
            parts.append(os.path.basename(path_util.normalize(p)))
        return spec_util.create_default_name(None, '-'.join(parts))
    elif bundle_subclass is MakeBundle:
        single_direct = len(args.target_spec) == 1 and ':' not in args.target_spec[0]
        if single_direct:  # direct link
            return os.path.basename(args.target_spec[0])
        # multiple targets
        joined = ' '.join(args.target_spec)
        return spec_util.create_default_name(bundle_subclass.BUNDLE_TYPE, str(joined))
    elif bundle_subclass is RunBundle:
        return spec_util.create_default_name(bundle_subclass.BUNDLE_TYPE, args.command)
    raise UsageError("Unhandled class: %s" % bundle_subclass)
def setUpClass(cls):
    # Multi-disk store backed by an on-disk SQLite database, no quotas.
    cls.test_root = path_util.normalize("~/.codalab_tests")
    path_util.make_directory(cls.test_root)
    cls.bundle_store = MultiDiskBundleStore(cls.test_root)
    db_url = "sqlite:///{}".format(os.path.join(cls.test_root, 'bundle.db'))
    cls.model = SQLiteModel(db_url, {})
    cls.model.root_user_id = '0'
    mock_users = [
        User('root', '0'),
        User('user1', '1'),
        User('user2', '2'),
        User('user4', '4'),
    ]
    cls.auth_handler = MockAuthHandler(mock_users)
    cls.client = LocalBundleClient('local', cls.bundle_store, cls.model, None, cls.auth_handler, verbose=1)
def upload(self, path, allow_symlinks=False):
    '''
    Copy the contents of the directory at path into the data subdirectory,
    in a subfolder named by a hash of the contents of the new data directory.

    Return a (data_hash, metadata) pair, where the metadata is a dict mapping
    keys to precomputed statistics about the new data directory.
    '''
    absolute_path = path_util.normalize(path)
    path_util.check_isvalid(absolute_path, 'upload')
    # Recursively copy the directory into a new BundleStore temp directory
    # named by a fresh random hex id.
    temp_directory = uuid.uuid4().hex
    temp_path = os.path.join(self.temp, temp_directory)
    path_util.copy(absolute_path, temp_path)
    # Multiplex between uploading a directory and uploading a file here.
    # All other path_util calls will use these lists of directories and files.
    if os.path.isdir(temp_path):
        dirs_and_files = path_util.recursive_ls(temp_path)
    else:
        dirs_and_files = ([], [temp_path])
    if not allow_symlinks:
        path_util.check_for_symlinks(temp_path, dirs_and_files)
    path_util.set_permissions(temp_path, 0o755, dirs_and_files)
    # Hash the contents of the temporary directory, and then if there is no
    # data with this hash value, move this directory into the data directory.
    data_hash = '0x%s' % (path_util.hash_directory(temp_path, dirs_and_files),)
    data_size = path_util.get_size(temp_path, dirs_and_files)
    final_path = os.path.join(self.data, data_hash)
    final_path_exists = False
    try:
        # Touching the final path doubles as an existence probe (EAFP):
        # ENOENT means the hash is new, so the temp tree is moved in.
        os.utime(final_path, None)
        final_path_exists = True
    except OSError, e:
        if e.errno == errno.ENOENT:
            os.rename(temp_path, final_path)
        else:
            raise
    # NOTE(review): this chunk appears truncated — the (data_hash, metadata)
    # return described in the docstring is not visible here; confirm against
    # the full file.
def upload(self, sources, follow_symlinks, exclude_patterns, git, unpack, remove_sources, uuid):
    """
    |sources|: specifies the locations of the contents to upload. Each element is
               either a URL or a local path.
    |follow_symlinks|: for local path(s), whether to follow (resolve) symlinks
    |exclude_patterns|: for local path(s), don't upload these patterns (e.g., *.o)
    |git|: for URL, whether |source| is a git repo to clone.
    |unpack|: for each source in |sources|, whether to unpack it if it's an archive.
    |remove_sources|: remove |sources|.

    If |sources| contains one source, then the bundle contents will be that source.
    Otherwise, the bundle contents will be a directory with each of the sources.
    Exceptions:
    - If |git|, then each source is replaced with the result of running 'git clone |source|'
    - If |unpack| is True or a source is an archive (zip, tar.gz, etc.), then unpack the source.

    Install the contents of the directory at |source| into
    DATA_SUBDIRECTORY in a subdirectory named by a hash of the contents.

    Return a (data_hash, metadata) pair, where the metadata is a dict mapping
    keys to precomputed statistics about the new data directory.
    """
    # Sources scheduled for deletion after a successful upload.
    to_delete = []
    # If just a single file, set the final path to be equal to that file
    single_path = len(sources) == 1
    # Determine which disk this will go on (consistent hashing on the uuid).
    disk_choice = self.ring.get_node(uuid)
    final_path = os.path.join(self.partitions, disk_choice, self.DATA_SUBDIRECTORY, uuid)
    if os.path.exists(final_path):
        raise UsageError('Path %s already present in bundle store' % final_path)
    # Only make if not there
    elif not single_path:
        path_util.make_directory(final_path)
    # Paths to resources
    subpaths = []
    for source in sources:
        # Where to save |source| to (might change this value if we unpack).
        if not single_path:
            subpath = os.path.join(final_path, os.path.basename(source))
        else:
            subpath = final_path
        if remove_sources:
            to_delete.append(source)
        source_unpack = unpack and zip_util.path_is_archive(source)
        if source_unpack and single_path:
            # Load the file into the bundle store under the given path;
            # keep the archive extension until the unpack step below.
            subpath += zip_util.get_archive_ext(source)
        if path_util.path_is_url(source):
            # Download the URL.
            print_util.open_line('BundleStore.upload: downloading %s to %s' % (source, subpath))
            if git:
                file_util.git_clone(source, subpath)
            else:
                file_util.download_url(source, subpath, print_status=True)
                if source_unpack:
                    zip_util.unpack(subpath, zip_util.strip_archive_ext(subpath))
                    path_util.remove(subpath)
                    subpath = zip_util.strip_archive_ext(subpath)
            print_util.clear_line()
        else:
            # Copy the local path.
            source_path = path_util.normalize(source)
            path_util.check_isvalid(source_path, 'upload')
            # Recursively copy the directory into the BundleStore
            print_util.open_line('BundleStore.upload: %s => %s' % (source_path, subpath))
            if source_unpack:
                zip_util.unpack(source_path, zip_util.strip_archive_ext(subpath))
                subpath = zip_util.strip_archive_ext(subpath)
            else:
                if remove_sources:
                    path_util.rename(source_path, subpath)
                else:
                    path_util.copy(source_path, subpath, follow_symlinks=follow_symlinks, exclude_patterns=exclude_patterns)
            print_util.clear_line()
        subpaths.append(subpath)
    # NOTE(review): this function was reconstructed from a whitespace-
    # collapsed dump; the nesting of the unpack step inside the download
    # branch should be confirmed against the original file.
    dirs_and_files = None
    if os.path.isdir(final_path):
        dirs_and_files = path_util.recursive_ls(final_path)
    else:
        dirs_and_files = [], [final_path]
    # Hash the contents of the bundle directory. Update the data_hash attribute
    # for the bundle
    print_util.open_line('BundleStore.upload: hashing %s' % final_path)
    data_hash = '0x%s' % (path_util.hash_directory(final_path, dirs_and_files))
    print_util.clear_line()
    print_util.open_line('BundleStore.upload: computing size of %s' % final_path)
    data_size = path_util.get_size(final_path, dirs_and_files)
    print_util.clear_line()
    # Delete paths.
    for path in to_delete:
        if os.path.exists(path):
            path_util.remove(path)
    # After this operation there should always be a directory at the final path.
    assert (os.path.lexists(final_path)), 'Uploaded to %s failed!' % (final_path,)
    return (data_hash, {'data_size': data_size})
def get_default_description(bundle_subclass, args):
    '''Default description for uploads: the space-joined normalized paths.'''
    if bundle_subclass.BUNDLE_TYPE in UPLOADED_TYPES:
        normalized = [path_util.normalize(p) for p in args.path]
        return "Upload %s" % (" ".join(normalized),)
    return ""
def __init__(self, codalab_home):
    '''Resolve the store root and derive the data/ and temp/ layout.'''
    home = path_util.normalize(codalab_home)
    self.codalab_home = home
    self.data = os.path.join(home, self.DATA_SUBDIRECTORY)
    self.temp = os.path.join(home, self.TEMP_SUBDIRECTORY)
    self.make_directories()
def upload(self, sources, follow_symlinks, exclude_patterns, git, unpack, remove_sources):
    '''
    |sources|: specifies the locations of the contents to upload. Each element is
               either a URL or a local path.
    |follow_symlinks|: for local path(s), whether to follow (resolve) symlinks
    |exclude_patterns|: for local path(s), don't upload these patterns (e.g., *.o)
    |git|: for URL, whether |source| is a git repo to clone.
    |unpack|: for each source in |sources|, whether to unpack it if it's an archive.
    |remove_sources|: remove |sources|.

    If |sources| contains one source, then the bundle contents will be that source.
    Otherwise, the bundle contents will be a directory with each of the sources.
    Exceptions:
    - If |git|, then each source is replaced with the result of running 'git clone |source|'
    - If |unpack| is True or a source is an archive (zip, tar.gz, etc.), then unpack the source.

    Install the contents of the directory at |source| into DATA_SUBDIRECTORY
    in a subdirectory named by a hash of the contents.

    Return a (data_hash, metadata) pair, where the metadata is a dict mapping
    keys to precomputed statistics about the new data directory.
    '''
    # Sources scheduled for deletion once the upload succeeds.
    to_delete = []
    # Create temporary directory as a staging area and put everything there.
    temp_path = tempfile.mkdtemp('-bundle_store_upload')
    temp_subpaths = []
    for source in sources:
        # Where to save |source| to (might change this value if we unpack).
        temp_subpath = os.path.join(temp_path, os.path.basename(source))
        if remove_sources:
            to_delete.append(source)
        source_unpack = unpack and zip_util.path_is_archive(source)
        if path_util.path_is_url(source):
            # Download the URL.
            print_util.open_line('BundleStore.upload: downloading %s to %s' % (source, temp_path))
            if git:
                file_util.git_clone(source, temp_subpath)
            else:
                file_util.download_url(source, temp_subpath, print_status=True)
                if source_unpack:
                    # Unpack the downloaded archive, then drop the archive itself.
                    zip_util.unpack(temp_subpath, zip_util.strip_archive_ext(temp_subpath))
                    path_util.remove(temp_subpath)
                    temp_subpath = zip_util.strip_archive_ext(temp_subpath)
            print_util.clear_line()
        else:
            # Copy the local path.
            source_path = path_util.normalize(source)
            path_util.check_isvalid(source_path, 'upload')
            # Recursively copy the directory into a new BundleStore temp directory.
            print_util.open_line('BundleStore.upload: %s => %s' % (source_path, temp_subpath))
            if source_unpack:
                zip_util.unpack(source_path, zip_util.strip_archive_ext(temp_subpath))
                temp_subpath = zip_util.strip_archive_ext(temp_subpath)
            else:
                if remove_sources:
                    path_util.rename(source_path, temp_subpath)
                else:
                    path_util.copy(source_path, temp_subpath, follow_symlinks=follow_symlinks, exclude_patterns=exclude_patterns)
            print_util.clear_line()
        temp_subpaths.append(temp_subpath)
    # NOTE(review): reconstructed from a whitespace-collapsed dump; confirm
    # the nesting of the unpack step against the original file.
    # If exactly one source, then upload that directly.
    if len(temp_subpaths) == 1:
        to_delete.append(temp_path)
        temp_path = temp_subpaths[0]
    # Multiplex between uploading a directory and uploading a file here.
    # All other path_util calls will use these lists of directories and files.
    if os.path.isdir(temp_path):
        dirs_and_files = path_util.recursive_ls(temp_path)
    else:
        dirs_and_files = ([], [temp_path])
    # Hash the contents of the temporary directory, and then if there is no
    # data with this hash value, move this directory into the data directory.
    print_util.open_line('BundleStore.upload: hashing %s' % temp_path)
    data_hash = '0x%s' % (path_util.hash_directory(temp_path, dirs_and_files),)
    print_util.clear_line()
    print_util.open_line('BundleStore.upload: computing size of %s' % temp_path)
    data_size = path_util.get_size(temp_path, dirs_and_files)
    print_util.clear_line()
    final_path = os.path.join(self.data, data_hash)
    if os.path.exists(final_path):
        # Already exists, just delete it
        path_util.remove(temp_path)
    else:
        print >>sys.stderr, 'BundleStore.upload: moving %s to %s' % (temp_path, final_path)
        path_util.rename(temp_path, final_path)
    # Delete paths.
    for path in to_delete:
        if os.path.exists(path):
            path_util.remove(path)
    # After this operation there should always be a directory at the final path.
    assert(os.path.lexists(final_path)), 'Uploaded to %s failed!' % (final_path,)
    return (data_hash, {'data_size': data_size})
def upload(self, path, follow_symlinks):
    """
    Copy the contents of the directory at |path| into the data subdirectory,
    in a subfolder named by a hash of the contents of the new data directory.
    If |path| is in a temporary directory, then we just move it.

    Return a (data_hash, metadata) pair, where the metadata is a dict mapping
    keys to precomputed statistics about the new data directory.
    """
    # Create temporary directory as a staging area.
    # If |path| is already temporary, then we use that directly
    # (with the understanding that |path| will be moved)
    if not isinstance(path, list) and os.path.realpath(path).startswith(os.path.realpath(self.temp)):
        temp_path = path
    else:
        temp_path = os.path.join(self.temp, uuid.uuid4().hex)
    if not isinstance(path, list) and path_util.path_is_url(path):
        # Have to be careful. Want to make sure if we're fetching a URL
        # that points to a file, we are allowing this.
        if path.startswith("file://"):
            path_suffix = path[7:]
            if os.path.islink(path_suffix):
                raise UsageError("Not allowed to upload symlink %s" % path_suffix)
            if not any(path_suffix.startswith(f) for f in self.direct_upload_paths):
                raise UsageError("Not allowed to upload %s (only %s allowed)" % (path_suffix, self.direct_upload_paths))
        # Download |path| if it is a URL.
        print >>sys.stderr, "BundleStore.upload: downloading %s to %s" % (path, temp_path)
        file_util.download_url(path, temp_path, print_status=True)
    elif path != temp_path:
        # Copy |path| into the temp_path.
        if isinstance(path, list):
            absolute_path = [path_util.normalize(p) for p in path]
            for p in absolute_path:
                path_util.check_isvalid(p, "upload")
        else:
            absolute_path = path_util.normalize(path)
            path_util.check_isvalid(absolute_path, "upload")
        # Recursively copy the directory into a new BundleStore temp directory.
        print >>sys.stderr, "BundleStore.upload: copying %s to %s" % (absolute_path, temp_path)
        path_util.copy(absolute_path, temp_path, follow_symlinks=follow_symlinks)
    # Multiplex between uploading a directory and uploading a file here.
    # All other path_util calls will use these lists of directories and files.
    if os.path.isdir(temp_path):
        dirs_and_files = path_util.recursive_ls(temp_path)
    else:
        dirs_and_files = ([], [temp_path])
    # Hash the contents of the temporary directory, and then if there is no
    # data with this hash value, move this directory into the data directory.
    print >>sys.stderr, "BundleStore.upload: hashing %s" % (temp_path)
    data_hash = "0x%s" % (path_util.hash_directory(temp_path, dirs_and_files),)
    data_size = path_util.get_size(temp_path, dirs_and_files)
    final_path = os.path.join(self.data, data_hash)
    final_path_exists = False
    try:
        # If data_hash already exists, then we don't need to move it over.
        os.utime(final_path, None)
        final_path_exists = True
    except OSError, e:
        if e.errno == errno.ENOENT:
            print >>sys.stderr, "BundleStore.upload: moving %s to %s" % (temp_path, final_path)
            path_util.rename(temp_path, final_path)
        else:
            raise
    # NOTE(review): this chunk appears truncated — the (data_hash, metadata)
    # return described in the docstring is not visible here; confirm against
    # the full file.
def upload_to_bundle_store(self, bundle, sources, follow_symlinks, exclude_patterns, remove_sources, git, unpack, simplify_archives):
    """
    Uploads contents for the given bundle to the bundle store.

    |sources|: specifies the locations of the contents to upload. Each element is
               either a URL, a local path or a tuple (filename, file-like object).
    |follow_symlinks|: for local path(s), whether to follow (resolve) symlinks,
                       but only if remove_sources is False.
    |exclude_patterns|: for local path(s), don't upload these patterns (e.g., *.o),
                        but only if remove_sources is False.
    |remove_sources|: for local path(s), whether |sources| should be removed
    |git|: for URLs, whether |source| is a git repo to clone.
    |unpack|: for each source in |sources|, whether to unpack it if it's an archive.
    |simplify_archives|: whether to simplify unpacked archives so that if they
                         contain a single file, the final path is just that file,
                         not a directory containing that file.

    If |sources| contains one source, then the bundle contents will be that source.
    Otherwise, the bundle contents will be a directory with each of the sources.
    Exceptions:
    - If |git|, then each source is replaced with the result of running 'git clone |source|'
    - If |unpack| is True or a source is an archive (zip, tar.gz, etc.), then unpack
      the source.
    """
    bundle_path = self._bundle_store.get_bundle_location(bundle.uuid)
    try:
        path_util.make_directory(bundle_path)
        # Note that for uploads with a single source, the directory
        # structure is simplified at the end.
        for source in sources:
            # Classify the source; exactly one of the three flags applies.
            is_url, is_local_path, is_fileobj, filename = self._interpret_source(source)
            source_output_path = os.path.join(bundle_path, filename)
            if is_url:
                if git:
                    source_output_path = file_util.strip_git_ext(source_output_path)
                    file_util.git_clone(source, source_output_path)
                else:
                    file_util.download_url(source, source_output_path)
                    # Downloaded archives are unpacked in place (the archive
                    # itself is removed); git clones are never unpacked.
                    if unpack and self._can_unpack_file(source_output_path):
                        self._unpack_file(
                            source_output_path, zip_util.strip_archive_ext(source_output_path),
                            remove_source=True, simplify_archive=simplify_archives)
            elif is_local_path:
                source_path = path_util.normalize(source)
                path_util.check_isvalid(source_path, 'upload')
                if unpack and self._can_unpack_file(source_path):
                    # Unpack straight from the source; whether the source
                    # archive survives is controlled by remove_sources.
                    self._unpack_file(
                        source_path, zip_util.strip_archive_ext(source_output_path),
                        remove_source=remove_sources, simplify_archive=simplify_archives)
                elif remove_sources:
                    # Move rather than copy when the caller gives up the source.
                    path_util.rename(source_path, source_output_path)
                else:
                    path_util.copy(source_path, source_output_path, follow_symlinks=follow_symlinks, exclude_patterns=exclude_patterns)
            elif is_fileobj:
                if unpack and zip_util.path_is_archive(filename):
                    # source is the (filename, file-like object) tuple here.
                    self._unpack_fileobj(
                        source[0], source[1],
                        zip_util.strip_archive_ext(source_output_path),
                        simplify_archive=simplify_archives)
                else:
                    with open(source_output_path, 'wb') as out:
                        shutil.copyfileobj(source[1], out)
        if len(sources) == 1:
            self._simplify_directory(bundle_path)
    except:
        # Best-effort cleanup of the partially-written bundle directory,
        # then re-raise the original exception unchanged.
        if os.path.exists(bundle_path):
            path_util.remove(bundle_path)
        raise
def upload(self, path, follow_symlinks, exclude_patterns):
    '''
    Copy the contents of the directory at |path| into the data subdirectory,
    in a subfolder named by a hash of the contents of the new data directory.
    If |path| is in a temporary directory, then we just move it.

    |path| may be a single local path, a URL, or a list of local paths.
    |follow_symlinks|: whether to resolve symlinks when copying local paths.
    |exclude_patterns|: glob-style patterns to skip when copying local paths.

    Return a (data_hash, metadata) pair, where the metadata is a dict mapping
    keys to precomputed statistics about the new data directory.
    '''
    # Create temporary directory as a staging area.
    # If |path| is already inside self.temp, then we use that directly
    # (with the understanding that |path| will be moved, not copied).
    if not isinstance(path, list) and os.path.realpath(path).startswith(
            os.path.realpath(self.temp)):
        temp_path = path
    else:
        temp_path = os.path.join(self.temp, uuid.uuid4().hex)
    if not isinstance(path, list) and path_util.path_is_url(path):
        # Have to be careful. Want to make sure if we're fetching a URL
        # that points to a local file (file://), we are allowing this:
        # no symlinks, and only under the whitelisted direct-upload roots.
        if path.startswith('file://'):
            path_suffix = path[7:]  # strip the 'file://' scheme prefix
            if os.path.islink(path_suffix):
                raise UsageError('Not allowed to upload symlink %s' % path_suffix)
            if not any(
                    path_suffix.startswith(f) for f in self.direct_upload_paths):
                raise UsageError(
                    'Not allowed to upload %s (only %s allowed)' % (path_suffix, self.direct_upload_paths))
        # Download |path| if it is a URL.
        print >> sys.stderr, 'BundleStore.upload: downloading %s to %s' % (
            path, temp_path)
        file_util.download_url(path, temp_path, print_status=True)
    elif path != temp_path:
        # Copy |path| into the temp_path (skipped when |path| is already the
        # staging directory itself).
        if isinstance(path, list):
            absolute_path = [path_util.normalize(p) for p in path]
            for p in absolute_path:
                path_util.check_isvalid(p, 'upload')
        else:
            absolute_path = path_util.normalize(path)
            path_util.check_isvalid(absolute_path, 'upload')
        # Recursively copy the directory into a new BundleStore temp directory.
        print_util.open_line('BundleStore.upload: copying %s to %s' % (absolute_path, temp_path))
        path_util.copy(absolute_path, temp_path, follow_symlinks=follow_symlinks, exclude_patterns=exclude_patterns)
        print_util.clear_line()
    # Multiplex between uploading a directory and uploading a file here.
    # All other path_util calls will use these lists of directories and files.
    if os.path.isdir(temp_path):
        dirs_and_files = path_util.recursive_ls(temp_path)
    else:
        dirs_and_files = ([], [temp_path])
    # Hash the contents of the temporary directory, and then if there is no
    # data with this hash value, move this directory into the data directory.
    print_util.open_line('BundleStore.upload: hashing %s' % temp_path)
    data_hash = '0x%s' % (path_util.hash_directory(temp_path, dirs_and_files), )
    print_util.clear_line()
    print_util.open_line('BundleStore.upload: computing size of %s' % temp_path)
    # NOTE(review): data_size is computed but not used in the visible span;
    # presumably it feeds the returned metadata dict — confirm downstream.
    data_size = path_util.get_size(temp_path, dirs_and_files)
    print_util.clear_line()
    final_path = os.path.join(self.data, data_hash)
    final_path_exists = False
    try:
        # If data_hash already exists, then we don't need to move it over;
        # os.utime doubles as an existence probe and refreshes the mtime.
        os.utime(final_path, None)
        final_path_exists = True
    except OSError, e:
        if e.errno == errno.ENOENT:
            # Destination missing: claim the hash by moving the staged data in.
            print >> sys.stderr, 'BundleStore.upload: moving %s to %s' % (
                temp_path, final_path)
            path_util.rename(temp_path, final_path)
        else:
            raise
    # NOTE(review): the (data_hash, metadata) return promised by the docstring
    # is not visible in this chunk — confirm it follows in the full source.
def upload_to_bundle_store(
    self,
    bundle,
    sources,
    follow_symlinks,
    exclude_patterns,
    remove_sources,
    git,
    unpack,
    simplify_archives,
):
    """
    Uploads contents for the given bundle to the bundle store.

    |sources|: specifies the locations of the contents to upload. Each element is
               either a URL, a local path or a tuple (filename, binary file-like object).
    |follow_symlinks|: for local path(s), whether to follow (resolve) symlinks,
                       but only if remove_sources is False.
    |exclude_patterns|: for local path(s), don't upload these patterns (e.g., *.o),
                        but only if remove_sources is False.
    |remove_sources|: for local path(s), whether |sources| should be removed
    |git|: for URLs, whether |source| is a git repo to clone.
    |unpack|: for each source in |sources|, whether to unpack it if it's an archive.
    |simplify_archives|: whether to simplify unpacked archives so that if they
                         contain a single file, the final path is just that file,
                         not a directory containing that file.

    If |sources| contains one source, then the bundle contents will be that source.
    Otherwise, the bundle contents will be a directory with each of the sources.
    Exceptions:
    - If |git|, then each source is replaced with the result of running 'git clone |source|'
    - If |unpack| is True or a source is an archive (zip, tar.gz, etc.), then unpack
      the source.
    """
    # Always apply the store-wide default excludes, on top of any caller-supplied ones.
    exclude_patterns = (self._default_exclude_patterns + exclude_patterns
                        if exclude_patterns else self._default_exclude_patterns)
    bundle_link_url = getattr(bundle.metadata, "link_url", None)
    if bundle_link_url:
        # Don't do anything for linked bundles.
        return
    bundle_path = self._bundle_store.get_bundle_location(bundle.uuid)
    try:
        path_util.make_directory(bundle_path)
        # Note that for uploads with a single source, the directory
        # structure is simplified at the end.
        for source in sources:
            # Classify the source; exactly one of the three flags applies.
            is_url, is_local_path, is_fileobj, filename = self._interpret_source(
                source)
            source_output_path = os.path.join(bundle_path, filename)
            if is_url:
                if git:
                    source_output_path = file_util.strip_git_ext(
                        source_output_path)
                    file_util.git_clone(source, source_output_path)
                else:
                    file_util.download_url(source, source_output_path)
                    # Downloaded archives are unpacked in place (the archive
                    # itself is removed); git clones are never unpacked.
                    if unpack and self._can_unpack_file(
                            source_output_path):
                        self._unpack_file(
                            source_output_path,
                            zip_util.strip_archive_ext(source_output_path),
                            remove_source=True,
                            simplify_archive=simplify_archives,
                        )
            elif is_local_path:
                source_path = path_util.normalize(source)
                path_util.check_isvalid(source_path, 'upload')
                if unpack and self._can_unpack_file(source_path):
                    # Unpack straight from the source; whether the source
                    # archive survives is controlled by remove_sources.
                    self._unpack_file(
                        source_path,
                        zip_util.strip_archive_ext(source_output_path),
                        remove_source=remove_sources,
                        simplify_archive=simplify_archives,
                    )
                elif remove_sources:
                    # Move rather than copy when the caller gives up the source.
                    path_util.rename(source_path, source_output_path)
                else:
                    path_util.copy(
                        source_path,
                        source_output_path,
                        follow_symlinks=follow_symlinks,
                        exclude_patterns=exclude_patterns,
                    )
            elif is_fileobj:
                if unpack and zip_util.path_is_archive(filename):
                    # source is the (filename, file-like object) tuple here.
                    self._unpack_fileobj(
                        source[0],
                        source[1],
                        zip_util.strip_archive_ext(source_output_path),
                        simplify_archive=simplify_archives,
                    )
                else:
                    with open(source_output_path, 'wb') as out:
                        shutil.copyfileobj(source[1], out)
        if len(sources) == 1:
            self._simplify_directory(bundle_path)
    except:
        # Best-effort cleanup of the partially-written bundle directory,
        # then re-raise the original exception unchanged.
        if os.path.exists(bundle_path):
            path_util.remove(bundle_path)
        raise
def home(self): from codalab.lib import path_util result = path_util.normalize(self.config['home']) path_util.make_directory(result) return result
def __init__(self, bundle_model, codalab_home): self._bundle_model = bundle_model self.codalab_home = path_util.normalize(codalab_home)