Exemplo n.º 1
0
def zip(path, follow_symlinks, exclude_names=None, file_name=None):
    '''
    Take a path to a file or directory (or a list of such paths) and return a
    (zip_path, sub_path) pair: the path to a zip archive containing the
    contents, and the name of the top-level entry inside that archive.

    path: a single path, or a list of paths that are copied side by side into
        one directory inside the archive.
    follow_symlinks: forwarded to path_util.copy; when false, the zip command
        is also given --symlinks.
    exclude_names: forwarded to path_util.copy (defaults to an empty list).
    file_name: name of the top-level archive entry; ZIP_SUBPATH if not given.

    Raises UsageError if the zip command cannot be started or exits non-zero.
    '''
    import subprocess

    # Avoid sharing one mutable default list across calls.
    if exclude_names is None:
        exclude_names = []

    # Validate every input before any temporary state is created.
    paths = path if isinstance(path, list) else [path]
    for p in paths:
        path_util.check_isvalid(path_util.normalize(p), 'zip_directory')

    sub_path = file_name if file_name else ZIP_SUBPATH

    # Recursively copy the contents into a temp directory.
    # TODO: this is inefficient; do the zipping from the original source
    # directly.
    temp_path = tempfile.mkdtemp()
    temp_subpath = os.path.join(temp_path, sub_path)
    try:
        if isinstance(path, list):
            os.mkdir(temp_subpath)
            for p in path:
                path_util.copy(
                    path_util.normalize(p),
                    os.path.join(temp_subpath, os.path.basename(p)),
                    follow_symlinks=follow_symlinks,
                    exclude_names=exclude_names)
        else:
            path_util.copy(
                path_util.normalize(path),
                temp_subpath,
                follow_symlinks=follow_symlinks,
                exclude_names=exclude_names)

        # TODO: shutil.make_archive / ZipFile don't preserve permissions, so
        # the external zip command is used for now.
        zip_path = temp_path + '.zip'
        command = ['zip', '-qr']
        if not follow_symlinks:
            command.append('--symlinks')
        command.extend([zip_path, sub_path])
        # An argument list (no shell) keeps names containing spaces or shell
        # metacharacters intact; cwd replaces the former "cd ... &&".
        try:
            status = subprocess.call(command, cwd=temp_path)
        except OSError:
            # The zip executable could not be started.
            raise UsageError('zip failed')
        if status != 0:
            raise UsageError('zip failed')
    finally:
        # Always clean up the staging directory, even when copy or zip fails.
        path_util.remove(temp_path)

    return zip_path, sub_path
Exemplo n.º 2
0
 def test_normalize(self):
   """Check path_util.normalize on '~', os.curdir and os.pardir."""
   home_dir = os.path.expanduser('~')
   current_dir = os.getcwd()
   parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
   cases = (
     ('~', home_dir),
     (os.curdir, current_dir),
     (os.pardir, parent_dir),
   )
   for raw_path, wanted in cases:
     normalized = path_util.normalize(raw_path)
     self.assertTrue(os.path.isabs(normalized))
     self.assertEqual(normalized, wanted)
     # Idempotency: an absolute path should be a fixed point of normalize.
     self.assertEqual(path_util.normalize(normalized), normalized)
Exemplo n.º 3
0
 def test_normalize(self):
     """Verify that path_util.normalize yields the expected absolute paths."""
     expectations = [
         ('~', os.path.expanduser('~')),
         (os.curdir, os.getcwd()),
         (os.pardir, os.path.abspath(os.path.join(os.getcwd(), os.pardir))),
     ]
     for given, expected in expectations:
         result = path_util.normalize(given)
         self.assertTrue(os.path.isabs(result))
         self.assertEqual(result, expected)
         # Normalizing an already-normalized path should change nothing.
         renormalized = path_util.normalize(result)
         self.assertEqual(renormalized, result)
Exemplo n.º 4
0
 def codalab_home(self):
     """
     Return the normalized form of "~/.codalab", after asking
     path_util.make_directory to create it.

     The location is currently fixed; it is not customizable here.
     """
     from codalab.lib import path_util
     home_dir = path_util.normalize("~/.codalab")
     path_util.make_directory(home_dir)
     return home_dir
Exemplo n.º 5
0
def zip(path):
  '''
  Take a path to a file or directory and return the path to a zip archive
  containing its contents.

  The source is first copied into a fresh temporary directory under the name
  ZIP_SUBPATH and archived from there. The temporary directory is removed
  before returning, whether or not copying or archiving succeeded.
  '''
  absolute_path = path_util.normalize(path)
  path_util.check_isvalid(absolute_path, 'zip_directory')
  # Recursively copy the directory into a temp directory.
  temp_path = tempfile.mkdtemp()
  try:
    temp_subpath = os.path.join(temp_path, ZIP_SUBPATH)
    path_util.copy(absolute_path, temp_subpath)
    # Multiplex between zipping a directory and zipping a file here, because
    # make_archive does NOT handle the file case cleanly.
    if os.path.isdir(temp_subpath):
      zip_path = shutil.make_archive(
        base_name=temp_path,
        base_dir=ZIP_SUBPATH,
        root_dir=temp_path,
        format='zip',
      )
    else:
      zip_path = temp_path + '.zip'
      with ZipFile(zip_path, 'w') as zip_file:
        zip_file.write(temp_subpath, ZIP_SUBPATH)
  finally:
    # Clean up the temporary directory even if copying or archiving raised,
    # so failed calls do not leak staging directories.
    path_util.remove(temp_path)
  return zip_path
Exemplo n.º 6
0
 def do_upload_command(self, argv, parser):
     """
     Parse the upload command's arguments, validate them, and print the
     result of self.client.upload for the current worksheet.

     argv: argument list handed to parser.parse_args.
     parser: argument parser that this method extends with the positional
         'bundle_type' and 'path' arguments plus the metadata arguments.

     Raises UsageError when the given bundle type is not in UPLOADED_TYPES.
     """
     worksheet_uuid = self.env_model.get_current_worksheet()
     type_help = 'bundle_type: [%s]' % ('|'.join(sorted(UPLOADED_TYPES)))
     parser.add_argument('bundle_type', help=type_help)
     parser.add_argument('path', help='path of the directory to upload')

     # Register metadata arguments for UploadedBundle and each uploaded type.
     seen_keys = set()
     metadata_util.add_arguments(UploadedBundle, seen_keys, parser)
     for type_name in UPLOADED_TYPES:
         metadata_util.add_arguments(
             get_bundle_subclass(type_name), seen_keys, parser)
     metadata_util.add_auto_argument(parser)

     args = parser.parse_args(argv)

     # The upload path must exist.
     upload_path = path_util.normalize(args.path)
     path_util.check_isvalid(upload_path, 'upload')

     # The bundle type must be one of the uploadable types.
     if args.bundle_type not in UPLOADED_TYPES:
         options = '|'.join(sorted(UPLOADED_TYPES))
         raise UsageError(
             'Invalid bundle type %s (options: [%s])' % (args.bundle_type, options))

     subclass = get_bundle_subclass(args.bundle_type)
     metadata = metadata_util.request_missing_data(subclass, args)
     # Validate the metadata BEFORE uploading the data, so that a bundle that
     # would fail creation does not cost any file copies.
     subclass.construct(data_hash='', metadata=metadata).validate()
     print(self.client.upload(args.bundle_type, args.path, metadata, worksheet_uuid))
Exemplo n.º 7
0
 def codalab_home(self):
     """
     Return the normalized home directory, creating it via
     path_util.make_directory.

     The directory is read from the CODALAB_HOME environment variable and
     falls back to '~/.codalab' when that variable is not set.
     """
     from codalab.lib import path_util
     home_dir = path_util.normalize(os.getenv('CODALAB_HOME', '~/.codalab'))
     path_util.make_directory(home_dir)
     return home_dir
 def setUpClass(cls):
     """
     Build the shared fixtures on the class: test root directory, bundle
     store, model, mock auth handler, and a local bundle client.
     """
     root = path_util.normalize("~/.codalab_tests")
     cls.test_root = root
     path_util.make_directory(root)
     cls.bundle_store = BundleStore(root)
     cls.model = SQLiteModel(root)
     # (name, id) pairs for the users known to the mock auth handler.
     user_specs = (('root', 0), ('user1', 1), ('user2', 2), ('user4', 4))
     users = [User(name, user_id) for name, user_id in user_specs]
     cls.auth_handler = MockAuthHandler(users)
     cls.client = LocalBundleClient(
         'local', cls.bundle_store, cls.model, cls.auth_handler)
Exemplo n.º 9
0
 def __init__(self, codalab_home):
     '''
     Record the store's directories and create them via make_directories().

     codalab_home: root directory; its data subdirectory is where all the
         bundles are actually stored, and its temp subdirectory is temporary.
     '''
     root = path_util.normalize(codalab_home)
     self.codalab_home = root
     self.data = os.path.join(root, self.DATA_SUBDIRECTORY)
     self.temp = os.path.join(root, self.TEMP_SUBDIRECTORY)
     self.make_directories()
Exemplo n.º 10
0
 def codalab_home(self):
     """
     Resolve the home directory and return its normalized path.

     Uses the CODALAB_HOME environment variable when set and '~/.codalab'
     otherwise; the directory is passed to path_util.make_directory first.
     """
     from codalab.lib import path_util
     configured = os.getenv('CODALAB_HOME', '~/.codalab')
     resolved = path_util.normalize(configured)
     path_util.make_directory(resolved)
     return resolved
Exemplo n.º 11
0
 def get_default_name(bundle_subclass, args):
   """
   Pick a default bundle name from the parsed arguments.

   - If args has a 'path' attribute: the basename of the normalized path.
   - For MakeBundle with exactly one target that contains no ':': the
     basename of that target.
   - Otherwise: MetadataDefaults.get_anonymous_name(bundle_subclass).
   """
   if hasattr(args, 'path'):
     return os.path.basename(path_util.normalize(args.path))
   if bundle_subclass is MakeBundle:
     targets = args.target
     if len(targets) == 1 and ':' not in targets[0]:
       return os.path.basename(targets[0])
   return MetadataDefaults.get_anonymous_name(bundle_subclass)
 def setUpClass(cls):
     """
     Build the shared fixtures on the class: test root directory, bundle
     store, SQLite model (bundle.db under the test root), mock auth handler,
     and a verbose local bundle client.
     """
     root = path_util.normalize("~/.codalab_tests")
     cls.test_root = root
     path_util.make_directory(root)
     cls.bundle_store = BundleStore(root)
     database_file = os.path.join(root, 'bundle.db')
     cls.model = SQLiteModel("sqlite:///{}".format(database_file), {})
     cls.model.root_user_id = '0'
     # (name, id) pairs for the users known to the mock auth handler.
     user_specs = (('root', '0'), ('user1', '1'), ('user2', '2'), ('user4', '4'))
     users = [User(name, user_id) for name, user_id in user_specs]
     cls.auth_handler = MockAuthHandler(users)
     cls.client = LocalBundleClient(
         'local', cls.bundle_store, cls.model, cls.auth_handler, verbose=1)
Exemplo n.º 13
0
def zip(path, follow_symlinks, exclude_patterns, file_name):
    '''
    Take a path to a file or directory |path| (or a list of such paths) and
    return the path to a zip archive containing its contents.

    |follow_symlinks|: forwarded to path_util.copy; when false, the zip
        command is also given --symlinks.
    |exclude_patterns|: forwarded to path_util.copy.
    |file_name|: name of the top-level entry that the zip archive contains.

    Raises UsageError if the zip command cannot be started or exits non-zero.
    '''
    import subprocess

    # Validate every input before any temporary state is created.
    paths = path if isinstance(path, list) else [path]
    for p in paths:
        path_util.check_isvalid(path_util.normalize(p), 'zip_directory')

    # Recursively copy the contents into a temp directory.
    temp_path = tempfile.mkdtemp()
    temp_subpath = os.path.join(temp_path, file_name)
    try:
        print_util.open_line('Copying %s to %s' % (path, temp_subpath))
        if isinstance(path, list):
            os.mkdir(temp_subpath)
            for p in path:
                path_util.copy(path_util.normalize(p),
                               os.path.join(temp_subpath, os.path.basename(p)),
                               follow_symlinks=follow_symlinks,
                               exclude_patterns=exclude_patterns)
        else:
            path_util.copy(path_util.normalize(path),
                           temp_subpath,
                           follow_symlinks=follow_symlinks,
                           exclude_patterns=exclude_patterns)
        print_util.clear_line()

        zip_path = temp_path + '.zip'
        command = ['zip', '-qr']
        if not follow_symlinks:
            command.append('--symlinks')
        command.extend([zip_path, file_name])
        print_util.open_line('Zipping to %s' % zip_path)
        # An argument list (no shell) keeps names containing spaces or shell
        # metacharacters intact; cwd replaces the former "cd ... &&".
        try:
            status = subprocess.call(command, cwd=temp_path)
        except OSError:
            # The zip executable could not be started.
            raise UsageError('zip failed')
        if status != 0:
            raise UsageError('zip failed')
    finally:
        # Always clean up the staging directory, even when copy or zip fails.
        path_util.remove(temp_path)

    return zip_path
Exemplo n.º 14
0
 def __init__(self, codalab_home, direct_upload_paths):
     """
     Record the store's directories and create them via make_directories().

     codalab_home: root directory; its data subdirectory is where all the
         bundles are actually stored, and its temp subdirectory is temporary.
     direct_upload_paths: we can accept file://... uploads from these paths.
     """
     root = path_util.normalize(codalab_home)
     self.codalab_home = root
     self.direct_upload_paths = direct_upload_paths
     self.data = os.path.join(root, self.DATA_SUBDIRECTORY)
     self.temp = os.path.join(root, self.TEMP_SUBDIRECTORY)
     self.make_directories()
Exemplo n.º 15
0
 def __init__(self, codalab_home, direct_upload_paths):
     '''
     Store the configuration and create the store's directories.

     codalab_home: root directory; bundles are actually stored in its data
         subdirectory, while its temp subdirectory is temporary.
     direct_upload_paths: paths from which file://... uploads are accepted.
     '''
     self.direct_upload_paths = direct_upload_paths
     home = path_util.normalize(codalab_home)
     self.codalab_home = home
     self.data = os.path.join(home, self.DATA_SUBDIRECTORY)
     self.temp = os.path.join(home, self.TEMP_SUBDIRECTORY)
     self.make_directories()
Exemplo n.º 16
0
    def __init__(self, codalab_home):
        """
        Set up the store rooted at |codalab_home|.

        Records the normalized home directory plus its 'partitions' and
        MISC_TEMP_SUBDIRECTORY paths, lists the partitions directory into
        self.nodes, and creates an empty OrderedDict as self.lru_cache.
        """
        home = path_util.normalize(codalab_home)
        self.codalab_home = home

        self.partitions = os.path.join(home, 'partitions')
        self.mtemp = os.path.join(home, MultiDiskBundleStore.MISC_TEMP_SUBDIRECTORY)

        # The parent initializer runs before the listing below so that the
        # directories are populated first.
        super(MultiDiskBundleStore, self).__init__()
        partition_names, _unused = path_util.ls(self.partitions)
        self.nodes = partition_names
        self.lru_cache = OrderedDict()
        # NOTE(review): the parent initializer is invoked a second time here;
        # confirm whether the repeat call is required.
        super(MultiDiskBundleStore, self).__init__()
Exemplo n.º 17
0
    def __init__(self, codalab_home):
        """
        Set up the store rooted at |codalab_home|.

        Creates the 'partitions' directory, refreshes the partition
        information, adds a 'default' partition when none exists, and creates
        an empty OrderedDict as self.lru_cache.
        """
        self.codalab_home = path_util.normalize(codalab_home)

        partitions_dir = os.path.join(self.codalab_home, 'partitions')
        self.partitions = partitions_dir
        path_util.make_directory(partitions_dir)

        self.refresh_partitions()
        # Ensure at least one partition exists.
        partition_count = self.__get_num_partitions()
        if partition_count == 0:
            self.add_partition(None, 'default')

        self.lru_cache = OrderedDict()
Exemplo n.º 18
0
 def codalab_home(self):
     """
     Return the normalized home directory, creating it via
     path_util.make_directory.

     The directory is read from the CODALAB_HOME environment variable and
     falls back to '~/.codalab'. As a side effect, tempfile.tempdir is set to
     the BundleStore.TEMP_SUBDIRECTORY directory under the home directory.
     """
     from codalab.lib import path_util
     home_dir = path_util.normalize(os.getenv('CODALAB_HOME', '~/.codalab'))
     path_util.make_directory(home_dir)
     # Global setting!  Temporary files go to the bundle store's temporary
     # directory, because the default /tmp generally doesn't have enough
     # space.
     store_temp = os.path.join(home_dir, BundleStore.TEMP_SUBDIRECTORY)
     tempfile.tempdir = store_temp
     return home_dir
Exemplo n.º 19
0
 def codalab_home(self):
     """
     Resolve the home directory, create it, and return its normalized path.

     Uses the CODALAB_HOME environment variable when set and '~/.codalab'
     otherwise. Also rebinds the module-wide tempfile.tempdir to the
     BundleStore.TEMP_SUBDIRECTORY directory beneath the home directory.
     """
     from codalab.lib import path_util
     configured = os.getenv('CODALAB_HOME', '~/.codalab')
     resolved = path_util.normalize(configured)
     path_util.make_directory(resolved)
     # Global setting!  The default /tmp generally doesn't have enough space,
     # so the temp directory is made the same as the bundle store's temporary
     # directory.
     tempfile.tempdir = os.path.join(resolved, BundleStore.TEMP_SUBDIRECTORY)
     return resolved
Exemplo n.º 20
0
 def get_default_description(bundle_subclass, args):
   """
   Build a default description for a bundle from the parsed arguments.

   - Uploaded types: 'Upload <normalized path>'.
   - MakeBundle: 'Package <comma-separated targets>'.
   - RunBundle: 'Run <program> on <input>: <repr of command>'.
   - Anything else: the empty string.
   """
   if bundle_subclass.BUNDLE_TYPE in UPLOADED_TYPES:
     return 'Upload %s' % (path_util.normalize(args.path),)
   if bundle_subclass is MakeBundle:
     joined_targets = ', '.join(args.target)
     return 'Package %s' % (joined_targets)
   if bundle_subclass is RunBundle:
     fields = dict(
       program=args.program_target,
       input=args.input_target,
       command=repr(args.command),
     )
     return 'Run {program} on {input}: {command}'.format(**fields)
   return ''
 def setUpClass(cls):
     """
     Build the shared fixtures on the class: test root directory, multi-disk
     bundle store, SQLite model (bundle.db under the test root) with its
     users registered, mock auth handler, and a verbose local bundle client.
     """
     root = path_util.normalize("~/.codalab_tests")
     cls.test_root = root
     path_util.make_directory(root)
     cls.bundle_store = MultiDiskBundleStore(root)
     database_url = "sqlite:///{}".format(os.path.join(root, 'bundle.db'))
     quotas = {'time_quota': 1e12, 'disk_quota': 1e12}
     cls.model = SQLiteModel(database_url, quotas)
     cls.model.root_user_id = '0'
     # (name, id) pairs for the users known to the mock auth handler.
     user_specs = (('root', '0'), ('user1', '1'), ('user2', '2'), ('user4', '4'))
     users = [User(name, user_id) for name, user_id in user_specs]
     cls.auth_handler = MockAuthHandler(users)
     # Register every user with the model as verified.
     for account in users:
         email = account.name + '@codalab.org'
         cls.model.add_user(account.name, email, '',
                            user_id=account.unique_id, is_verified=True)
     cls.client = LocalBundleClient(
         'local', cls.bundle_store, cls.model, None, None, None,
         cls.auth_handler, verbose=1)
Exemplo n.º 22
0
    def __init__(self, codalab_home):
        """
        Set up the store rooted at |codalab_home|.

        Records the normalized home directory plus its 'partitions' and
        MISC_TEMP_SUBDIRECTORY paths, then builds self.ring as a HashRing over
        the entries listed in the partitions directory.
        """
        home = path_util.normalize(codalab_home)
        self.codalab_home = home

        self.partitions = os.path.join(home, 'partitions')
        self.mtemp = os.path.join(
            home, MultiDiskBundleStore.MISC_TEMP_SUBDIRECTORY)

        # The parent initializer runs before the listing below so that the
        # directories are populated first.
        super(MultiDiskBundleStore, self).__init__()
        partition_names, _unused = path_util.ls(self.partitions)

        self.ring = HashRing(partition_names)
        # NOTE(review): the parent initializer is invoked a second time here;
        # confirm whether the repeat call is required.
        super(MultiDiskBundleStore, self).__init__()
Exemplo n.º 23
0
def zip(path, follow_symlinks, exclude_patterns, file_name):
    '''
    Take a path to a file or directory |path| (or a list of such paths) and
    return the path to a zip archive containing its contents.

    |follow_symlinks|: forwarded to path_util.copy; when false, the zip
        command is also given --symlinks.
    |exclude_patterns|: forwarded to path_util.copy.
    |file_name|: name of the top-level entry that the zip archive contains.

    Raises UsageError if the zip command cannot be started or exits non-zero.
    '''
    import subprocess

    # Validate every input before any temporary state is created.
    sources = path if isinstance(path, list) else [path]
    for source in sources:
        path_util.check_isvalid(path_util.normalize(source), 'zip_directory')

    # Recursively copy the contents into a temp directory.
    temp_path = tempfile.mkdtemp()
    temp_subpath = os.path.join(temp_path, file_name)
    try:
        print_util.open_line('Copying %s to %s' % (path, temp_subpath))
        if isinstance(path, list):
            os.mkdir(temp_subpath)
            for source in path:
                destination = os.path.join(temp_subpath, os.path.basename(source))
                path_util.copy(path_util.normalize(source), destination,
                               follow_symlinks=follow_symlinks,
                               exclude_patterns=exclude_patterns)
        else:
            path_util.copy(path_util.normalize(path), temp_subpath,
                           follow_symlinks=follow_symlinks,
                           exclude_patterns=exclude_patterns)
        print_util.clear_line()

        zip_path = temp_path + '.zip'
        command = ['zip', '-qr']
        if not follow_symlinks:
            command.append('--symlinks')
        command.extend([zip_path, file_name])
        print_util.open_line('Zipping to %s' % zip_path)
        # An argument list (no shell) keeps names containing spaces or shell
        # metacharacters intact; cwd replaces the former "cd ... &&".
        try:
            status = subprocess.call(command, cwd=temp_path)
        except OSError:
            # The zip executable could not be started.
            raise UsageError('zip failed')
        if status != 0:
            raise UsageError('zip failed')
    finally:
        # Always clean up the staging directory, even when copy or zip fails.
        path_util.remove(temp_path)

    return zip_path
Exemplo n.º 24
0
 def get_default_name(bundle_subclass, args):
     """
     Pick a default bundle name from the parsed arguments.

     - UploadedBundle subclasses: the basenames of the normalized upload
       paths joined with '-', passed through spec_util.create_default_name.
     - MakeBundle: the basename of a single target spec without ':', or a
       name created from all target specs joined by spaces.
     - RunBundle: a name created from the command.

     Raises UsageError for any other class.
     """
     if issubclass(bundle_subclass, UploadedBundle):
         basenames = [
             os.path.basename(path_util.normalize(path)) for path in args.path
         ]
         return spec_util.create_default_name(None, '-'.join(basenames))

     if bundle_subclass is MakeBundle:
         specs = args.target_spec
         is_direct_link = len(specs) == 1 and ':' not in specs[0]
         if is_direct_link:
             return os.path.basename(specs[0])
         # Multiple targets.
         joined = ' '.join(specs)
         return spec_util.create_default_name(bundle_subclass.BUNDLE_TYPE, str(joined))

     if bundle_subclass is RunBundle:
         return spec_util.create_default_name(bundle_subclass.BUNDLE_TYPE, args.command)

     raise UsageError("Unhandled class: %s" % bundle_subclass)
 def setUpClass(cls):
     """
     Build the shared fixtures on the class: test root directory, bundle
     store, model, mock auth handler, and a verbose local bundle client.
     """
     root = path_util.normalize("~/.codalab_tests")
     cls.test_root = root
     path_util.make_directory(root)
     cls.bundle_store = BundleStore(root, [])
     cls.model = SQLiteModel(root)
     cls.model.root_user_id = '0'
     # (name, id) pairs for the users known to the mock auth handler.
     user_specs = (('root', '0'), ('user1', '1'), ('user2', '2'), ('user4', '4'))
     users = [User(name, user_id) for name, user_id in user_specs]
     cls.auth_handler = MockAuthHandler(users)
     cls.client = LocalBundleClient(
         'local', cls.bundle_store, cls.model, cls.auth_handler, verbose=1)
 def get_default_name(bundle_subclass, args):
     """
     Derive a default bundle name from the parsed arguments.

     - UploadedBundle subclasses: '-'-joined basenames of the normalized
       upload paths, passed through spec_util.create_default_name.
     - MakeBundle: the basename of the only target spec when it has no ':',
       otherwise a name created from the space-joined target specs.
     - RunBundle: a name created from the command.

     Raises UsageError for any other class.
     """
     bundle_type = None
     if issubclass(bundle_subclass, UploadedBundle):
         name_parts = []
         for upload_path in args.path:
             name_parts.append(os.path.basename(path_util.normalize(upload_path)))
         return spec_util.create_default_name(bundle_type, '-'.join(name_parts))
     elif bundle_subclass is MakeBundle:
         target_specs = args.target_spec
         if len(target_specs) != 1 or ':' in target_specs[0]:
             # Multiple targets.
             combined = str(' '.join(target_specs))
             return spec_util.create_default_name(
                 bundle_subclass.BUNDLE_TYPE, combined)
         # Direct link.
         return os.path.basename(target_specs[0])
     elif bundle_subclass is RunBundle:
         return spec_util.create_default_name(
             bundle_subclass.BUNDLE_TYPE, args.command)
     raise UsageError("Unhandled class: %s" % bundle_subclass)
 def setUpClass(cls):
     """
     Build the shared fixtures on the class: test root directory, multi-disk
     bundle store, SQLite model (bundle.db under the test root), mock auth
     handler, and a verbose local bundle client.
     """
     root = path_util.normalize("~/.codalab_tests")
     cls.test_root = root
     path_util.make_directory(root)
     cls.bundle_store = MultiDiskBundleStore(root)
     database_url = "sqlite:///{}".format(os.path.join(root, 'bundle.db'))
     cls.model = SQLiteModel(database_url, {})
     cls.model.root_user_id = '0'
     # (name, id) pairs for the users known to the mock auth handler.
     user_specs = (('root', '0'), ('user1', '1'), ('user2', '2'), ('user4', '4'))
     users = [User(name, user_id) for name, user_id in user_specs]
     cls.auth_handler = MockAuthHandler(users)
     cls.client = LocalBundleClient(
         'local', cls.bundle_store, cls.model, None, cls.auth_handler,
         verbose=1)
Exemplo n.º 28
0
    def upload(self, path, allow_symlinks=False):
        '''
        Copy the contents of the directory at path into the data subdirectory,
        in a subfolder named by a hash of the contents of the new data directory.

        path: file or directory to upload; it is normalized and validated first.
        allow_symlinks: when false, path_util.check_for_symlinks is run on the
            staged copy.

        Return a (data_hash, metadata) pair, where the metadata is a dict mapping
        keys to precomputed statistics about the new data directory.

        NOTE(review): the portion of this method visible here ends after the
        try/except below and contains no return statement; confirm against the
        full source.
        '''
        absolute_path = path_util.normalize(path)
        path_util.check_isvalid(absolute_path, 'upload')
        # Recursively copy the directory into a new BundleStore temp directory.
        # A random hex name is used for the staging copy under self.temp.
        temp_directory = uuid.uuid4().hex
        temp_path = os.path.join(self.temp, temp_directory)
        path_util.copy(absolute_path, temp_path)
        # Multiplex between uploading a directory and uploading a file here.
        # All other path_util calls will use these lists of directories and files.
        if os.path.isdir(temp_path):
            dirs_and_files = path_util.recursive_ls(temp_path)
        else:
            # A single file: no directories, one file.
            dirs_and_files = ([], [temp_path])
        if not allow_symlinks:
            path_util.check_for_symlinks(temp_path, dirs_and_files)
        path_util.set_permissions(temp_path, 0o755, dirs_and_files)
        # Hash the contents of the temporary directory, and then if there is no
        # data with this hash value, move this directory into the data directory.
        data_hash = '0x%s' % (path_util.hash_directory(temp_path, dirs_and_files),)
        data_size = path_util.get_size(temp_path, dirs_and_files)
        final_path = os.path.join(self.data, data_hash)
        final_path_exists = False
        try:
            # Probe for an existing entry by updating its timestamps; os.utime
            # raises OSError when final_path cannot be accessed.
            os.utime(final_path, None)
            final_path_exists = True
        except OSError, e:
            if e.errno == errno.ENOENT:
                # Nothing is stored under this hash yet: move the staged copy
                # into place.
                os.rename(temp_path, final_path)
            else:
                raise
Exemplo n.º 29
0
    def upload(self, sources, follow_symlinks, exclude_patterns, git, unpack, remove_sources, uuid):
        """
        Install the given sources directly at their final location in the
        bundle store and return statistics about the result.

        |sources|: specifies the locations of the contents to upload.  Each element is either a URL or a local path.
        |follow_symlinks|: for local path(s), whether to follow (resolve) symlinks
        |exclude_patterns|: for local path(s), don't upload these patterns (e.g., *.o)
        |git|: for URL, whether |source| is a git repo to clone.
        |unpack|: for each source in |sources|, whether to unpack it if it's an archive.
        |remove_sources|: remove |sources|.
        |uuid|: used to pick the partition (via self.ring.get_node) and as the
            name of the final path under that partition's DATA_SUBDIRECTORY.

        If |sources| contains one source, then the bundle contents will be that source.
        Otherwise, the bundle contents will be a directory with each of the sources.
        Exceptions:
        - If |git|, then each source is replaced with the result of running 'git clone |source|'
        - If |unpack| is True and a source is an archive (zip, tar.gz, etc.), then unpack the source.

        Raises UsageError if the final path already exists.

        Return a (data_hash, metadata) pair, where the metadata is a dict mapping
        keys to precomputed statistics about the new data directory.
        """
        to_delete = []

        # If just a single file, set the final path to be equal to that file
        single_path = len(sources) == 1

        # Determine which disk this will go on
        disk_choice = self.ring.get_node(uuid)

        # The destination is named by |uuid|, not by the content hash.
        final_path = os.path.join(self.partitions, disk_choice, self.DATA_SUBDIRECTORY, uuid)
        if os.path.exists(final_path):
            raise UsageError('Path %s already present in bundle store' % final_path)
        # Only make if not there
        elif not single_path:
            path_util.make_directory(final_path)

        # Paths to resources
        # NOTE(review): this list is filled below but never read in this method.
        subpaths = []

        for source in sources:
            # Where to save |source| to (might change this value if we unpack).
            if not single_path:
                subpath = os.path.join(final_path, os.path.basename(source))
            else:
                subpath = final_path

            if remove_sources:
                to_delete.append(source)
            # Unpack only when requested AND the source is an archive.
            source_unpack = unpack and zip_util.path_is_archive(source)

            if source_unpack and single_path:
                # Load the file into the bundle store under the given path
                # (the archive extension is appended here and stripped again
                # after unpacking).
                subpath += zip_util.get_archive_ext(source)

            if path_util.path_is_url(source):
                # Download the URL.
                print_util.open_line('BundleStore.upload: downloading %s to %s' % (source, subpath))
                if git:
                    file_util.git_clone(source, subpath)
                else:
                    file_util.download_url(source, subpath, print_status=True)
                    if source_unpack:
                        # Unpack next to the download, drop the downloaded
                        # archive, and point subpath at the unpacked result.
                        zip_util.unpack(subpath, zip_util.strip_archive_ext(subpath))
                        path_util.remove(subpath)
                        subpath = zip_util.strip_archive_ext(subpath)
                print_util.clear_line()
            else:
                # Copy the local path.
                source_path = path_util.normalize(source)
                path_util.check_isvalid(source_path, 'upload')

                # Recursively copy the directory into the BundleStore
                print_util.open_line('BundleStore.upload: %s => %s' % (source_path, subpath))
                if source_unpack:
                    # Unpack straight from the local archive to the destination.
                    zip_util.unpack(source_path, zip_util.strip_archive_ext(subpath))
                    subpath = zip_util.strip_archive_ext(subpath)
                else:
                    if remove_sources:
                        # The source is going away anyway, so move it instead
                        # of copying.
                        path_util.rename(source_path, subpath)
                    else:
                        path_util.copy(source_path, subpath, follow_symlinks=follow_symlinks, exclude_patterns=exclude_patterns)
                print_util.clear_line()

            subpaths.append(subpath)

        # Multiplex between a directory and a single file; the path_util calls
        # below take this (directories, files) pair.
        dirs_and_files = None
        if os.path.isdir(final_path):
            dirs_and_files = path_util.recursive_ls(final_path)
        else:
            dirs_and_files = [], [final_path]

        # Hash the contents of the bundle directory. Update the data_hash attribute
        # for the bundle
        print_util.open_line('BundleStore.upload: hashing %s' % final_path)
        data_hash = '0x%s' % (path_util.hash_directory(final_path, dirs_and_files))
        print_util.clear_line()
        print_util.open_line('BundleStore.upload: computing size of %s' % final_path)
        data_size = path_util.get_size(final_path, dirs_and_files)
        print_util.clear_line()

        # Delete paths.
        # Sources that were renamed into place no longer exist and are skipped.
        for path in to_delete:
            if os.path.exists(path):
                path_util.remove(path)

        # After this operation there should always be a directory at the final path.
        assert (os.path.lexists(final_path)), 'Uploaded to %s failed!' % (final_path,)
        return (data_hash, {'data_size': data_size})
 def get_default_description(bundle_subclass, args):
     """
     Return "Upload <normalized paths separated by spaces>" for uploaded
     bundle types, and the empty string for every other type.
     """
     if bundle_subclass.BUNDLE_TYPE not in UPLOADED_TYPES:
         return ""
     normalized_paths = [path_util.normalize(path) for path in args.path]
     return "Upload %s" % (" ".join(normalized_paths),)
Exemplo n.º 31
0
 def __init__(self, codalab_home):
     """
     Record the normalized home directory and the DATA_SUBDIRECTORY and
     TEMP_SUBDIRECTORY paths beneath it, then call make_directories().
     """
     root = path_util.normalize(codalab_home)
     self.codalab_home = root
     self.data = os.path.join(root, self.DATA_SUBDIRECTORY)
     self.temp = os.path.join(root, self.TEMP_SUBDIRECTORY)
     self.make_directories()
Exemplo n.º 32
0
    def upload(self, sources, follow_symlinks, exclude_patterns, git, unpack, remove_sources):
        '''
        Stage the given sources in a temporary directory, hash the result, and
        move it into the data directory under a name derived from that hash.

        |sources|: specifies the locations of the contents to upload.  Each element is either a URL or a local path.
        |follow_symlinks|: for local path(s), whether to follow (resolve) symlinks
        |exclude_patterns|: for local path(s), don't upload these patterns (e.g., *.o)
        |git|: for URL, whether |source| is a git repo to clone.
        |unpack|: for each source in |sources|, whether to unpack it if it's an archive.
        |remove_sources|: remove |sources|.

        If |sources| contains one source, then the bundle contents will be that source.
        Otherwise, the bundle contents will be a directory with each of the sources.
        Exceptions:
        - If |git|, then each source is replaced with the result of running 'git clone |source|'
        - If |unpack| is True and a source is an archive (zip, tar.gz, etc.), then unpack the source.

        Install the contents of the directory at |source| into
        DATA_SUBDIRECTORY in a subdirectory named by a hash of the contents.

        Return a (data_hash, metadata) pair, where the metadata is a dict mapping
        keys to precomputed statistics about the new data directory.
        '''
        to_delete = []

        # Create temporary directory as a staging area and put everything there.
        temp_path = tempfile.mkdtemp('-bundle_store_upload')
        temp_subpaths = []
        for source in sources:
            # Where to save |source| to (might change this value if we unpack).
            temp_subpath = os.path.join(temp_path, os.path.basename(source))
            if remove_sources:
                to_delete.append(source)
            # Unpack only when requested AND the source is an archive.
            source_unpack = unpack and zip_util.path_is_archive(source)

            if path_util.path_is_url(source):
                # Download the URL.
                print_util.open_line('BundleStore.upload: downloading %s to %s' % (source, temp_path))
                if git:
                    file_util.git_clone(source, temp_subpath)
                else:
                    file_util.download_url(source, temp_subpath, print_status=True)
                    if source_unpack:
                        # Unpack next to the download, drop the downloaded
                        # archive, and point temp_subpath at the unpacked result.
                        zip_util.unpack(temp_subpath, zip_util.strip_archive_ext(temp_subpath))
                        path_util.remove(temp_subpath)
                        temp_subpath = zip_util.strip_archive_ext(temp_subpath)
                print_util.clear_line()
            else:
                # Copy the local path.
                source_path = path_util.normalize(source)
                path_util.check_isvalid(source_path, 'upload')

                # Recursively copy the directory into a new BundleStore temp directory.
                print_util.open_line('BundleStore.upload: %s => %s' % (source_path, temp_subpath))
                if source_unpack:
                    # Unpack straight from the local archive into the staging area.
                    zip_util.unpack(source_path, zip_util.strip_archive_ext(temp_subpath))
                    temp_subpath = zip_util.strip_archive_ext(temp_subpath)
                else:
                    if remove_sources:
                        # The source is going away anyway, so move it instead
                        # of copying.
                        path_util.rename(source_path, temp_subpath)
                    else:
                        path_util.copy(source_path, temp_subpath, follow_symlinks=follow_symlinks, exclude_patterns=exclude_patterns)
                print_util.clear_line()

            temp_subpaths.append(temp_subpath)

        # If exactly one source, then upload that directly.
        # The enclosing staging directory is queued for deletion instead.
        if len(temp_subpaths) == 1:
            to_delete.append(temp_path)
            temp_path = temp_subpaths[0]

        # Multiplex between uploading a directory and uploading a file here.
        # All other path_util calls will use these lists of directories and files.
        if os.path.isdir(temp_path):
            dirs_and_files = path_util.recursive_ls(temp_path)
        else:
            dirs_and_files = ([], [temp_path])

        # Hash the contents of the temporary directory, and then if there is no
        # data with this hash value, move this directory into the data directory.
        print_util.open_line('BundleStore.upload: hashing %s' % temp_path)
        data_hash = '0x%s' % (path_util.hash_directory(temp_path, dirs_and_files),)
        print_util.clear_line()
        print_util.open_line('BundleStore.upload: computing size of %s' % temp_path)
        data_size = path_util.get_size(temp_path, dirs_and_files)
        print_util.clear_line()
        final_path = os.path.join(self.data, data_hash)
        if os.path.exists(final_path):
            # Already exists, just delete it
            # (the staged copy is removed; the stored data is left untouched).
            path_util.remove(temp_path)
        else:
            print >>sys.stderr, 'BundleStore.upload: moving %s to %s' % (temp_path, final_path)
            path_util.rename(temp_path, final_path)

        # Delete paths.
        # Sources that were renamed into the staging area no longer exist and
        # are skipped.
        for path in to_delete:
            if os.path.exists(path):
                path_util.remove(path)

        # After this operation there should always be a directory at the final path.
        assert(os.path.lexists(final_path)), 'Uploaded to %s failed!' % (final_path,)
        return (data_hash, {'data_size': data_size})
Exemplo n.º 33
0
    def upload(self, path, follow_symlinks):
        """
        Copy the contents of |path| into the data subdirectory, in a subfolder
        named by a hash of the contents of the new data directory.
        If |path| is in a temporary directory, then we just move it.

        |path|: a URL, a single local path, or a list of local paths.
        |follow_symlinks|: forwarded to path_util.copy when copying local paths.

        Return a (data_hash, metadata) pair, where the metadata is a dict mapping
        keys to precomputed statistics about the new data directory.

        Raises UsageError if a file:// URL names a symlink or a location that
        does not start with one of self.direct_upload_paths. Any OSError other
        than ENOENT raised while probing the final path is propagated.
        """
        single_path = not isinstance(path, list)

        # Create temporary directory as a staging area.
        # If |path| is already temporary, then we use that directly
        # (with the understanding that |path| will be moved)
        already_staged = False
        if single_path:
            temp_root = os.path.realpath(self.temp)
            real_path = os.path.realpath(path)
            # Match on a directory boundary: a bare startswith() would also
            # accept a sibling such as "<temp>_backup" and then move the
            # caller's data out from under them.
            already_staged = (
                real_path == temp_root
                or real_path.startswith(os.path.join(temp_root, ''))
            )
        if already_staged:
            temp_path = path
        else:
            temp_path = os.path.join(self.temp, uuid.uuid4().hex)

        if single_path and path_util.path_is_url(path):
            # Have to be careful.  Want to make sure if we're fetching a URL
            # that points to a file, we are allowing this.
            if path.startswith("file://"):
                path_suffix = path[7:]
                if os.path.islink(path_suffix):
                    raise UsageError("Not allowed to upload symlink %s" % path_suffix)
                # NOTE(review): this is a plain string-prefix match on an
                # un-normalized path; confirm that ".." segments cannot be
                # used to escape the allowed directories.
                if not any(path_suffix.startswith(f) for f in self.direct_upload_paths):
                    raise UsageError(
                        "Not allowed to upload %s (only %s allowed)" % (path_suffix, self.direct_upload_paths)
                    )

            # Download |path| if it is a URL.
            sys.stderr.write("BundleStore.upload: downloading %s to %s\n" % (path, temp_path))
            file_util.download_url(path, temp_path, print_status=True)
        elif path != temp_path:
            # Copy |path| into the temp_path.
            if single_path:
                absolute_path = path_util.normalize(path)
                path_util.check_isvalid(absolute_path, "upload")
            else:
                absolute_path = [path_util.normalize(p) for p in path]
                for p in absolute_path:
                    path_util.check_isvalid(p, "upload")

            # Recursively copy the directory into a new BundleStore temp directory.
            sys.stderr.write("BundleStore.upload: copying %s to %s\n" % (absolute_path, temp_path))
            path_util.copy(absolute_path, temp_path, follow_symlinks=follow_symlinks)

        # Multiplex between uploading a directory and uploading a file here.
        # All other path_util calls will use these lists of directories and files.
        if os.path.isdir(temp_path):
            dirs_and_files = path_util.recursive_ls(temp_path)
        else:
            dirs_and_files = ([], [temp_path])

        # Hash the contents of the temporary directory, and then if there is no
        # data with this hash value, move this directory into the data directory.
        sys.stderr.write("BundleStore.upload: hashing %s\n" % (temp_path))
        data_hash = "0x%s" % (path_util.hash_directory(temp_path, dirs_and_files),)
        data_size = path_util.get_size(temp_path, dirs_and_files)
        final_path = os.path.join(self.data, data_hash)
        try:
            # If data_hash already exists, then we don't need to move it over.
            # utime doubles as the existence probe and touches the stored copy.
            os.utime(final_path, None)
        except OSError as e:
            if e.errno != errno.ENOENT:
                raise
            sys.stderr.write("BundleStore.upload: moving %s to %s\n" % (temp_path, final_path))
            path_util.rename(temp_path, final_path)
        else:
            # Identical data is already stored; drop the staged duplicate so
            # it does not pile up in the temp area.
            path_util.remove(temp_path)

        # After this operation there should always be something at the final path.
        assert os.path.lexists(final_path), "Uploaded to %s failed!" % (final_path,)
        return (data_hash, {"data_size": data_size})
Exemplo n.º 34
0
    def upload_to_bundle_store(self, bundle, sources, follow_symlinks, exclude_patterns, remove_sources, git, unpack, simplify_archives):
        """
        Materialize |sources| inside the bundle store location for |bundle|.

        |sources|: where the contents come from. Each element is a URL, a local
                   path, or a (filename, file-like object) tuple.
        |follow_symlinks|: handed to path_util.copy; only consulted when a local
                           path is copied (i.e. not renamed and not unpacked).
        |exclude_patterns|: handed to path_util.copy under the same conditions
                            (e.g., *.o).
        |remove_sources|: for local paths, rename the source into place instead
                          of copying it; also forwarded as remove_source when a
                          local archive is unpacked.
        |git|: for URLs, clone the source with git instead of downloading it.
        |unpack|: unpack a source when it is also recognized as an archive.
        |simplify_archives|: forwarded to the unpack helpers so that an archive
                             holding a single file ends up as that file rather
                             than a directory containing it.

        With exactly one source the bundle directory is passed through
        self._simplify_directory at the end; otherwise the bundle is a directory
        holding one entry per source.

        If anything raises, the bundle location is removed (when it exists) and
        the exception is re-raised.
        """
        bundle_path = self._bundle_store.get_bundle_location(bundle.uuid)
        try:
            path_util.make_directory(bundle_path)
            # Single-source uploads get their directory layout flattened after
            # the loop, so every source is first written under bundle_path.
            for src in sources:
                from_url, from_local, from_fileobj, name = self._interpret_source(src)
                dest = os.path.join(bundle_path, name)

                if from_url and git:
                    dest = file_util.strip_git_ext(dest)
                    file_util.git_clone(src, dest)
                    continue

                if from_url:
                    file_util.download_url(src, dest)
                    if unpack and self._can_unpack_file(dest):
                        # The downloaded archive is only an intermediate, so
                        # it is always removed after unpacking.
                        self._unpack_file(
                            dest,
                            zip_util.strip_archive_ext(dest),
                            remove_source=True,
                            simplify_archive=simplify_archives,
                        )
                    continue

                if from_local:
                    local_path = path_util.normalize(src)
                    path_util.check_isvalid(local_path, 'upload')

                    if unpack and self._can_unpack_file(local_path):
                        self._unpack_file(
                            local_path,
                            zip_util.strip_archive_ext(dest),
                            remove_source=remove_sources,
                            simplify_archive=simplify_archives,
                        )
                        continue
                    if remove_sources:
                        path_util.rename(local_path, dest)
                        continue
                    path_util.copy(
                        local_path,
                        dest,
                        follow_symlinks=follow_symlinks,
                        exclude_patterns=exclude_patterns,
                    )
                    continue

                if from_fileobj:
                    if unpack and zip_util.path_is_archive(name):
                        self._unpack_fileobj(
                            src[0],
                            src[1],
                            zip_util.strip_archive_ext(dest),
                            simplify_archive=simplify_archives,
                        )
                        continue
                    with open(dest, 'wb') as sink:
                        shutil.copyfileobj(src[1], sink)

            if len(sources) == 1:
                self._simplify_directory(bundle_path)
        except:
            # Best-effort rollback of a partially written bundle, then let the
            # original error propagate.
            if os.path.exists(bundle_path):
                path_util.remove(bundle_path)
            raise
Exemplo n.º 35
0
    def upload(self, path, follow_symlinks, exclude_patterns):
        '''
        Copy the contents of |path| into the data subdirectory, in a subfolder
        named by a hash of the contents of the new data directory.
        If |path| is in a temporary directory, then we just move it.

        |path|: a URL, a single local path, or a list of local paths.
        |follow_symlinks|: forwarded to path_util.copy when copying local paths.
        |exclude_patterns|: forwarded to path_util.copy when copying local paths.

        Return a (data_hash, metadata) pair, where the metadata is a dict mapping
        keys to precomputed statistics about the new data directory.

        Raises UsageError if a file:// URL names a symlink or a location that
        does not start with one of self.direct_upload_paths. Any OSError other
        than ENOENT raised while probing the final path is propagated.
        '''
        single_path = not isinstance(path, list)

        # Create temporary directory as a staging area.
        # If |path| is already temporary, then we use that directly
        # (with the understanding that |path| will be moved)
        already_staged = False
        if single_path:
            temp_root = os.path.realpath(self.temp)
            real_path = os.path.realpath(path)
            # Match on a directory boundary: a bare startswith() would also
            # accept a sibling such as "<temp>_backup" and then move the
            # caller's data out from under them.
            already_staged = (
                real_path == temp_root
                or real_path.startswith(os.path.join(temp_root, ''))
            )
        if already_staged:
            temp_path = path
        else:
            temp_path = os.path.join(self.temp, uuid.uuid4().hex)

        if single_path and path_util.path_is_url(path):
            # Have to be careful.  Want to make sure if we're fetching a URL
            # that points to a file, we are allowing this.
            if path.startswith('file://'):
                path_suffix = path[7:]
                if os.path.islink(path_suffix):
                    raise UsageError('Not allowed to upload symlink %s' %
                                     path_suffix)
                # NOTE(review): this is a plain string-prefix match on an
                # un-normalized path; confirm that ".." segments cannot be
                # used to escape the allowed directories.
                if not any(
                        path_suffix.startswith(f)
                        for f in self.direct_upload_paths):
                    raise UsageError(
                        'Not allowed to upload %s (only %s allowed)' %
                        (path_suffix, self.direct_upload_paths))

            # Download |path| if it is a URL.
            sys.stderr.write('BundleStore.upload: downloading %s to %s\n' % (
                path, temp_path))
            file_util.download_url(path, temp_path, print_status=True)
        elif path != temp_path:
            # Copy |path| into the temp_path.
            if single_path:
                absolute_path = path_util.normalize(path)
                path_util.check_isvalid(absolute_path, 'upload')
            else:
                absolute_path = [path_util.normalize(p) for p in path]
                for p in absolute_path:
                    path_util.check_isvalid(p, 'upload')

            # Recursively copy the directory into a new BundleStore temp directory.
            print_util.open_line('BundleStore.upload: copying %s to %s' %
                                 (absolute_path, temp_path))
            path_util.copy(absolute_path,
                           temp_path,
                           follow_symlinks=follow_symlinks,
                           exclude_patterns=exclude_patterns)
            print_util.clear_line()

        # Multiplex between uploading a directory and uploading a file here.
        # All other path_util calls will use these lists of directories and files.
        if os.path.isdir(temp_path):
            dirs_and_files = path_util.recursive_ls(temp_path)
        else:
            dirs_and_files = ([], [temp_path])

        # Hash the contents of the temporary directory, and then if there is no
        # data with this hash value, move this directory into the data directory.
        print_util.open_line('BundleStore.upload: hashing %s' % temp_path)
        data_hash = '0x%s' % (path_util.hash_directory(temp_path,
                                                       dirs_and_files), )
        print_util.clear_line()
        print_util.open_line('BundleStore.upload: computing size of %s' %
                             temp_path)
        data_size = path_util.get_size(temp_path, dirs_and_files)
        print_util.clear_line()
        final_path = os.path.join(self.data, data_hash)
        try:
            # If data_hash already exists, then we don't need to move it over.
            # utime doubles as the existence probe and touches the stored copy.
            os.utime(final_path, None)
        except OSError as e:
            if e.errno != errno.ENOENT:
                raise
            sys.stderr.write('BundleStore.upload: moving %s to %s\n' % (
                temp_path, final_path))
            path_util.rename(temp_path, final_path)
        else:
            # Identical data is already stored; drop the staged duplicate so
            # it does not pile up in the temp area.
            path_util.remove(temp_path)

        # After this operation there should always be something at the final path.
        assert os.path.lexists(final_path), 'Uploaded to %s failed!' % (final_path,)
        return (data_hash, {'data_size': data_size})
Exemplo n.º 36
0
    def upload_to_bundle_store(
        self,
        bundle,
        sources,
        follow_symlinks,
        exclude_patterns,
        remove_sources,
        git,
        unpack,
        simplify_archives,
    ):
        """
        Materialize |sources| inside the bundle store location for |bundle|.

        |sources|: where the contents come from. Each element is a URL, a local
                   path, or a (filename, binary file-like object) tuple.
        |follow_symlinks|: handed to path_util.copy; only consulted when a local
                           path is copied (i.e. not renamed and not unpacked).
        |exclude_patterns|: extra patterns (e.g., *.o) appended to
                            self._default_exclude_patterns and handed to
                            path_util.copy under the same conditions.
        |remove_sources|: for local paths, rename the source into place instead
                          of copying it; also forwarded as remove_source when a
                          local archive is unpacked.
        |git|: for URLs, clone the source with git instead of downloading it.
        |unpack|: unpack a source when it is also recognized as an archive.
        |simplify_archives|: forwarded to the unpack helpers so that an archive
                             holding a single file ends up as that file rather
                             than a directory containing it.

        Bundles whose metadata carries a truthy link_url are left untouched and
        the method returns immediately.

        With exactly one source the bundle directory is passed through
        self._simplify_directory at the end; otherwise the bundle is a directory
        holding one entry per source.

        If anything raises, the bundle location is removed (when it exists) and
        the exception is re-raised.
        """
        # Caller-supplied patterns extend the defaults; concatenation builds a
        # new list, leaving the defaults themselves unmodified.
        if exclude_patterns:
            exclude_patterns = self._default_exclude_patterns + exclude_patterns
        else:
            exclude_patterns = self._default_exclude_patterns

        # Don't do anything for linked bundles.
        if getattr(bundle.metadata, "link_url", None):
            return

        bundle_path = self._bundle_store.get_bundle_location(bundle.uuid)
        try:
            path_util.make_directory(bundle_path)
            # Single-source uploads get their directory layout flattened after
            # the loop, so every source is first written under bundle_path.
            for src in sources:
                from_url, from_local, from_fileobj, name = self._interpret_source(src)
                dest = os.path.join(bundle_path, name)

                if from_url and git:
                    dest = file_util.strip_git_ext(dest)
                    file_util.git_clone(src, dest)
                    continue

                if from_url:
                    file_util.download_url(src, dest)
                    if unpack and self._can_unpack_file(dest):
                        # The downloaded archive is only an intermediate, so
                        # it is always removed after unpacking.
                        self._unpack_file(
                            dest,
                            zip_util.strip_archive_ext(dest),
                            remove_source=True,
                            simplify_archive=simplify_archives,
                        )
                    continue

                if from_local:
                    local_path = path_util.normalize(src)
                    path_util.check_isvalid(local_path, 'upload')

                    if unpack and self._can_unpack_file(local_path):
                        self._unpack_file(
                            local_path,
                            zip_util.strip_archive_ext(dest),
                            remove_source=remove_sources,
                            simplify_archive=simplify_archives,
                        )
                        continue
                    if remove_sources:
                        path_util.rename(local_path, dest)
                        continue
                    path_util.copy(
                        local_path,
                        dest,
                        follow_symlinks=follow_symlinks,
                        exclude_patterns=exclude_patterns,
                    )
                    continue

                if from_fileobj:
                    if unpack and zip_util.path_is_archive(name):
                        self._unpack_fileobj(
                            src[0],
                            src[1],
                            zip_util.strip_archive_ext(dest),
                            simplify_archive=simplify_archives,
                        )
                        continue
                    with open(dest, 'wb') as sink:
                        shutil.copyfileobj(src[1], sink)

            if len(sources) == 1:
                self._simplify_directory(bundle_path)
        except:
            # Best-effort rollback of a partially written bundle, then let the
            # original error propagate.
            if os.path.exists(bundle_path):
                path_util.remove(bundle_path)
            raise
Exemplo n.º 37
0
 def home(self):
     """
     Return the normalized form of the configured 'home' path.

     path_util.make_directory is called on the normalized path before it is
     returned (presumably to ensure the directory exists -- confirm against
     path_util).
     """
     # Imported locally, as in the original code.
     from codalab.lib import path_util
     home_dir = path_util.normalize(self.config['home'])
     path_util.make_directory(home_dir)
     return home_dir
Exemplo n.º 38
0
 def __init__(self, bundle_model, codalab_home):
     self._bundle_model = bundle_model
     self.codalab_home = path_util.normalize(codalab_home)