示例#1
0
def upload(args):
    """
    Given a folder or file, upload all the folders and files contained
    within it, skipping ones that already exist on the remote.
    """
    base_remote_path, path_dict = Object.validate_full_path(
        args.full_path, vault=args.vault, path=args.path)

    # Assert the vault exists and is accessible
    vault = Vault.get_by_full_path(path_dict['vault_full_path'])

    # If not the vault root, validate remote path exists and is a folder
    if path_dict['path'] != '/':
        Object.get_by_full_path(base_remote_path, assert_type='folder')

    for local_path in args.local_path:
        local_path = local_path.rstrip('/')
        local_start = os.path.basename(local_path)

        if os.path.isdir(local_path):
            _upload_folder(path_dict['domain'], vault,
                           base_remote_path, local_path, local_start)
        else:
            Object.upload_file(local_path, path_dict['path'],
                               vault.full_path)
示例#2
0
def _create_folder(vault, full_path, tags=None):
    """Create a folder if not exists"""
    full_path, path_dict = \
        Object.validate_full_path(full_path)
    folder_name = path_dict['filename']

    try:
        new_obj = Object.get_by_full_path(full_path)
        if not new_obj.is_folder:
            raise SolveError('Object type {} already exists at location: {}'
                             .format(new_obj.object_type, full_path))
    except NotFoundError:
        # Create the folder
        if path_dict['parent_path'] == '/':
            parent_object_id = None
        else:
            parent = Object.get_by_full_path(path_dict['parent_full_path'],
                                             assert_type='folder')
            parent_object_id = parent.id

        # Make the API call
        new_obj = Object.create(
            vault_id=vault.id,
            parent_object_id=parent_object_id,
            object_type='folder',
            filename=folder_name,
            tags=tags or []
        )

        print('Notice: Folder created for {0} at {1}'
              .format(folder_name, new_obj.path))

    return new_obj
示例#3
0
def upload(args):
    """
    Given a folder or file, upload all the folders and files contained
    within it, skipping ones that already exist on the remote.
    """
    base_remote_path, path_dict = Object.validate_full_path(args.full_path,
                                                            vault=args.vault,
                                                            path=args.path)

    # Assert the vault exists and is accessible
    vault = Vault.get_by_full_path(path_dict['vault_full_path'])

    # If not the vault root, validate remote path exists and is a folder
    if path_dict['path'] != '/':
        Object.get_by_full_path(base_remote_path, assert_type='folder')

    for local_path in args.local_path:
        local_path = local_path.rstrip('/')
        local_start = os.path.basename(local_path)

        if os.path.isdir(local_path):
            _upload_folder(path_dict['domain'], vault, base_remote_path,
                           local_path, local_start)
        else:
            Object.upload_file(local_path, path_dict['path'], vault.full_path)
示例#4
0
def import_file(args):
    """
    Given a dataset and a local path, upload and import the file(s).

    Command arguments (args):

        * create_dataset
        * template_id
        * full_path
        * vault (optional, overrides the vault in full_path)
        * path (optional, overrides the path in full_path)
        * commit_mode
        * capacity
        * file (list)
        * follow (default: False)

    """
    full_path, path_dict = Object.validate_full_path(
        args.full_path, vault=args.vault, path=args.path)

    # Ensure the dataset exists. Create if necessary.
    if args.create_dataset:
        dataset = create_dataset(args)
    else:
        try:
            dataset = solvebio.Dataset.get_by_full_path(full_path)
        except solvebio.SolveError as e:
            if e.status_code != 404:
                raise e

            print("Dataset not found: {0}".format(full_path))
            print("Tip: use the --create-dataset flag "
                  "to create one from a template")
            sys.exit(1)

    # Generate a manifest from the local files
    manifest = solvebio.Manifest()
    manifest.add(*args.file)

    # Create the manifest-based import
    imp = solvebio.DatasetImport.create(
        dataset_id=dataset.id,
        manifest=manifest.manifest,
        commit_mode=args.commit_mode
    )

    if args.follow:
        imp.follow()
    else:
        mesh_url = 'https://my.solvebio.com/activity/'
        print("Your import has been submitted, view details at: {0}"
              .format(mesh_url))
示例#5
0
def import_file(args):
    """
    Given a dataset and a local path, upload and import the file(s).

    Command arguments (args):

        * create_dataset
        * template_id
        * full_path
        * vault (optional, overrides the vault in full_path)
        * path (optional, overrides the path in full_path)
        * commit_mode
        * capacity
        * file (list)
        * follow (default: False)

    """
    full_path, path_dict = Object.validate_full_path(args.full_path,
                                                     vault=args.vault,
                                                     path=args.path)

    # Ensure the dataset exists. Create if necessary.
    if args.create_dataset:
        dataset = create_dataset(args)
    else:
        try:
            dataset = solvebio.Dataset.get_by_full_path(full_path)
        except solvebio.SolveError as e:
            if e.status_code != 404:
                raise e

            print("Dataset not found: {0}".format(full_path))
            print("Tip: use the --create-dataset flag "
                  "to create one from a template")
            sys.exit(1)

    # Generate a manifest from the local files
    manifest = solvebio.Manifest()
    manifest.add(*args.file)

    # Create the manifest-based import
    imp = solvebio.DatasetImport.create(dataset_id=dataset.id,
                                        manifest=manifest.manifest,
                                        commit_mode=args.commit_mode)

    if args.follow:
        imp.follow()
    else:
        mesh_url = 'https://my.solvebio.com/activity/'
        print("Your import has been submitted, view details at: {0}".format(
            mesh_url))
示例#6
0
    def get_or_create_by_full_path(cls, full_path, **kwargs):
        from solvebio import Vault
        from solvebio import Object

        _client = kwargs.pop('client', None) or cls._client or client
        create_vault = kwargs.pop('create_vault', False)
        create_folders = kwargs.pop('create_folders', True)

        # Check for object type assertion, if not explicitly added, see
        # if user has passed object_type, as their intent was to get/create
        # an object of that type.
        assert_type = kwargs.pop('assert_type',
                                 kwargs.get('object_type', None))

        try:
            return cls.get_by_full_path(full_path,
                                        assert_type=assert_type,
                                        client=_client)
        except NotFoundError:
            pass

        # Object type required when creating Object
        object_type = kwargs.get('object_type')
        if not object_type:
            raise Exception("'object_type' is required when creating a new "
                            "Object. Pass one of: file, folder, dataset")

        # TODO should we require file contents?
        # Technically a user could then use this object to the call
        # upload_file()
        # if object_type == 'file' and not kwargs.get('content'):
        #     raise Exception('')

        # Object not found, create it step-by-step
        full_path, parts = Object.validate_full_path(full_path, client=_client)

        if create_vault:
            vault = Vault.get_or_create_by_full_path('{0}:{1}'.format(
                parts['domain'], parts['vault']),
                                                     client=_client)
        else:
            vaults = Vault.all(account_domain=parts['domain'],
                               name=parts['vault'],
                               client=_client)
            if len(vaults.solve_objects()) == 0:
                raise Exception('Vault with name {0}:{1} does not exist. Pass '
                                'create_vault=True to auto-create'.format(
                                    parts['domain'], parts['vault']))
            vault = vaults.solve_objects()[0]

        # Create the folders to hold the object if they do not already exist.
        object_path = parts['path']
        curr_path = os.path.dirname(object_path)
        folders_to_create = []
        new_folders = []
        id_map = {'/': None}

        while curr_path != '/':
            try:
                obj = Object.get_by_path(curr_path,
                                         vault_id=vault.id,
                                         assert_type='folder',
                                         client=_client)
                id_map[curr_path] = obj.id
                break
            except NotFoundError:
                if not create_folders:
                    raise Exception('Folder {} does not exist.  Pass '
                                    'create_folders=True to auto-create '
                                    'missing folders')

                folders_to_create.append(curr_path)
                curr_path = '/'.join(curr_path.split('/')[:-1])
                if curr_path == '':
                    break

        for folder in reversed(folders_to_create):
            new_folder = Object.create(
                object_type='folder',
                vault_id=vault.id,
                filename=os.path.basename(folder),
                parent_object_id=id_map[os.path.dirname(folder)],
                client=_client)
            new_folders.append(new_folder)
            id_map[folder] = new_folder.id

        if os.path.dirname(object_path) == '/':
            parent_folder_id = None
        elif new_folders:
            parent_folder_id = new_folders[-1].id
        else:
            parent_folder_id = id_map[os.path.dirname(object_path)]

        return Object.create(filename=os.path.basename(object_path),
                             vault_id=vault.id,
                             parent_object_id=parent_folder_id,
                             client=_client,
                             **kwargs)
示例#7
0
def _upload_folder(domain, vault, base_remote_path, base_local_path,
                   local_start, exclude_paths=None, dry_run=False):

    # Create the upload root folder if it does not exist on the remote
    try:
        upload_root_path, _ = Object.validate_full_path(
            os.path.join(base_remote_path, local_start)
        )
        Object.get_by_full_path(upload_root_path, assert_type='folder')
    except NotFoundError:
        base_remote_path, path_dict = \
            Object.validate_full_path(base_remote_path)
        base_folder_path = os.path.join(base_remote_path, local_start)
        if dry_run:
            print('[Dry Run] Creating folder {}'.format(base_folder_path))
        else:
            _create_folder(vault, base_folder_path)

    # Create folders and upload files
    for abs_local_parent_path, folders, files in os.walk(base_local_path):

        # Strips off the local path and adds the parent directory at
        # each phase of the loop
        local_parent_path = re.sub(
            '^' + os.path.dirname(base_local_path), '', abs_local_parent_path
        ).lstrip('/')

        if should_exclude(abs_local_parent_path, exclude_paths,
                          dry_run=dry_run):
            continue

        remote_folder_full_path = \
            os.path.join(base_remote_path, local_parent_path)

        # Create folders
        for folder in folders:
            new_folder_path = os.path.join(abs_local_parent_path, folder)
            if should_exclude(new_folder_path, exclude_paths,
                              dry_run=dry_run):
                continue

            remote_path = os.path.join(remote_folder_full_path, folder)
            if dry_run:
                print('[Dry Run] Creating folder {}'.format(remote_path))
            else:
                _create_folder(vault, remote_path)

        # Upload the files that do not yet exist on the remote
        for f in files:
            local_file_path = os.path.join(abs_local_parent_path, f)
            if should_exclude(local_file_path, exclude_paths, dry_run=dry_run):
                continue

            if dry_run:
                print('[Dry Run] Uploading {} to {}'
                      .format(local_file_path, remote_folder_full_path))
            else:
                remote_parent = Object.get_by_full_path(
                    remote_folder_full_path, assert_type='folder')
                Object.upload_file(local_file_path, remote_parent.path,
                                   vault.full_path)
示例#8
0
def import_file(args):
    """
    Given a dataset and a local path, upload and import the file(s).

    Command arguments (args):

        * create_dataset and it's args
            * capacity
            * template_id
            * template_file
            * capacity
            * tag
            * metadata
            * metadata_json_file
            * create_vault
        * full_path
        * commit_mode
        * remote_source
        * dry_run
        * follow
        * file (list)

    """
    if args.dry_run:
        print("NOTE: Running import command in dry run mode")

    full_path, path_dict = Object.validate_full_path(args.full_path)

    files_list = []
    if args.remote_source:
        # Validate files
        for file_fp in args.file:
            files_ = list(Object.all(glob=file_fp, limit=1000))
            if not files_:
                print("Did not find any {}files at path {}".format(
                    'remote ' if args.remote_source else '', file_fp))
            else:
                for file_ in files_:
                    print("Found file: {}".format(file_.full_path))
                    files_list.append(file_)

    else:
        # Local files
        # Note: if these are globs or folders, then this will
        # create a multi-file manifest which is deprecated
        # and should be updated to one file per import.
        files_list = [fp for fp in args.file]

    if not files_list:
        print("Exiting. No files were found at the following {}paths: {}"
              .format('remote ' if args.remote_source else '',
                      ', '.join(args.file)))
        sys.exit(1)

    if args.template_id:
        try:
            template = DatasetTemplate.retrieve(args.template_id)
        except SolveError as e:
            if e.status_code != 404:
                raise e
            print("No template with ID {0} found!".format(args.template_id))
            sys.exit(1)
    elif args.template_file:
        template = _create_template_from_file(args.template_file, args.dry_run)
    else:
        template = None

    # Ensure the dataset exists. Create if necessary.
    if args.create_dataset:
        dataset = create_dataset(args, template=template)
    else:
        try:
            dataset = Object.get_by_full_path(full_path, assert_type='dataset')
        except solvebio.errors.NotFoundError:
            print("Dataset not found: {0}".format(full_path))
            print("Tip: use the --create-dataset flag "
                  "to create one from a template")
            sys.exit(1)

    if args.dry_run:
        print("Importing the following files/paths into dataset: {}"
              .format(full_path))
        for file_ in files_list:
            if args.remote_source:
                print(file_.full_path)
            else:
                print(file_)
        return

    # Generate a manifest from the local files
    imports = []
    for file_ in files_list:
        if args.remote_source:
            kwargs = dict(object_id=file_.id)
        else:
            manifest = solvebio.Manifest()
            manifest.add(file_)
            kwargs = dict(manifest=manifest.manifest)

        # Add template params
        if template:
            kwargs.update(template.import_params)

        # Create the import
        import_ = DatasetImport.create(
            dataset_id=dataset.id,
            commit_mode=args.commit_mode,
            **kwargs
        )

        imports.append(import_)

    if args.follow:
        dataset.activity(follow=True)
    else:
        mesh_url = 'https://my.solvebio.com/activity/'
        print("Your import has been submitted, view details at: {0}"
              .format(mesh_url))

    return imports, dataset
示例#9
0
def upload(args):
    """
    Given a folder or file, upload all the folders and files contained
    within it, skipping ones that already exist on the remote.
    """

    base_remote_path, path_dict = Object.validate_full_path(args.full_path)

    # Assert the vault exists and is accessible
    vault = Vault.get_by_full_path(path_dict['vault_full_path'])

    # If not the vault root, validate remote path exists and is a folder
    if path_dict['path'] != '/':
        try:
            Object.get_by_full_path(base_remote_path, assert_type='folder')
        except:
            if not args.create_full_path:
                raise

            if args.dry_run:
                print('[Dry Run] Creating {}'.format(base_remote_path))
            else:
                # Create the destination path (including subfolders)
                # if not found
                parent_folder_path = vault.full_path + ':'
                folders = path_dict['path'].lstrip('/').split('/')
                for folder in folders:
                    folder_full_path = os.path.join(parent_folder_path, folder)
                    parent_folder = _create_folder(vault, folder_full_path)
                    parent_folder_path = parent_folder.full_path

    # Exit if there are multiple local paths and the
    # exclude paths are not absolute
    base_exclude_paths = args.exclude or []
    if base_exclude_paths and len(args.local_path) > 1:
        rel_exclude_paths = [p for p in base_exclude_paths
                             if not os.path.isabs(p)]
        local_path_parents = set([os.path.dirname(os.path.abspath(p))
                                  for p in args.local_path])
        if rel_exclude_paths and len(local_path_parents) > 1:
            sys.exit('Exiting. Cannot apply the --exclude relative paths when '
                     'multiple upload paths with different parent directories '
                     'are specified. Make --exclude paths absolute or run '
                     'upload paths one at a time.')

    for local_path in args.local_path:

        # Expand local path and strip trailing slash
        local_path = os.path.abspath(local_path).rstrip('/')
        local_name = os.path.basename(local_path)

        # add basepath to excludes
        exclude_paths = [
            os.path.join(local_path, os.path.normpath(exclude_path))
            for exclude_path in base_exclude_paths
        ]

        if os.path.isdir(local_path):
            _upload_folder(path_dict['domain'], vault,
                           base_remote_path, local_path,
                           local_name, exclude_paths=exclude_paths,
                           dry_run=args.dry_run)
        else:
            if args.dry_run:
                print('[Dry Run] Uploading {} to {}'
                      .format(local_path, path_dict['path']))
            else:
                Object.upload_file(local_path, path_dict['path'],
                                   vault.full_path)
示例#10
0
def create_dataset(args, template=None):
    """
    Attempt to create a new dataset given the following params:

        * template_id
        * template_file
        * capacity
        * tag
        * metadata
        * metadata_json_file
        * create_vault
        * full_path
        * dry_run
    """
    if args.dry_run:
        print("NOTE: Running create-dataset command in dry run mode")

    full_path, path_dict = Object.validate_full_path(args.full_path)

    try:
        # Fail if a dataset already exists.
        Object.get_by_full_path(full_path, assert_type='dataset')
        print('A dataset already exists at path: {0}'.format(full_path))
        sys.exit(1)
    except NotFoundError:
        pass

    # Accept a template_id or a template_file
    if template:
        # Template has already been validated/created
        # in the import command that called this
        pass
    elif args.template_id:
        try:
            template = DatasetTemplate.retrieve(args.template_id)
        except SolveError as e:
            if e.status_code != 404:
                raise e
            print("No template with ID {0} found!".format(args.template_id))
            sys.exit(1)
    elif args.template_file:
        template = _create_template_from_file(args.template_file, args.dry_run)
    else:
        template = None

    if template:
        print("Creating new dataset {0} using the template '{1}'."
              .format(full_path, template.name))
        fields = template.fields
        description = 'Created with dataset template: {0}' \
            .format(str(template.id))
    else:
        fields = []
        description = None

    # Create dataset metadata
    # Looks at --metadata_json_file first and will update
    # that with any other key/value pairs passed in to --metadata
    metadata = {}
    if args.metadata and args.metadata_json_file:
        print('WARNING: Received --metadata and --metadata-json-file. '
              'Will update the JSON file values with the --metadata values')

    if args.metadata_json_file:
        with open(args.metadata_json_file, 'r') as fp:
            try:
                metadata = json.load(fp)
            except:
                print('Metadata JSON file {0} could not be loaded. Please '
                      'pass valid JSON'.format(args.metadata_json_file))
                sys.exit(1)

    if args.metadata:
        metadata.update(args.metadata)

    if args.dry_run:
        print("Creating new '{}' capacity dataset at {}"
              .format(args.capacity, full_path))
        if description:
            print("Description: {}".format(description))
        if fields:
            print("Fields: {}".format(fields))
        if args.tag:
            print("Tags: {}".format(args.tag))
        if metadata:
            print("Metadata: {}".format(metadata))
        return

    return Dataset.get_or_create_by_full_path(
        full_path,
        capacity=args.capacity,
        fields=fields,
        description=description,
        tags=args.tag or [],
        metadata=metadata,
        create_vault=args.create_vault,
    )
示例#11
0
def _upload_folder(domain, vault, base_remote_path, base_local_path,
                   local_start):

    # Create the upload root folder if it does not exist on the remote
    try:
        upload_root_path, _ = Object.validate_full_path(
            os.path.join(base_remote_path, local_start))
        obj = Object.get_by_full_path(upload_root_path, assert_type='folder')
    except NotFoundError:
        base_remote_path, path_dict = \
            Object.validate_full_path(base_remote_path)

        if path_dict['path'] == '/':
            parent_object_id = None
        else:
            obj = Object.get_by_full_path(base_remote_path,
                                          assert_type='folder')
            parent_object_id = obj.id

        # Create base folder
        new_folder = Object.create(vault_id=vault.id,
                                   parent_object_id=parent_object_id,
                                   object_type='folder',
                                   filename=local_start)

        print('Notice: Folder created for {0} at {1}'.format(
            base_local_path,
            new_folder.path,
        ))

    for root, dirs, files in os.walk(base_local_path):

        # Create the sub-folders that do not exist on the remote
        for d in dirs:
            dirpath = os.path.join(
                base_remote_path,
                re.sub('^' + os.path.dirname(base_local_path), '',
                       root).lstrip('/'),  # noqa
                d)

            try:
                Object.get_by_full_path(dirpath, object_type='folder')
            except NotFoundError:
                # Create the folder
                if os.path.dirname(dirpath.split(':')[-1]) == '/':
                    parent_object_id = None
                else:
                    parent_full_path = os.path.dirname(dirpath)
                    parent = Object.get_by_full_path(parent_full_path,
                                                     assert_type='folder')
                    parent_object_id = parent.id

                # Make the API call
                new_obj = Object.create(
                    vault_id=vault.id,
                    parent_object_id=parent_object_id,
                    object_type='folder',
                    filename=d,
                )

                print('Notice: Folder created for {0} at {1}'.format(
                    os.path.join(root, d), new_obj.path))

        # Upload the files that do not yet exist on the remote
        for f in files:
            file_full_path = os.path.join(
                base_remote_path,
                re.sub('^' + os.path.dirname(base_local_path), '',
                       root).lstrip('/'),
                f,
            )
            try:
                Object.get_by_full_path(file_full_path)
            except NotFoundError:
                parent_full_path = os.path.dirname(
                    os.path.join(
                        base_remote_path,
                        re.sub('^' + os.path.dirname(base_local_path), '',
                               root).lstrip('/'),
                        f,
                    ))
                parent = Object.get_by_full_path(parent_full_path,
                                                 assert_type='folder')
                Object.upload_file(os.path.join(root, f), parent.path,
                                   vault.full_path)
示例#12
0
def _upload_folder(domain, vault, base_remote_path,
                   base_local_path, local_start):

    # Create the upload root folder if it does not exist on the remote
    try:
        upload_root_path, _ = Object.validate_full_path(
            os.path.join(base_remote_path, local_start)
        )
        obj = Object.get_by_full_path(upload_root_path, assert_type='folder')
    except NotFoundError:
        base_remote_path, path_dict = \
            Object.validate_full_path(base_remote_path)

        if path_dict['path'] == '/':
            parent_object_id = None
        else:
            obj = Object.get_by_full_path(base_remote_path,
                                          assert_type='folder')
            parent_object_id = obj.id

        # Create base folder
        new_folder = Object.create(
            vault_id=vault.id,
            parent_object_id=parent_object_id,
            object_type='folder',
            filename=local_start
        )

        print('Notice: Folder created for {0} at {1}'.format(
            base_local_path,
            new_folder.path,
        ))

    for root, dirs, files in os.walk(base_local_path):

        # Create the sub-folders that do not exist on the remote
        for d in dirs:
            dirpath = os.path.join(
                base_remote_path,
                re.sub('^' + os.path.dirname(base_local_path), '', root).lstrip('/'),  # noqa
                d
            )

            try:
                Object.get_by_full_path(dirpath, object_type='folder')
            except NotFoundError:
                # Create the folder
                if os.path.dirname(dirpath.split(':')[-1]) == '/':
                    parent_object_id = None
                else:
                    parent_full_path = os.path.dirname(dirpath)
                    parent = Object.get_by_full_path(
                        parent_full_path, assert_type='folder')
                    parent_object_id = parent.id

                # Make the API call
                new_obj = Object.create(
                    vault_id=vault.id,
                    parent_object_id=parent_object_id,
                    object_type='folder',
                    filename=d,
                )

                print('Notice: Folder created for {0} at {1}'
                      .format(os.path.join(root, d), new_obj.path))

        # Upload the files that do not yet exist on the remote
        for f in files:
            file_full_path = os.path.join(
                base_remote_path,
                re.sub('^' + os.path.dirname(base_local_path),
                       '',
                       root).lstrip('/'),
                f,
            )
            try:
                Object.get_by_full_path(file_full_path)
            except NotFoundError:
                parent_full_path = os.path.dirname(
                    os.path.join(
                        base_remote_path,
                        re.sub('^' + os.path.dirname(base_local_path),
                               '',
                               root).lstrip('/'),
                        f,
                    )
                )
                parent = Object.get_by_full_path(
                    parent_full_path, assert_type='folder')
                Object.upload_file(os.path.join(root, f),
                                   parent.path, vault.full_path)
示例#13
0
def create_dataset(args):
    """
    Attempt to create a new dataset given the following params:

        * template_id
        * template_file
        * capacity
        * create_vault
        * [argument] dataset name or full path

    NOTE: genome_build has been deprecated and is no longer used.

    """
    # For backwards compatibility, the "full_path" argument
    # can be a dataset filename, but only if vault and path
    # are set. If vault/path are both provided and there
    # are no forward-slashes in the "full_path", assume
    # the user has provided a dataset filename.
    if '/' not in args.full_path and args.vault and args.path:
        full_path, path_dict = Object.validate_full_path(
            '{0}:/{1}/{2}'.format(args.vault, args.path, args.full_path))
    else:
        full_path, path_dict = Object.validate_full_path(
            args.full_path, vault=args.vault, path=args.path)

    # Accept a template_id or a template_file
    if args.template_id:
        # Validate the template ID
        try:
            tpl = solvebio.DatasetTemplate.retrieve(args.template_id)
        except solvebio.SolveError as e:
            if e.status_code != 404:
                raise e
            print("No template with ID {0} found!"
                  .format(args.template_id))
            sys.exit(1)
    elif args.template_file:
        mode = 'r'
        fopen = open
        if check_gzip_path(args.template_file):
            mode = 'rb'
            fopen = gzip.open

        # Validate the template file
        with fopen(args.template_file, mode) as fp:
            try:
                tpl_json = json.load(fp)
            except:
                print('Template file {0} could not be loaded. Please '
                      'pass valid JSON'.format(args.template_file))
                sys.exit(1)

        tpl = solvebio.DatasetTemplate.create(**tpl_json)
        print("A new dataset template was created with id: {0}".format(tpl.id))
    else:
        print("Creating a new dataset {0} without a template."
              .format(full_path))
        tpl = None
        fields = []
        entity_type = None
        description = None

    if tpl:
        print("Creating new dataset {0} using the template '{1}'."
              .format(full_path, tpl.name))
        fields = tpl.fields
        entity_type = tpl.entity_type
        # include template used to create
        description = 'Created with dataset template: {0}'.format(str(tpl.id))

    return solvebio.Dataset.get_or_create_by_full_path(
        full_path,
        capacity=args.capacity,
        entity_type=entity_type,
        fields=fields,
        description=description,
        create_vault=args.create_vault,
    )
示例#14
0
    def get_or_create_by_full_path(cls, full_path, **kwargs):
        from solvebio import Vault
        from solvebio import Object

        _client = kwargs.pop('client', None) or cls._client or client
        create_vault = kwargs.pop('create_vault', False)
        create_folders = kwargs.pop('create_folders', True)

        try:
            return Dataset.get_by_full_path(full_path,
                                            assert_type='dataset',
                                            client=_client)
        except NotFoundError:
            pass

        # Dataset not found, create it step-by-step
        full_path, parts = Object.validate_full_path(full_path, client=_client)

        if create_vault:
            vault = Vault.get_or_create_by_full_path('{0}:{1}'.format(
                parts['domain'], parts['vault']),
                                                     client=_client)
        else:
            vaults = Vault.all(account_domain=parts['domain'],
                               name=parts['vault'],
                               client=_client)
            if len(vaults.solve_objects()) == 0:
                raise Exception(
                    'Vault does not exist with name {0}:{1}'.format(
                        parts['domain'], parts['vault']))
            vault = vaults.solve_objects()[0]

        # Create the folders to hold the dataset if they do not already exist.
        object_path = parts['path']
        curr_path = os.path.dirname(object_path)
        folders_to_create = []
        new_folders = []
        id_map = {'/': None}

        while curr_path != '/':
            try:
                obj = Object.get_by_path(curr_path,
                                         vault_id=vault.id,
                                         assert_type='folder',
                                         client=_client)
                id_map[curr_path] = obj.id
                break
            except NotFoundError:
                if not create_folders:
                    raise Exception('Folder {} does not exist.  Pass '
                                    'create_folders=True to auto-create '
                                    'missing folders')

                folders_to_create.append(curr_path)
                curr_path = '/'.join(curr_path.split('/')[:-1])
                if curr_path == '':
                    break

        for folder in reversed(folders_to_create):
            new_folder = Object.create(
                object_type='folder',
                vault_id=vault.id,
                filename=os.path.basename(folder),
                parent_object_id=id_map[os.path.dirname(folder)],
                client=_client)
            new_folders.append(new_folder)
            id_map[folder] = new_folder.id

        if os.path.dirname(object_path) == '/':
            parent_folder_id = None
        elif new_folders:
            parent_folder_id = new_folders[-1].id
        else:
            parent_folder_id = id_map[os.path.dirname(object_path)]

        return Dataset.create(name=os.path.basename(object_path),
                              vault_id=vault.id,
                              vault_parent_object_id=parent_folder_id,
                              client=_client,
                              **kwargs)
示例#15
0
    def upload_file(cls, local_path, remote_path, vault_full_path, **kwargs):
        from solvebio import Vault
        from solvebio import Object

        _client = kwargs.pop('client', None) or cls._client or client

        local_path = os.path.expanduser(local_path)
        if os.stat(local_path).st_size == 0:
            print('Notice: Cannot upload empty file {0}'.format(local_path))
            return

        # Get vault
        vault = Vault.get_by_full_path(vault_full_path, client=_client)

        # Get MD5, mimetype, and file size for the object
        local_md5, _ = md5sum(local_path, multipart_threshold=None)
        _, mimetype = mimetypes.guess_type(local_path)
        size = os.path.getsize(local_path)

        # Check if object exists already and compare md5sums
        full_path, path_dict = Object.validate_full_path(os.path.join(
            '{}:{}'.format(vault.full_path, remote_path),
            os.path.basename(local_path)),
                                                         client=_client)
        try:
            obj = cls.get_by_full_path(full_path, client=_client)
            if not obj.is_file:
                print('WARNING: A {} currently exists at {}'.format(
                    obj.object_type, full_path))
            else:
                # Check against md5sum of remote file
                if obj.md5 == local_md5:
                    print('WARNING: File {} (md5sum {}) already exists, '
                          'not uploading'.format(full_path, local_md5))
                    return obj
                else:
                    print('WARNING: File {} exists on SolveBio with different '
                          'md5sum (local: {} vs remote: {}) Uploading anyway, '
                          'but not overwriting.'.format(
                              full_path, local_md5, obj.md5))
        except NotFoundError:
            pass

        # Lookup parent object
        if path_dict['parent_path'] == '/':
            parent_object_id = None
        else:
            parent_obj = Object.get_by_full_path(path_dict['parent_full_path'],
                                                 assert_type='folder',
                                                 client=_client)
            parent_object_id = parent_obj.id

        description = kwargs.get('description')

        # Create the file, and upload it to the Upload URL
        obj = Object.create(vault_id=vault.id,
                            parent_object_id=parent_object_id,
                            object_type='file',
                            filename=os.path.basename(local_path),
                            md5=local_md5,
                            mimetype=mimetype,
                            size=size,
                            description=description,
                            tags=kwargs.get('tags', []) or [],
                            client=_client)

        print('Notice: File created for {0} at {1}'.format(
            local_path, obj.path))
        print('Notice: Upload initialized')

        upload_url = obj.upload_url

        headers = {
            'Content-MD5': base64.b64encode(binascii.unhexlify(local_md5)),
            'Content-Type': mimetype,
            'Content-Length': str(size),
        }

        # Use a session with a retry policy to handle connection errors.
        session = requests.Session()
        max_retries = 5
        retry = Retry(
            total=max_retries,
            read=max_retries,
            connect=max_retries,
            backoff_factor=0.3,
            status_forcelist=(500, 502, 504, 400),
        )
        session.mount('https://',
                      requests.adapters.HTTPAdapter(max_retries=retry))
        upload_resp = session.put(upload_url,
                                  data=open(local_path, 'rb'),
                                  headers=headers)

        if upload_resp.status_code != 200:
            print('WARNING: Upload status code for {0} was {1}'.format(
                local_path, upload_resp.status_code))
            # Clean up the failed upload
            obj.delete(force=True)
            raise FileUploadError(upload_resp.content)
        else:
            print('Notice: Successfully uploaded {0} to {1}'.format(
                local_path, obj.path))

        return obj
示例#16
0
    def get_or_create_by_full_path(cls, full_path, **kwargs):
        from solvebio import Vault
        from solvebio import Object

        _client = kwargs.pop('client', None) or cls._client or client
        create_vault = kwargs.pop('create_vault', False)
        create_folders = kwargs.pop('create_folders', True)

        try:
            return Dataset.get_by_full_path(full_path, assert_type='dataset',
                                            client=_client)
        except NotFoundError:
            pass

        # Dataset not found, create it step-by-step
        full_path, parts = Object.validate_full_path(full_path, client=_client)

        if create_vault:
            vault = Vault.get_or_create_by_full_path(
                '{0}:{1}'.format(parts['domain'], parts['vault']),
                client=_client)
        else:
            vaults = Vault.all(account_domain=parts['domain'],
                               name=parts['vault'],
                               client=_client)
            if len(vaults.solve_objects()) == 0:
                raise Exception(
                    'Vault does not exist with name {0}:{1}'.format(
                        parts['domain'], parts['vault'])
                )
            vault = vaults.solve_objects()[0]

        # Create the folders to hold the dataset if they do not already exist.
        object_path = parts['path']
        curr_path = os.path.dirname(object_path)
        folders_to_create = []
        new_folders = []
        id_map = {'/': None}

        while curr_path != '/':
            try:
                obj = Object.get_by_path(curr_path,
                                         vault_id=vault.id,
                                         assert_type='folder',
                                         client=_client)
                id_map[curr_path] = obj.id
                break
            except NotFoundError:
                if not create_folders:
                    raise Exception('Folder {} does not exist.  Pass '
                                    'create_folders=True to auto-create '
                                    'missing folders')

                folders_to_create.append(curr_path)
                curr_path = '/'.join(curr_path.split('/')[:-1])
                if curr_path == '':
                    break

        for folder in reversed(folders_to_create):
            new_folder = Object.create(
                object_type='folder',
                vault_id=vault.id,
                filename=os.path.basename(folder),
                parent_object_id=id_map[os.path.dirname(folder)],
                client=_client
            )
            new_folders.append(new_folder)
            id_map[folder] = new_folder.id

        if os.path.dirname(object_path) == '/':
            parent_folder_id = None
        elif new_folders:
            parent_folder_id = new_folders[-1].id
        else:
            parent_folder_id = id_map[os.path.dirname(object_path)]

        return Dataset.create(name=os.path.basename(object_path),
                              vault_id=vault.id,
                              vault_parent_object_id=parent_folder_id,
                              client=_client,
                              **kwargs)
示例#17
0
def create_dataset(args):
    """
    Attempt to create a new dataset given the following params:

        * template_id
        * template_file
        * capacity
        * create_vault
        * [argument] dataset name or full path

    NOTE: genome_build has been deprecated and is no longer used.

    """
    # For backwards compatibility, the "full_path" argument
    # can be a dataset filename, but only if vault and path
    # are set. If vault/path are both provided and there
    # are no forward-slashes in the "full_path", assume
    # the user has provided a dataset filename.
    if '/' not in args.full_path and args.vault and args.path:
        full_path, path_dict = Object.validate_full_path('{0}:/{1}/{2}'.format(
            args.vault, args.path, args.full_path))
    else:
        full_path, path_dict = Object.validate_full_path(args.full_path,
                                                         vault=args.vault,
                                                         path=args.path)

    # Accept a template_id or a template_file
    if args.template_id:
        # Validate the template ID
        try:
            tpl = solvebio.DatasetTemplate.retrieve(args.template_id)
        except solvebio.SolveError as e:
            if e.status_code != 404:
                raise e
            print("No template with ID {0} found!".format(args.template_id))
            sys.exit(1)
    elif args.template_file:
        mode = 'r'
        fopen = open
        if check_gzip_path(args.template_file):
            mode = 'rb'
            fopen = gzip.open

        # Validate the template file
        with fopen(args.template_file, mode) as fp:
            try:
                tpl_json = json.load(fp)
            except:
                print('Template file {0} could not be loaded. Please '
                      'pass valid JSON'.format(args.template_file))
                sys.exit(1)

        tpl = solvebio.DatasetTemplate.create(**tpl_json)
        print("A new dataset template was created with id: {0}".format(tpl.id))
    else:
        print(
            "Creating a new dataset {0} without a template.".format(full_path))
        tpl = None
        fields = []
        entity_type = None
        description = None

    if tpl:
        print("Creating new dataset {0} using the template '{1}'.".format(
            full_path, tpl.name))
        fields = tpl.fields
        entity_type = tpl.entity_type
        # include template used to create
        description = 'Created with dataset template: {0}'.format(str(tpl.id))

    return solvebio.Dataset.get_or_create_by_full_path(
        full_path,
        capacity=args.capacity,
        entity_type=entity_type,
        fields=fields,
        description=description,
        create_vault=args.create_vault,
    )