def _create_folder(vault, full_path, tags=None):
    """Return the folder at ``full_path``, creating it if it is missing.

    :param vault: Object whose ``id`` becomes the ``vault_id`` of a newly
        created folder.
    :param full_path: Full path of the folder; it is passed through
        ``Object.validate_full_path`` before use.
    :param tags: Optional tags for a newly created folder. A falsy value is
        sent as an empty list.
    :returns: The existing or newly created folder object.
    :raises SolveError: If an object that is not a folder already exists at
        ``full_path``.
    """
    full_path, parts = Object.validate_full_path(full_path)
    name = parts['filename']

    try:
        folder = Object.get_by_full_path(full_path)
        if not folder.is_folder:
            message = 'Object type {} already exists at location: {}'.format(
                folder.object_type, full_path)
            raise SolveError(message)
    except NotFoundError:
        # Nothing exists at this path yet. A folder whose parent path is
        # the root ('/') is created without a parent object.
        parent_id = None
        if parts['parent_path'] != '/':
            parent_id = Object.get_by_full_path(
                parts['parent_full_path'], assert_type='folder').id

        folder = Object.create(
            vault_id=vault.id,
            parent_object_id=parent_id,
            object_type='folder',
            filename=name,
            tags=tags if tags else [],
        )
        notice = 'Notice: Folder created for {0} at {1}'
        print(notice.format(name, folder.path))

    return folder
def create_folder(self, filename, **params):
    """Create a folder named ``filename`` in this vault.

    An optional ``path`` keyword names the parent folder. Unless it is
    falsy or ``'/'``, it is resolved with ``self._get_parent_folder`` and
    the result's ``id`` is sent as ``parent_object_id``. All remaining
    keywords are forwarded to ``Object.create``.

    :returns: The value returned by ``Object.create``.
    """
    from solvebio import Object

    parent_path = params.pop('path', None)
    has_parent = bool(parent_path) and parent_path != '/'
    if has_parent:
        params['parent_object_id'] = self._get_parent_folder(parent_path).id

    params['filename'] = filename
    params['vault_id'] = self.id
    params['object_type'] = 'folder'

    return Object.create(client=self._client, **params)
def create_dataset(self, name, **params):
    """Create a dataset object named ``name`` in this vault.

    An optional ``path`` keyword names the parent folder. When it is
    ``None`` or ``'/'`` the dataset is created with ``parent_object_id``
    set to ``None``; otherwise the parent is resolved with
    ``self._get_parent_folder``. All remaining keywords are forwarded to
    ``Object.create``.

    :returns: The value returned by ``Object.create``.
    """
    from solvebio import Object

    params.update(vault_id=self.id, object_type='dataset')

    folder_path = params.pop('path', None)
    if folder_path is None or folder_path == '/':
        parent_id = None
    else:
        parent_id = self._get_parent_folder(folder_path).id

    params['parent_object_id'] = parent_id
    params['filename'] = name

    return Object.create(**params)
def get_or_create_uploads_path(cls, **kwargs):
    """Return the path of the ``Uploads`` folder in the personal vault.

    The folder is looked up at ``<personal vault full path>:/Uploads`` and
    is created when that lookup raises ``NotFoundError``.

    An optional ``client`` keyword selects the API client; otherwise
    ``cls._client`` and then the module-level ``client`` are used.
    """
    from solvebio import Object

    api_client = kwargs.pop('client', None) or cls._client or client
    personal_vault = cls.get_personal_vault(client=api_client)
    folder_name = 'Uploads'
    uploads_full_path = '{0}:/{1}'.format(
        personal_vault.full_path, folder_name)

    try:
        uploads = Object.get_by_full_path(
            uploads_full_path, assert_type='folder', client=api_client)
    except NotFoundError:
        notice = "Uploads directory not found. Creating {0}"
        print(notice.format(uploads_full_path))
        uploads = Object.create(
            vault_id=personal_vault.id,
            object_type='folder',
            filename=folder_name,
            client=api_client,
        )

    return uploads.path
def get_or_create_uploads_path(cls, **kwargs):
    """Look up, or create, the personal vault's ``Uploads`` folder.

    The API client is taken from the ``client`` keyword, falling back to
    ``cls._client`` and then to the module-level ``client``.

    :returns: The ``path`` attribute of the found or newly created folder.
    """
    from solvebio import Object

    conn = kwargs.pop('client', None) or cls._client or client
    home_vault = cls.get_personal_vault(client=conn)

    uploads_name = 'Uploads'
    target = '{0}:/{1}'.format(home_vault.full_path, uploads_name)

    try:
        folder = Object.get_by_full_path(target,
                                         assert_type='folder',
                                         client=conn)
    except NotFoundError:
        # The folder does not exist yet: report it, then create it at the
        # top level of the personal vault.
        print("Uploads directory not found. Creating {0}".format(target))
        new_folder_args = {
            'vault_id': home_vault.id,
            'object_type': 'folder',
            'filename': uploads_name,
            'client': conn,
        }
        folder = Object.create(**new_folder_args)

    return folder.path
def get_or_create_by_full_path(cls, full_path, **kwargs):
    """Return the dataset at ``full_path``, creating it if it is missing.

    Keyword arguments consumed here (everything else is forwarded to
    ``Dataset.create``):

    :param client: API client; falls back to ``cls._client`` and then to
        the module-level ``client``.
    :param create_vault: When true, the vault is fetched or created with
        ``Vault.get_or_create_by_full_path``. Defaults to ``False``.
    :param create_folders: When true, missing parent folders are created.
        Defaults to ``True``.
    :returns: The existing dataset, or the result of ``Dataset.create``.
    :raises Exception: If the vault does not exist and ``create_vault`` is
        false, or a parent folder is missing and ``create_folders`` is
        false.
    """
    from solvebio import Vault
    from solvebio import Object

    _client = kwargs.pop('client', None) or cls._client or client
    create_vault = kwargs.pop('create_vault', False)
    create_folders = kwargs.pop('create_folders', True)

    try:
        return Dataset.get_by_full_path(full_path, assert_type='dataset',
                                        client=_client)
    except NotFoundError:
        pass

    # Dataset not found, create it step-by-step
    full_path, parts = Object.validate_full_path(full_path, client=_client)

    if create_vault:
        vault = Vault.get_or_create_by_full_path(
            '{0}:{1}'.format(parts['domain'], parts['vault']),
            client=_client)
    else:
        vaults = Vault.all(account_domain=parts['domain'],
                           name=parts['vault'],
                           client=_client)
        matching_vaults = vaults.solve_objects()
        if not matching_vaults:
            raise Exception(
                'Vault does not exist with name {0}:{1}'.format(
                    parts['domain'], parts['vault'])
            )
        vault = matching_vaults[0]

    # Create the folders to hold the dataset if they do not already exist.
    object_path = parts['path']
    curr_path = os.path.dirname(object_path)
    folders_to_create = []
    new_folders = []
    # Maps a folder path to its object id; the root has no object.
    id_map = {'/': None}

    # Walk up from the dataset's parent until an existing folder (or the
    # root) is reached, remembering every missing folder on the way.
    while curr_path != '/':
        try:
            obj = Object.get_by_path(curr_path,
                                     vault_id=vault.id,
                                     assert_type='folder',
                                     client=_client)
            id_map[curr_path] = obj.id
            break
        except NotFoundError:
            if not create_folders:
                # Fill in the placeholder so the message names the folder.
                raise Exception('Folder {} does not exist. Pass '
                                'create_folders=True to auto-create '
                                'missing folders'.format(curr_path))

            folders_to_create.append(curr_path)
            curr_path = '/'.join(curr_path.split('/')[:-1])
            if curr_path == '':
                break

    # Missing folders were collected deepest-first; create them top-down so
    # that each parent id is known before its child is created.
    for folder in reversed(folders_to_create):
        new_folder = Object.create(
            object_type='folder',
            vault_id=vault.id,
            filename=os.path.basename(folder),
            parent_object_id=id_map[os.path.dirname(folder)],
            client=_client
        )
        new_folders.append(new_folder)
        id_map[folder] = new_folder.id

    if os.path.dirname(object_path) == '/':
        parent_folder_id = None
    elif new_folders:
        parent_folder_id = new_folders[-1].id
    else:
        parent_folder_id = id_map[os.path.dirname(object_path)]

    return Dataset.create(name=os.path.basename(object_path),
                          vault_id=vault.id,
                          vault_parent_object_id=parent_folder_id,
                          client=_client,
                          **kwargs)
def upload_file(cls, local_path, remote_path, vault_full_path, **kwargs):
    """Upload a local file into ``remote_path`` of the given vault.

    :param local_path: Path of the file to upload; ``~`` is expanded.
    :param remote_path: Remote folder path, joined with the vault's full
        path and the local file's basename to form the target path.
    :param vault_full_path: Full path of the destination vault.
    :param client: (keyword) API client; falls back to ``cls._client`` and
        then to the module-level ``client``.
    :param description: (keyword) optional description for the new object.
    :param tags: (keyword) optional tags; a falsy value is sent as ``[]``.
    :returns: ``None`` if the local file is empty; the existing remote
        object if it is a file with the same md5; otherwise the newly
        created object.
    :raises FileUploadError: If the upload response status is not 200. The
        newly created object is deleted before raising.
    """
    from solvebio import Vault
    from solvebio import Object

    _client = kwargs.pop('client', None) or cls._client or client

    local_path = os.path.expanduser(local_path)
    if os.stat(local_path).st_size == 0:
        print('Notice: Cannot upload empty file {0}'.format(local_path))
        return

    # Get vault
    vault = Vault.get_by_full_path(vault_full_path, client=_client)

    # Get MD5, mimetype, and file size for the object
    local_md5, _ = md5sum(local_path, multipart_threshold=None)
    # NOTE(review): mimetypes.guess_type() returns (type, encoding), so
    # this binds the *encoding* element -- confirm that is intended.
    _, mimetype = mimetypes.guess_type(local_path)
    size = os.path.getsize(local_path)

    # Check if object exists already and compare md5sums
    full_path, path_dict = Object.validate_full_path(
        os.path.join('{}:{}'.format(vault.full_path, remote_path),
                     os.path.basename(local_path)),
        client=_client)
    try:
        obj = cls.get_by_full_path(full_path, client=_client)
        if not obj.is_file:
            print('WARNING: A {} currently exists at {}'.format(
                obj.object_type, full_path))
        else:
            # Check against md5sum of remote file
            if obj.md5 == local_md5:
                print('WARNING: File {} (md5sum {}) already exists, '
                      'not uploading'.format(full_path, local_md5))
                return obj
            else:
                print('WARNING: File {} exists on SolveBio with different '
                      'md5sum (local: {} vs remote: {}) Uploading anyway, '
                      'but not overwriting.'.format(
                          full_path, local_md5, obj.md5))
    except NotFoundError:
        pass

    # Lookup parent object
    if path_dict['parent_path'] == '/':
        parent_object_id = None
    else:
        parent_obj = Object.get_by_full_path(path_dict['parent_full_path'],
                                             assert_type='folder',
                                             client=_client)
        parent_object_id = parent_obj.id

    description = kwargs.get('description')

    # Create the file, and upload it to the Upload URL
    obj = Object.create(vault_id=vault.id,
                        parent_object_id=parent_object_id,
                        object_type='file',
                        filename=os.path.basename(local_path),
                        md5=local_md5,
                        mimetype=mimetype,
                        size=size,
                        description=description,
                        tags=kwargs.get('tags', []) or [],
                        client=_client)

    print('Notice: File created for {0} at {1}'.format(
        local_path, obj.path))
    print('Notice: Upload initialized')

    upload_url = obj.upload_url

    headers = {
        'Content-MD5': base64.b64encode(binascii.unhexlify(local_md5)),
        'Content-Type': mimetype,
        'Content-Length': str(size),
    }

    max_retries = 5
    retry = Retry(
        total=max_retries,
        read=max_retries,
        connect=max_retries,
        backoff_factor=0.3,
        status_forcelist=(500, 502, 504, 400),
    )

    # Use a session with a retry policy to handle connection errors. Both
    # the session and the file handle are closed once the request is done,
    # including when the request raises.
    with requests.Session() as session:
        session.mount('https://',
                      requests.adapters.HTTPAdapter(max_retries=retry))
        with open(local_path, 'rb') as upload_fh:
            upload_resp = session.put(upload_url,
                                      data=upload_fh,
                                      headers=headers)

    if upload_resp.status_code != 200:
        print('WARNING: Upload status code for {0} was {1}'.format(
            local_path, upload_resp.status_code))
        # Clean up the failed upload
        obj.delete(force=True)
        raise FileUploadError(upload_resp.content)
    else:
        print('Notice: Successfully uploaded {0} to {1}'.format(
            local_path, obj.path))

    return obj
def get_or_create_by_full_path(cls, full_path, **kwargs):
    """Return the object at ``full_path``, creating it if it is missing.

    Keyword arguments consumed here (everything else, including
    ``object_type``, is forwarded to ``Object.create``):

    :param client: API client; falls back to ``cls._client`` and then to
        the module-level ``client``.
    :param create_vault: When true, the vault is fetched or created with
        ``Vault.get_or_create_by_full_path``. Defaults to ``False``.
    :param create_folders: When true, missing parent folders are created.
        Defaults to ``True``.
    :param assert_type: Type passed to the lookup; defaults to the value
        of ``object_type`` if that keyword is given.
    :returns: The existing object, or the result of ``Object.create``.
    :raises Exception: If the object is missing and ``object_type`` was not
        given; if the vault does not exist and ``create_vault`` is false;
        or if a parent folder is missing and ``create_folders`` is false.
    """
    from solvebio import Vault
    from solvebio import Object

    _client = kwargs.pop('client', None) or cls._client or client
    create_vault = kwargs.pop('create_vault', False)
    create_folders = kwargs.pop('create_folders', True)

    # Check for object type assertion, if not explicitly added, see
    # if user has passed object_type, as their intent was to get/create
    # an object of that type.
    assert_type = kwargs.pop('assert_type',
                             kwargs.get('object_type', None))

    try:
        return cls.get_by_full_path(full_path, assert_type=assert_type,
                                    client=_client)
    except NotFoundError:
        pass

    # Object type required when creating Object
    object_type = kwargs.get('object_type')
    if not object_type:
        raise Exception("'object_type' is required when creating a new "
                        "Object. Pass one of: file, folder, dataset")

    # TODO should we require file contents?
    # Technically a user could then use this object to the call
    # upload_file()
    # if object_type == 'file' and not kwargs.get('content'):
    #     raise Exception('')

    # Object not found, create it step-by-step
    full_path, parts = Object.validate_full_path(full_path, client=_client)

    if create_vault:
        vault = Vault.get_or_create_by_full_path(
            '{0}:{1}'.format(parts['domain'], parts['vault']),
            client=_client)
    else:
        vaults = Vault.all(account_domain=parts['domain'],
                           name=parts['vault'],
                           client=_client)
        matching_vaults = vaults.solve_objects()
        if not matching_vaults:
            raise Exception('Vault with name {0}:{1} does not exist. Pass '
                            'create_vault=True to auto-create'.format(
                                parts['domain'], parts['vault']))
        vault = matching_vaults[0]

    # Create the folders to hold the object if they do not already exist.
    object_path = parts['path']
    curr_path = os.path.dirname(object_path)
    folders_to_create = []
    new_folders = []
    # Maps a folder path to its object id; the root has no object.
    id_map = {'/': None}

    # Walk up from the object's parent until an existing folder (or the
    # root) is reached, remembering every missing folder on the way.
    while curr_path != '/':
        try:
            obj = Object.get_by_path(curr_path,
                                     vault_id=vault.id,
                                     assert_type='folder',
                                     client=_client)
            id_map[curr_path] = obj.id
            break
        except NotFoundError:
            if not create_folders:
                # Fill in the placeholder so the message names the folder.
                raise Exception('Folder {} does not exist. Pass '
                                'create_folders=True to auto-create '
                                'missing folders'.format(curr_path))

            folders_to_create.append(curr_path)
            curr_path = '/'.join(curr_path.split('/')[:-1])
            if curr_path == '':
                break

    # Missing folders were collected deepest-first; create them top-down so
    # that each parent id is known before its child is created.
    for folder in reversed(folders_to_create):
        new_folder = Object.create(
            object_type='folder',
            vault_id=vault.id,
            filename=os.path.basename(folder),
            parent_object_id=id_map[os.path.dirname(folder)],
            client=_client)
        new_folders.append(new_folder)
        id_map[folder] = new_folder.id

    if os.path.dirname(object_path) == '/':
        parent_folder_id = None
    elif new_folders:
        parent_folder_id = new_folders[-1].id
    else:
        parent_folder_id = id_map[os.path.dirname(object_path)]

    return Object.create(filename=os.path.basename(object_path),
                         vault_id=vault.id,
                         parent_object_id=parent_folder_id,
                         client=_client,
                         **kwargs)
def _upload_folder(domain, vault, base_remote_path, base_local_path,
                   local_start):
    """Copy the directory tree under ``base_local_path`` to the remote.

    The root folder ``local_start`` is created under ``base_remote_path``
    if it is missing. The local tree is then walked: every sub-folder that
    is missing on the remote is created, and every file that is missing on
    the remote is uploaded with ``Object.upload_file``.

    :param domain: Not used by this function.
    :param vault: Object providing the ``id`` and ``full_path`` of the
        destination vault.
    :param base_remote_path: Remote full path under which the tree is
        placed.
    :param base_local_path: Local directory to walk.
    :param local_start: Name of the root folder to create on the remote.
    """
    # Create the upload root folder if it does not exist on the remote
    try:
        upload_root_path, _ = Object.validate_full_path(
            os.path.join(base_remote_path, local_start)
        )
        Object.get_by_full_path(upload_root_path, assert_type='folder')
    except NotFoundError:
        base_remote_path, path_dict = \
            Object.validate_full_path(base_remote_path)

        if path_dict['path'] == '/':
            parent_object_id = None
        else:
            obj = Object.get_by_full_path(base_remote_path,
                                          assert_type='folder')
            parent_object_id = obj.id

        # Create base folder
        new_folder = Object.create(
            vault_id=vault.id,
            parent_object_id=parent_object_id,
            object_type='folder',
            filename=local_start
        )

        print('Notice: Folder created for {0} at {1}'.format(
            base_local_path,
            new_folder.path,
        ))

    # The parent directory of the local base is stripped from every walked
    # directory to get its remote-relative path. It is escaped because a
    # local path may contain regex metacharacters such as "+" or "(".
    local_prefix = re.compile(
        '^' + re.escape(os.path.dirname(base_local_path)))

    for root, dirs, files in os.walk(base_local_path):
        # Remote folder that corresponds to the local directory ``root``.
        remote_root = os.path.join(
            base_remote_path, local_prefix.sub('', root).lstrip('/'))

        # Create the sub-folders that do not exist on the remote
        for d in dirs:
            dirpath = os.path.join(remote_root, d)

            try:
                # NOTE(review): the other lookups here pass
                # ``assert_type``; confirm ``object_type`` is intended.
                Object.get_by_full_path(dirpath, object_type='folder')
            except NotFoundError:
                # Create the folder
                if os.path.dirname(dirpath.split(':')[-1]) == '/':
                    parent_object_id = None
                else:
                    parent_full_path = os.path.dirname(dirpath)
                    parent = Object.get_by_full_path(
                        parent_full_path, assert_type='folder')
                    parent_object_id = parent.id

                # Make the API call
                new_obj = Object.create(
                    vault_id=vault.id,
                    parent_object_id=parent_object_id,
                    object_type='folder',
                    filename=d,
                )

                print('Notice: Folder created for {0} at {1}'
                      .format(os.path.join(root, d), new_obj.path))

        # Upload the files that do not yet exist on the remote
        for f in files:
            file_full_path = os.path.join(remote_root, f)
            try:
                Object.get_by_full_path(file_full_path)
            except NotFoundError:
                parent = Object.get_by_full_path(
                    os.path.dirname(file_full_path), assert_type='folder')
                Object.upload_file(os.path.join(root, f), parent.path,
                                   vault.full_path)
def get_or_create_by_full_path(cls, full_path, **kwargs):
    """Fetch the dataset at ``full_path``; create it when it is not found.

    Keyword arguments consumed here (the rest are forwarded to
    ``Dataset.create``):

    :param client: API client; falls back to ``cls._client`` and then to
        the module-level ``client``.
    :param create_vault: When true, the vault is fetched or created with
        ``Vault.get_or_create_by_full_path``. Defaults to ``False``.
    :param create_folders: When true, missing parent folders are created.
        Defaults to ``True``.
    :returns: The existing dataset, or the result of ``Dataset.create``.
    :raises Exception: If the vault does not exist and ``create_vault`` is
        false, or a parent folder is missing and ``create_folders`` is
        false.
    """
    from solvebio import Vault
    from solvebio import Object

    _client = kwargs.pop('client', None) or cls._client or client
    create_vault = kwargs.pop('create_vault', False)
    create_folders = kwargs.pop('create_folders', True)

    try:
        return Dataset.get_by_full_path(full_path, assert_type='dataset',
                                        client=_client)
    except NotFoundError:
        pass

    # Dataset not found, create it step-by-step
    full_path, parts = Object.validate_full_path(full_path, client=_client)

    if create_vault:
        vault = Vault.get_or_create_by_full_path(
            '{0}:{1}'.format(parts['domain'], parts['vault']),
            client=_client)
    else:
        vaults = Vault.all(account_domain=parts['domain'],
                           name=parts['vault'],
                           client=_client)
        found_vaults = vaults.solve_objects()
        if not found_vaults:
            raise Exception(
                'Vault does not exist with name {0}:{1}'.format(
                    parts['domain'], parts['vault']))
        vault = found_vaults[0]

    # Create the folders to hold the dataset if they do not already exist.
    object_path = parts['path']
    curr_path = os.path.dirname(object_path)
    folders_to_create = []
    new_folders = []
    # Folder path -> object id; the root folder has no object id.
    id_map = {'/': None}

    # Climb from the dataset's parent towards the root, stopping at the
    # first folder that already exists and recording the missing ones.
    while curr_path != '/':
        try:
            obj = Object.get_by_path(curr_path,
                                     vault_id=vault.id,
                                     assert_type='folder',
                                     client=_client)
            id_map[curr_path] = obj.id
            break
        except NotFoundError:
            if not create_folders:
                # Substitute the folder path into the message placeholder.
                raise Exception('Folder {} does not exist. Pass '
                                'create_folders=True to auto-create '
                                'missing folders'.format(curr_path))

            folders_to_create.append(curr_path)
            curr_path = '/'.join(curr_path.split('/')[:-1])
            if curr_path == '':
                break

    # The missing folders were recorded deepest-first; reverse the order so
    # every parent exists (and has an id) before its child is created.
    for folder in reversed(folders_to_create):
        new_folder = Object.create(
            object_type='folder',
            vault_id=vault.id,
            filename=os.path.basename(folder),
            parent_object_id=id_map[os.path.dirname(folder)],
            client=_client)
        new_folders.append(new_folder)
        id_map[folder] = new_folder.id

    if os.path.dirname(object_path) == '/':
        parent_folder_id = None
    elif new_folders:
        parent_folder_id = new_folders[-1].id
    else:
        parent_folder_id = id_map[os.path.dirname(object_path)]

    return Dataset.create(name=os.path.basename(object_path),
                          vault_id=vault.id,
                          vault_parent_object_id=parent_folder_id,
                          client=_client,
                          **kwargs)
def upload_file(cls, local_path, remote_path, vault_full_path, **kwargs):
    """Upload a local file into the folder ``remote_path`` of a vault.

    :param local_path: Path of the file to upload; ``~`` is expanded.
    :param remote_path: Remote parent folder path. ``'/'`` means the file
        is created without a parent object.
    :param vault_full_path: Full path of the destination vault.
    :param client: (keyword) API client; falls back to ``cls._client`` and
        then to the module-level ``client``.
    :param description: (keyword) description for the new object; defaults
        to ``'File uploaded via python client'``.
    :returns: ``None`` if the local file is empty, otherwise the created
        object. The object is returned even when the upload failed, in
        which case it has already been deleted.
    """
    from solvebio import Vault
    from solvebio import Object

    _client = kwargs.pop('client', None) or cls._client or client

    local_path = os.path.expanduser(local_path)
    if os.stat(local_path).st_size == 0:
        print('Notice: Cannot upload empty file {0}'.format(local_path))
        return

    # Get vault
    vault = Vault.get_by_full_path(vault_full_path, client=_client)

    # Get MD5, mimetype, and file size for the object
    md5, _ = md5sum(local_path, multipart_threshold=None)
    # NOTE(review): mimetypes.guess_type() returns (type, encoding), so
    # this binds the *encoding* element -- confirm that is intended.
    _, mimetype = mimetypes.guess_type(local_path)
    size = os.path.getsize(local_path)

    # Lookup parent object
    if remote_path == '/':
        parent_object_id = None
    else:
        parent_obj = Object.get_by_full_path(
            ':'.join([vault.full_path, remote_path]),
            assert_type='folder',
            client=_client)
        parent_object_id = parent_obj.id

    description = kwargs.get('description',
                             'File uploaded via python client')

    # Create the file, and upload it to the Upload URL
    obj = Object.create(vault_id=vault.id,
                        parent_object_id=parent_object_id,
                        object_type='file',
                        filename=os.path.basename(local_path),
                        md5=md5,
                        mimetype=mimetype,
                        size=size,
                        description=description,
                        client=_client)

    print('Notice: File created for {0} at {1}'.format(
        local_path, obj.path))
    print('Notice: Upload initialized')

    upload_url = obj.upload_url

    headers = {
        'Content-MD5': base64.b64encode(binascii.unhexlify(md5)),
        'Content-Type': mimetype,
        'Content-Length': str(size),
    }

    # Open the file in a ``with`` block so the handle is closed once the
    # request has finished, including when the request raises.
    with open(local_path, 'rb') as upload_fh:
        upload_resp = requests.put(upload_url,
                                   data=upload_fh,
                                   headers=headers)

    if upload_resp.status_code != 200:
        print('Notice: Upload status code for {0} was {1}'.format(
            local_path, upload_resp.status_code))
        print('See error message below:')
        print(upload_resp.content)
        # Clean up the failed upload
        obj.delete(force=True)
    else:
        print('Notice: Successfully uploaded {0} to {1}'.format(
            local_path, obj.path))

    return obj
def upload_file(cls, local_path, remote_path, vault_full_path, **kwargs):
    """Upload a local file into the folder ``remote_path`` of a vault.

    The upload is sent through a ``requests`` session whose ``https://``
    adapter is configured with ``max_retries=5``.

    :param local_path: Path of the file to upload; ``~`` is expanded.
    :param remote_path: Remote parent folder path. ``'/'`` means the file
        is created without a parent object.
    :param vault_full_path: Full path of the destination vault.
    :param client: (keyword) API client; falls back to ``cls._client`` and
        then to the module-level ``client``.
    :param description: (keyword) description for the new object; defaults
        to ``'File uploaded via python client'``.
    :returns: ``None`` if the local file is empty, otherwise the created
        object. The object is returned even when the upload failed, in
        which case it has already been deleted.
    """
    from solvebio import Vault
    from solvebio import Object

    _client = kwargs.pop('client', None) or cls._client or client

    local_path = os.path.expanduser(local_path)
    if os.stat(local_path).st_size == 0:
        print('Notice: Cannot upload empty file {0}'.format(local_path))
        return

    # Get vault
    vault = Vault.get_by_full_path(vault_full_path, client=_client)

    # Get MD5, mimetype, and file size for the object
    md5, _ = md5sum(local_path, multipart_threshold=None)
    # NOTE(review): mimetypes.guess_type() returns (type, encoding), so
    # this binds the *encoding* element -- confirm that is intended.
    _, mimetype = mimetypes.guess_type(local_path)
    size = os.path.getsize(local_path)

    # Lookup parent object
    if remote_path == '/':
        parent_object_id = None
    else:
        parent_obj = Object.get_by_full_path(
            ':'.join([vault.full_path, remote_path]),
            assert_type='folder',
            client=_client)
        parent_object_id = parent_obj.id

    description = kwargs.get(
        'description',
        'File uploaded via python client'
    )

    # Create the file, and upload it to the Upload URL
    obj = Object.create(
        vault_id=vault.id,
        parent_object_id=parent_object_id,
        object_type='file',
        filename=os.path.basename(local_path),
        md5=md5,
        mimetype=mimetype,
        size=size,
        description=description,
        client=_client
    )

    print('Notice: File created for {0} at {1}'.format(local_path,
                                                       obj.path))
    print('Notice: Upload initialized')

    upload_url = obj.upload_url

    headers = {
        'Content-MD5': base64.b64encode(binascii.unhexlify(md5)),
        'Content-Type': mimetype,
        'Content-Length': str(size),
    }

    # Use a session with a retry policy to handle connection errors. Both
    # the session and the file handle are closed once the request is done,
    # including when the request raises.
    with requests.Session() as session:
        session.mount('https://',
                      requests.adapters.HTTPAdapter(max_retries=5))
        with open(local_path, 'rb') as upload_fh:
            upload_resp = session.put(upload_url,
                                      data=upload_fh,
                                      headers=headers)

    if upload_resp.status_code != 200:
        print('Notice: Upload status code for {0} was {1}'.format(
            local_path, upload_resp.status_code
        ))
        print('See error message below:')
        print(upload_resp.content)
        # Clean up the failed upload
        obj.delete(force=True)
    else:
        print('Notice: Successfully uploaded {0} to {1}'.format(local_path,
                                                                obj.path))

    return obj
def _upload_folder(domain, vault, base_remote_path, base_local_path,
                   local_start):
    """Replicate the local tree at ``base_local_path`` on the remote.

    First the root folder ``local_start`` is created under
    ``base_remote_path`` if it is missing. Then the local tree is walked:
    each sub-folder missing on the remote is created, and each file
    missing on the remote is uploaded with ``Object.upload_file``.

    :param domain: Not used by this function.
    :param vault: Object providing the ``id`` and ``full_path`` of the
        destination vault.
    :param base_remote_path: Remote full path under which the tree is
        placed.
    :param base_local_path: Local directory to walk.
    :param local_start: Name of the root folder to create on the remote.
    """
    # Create the upload root folder if it does not exist on the remote
    try:
        upload_root_path, _ = Object.validate_full_path(
            os.path.join(base_remote_path, local_start))
        Object.get_by_full_path(upload_root_path, assert_type='folder')
    except NotFoundError:
        base_remote_path, path_dict = \
            Object.validate_full_path(base_remote_path)

        if path_dict['path'] == '/':
            parent_object_id = None
        else:
            obj = Object.get_by_full_path(base_remote_path,
                                          assert_type='folder')
            parent_object_id = obj.id

        # Create base folder
        new_folder = Object.create(vault_id=vault.id,
                                   parent_object_id=parent_object_id,
                                   object_type='folder',
                                   filename=local_start)

        print('Notice: Folder created for {0} at {1}'.format(
            base_local_path,
            new_folder.path,
        ))

    # Pattern that removes the local base's parent directory from a walked
    # path. The directory is escaped so that regex metacharacters in a
    # local path (for example "+" or "(") are matched literally.
    strip_local_parent = re.compile(
        '^' + re.escape(os.path.dirname(base_local_path)))

    for root, dirs, files in os.walk(base_local_path):
        # Remote folder that corresponds to the local directory ``root``.
        remote_root = os.path.join(
            base_remote_path, strip_local_parent.sub('', root).lstrip('/'))

        # Create the sub-folders that do not exist on the remote
        for d in dirs:
            dirpath = os.path.join(remote_root, d)

            try:
                # NOTE(review): the other lookups here pass
                # ``assert_type``; confirm ``object_type`` is intended.
                Object.get_by_full_path(dirpath, object_type='folder')
            except NotFoundError:
                # Create the folder
                if os.path.dirname(dirpath.split(':')[-1]) == '/':
                    parent_object_id = None
                else:
                    parent_full_path = os.path.dirname(dirpath)
                    parent = Object.get_by_full_path(parent_full_path,
                                                     assert_type='folder')
                    parent_object_id = parent.id

                # Make the API call
                new_obj = Object.create(
                    vault_id=vault.id,
                    parent_object_id=parent_object_id,
                    object_type='folder',
                    filename=d,
                )

                print('Notice: Folder created for {0} at {1}'.format(
                    os.path.join(root, d), new_obj.path))

        # Upload the files that do not yet exist on the remote
        for f in files:
            file_full_path = os.path.join(remote_root, f)
            try:
                Object.get_by_full_path(file_full_path)
            except NotFoundError:
                parent = Object.get_by_full_path(
                    os.path.dirname(file_full_path), assert_type='folder')
                Object.upload_file(os.path.join(root, f), parent.path,
                                   vault.full_path)