def _update_bundles():
    """
    Bulk update bundles.

    Loads a list of partial bundle documents from the request body, verifies
    via check_bundles_have_all_permission that the requesting user may modify
    every referenced bundle, applies each update and returns the refreshed
    bundle infos.

    Returns:
        The serialized infos of the updated bundles, in the order the uuids
        appeared in the request. A bundle that is no longer present when the
        infos are re-read (e.g. deleted right after being updated) is omitted
        instead of raising a KeyError.
    """
    bundle_updates = (
        BundleSchema(strict=True, many=True, dump_only=BUNDLE_UPDATE_RESTRICTED_FIELDS)
        .load(request.json, partial=True)
        .data
    )

    # Check permissions. Popping 'uuid' leaves only the fields to change in
    # each update dict.
    bundle_uuids = [b.pop('uuid') for b in bundle_updates]
    check_bundles_have_all_permission(local.model, request.user, bundle_uuids)
    bundles = local.model.batch_get_bundles(uuid=bundle_uuids)

    # Update bundles.
    # NOTE(review): the positional pairing assumes batch_get_bundles returns
    # the bundles in the same order as bundle_uuids -- confirm against the model.
    for bundle, update in izip(bundles, bundle_updates):
        local.model.update_bundle(bundle, update)

    # Get updated bundles
    bundles_dict = get_bundle_infos(bundle_uuids)

    # Create list of bundles in original order. Skip uuids missing from the
    # result, since a bundle may have been deleted right after being updated.
    updated_bundles = [bundles_dict[uuid] for uuid in bundle_uuids if uuid in bundles_dict]
    return BundleSchema(many=True).dump(updated_bundles).data
def create_bundle_actions():
    """
    Forward each requested bundle action to the worker handling that bundle
    and append the action string to the bundle's metadata.

    Raises UsageError when a targeted bundle is neither running nor preparing.
    Returns the serialized list of actions that were submitted.
    """
    requested = BundleActionSchema(strict=True, many=True).load(request.json).data
    check_bundles_have_all_permission(
        local.model, request.user, [item['uuid'] for item in requested]
    )

    for item in requested:
        bundle_uuid = item['uuid']
        bundle = local.model.get_bundle(bundle_uuid)
        if bundle.state not in (State.RUNNING, State.PREPARING):
            raise UsageError('Cannot execute this action on a bundle that is not running.')

        # Deliver the action first; the metadata is only touched once the
        # worker has been reached.
        worker = local.worker_model.get_bundle_worker(bundle_uuid)
        delivered = local.worker_model.send_json_message(worker['socket_id'], item, 60)
        precondition(delivered, 'Unable to reach worker.')

        recorded = getattr(bundle.metadata, 'actions', []) + [BundleAction.as_string(item)]
        local.model.update_bundle(bundle, {'metadata': {'actions': recorded}})

    return BundleActionSchema(many=True).dump(requested).data
def kill_bundles(self, bundle_uuids):
    '''
    Queue a kill command for every bundle in |bundle_uuids|.

    The current user must pass the permission check on all of the bundles
    before any command is added.
    '''
    model = self.model
    check_bundles_have_all_permission(model, self._current_user(), bundle_uuids)
    for target_uuid in bundle_uuids:
        model.add_bundle_action(target_uuid, Command.KILL)
def create_bundle_actions():
    """
    Relay the submitted bundle actions to the workers running the bundles,
    and record each action string in the corresponding bundle's metadata.

    A UsageError is raised for a bundle that is not in the running or
    preparing state. The submitted actions are returned, serialized.
    """
    loaded = BundleActionSchema(strict=True, many=True).load(request.json)
    actions = loaded.data
    action_uuids = [entry['uuid'] for entry in actions]
    check_bundles_have_all_permission(local.model, request.user, action_uuids)

    for entry in actions:
        bundle = local.model.get_bundle(entry['uuid'])
        is_actionable = bundle.state in [State.RUNNING, State.PREPARING]
        if not is_actionable:
            raise UsageError('Cannot execute this action on a bundle that is not running.')

        worker = local.worker_model.get_bundle_worker(entry['uuid'])
        # precondition() fails the request when the worker did not take the message.
        precondition(
            local.worker_model.send_json_message(worker['socket_id'], entry, 60),
            'Unable to reach worker.',
        )

        previous = getattr(bundle.metadata, 'actions', [])
        metadata_update = {'actions': previous + [BundleAction.as_string(entry)]}
        local.model.update_bundle(bundle, {'metadata': metadata_update})

    return BundleActionSchema(many=True).dump(actions).data
def set_bundle_permissions(new_permissions):
    """
    Apply a list of group permissions to bundles.

    Each entry of |new_permissions| is read for its 'group_uuid',
    'object_uuid' (the bundle) and 'permission' keys. The requesting user
    must pass the permission check on every referenced bundle before any
    permission is written.
    """
    affected_uuids = [entry['object_uuid'] for entry in new_permissions]
    # Check if current user has permission to set bundle permissions
    check_bundles_have_all_permission(local.model, request.user, affected_uuids)

    # Sequentially set bundle permissions
    for entry in new_permissions:
        local.model.set_group_bundle_permission(
            entry['group_uuid'], entry['object_uuid'], entry['permission']
        )
def chown_bundles(self, bundle_uuids, user_spec):
    '''
    Make the user identified by |user_spec| the owner of every bundle in
    |bundle_uuids|.

    The current user must pass the permission check on all bundles first.
    '''
    check_bundles_have_all_permission(self.model, self._current_user(), bundle_uuids)
    new_owner = self.user_info(user_spec)

    # Reassign bundles one at a time.
    for target_uuid in bundle_uuids:
        target = self.model.get_bundle(target_uuid)
        ownership_update = {'owner_id': new_owner['id']}
        self.model.update_bundle(target, ownership_update)
def create_bundle_actions():
    """
    Sends the message to the worker to do the bundle action, and adds the
    action string to the bundle metadata.

    For a bundle that has a worker, the action is forwarded to that worker and
    only the metadata is updated here. For a bundle without a worker, the
    bundle is marked as killed directly in the database.

    Raises:
        UsageError: if a targeted bundle is in the ready, failed or killed state.

    Returns:
        The serialized list of submitted actions.
    """
    actions = BundleActionSchema(strict=True, many=True).load(request.json).data
    check_bundles_have_all_permission(local.model, request.user, [a['uuid'] for a in actions])

    for action in actions:
        bundle = local.model.get_bundle(action['uuid'])
        if bundle.state in [State.READY, State.FAILED, State.KILLED]:
            raise UsageError(
                'Cannot execute this action on a bundle that is in the following states: ready, failed, killed. '
                'Kill action can be executed on bundles in created, uploading, staged, making, starting, '
                'running, preparing, or finalizing state.'
            )

        worker = local.model.get_bundle_worker(action['uuid'])
        new_actions = getattr(bundle.metadata, 'actions', []) + [BundleAction.as_string(action)]

        if worker:
            # The state updates of bundles in PREPARING, RUNNING, or FINALIZING
            # state will be handled on the worker side.
            precondition(
                local.worker_model.send_json_message(worker['socket_id'], action, 60),
                'Unable to reach worker.',
            )
            local.model.update_bundle(bundle, {'metadata': {'actions': new_actions}})
        else:
            # The state updates of bundles in CREATED, UPLOADING, MAKING,
            # STARTING or STAGED state will be handled on the rest-server side.
            # NOTE(review): the bundle is marked KILLED whatever the action
            # is -- confirm that only kill actions can reach this branch.
            local.model.update_bundle(
                bundle, {'state': State.KILLED, 'metadata': {'actions': new_actions}}
            )

    return BundleActionSchema(many=True).dump(actions).data
def delete_bundles(self, uuids, force, recursive, data_only, dry_run):
    '''
    Delete the bundles named by |uuids| and return the uuids that were
    (or, with |dry_run|, would be) affected.
    If |recursive|, all downstream bundles are included as well.
    If |force|, skip the checks for dependent bundles and for bundles that
    appear in more than one worksheet.
    If |data_only|, only remove from the bundle store, not the bundle metadata.
    If |dry_run|, no bundle rows are changed; the flag is also handed to the
    bundle store cleanup.
    '''
    relevant_uuids = self.model.get_self_and_descendants(uuids, depth=sys.maxint)
    requested_set = set(uuids)
    closure_set = set(relevant_uuids)

    if not recursive:
        # Descendants exist and the caller did not force: refuse, listing them.
        if not force and requested_set != closure_set:
            dependents = self.model.batch_get_bundles(uuid=(closure_set - requested_set))
            dependents_text = '\n '.join(bundle.simple_str() for bundle in dependents)
            raise UsageError(
                'Can\'t delete bundles %s because the following bundles depend on them:\n %s'
                % (' '.join(uuids), dependents_text)
            )
        relevant_uuids = uuids

    check_bundles_have_all_permission(self.model, self._current_user(), relevant_uuids)

    # A bundle hosted by several worksheets is dangerous to delete, so that
    # requires |force|.
    if not force:
        hosts_by_uuid = self.model.get_host_worksheet_uuids(relevant_uuids)
        for uuid, host_worksheet_uuids in hosts_by_uuid.items():
            if len(set(host_worksheet_uuids)) <= 1:
                continue
            worksheets = self.model.batch_get_worksheets(
                fetch_items=False, uuid=host_worksheet_uuids
            )
            raise UsageError(
                'Can\'t delete bundle %s because it appears in multiple worksheets:\n %s'
                % (uuid, '\n '.join(worksheet.simple_str() for worksheet in worksheets))
            )

    # Collect the data hashes before the bundle rows are touched.
    relevant_data_hashes = {
        bundle.data_hash
        for bundle in self.model.batch_get_bundles(uuid=relevant_uuids)
        if bundle.data_hash
    }

    if not dry_run:
        if data_only:
            # Just remove references to the data hashes
            self.model.remove_data_hash_references(relevant_uuids)
        else:
            # Actually delete the bundle
            self.model.delete_bundles(relevant_uuids)

    # Clean up the stored data for every collected hash.
    for data_hash in relevant_data_hashes:
        self.bundle_store.cleanup(self.model, data_hash, relevant_uuids, dry_run)

    return relevant_uuids
def set_bundles_perm(self, bundle_uuids, group_spec, permission_spec):
    '''
    Give the given |group_spec| the desired |permission_spec| on |bundle_uuids|.

    The current user must pass the permission check on every bundle. For each
    bundle, the group's permission row is updated, added or deleted depending
    on whether the new and the existing permission are greater than zero.

    Returns a dict with the resolved 'group_info' and the parsed 'permission'.
    '''
    check_bundles_have_all_permission(self.model, self._current_user(), bundle_uuids)
    group_info = self._get_group_info(group_spec, need_admin=False)

    # Parse once, before the loop: the value does not depend on the bundle,
    # and it must be defined for the return value even when |bundle_uuids| is
    # empty.
    new_permission = parse_permission(permission_spec)

    for bundle_uuid in bundle_uuids:
        old_permission = self.model.get_group_bundle_permission(group_info['uuid'], bundle_uuid)
        if new_permission > 0:
            if old_permission > 0:
                self.model.update_bundle_permission(group_info['uuid'], bundle_uuid, new_permission)
            else:
                self.model.add_bundle_permission(group_info['uuid'], bundle_uuid, new_permission)
        elif old_permission > 0:
            # The new permission grants nothing, so drop the existing row.
            self.model.delete_bundle_permission(group_info['uuid'], bundle_uuid)

    return {'group_info': group_info, 'permission': new_permission}
def _update_bundles():
    """
    Bulk update bundles.

    Every update is validated against the bundle's frozen status before any
    update is written. The refreshed bundle infos are returned in request
    order, leaving out bundles that are no longer present.
    """
    schema = BundleSchema(strict=True, many=True, dump_only=BUNDLE_UPDATE_RESTRICTED_FIELDS)
    bundle_updates = schema.load(request.json, partial=True).data

    # Check permissions
    bundle_uuids = [entry.pop('uuid') for entry in bundle_updates]
    check_bundles_have_all_permission(local.model, request.user, bundle_uuids)
    bundles = local.model.batch_get_bundles(uuid=bundle_uuids)

    # Validation pass: nothing is written until every update has been checked.
    for bundle, update in zip(bundles, bundle_updates):
        if "frozen" in update:
            # Freezing or unfreezing requires the bundle to be freezable;
            # freezing additionally requires that it is not frozen already.
            bundle_util.check_bundle_freezable(bundle)
            if update["frozen"]:
                bundle_util.check_bundle_not_frozen(bundle)
        else:
            bundle_util.check_bundle_not_frozen(bundle)

    # Write pass
    for bundle, update in zip(bundles, bundle_updates):
        local.model.update_bundle(bundle, update)

    # Re-read the bundles that were just updated
    bundles_dict = get_bundle_infos(bundle_uuids)

    # Keep the original order. A uuid may be missing from the dict, since
    # there is a chance that a bundle is deleted right after being updated.
    updated_bundles = []
    for uuid in bundle_uuids:
        if uuid in bundles_dict:
            updated_bundles.append(bundles_dict[uuid])

    return BundleSchema(many=True).dump(updated_bundles).data
def delete_bundles(uuids, force, recursive, data_only, dry_run):
    """
    Delete the bundles named by |uuids| and return the affected uuids.
    If |force|, allow deletion of bundles that have descendants or that appear
    across multiple worksheets.
    If |recursive|, all downstream bundles are included as well.
    If |data_only|, only remove from the bundle store, not the bundle metadata.
    If |dry_run|, no bundle rows are changed; the flag is also handed to the
    bundle store cleanup.
    """
    relevant_uuids = local.model.get_self_and_descendants(uuids, depth=sys.maxsize)

    if not recursive:
        # Descendants exist and the caller did not force: refuse, listing them.
        if not force and set(uuids) != set(relevant_uuids):
            dependents = local.model.batch_get_bundles(uuid=(set(relevant_uuids) - set(uuids)))
            raise UsageError(
                'Can\'t delete bundles %s because the following bundles depend on them:\n %s'
                % (' '.join(uuids), '\n '.join(bundle.simple_str() for bundle in dependents))
            )
        relevant_uuids = uuids

    check_bundles_have_all_permission(local.model, request.user, relevant_uuids)

    # Refuse to delete bundles which are still active.
    # NOTE(review): the states are looked up for |uuids| only, not for the
    # descendants added by |recursive| -- confirm this is intended.
    states = local.model.get_bundle_states(uuids)
    logger.debug('delete states: %s', states)
    active_uuids = []
    for bundle_uuid, state in states.items():
        if state in State.ACTIVE_STATES:
            active_uuids.append(bundle_uuid)
    logger.debug('delete actives: %s', active_uuids)
    if active_uuids:
        blocked = 'Can\'t delete bundles: %s. ' % (' '.join(active_uuids))
        raise UsageError(
            blocked
            + 'For run bundles, kill them first. '
            + 'Bundles stuck not running will eventually '
            + 'automatically be moved to a state where they '
            + 'can be deleted.'
        )

    # Bundles referenced from several places are very dangerous to delete.
    hosts_by_uuid = local.model.get_all_host_worksheet_uuids(relevant_uuids)
    for uuid, host_worksheet_uuids in hosts_by_uuid.items():
        worksheets = local.model.batch_get_worksheets(fetch_items=False, uuid=host_worksheet_uuids)
        frozen_worksheets = [sheet for sheet in worksheets if sheet.frozen]
        if frozen_worksheets:
            raise UsageError(
                "Can't delete bundle %s because it appears in frozen worksheets "
                "(need to delete worksheet first):\n %s"
                % (uuid, '\n '.join(sheet.simple_str() for sheet in frozen_worksheets))
            )
        if len(host_worksheet_uuids) > 1 and not force:
            raise UsageError(
                "Can't delete bundle %s because it appears in multiple worksheets "
                "(--force to override):\n %s"
                % (uuid, '\n '.join(sheet.simple_str() for sheet in worksheets))
            )

    # Delete the actual bundle
    if not dry_run:
        if data_only:
            # Just remove references to the data hashes
            local.model.remove_data_hash_references(relevant_uuids)
        else:
            # Actually delete the bundle
            local.model.delete_bundles(relevant_uuids)

        # Update user statistics
        local.model.update_user_disk_used(request.user.user_id)

    # Delete the data.
    bundle_link_urls = local.model.get_bundle_metadata(relevant_uuids, "link_url")
    for uuid in relevant_uuids:
        # Linked bundles are never physically deleted.
        if bundle_link_urls.get(uuid):
            continue
        # Only clean up when something exists at the bundle's location.
        bundle_location = local.bundle_store.get_bundle_location(uuid)
        if os.path.lexists(bundle_location):
            local.bundle_store.cleanup(uuid, dry_run)

    return relevant_uuids
def _update_bundle_contents_blob(uuid):
    """
    Update the contents of the given running or uploading bundle.

    Query parameters:
    - `urls`: (optional) comma-separated list of URLs from which to fetch data
      to fill the bundle
    - `git`: (optional) 1 if URL should be interpreted as git repos to clone
      or 0 otherwise, default is 0.
    - `filename`: (optional) filename of the uploaded file, used to indicate
      whether or not it is an archive. When given, the request body is used
      as the upload source.
    - `unpack`: (optional) 1 if the uploaded file should be unpacked if it is
      an archive, or 0 otherwise, default is 1
    - `simplify`: (optional) 1 if the uploaded file should be 'simplified' if
      it is an archive, or 0 otherwise, default is 1.
    - `finalize_on_failure`: (optional) 1 if bundle state should be set
      to 'failed' in the case of a failure during upload, or 0 if the bundle
      state should not change on failure. Default is 0.
    - `finalize_on_success`: (optional) 1 if bundle state should be set
      to 'state_on_success' when the upload finishes successfully. Default is
      True
    - `state_on_success`: (optional) Update the bundle state to this state if
      the upload completes successfully. Must be either 'ready' or 'failed'.
      Default is 'ready'.
    """
    check_bundles_have_all_permission(local.model, request.user, [uuid])
    bundle = local.model.get_bundle(uuid)
    if bundle.state in State.FINAL_STATES:
        abort(http.client.FORBIDDEN, 'Contents cannot be modified, bundle already finalized.')

    def _discard_contents():
        # Drop whatever data is currently stored for the bundle, if any.
        if local.upload_manager.has_contents(bundle):
            local.upload_manager.cleanup_existing_contents(bundle)

    def _mark_failed(failure_message):
        local.model.update_bundle(
            bundle, {'state': State.FAILED, 'metadata': {'failure_message': failure_message}}
        )

    # Get and validate query parameters
    finalize_on_failure = query_get_bool('finalize_on_failure', default=False)
    finalize_on_success = query_get_bool('finalize_on_success', default=True)
    final_state = request.query.get('state_on_success', default=State.READY)
    if finalize_on_success and final_state not in State.FINAL_STATES:
        abort(
            http.client.BAD_REQUEST,
            'state_on_success must be one of %s' % '|'.join(State.FINAL_STATES),
        )

    # If this bundle already has data, remove it.
    _discard_contents()

    # Store the data.
    try:
        sources = None
        if request.query.urls:
            sources = query_get_list('urls')
        # request without "filename" doesn't need to upload to bundle store
        if request.query.filename:
            filename = request.query.get('filename', default='contents')
            sources = [(filename, request['wsgi.input'])]

        if sources:
            upload_options = {
                'sources': sources,
                'follow_symlinks': False,
                'exclude_patterns': None,
                'remove_sources': False,
                'git': query_get_bool('git', default=False),
                'unpack': query_get_bool('unpack', default=True),
                # See UploadManager for full explanation of 'simplify'
                'simplify_archives': query_get_bool('simplify', default=True),
            }
            local.upload_manager.upload_to_bundle_store(bundle, **upload_options)

        # A linked bundle is measured at its link URL, any other bundle at its
        # location in the bundle store.
        bundle_link_url = getattr(bundle.metadata, "link_url", None)
        bundle_location = bundle_link_url or local.bundle_store.get_bundle_location(bundle.uuid)
        local.model.update_disk_metadata(bundle, bundle_location, enforce_disk_quota=True)
    except UsageError as usage_error:
        # This is a user error (most likely disk quota overuse) so raise a
        # client HTTP error. The bundle is failed unconditionally here.
        _discard_contents()
        msg = "Upload failed: %s" % usage_error
        _mark_failed(msg)
        abort(http.client.BAD_REQUEST, msg)
    except Exception as error:
        # Upload failed: cleanup, update state if desired, and return HTTP error
        _discard_contents()
        msg = "Upload failed: %s" % error
        # The client may not want to finalize the bundle on failure, to keep
        # open the possibility of retrying the upload in the case of transient
        # failure.
        # Workers also use this API endpoint to upload partial contents of
        # running bundles, and they should use finalize_on_failure=0 to avoid
        # letting transient errors during upload fail the bundles prematurely.
        if finalize_on_failure:
            _mark_failed(msg)
        abort(http.client.INTERNAL_SERVER_ERROR, msg)
    else:
        if finalize_on_success:
            # Upload succeeded: update state
            local.model.update_bundle(bundle, {'state': final_state})
def update_bundle_metadata(self, uuid, metadata):
    '''
    Replace the user metadata of bundle |uuid| with |metadata|.

    The current user must pass the permission check on the bundle, and the
    metadata is validated against the bundle before it is written.
    '''
    model = self.model
    check_bundles_have_all_permission(model, self._current_user(), [uuid])
    target = model.get_bundle(uuid)
    self.validate_user_metadata(target, metadata)
    model.update_bundle(target, {'metadata': metadata})
def _update_bundle_contents_blob(uuid):
    """
    Update the contents of the given running or uploading bundle.

    Query parameters:
    - `urls`: (optional) URL from which to fetch data to fill the bundle.
      Only supports one URL.
    - `git`: (optional) 1 if URL should be interpreted as git repos to clone
      or 0 otherwise, default is 0.
    - `filename`: (optional) filename of the uploaded file, used to indicate
      whether or not it is an archive. When given, the request body is used
      as the upload source.
    - `unpack`: (optional) 1 if the uploaded file should be unpacked if it is
      an archive, or 0 otherwise, default is 1
    - `finalize_on_failure`: (optional) 1 if bundle state should be set
      to 'failed' in the case of a failure during upload, or 0 if the bundle
      state should not change on failure. Default is 0.
    - `finalize_on_success`: (optional) 1 if bundle state should be set
      to 'state_on_success' when the upload finishes successfully. Default is
      True
    - `state_on_success`: (optional) Update the bundle state to this state if
      the upload completes successfully. Must be either 'ready' or 'failed'.
      Default is 'ready'.
    - `use_azure_blob_beta`: (optional) Use Azure Blob Storage to store the
      bundle. Default is False. If CODALAB_ALWAYS_USE_AZURE_BLOB_BETA is set,
      this parameter is disregarded, as Azure Blob Storage will always be used.
    """
    check_bundles_have_all_permission(local.model, request.user, [uuid])
    bundle = local.model.get_bundle(uuid)
    if bundle.state in State.FINAL_STATES:
        abort(http.client.FORBIDDEN, 'Contents cannot be modified, bundle already finalized.')

    def _discard_contents():
        # Drop whatever data is currently stored for the bundle, if any.
        if local.upload_manager.has_contents(bundle):
            local.upload_manager.cleanup_existing_contents(bundle)

    # Get and validate query parameters
    finalize_on_failure = query_get_bool('finalize_on_failure', default=False)
    finalize_on_success = query_get_bool('finalize_on_success', default=True)
    use_azure_blob_beta = os.getenv("CODALAB_ALWAYS_USE_AZURE_BLOB_BETA") or query_get_bool(
        'use_azure_blob_beta', default=False
    )
    final_state = request.query.get('state_on_success', default=State.READY)
    if finalize_on_success and final_state not in State.FINAL_STATES:
        abort(
            http.client.BAD_REQUEST,
            'state_on_success must be one of %s' % '|'.join(State.FINAL_STATES),
        )

    # If this bundle already has data, remove it.
    _discard_contents()

    # Store the data.
    try:
        source = None
        if request.query.urls:
            sources = query_get_list('urls')
            # NOTE(review): this abort is raised inside the try block; confirm
            # it is not swallowed by the generic `except Exception` below.
            if len(sources) != 1:
                abort(http.client.BAD_REQUEST, "Exactly one url must be provided.")
            source = sources[0]
        # request without "filename" doesn't need to upload to bundle store
        if request.query.filename:
            filename = request.query.get('filename', default='contents')
            source = (filename, request['wsgi.input'])

        # Don't upload to bundle store if using --link, as the path
        # already exists.
        bundle_link_url = getattr(bundle.metadata, "link_url", None)
        if not bundle_link_url and source:
            local.upload_manager.upload_to_bundle_store(
                bundle,
                source=source,
                git=query_get_bool('git', default=False),
                unpack=query_get_bool('unpack', default=True),
                use_azure_blob_beta=use_azure_blob_beta,
            )

        # A linked bundle is measured at its link URL, any other bundle at its
        # location in the bundle store.
        bundle_link_url = getattr(bundle.metadata, "link_url", None)
        bundle_location = bundle_link_url or local.bundle_store.get_bundle_location(bundle.uuid)
        local.model.update_disk_metadata(bundle, bundle_location, enforce_disk_quota=True)
    except UsageError as usage_error:
        # This is a user error (most likely disk quota overuse) so raise a
        # client HTTP error. The bundle is failed unconditionally here.
        _discard_contents()
        msg = "Upload failed: %s" % usage_error
        failure_metadata = {'failure_message': msg, 'error_traceback': traceback.format_exc()}
        local.model.update_bundle(bundle, {'state': State.FAILED, 'metadata': failure_metadata})
        abort(http.client.BAD_REQUEST, msg)
    except Exception as error:
        # Upload failed: cleanup, update state if desired, and return HTTP error
        _discard_contents()
        msg = "Upload failed: %s" % error
        # The client may not want to finalize the bundle on failure, to keep
        # open the possibility of retrying the upload in the case of transient
        # failure.
        # Workers also use this API endpoint to upload partial contents of
        # running bundles, and they should use finalize_on_failure=0 to avoid
        # letting transient errors during upload fail the bundles prematurely.
        if finalize_on_failure:
            failure_metadata = {'failure_message': msg, 'error_traceback': traceback.format_exc()}
            local.model.update_bundle(
                bundle, {'state': State.FAILED, 'metadata': failure_metadata}
            )
        abort(http.client.INTERNAL_SERVER_ERROR, msg)
    else:
        if finalize_on_success:
            # Upload succeeded: update state
            local.model.update_bundle(bundle, {'state': final_state})
def update_bundle_metadata(self, uuid, metadata):
    """
    Replace the user metadata of bundle |uuid| with |metadata|.

    The requesting user must pass the permission check on the bundle, and the
    metadata is validated against the bundle before it is written.
    """
    check_bundles_have_all_permission(local.model, request.user, [uuid])
    target = local.model.get_bundle(uuid)
    self.validate_user_metadata(target, metadata)
    metadata_update = {'metadata': metadata}
    local.model.update_bundle(target, metadata_update)
def _update_bundle_contents_blob(uuid):
    """
    Update the contents of the given running or uploading bundle.

    Query parameters:
    urls - comma-separated list of URLs from which to fetch data to fill the
           bundle, using this option will ignore any uploaded file data
    git - (optional) 1 if URL should be interpreted as git repos to clone
          or 0 otherwise, default is 0
    OR
    filename - (optional) filename of the uploaded file, used to indicate
               whether or not it is an archive, default is 'contents'

    Query parameters that are always available:
    unpack - (optional) 1 if the uploaded file should be unpacked if it is
             an archive, or 0 otherwise, default is 1
    simplify - (optional) 1 if the uploaded file should be 'simplified' if
               it is an archive, or 0 otherwise, default is 1
               (See UploadManager for full explanation of 'simplification')
    finalize_on_failure - (optional) True ('1') if bundle state should be set
                          to 'failed' in the case of a failure during upload,
                          or False ('0') if the bundle state should not change
                          on failure. Default is False.
    state_on_success - (optional) Update the bundle state to this state if
                       the upload completes successfully. Must be either
                       'ready' or 'failed'. Default is 'ready'.
    """
    check_bundles_have_all_permission(local.model, request.user, [uuid])
    bundle = local.model.get_bundle(uuid)
    if bundle.state in State.FINAL_STATES:
        abort(httplib.FORBIDDEN, 'Contents cannot be modified, bundle already finalized.')

    def _discard_contents():
        # Drop whatever data is currently stored for the bundle, if any.
        if local.upload_manager.has_contents(bundle):
            local.upload_manager.cleanup_existing_contents(bundle)

    # Get and validate query parameters
    finalize_on_failure = query_get_bool('finalize_on_failure', default=False)
    final_state = request.query.get('state_on_success', default=State.READY)
    if final_state not in State.FINAL_STATES:
        allowed_states = '|'.join(State.FINAL_STATES)
        abort(httplib.BAD_REQUEST, 'state_on_success must be one of %s' % allowed_states)

    # If this bundle already has data, remove it.
    _discard_contents()

    # Store the data.
    try:
        # URLs take precedence; otherwise the request body is the single
        # source, labelled with the given (or default) filename.
        if request.query.urls:
            sources = query_get_list('urls')
        else:
            sources = [(request.query.get('filename', default='contents'), request['wsgi.input'])]

        local.upload_manager.upload_to_bundle_store(
            bundle,
            sources=sources,
            follow_symlinks=False,
            exclude_patterns=None,
            remove_sources=False,
            git=query_get_bool('git', default=False),
            unpack=query_get_bool('unpack', default=True),
            simplify_archives=query_get_bool('simplify', default=True),
        )
        local.upload_manager.update_metadata_and_save(bundle, new_bundle=False)
    except Exception as error:
        # Upload failed: cleanup, update state if desired, and return HTTP error
        _discard_contents()
        msg = "Upload failed: %s" % error
        # The client may not want to finalize the bundle on failure, to keep
        # open the possibility of retrying the upload in the case of transient
        # failure.
        # Workers also use this API endpoint to upload partial contents of
        # running bundles, and they should use finalize_on_failure=0 to avoid
        # letting transient errors during upload fail the bundles prematurely.
        if finalize_on_failure:
            failed_update = {'state': State.FAILED, 'metadata': {'failure_message': msg}}
            local.model.update_bundle(bundle, failed_update)
        abort(httplib.INTERNAL_SERVER_ERROR, msg)
    else:
        # Upload succeeded: update state
        local.model.update_bundle(bundle, {'state': final_state})