def _handleTaskTagData(request):
    """Handle a tag-data request.

    Marks the request's task RUNNING (one unit of work), folds any
    non-empty 'urls' list into the tag metadata, performs the tagging,
    bumps progress and returns the request unchanged.
    """
    body = request.body
    taskName = body['task_name']
    yield tasks_tx.updateTask(
        taskName,
        lambda t: t.setState(tasks_tx.task.TASK_RUNNING).update(numTasks=1))
    # Merge requested URLs into the metadata only when some were supplied.
    if body.get('urls'):
        tagMetadata = func.updateDict(body['metadata'], {'urls': body['urls']})
    else:
        tagMetadata = body['metadata']
    yield tagData(request.state,
                  body['tag_name'],
                  taskName,
                  body.get('files', []),
                  tagMetadata,
                  body['action'],
                  body.get('recursive', False),
                  body.get('expand', False),
                  body.get('compress_dir', None),
                  filterF=_restrictDirs)
    yield tasks_tx.updateTask(taskName, lambda t: t.progress())
    defer.returnValue(request)
def _handleTaskTagData(request):
    # Handle a tag-data request: mark the task RUNNING (one unit of work),
    # fold any requested URLs into the tag metadata, run the tagging, then
    # bump task progress and hand the request back.
    yield tasks_tx.updateTask(
        request.body['task_name'],
        lambda t: t.setState(tasks_tx.task.TASK_RUNNING).update(numTasks=1))
    # Only merge 'urls' into the metadata when present and non-empty.
    if 'urls' in request.body and request.body['urls']:
        metadata = func.updateDict(request.body['metadata'],
                                   {'urls': request.body['urls']})
    else:
        metadata = request.body['metadata']
    yield tagData(request.state,
                  request.body['tag_name'],
                  request.body['task_name'],
                  request.body.get('files', []),
                  metadata,
                  request.body['action'],
                  request.body.get('recursive', False),
                  request.body.get('expand', False),
                  request.body.get('compress_dir', None),
                  filterF=_restrictDirs)
    yield tasks_tx.updateTask(request.body['task_name'], lambda t: t.progress())
    defer.returnValue(request)
def _handleTerminateInstances(request): yield tasks_tx.updateTask(request.body['task_name'], lambda t: t.setState(tasks_tx.task.TASK_RUNNING)) persistManager = request.state.persistManager cluster = yield persistManager.loadCluster(request.body['cluster_name'], request.body['user_name']) credClient = cred_client.CredentialClient(cluster.credName, request.mq, request.state.conf) if request.body['cluster_name'] != 'local': try: remoteTaskName = yield clusters_client_www.terminateInstances( cluster.master['public_dns'], 'local', request.body['user_name'], request.body['by_attribute'], request.body['attribute_values']) localTask = yield tasks_tx.loadTask(request.body['task_name']) yield tasks_tx.blockOnTaskAndForward('localhost', request.body['cluster_name'], remoteTaskName, localTask) except: yield terminateInstancesByAttribute( persistManager, credClient, request.body['cluster_name'], request.body['user_name'], request.body['by_attribute'], request.body['attribute_values']) else: cl = yield terminateInstancesByAttribute( persistManager, credClient, 'local', None, request.body['by_attribute'], request.body['attribute_values']) yield tasks_tx.updateTask(request.body['task_name'], lambda t: t.progress()) defer.returnValue(request)
def _handleTerminateCluster(request):
    # Terminate an entire cluster.  Remote clusters are shut down through
    # their master node; the local cluster may only be terminated with a
    # valid auth token.
    # Start task running
    yield tasks_tx.updateTask(request.body['task_name'],
                              lambda t: t.setState(tasks_tx.task.TASK_RUNNING))
    persistManager = request.state.persistManager
    credClient = cred_client.CredentialClient('local',
                                              request.mq,
                                              request.state.conf)
    if request.body['cluster_name'] != 'local':
        cluster = yield terminateRemoteCluster(request)
        yield persistManager.saveCluster(cluster)
        # NOTE(review): the Deferred returned by removeTerminatedCluster is
        # not yielded -- presumably fire-and-forget cleanup; confirm intent.
        removeTerminatedCluster(persistManager,
                                credClient,
                                request.body['cluster_name'],
                                request.body['user_name'])
    else:
        # Terminating the local cluster is destructive, so require a token.
        if ('auth_token' in request.body
                and auth_token.validateToken(request.body['auth_token'])):
            yield terminateCluster(credClient,
                                   persistManager,
                                   'local',
                                   request.body['user_name'])
            removeTerminatedCluster(persistManager,
                                    credClient,
                                    request.body['cluster_name'],
                                    request.body['user_name'])
        else:
            raise auth_token.AuthTokenError()
    yield tasks_tx.updateTask(request.body['task_name'], lambda t: t.progress())
    defer.returnValue(request)
def _handleDeleteTag(request):
    """Delete a tag, marking the associated task RUNNING then progressed.

    When 'delete_everything' is set the tag's underlying data is removed
    as well; the request is returned unchanged.
    """
    taskName = request.body['task_name']
    yield tasks_tx.updateTask(
        taskName,
        lambda t: t.setState(tasks_tx.task.TASK_RUNNING).update(numTasks=1))
    deleteEverything = request.body.get('delete_everything', False)
    yield request.state.tagPersist.removeTag(request.body['tag_name'],
                                             deleteEverything)
    yield tasks_tx.updateTask(taskName, lambda t: t.progress())
    defer.returnValue(request)
def instantiateAndSaveCredential(taskName, cred, credentialPersist): """ Make sure that the credential can be loaded, otherwise chuck it """ try: yield cred.ctype.instantiateCredential(cred.conf, cred) yield credentialPersist.saveCredential(cred) yield tasks_tx.updateTask(taskName, lambda t : t.setState(tasks_tx.task.TASK_COMPLETED).progress()) except Exception, err: log.err('Error loading credential') log.err(err) yield tasks_tx.updateTask(taskName, lambda t : t.setState(tasks_tx.task.TASK_FAILED))
def instantiateAndSaveCredential(taskName, cred, credentialPersist):
    """
    Make sure that the credential can be loaded, otherwise chuck it
    """
    # Instantiating the credential acts as a smoke test; only persist it
    # when that succeeds, and reflect the outcome in the named task.
    try:
        yield cred.ctype.instantiateCredential(cred.conf, cred)
        yield credentialPersist.saveCredential(cred)
        yield tasks_tx.updateTask(
            taskName,
            lambda t: t.setState(tasks_tx.task.TASK_COMPLETED).progress())
    except Exception, err:
        # Any failure (instantiation, save, or task update) marks FAILED.
        log.err('Error loading credential')
        log.err(err)
        yield tasks_tx.updateTask(
            taskName,
            lambda t: t.setState(tasks_tx.task.TASK_FAILED))
def _handleStartCluster(request): persistManager = request.state.persistManager yield tasks_tx.updateTask(request.body['task_name'], lambda t : t.setState(tasks_tx.task.TASK_RUNNING)) cluster = yield persistManager.loadCluster(request.body['cluster_name'], request.body['user_name']) cluster = cluster.update(startTask=request.body['task_name']) credClient = cred_client.CredentialClient(cluster.credName, request.mq, request.state.conf) try: cluster = yield instance_flow.startMaster(request.state, credClient, request.body['task_name'], cluster) except Exception, err: log.err('STARTCLUSETER: Failed') log.err(err) cluster = yield request.state.persistManager.loadCluster(request.body['cluster_name'], request.body['user_name']) cluster = cluster.setState(cluster.FAILED) yield defer_utils.sleep(120)() yield request.state.persistManager.removeCluster(request.body['cluster_name'], request.body['user_name']) raise err
def _handleStartCluster(request):
    # Start a cluster's master node.  On failure: mark the cluster FAILED,
    # wait two minutes so the state stays visible, remove the cluster and
    # re-raise the original error.
    persistManager = request.state.persistManager
    yield tasks_tx.updateTask(request.body['task_name'],
                              lambda t: t.setState(tasks_tx.task.TASK_RUNNING))
    cluster = yield persistManager.loadCluster(request.body['cluster_name'],
                                               request.body['user_name'])
    # Record which task started this cluster.
    cluster = cluster.update(startTask=request.body['task_name'])
    credClient = cred_client.CredentialClient(cluster.credName,
                                              request.mq,
                                              request.state.conf)
    try:
        cluster = yield instance_flow.startMaster(request.state,
                                                  credClient,
                                                  request.body['task_name'],
                                                  cluster)
    except Exception, err:
        log.err('STARTCLUSETER: Failed')
        log.err(err)
        cluster = yield request.state.persistManager.loadCluster(
            request.body['cluster_name'], request.body['user_name'])
        # NOTE(review): FAILED is set on the local copy only; no saveCluster
        # follows before removal -- confirm whether persisting is missing.
        cluster = cluster.setState(cluster.FAILED)
        yield defer_utils.sleep(120)()
        yield request.state.persistManager.removeCluster(
            request.body['cluster_name'], request.body['user_name'])
        raise err
def runTaskletWithTask(taskName, initialText, tasklets): yield tasks.updateTask( taskName, lambda t: t.setState(task.TASK_RUNNING).addMessage( task.MSG_SILENT, 'Starting to run ' + ' | '.join(tasklets)).update( numTasks=len(tasklets))) try: output = yield runTasklet(taskName, initialText, tasklets) yield tasks.updateTask( taskName, lambda t: t.setState(task.TASK_COMPLETED).addMessage( task.MSG_NOTIFICATION, 'Completed').addResult(output)) except MetricError, err: yield tasks.updateTask( taskName, lambda t: t.setState(task.TASK_FAILED).addException( str(err), err, errors.getStacktrace())) raise err
def runTaskletWithTask(taskName, initialText, tasklets):
    # Run `tasklets` over `initialText`, mirroring their lifecycle in the
    # named task: RUNNING with one work unit per tasklet, then COMPLETED
    # with the output attached, or FAILED with the exception recorded.
    yield tasks.updateTask(taskName,
                           lambda t : t.setState(task.TASK_RUNNING
                                                 ).addMessage(task.MSG_SILENT,
                                                              'Starting to run ' + ' | '.join(tasklets)
                                                              ).update(numTasks=len(tasklets)))
    try:
        output = yield runTasklet(taskName, initialText, tasklets)
        # Success: mark COMPLETED and attach the tasklet output as a result.
        yield tasks.updateTask(taskName,
                               lambda t : t.setState(task.TASK_COMPLETED
                                                     ).addMessage(task.MSG_NOTIFICATION,
                                                                  'Completed'
                                                                  ).addResult(output))
    except MetricError, err:
        # Failure: record the exception plus stacktrace, then re-raise.
        yield tasks.updateTask(taskName,
                               lambda t : t.setState(task.TASK_FAILED
                                                     ).addException(str(err), err, errors.getStacktrace()))
        raise err
def _handleTerminateInstances(request):
    # Terminate cluster instances selected by attribute.  Remote clusters
    # are asked to terminate their own instances (task forwarded locally);
    # if that fails we terminate by attribute directly.
    yield tasks_tx.updateTask(request.body['task_name'],
                              lambda t : t.setState(tasks_tx.task.TASK_RUNNING))
    persistManager = request.state.persistManager
    cluster = yield persistManager.loadCluster(request.body['cluster_name'],
                                               request.body['user_name'])
    credClient = cred_client.CredentialClient(cluster.credName,
                                              request.mq,
                                              request.state.conf)
    if request.body['cluster_name'] != 'local':
        try:
            # Ask the remote master to terminate, then forward its task
            # updates into our local task until it finishes.
            remoteTaskName = yield clusters_client_www.terminateInstances(
                cluster.master['public_dns'], 'local',
                request.body['user_name'], request.body['by_attribute'],
                request.body['attribute_values'])
            localTask = yield tasks_tx.loadTask(request.body['task_name'])
            yield tasks_tx.blockOnTaskAndForward('localhost',
                                                 request.body['cluster_name'],
                                                 remoteTaskName,
                                                 localTask)
        except:
            # NOTE(review): bare except silently swallows every error
            # (including cancellation) before falling back -- consider
            # narrowing to Exception and logging the failure.
            yield terminateInstancesByAttribute(persistManager, credClient,
                                                request.body['cluster_name'],
                                                request.body['user_name'],
                                                request.body['by_attribute'],
                                                request.body['attribute_values'])
    else:
        # Local cluster: terminate directly.  The result is unused.
        cl = yield terminateInstancesByAttribute(persistManager, credClient,
                                                 'local', None,
                                                 request.body['by_attribute'],
                                                 request.body['attribute_values'])
    yield tasks_tx.updateTask(request.body['task_name'], lambda t : t.progress())
    defer.returnValue(request)
def _updateTask(instances):
    # Record a message plus the instance count on the surrounding task and
    # bump its progress, then report all instances as succeeded.
    # NOTE(review): `taskName` and `msg` are free variables taken from the
    # enclosing scope -- confirm against the surrounding function.
    def _addMsg(t):
        tt = t.addMessage(task.MSG_SILENT, msg)
        tt = tt.addMessage(task.MSG_SILENT, 'Instances: %d' % len(instances))
        tt = tt.progress()
        return tt
    yield tasks_tx.updateTask(taskName, _addMsg)
    defer.returnValue(func.Record(succeeded=instances, failed=[]))
def _run(text, tasklet):
    """Run one tasklet as a subprocess, feeding `text` on stdin.

    Resolves with the captured stdout after bumping task progress; on
    process failure the captured stderr is turned into a tasklet error.
    (`taskName`, `parseCmd` and `_raise` come from the enclosing scope.)
    """
    outBuffer = StringIO.StringIO()
    errBuffer = StringIO.StringIO()
    d = commands.runProcess(parseCmd(tasklet),
                            initialText=text,
                            stdoutf=outBuffer.write,
                            stderrf=errBuffer.write)

    def _bumpProgress(_ignore):
        # One tasklet finished -> one unit of task progress.
        return tasks.updateTask(taskName, lambda t: t.progress())

    d.addCallback(_bumpProgress)
    d.addCallback(lambda _ignore: outBuffer.getvalue())
    d.addErrback(lambda _ignore: _raise(tasklet, errBuffer.getvalue()))
    return d
def _run(text, tasklet):
    # Run one tasklet as a subprocess, feeding `text` on stdin.
    # Free variables from the enclosing scope: taskName, parseCmd, _raise.
    stdout = StringIO.StringIO()
    stderr = StringIO.StringIO()
    p = commands.runProcess(parseCmd(tasklet),
                            initialText=text,
                            stdoutf=stdout.write,
                            stderrf=stderr.write)
    # On success: bump task progress, then resolve with the captured stdout.
    p.addCallback(
        lambda _: tasks.updateTask(taskName, lambda t: t.progress()))
    p.addCallback(lambda _: stdout.getvalue())
    # On failure: raise a tasklet error carrying the captured stderr.
    p.addErrback(lambda _: _raise(tasklet, stderr.getvalue()))
    return p
def handleRealizePhantom(request):
    # Realize a phantom tag on the local cluster: create the destination
    # directory under the upload dir and run the phantom's commands there.
    yield tasks_tx.updateTask(
        request.body['task_name'],
        lambda t : t.setState(tasks_tx.task.TASK_RUNNING).update(numTasks=1))
    localClusters = yield www_clusters.listClusters('localhost',
                                                    {'cluster_name': 'local'},
                                                    request.body['user_name'])
    localCluster = localClusters[0]
    ctype = localCluster['config']['general.ctype']
    dstTagPath = os.path.join(localCluster['config']['dirs.upload_dir'],
                              request.body['tag_name'])
    # NOTE(review): `metadata` is computed but not used in this visible span
    # -- presumably consumed further down; this chunk may be truncated.
    metadata = func.updateDict(request.body['metadata'],
                               {'tag_base_dir': dstTagPath})
    yield commands.runProcess(['mkdir', '-p', dstTagPath])
    try:
        yield _realizePhantom(ctype, dstTagPath, request.body['phantom'])
    except RunCommandError, err:
        # Record the command failure on the task, then re-raise.
        yield tasks_tx.updateTask(
            request.body['task_name'],
            lambda t : t.addMessage(tasks_tx.task.MSG_ERROR, str(err)))
        raise err
def _handleTerminateCluster(request):
    # Terminate a cluster.  Remote clusters go through their master node;
    # the local cluster requires a valid auth token.
    # Start task running
    yield tasks_tx.updateTask(request.body["task_name"],
                              lambda t: t.setState(tasks_tx.task.TASK_RUNNING))
    persistManager = request.state.persistManager
    credClient = cred_client.CredentialClient("local",
                                              request.mq,
                                              request.state.conf)
    if request.body["cluster_name"] != "local":
        cluster = yield terminateRemoteCluster(request)
        yield persistManager.saveCluster(cluster)
        # NOTE(review): Deferred from removeTerminatedCluster is not yielded
        # -- presumably fire-and-forget cleanup; confirm intent.
        removeTerminatedCluster(persistManager, credClient,
                                request.body["cluster_name"],
                                request.body["user_name"])
    else:
        # Terminating the local cluster is destructive: require a token.
        if "auth_token" in request.body and auth_token.validateToken(request.body["auth_token"]):
            yield terminateCluster(credClient, persistManager, "local",
                                   request.body["user_name"])
            removeTerminatedCluster(persistManager, credClient,
                                    request.body["cluster_name"],
                                    request.body["user_name"])
        else:
            raise auth_token.AuthTokenError()
    yield tasks_tx.updateTask(request.body["task_name"], lambda t: t.progress())
    defer.returnValue(request)
def _handleImportCluster(request):
    """Imports a VM found on a remote host."""
    # NOTE(review): `persistManager` is bound but the rest of the function
    # goes through request.state.persistManager directly.
    persistManager = request.state.persistManager
    yield tasks_tx.updateTask(request.body['task_name'],
                              lambda t: t.setState(tasks_tx.task.TASK_RUNNING))
    cluster = yield request.state.persistManager.loadCluster(
        request.body['dst_cluster'], request.body['user_name'])
    # Record which task started this import on the cluster record.
    cluster = cluster.update(startTask=request.body['task_name'])
    credClient = cred_client.CredentialClient(cluster.credName,
                                              request.mq,
                                              request.state.conf)
    try:
        cluster = yield instance_flow.importCluster(request.state,
                                                    credClient,
                                                    request.body['task_name'],
                                                    request.body['host'],
                                                    request.body['src_cluster'],
                                                    cluster)
    except Exception, err:
        # Distinguish auth failures in the log, then clean up: mark FAILED,
        # leave the cluster visible for two minutes, remove it and re-raise.
        if isinstance(err, auth_token.AuthTokenError):
            log.err('IMPORTCLUSTER: Authorization failed')
        else:
            log.err('IMPORTCLUSTER: Failed')
        log.err(err)
        cluster = yield request.state.persistManager.loadCluster(
            request.body['dst_cluster'], request.body['user_name'])
        log.msg('DEBUG importcluster.py: cluster -', cluster)
        cluster = cluster.setState(cluster.FAILED)
        yield defer_utils.sleep(120)()
        log.msg('DEBUG importcluster.py: About to remove cluster')
        yield request.state.persistManager.removeCluster(
            request.body['dst_cluster'], request.body['user_name'])
        raise err
def _handleAddInstances(request):
    """Grow a cluster by starting additional exec instances.

    Only non-local clusters can grow, and only when a positive number of
    exec instances was requested; the request is returned unchanged.
    """
    body = request.body
    yield tasks_tx.updateTask(body['task_name'],
                              lambda t: t.setState(tasks_tx.task.TASK_RUNNING))
    cluster = yield request.state.persistManager.loadCluster(
        body['cluster'], body['user_name'])
    credClient = cred_client.CredentialClient(cluster.credName,
                                              request.mq,
                                              request.state.conf)
    cType = yield credClient.getCType()
    if cType != 'local' and body['num_exec'] > 0:
        yield instance_flow.startExecs(request.state,
                                       credClient,
                                       body['task_name'],
                                       body['num_exec'],
                                       body.get('exec_instance_type', None),
                                       cluster)
    defer.returnValue(request)
def _handleImportCluster(request):
    """Imports a VM found on a remote host."""
    # NOTE(review): `persistManager` is bound but later code goes through
    # request.state.persistManager directly.
    persistManager = request.state.persistManager
    yield tasks_tx.updateTask(request.body['task_name'],
                              lambda t: t.setState(tasks_tx.task.TASK_RUNNING))
    cluster = yield request.state.persistManager.loadCluster(
        request.body['dst_cluster'], request.body['user_name'])
    # Record which task started this import.
    cluster = cluster.update(startTask=request.body['task_name'])
    credClient = cred_client.CredentialClient(cluster.credName,
                                              request.mq,
                                              request.state.conf)
    try:
        cluster = yield instance_flow.importCluster(
            request.state, credClient, request.body['task_name'],
            request.body['host'], request.body['src_cluster'], cluster)
    except Exception, err:
        # Distinguish auth failures in the log; then mark FAILED, keep the
        # cluster visible for two minutes, remove it and re-raise.
        if isinstance(err, auth_token.AuthTokenError):
            log.err('IMPORTCLUSTER: Authorization failed')
        else:
            log.err('IMPORTCLUSTER: Failed')
        log.err(err)
        cluster = yield request.state.persistManager.loadCluster(
            request.body['dst_cluster'], request.body['user_name'])
        log.msg('DEBUG importcluster.py: cluster -', cluster)
        cluster = cluster.setState(cluster.FAILED)
        yield defer_utils.sleep(120)()
        log.msg('DEBUG importcluster.py: About to remove cluster')
        yield request.state.persistManager.removeCluster(
            request.body['dst_cluster'], request.body['user_name'])
        raise err
def _updateTask(batchState, f):
    # Apply `f` to the clovr_wrapper task (when one exists) and poke the
    # pipelines cache so the change becomes visible.
    # NOTE(review): indentation reconstructed from a collapsed source line --
    # `task` is only bound inside the if-branch, so defer.returnValue(task)
    # is assumed to belong to the same branch; confirm against the original.
    if 'clovr_wrapper_task_name' in batchState:
        task = yield tasks.updateTask(batchState['clovr_wrapper_task_name'], f)

        # This is cheap, but we need a way for the pipelines cache to realize
        # the pipeline we just modified the task for has been changed.  We do
        # this by loading the config and resaving it, which cause an invalidation
        # in the cache.  There is not a more direct way for an outside process
        # to cause an invalidation yet.
        pipeline = yield pipelines_client.pipelineList('localhost',
                                                       'local',
                                                       'guest',
                                                       batchState['pipeline_name'],
                                                       detail=True)
        pipeline = pipeline[0]
        yield pipelines_client.updateConfigPipeline('localhost',
                                                    'local',
                                                    'guest',
                                                    {'pipeline_name': batchState['pipeline_name']},
                                                    pipeline['config'])
        defer.returnValue(task)
def _updateTask(batchState, f):
    # Apply `f` to the lgt_wrapper task (when one exists) and poke the
    # pipelines cache so the change becomes visible.
    # NOTE(review): indentation reconstructed from a collapsed source line --
    # `task` is only bound inside the if-branch, so defer.returnValue(task)
    # is assumed to belong to the same branch; confirm against the original.
    if 'lgt_wrapper_task_name' in batchState:
        task = yield tasks.updateTask(batchState['lgt_wrapper_task_name'], f)

        # This is cheap, but we need a way for the pipelines cache to realize
        # the pipeline we just modified the task for has been changed.  We do
        # this by loading the config and resaving it, which cause an invalidation
        # in the cache.  There is not a more direct way for an outside process
        # to cause an invalidation yet.
        pipeline = yield pipelines_client.pipelineList('localhost',
                                                       'local',
                                                       'guest',
                                                       batchState['pipeline_name'],
                                                       detail=True)
        pipeline = pipeline[0]
        yield pipelines_client.updateConfigPipeline('localhost',
                                                    'local',
                                                    'guest',
                                                    {'pipeline_name': batchState['pipeline_name']},
                                                    pipeline['config'])
        defer.returnValue(task)
def _handleAddInstances(request):
    # Grow a cluster by starting additional exec instances.  Only non-local
    # clusters can grow, and only when a positive count was requested.
    yield tasks_tx.updateTask(request.body['task_name'],
                              lambda t : t.setState(tasks_tx.task.TASK_RUNNING))
    cluster = yield request.state.persistManager.loadCluster(
        request.body['cluster'], request.body['user_name'])
    credClient = cred_client.CredentialClient(cluster.credName,
                                              request.mq,
                                              request.state.conf)
    cType = yield credClient.getCType()
    if cType != 'local':
        if request.body['num_exec'] > 0:
            yield instance_flow.startExecs(request.state,
                                           credClient,
                                           request.body['task_name'],
                                           request.body['num_exec'],
                                           request.body.get('exec_instance_type', None),
                                           cluster)
    defer.returnValue(request)
def _handleTransferTag(request):
    # Transfer a tag between clusters.  Four cases, in order:
    #   1. Real tag crossing a cluster boundary: upload or download, then
    #      re-tag on the destination.
    #   2. Local-to-local tag with unrealized URLs: realize them.
    #   3. Phantom tag: rsync its dependencies and realize the phantom on
    #      the destination cluster.
    #   4. Plain local-to-local: just re-tag in place.
    # The task is created with two work units; each branch accounts for both.
    yield tasks_tx.updateTask(
        request.body['task_name'],
        lambda t: t.setState(tasks_tx.task.TASK_RUNNING).update(numTasks=2))

    srcTag = yield www_tags.loadTag('localhost', request.body['src_cluster'],
                                    request.body['user_name'],
                                    request.body['tag_name'])

    if not srcTag['phantom'] and (request.body['src_cluster'] != 'local'
                                  or request.body['dst_cluster'] != 'local'):
        # Case 1: move the data through whichever side is local.
        if request.body['src_cluster'] == 'local':
            tag = yield _uploadTag(request)
        elif request.body['dst_cluster'] == 'local':
            tag = yield _downloadTag(request)
        else:
            raise NoLocalClusterError(
                'Source cluster or destination cluster must be local')

        yield tasks_tx.updateTask(request.body['task_name'],
                                  lambda t: t.progress())

        # Work out where to compress, if compression was requested at all.
        if request.body.get('compress', False) or request.body.get(
                'compress_dir', False):
            defaultDir = '/mnt/output' if request.body[
                'dst_cluster'] == 'local' else tag.metadata['tag_base_dir']
            compressDir = request.body.get('compress_dir') if request.body.get(
                'compress_dir', False) else defaultDir
        else:
            compressDir = None

        if request.body['dst_cluster'] == 'local':
            yield tag_mq_data.tagData(request.state,
                                      request.body['tag_name'],
                                      request.body['task_name'],
                                      files=tag.files,
                                      action=tag_mq_data.ACTION_OVERWRITE,
                                      metadata=tag.metadata,
                                      recursive=False,
                                      expand=False,
                                      compressDir=compressDir)
        else:
            # Tag on the remote side and forward its task into ours.
            newTag = yield www_tags.tagData(
                'localhost', request.body['dst_cluster'],
                request.body['user_name'],
                action=tag_mq_data.ACTION_OVERWRITE,
                tagName=tag.tagName,
                files=tag.files,
                metadata=tag.metadata,
                recursive=False,
                expand=False,
                compressDir=compressDir)
            localTask = yield tasks_tx.loadTask(request.body['task_name'])
            endState, tsk = yield tasks_tx.blockOnTaskAndForward(
                'localhost', request.body['dst_cluster'],
                newTag['task_name'], localTask)
            if endState == tasks_tx.task.TASK_FAILED:
                yield tasks_tx.updateTask(
                    request.body['task_name'],
                    lambda t: t.setState(tasks_tx.task.TASK_FAILED))
                raise TransferTagError(request.body['tag_name'])

        yield tasks_tx.updateTask(request.body['task_name'],
                                  lambda t: t.progress())
    elif not srcTag['phantom'] and srcTag['metadata'].get(
            'urls', []) and not srcTag['metadata'].get('urls_realized', False):
        # It's a local to local but we have urls and haven't realized them
        yield _realizeUrls(request)
        yield tasks_tx.updateTask(request.body['task_name'],
                                  lambda t: t.progress(2))
    elif srcTag['phantom']:
        # Upload the depends file
        srcClusters = yield www_clusters.listClusters(
            'localhost', {'cluster_name': request.body['src_cluster']},
            request.body['user_name'])
        srcCluster = srcClusters[0]
        dstClusters = yield www_clusters.listClusters(
            'localhost', {'cluster_name': request.body['dst_cluster']},
            request.body['user_name'])
        dstCluster = dstClusters[0]
        dependsOn = srcTag['phantom'].get('depends_on', '').split()
        yield rsync.rsyncTo(dstCluster['master']['public_dns'],
                            '/', '/',
                            dependsOn,
                            srcCluster['config']['rsync.options'],
                            srcCluster['config']['rsync.user'],
                            log=True)
        # Realize the phantom remotely and forward its task into ours.
        taskName = yield www_tags.realizePhantom('localhost',
                                                 request.body['dst_cluster'],
                                                 request.body['user_name'],
                                                 srcTag['tag_name'],
                                                 srcTag['phantom'],
                                                 srcTag['metadata'])
        localTask = yield tasks_tx.loadTask(request.body['task_name'])
        endState, tsk = yield tasks_tx.blockOnTaskAndForward(
            'localhost', request.body['dst_cluster'], taskName, localTask)
        if endState == tasks_tx.task.TASK_FAILED:
            yield tasks_tx.updateTask(
                request.body['task_name'],
                lambda t: t.setState(tasks_tx.task.TASK_FAILED))
            raise RealizePhantomError(request.body['tag_name'])
        yield tasks_tx.updateTask(request.body['task_name'],
                                  lambda t: t.update(numTasks=1).progress())
    else:
        # Case 4: local-to-local, no phantom, no urls -- re-tag in place.
        yield tag_mq_data.tagData(
            request.state,
            request.body['tag_name'],
            request.body['task_name'],
            files=[],
            action=tag_mq_data.ACTION_APPEND,
            metadata={},
            recursive=False,
            expand=False,
            compressDir='/mnt/output' if request.body.get('compress', False)
            else None)
        yield tasks_tx.updateTask(request.body['task_name'],
                                  lambda t: t.progress(2))

    defer.returnValue(request)
def handleWWWRunPipeline(request):
    """
    In the case of a pipeline we will do all the work necessary to run the
    pipeline and then setup a listener to run in the background tracking its
    progress.

    If bare_run is False then the pipeline run will actually be wrapped in
    `clovr_wrapper`.  Otherwise a pipeline of the type
    pipeline.PIPELINE_TEMPLATE is run.

    Input:
    { cluster: string
      user_name: string
      ?parent_pipeline: string
      ?queue: string
      ?overwrite: boolean
      bare_run: boolean
      config: { key/value }
    }

    Output:
    lite_pipeline
    """
    @defer.inlineCallbacks
    def _createPipeline(request):
        # Build the persist.Pipeline record for a brand-new pipeline run.
        # NOTE(review): `checksum` is a free variable bound later in the
        # enclosing body; this closure is only invoked after it is set.
        taskName = yield tasks_tx.createTaskAndSave('runPipelines', 0)
        # The name of a pipeline is being stored as a checksum. Pipeline names
        # are arbitrary and the user will likely never know or care what it is.
        # The pipeline name still exists though because other tools will likely
        # find it useful to refer to a pipeline by a particular name, but if
        # we decide to change the pipeline name to something more meaningful they
        # won't have to chagne their code to use pipelineName instead of checksum
        protocol = _determineProtocol(request)
        if not request.body['bare_run']:
            request.body['config']['pipeline.PIPELINE_WRAPPER_NAME'] = request.body['config']['pipeline.PIPELINE_NAME']
        defer.returnValue(persist.Pipeline(pipelineId=None,
                                           pipelineName=checksum,
                                           userName=request.body['user_name'],
                                           protocol=protocol,
                                           checksum=checksum,
                                           taskName=taskName,
                                           queue=request.body.get('queue', 'pipelinewrapper.q'),
                                           children=[],
                                           config=request.body['config']))

    @defer.inlineCallbacks
    def _startRemotePipeline(request):
        clusters = yield clusters_client.listClusters({'cluster_name': request.body['cluster']},
                                                      request.body['user_name'])
        cluster = clusters[0]
        # Forward the request on to the remote cluster, set parent_pipeline to None
        ret = yield pipelines_www_client.runPipeline(cluster['master']['public_dns'],
                                                     'local',
                                                     request.body['user_name'],
                                                     None,
                                                     request.body['bare_run'],
                                                     request.body.get('queue', 'pipelinewrapper.q'),
                                                     request.body['config'],
                                                     request.body.get('overwrite', False))
        defer.returnValue(ret)

    # If the parent pipeline is set and doesn't exist, error
    if request.body.get('parent_pipeline'):
        parentPipelines = yield request.state.pipelinePersist.loadAllPipelinesBy(
            {'pipeline_name': request.body['parent_pipeline']},
            request.body['user_name'])
        if not parentPipelines:
            raise InvalidParentPipeline(request.body['parent_pipeline'])
        if len(parentPipelines) == 1:
            parentPipeline = parentPipelines[0]
        else:
            raise Exception('More than one possible parent pipeline choice, not sure what to do here')
    else:
        parentPipeline = None

    if request.body['cluster'] == 'local':
        checksum = pipeline_misc.checksumInput(request.body['config'])
        protocol = _determineProtocol(request)
        # Batch pipelines are validated differently from regular ones.
        if protocol == 'clovr_batch_wrapper':
            errors = yield pipeline_misc.validateBatchPipelineConfig(request)
        else:
            errors = yield pipeline_misc.validatePipelineConfig(request)

        if errors:
            raise InvalidPipelineConfig('Configuration did not pass validation')

        request.body['config']['pipeline.PIPELINE_NAME'] = checksum

        try:
            # Pretty lame way to force control to the exceptional case
            # We aren't in a try block just for this line, though. The line
            # that loads the pipeline could also fail
            if request.body.get('overwrite', False):
                raise persist.PipelineNotFoundError('flow control')
            existingPipeline = yield request.state.pipelinePersist.loadPipelineBy(
                {'checksum': checksum, 'protocol': protocol},
                request.body['user_name'])
            pipelineDict = yield request.state.pipelinesCache.pipelineToDict(existingPipeline)
            defer.returnValue(request.update(response=pipelineDict))
        except persist.PipelineNotFoundError:
            pipeline = yield _createPipeline(request)
            yield request.state.pipelinePersist.savePipeline(pipeline)
            # We want to do a deeper validation of the configuration and then run the pipeline.
            # Then we want to monitor it both through the ergatis observer and a timed update
            # of any children it has.
            #
            # We are going to do all this work in the background so we can exit the
            # handler. Since incoming requests are rate-limited, we don't want to
            # block the handler for too long. In this case we weren't pushing the
            # request and pipleine onto the queue for another handler to pick up
            # like we do in many other cases because we don't have to. Deeper
            # validation is through a tasklet which is rate limited and submitting
            # a pipeline and monitoring it are all fairly light operations.
            d = pipeline_misc.deepValidation(request, pipeline)
            d.addCallback(lambda p : pipeline_misc.runPipeline(request, p))
            # runPipeline returns a pipeline monitor, not a pipeline
            d.addCallback(lambda pm : request.state.pipelinePersist.savePipeline(pm.pipeline).addCallback(lambda _ : pm.pipeline))
            d.addErrback(lambda f : tasks_tx.updateTask(pipeline.taskName,
                                                        lambda t : t.setState(tasks_tx.task.TASK_FAILED).addFailure(f)))
            pipelineDict = yield request.state.pipelinesCache.pipelineToDict(pipeline)
            if parentPipeline:
                # Register this pipeline as a child of its parent (dedupe via set).
                parentPipeline = parentPipeline.update(
                    children=list(set([tuple(e)
                                       for e in parentPipeline.children + [('local', pipeline.pipelineName)]])))
                yield request.state.pipelinePersist.savePipeline(parentPipeline)
            defer.returnValue(request.update(response=pipelineDict))
    else:
        pipelineDict = yield _startRemotePipeline(request)
        if parentPipeline:
            childPipeline = [(request.body['cluster'], pipelineDict['pipeline_name'])]
            parentPipeline = parentPipeline.update(
                children=list(set([tuple(e)
                                   for e in parentPipeline.children + childPipeline])))
            yield request.state.pipelinePersist.savePipeline(parentPipeline)
        defer.returnValue(request.update(response=pipelineDict))
def _realizeUrls(request): localTag = yield request.state.tagPersist.loadTag(request.body['tag_name']) # If we have urls we create a fake phantom tag fakePhantom = {'cluster.ALL.command': 'reliableDownloader.py -m 300 -t 20 -b ${base_dir} ' + ' '.join(localTag.metadata['urls'])} taskName = yield www_tags.realizePhantom('localhost', request.body['dst_cluster'], request.body['user_name'], localTag.tagName, fakePhantom, func.updateDict(localTag.metadata, {'urls_realized': True})) localTask = yield tasks_tx.loadTask(request.body['task_name']) endState, tsk = yield tasks_tx.blockOnTaskAndForward('localhost', request.body['dst_cluster'], taskName, localTask) if endState == tasks_tx.task.TASK_FAILED: yield tasks_tx.updateTask(request.body['task_name'], lambda t : t.setState(tasks_tx.task.TASK_FAILED)) raise RealizePhantomError(request.body['tag_name']) if request.body['dst_cluster'] == 'local': yield tag_mq_data.tagData(request.state, request.body['tag_name'], request.body['task_name'], files=localTag.files, action=tag_mq_data.ACTION_APPEND, metadata={}, recursive=False, expand=False, compressDir=None) else: localTask = yield www_tags.tagData('localhost', request.body['dst_cluster'], request.body['user_name'], action=tag_mq_data.ACTION_APPEND, tagName=localTag.tagName, files=localTag.files, metadata={}, recursive=False, expand=False, compressDir=None) localTask = yield tasks_tx.loadTask(request.body['task_name']) endState, tsk = yield tasks_tx.blockOnTaskAndForward('localhost', request.body['dst_cluster'], taskName, localTask) if endState == tasks_tx.task.TASK_FAILED: yield tasks_tx.updateTask(request.body['task_name'], lambda t : t.setState(tasks_tx.task.TASK_FAILED)) raise RealizePhantomError(request.body['tag_name']) # Load the tag up and return it so we can have the files it created tag = yield www_tags.loadTag('localhost', request.body['dst_cluster'], request.body['user_name'], request.body['tag_name']) defer.returnValue(tag)
def _handleTransferTag(request):
    # Transfer a tag between clusters.  Four cases, in order:
    #   1. Real tag crossing a cluster boundary: upload or download, then
    #      re-tag on the destination.
    #   2. Local-to-local tag with unrealized URLs: realize them.
    #   3. Phantom tag: rsync its dependencies and realize the phantom on
    #      the destination cluster.
    #   4. Plain local-to-local: just re-tag in place.
    # The task is created with two work units; each branch accounts for both.
    yield tasks_tx.updateTask(request.body['task_name'],
                              lambda t : t.setState(tasks_tx.task.TASK_RUNNING).update(numTasks=2))

    srcTag = yield www_tags.loadTag('localhost',
                                    request.body['src_cluster'],
                                    request.body['user_name'],
                                    request.body['tag_name'])

    if not srcTag['phantom'] and (request.body['src_cluster'] != 'local'
                                  or request.body['dst_cluster'] != 'local'):
        # Case 1: move the data through whichever side is local.
        if request.body['src_cluster'] == 'local':
            tag = yield _uploadTag(request)
        elif request.body['dst_cluster'] == 'local':
            tag = yield _downloadTag(request)
        else:
            raise NoLocalClusterError('Source cluster or destination cluster must be local')

        yield tasks_tx.updateTask(request.body['task_name'],
                                  lambda t : t.progress())

        # Work out where to compress, if compression was requested at all.
        if request.body.get('compress', False) or request.body.get('compress_dir', False):
            defaultDir = '/mnt/output' if request.body['dst_cluster'] == 'local' else tag.metadata['tag_base_dir']
            compressDir = request.body.get('compress_dir') if request.body.get('compress_dir', False) else defaultDir
        else:
            compressDir = None

        if request.body['dst_cluster'] == 'local':
            yield tag_mq_data.tagData(request.state,
                                      request.body['tag_name'],
                                      request.body['task_name'],
                                      files=tag.files,
                                      action=tag_mq_data.ACTION_OVERWRITE,
                                      metadata=tag.metadata,
                                      recursive=False,
                                      expand=False,
                                      compressDir=compressDir)
        else:
            # Tag on the remote side and forward its task into ours.
            newTag = yield www_tags.tagData('localhost',
                                            request.body['dst_cluster'],
                                            request.body['user_name'],
                                            action=tag_mq_data.ACTION_OVERWRITE,
                                            tagName=tag.tagName,
                                            files=tag.files,
                                            metadata=tag.metadata,
                                            recursive=False,
                                            expand=False,
                                            compressDir=compressDir)
            localTask = yield tasks_tx.loadTask(request.body['task_name'])
            endState, tsk = yield tasks_tx.blockOnTaskAndForward('localhost',
                                                                 request.body['dst_cluster'],
                                                                 newTag['task_name'],
                                                                 localTask)
            if endState == tasks_tx.task.TASK_FAILED:
                yield tasks_tx.updateTask(request.body['task_name'],
                                          lambda t : t.setState(tasks_tx.task.TASK_FAILED))
                raise TransferTagError(request.body['tag_name'])

        yield tasks_tx.updateTask(request.body['task_name'],
                                  lambda t : t.progress())
    elif not srcTag['phantom'] and srcTag['metadata'].get('urls', []) and not srcTag['metadata'].get('urls_realized', False):
        # It's a local to local but we have urls and haven't realized them
        yield _realizeUrls(request)
        yield tasks_tx.updateTask(request.body['task_name'],
                                  lambda t : t.progress(2))
    elif srcTag['phantom']:
        # Upload the depends file
        srcClusters = yield www_clusters.listClusters('localhost',
                                                      {'cluster_name': request.body['src_cluster']},
                                                      request.body['user_name'])
        srcCluster = srcClusters[0]
        dstClusters = yield www_clusters.listClusters('localhost',
                                                      {'cluster_name': request.body['dst_cluster']},
                                                      request.body['user_name'])
        dstCluster = dstClusters[0]
        dependsOn = srcTag['phantom'].get('depends_on', '').split()
        yield rsync.rsyncTo(dstCluster['master']['public_dns'],
                            '/', '/',
                            dependsOn,
                            srcCluster['config']['rsync.options'],
                            srcCluster['config']['rsync.user'],
                            log=True)
        # Realize the phantom remotely and forward its task into ours.
        taskName = yield www_tags.realizePhantom('localhost',
                                                 request.body['dst_cluster'],
                                                 request.body['user_name'],
                                                 srcTag['tag_name'],
                                                 srcTag['phantom'],
                                                 srcTag['metadata'])
        localTask = yield tasks_tx.loadTask(request.body['task_name'])
        endState, tsk = yield tasks_tx.blockOnTaskAndForward('localhost',
                                                             request.body['dst_cluster'],
                                                             taskName,
                                                             localTask)
        if endState == tasks_tx.task.TASK_FAILED:
            yield tasks_tx.updateTask(request.body['task_name'],
                                      lambda t : t.setState(tasks_tx.task.TASK_FAILED))
            raise RealizePhantomError(request.body['tag_name'])
        yield tasks_tx.updateTask(request.body['task_name'],
                                  lambda t : t.update(numTasks=1).progress())
    else:
        # Case 4: local-to-local, no phantom, no urls -- re-tag in place.
        yield tag_mq_data.tagData(request.state,
                                  request.body['tag_name'],
                                  request.body['task_name'],
                                  files=[],
                                  action=tag_mq_data.ACTION_APPEND,
                                  metadata={},
                                  recursive=False,
                                  expand=False,
                                  compressDir='/mnt/output' if request.body.get('compress', False) else None)
        yield tasks_tx.updateTask(request.body['task_name'],
                                  lambda t : t.progress(2))

    defer.returnValue(request)
clusters = yield clusters_www.listClusters('localhost', {'cluster_name': clusterName}, 'guest') cluster = clusters[0] yield pipeline_www.resumePipeline(cluster['master']['public_dns'], 'local', pipeline.userName, child) except Exception, err: log.err('Error trying to resume child') log.err(err) # Reset the pipeline task to IDLE yield tasks_tx.updateTask(pipeline.taskName, lambda t : t.setState(tasks_tx.task.TASK_IDLE)) if pipeline.pipelineId: cmd = ['resume_pipeline.pl', '--pipeline_id=' + pipeline.pipelineId, '--taskname=' + pipeline.taskName] if pipeline.queue: cmd.append('--queue=' + pipeline.queue) yield commands.getOutput(cmd, log=True) defer.returnValue(pipeline)
def handleWWWRunPipeline(request):
    """
    In the case of a pipeline we will do all the work necessary to run the
    pipeline and then setup a listener to run in the background tracking its
    progress.

    If bare_run is False then the pipeline run will actually be wrapped in
    `clovr_wrapper`.  Otherwise a pipeline of the type
    pipeline.PIPELINE_TEMPLATE is run.

    Input:
    { cluster: string
      user_name: string
      ?parent_pipeline: string
      ?queue: string
      ?overwrite: boolean
      bare_run: boolean
      config: { key/value }
    }

    Output:
    lite_pipeline

    Raises InvalidParentPipeline when parent_pipeline is given but unknown,
    InvalidPipelineConfig when config validation fails.

    NOTE(review): runs as an inlineCallbacks-style generator (yield +
    defer.returnValue); the decorator is presumably applied where this
    handler is registered — confirm against the subscription code.
    """
    @defer.inlineCallbacks
    def _createPipeline(request):
        # One task tracks the entire pipeline run.
        taskName = yield tasks_tx.createTaskAndSave('runPipelines', 0)

        # The name of a pipeline is being stored as a checksum.  Pipeline names
        # are arbitrary and the user will likely never know or care what it is.
        # The pipeline name still exists though because other tools will likely
        # find it useful to refer to a pipeline by a particular name, but if
        # we decide to change the pipeline name to something more meaningful
        # they won't have to change their code to use pipelineName instead of
        # checksum
        protocol = _determineProtocol(request)

        if not request.body['bare_run']:
            # Wrapped runs record the originally requested pipeline name so
            # the wrapper knows which pipeline to invoke.
            request.body['config'][
                'pipeline.PIPELINE_WRAPPER_NAME'] = request.body['config'][
                    'pipeline.PIPELINE_NAME']

        # NOTE: `checksum` is a closure over the handler's local variable,
        # assigned in the cluster == 'local' branch before this is called.
        defer.returnValue(
            persist.Pipeline(pipelineId=None,
                             pipelineName=checksum,
                             userName=request.body['user_name'],
                             protocol=protocol,
                             checksum=checksum,
                             taskName=taskName,
                             queue=request.body.get('queue',
                                                    'pipelinewrapper.q'),
                             children=[],
                             config=request.body['config']))

    @defer.inlineCallbacks
    def _startRemotePipeline(request):
        # Resolve the target cluster to learn the master's address.
        clusters = yield clusters_client.listClusters(
            {'cluster_name': request.body['cluster']},
            request.body['user_name'])
        cluster = clusters[0]

        # Forward the request on to the remote cluster, set parent_pipeline
        # to None
        ret = yield pipelines_www_client.runPipeline(
            cluster['master']['public_dns'], 'local',
            request.body['user_name'], None, request.body['bare_run'],
            request.body.get('queue', 'pipelinewrapper.q'),
            request.body['config'], request.body.get('overwrite', False))
        defer.returnValue(ret)

    # If the parent pipeline is set and doesn't exist, error
    if request.body.get('parent_pipeline'):
        parentPipelines = yield request.state.pipelinePersist.loadAllPipelinesBy(
            {'pipeline_name': request.body['parent_pipeline']},
            request.body['user_name'])
        if not parentPipelines:
            raise InvalidParentPipeline(request.body['parent_pipeline'])
        if len(parentPipelines) == 1:
            parentPipeline = parentPipelines[0]
        else:
            raise Exception(
                'More than one possible parent pipeline choice, not sure what to do here'
            )
    else:
        parentPipeline = None

    if request.body['cluster'] == 'local':
        checksum = pipeline_misc.checksumInput(request.body['config'])
        protocol = _determineProtocol(request)

        # Batch pipelines have their own validation path.
        if protocol == 'clovr_batch_wrapper':
            errors = yield pipeline_misc.validateBatchPipelineConfig(request)
        else:
            errors = yield pipeline_misc.validatePipelineConfig(request)

        if errors:
            raise InvalidPipelineConfig(
                'Configuration did not pass validation')

        request.body['config']['pipeline.PIPELINE_NAME'] = checksum

        try:
            # Pretty lame way to force control to the exceptional case
            # We aren't in a try block just for this line, though. The line
            # that loads the pipeline could also fail
            if request.body.get('overwrite', False):
                raise persist.PipelineNotFoundError('flow control')

            # Reuse an existing pipeline with the same checksum/protocol
            # rather than starting a duplicate run.
            existingPipeline = yield request.state.pipelinePersist.loadPipelineBy(
                {
                    'checksum': checksum,
                    'protocol': protocol
                }, request.body['user_name'])
            pipelineDict = yield request.state.pipelinesCache.pipelineToDict(
                existingPipeline)
            defer.returnValue(request.update(response=pipelineDict))
        except persist.PipelineNotFoundError:
            pipeline = yield _createPipeline(request)
            yield request.state.pipelinePersist.savePipeline(pipeline)

            # We want to do a deeper validation of the configuration and then
            # run the pipeline.  Then we want to monitor it both through the
            # ergatis observer and a timed update of any children it has.
            #
            # We are going to do all this work in the background so we can
            # exit the handler.  Since incoming requests are rate-limited, we
            # don't want to block the handler for too long.  In this case we
            # weren't pushing the request and pipeline onto the queue for
            # another handler to pick up like we do in many other cases
            # because we don't have to.  Deeper validation is through a
            # tasklet which is rate limited and submitting a pipeline and
            # monitoring it are all fairly light operations.
            d = pipeline_misc.deepValidation(request, pipeline)
            d.addCallback(lambda p: pipeline_misc.runPipeline(request, p))
            # runPipeline returns a pipeline monitor, not a pipeline
            d.addCallback(
                lambda pm: request.state.pipelinePersist.savePipeline(
                    pm.pipeline).addCallback(lambda _: pm.pipeline))
            # On any failure in the background chain, mark the pipeline's
            # task as failed and attach the failure for later inspection.
            d.addErrback(lambda f: tasks_tx.updateTask(
                pipeline.taskName, lambda t: t.setState(
                    tasks_tx.task.TASK_FAILED).addFailure(f)))

            pipelineDict = yield request.state.pipelinesCache.pipelineToDict(
                pipeline)

            if parentPipeline:
                # Register this run as a child of the parent, deduplicating
                # the (cluster, name) children list via a set of tuples.
                parentPipeline = parentPipeline.update(children=list(
                    set([
                        tuple(e) for e in parentPipeline.children +
                        [('local', pipeline.pipelineName)]
                    ])))
                yield request.state.pipelinePersist.savePipeline(
                    parentPipeline)

            defer.returnValue(request.update(response=pipelineDict))
    else:
        pipelineDict = yield _startRemotePipeline(request)

        if parentPipeline:
            # Track the remote run as (cluster, pipeline_name) in the
            # parent's children, deduplicated.
            childPipeline = [(request.body['cluster'],
                              pipelineDict['pipeline_name'])]
            parentPipeline = parentPipeline.update(children=list(
                set([
                    tuple(e)
                    for e in parentPipeline.children + childPipeline
                ])))
            yield request.state.pipelinePersist.savePipeline(parentPipeline)

        defer.returnValue(request.update(response=pipelineDict))
def loadLocalCluster(mq, state):
    """
    If local cluster is not present, load it

    mq    - message queue handle, used only to build a CredentialClient
    state - must provide .persistManager and .conf

    Returns (via defer.returnValue) the loaded or newly created 'local'
    cluster.  Runs as an inlineCallbacks-style generator (yields Deferreds);
    the decorator is presumably applied at the definition site above this
    chunk — confirm.
    """
    def _credential():
        # Build the 'local' credential description.  /tmp/cred-info, when
        # present, is one tab-separated record: cert path, pkey path,
        # credential type, and a comma-separated k=v metadata list.
        if os.path.exists('/tmp/cred-info'):
            cert, pkey, ctype, metadata = open('/tmp/cred-info').read().split('\t')
            return {'name': 'local',
                    'desc': 'Local credential',
                    'ctype': ctype,
                    'cert': open(cert).read(),
                    'pkey': open(pkey).read(),
                    # metadata may be an empty string -> empty dict
                    'metadata': metadata and dict([v.split('=', 1)
                                                   for v in metadata.split(',')]) or {},
                    'conf': config.configFromStream(open('/tmp/machine.conf'),
                                                    lazy=True)}
        else:
            # No credential info on disk; fall back to an inert local credential.
            return {'name': 'local',
                    'desc': 'Local credential',
                    'ctype': 'local',
                    'cert': None,
                    'pkey': None,
                    'metadata': {},
                    'conf': config.configFromMap({})}

    try:
        # Fast path: the cluster already exists in the persist store.
        cluster = yield state.persistManager.loadCluster('local', None)

        baseConf = config.configFromStream(open('/tmp/machine.conf'),
                                           base=config.configFromEnv())
        conf = config.configFromMap({'config_loaded': True,
                                     'cluster.cluster_public_key': '/mnt/keys/devel1.pem.pub'},
                                    base=baseConf)

        # If this machine's MASTER_IP no longer matches the stored master
        # record, refresh the master entry and persist the updated cluster.
        if (cluster.credName == 'local' and
            conf('MASTER_IP') not in [cluster.master['public_dns'],
                                      cluster.master['private_dns']]):
            master = dict(instance_id='local',
                          ami_id=None,
                          public_dns=conf('MASTER_IP'),
                          private_dns=conf('MASTER_IP'),
                          state='running',
                          key=None,
                          index=None,
                          instance_type=None,
                          launch=None,
                          availability_zone=None,
                          monitor=None,
                          spot_request_id=None,
                          bid_price=None)
            cluster = cluster.setMaster(master).update(config=conf)
            yield state.persistManager.saveCluster(cluster)

        defer.returnValue(cluster)
    except persist.ClusterNotFoundError:
        # Slow path: first run on this machine — create credential, cluster
        # record, and a synthetic completed start task, then persist.
        credential = _credential()
        credTaskName = yield cred_client.saveCredential(credential['name'],
                                                        credential['desc'],
                                                        credential['ctype'],
                                                        credential['cert'],
                                                        credential['pkey'],
                                                        credential['metadata'],
                                                        credential['conf'])
        ## Wait for credential to be added.
        ## TODO: Should handle failure here
        yield tasks_tx.blockOnTask('localhost', 'local', credTaskName)

        credClient = cred_client.CredentialClient('local', mq, state.conf)

        ## If it isn't a local ctype then we need to wait for
        ## the credential to come alive
        if credential['ctype'] != 'local':
            instances = yield credClient.listInstances()
        else:
            instances = []

        baseConf = config.configFromStream(open('/tmp/machine.conf'),
                                           base=config.configFromEnv())
        conf = config.configFromMap({'config_loaded': True,
                                     'cluster.cluster_public_key': '/mnt/keys/devel1.pem.pub'},
                                    base=baseConf)
        cluster = persist.Cluster('local', None, 'local', conf)

        # The 'local' cluster is born already started: record a completed
        # startCluster task for it.
        startTaskName = yield tasks_tx.createTaskAndSave('startCluster', 1)
        yield tasks_tx.updateTask(startTaskName,
                                  lambda t : t.setState(tasks_tx.task.TASK_COMPLETED).progress())
        cluster = cluster.update(startTask=startTaskName)

        masterIp = cluster.config('MASTER_IP')
        # Prefer a live instance matching MASTER_IP; otherwise synthesize a
        # placeholder master record.  func.find returns an index or None
        # (matches how masterIdx is used below).
        masterIdx = func.find(lambda i : masterIp in [i['public_dns'],
                                                      i['private_dns']],
                              instances)
        if masterIdx is not None:
            master = instances[masterIdx]
        else:
            master = dict(instance_id='local',
                          ami_id=None,
                          public_dns=masterIp,
                          private_dns=masterIp,
                          state='running',
                          key=None,
                          index=None,
                          instance_type=None,
                          launch=None,
                          availability_zone=None,
                          monitor=None,
                          spot_request_id=None,
                          bid_price=None)

        cluster = cluster.setMaster(master)
        cluster = cluster.setState(cluster.RUNNING)
        yield state.persistManager.saveCluster(cluster)
        defer.returnValue(cluster)
yield tasks_tx.updateTask(request.body['task_name'], lambda t : t.addMessage(tasks_tx.task.MSG_ERROR, str(err))) raise err yield tag_mq_data.tagData(request.state, tagName=request.body['tag_name'], taskName=request.body['task_name'], files=[dstTagPath], metadata=metadata, action=tag_mq_data.ACTION_OVERWRITE, recursive=True, expand=True, compressDir=None, deleteOnExpand=True) yield tasks_tx.updateTask(request.body['task_name'], lambda t : t.progress()) defer.returnValue(request) def _forwardToCluster(conf, queueUrl): return queue.forwardRequestToCluster(conf('www.url_prefix') + '/' + os.path.basename(queueUrl)) def subscribe(mq, state): processRealizePhantom = queue.returnResponse(defer_pipe.pipe([queue.keysInBody(['cluster', 'tag_name', 'user_name', 'phantom', 'metadata']), _forwardToCluster(state.conf, state.conf('tags.realize_www')), queue.createTaskAndForward(state.conf('tags.realize_queue'),
def _realizeUrls(request):
    """
    Realize a tag's URLs on the destination cluster.

    Builds a fake phantom tag whose realize command downloads every URL in
    the tag's metadata, runs it on the destination cluster, then tags the
    resulting files there (directly for 'local', via the www API otherwise).

    request.body must contain 'tag_name', 'dst_cluster', 'user_name' and
    'task_name'.

    Returns (via defer.returnValue) the realized tag loaded from the
    destination cluster, so callers can see the files it created.

    Raises RealizePhantomError if either remote task fails; the local task
    is set to TASK_FAILED first.

    Runs as an inlineCallbacks-style generator (yields Deferreds).
    """
    localTag = yield request.state.tagPersist.loadTag(request.body['tag_name'])

    # If we have urls we create a fake phantom tag whose realize command
    # downloads every URL into the tag's base directory.
    fakePhantom = {'cluster.ALL.command': 'reliableDownloader.py -m 300 -t 20 -b ${base_dir} ' + ' '.join(localTag.metadata['urls'])}

    # Mark urls_realized so we don't try to realize these URLs again.
    taskName = yield www_tags.realizePhantom('localhost',
                                             request.body['dst_cluster'],
                                             request.body['user_name'],
                                             localTag.tagName,
                                             fakePhantom,
                                             func.updateDict(localTag.metadata,
                                                             {'urls_realized': True}))
    localTask = yield tasks_tx.loadTask(request.body['task_name'])
    endState, tsk = yield tasks_tx.blockOnTaskAndForward('localhost',
                                                         request.body['dst_cluster'],
                                                         taskName,
                                                         localTask)
    if endState == tasks_tx.task.TASK_FAILED:
        yield tasks_tx.updateTask(request.body['task_name'],
                                  lambda t: t.setState(tasks_tx.task.TASK_FAILED))
        raise RealizePhantomError(request.body['tag_name'])

    if request.body['dst_cluster'] == 'local':
        yield tag_mq_data.tagData(request.state,
                                  request.body['tag_name'],
                                  request.body['task_name'],
                                  files=localTag.files,
                                  action=tag_mq_data.ACTION_APPEND,
                                  metadata={},
                                  recursive=False,
                                  expand=False,
                                  compressDir=None)
    else:
        # BUGFIX: previously the task name returned by www_tags.tagData was
        # assigned to `localTask` and then immediately overwritten, and the
        # code blocked on the stale `taskName` from realizePhantom (already
        # waited on above) — so the remote tagData was never actually waited
        # on or checked.  Keep the new task name and block on it instead.
        # (www_tags.tagData presumably returns the remote task name, like
        # www_tags.realizePhantom above — confirm against its definition.)
        remoteTaskName = yield www_tags.tagData('localhost',
                                                request.body['dst_cluster'],
                                                request.body['user_name'],
                                                action=tag_mq_data.ACTION_APPEND,
                                                tagName=localTag.tagName,
                                                files=localTag.files,
                                                metadata={},
                                                recursive=False,
                                                expand=False,
                                                compressDir=None)
        localTask = yield tasks_tx.loadTask(request.body['task_name'])
        endState, tsk = yield tasks_tx.blockOnTaskAndForward('localhost',
                                                             request.body['dst_cluster'],
                                                             remoteTaskName,
                                                             localTask)
        if endState == tasks_tx.task.TASK_FAILED:
            yield tasks_tx.updateTask(request.body['task_name'],
                                      lambda t: t.setState(tasks_tx.task.TASK_FAILED))
            raise RealizePhantomError(request.body['tag_name'])

    # Load the tag up and return it so we can have the files it created
    tag = yield www_tags.loadTag('localhost',
                                 request.body['dst_cluster'],
                                 request.body['user_name'],
                                 request.body['tag_name'])
    defer.returnValue(tag)
def loadLocalCluster(mq, state):
    """
    If local cluster is not present, load it

    Looks up the cluster named 'local' in the persist store; when found,
    refreshes its master record if MASTER_IP changed.  When missing, builds
    the credential, cluster record and a completed start task from scratch
    and persists the result.  Returns the cluster via defer.returnValue.
    """
    def _readConf():
        # machine.conf layered over the environment, plus our fixed keys.
        machineConf = config.configFromStream(open('/tmp/machine.conf'),
                                              base=config.configFromEnv())
        return config.configFromMap(
            {'config_loaded': True,
             'cluster.cluster_public_key': '/mnt/keys/devel1.pem.pub'},
            base=machineConf)

    def _syntheticMaster(ip):
        # Placeholder master record for a machine we did not boot ourselves.
        return {'instance_id': 'local',
                'ami_id': None,
                'public_dns': ip,
                'private_dns': ip,
                'state': 'running',
                'key': None,
                'index': None,
                'instance_type': None,
                'launch': None,
                'availability_zone': None,
                'monitor': None,
                'spot_request_id': None,
                'bid_price': None}

    def _credential():
        # Without /tmp/cred-info, fall back to an inert local credential.
        if not os.path.exists('/tmp/cred-info'):
            return {'name': 'local',
                    'desc': 'Local credential',
                    'ctype': 'local',
                    'cert': None,
                    'pkey': None,
                    'metadata': {},
                    'conf': config.configFromMap({})}

        # One tab-separated record: cert path, pkey path, type, k=v list.
        certPath, pkeyPath, credType, metaStr = open(
            '/tmp/cred-info').read().split('\t')
        if metaStr:
            meta = dict([kv.split('=', 1) for kv in metaStr.split(',')])
        else:
            meta = {}
        return {'name': 'local',
                'desc': 'Local credential',
                'ctype': credType,
                'cert': open(certPath).read(),
                'pkey': open(pkeyPath).read(),
                'metadata': meta,
                'conf': config.configFromStream(open('/tmp/machine.conf'),
                                                lazy=True)}

    try:
        # Fast path: cluster already persisted.
        cluster = yield state.persistManager.loadCluster('local', None)
        conf = _readConf()
        currentIp = conf('MASTER_IP')
        knownAddrs = [cluster.master['public_dns'],
                      cluster.master['private_dns']]
        # IP changed since the cluster was saved: refresh master and persist.
        if cluster.credName == 'local' and currentIp not in knownAddrs:
            cluster = cluster.setMaster(
                _syntheticMaster(currentIp)).update(config=conf)
            yield state.persistManager.saveCluster(cluster)
        defer.returnValue(cluster)
    except persist.ClusterNotFoundError:
        # Slow path: first run — create everything from scratch.
        cred = _credential()
        credTaskName = yield cred_client.saveCredential(cred['name'],
                                                        cred['desc'],
                                                        cred['ctype'],
                                                        cred['cert'],
                                                        cred['pkey'],
                                                        cred['metadata'],
                                                        cred['conf'])
        ## Wait for credential to be added.
        ## TODO: Should handle failure here
        yield tasks_tx.blockOnTask('localhost', 'local', credTaskName)

        credClient = cred_client.CredentialClient('local', mq, state.conf)

        ## If it isn't a local ctype then we need to wait for
        ## the credential to come alive
        instances = []
        if cred['ctype'] != 'local':
            instances = yield credClient.listInstances()

        cluster = persist.Cluster('local', None, 'local', _readConf())

        # The local cluster is born started: give it a completed start task.
        startTaskName = yield tasks_tx.createTaskAndSave('startCluster', 1)
        yield tasks_tx.updateTask(
            startTaskName,
            lambda t: t.setState(tasks_tx.task.TASK_COMPLETED).progress())
        cluster = cluster.update(startTask=startTaskName)

        ip = cluster.config('MASTER_IP')
        idx = func.find(
            lambda inst: ip in [inst['public_dns'], inst['private_dns']],
            instances)
        # Use the matching live instance when one exists, else a placeholder.
        master = instances[idx] if idx is not None else _syntheticMaster(ip)

        cluster = cluster.setMaster(master)
        cluster = cluster.setState(cluster.RUNNING)
        yield state.persistManager.saveCluster(cluster)
        defer.returnValue(cluster)