def compute(self):
    """Send or fetch a single file to/from the remote machine.

    The transfer result is cached in the job monitor, keyed on the
    module signature, so re-executions reuse the previous result.
    """
    machine = self.get_machine()
    jm = JobMonitor.getInstance()
    cache = jm.getCache(self.signature)
    if cache:
        # A previous run already performed this transfer; reuse its result.
        result = cache.parameters['result']
    else:
        if not self.has_input('local_file'):
            raise ModuleError(self, "No local file specified")
        local_file = self.get_input('local_file').strip()
        if not self.has_input('remote_file'):
            raise ModuleError(self, "No remote file specified")
        remote_file = self.get_input('remote_file').strip()
        # Direction of transfer: default is upload ('remote'); the optional
        # boolean 'to_local' port switches to download.
        whereto = 'remote'
        if self.has_input('to_local') and self.get_input('to_local'):
            whereto = 'local'
        # trick to select machine without initializing every time
        command = machine.getfile if whereto=='local' else machine.sendfile
        result = command(local_file, remote_file)
        # Record the result (and the machine used) under this signature.
        d = {'result':result}
        self.set_job_machine(d, machine)
        jm.setCache(self.signature, d, self.getName())
    self.set_output("machine", machine)
    self.set_output("output", result)
def compute(self):
    """Copy a directory tree between the local host and a remote machine.

    The destination directory is created first; nothing is transferred
    when the job monitor already holds a cache entry for this signature.
    """
    # This module must never be cached by the interpreter itself.
    self.is_cacheable = lambda *args, **kwargs: False

    def fetch(port, label):
        # Read a mandatory input port or abort with a module error.
        if not self.hasInputFromPort(port):
            raise ModuleError(self, "No %s specified" % label)
        return self.getInputFromPort(port)

    machine = fetch('machine', 'machine').machine
    local_dir = fetch('local_directory', 'local directory').strip()
    remote_dir = fetch('remote_directory', 'remote directory').strip()
    to_local = (self.hasInputFromPort('to_local')
                and self.getInputFromPort('to_local'))
    direction = 'local' if to_local else 'remote'
    monitor = JobMonitor.getInstance()
    if not monitor.getCache(self.signature):
        # Submit the following commands on the selected machine.
        use_machine(machine)
        destination = local_dir if direction == 'local' else remote_dir
        mkdir = CreateDirectory(direction, destination)
        transfer = TransferFiles(direction, local_dir, remote_dir,
                                 dependencies=[mkdir])
        transfer.run()
        end_machine()
        monitor.setCache(self.signature, {'result': ''})
    self.setResult("machine", machine)
def compute(self):
    """Transfer a whole directory between local and remote machines.

    The destination directory is created first; the transfer only runs
    when no cached job exists for this module signature.
    """
    # Never let the interpreter cache this module's results.
    self.is_cacheable = lambda *args, **kwargs: False
    if not self.hasInputFromPort('machine'):
        raise ModuleError(self, "No machine specified")
    machine = self.getInputFromPort('machine').machine
    if not self.hasInputFromPort('local_directory'):
        raise ModuleError(self, "No local directory specified")
    local_directory = self.getInputFromPort('local_directory').strip()
    if not self.hasInputFromPort('remote_directory'):
        raise ModuleError(self, "No remote directory specified")
    remote_directory = self.getInputFromPort('remote_directory').strip()
    # Default direction is upload ('remote'); the optional 'to_local'
    # boolean port switches to download.
    whereto = 'remote'
    if self.hasInputFromPort('to_local') and self.getInputFromPort('to_local'):
        whereto = 'local'
    jm = JobMonitor.getInstance()
    cache = jm.getCache(self.signature)
    if not cache:
        # trick to select machine without initializing every time
        use_machine(machine)
        to_dir = local_directory if whereto=='local' else remote_directory
        cdir = CreateDirectory(whereto, to_dir)
        job = TransferFiles(whereto, local_directory, remote_directory,
                            dependencies = [cdir])
        job.run()
        end_machine()
        cache = jm.setCache(self.signature, {'result':''})
    self.setResult("machine", machine)
def compute(self):
    """Upload a local file into HDFS via a temporary file on the remote host.

    Ports: 'Remote Location' (HDFS path or URI), 'Local File' (path object),
    optional 'Override' (replace an existing remote entry).  The upload is
    skipped when a cached job for this signature already exists.
    """
    machine = self.get_machine()
    jm = JobMonitor.getInstance()
    job_id = self.signature  # renamed from `id` to avoid shadowing the builtin
    job = jm.getCache(job_id)
    if not job:
        remote = self.get_input('Remote Location')
        local = self.get_input('Local File')
        override = self.force_get_input('Override', False)
        if '://' not in remote:
            remote = self.add_prefix(remote, machine)
        # 'dfs -test -e' exits 0 when the entry exists, so int(...) == 0
        # means the remote path is already there.
        if not int(self.call_hdfs('dfs -test -e ' + remote + '; echo $?',
                                  machine)):
            if override:
                self.call_hdfs('dfs -rm -r ' + remote, machine)
            else:
                raise ModuleError(self, 'Remote entry already exists')
        # Stage through a unique temp file on the remote host, move it into
        # HDFS, then clean up.  (renamed from `tempfile`: shadowed the
        # stdlib module of the same name)
        temp_path = machine.remote.send_command('mktemp -u').strip()
        machine.sendfile(local.name, temp_path)
        self.call_hdfs('dfs -put %s %s' % (temp_path, remote), machine)
        machine.remote.rm(temp_path, force=True, recursively=True)
        d = {'remote': remote, 'local': local.name}
        self.set_job_machine(d, machine)
        jm.setCache(job_id, d, self.getName())
        # NOTE(review): sibling modules re-read the entry with getCache();
        # getJob() is kept here to preserve original behavior — confirm both
        # return the same record.
        job = jm.getJob(job_id)
    self.set_output('Remote Location', job.parameters['remote'])
    self.set_output('Machine', machine)
def compute(self):
    """Send or fetch a single file to/from the remote machine.

    The transfer result is cached in the job monitor under the module
    signature; cached results are reused on re-execution.
    """
    def fetch(port, label):
        # Read a mandatory input port or abort with a module error.
        if not self.hasInputFromPort(port):
            raise ModuleError(self, "No %s specified" % label)
        return self.getInputFromPort(port)

    machine = fetch('machine', 'machine').machine
    local_path = fetch('local_file', 'local file').strip()
    remote_path = fetch('remote_file', 'remote file').strip()
    # Upload by default; the optional 'to_local' port switches to download.
    to_local = (self.hasInputFromPort('to_local')
                and self.getInputFromPort('to_local'))
    monitor = JobMonitor.getInstance()
    cached = monitor.getCache(self.signature)
    if cached:
        result = cached['result']
    else:
        if to_local:
            result = machine.getfile(local_path, remote_path)
        else:
            result = machine.sendfile(local_path, remote_path)
        monitor.setCache(self.signature, {'result': result})
    self.setResult("machine", self.getInputFromPort('machine'))
    self.setResult("output", result)
def compute(self):
    """Transfer a single file to or from the remote machine.

    The result of the transfer command is cached in the job monitor,
    keyed on the module signature.
    """
    if not self.hasInputFromPort('machine'):
        raise ModuleError(self, "No machine specified")
    machine = self.getInputFromPort('machine').machine
    if not self.hasInputFromPort('local_file'):
        raise ModuleError(self, "No local file specified")
    local_file = self.getInputFromPort('local_file').strip()
    if not self.hasInputFromPort('remote_file'):
        raise ModuleError(self, "No remote file specified")
    remote_file = self.getInputFromPort('remote_file').strip()
    # Default direction is upload; the optional 'to_local' port switches
    # to download.
    whereto = 'remote'
    if self.hasInputFromPort('to_local') and self.getInputFromPort(
            'to_local'):
        whereto = 'local'
    jm = JobMonitor.getInstance()
    cache = jm.getCache(self.signature)
    if cache:
        result = cache['result']
    else:
        # trick to select machine without initializing every time
        command = machine.getfile if whereto == 'local' else machine.sendfile
        result = command(local_file, remote_file)
        cache = jm.setCache(self.signature, {'result': result})
    self.setResult("machine", self.getInputFromPort('machine'))
    self.setResult("output", result)
def compute(self):
    """Copy a directory tree to or from the remote machine.

    Skips all work when the job monitor already holds a cache entry
    for this module signature.
    """
    machine = self.get_machine()
    monitor = JobMonitor.getInstance()
    if not monitor.getCache(self.signature):
        if not self.has_input('local_directory'):
            raise ModuleError(self, "No local directory specified")
        local_dir = self.get_input('local_directory').strip()
        if not self.has_input('remote_directory'):
            raise ModuleError(self, "No remote directory specified")
        remote_dir = self.get_input('remote_directory').strip()
        # Upload by default; 'to_local' switches to download.
        to_local = self.has_input('to_local') and self.get_input('to_local')
        direction = 'local' if to_local else 'remote'
        # Run the directory creation and transfer on the selected machine.
        use_machine(machine)
        target = local_dir if direction == 'local' else remote_dir
        transfer = TransferFiles(direction, local_dir, remote_dir,
                                 dependencies=[CreateDirectory(direction,
                                                               target)])
        transfer.run()
        end_machine()
        params = {}
        self.set_job_machine(params, machine)
        monitor.setCache(self.signature, params, self.getName())
    self.set_output("machine", machine)
def get_job_machine(self):
    """ Get machine info from job

    Returns a (server, port, username, password) tuple taken from the
    job's parameters, or None (implicitly) when the job does not exist
    or has no 'server' entry.
    """
    jm = JobMonitor.getInstance()
    # NOTE(review): existence is tested with getId({}) but the job is then
    # fetched with self.signature — confirm these resolve to the same id.
    if jm.hasJob(self.getId({})):
        params = jm.getJob(self.signature).parameters
        if 'server' in params:
            return (params['server'], params['port'],
                    params['username'], params['password'])
def _handle_suspended(self, obj, error): """ _handle_suspended(obj: VistrailsModule, error: ModuleSuspended ) -> None Report module as suspended """ # update job monitor because this may be an oldStyle job jm = JobMonitor.getInstance() reg = get_module_registry() name = reg.get_descriptor(obj.__class__).name i = "%s" % self.remap_id(obj.id) iteration = self.log.get_iteration_from_module(obj) if iteration is not None: name = name + '/' + str(iteration) i = i + '/' + str(iteration) # add to parent list for computing the module tree later error.name = name # if signature is not set we use the module identifier if not error.signature: error.signature = obj.signature jm.addParent(error)
def compute(self):
    """Run a shell command on the remote machine and cache its output.

    Uses self.cache (populated elsewhere from the job monitor) to skip
    re-running commands whose result is already known.
    """
    machine = self.get_machine()
    if self.cache:
        # Result from a previous run with the same signature.
        result = self.cache.parameters['result']
    else:
        if not self.has_input('command'):
            raise ModuleError(self, "No command specified")
        command = self.get_input('command').strip()
        # trick to select machine without initializing every time
        use_machine(machine)
        m = current_machine()
        result = m.remote.send_command(command)
        end_machine()
        # Store the output (and machine info) under this signature.
        jm = JobMonitor.getInstance()
        d = {'result':result}
        self.set_job_machine(d, machine)
        jm.setCache(self.signature, d, self.getName())
    self.set_output("output", result)
    self.set_output("machine", machine)
def compute(self):
    """Ensure an HDFS entry (file or directory) is removed.

    Port 'Name' gives the entry; a machine-specific prefix is added when
    it is not already a full URI.  The deletion only happens when the
    entry exists and no cached job exists for this signature.
    """
    machine = self.get_machine()
    jm = JobMonitor.getInstance()
    job_id = self.signature  # renamed from `id` to avoid shadowing the builtin
    job = jm.getCache(job_id)
    if not job:
        entry_name = self.get_input('Name')
        if '://' not in entry_name:
            entry_name = self.add_prefix(entry_name, machine)
        # 'dfs -test -e' exits 0 when the entry exists; only delete then.
        if not int(self.call_hdfs('dfs -test -e ' + entry_name +
                                  '; echo $?', machine)):
            # 'dfs -rmr' is deprecated in favour of 'dfs -rm -r', but is
            # kept for compatibility with older Hadoop installations.
            self.call_hdfs('dfs -rmr ' + entry_name, machine)
        d = {'entry_name': entry_name}
        self.set_job_machine(d, machine)
        jm.setCache(job_id, d, self.getName())
        job = jm.getCache(job_id)
    self.set_output('Name', job.parameters['entry_name'])
    self.set_output('Machine', machine)
def compute(self):
    """Build an HDFS distributed-cache URI of the form '<uri>#<symlink>'.

    Raises ModuleError when either the 'HDFS File/URI' or 'Symlink'
    port is missing.
    """
    machine = self.get_machine()
    jm = JobMonitor.getInstance()
    job_id = self.signature  # renamed from `id` to avoid shadowing the builtin
    job = jm.getCache(job_id)
    if not job:
        uri = self.force_get_input('HDFS File/URI')
        symlink = self.force_get_input('Symlink')
        # `is None` instead of `== None`: identity is the correct test here.
        if uri is None or symlink is None:
            raise ModuleError(self, "Missing 'HDFS File/URI' or 'Symlink' values")
        if '://' not in uri:
            uri = self.add_prefix(uri, machine)
        uri += '#' + symlink
        d = {'uri': uri}
        self.set_job_machine(d, machine)
        jm.setCache(job_id, d, self.getName())
        job = jm.getCache(job_id)
    self.set_output('URI', job.parameters['uri'])
    self.set_output('Machine', machine)
def compute(self):
    """Run a shell command on the remote machine, caching its output.

    The command output is stored in the job monitor under the module
    signature and reused on re-execution.
    """
    for port, label in (('machine', 'machine'), ('command', 'command')):
        if not self.hasInputFromPort(port):
            raise ModuleError(self, "No %s specified" % label)
    command = self.getInputFromPort('command').strip()
    machine = self.getInputFromPort('machine').machine
    monitor = JobMonitor.getInstance()
    cached = monitor.getCache(self.signature)
    if cached:
        result = cached['result']
    else:
        # Route the following command to the selected machine.
        use_machine(machine)
        result = current_machine().remote.send_command(command)
        end_machine()
        monitor.setCache(self.signature, {'result': result})
    self.setResult("output", result)
    self.setResult("machine", self.getInputFromPort('machine'))
def compute(self):
    """Run a shell command on the remote machine and cache its output.

    The output is stored in the job monitor under the module signature
    so re-executions skip the remote call.
    """
    if not self.hasInputFromPort('machine'):
        raise ModuleError(self, "No machine specified")
    if not self.hasInputFromPort('command'):
        raise ModuleError(self, "No command specified")
    command = self.getInputFromPort('command').strip()
    machine = self.getInputFromPort('machine').machine
    jm = JobMonitor.getInstance()
    cache = jm.getCache(self.signature)
    if cache:
        result = cache['result']
    else:
        # trick to select machine without initializing every time
        use_machine(machine)
        m = current_machine()
        result = m.remote.send_command(command)
        end_machine()
        cache = jm.setCache(self.signature, {'result': result})
    self.setResult("output", result)
    self.setResult("machine", self.getInputFromPort('machine'))
def compute(self):
    """Download an HDFS entry to a local path via a remote temp directory.

    Ports: 'Remote Location' (HDFS path or URI), 'Local File' (target
    path object), optional 'Override' (replace an existing local entry).
    The download is skipped when a cached job exists for this signature.
    """
    machine = self.get_machine()
    jm = JobMonitor.getInstance()
    job_id = self.signature  # renamed from `id` to avoid shadowing the builtin
    job = jm.getCache(job_id)
    if not job:
        remote = self.get_input('Remote Location')
        local = self.get_input('Local File')
        override = self.force_get_input('Override', False)
        if '://' not in remote:
            remote = self.add_prefix(remote, machine)
        if os.path.exists(local.name):
            # `not override` instead of `override == False`
            if not override:
                raise ModuleError(self, 'Output already exists')
            elif os.path.isdir(local.name):
                shutil.rmtree(local.name)
            else:
                os.unlink(local.name)
        # Stage into a unique temp directory on the remote host.  (renamed
        # from `tempfile`: shadowed the stdlib module of the same name)
        temp_dir = machine.remote.send_command('mktemp -d -u').strip()
        self.call_hdfs('dfs -get %s %s' % (remote, temp_dir), machine)
        # Per-file copying is too slow with many files; sync the whole tree
        # in one transfer using tar.
        machine.local.send_command('mkdir %s' % local.name)
        machine.sync(local.name, temp_dir,
                     mode=machine.MODE_REMOTE_LOCAL, use_tar=True)
        machine.remote.rm(temp_dir, force=True, recursively=True)
        d = {'remote': remote, 'local': local.name}
        self.set_job_machine(d, machine)
        jm.setCache(job_id, d, self.getName())
        job = jm.getCache(job_id)
    self.set_output('Local File', PathObject(job.parameters['local']))
    self.set_output('Machine', machine)
def __init__(self, parent=None):
    """Build the 'Running Jobs' panel: a control bar plus a job tree view."""
    QtGui.QWidget.__init__(self, parent)
    # Register this widget as the callback target for job status updates.
    self.jobMonitor = JobMonitor.getInstance()
    self.jobMonitor.setCallback(self)
    self.timer_id = None
    self.workflowItems = {}
    self.layout = QtGui.QVBoxLayout()
#        self.layout.setContentsMargins(5, 5, 0, 0)
    buttonsLayout = QtGui.QHBoxLayout()
    #buttonsLayout.setMargin(5)
    #buttonsLayout.setSpacing(5)
    # Manual "check now" button triggers the same handler as the timer.
    run_now = QDockPushButton("Check now")
    run_now.setToolTip("Check all jobs now")
    run_now.clicked.connect(self.timerEvent)
    buttonsLayout.addWidget(run_now)
    label = QtGui.QLabel('Refresh interval (seconds):')
    buttonsLayout.addWidget(label)
    # Editable combo box holding the polling interval presets.
    self.interval = QtGui.QComboBox()
    for text, seconds in refresh_states:
        self.interval.addItem(text, seconds)
    self.interval.editTextChanged.connect(self.set_refresh)
    self.interval.setEditable(True)
    self.interval.setCurrentIndex(self.interval.findText('10 min'))
    self.interval.setCompleter(None)
    self.interval.setValidator(QNumberValidator())
    conf = configuration.get_vistrails_configuration()
    # Restore a custom interval from the configuration (10 is the default).
    if conf.jobCheckInterval and conf.jobCheckInterval != 10:
        self.interval.setEditText(str(conf.jobCheckInterval))
    buttonsLayout.addWidget(self.interval)
    self.autorun = QtGui.QCheckBox("Automatic re-execution")
    self.autorun.setToolTip("Automatically re-execute workflow when jobs "
                            "complete")
    self.autorun.toggled.connect(self.autorunToggled)
    if conf.jobAutorun:
        self.autorun.setChecked(True)
    buttonsLayout.addWidget(self.autorun)
    buttonsLayout.addStretch(1)
    self.layout.addLayout(buttonsLayout)
    # Two-column tree: job name + latest status message.
    self.jobView = QtGui.QTreeWidget()
    self.jobView.setContentsMargins(0, 0, 0, 0)
    self.jobView.setColumnCount(2)
    self.jobView.setHeaderLabels(['Job', 'Message'])
    self.jobView.header().setResizeMode(0, QtGui.QHeaderView.ResizeToContents)
    self.jobView.header().setResizeMode(1, QtGui.QHeaderView.Stretch)
    self.jobView.setExpandsOnDoubleClick(False)
    self.connect(self.jobView,
                 QtCore.SIGNAL('itemDoubleClicked(QTreeWidgetItem *, int)'),
                 self.item_selected)
    self.layout.addWidget(self.jobView)
    self.setLayout(self.layout)
    self.setWindowTitle('Running Jobs')
    self.resize(QtCore.QSize(800, 600))
    # Guard flag against re-entrant refreshes.
    self.updating_now = False
def run_and_get_results(w_list, parameters='', output_dir=None,
                        update_vistrail=True, extra_info=None,
                        reason='Console Mode Execution'):
    """run_and_get_results(w_list: list of (locator, version), parameters: str,
                           output_dir:str, update_vistrail: boolean,
                           extra_info:dict)
    Run all workflows in w_list, and returns an interpreter result object.
    version can be a tag name or a version id.
    """
    # Parameters arrive as a single '$&$'-separated string of key=value pairs.
    elements = parameters.split("$&$")
    aliases = {}
    params = []
    result = []
    for locator, workflow in w_list:
        (v, abstractions, thumbnails, mashups) = load_vistrail(locator)
        controller = VistrailController(v, locator, abstractions, thumbnails,
                                        mashups, auto_save=update_vistrail)
        # Resolve `workflow` to a numeric version: tag name, explicit id,
        # or latest version when None.
        if isinstance(workflow, basestring):
            version = v.get_version_number(workflow)
        elif isinstance(workflow, (int, long)):
            version = workflow
        elif workflow is None:
            version = controller.get_latest_version_in_graph()
        else:
            msg = "Invalid version tag or number: %s" % workflow
            raise VistrailsInternalError(msg)
        controller.change_selected_version(version)
        # Split each element on the first '=' into an alias/parameter pair.
        for e in elements:
            pos = e.find("=")
            if pos != -1:
                key = e[:pos].strip()
                value = e[pos+1:].strip()
                if controller.current_pipeline.has_alias(key):
                    aliases[key] = value
                elif 'mashup_id' in extra_info:
                    # new-style mashups can have aliases not existing in pipeline
                    for mashuptrail in mashups:
                        if mashuptrail.vtVersion == version:
                            mashup = mashuptrail.getMashup(
                                extra_info['mashup_id'])
                            c = mashup.getAliasByName(key).component
                            params.append((c.vttype, c.vtid, value))
        if output_dir is not None and controller.current_pipeline is not None:
            # FIXME DAK: why is this always done?!? there is a flag for it...
            if is_running_gui():
                controller.updatePipelineScene()
                base_fname = "%s_%s_pipeline.pdf" % (locator.short_filename,
                                                     version)
                filename = os.path.join(output_dir, base_fname)
                controller.current_pipeline_scene.saveToPDF(filename)
            else:
                debug.critical("Cannot save pipeline figure when not "
                               "running in gui mode")
            base_fname = "%s_%s_pipeline.xml" % (locator.short_filename,
                                                 version)
            filename = os.path.join(output_dir, base_fname)
            vistrails.core.db.io.save_workflow(controller.current_pipeline,
                                               filename)
        if not update_vistrail:
            # Suppress thumbnail autosave when the vistrail is read-only.
            conf = get_vistrails_configuration()
            if conf.has('thumbs'):
                conf.thumbs.autoSave = False
        jobMonitor = JobMonitor.getInstance()
        current_workflow = jobMonitor.currentWorkflow()
        if not current_workflow:
            # Try to resume a previously started job workflow matching this
            # vistrail version and locator.
            for job in jobMonitor._running_workflows.itervalues():
                try:
                    job_version = int(job.version)
                except ValueError:
                    # Version may be stored as a tag name; resolve it.
                    job_version = v.get_version_number(job.version)
                if version == job_version and locator.to_url() == job.vistrail:
                    current_workflow = job
                    jobMonitor.startWorkflow(job)
            if not current_workflow:
                current_workflow = JobWorkflow(locator.to_url(), version)
                # NOTE(review): jobMonitor is already the singleton instance;
                # the extra getInstance() looks redundant — confirm.
                jobMonitor.getInstance().startWorkflow(current_workflow)
        try:
            (results, _) = \
                controller.execute_current_workflow(custom_aliases=aliases,
                                                    custom_params=params,
                                                    extra_info=extra_info,
                                                    reason=reason)
        finally:
            # Always close the job workflow, even when execution raised.
            jobMonitor.finishWorkflow()
        new_version = controller.current_version
        if new_version != version:
            debug.log("Version '%s' (%s) was upgraded. The actual "
                      "version executed was %s" % (
                          workflow, version, new_version))
        run = results[0]
        run.workflow_info = (locator.name, new_version)
        run.pipeline = controller.current_pipeline
        if update_vistrail:
            controller.write_vistrail(locator)
        result.append(run)
        # Summarize job state for runs that spawned monitored jobs.
        if current_workflow.modules:
            if current_workflow.completed():
                run.job = "COMPLETED"
            else:
                run.job = "RUNNING: %s" % current_workflow.id
                for job in current_workflow.modules.itervalues():
                    if not job.finished:
                        run.job += "\n %s %s %s" % (job.start, job.name,
                                                    job.description())
            print run.job
    return result