Пример #1
0
    def compute(self):
        """Transfer one file between the local host and the machine.

        When the job monitor already holds a cache entry for this module's
        signature, the stored 'result' parameter is reused. Otherwise the
        'local_file' and 'remote_file' inputs are required; the file is
        fetched with machine.getfile when the optional 'to_local' input is
        truthy and pushed with machine.sendfile otherwise, and the result
        is stored in the job monitor cache together with the machine info.

        Outputs: 'machine' (the machine used) and 'output' (the transfer
        result). Raises ModuleError when a required input is missing.
        """
        machine = self.get_machine()
        monitor = JobMonitor.getInstance()
        cached = monitor.getCache(self.signature)
        if not cached:
            if not self.has_input('local_file'):
                raise ModuleError(self, "No local file specified")
            local_path = self.get_input('local_file').strip()
            if not self.has_input('remote_file'):
                raise ModuleError(self, "No remote file specified")
            remote_path = self.get_input('remote_file').strip()
            # Default direction is local -> remote; 'to_local' reverses it.
            download = bool(self.has_input('to_local') and
                            self.get_input('to_local'))
            if download:
                transfer = machine.getfile
            else:
                transfer = machine.sendfile
            result = transfer(local_path, remote_path)
            record = {'result': result}
            self.set_job_machine(record, machine)
            monitor.setCache(self.signature, record, self.getName())
        else:
            result = cached.parameters['result']

        self.set_output("machine", machine)
        self.set_output("output", result)
Пример #2
0
    def compute(self):
        """Copy a directory between the local host and the machine.

        Requires the 'machine', 'local_directory' and 'remote_directory'
        input ports. The copy goes to the remote side unless the optional
        'to_local' port is truthy. The transfer is skipped when the job
        monitor already has a cache entry for this module's signature.

        Sets the 'machine' result. Raises ModuleError when a required
        input port is missing.
        """
        # Replace is_cacheable on this instance so it always answers False.
        self.is_cacheable = lambda *args, **kwargs: False
        if not self.hasInputFromPort('machine'):
            raise ModuleError(self, "No machine specified")
        machine = self.getInputFromPort('machine').machine
        if not self.hasInputFromPort('local_directory'):
            raise ModuleError(self, "No local directory specified")
        local_directory = self.getInputFromPort('local_directory').strip()
        if not self.hasInputFromPort('remote_directory'):
            raise ModuleError(self, "No remote directory specified")
        remote_directory = self.getInputFromPort('remote_directory').strip()
        whereto = 'remote'
        if self.hasInputFromPort('to_local') and self.getInputFromPort(
                'to_local'):
            whereto = 'local'

        jm = JobMonitor.getInstance()
        if not jm.getCache(self.signature):
            # use_machine() selects the machine for the commands that
            # follow (avoids initializing it every time). end_machine()
            # must run even when the transfer fails, otherwise the machine
            # stays selected for unrelated later commands.
            use_machine(machine)
            try:
                to_dir = (local_directory if whereto == 'local'
                          else remote_directory)
                cdir = CreateDirectory(whereto, to_dir)
                job = TransferFiles(whereto,
                                    local_directory,
                                    remote_directory,
                                    dependencies=[cdir])
                job.run()
            finally:
                end_machine()
            jm.setCache(self.signature, {'result': ''})

        self.setResult("machine", machine)
Пример #3
0
    def compute(self):
        """Transfer a whole directory to or from the machine.

        Input ports 'machine', 'local_directory' and 'remote_directory'
        are mandatory; a truthy 'to_local' port switches the direction
        from the default (towards the remote side) to the local side.
        Nothing is transferred when the job monitor already holds a cache
        entry for this module's signature.

        Sets the 'machine' result. Raises ModuleError for a missing
        mandatory port.
        """
        # Force is_cacheable to report False for this instance.
        self.is_cacheable = lambda *args, **kwargs: False
        if not self.hasInputFromPort('machine'):
            raise ModuleError(self, "No machine specified")
        machine = self.getInputFromPort('machine').machine
        if not self.hasInputFromPort('local_directory'):
            raise ModuleError(self, "No local directory specified")
        local_directory = self.getInputFromPort('local_directory').strip()
        if not self.hasInputFromPort('remote_directory'):
            raise ModuleError(self, "No remote directory specified")
        remote_directory = self.getInputFromPort('remote_directory').strip()
        whereto = 'remote'
        if self.hasInputFromPort('to_local') and self.getInputFromPort('to_local'):
            whereto = 'local'

        jm = JobMonitor.getInstance()
        if not jm.getCache(self.signature):
            # Select the machine for the commands below without
            # initializing it every time. The finally clause guarantees
            # the selection is released even if the transfer raises.
            use_machine(machine)
            try:
                to_dir = local_directory if whereto == 'local' else remote_directory
                cdir = CreateDirectory(whereto, to_dir)
                job = TransferFiles(whereto, local_directory, remote_directory,
                                    dependencies=[cdir])
                job.run()
            finally:
                end_machine()
            jm.setCache(self.signature, {'result': ''})

        self.setResult("machine", machine)
Пример #4
0
 def compute(self):
     """Upload a local file to HDFS through the machine.

     Reads the 'Remote Location' and 'Local File' inputs and the optional
     'Override' input (default False). A remote location without a
     '://' scheme is completed with add_prefix(). If the remote entry
     already exists it is removed when 'Override' is truthy, otherwise a
     ModuleError is raised. The local file is sent to a temporary path
     on the machine, put into HDFS from there, and the temporary path is
     removed afterwards.

     The work is skipped when the job monitor already has a cache entry
     for this module's signature. Outputs: 'Remote Location' and
     'Machine'.
     """
     machine = self.get_machine()
     jm = JobMonitor.getInstance()
     job_id = self.signature
     job = jm.getCache(job_id)
     if not job:
         remote = self.get_input('Remote Location')
         local = self.get_input('Local File')
         override = self.force_get_input('Override', False)
         if '://' not in remote:
             remote = self.add_prefix(remote, machine)
         # The command echoes the exit status of `dfs -test -e`; a status
         # of 0 is treated as "entry exists".
         if not int(self.call_hdfs('dfs -test -e ' + remote +
                                   '; echo $?', machine)):
             if override:
                 self.call_hdfs('dfs -rm -r ' + remote, machine)
             else:
                 raise ModuleError(self, 'Remote entry already exists')
         staging_path = machine.remote.send_command('mktemp -u').strip()
         try:
             machine.sendfile(local.name, staging_path)
             self.call_hdfs('dfs -put %s %s' % (staging_path, remote),
                            machine)
         finally:
             # Always remove the staging copy, also when the upload fails,
             # so failed runs do not leave temp files on the machine.
             machine.remote.rm(staging_path, force=True, recursively=True)
         d = {'remote': remote, 'local': local.name}
         self.set_job_machine(d, machine)
         jm.setCache(job_id, d, self.getName())
         job = jm.getJob(job_id)
     self.set_output('Remote Location', job.parameters['remote'])
     self.set_output('Machine', machine)
Пример #5
0
    def compute(self):
        """Transfer a single file to or from the machine on the 'machine' port.

        The 'machine', 'local_file' and 'remote_file' input ports are
        required. The file is fetched with machine.getfile when the
        optional 'to_local' port is truthy and pushed with
        machine.sendfile otherwise. A cache entry found in the job monitor
        for this module's signature short-circuits the transfer and
        supplies the result instead.

        Sets the 'machine' and 'output' results. Raises ModuleError when a
        required port is missing.
        """
        if not self.hasInputFromPort('machine'):
            raise ModuleError(self, "No machine specified")
        machine = self.getInputFromPort('machine').machine
        if not self.hasInputFromPort('local_file'):
            raise ModuleError(self, "No local file specified")
        local_path = self.getInputFromPort('local_file').strip()
        if not self.hasInputFromPort('remote_file'):
            raise ModuleError(self, "No remote file specified")
        remote_path = self.getInputFromPort('remote_file').strip()
        # Upload is the default; a truthy 'to_local' port means download.
        download = bool(self.hasInputFromPort('to_local') and
                        self.getInputFromPort('to_local'))

        monitor = JobMonitor.getInstance()
        cached = monitor.getCache(self.signature)
        if not cached:
            if download:
                transfer = machine.getfile
            else:
                transfer = machine.sendfile
            result = transfer(local_path, remote_path)
            monitor.setCache(self.signature, {'result': result})
        else:
            result = cached['result']

        self.setResult("machine", self.getInputFromPort('machine'))
        self.setResult("output", result)
Пример #6
0
    def compute(self):
        """Send a file to the machine, or fetch one from it.

        Needs the 'machine', 'local_file' and 'remote_file' input ports;
        the optional 'to_local' port selects machine.getfile instead of
        the default machine.sendfile. If the job monitor has a cache
        entry for this module's signature, its 'result' value is used and
        no transfer happens.

        Sets the 'machine' and 'output' results. Raises ModuleError for a
        missing required port.
        """
        if not self.hasInputFromPort('machine'):
            raise ModuleError(self, "No machine specified")
        machine = self.getInputFromPort('machine').machine

        if not self.hasInputFromPort('local_file'):
            raise ModuleError(self, "No local file specified")
        source_local = self.getInputFromPort('local_file').strip()

        if not self.hasInputFromPort('remote_file'):
            raise ModuleError(self, "No remote file specified")
        source_remote = self.getInputFromPort('remote_file').strip()

        direction = 'remote'
        if self.hasInputFromPort('to_local'):
            if self.getInputFromPort('to_local'):
                direction = 'local'

        job_monitor = JobMonitor.getInstance()
        hit = job_monitor.getCache(self.signature)
        if hit:
            outcome = hit['result']
        else:
            if direction == 'local':
                do_transfer = machine.getfile
            else:
                do_transfer = machine.sendfile
            outcome = do_transfer(source_local, source_remote)
            job_monitor.setCache(self.signature, {'result': outcome})

        self.setResult("machine", self.getInputFromPort('machine'))
        self.setResult("output", outcome)
Пример #7
0
    def compute(self):
        """Copy a directory between the local host and the machine.

        Unless the job monitor already has a cache entry for this
        module's signature, the 'local_directory' and 'remote_directory'
        inputs are read (both required), the destination directory is
        created, and the files are transferred. The destination is the
        remote side unless the optional 'to_local' input is truthy. The
        machine info is then stored in the job monitor cache.

        Output: 'machine'. Raises ModuleError when a required input is
        missing.
        """
        machine = self.get_machine()
        jm = JobMonitor.getInstance()
        if not jm.getCache(self.signature):
            if not self.has_input('local_directory'):
                raise ModuleError(self, "No local directory specified")
            local_directory = self.get_input('local_directory').strip()
            if not self.has_input('remote_directory'):
                raise ModuleError(self, "No remote directory specified")
            remote_directory = self.get_input('remote_directory').strip()
            whereto = 'remote'
            if self.has_input('to_local') and self.get_input('to_local'):
                whereto = 'local'
            use_machine(machine)
            try:
                to_dir = local_directory if whereto == 'local' else remote_directory
                cdir = CreateDirectory(whereto, to_dir)
                job = TransferFiles(whereto, local_directory, remote_directory,
                                    dependencies=[cdir])
                job.run()
            finally:
                # Release the machine selected by use_machine() even when
                # the transfer raises.
                end_machine()
            d = {}
            self.set_job_machine(d, machine)
            jm.setCache(self.signature, d, self.getName())

        self.set_output("machine", machine)
Пример #8
0
 def get_job_machine(self):
     """ Get machine info from job

     Returns a (server, port, username, password) tuple read from the
     job's parameters, or None when the job monitor has no job for
     self.getId({}) or the parameters contain no 'server' entry.
     """
     monitor = JobMonitor.getInstance()
     if not monitor.hasJob(self.getId({})):
         return None
     # NOTE(review): existence is checked with getId({}) but the job is
     # fetched with self.signature - confirm both name the same job.
     stored = monitor.getJob(self.signature).parameters
     if 'server' not in stored:
         return None
     fields = ('server', 'port', 'username', 'password')
     return tuple(stored[field] for field in fields)
Пример #9
0
 def _handle_suspended(self, obj, error):
     """ _handle_suspended(obj: VistrailsModule, error: ModuleSuspended
         ) -> None
         Report module as suspended: label the error with the module's
         descriptor name (plus '/<iteration>' when the log reports an
         iteration), default its signature to the module's signature,
         and hand it to the job monitor as a parent.
     """
     # The job monitor is updated because this may be an old-style job.
     monitor = JobMonitor.getInstance()
     registry = get_module_registry()
     label = registry.get_descriptor(obj.__class__).name
     # Remapped module id; built like the label but not used further here.
     module_ref = "%s" % self.remap_id(obj.id)
     iteration = self.log.get_iteration_from_module(obj)
     if iteration is not None:
         suffix = '/' + str(iteration)
         label = label + suffix
         module_ref = module_ref + suffix
     # The name is needed later for computing the module tree.
     error.name = label
     # Fall back to the module's own signature when the error has none.
     if not error.signature:
         error.signature = obj.signature
     monitor.addParent(error)
Пример #10
0
 def compute(self):
     """Run a shell command on the machine and output what it returned.

     When self.cache is set, its stored 'result' parameter is reused.
     Otherwise the required 'command' input is sent to the machine's
     remote side and the result is stored in the job monitor cache
     together with the machine info.

     Outputs: 'output' (the command result) and 'machine'. Raises
     ModuleError when no command is given.
     """
     machine = self.get_machine()
     if self.cache:
         result = self.cache.parameters['result']
     else:
         if not self.has_input('command'):
             raise ModuleError(self, "No command specified")
         command = self.get_input('command').strip()
         # use_machine() selects the machine for the commands that follow
         # (avoids initializing it every time); the finally clause makes
         # sure the selection is released even if the command raises.
         use_machine(machine)
         try:
             m = current_machine()
             result = m.remote.send_command(command)
         finally:
             end_machine()
         jm = JobMonitor.getInstance()
         d = {'result': result}
         self.set_job_machine(d, machine)
         jm.setCache(self.signature, d, self.getName())
     self.set_output("output", result)
     self.set_output("machine", machine)
Пример #11
0
 def compute(self):
     """Remove an HDFS entry if it exists.

     Reads the 'Name' input, completing it with add_prefix() when it has
     no '://' scheme. The entry is removed with `dfs -rmr` when the
     echoed status of `dfs -test -e` is 0. The resolved name is stored
     in the job monitor cache with the machine info. All of this is
     skipped when a cache entry for this module's signature already
     exists.

     Outputs: 'Name' (the resolved entry name) and 'Machine'.
     """
     machine = self.get_machine()
     monitor = JobMonitor.getInstance()
     signature = self.signature
     cached_job = monitor.getCache(signature)
     if not cached_job:
         target = self.get_input('Name')
         if '://' not in target:
             target = self.add_prefix(target, machine)
         probe = 'dfs -test -e ' + target + '; echo $?'
         status = int(self.call_hdfs(probe, machine))
         if status == 0:
             # -rmr is deprecated in favour of `-rm -r` but is the form
             # in use here.
             self.call_hdfs('dfs -rmr ' + target, machine)
         record = {'entry_name': target}
         self.set_job_machine(record, machine)
         monitor.setCache(signature, record, self.getName())
         cached_job = monitor.getCache(signature)
     self.set_output('Name', cached_job.parameters['entry_name'])
     self.set_output('Machine', machine)
Пример #12
0
 def compute(self):
     """Build an HDFS URI with a symlink fragment.

     Reads the 'HDFS File/URI' and 'Symlink' inputs (both required), adds
     the machine prefix via add_prefix() when the URI has no '://'
     scheme, and appends '#<symlink>'. The resulting URI is stored in
     the job monitor cache with the machine info. Skipped when a cache
     entry for this module's signature already exists.

     Outputs: 'URI' and 'Machine'. Raises ModuleError when either input
     is missing.
     """
     machine = self.get_machine()
     jm = JobMonitor.getInstance()
     job_id = self.signature
     job = jm.getCache(job_id)
     if not job:
         uri = self.force_get_input('HDFS File/URI')
         symlink = self.force_get_input('Symlink')
         # Identity test: only a genuinely absent value counts as missing.
         if uri is None or symlink is None:
             raise ModuleError(self,
                               "Missing 'HDFS File/URI' or 'Symlink' values")
         if '://' not in uri:
             uri = self.add_prefix(uri, machine)
         uri += '#' + symlink
         d = {'uri': uri}
         self.set_job_machine(d, machine)
         jm.setCache(job_id, d, self.getName())
         job = jm.getCache(job_id)
     self.set_output('URI', job.parameters['uri'])
     self.set_output('Machine', machine)
Пример #13
0
    def compute(self):
        """Run a shell command on the machine given on the 'machine' port.

        Requires the 'machine' and 'command' input ports. When the job
        monitor has a cache entry for this module's signature, its
        'result' value is reused; otherwise the command is sent to the
        machine's remote side and the result is cached.

        Sets the 'output' and 'machine' results. Raises ModuleError when
        a required port is missing.
        """
        if not self.hasInputFromPort('machine'):
            raise ModuleError(self, "No machine specified")
        if not self.hasInputFromPort('command'):
            raise ModuleError(self, "No command specified")
        command = self.getInputFromPort('command').strip()
        machine = self.getInputFromPort('machine').machine

        jm = JobMonitor.getInstance()
        cache = jm.getCache(self.signature)
        if cache:
            result = cache['result']
        else:
            # use_machine() selects the machine for the commands that
            # follow (avoids initializing it every time). end_machine()
            # sits in a finally clause so a failing command cannot leave
            # the machine selected.
            use_machine(machine)
            try:
                m = current_machine()
                result = m.remote.send_command(command)
            finally:
                end_machine()
            jm.setCache(self.signature, {'result': result})
        self.setResult("output", result)
        self.setResult("machine", self.getInputFromPort('machine'))
Пример #14
0
    def compute(self):
        """Execute the 'command' input on the machine from the 'machine' port.

        Both ports are mandatory. A cache entry in the job monitor for
        this module's signature supplies the result directly; without one
        the command is sent to the machine's remote side and its result
        is written to the cache.

        Sets the 'output' and 'machine' results. Raises ModuleError for a
        missing port.
        """
        if not self.hasInputFromPort('machine'):
            raise ModuleError(self, "No machine specified")
        if not self.hasInputFromPort('command'):
            raise ModuleError(self, "No command specified")
        command = self.getInputFromPort('command').strip()
        machine = self.getInputFromPort('machine').machine

        jm = JobMonitor.getInstance()
        cache = jm.getCache(self.signature)
        if cache:
            result = cache['result']
        else:
            # Select the machine for the commands below without
            # initializing it every time, and always release it again,
            # including when send_command raises.
            use_machine(machine)
            try:
                m = current_machine()
                result = m.remote.send_command(command)
            finally:
                end_machine()
            jm.setCache(self.signature, {'result': result})
        self.setResult("output", result)
        self.setResult("machine", self.getInputFromPort('machine'))
Пример #15
0
    def compute(self):
        """Download an HDFS entry to a local path through the machine.

        Reads the 'Remote Location' and 'Local File' inputs and the
        optional 'Override' input (default False). A remote location
        without a '://' scheme is completed with add_prefix(). An
        existing local path is deleted when 'Override' is truthy;
        otherwise a ModuleError is raised. The entry is fetched from HDFS
        into a temporary directory on the machine, synced to the local
        path, and the temporary directory is removed.

        Skipped when the job monitor already has a cache entry for this
        module's signature. Outputs: 'Local File' (a PathObject) and
        'Machine'.
        """
        machine = self.get_machine()
        jm = JobMonitor.getInstance()
        job_id = self.signature
        job = jm.getCache(job_id)
        if not job:
            remote = self.get_input('Remote Location')
            local = self.get_input('Local File')
            override = self.force_get_input('Override', False)
            if '://' not in remote:
                remote = self.add_prefix(remote, machine)
            if os.path.exists(local.name):
                # Any falsy value (not just the literal False) must
                # protect existing output from deletion.
                if not override:
                    raise ModuleError(self, 'Output already exists')
                if os.path.isdir(local.name):
                    shutil.rmtree(local.name)
                else:
                    os.unlink(local.name)

            staging_dir = machine.remote.send_command('mktemp -d -u').strip()
            try:
                self.call_hdfs('dfs -get %s %s' % (remote, staging_dir),
                               machine)
                machine.local.send_command('mkdir %s' % local.name)
                # A plain recursive get was too slow with many files, so
                # the sync uses tar to increase speed.
                machine.sync(local.name,
                             staging_dir,
                             mode=machine.MODE_REMOTE_LOCAL,
                             use_tar=True)
            finally:
                # Remove the staging directory even when the download or
                # sync fails, so failed runs leave nothing behind.
                machine.remote.rm(staging_dir, force=True, recursively=True)
            d = {'remote': remote, 'local': local.name}
            self.set_job_machine(d, machine)
            jm.setCache(job_id, d, self.getName())
            job = jm.getCache(job_id)
        self.set_output('Local File', PathObject(job.parameters['local']))
        self.set_output('Machine', machine)
Пример #16
0
    def __init__(self, parent=None):
        """Build the "Running Jobs" widget.

        Registers this widget as the JobMonitor callback and lays out a
        button row ("Check now" button, refresh-interval combo box,
        "Automatic re-execution" check box) above a two-column tree of
        jobs. The initial interval and autorun state are read from the
        VisTrails configuration.

        parent -- optional parent widget, passed on to QtGui.QWidget.
        """
        QtGui.QWidget.__init__(self, parent)

        self.jobMonitor = JobMonitor.getInstance()
        self.jobMonitor.setCallback(self)
        self.timer_id = None

        self.workflowItems = {}

        self.layout = QtGui.QVBoxLayout()

        buttonsLayout = QtGui.QHBoxLayout()
        run_now = QDockPushButton("Check now")
        run_now.setToolTip("Check all jobs now")
        run_now.clicked.connect(self.timerEvent)
        buttonsLayout.addWidget(run_now)
        label = QtGui.QLabel('Refresh interval (seconds):')
        buttonsLayout.addWidget(label)

        self.interval = QtGui.QComboBox()
        for text, seconds in refresh_states:
            self.interval.addItem(text, seconds)
        # Connect exactly once. Connecting inside the loop registered the
        # slot once per refresh state, so set_refresh fired that many
        # times for every edit.
        self.interval.editTextChanged.connect(self.set_refresh)
        self.interval.setEditable(True)
        self.interval.setCurrentIndex(self.interval.findText('10 min'))
        self.interval.setCompleter(None)
        self.interval.setValidator(QNumberValidator())
        conf = configuration.get_vistrails_configuration()
        # Only overwrite the preselected entry for a non-default interval.
        if conf.jobCheckInterval and conf.jobCheckInterval != 10:
            self.interval.setEditText(str(conf.jobCheckInterval))
        buttonsLayout.addWidget(self.interval)

        self.autorun = QtGui.QCheckBox("Automatic re-execution")
        self.autorun.setToolTip("Automatically re-execute workflow when jobs "
                                "complete")
        self.autorun.toggled.connect(self.autorunToggled)
        if conf.jobAutorun:
            self.autorun.setChecked(True)
        buttonsLayout.addWidget(self.autorun)

        buttonsLayout.addStretch(1)
        self.layout.addLayout(buttonsLayout)

        self.jobView = QtGui.QTreeWidget()
        self.jobView.setContentsMargins(0, 0, 0, 0)
        self.jobView.setColumnCount(2)
        self.jobView.setHeaderLabels(['Job', 'Message'])
        header = self.jobView.header()
        header.setResizeMode(0, QtGui.QHeaderView.ResizeToContents)
        header.setResizeMode(1, QtGui.QHeaderView.Stretch)
        self.jobView.setExpandsOnDoubleClick(False)
        self.jobView.itemDoubleClicked.connect(self.item_selected)
        self.layout.addWidget(self.jobView)

        self.setLayout(self.layout)
        self.setWindowTitle('Running Jobs')
        self.resize(QtCore.QSize(800, 600))
        self.updating_now = False
Пример #17
0
def run_and_get_results(w_list, parameters='', output_dir=None,
                        update_vistrail=True, extra_info=None,
                        reason='Console Mode Execution'):
    """run_and_get_results(w_list: list of (locator, version), parameters: str,
                           output_dir:str, update_vistrail: boolean,
                           extra_info:dict, reason: str)
    Run all workflows in w_list and return a list with one run object per
    workflow (the first element of the controller's execution results).
    version can be a tag name, a version id, or None for the latest
    version in the graph.

    parameters is a "$&$"-separated list of key=value pairs. Keys that
    are aliases of the current pipeline become custom aliases; other keys
    are looked up in the mashup named by extra_info['mashup_id'] when
    extra_info provides one. When output_dir is given, the pipeline is
    saved there as XML (and as PDF when running in gui mode).

    Each run is annotated with workflow_info, pipeline and, when the
    workflow has job modules, a job status string that is also printed.
    Raises VistrailsInternalError for a version that is neither a
    string, an integer nor None.
    """
    elements = parameters.split("$&$")
    aliases = {}
    params = []
    result = []
    for locator, workflow in w_list:
        (v, abstractions, thumbnails, mashups) = load_vistrail(locator)
        controller = VistrailController(v, locator, abstractions, thumbnails,
                                        mashups, auto_save=update_vistrail)
        if isinstance(workflow, basestring):
            version = v.get_version_number(workflow)
        elif isinstance(workflow, (int, long)):
            version = workflow
        elif workflow is None:
            version = controller.get_latest_version_in_graph()
        else:
            msg = "Invalid version tag or number: %s" % workflow
            raise VistrailsInternalError(msg)
        controller.change_selected_version(version)

        for e in elements:
            pos = e.find("=")
            if pos != -1:
                key = e[:pos].strip()
                value = e[pos+1:].strip()

                if controller.current_pipeline.has_alias(key):
                    aliases[key] = value
                # extra_info defaults to None; guard the membership test
                # so an unknown alias does not raise TypeError.
                elif extra_info is not None and 'mashup_id' in extra_info:
                    # new-style mashups can have aliases not existing in pipeline
                    for mashuptrail in mashups:
                        if mashuptrail.vtVersion == version:
                            mashup = mashuptrail.getMashup(extra_info['mashup_id'])
                            c = mashup.getAliasByName(key).component
                            params.append((c.vttype, c.vtid, value))

        if output_dir is not None and controller.current_pipeline is not None:
            # FIXME DAK: why is this always done?!? there is a flag for it...
            if is_running_gui():
                controller.updatePipelineScene()
                base_fname = "%s_%s_pipeline.pdf" % (locator.short_filename, version)
                filename = os.path.join(output_dir, base_fname)
                controller.current_pipeline_scene.saveToPDF(filename)
            else:
                debug.critical("Cannot save pipeline figure when not "
                               "running in gui mode")
            base_fname = "%s_%s_pipeline.xml" % (locator.short_filename, version)
            filename = os.path.join(output_dir, base_fname)
            vistrails.core.db.io.save_workflow(controller.current_pipeline, filename)
        if not update_vistrail:
            conf = get_vistrails_configuration()
            if conf.has('thumbs'):
                conf.thumbs.autoSave = False

        jobMonitor = JobMonitor.getInstance()
        current_workflow = jobMonitor.currentWorkflow()
        if not current_workflow:
            # Resume a known running workflow for this vistrail/version,
            # or start tracking a new one.
            for job in jobMonitor._running_workflows.itervalues():
                try:
                    job_version = int(job.version)
                except ValueError:
                    # Not numeric, so treat it as a tag name.
                    job_version = v.get_version_number(job.version)
                if version == job_version and locator.to_url() == job.vistrail:
                    current_workflow = job
                    jobMonitor.startWorkflow(job)
            if not current_workflow:
                current_workflow = JobWorkflow(locator.to_url(), version)
                jobMonitor.getInstance().startWorkflow(current_workflow)

        try:
            (results, _) = \
            controller.execute_current_workflow(custom_aliases=aliases,
                                                custom_params=params,
                                                extra_info=extra_info,
                                                reason=reason)
        finally:
            jobMonitor.finishWorkflow()
        new_version = controller.current_version
        if new_version != version:
            debug.log("Version '%s' (%s) was upgraded. The actual "
                      "version executed was %s" % (
                      workflow, version, new_version))
        run = results[0]
        run.workflow_info = (locator.name, new_version)
        run.pipeline = controller.current_pipeline

        if update_vistrail:
            controller.write_vistrail(locator)
        result.append(run)
        if current_workflow.modules:
            if current_workflow.completed():
                run.job = "COMPLETED"
            else:
                run.job = "RUNNING: %s" % current_workflow.id
                for job in current_workflow.modules.itervalues():
                    if not job.finished:
                        run.job += "\n  %s %s %s" % (job.start, job.name, job.description())
            # Parenthesised single argument prints the same under Python 2.
            print(run.job)
    return result