Example #1
    def set_execute_settings(self, run_settings, local_settings):
        self.set_domain_settings(run_settings, local_settings)
        update(local_settings, run_settings,
               '%s/stages/setup/payload_destination' % django_settings.SCHEMA_PREFIX,
               '%s/stages/setup/filename_for_PIDs' % django_settings.SCHEMA_PREFIX,
               '%s/stages/setup/process_output_dirname' % django_settings.SCHEMA_PREFIX,
               '%s/stages/setup/smart_connector_input' % django_settings.SCHEMA_PREFIX,
               '%s/system/contextid' % django_settings.SCHEMA_PREFIX,
               '%s/system/random_numbers' % django_settings.SCHEMA_PREFIX,
               '%s/system/id' % django_settings.SCHEMA_PREFIX)
        try:
            local_settings['curate_data'] = getval(run_settings,
                                                   '%s/input/mytardis/curate_data' % django_settings.SCHEMA_PREFIX)
        except SettingNotFoundException:
            local_settings['curate_data'] = 0
        local_settings['bdp_username'] = getval(run_settings,
                                                '%s/bdp_userprofile/username' % django_settings.SCHEMA_PREFIX)
        if '%s/input/system/compplatform/hadoop' % django_settings.SCHEMA_PREFIX in run_settings:
            from chiminey.platform import get_platform_settings
            platform_url = run_settings['%s/platform/computation' % django_settings.SCHEMA_PREFIX]['platform_url']
            pltf_settings = get_platform_settings(platform_url, local_settings['bdp_username'])
            local_settings['root_path'] = '/home/%s' % pltf_settings['username']
            local_settings['hadoop_home_path'] = pltf_settings['hadoop_home_path']
            logger.debug('root_path=%s' % local_settings['root_path'])
        else:
            logger.debug('root_path not found')
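
Note: for context, below is a minimal sketch of how the update/getval helpers used above are assumed to behave on the '/'-delimited schema keys (simplified stand-ins, not the actual chiminey implementations; the prefix value is taken from Example #8):

SCHEMA_PREFIX = 'http://rmit.edu.au/schemas'  # assumed value of django_settings.SCHEMA_PREFIX

class SettingNotFoundException(Exception):
    pass

def getval(run_settings, key):
    # run_settings is assumed to be a dict of namespace -> {name: value};
    # a full key such as '<prefix>/system/id' splits into namespace and name.
    namespace, _, name = key.rpartition('/')
    try:
        return run_settings[namespace][name]
    except KeyError:
        raise SettingNotFoundException(key)

def update(local_settings, run_settings, *keys):
    # Copies each named setting into the flat local_settings dict, keyed by
    # the setting's short name (assumed from how local_settings is read later).
    for key in keys:
        local_settings[key.rpartition('/')[2]] = getval(run_settings, key)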
Example #2
 def set_schedule_settings(self, run_settings, local_settings):
     super(HadoopStrategy,
           self).set_schedule_settings(run_settings, local_settings)
     platform_url = run_settings['%s/platform/computation' %
                                 RMIT_SCHEMA]['platform_url']
     local_settings['root_path'] = '/home/%s' % (get_platform_settings(
         platform_url, local_settings['bdp_username'])['username'])
     logger.debug('out=%s' % local_settings)
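
Note: a small usage fixture for the root_path derivation above (all values hypothetical; get_platform_settings is stubbed to show only the dict shape the indexing relies on):

RMIT_SCHEMA = 'http://rmit.edu.au/schemas'

def get_platform_settings(platform_url, bdp_username):
    # Stub: the real helper resolves the platform registered for this user;
    # the examples only rely on dict-style access to its result.
    return {'username': 'hadoopuser', 'hadoop_home_path': '/opt/hadoop'}

run_settings = {
    '%s/platform/computation' % RMIT_SCHEMA: {'platform_url': 'hadoop/demo'},  # hypothetical URL
}
local_settings = {'bdp_username': 'alice'}  # hypothetical user

platform_url = run_settings['%s/platform/computation' % RMIT_SCHEMA]['platform_url']
local_settings['root_path'] = '/home/%s' % (
    get_platform_settings(platform_url, local_settings['bdp_username'])['username'])
print(local_settings['root_path'])  # -> /home/hadoopuser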
Example #3
 def set_execute_settings(self, run_settings, local_settings):
     self.set_domain_settings(run_settings, local_settings)
     update(
         local_settings, run_settings,
         '%s/stages/setup/payload_destination' %
         django_settings.SCHEMA_PREFIX,
         '%s/stages/setup/filename_for_PIDs' %
         django_settings.SCHEMA_PREFIX,
         '%s/stages/setup/process_output_dirname' %
         django_settings.SCHEMA_PREFIX,
         '%s/stages/setup/smart_connector_input' %
         django_settings.SCHEMA_PREFIX,
         '%s/system/contextid' % django_settings.SCHEMA_PREFIX,
         '%s/system/random_numbers' % django_settings.SCHEMA_PREFIX,
         '%s/system/id' % django_settings.SCHEMA_PREFIX)
     try:
         local_settings['curate_data'] = getval(
             run_settings, '%s/input/mytardis/curate_data' %
             django_settings.SCHEMA_PREFIX)
     except SettingNotFoundException:
         local_settings['curate_data'] = 0
     local_settings['bdp_username'] = getval(
         run_settings,
         '%s/bdp_userprofile/username' % django_settings.SCHEMA_PREFIX)
     if '%s/input/system/compplatform/hadoop' % django_settings.SCHEMA_PREFIX in run_settings:
         from chiminey.platform import get_platform_settings
         platform_url = run_settings['%s/platform/computation' %
                                     django_settings.SCHEMA_PREFIX]['platform_url']
         pltf_settings = get_platform_settings(
             platform_url, local_settings['bdp_username'])
         local_settings['root_path'] = '/home/%s' % pltf_settings['username']
         local_settings['hadoop_home_path'] = pltf_settings['hadoop_home_path']
         logger.debug('root_path=%s' % local_settings['root_path'])
     else:
         logger.debug('root_path not found')
Example #4
 def set_schedule_settings(self, run_settings, local_settings):
     super(HadoopStrategy, self).set_schedule_settings(run_settings, local_settings)
     platform_url = run_settings['%s/platform/computation' % RMIT_SCHEMA]['platform_url']
     local_settings['root_path'] = '/home/%s' % (get_platform_settings(
         platform_url, local_settings['bdp_username'])['username'])
     logger.debug('out=%s' % local_settings)
Example #5
File: wait.py Project: chiminey/chiminey
    def process(self, run_settings):
        """
            Check all registered nodes to find whether
            they are running, stopped or in error_nodes
        """

        local_settings = getvals(run_settings, models.UserProfile.PROFILE_SCHEMA_NS)
        # local_settings = run_settings[models.UserProfile.PROFILE_SCHEMA_NS]
        retrieve_local_settings(run_settings, local_settings)
        logger.debug("local_settings=%s" % local_settings)

        self.contextid = getval(run_settings, '%s/system/contextid' % django_settings.SCHEMA_PREFIX)
        output_storage_url = getval(run_settings, '%s/platform/storage/output/platform_url' % django_settings.SCHEMA_PREFIX)
        output_storage_settings = get_platform_settings(output_storage_url, local_settings['bdp_username'])
        # FIXME: Need to be consistent with how we handle settings here.  Prob combine all into
        # single local_settings for simplicity.
        output_storage_settings['bdp_username'] = local_settings['bdp_username']
        offset = getval(run_settings, '%s/platform/storage/output/offset' % django_settings.SCHEMA_PREFIX)
        self.job_dir = get_job_dir(output_storage_settings, offset)

        try:
            self.finished_nodes = getval(run_settings, '%s/stages/run/finished_nodes' % django_settings.SCHEMA_PREFIX)
        except SettingNotFoundException:
            self.finished_nodes = '[]'

        try:
            self.id = int(getval(run_settings, '%s/system/id' % django_settings.SCHEMA_PREFIX))
            self.output_dir = "output_%s" % self.id
        except (SettingNotFoundException, ValueError):
            self.id = 0
            self.output_dir = "output"

        logger.debug("output_dir=%s" % self.output_dir)
        logger.debug("run_settings=%s" % run_settings)
        logger.debug("Wait stage process began")

        #processes = self.executed_procs
        processes = [x for x in self.current_processes if x['status'] == 'running']
        self.error_nodes = []
        # TODO: parse finished_nodes input
        logger.debug('self.finished_nodes=%s' % self.finished_nodes)
        self.finished_nodes = ast.literal_eval(self.finished_nodes)

        computation_platform_url = getval(run_settings, '%s/platform/computation/platform_url' % django_settings.SCHEMA_PREFIX)
        comp_pltf_settings = get_platform_settings(computation_platform_url, local_settings['bdp_username'])
        local_settings.update(comp_pltf_settings)
        comp_pltf_settings['bdp_username'] = local_settings['bdp_username']

        wait_strategy = strategies.SynchronousWaitStrategy()
        try:
            payload_source = getval(run_settings, '%s/stages/setup/payload_source' % django_settings.SCHEMA_PREFIX)
            if payload_source:
                wait_strategy = strategies.AsynchronousWaitStrategy()
        except SettingNotFoundException:
            pass

        for process in processes:
            #instance_id = node.id
            ip_address = process['ip_address']
            process_id = process['id']
            retry_left = process['retry_left']
            #ip = botocloudconnector.get_instance_ip(instance_id, self.boto_settings)
            #ssh = open_connection(ip_address=ip, settings=self.boto_settings)
            #if not botocloudconnector.is_vm_running(node):
                # An unlikely situation where the node crashed after it was
                # detected as registered.
                #FIXME: should error nodes be counted as finished?
            #    logging.error('Instance %s not running' % instance_id)
            #    self.error_nodes.append(node)
            #    continue
            relative_path_suffix = self.get_relative_output_path(local_settings)
            fin = wait_strategy.is_job_finished(
                self, ip_address, process_id, retry_left,
                local_settings, relative_path_suffix)
            logger.debug("fin=%s" % fin)
            if fin:
                logger.debug("done. output is available")
                logger.debug("node=%s" % str(process))
                logger.debug("finished_nodes=%s" % self.finished_nodes)
                #FIXME: for multiple nodes, if one finishes before the other then
                #its output will be retrieved, but it may be retrieved again when
                #the other node fails, because we cannot tell whether we have
                #previously retrieved this output, and finished_nodes is not
                #maintained between triggerings...
                if int(process_id) not in [int(x['id'])
                                           for x in self.finished_nodes]:
                    self.get_output(ip_address, process_id, self.output_dir,
                                    local_settings, comp_pltf_settings,
                                    output_storage_settings, run_settings)

                    audit_url = get_url_with_credentials(
                        comp_pltf_settings, os.path.join(
                            self.output_dir, process_id, "audit.txt"),
                        is_relative_path=True)
                    fsys = storage.get_filesystem(audit_url)
                    logger.debug("Audit file url %s" % audit_url)
                    if fsys.exists(audit_url):
                        fsys.delete(audit_url)
                    self.finished_nodes.append(process)
                    logger.debug('finished_processes=%s' % self.finished_nodes)
                    for iterator, p in enumerate(self.all_processes):
                        if int(p['id']) == int(process_id) and p['status'] == 'running':
                            self.all_processes[iterator]['status'] = 'completed'
                    for iterator, p in enumerate(self.executed_procs):
                        if int(p['id']) == int(process_id) and p['status'] == 'running':
                            self.executed_procs[iterator]['status'] = 'completed'
                    for iterator, p in enumerate(self.current_processes):
                        if int(p['id']) == int(process_id) and p['status'] == 'running':
                            self.current_processes[iterator]['status'] = 'completed'
                else:
                    logger.warn("We have already processed output of %s on node %s"
                                % (process_id, ip_address))
            else:
                print "job %s at %s not completed" % (process_id, ip_address)
            failed_processes = [x for x in self.current_processes if x['status'] == 'failed']
            logger.debug('failed_processes=%s' % failed_processes)
            logger.debug('failed_processes=%d' % len(failed_processes))
            messages.info(run_settings, "%d: Waiting %d processes (%d completed, %d failed) " % (
                self.id + 1, len(self.current_processes),  len(self.finished_nodes),
                len(failed_processes)))
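
Note: the wait_strategy objects above share a duck-typed is_job_finished interface, with the stage instance passed in explicitly as the first argument. A hypothetical sketch of that contract (not the real chiminey strategies, which poll the remote job):

class SynchronousWaitStrategy(object):
    def is_job_finished(self, stage, ip_address, process_id, retry_left,
                        local_settings, relative_path_suffix):
        # Simplified: the real strategy would block on the remote process and
        # only return once its output is available.
        return True

class AsynchronousWaitStrategy(object):
    def is_job_finished(self, stage, ip_address, process_id, retry_left,
                        local_settings, relative_path_suffix):
        # Simplified: the real strategy would return immediately so completion
        # is re-checked on a later triggering of the stage.
        return False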
Example #6
File: wait.py Project: silky/chiminey
    def process(self, run_settings):
        """
            Check all registered nodes to find whether
            they are running, stopped or in error_nodes
        """

        local_settings = getvals(run_settings, models.UserProfile.PROFILE_SCHEMA_NS)
        # local_settings = run_settings[models.UserProfile.PROFILE_SCHEMA_NS]
        retrieve_local_settings(run_settings, local_settings)
        logger.debug("local_settings=%s" % local_settings)

        self.contextid = getval(run_settings, '%s/system/contextid' % RMIT_SCHEMA)
        output_storage_url = getval(run_settings, '%s/platform/storage/output/platform_url' % RMIT_SCHEMA)
        output_storage_settings = get_platform_settings(output_storage_url, local_settings['bdp_username'])
        # FIXME: Need to be consistent with how we handle settings here.  Prob combine all into
        # single local_settings for simplicity.
        output_storage_settings['bdp_username'] = local_settings['bdp_username']
        offset = getval(run_settings, '%s/platform/storage/output/offset' % RMIT_SCHEMA)
        self.job_dir = get_job_dir(output_storage_settings, offset)

        try:
            self.finished_nodes = getval(run_settings, '%s/stages/run/finished_nodes' % RMIT_SCHEMA)
            # self.finished_nodes = smartconnectorscheduler.get_existing_key(run_settings,
            #     'http://rmit.edu.au/schemas/stages/run/finished_nodes')
        except SettingNotFoundException:
            self.finished_nodes = '[]'

        try:
            self.id = int(getval(run_settings, '%s/system/id' % RMIT_SCHEMA))
            # self.id = int(smartconnectorscheduler.get_existing_key(run_settings,
            #     'http://rmit.edu.au/schemas/system/id'))

            self.output_dir = "output_%s" % self.id
        except (SettingNotFoundException, ValueError):
            self.id = 0
            self.output_dir = "output"

        logger.debug("output_dir=%s" % self.output_dir)
        logger.debug("run_settings=%s" % run_settings)
        logger.debug("Wait stage process began")

        #processes = self.executed_procs
        processes = [x for x in self.current_processes if x['status'] == 'running']
        self.error_nodes = []
        # TODO: parse finished_nodes input
        logger.debug('self.finished_nodes=%s' % self.finished_nodes)
        self.finished_nodes = ast.literal_eval(self.finished_nodes)

        computation_platform_url = getval(run_settings, '%s/platform/computation/platform_url' % RMIT_SCHEMA)
        comp_pltf_settings = get_platform_settings(computation_platform_url, local_settings['bdp_username'])
        local_settings.update(comp_pltf_settings)
        comp_pltf_settings['bdp_username'] = local_settings['bdp_username']

        wait_strategy = strategies.SynchronousWaitStrategy()
        try:
            synchronous_wait = getval(run_settings, '%s/stages/wait/synchronous' % RMIT_SCHEMA)
            if not synchronous_wait:
                wait_strategy = strategies.AsynchronousWaitStrategy()
        except SettingNotFoundException:
            pass

        for process in processes:
            #instance_id = node.id
            ip_address = process['ip_address']
            process_id = process['id']
            retry_left = process['retry_left']
            #ip = botocloudconnector.get_instance_ip(instance_id, self.boto_settings)
            #ssh = open_connection(ip_address=ip, settings=self.boto_settings)
            #if not botocloudconnector.is_vm_running(node):
                # An unlikely situation where the node crashed after it was
                # detected as registered.
                #FIXME: should error nodes be counted as finished?
            #    logging.error('Instance %s not running' % instance_id)
            #    self.error_nodes.append(node)
            #    continue
            relative_path_suffix = self.get_relative_output_path(local_settings)
            fin = wait_strategy.is_job_finished(
                self, ip_address, process_id, retry_left,
                local_settings, relative_path_suffix)
            logger.debug("fin=%s" % fin)
            if fin:
                logger.debug("done. output is available")
                logger.debug("node=%s" % str(process))
                logger.debug("finished_nodes=%s" % self.finished_nodes)
                #FIXME: for multiple nodes, if one finishes before the other then
                #its output will be retrieved, but it may be retrieved again when
                #the other node fails, because we cannot tell whether we have
                #previously retrieved this output, and finished_nodes is not
                #maintained between triggerings...
                if int(process_id) not in [int(x['id'])
                                           for x in self.finished_nodes]:
                    self.get_output(ip_address, process_id, self.output_dir,
                                    local_settings, comp_pltf_settings,
                                    output_storage_settings, run_settings)

                    audit_url = get_url_with_credentials(
                        comp_pltf_settings, os.path.join(
                            self.output_dir, process_id, "audit.txt"),
                        is_relative_path=True)
                    fsys = storage.get_filesystem(audit_url)
                    logger.debug("Audit file url %s" % audit_url)
                    if fsys.exists(audit_url):
                        fsys.delete(audit_url)
                    self.finished_nodes.append(process)
                    logger.debug('finished_processes=%s' % self.finished_nodes)
                    for iterator, p in enumerate(self.all_processes):
                        if int(p['id']) == int(process_id) and p['status'] == 'running':
                            self.all_processes[iterator]['status'] = 'completed'
                    for iterator, p in enumerate(self.executed_procs):
                        if int(p['id']) == int(process_id) and p['status'] == 'running':
                            self.executed_procs[iterator]['status'] = 'completed'
                    for iterator, p in enumerate(self.current_processes):
                        if int(p['id']) == int(process_id) and p['status'] == 'running':
                            self.current_processes[iterator]['status'] = 'completed'
                else:
                    logger.warn("We have already processed output of %s on node %s"
                                % (process_id, ip_address))
            else:
                print "job %s at %s not completed" % (process_id, ip_address)
            failed_processes = [x for x in self.current_processes if x['status'] == 'failed']
            logger.debug('failed_processes=%s' % failed_processes)
            logger.debug('failed_processes=%d' % len(failed_processes))
            messages.info(run_settings, "%d: waiting %d processes (%d completed, %d failed) " % (
                self.id + 1, len(self.current_processes),  len(self.finished_nodes),
                len(failed_processes)))
Example #7
 def get_platform_settings(self, run_settings, namespace_prefix):
     bdp_username = run_settings['%s/bdp_userprofile' %
                                 django_settings.SCHEMA_PREFIX]['username']
     platform_url = run_settings[namespace_prefix]['platform_url']
     return get_platform_settings(platform_url, bdp_username)
Example #8
 def get_platform_settings(self, run_settings, namespace_prefix):
     bdp_username = run_settings['http://rmit.edu.au/schemas/bdp_userprofile']['username']
     platform_url = run_settings[namespace_prefix]['platform_url']
     return get_platform_settings(platform_url, bdp_username)
Example #9
 def get_platform_settings(self, run_settings, namespace_prefix):
     bdp_username = run_settings['%s/bdp_userprofile' % django_settings.SCHEMA_PREFIX]['username']
     platform_url = run_settings[namespace_prefix]['platform_url']
     return get_platform_settings(platform_url, bdp_username)
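
Note: a usage fixture for the get_platform_settings method above (all values hypothetical; the hardcoded prefix follows Example #8):

SCHEMA_PREFIX = 'http://rmit.edu.au/schemas'

run_settings = {
    '%s/bdp_userprofile' % SCHEMA_PREFIX: {'username': 'alice'},                 # hypothetical
    '%s/platform/computation' % SCHEMA_PREFIX: {'platform_url': 'nci/raijin'},  # hypothetical
}

# Mirrors the method body, with the trailing get_platform_settings call elided:
bdp_username = run_settings['%s/bdp_userprofile' % SCHEMA_PREFIX]['username']
platform_url = run_settings['%s/platform/computation' % SCHEMA_PREFIX]['platform_url']
print(platform_url)  # -> nci/raijin, resolved for user 'alice'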