示例#1
0
def schedule_task(schedule_class, run_settings, local_settings):
    """Start scheduling, or rescheduling, on the bootstrapped nodes.

    Stores the VMs returned by ``get_registered_vms`` (node type
    ``'bootstrapped_nodes'``) on ``schedule_class.nodes`` and writes the
    ``maximum_retry`` reliability setting into
    ``local_settings['maximum_retry']``, falling back to 0 when the
    setting is not found.  ``start_reschedule`` is called when
    ``schedule_class.procs_2b_rescheduled`` is truthy, otherwise
    ``start_schedule``.
    """
    schedule_class.nodes = get_registered_vms(
        local_settings, node_type='bootstrapped_nodes')

    try:
        retries = getval(
            run_settings, '%s/input/reliability/maximum_retry' % RMIT_SCHEMA)
    except SettingNotFoundException:
        retries = 0
    local_settings['maximum_retry'] = retries

    # Guard clause: the plain scheduling path returns early.
    if not schedule_class.procs_2b_rescheduled:
        start_schedule(schedule_class, run_settings, local_settings)
        return
    start_reschedule(schedule_class, run_settings, local_settings)
def complete_bootstrap(bootstrap_class, local_settings):
    """Poll every registered VM that is not yet listed as bootstrapped.

    The registered VMs are fetched first.  When fewer VMs are registered
    than there are ``bootstrap_class.created_nodes`` entries marked
    ``'running'``, a ``VMTerminatedError`` is raised and handled here.
    That error and ``NoRegisteredVMError`` are both handed to
    ``FTManager.manage_failure``.

    For each node whose IP (public, else private) is not already present
    in ``bootstrap_class.bootstrapped_nodes``, the destination URL is
    built and ``_is_bootstrap_complete`` is called.  An ``IOError`` is
    logged; any other exception is logged and also reported to
    ``FTManager.manage_failure`` together with the VM's ip and id.

    :param bootstrap_class: stage object providing ``created_nodes``,
        ``bootstrapped_nodes`` and ``get_relative_output_path``.
    :param local_settings: settings mapping; ``'type'`` is read here.
    """
    # Default to an empty list so that the loop below is simply skipped
    # (rather than raising NameError) when get_registered_vms() fails
    # before ``nodes`` is assigned.
    nodes = []
    try:
        nodes = get_registered_vms(local_settings)
        running_created_nodes = [
            x for x in bootstrap_class.created_nodes if str(x[3]) == 'running'
        ]
        if len(nodes) < len(running_created_nodes):
            raise VMTerminatedError
    except NoRegisteredVMError as e:
        logger.debug('NoRegisteredVMError detected')
        ftmanager = FTManager()
        ftmanager.manage_failure(e,
                                 stage_class=bootstrap_class,
                                 settings=local_settings)
    except VMTerminatedError as e:
        logger.debug('VMTerminatedError detected')
        ftmanager = FTManager()
        ftmanager.manage_failure(e,
                                 stage_class=bootstrap_class,
                                 settings=local_settings)

    for node in nodes:
        # Prefer the public address; fall back to the private one.
        node_ip = node.ip_address or node.private_ip_address
        if any(x[1] == node_ip for x in bootstrap_class.bootstrapped_nodes):
            continue
        relative_path_suffix = bootstrap_class.get_relative_output_path(
            local_settings)
        relative_path = "%s@%s" % (local_settings['type'],
                                   relative_path_suffix)
        destination = get_url_with_credentials(local_settings,
                                               relative_path,
                                               is_relative_path=True,
                                               ip_address=node_ip)
        logger.debug("Relative path %s" % relative_path)
        logger.debug("Destination %s" % destination)
        # NOTE(review): ``fin`` is not consumed within the visible code;
        # the block ends right after the exception handlers.
        try:
            fin = _is_bootstrap_complete(node_ip, local_settings, destination)
        except IOError as e:
            # I/O problems are only logged, not escalated.
            logger.error(e)
            fin = False
        except Exception as e:
            logger.error(e)
            fin = False
            ftmanager = FTManager()
            ftmanager.manage_failure(e,
                                     stage_class=bootstrap_class,
                                     vm_ip=node_ip,
                                     vm_id=node.id,
                                     settings=local_settings)
示例#3
0
 def _manage_vm_terminated_error(self, kwargs):
     """Mark created nodes whose VM is no longer registered as 'failed'.

     Reads ``kwargs['stage_class']`` (stored on ``self.stage_class``) and
     ``kwargs['settings']``.  Each entry of the stage's ``created_nodes``
     whose first element, as a string, is not among the ids of the VMs
     returned by ``get_registered_vms`` gets its fourth element set to
     ``'failed'``.  A ``KeyError`` raised anywhere in that work is logged
     at debug level and swallowed.
     """
     try:
         self.stage_class = kwargs['stage_class']
         registered_ids = [
             vm.id for vm in get_registered_vms(kwargs['settings'])
         ]
         for created in self.stage_class.created_nodes:
             if str(created[0]) in registered_ids:
                 continue
             created[3] = 'failed'
     except KeyError as err:
         logger.debug('key_error = %s' % err)
示例#4
0
 def _manage_vm_terminated_error(self, kwargs):
     """Flag every created node without a registered VM as 'failed'.

     ``kwargs`` must supply ``'stage_class'`` (kept on
     ``self.stage_class``) and ``'settings'`` (passed to
     ``get_registered_vms``).  Entries of ``created_nodes`` are indexed
     positionally: element 0 is compared, as a string, against the
     registered VM ids and element 3 is overwritten with ``'failed'``
     when no match is found.  ``KeyError`` is logged and not re-raised.
     """
     try:
         self.stage_class = kwargs['stage_class']
         alive_ids = []
         for machine in get_registered_vms(kwargs['settings']):
             alive_ids.append(machine.id)
         # Lazy generator keeps the test-then-mark order per entry.
         missing = (entry for entry in self.stage_class.created_nodes
                    if str(entry[0]) not in alive_ids)
         for entry in missing:
             entry[3] = 'failed'
     except KeyError as problem:
         logger.debug('key_error = %s' % problem)
def complete_bootstrap(bootstrap_class, local_settings, id):
    """Poll every registered VM that is not yet listed as bootstrapped.

    The registered VMs are fetched first.  When fewer VMs are registered
    than there are ``bootstrap_class.created_nodes`` entries marked
    ``'running'``, a ``VMTerminatedError`` is raised and handled here.
    That error and ``NoRegisteredVMError`` are both handed to
    ``FTManager.manage_failure``.

    For each node whose IP (public, else private) is not already present
    in ``bootstrap_class.bootstrapped_nodes``, the destination URL is
    built and ``_is_bootstrap_complete`` is called.  An ``IOError`` is
    logged; any other exception is logged and also reported to
    ``FTManager.manage_failure`` together with the VM's ip and id.

    :param bootstrap_class: stage object providing ``created_nodes``,
        ``bootstrapped_nodes`` and ``get_relative_output_path``.
    :param local_settings: settings mapping; ``'type'`` is read here.
    :param id: not used by the visible code; kept for interface
        compatibility (the name shadows the builtin).
    """
    logger.debug("complete_bootstrap")
    # Default to an empty list so that the debug line and loop below do
    # not raise NameError when get_registered_vms() fails before
    # ``nodes`` is assigned.
    nodes = []
    try:
        nodes = get_registered_vms(local_settings)
        running_created_nodes = [x for x in bootstrap_class.created_nodes
                                 if str(x[3]) == 'running']
        logger.debug("running_created_nodes=%s" % running_created_nodes)
        if len(nodes) < len(running_created_nodes):
            raise VMTerminatedError
    except NoRegisteredVMError as e:
        logger.debug('NoRegisteredVMError detected')
        ftmanager = FTManager()
        ftmanager.manage_failure(e, stage_class=bootstrap_class,
                                 settings=local_settings)
    except VMTerminatedError as e:
        logger.debug('VMTerminatedError detected')
        ftmanager = FTManager()
        ftmanager.manage_failure(e, stage_class=bootstrap_class,
                                 settings=local_settings)
    logger.debug("nodes=%s" % nodes)

    for node in nodes:
        logger.debug("node=%s" % node)
        # Prefer the public address; fall back to the private one.
        node_ip = node.ip_address or node.private_ip_address
        logger.debug("node_ip=%s" % node_ip)
        logger.debug("bootstrap_class.bootstrapped_nodes=%s"
                     % bootstrap_class.bootstrapped_nodes)
        # The list is materialised because it is logged below.
        node_list = [x[1] for x in bootstrap_class.bootstrapped_nodes
                     if x[1] == node_ip]
        logger.debug("node_list=%s" % node_list)
        if node_ip in node_list:
            continue
        relative_path_suffix = bootstrap_class.get_relative_output_path(
            local_settings)
        logger.debug("relative_path_suffix=%s" % relative_path_suffix)
        relative_path = "%s@%s" % (local_settings['type'],
                                   relative_path_suffix)
        destination = get_url_with_credentials(local_settings,
                                               relative_path,
                                               is_relative_path=True,
                                               ip_address=node_ip)
        logger.debug("Relative path %s" % relative_path)
        logger.debug("Destination %s" % destination)
        # NOTE(review): ``fin`` is not consumed within the visible code;
        # the block ends right after the exception handlers.
        try:
            fin = _is_bootstrap_complete(node_ip, local_settings, destination)
        except IOError as e:
            # I/O problems are only logged, not escalated.
            logger.error(e)
            fin = False
        except Exception as e:
            logger.error(e)
            fin = False
            ftmanager = FTManager()
            ftmanager.manage_failure(e, stage_class=bootstrap_class,
                                     vm_ip=node_ip, vm_id=node.id,
                                     settings=local_settings)
def start_multi_bootstrap_task(settings, relative_path_suffix):
    """
    Start the bootstrap on each registered node.

    The registered VMs are split into groups keyed by make target.  The
    mapping is currently always empty on entry, so a single group with
    the empty make target covering all nodes is used.  For every node
    the payload source URL and the per-node destination URL are built
    and handed to ``_start_bootstrap``.

    The list returned by ``get_registered_vms`` is walked with a running
    offset rather than consumed with ``pop(0)``, so it is left unmodified.

    :param settings: settings mapping; ``'payload_source'`` and
        ``'type'`` are read here.
    :param relative_path_suffix: suffix appended after ``'<type>@'`` to
        form the relative destination path.
    :raises InsufficientResourceError: if more nodes are requested than
        are registered.
    """
    nodes = get_registered_vms(settings)
    logger.debug("nodes=%s" % nodes)
    requested_nodes = 0
    maketarget_nodegroup_pair = {}

    # TODO: need testcases for following code
    if not maketarget_nodegroup_pair:
        EMPTY_MAKE_TARGET = ''
        requested_nodes = len(nodes)
        maketarget_nodegroup_pair[EMPTY_MAKE_TARGET] = requested_nodes
    else:
        requested_nodes = sum(maketarget_nodegroup_pair.values())
        if requested_nodes > len(nodes):
            message = "Requested nodes %d; but available nodes %s " \
                % (requested_nodes, len(nodes))
            # No exception is being handled here, so logger.exception()
            # would attach a meaningless empty traceback.
            logger.error(message)
            raise InsufficientResourceError(message)
    logger.info("Requested nodes %d: \nAvailable nodes %s " %
                (requested_nodes, len(nodes)))

    logger.debug('starting setup')
    first_unused = 0  # index of the first node not yet given to a group
    for make_target in maketarget_nodegroup_pair:
        group_size = maketarget_nodegroup_pair[make_target]
        for instance in nodes[first_unused:first_unused + group_size]:
            # Prefer the public address; fall back to the private one.
            node_ip = instance.ip_address or instance.private_ip_address
            logger.debug("node_ip=%s" % node_ip)
            logger.debug('constructing source')
            source = get_url_with_credentials(settings,
                                              settings['payload_source'])
            logger.debug('source=%s' % source)
            relative_path = '%s@%s' % (settings['type'], relative_path_suffix)
            destination = get_url_with_credentials(settings,
                                                   relative_path,
                                                   is_relative_path=True,
                                                   ip_address=node_ip)
            logger.debug("Source %s" % source)
            logger.debug("Destination %s" % destination)
            logger.debug("Relative path %s" % relative_path)
            _start_bootstrap(instance, node_ip, settings, source, destination)
        first_unused += group_size
def start_multi_bootstrap_task(settings, relative_path_suffix):
    """
    Start the bootstrap on each registered node.

    The registered VMs are split into groups keyed by make target.  The
    mapping is currently always empty on entry, so a single group with
    the empty make target covering all nodes is used.  For every node
    the payload source URL (``'/'`` prepended to
    ``settings['payload_source']``) and the per-node destination URL are
    built and handed to ``_start_bootstrap``.

    The list returned by ``get_registered_vms`` is walked with a running
    offset rather than consumed with ``pop(0)``, so it is left unmodified.

    :param settings: settings mapping; ``'payload_source'`` and
        ``'type'`` are read here.
    :param relative_path_suffix: suffix appended after ``'<type>@'`` to
        form the relative destination path.
    :raises InsufficientResourceError: if more nodes are requested than
        are registered.
    """
    nodes = get_registered_vms(settings)
    logger.debug("nodes=%s" % nodes)
    requested_nodes = 0
    maketarget_nodegroup_pair = {}

    # TODO: need testcases for following code
    if not maketarget_nodegroup_pair:
        EMPTY_MAKE_TARGET = ''
        requested_nodes = len(nodes)
        maketarget_nodegroup_pair[EMPTY_MAKE_TARGET] = requested_nodes
    else:
        requested_nodes = sum(maketarget_nodegroup_pair.values())
        if requested_nodes > len(nodes):
            message = "Requested nodes %d; but available nodes %s " \
                % (requested_nodes, len(nodes))
            # No exception is being handled here, so logger.exception()
            # would attach a meaningless empty traceback.
            logger.error(message)
            raise InsufficientResourceError(message)
    logger.info("Requested nodes %d: \nAvailable nodes %s "
                % (requested_nodes, len(nodes)))

    logger.debug('starting setup')
    first_unused = 0  # index of the first node not yet given to a group
    for make_target in maketarget_nodegroup_pair:
        group_size = maketarget_nodegroup_pair[make_target]
        for instance in nodes[first_unused:first_unused + group_size]:
            # Prefer the public address; fall back to the private one.
            node_ip = instance.ip_address or instance.private_ip_address
            logger.debug("node_ip=%s" % node_ip)
            logger.debug('constructing source')
            source = get_url_with_credentials(
                settings, "/" + settings['payload_source'])
            logger.debug('source=%s' % source)
            relative_path = '%s@%s' % (settings['type'], relative_path_suffix)
            destination = get_url_with_credentials(settings, relative_path,
                                                   is_relative_path=True,
                                                   ip_address=node_ip)
            logger.debug("Source %s" % source)
            logger.debug("Destination %s" % destination)
            logger.debug("Relative path %s" % relative_path)
            _start_bootstrap(instance, node_ip, settings, source, destination)
        first_unused += group_size
示例#8
0
def schedule_task(schedule_class, run_settings, local_settings):
    """Announce and start scheduling, or rescheduling, of processes.

    Stores the VMs returned by ``get_registered_vms`` (node type
    ``'bootstrapped_nodes'``) on ``schedule_class.nodes`` and writes the
    ``maximum_retry`` reliability setting into
    ``local_settings['maximum_retry']`` (0 when the setting is not
    found).  The ``system/id`` setting is read as an integer, defaulting
    to 0 when missing or not numeric, and used to prefix the message
    sent through ``messages.info`` before ``start_reschedule`` or
    ``start_schedule`` is called, depending on whether
    ``schedule_class.procs_2b_rescheduled`` is truthy.
    """
    schedule_class.nodes = get_registered_vms(
        local_settings, node_type='bootstrapped_nodes')

    try:
        retries = getval(
            run_settings, '%s/input/reliability/maximum_retry' % RMIT_SCHEMA)
    except SettingNotFoundException:
        retries = 0
    local_settings['maximum_retry'] = retries

    try:
        run_id = int(getval(run_settings, '%s/system/id' % RMIT_SCHEMA))
    except (SettingNotFoundException, ValueError):
        run_id = 0

    # Guard clause: the plain scheduling path returns early.
    if not schedule_class.procs_2b_rescheduled:
        messages.info(run_settings, '%d: scheduling processes' % run_id)
        start_schedule(schedule_class, run_settings, local_settings)
        return
    messages.info(run_settings, '%d: rescheduling failed processes' % run_id)
    start_reschedule(schedule_class, run_settings, local_settings)
示例#9
0
def schedule_task(schedule_class, run_settings, local_settings):
    """Announce and start scheduling, or rescheduling, of processes.

    ``schedule_class.nodes`` is refreshed from ``get_registered_vms``
    (node type ``'bootstrapped_nodes'``).  The ``maximum_retry``
    reliability setting is copied into ``local_settings`` (0 when not
    found).  The ``system/id`` setting, read as an integer and
    defaulting to 0 when missing or not numeric, prefixes the message
    sent through ``messages.info``.  ``start_reschedule`` runs when
    ``schedule_class.procs_2b_rescheduled`` is truthy, otherwise
    ``start_schedule``.
    """
    schedule_class.nodes = get_registered_vms(
        local_settings, node_type='bootstrapped_nodes')

    try:
        max_retry = getval(
            run_settings, '%s/input/reliability/maximum_retry' % RMIT_SCHEMA)
    except SettingNotFoundException:
        max_retry = 0
    local_settings['maximum_retry'] = max_retry

    try:
        task_id = int(getval(run_settings, '%s/system/id' % RMIT_SCHEMA))
    except (SettingNotFoundException, ValueError):
        task_id = 0

    # Evaluate the flag once; it selects both the message and the action.
    rescheduling = bool(schedule_class.procs_2b_rescheduled)
    if rescheduling:
        announcement = '%d: rescheduling failed processes' % task_id
    else:
        announcement = '%d: scheduling processes' % task_id
    messages.info(run_settings, announcement)

    if rescheduling:
        start_reschedule(schedule_class, run_settings, local_settings)
    else:
        start_schedule(schedule_class, run_settings, local_settings)
示例#10
0
def complete_schedule(schedule_class, local_settings):
    """Record the bootstrapped nodes on which scheduling has completed.

    ``schedule_class.nodes`` is refreshed from ``get_registered_vms``
    (node type ``'bootstrapped_nodes'``).  Depending on whether
    ``schedule_class.procs_2b_rescheduled`` is truthy, the target list is
    ``rescheduled_nodes`` or ``scheduled_nodes``.  A node is skipped when
    its IP is already in the target list, when ``is_vm_running`` reports
    it is not running, or when its destination URL cannot be built.

    For the remaining nodes ``_is_schedule_complete`` is called.  On
    completion the node is appended to the target list as
    ``[id, ip, region, 'running']`` and the process counters are updated:
    when rescheduling, that node's ``'reschedule_ready'`` processes are
    set to ``'ready'`` and counted in ``total_rescheduled_procs``;
    otherwise every process on that node is counted in
    ``total_scheduled_procs``.

    :param schedule_class: stage object holding the node and process
        lists described above.
    :param local_settings: settings mapping; ``'type'`` is read here.
    """
    logger.debug("started")
    schedule_class.nodes = get_registered_vms(local_settings,
                                              node_type='bootstrapped_nodes')
    for node in schedule_class.nodes:
        node_ip = node.ip_address
        logger.debug("node_ip=%s" % node_ip)
        if not node_ip:
            node_ip = node.private_ip_address

        # Pick the list that tracks finished nodes for the current mode.
        rescheduling = schedule_class.procs_2b_rescheduled
        if rescheduling:
            node_list = schedule_class.rescheduled_nodes
            skip_tag = "skip2"
        else:
            node_list = schedule_class.scheduled_nodes
            skip_tag = "skip1"
        if any(x[1] == node_ip for x in node_list):
            logger.debug(skip_tag)
            continue

        if not is_vm_running(node):
            # An unlikely situation where the node crashed after it was
            # detected as registered.
            #FIXME: should error nodes be counted as finished?
            #FIXME: remove this instance from created_nodes
            logger.error('Instance %s not running' % node.id)
            logger.debug("skip3")
            continue

        logger.debug('mynode=%s' % node_ip)
        try:
            relative_path = "%s@%s" % (
                local_settings['type'],
                schedule_class.get_relative_output_path(local_settings))
            destination = get_url_with_credentials(local_settings,
                                                   relative_path,
                                                   is_relative_path=True,
                                                   ip_address=node_ip)
        except Exception as e:
            # Without a destination there is nothing to poll.  Falling
            # through would raise NameError on the first node or reuse
            # the previous node's destination, so skip this node.
            logger.error(e)
            continue
        logger.debug("Relative path %s" % relative_path)
        logger.debug("Destination %s" % destination)

        fin = _is_schedule_complete(node_ip, local_settings, destination)
        logger.debug("fin=%s" % fin)
        if not fin:
            # Parenthesised single-argument form prints the same text
            # under both the print statement and the print function.
            print("job still running on %s" % node_ip)
            continue

        logger.debug("done.")
        # Re-check in case the list changed since the test at the top.
        if any(x[1] == node_ip for x in node_list):
            logger.info("We have already scheduled process on node %s"
                        % node_ip)
            continue

        node_list.append([node.id, node_ip, unicode(node.region), 'running'])
        node_procs = [x for x in schedule_class.current_processes
                      if x['ip_address'] == node_ip]
        if rescheduling:
            ready_procs = [x for x in node_procs
                           if x['status'] == 'reschedule_ready']
            schedule_class.total_rescheduled_procs += len(ready_procs)
            for process in ready_procs:
                process['status'] = 'ready'
            schedule_class.all_processes = update_lookup_table(
                schedule_class.all_processes,
                reschedule_to_ready='reschedule_to_ready')
        else:
            schedule_class.total_scheduled_procs += len(node_procs)
示例#11
0
def complete_schedule(schedule_class, local_settings):
    """Record the bootstrapped nodes on which scheduling has completed.

    ``schedule_class.nodes`` is refreshed from ``get_registered_vms``
    (node type ``'bootstrapped_nodes'``).  Depending on whether
    ``schedule_class.procs_2b_rescheduled`` is truthy, the target list is
    ``rescheduled_nodes`` or ``scheduled_nodes``.  A node is skipped when
    its IP is already in the target list, when ``is_vm_running`` reports
    it is not running, or when its destination URL cannot be built.

    For the remaining nodes ``_is_schedule_complete`` is called.  On
    completion the node is appended to the target list as
    ``[id, ip, region, 'running']`` and the process counters are updated:
    when rescheduling, that node's ``'reschedule_ready'`` processes are
    set to ``'ready'`` and counted in ``total_rescheduled_procs``;
    otherwise every process on that node is counted in
    ``total_scheduled_procs``.

    :param schedule_class: stage object holding the node and process
        lists described above.
    :param local_settings: settings mapping; ``'type'`` is read here.
    """
    logger.debug("started")
    schedule_class.nodes = get_registered_vms(local_settings,
                                              node_type='bootstrapped_nodes')
    for node in schedule_class.nodes:
        node_ip = node.ip_address
        logger.debug("node_ip=%s" % node_ip)
        if not node_ip:
            node_ip = node.private_ip_address

        # Pick the list that tracks finished nodes for the current mode.
        rescheduling = schedule_class.procs_2b_rescheduled
        if rescheduling:
            node_list = schedule_class.rescheduled_nodes
            skip_tag = "skip2"
        else:
            node_list = schedule_class.scheduled_nodes
            skip_tag = "skip1"
        if any(x[1] == node_ip for x in node_list):
            logger.debug(skip_tag)
            continue

        if not is_vm_running(node):
            # An unlikely situation where the node crashed after it was
            # detected as registered.
            #FIXME: should error nodes be counted as finished?
            #FIXME: remove this instance from created_nodes
            logger.error('Instance %s not running' % node.id)
            logger.debug("skip3")
            continue

        logger.debug('mynode=%s' % node_ip)
        try:
            relative_path = "%s@%s" % (
                local_settings['type'],
                schedule_class.get_relative_output_path(local_settings))
            destination = get_url_with_credentials(local_settings,
                                                   relative_path,
                                                   is_relative_path=True,
                                                   ip_address=node_ip)
        except Exception as e:
            # Without a destination there is nothing to poll.  Falling
            # through would raise NameError on the first node or reuse
            # the previous node's destination, so skip this node.
            logger.error(e)
            continue
        logger.debug("Relative path %s" % relative_path)
        logger.debug("Destination %s" % destination)

        fin = _is_schedule_complete(node_ip, local_settings, destination)
        logger.debug("fin=%s" % fin)
        if not fin:
            # Parenthesised single-argument form prints the same text
            # under both the print statement and the print function.
            print("job still running on %s" % node_ip)
            continue

        logger.debug("done.")
        # Re-check in case the list changed since the test at the top.
        if any(x[1] == node_ip for x in node_list):
            logger.info("We have already scheduled process on node %s"
                        % node_ip)
            continue

        node_list.append([node.id, node_ip, unicode(node.region), 'running'])
        node_procs = [x for x in schedule_class.current_processes
                      if x['ip_address'] == node_ip]
        if rescheduling:
            ready_procs = [x for x in node_procs
                           if x['status'] == 'reschedule_ready']
            schedule_class.total_rescheduled_procs += len(ready_procs)
            for process in ready_procs:
                process['status'] = 'ready'
            schedule_class.all_processes = update_lookup_table(
                schedule_class.all_processes,
                reschedule_to_ready='reschedule_to_ready')
        else:
            schedule_class.total_scheduled_procs += len(node_procs)