Пример #1
0
def default_remove_vessels(overlord, vessel_handlers):
    """Release vessels that are no longer running and return the rest.

    Every handle in ``vessel_handlers`` is checked with
    ``explib.get_vessel_status``. A vessel counts as stopped when that
    lookup raises, or when the reported status is anything other than
    ``explib.VESSEL_STATUS_STARTED``.

    Args:
        overlord: Object supplying ``logger`` (with ``debug``/``info``),
            ``config['identity']``, ``release_vessels`` and
            ``list_difference``.
        vessel_handlers: Sized collection of vessel handles to check.

    Returns:
        ``vessel_handlers`` itself when nothing was stopped; otherwise
        whatever ``overlord.list_difference(vessel_handlers, stopped)``
        returns.
    """
    overlord.logger.debug('Checking for stopped vessels')

    stopped_vessels = []
    for vessel in vessel_handlers:
        try:
            vessel_status = explib.get_vessel_status(
                vessel, overlord.config['identity'])
        except Exception:
            # A failed lookup is handled like a stopped vessel. Only
            # Exception is caught so that KeyboardInterrupt and SystemExit
            # still propagate instead of silently releasing the vessel.
            stopped_vessels.append(vessel)
        else:
            if vessel_status != explib.VESSEL_STATUS_STARTED:
                stopped_vessels.append(vessel)

    if stopped_vessels:
        overlord.logger.info('Releasing ' + str(len(stopped_vessels)) +
                             ' stopped vessels')
        overlord.release_vessels(stopped_vessels)

        # Drop the released handles from the list that is returned.
        vessel_handlers = overlord.list_difference(vessel_handlers,
                                                   stopped_vessels)

    overlord.logger.info('Currently have ' + str(len(vessel_handlers)) +
                         ' running vessels')

    return vessel_handlers
Пример #2
0
def default_remove_vessels(overlord, vessel_handlers):
    """Release every vessel that is not running; return the ones that are.

    A vessel is considered stopped when ``explib.get_vessel_status`` raises
    for it, or when the status it reports differs from
    ``explib.VESSEL_STATUS_STARTED``.

    Args:
        overlord: Object supplying ``logger`` (with ``debug``/``info``),
            ``config["identity"]``, ``release_vessels`` and
            ``list_difference``.
        vessel_handlers: Sized collection of vessel handles to check.

    Returns:
        ``vessel_handlers`` unchanged when no vessel was stopped; otherwise
        the result of ``overlord.list_difference(vessel_handlers, stopped)``.
    """
    overlord.logger.debug("Checking for stopped vessels")

    def _is_stopped(vessel):
        # Decide whether a single vessel should be released.
        try:
            vessel_status = explib.get_vessel_status(vessel, overlord.config["identity"])
        except Exception:
            # A failed lookup counts as stopped. Catching Exception (not a
            # bare except) lets KeyboardInterrupt/SystemExit propagate.
            return True
        return vessel_status != explib.VESSEL_STATUS_STARTED

    stopped_vessels = [vessel for vessel in vessel_handlers if _is_stopped(vessel)]

    if stopped_vessels:
        overlord.logger.info("Releasing " + str(len(stopped_vessels)) + " stopped vessels")
        overlord.release_vessels(stopped_vessels)

        # Drop the released handles from the list that is returned.
        vessel_handlers = overlord.list_difference(vessel_handlers, stopped_vessels)

    overlord.logger.info("Currently have " + str(len(vessel_handlers)) + " running vessels")

    return vessel_handlers
def _check_vessel_status_change(vesselhandle, monitordict):
  """
  Checks the status of an individual vessel and calls the registered
  callback function for the monitor if the vessel's status has changed since
  the last time it was checked.
  """
  try:
    # When the monitor is removed/canceled, the parallelized function isn't
    # aborted and we instead just have each of these calls immediately return.
    if monitordict['canceled']:
      return
    
    datadict = monitordict['vessels'][vesselhandle]
    if 'status' not in datadict:
      datadict['status'] = ''
      
    old_data = datadict.copy()
    
    status = experimentlib.get_vessel_status(vesselhandle, monitordict['identity'])
    datadict['status'] = status
    
    # No matter where the above try block returned from, we want to see if
    # the vessel data changed and call the user's callback if it has.
    new_data = datadict.copy()
    
    # Note that by not letting the lock go before we call the user's callback
    # function, the processing of all of the vessels will slow down but we
    # avoid requiring the user to handle locking to protect against another
    # call to the callback for the same vessel.
    if old_data['status'] != new_data['status']:
      try:
        # TODO: make sure that exception's from the user's code end up
        # somewhere where the user has access to them. For now, we leave it to
        # the user to make sure they handle exceptions rather than let them
        # escape their callback and this is documented in the docstring of
        # the function register_vessel_status_monitor.
        monitordict['callback'](vesselhandle, old_data['status'], new_data['status'])
      
      except Exception:
        _debug_print("Exception occurred in vessel status change callback:")
        _debug_print(traceback.format_exc())
  
    # In order to prevent repeating failures, we remove the vesselhandle
    # from the monitor's list if the status indicates a positive response.
    # This means that scripts should occasionally add their known active
    # vessels to the monitor to prevent temporary failures from causing the
    # vessel to be subsequently ignored forever.
    if status in experimentlib.VESSEL_STATUS_SET_INACTIVE:
      _monitor_lock.acquire()
      try:
        monitordict['vesselhandle_list'].remove(vesselhandle)
        # We don't "del monitordict['vessels'][vesselhandle]" because it
        # doesn't hurt anything to leave it other than taking up a bit of
        # space, and it feels safer to leave it there just in case, for
        # example, this code got changed to put the "remove" call in the
        # try block above when access to the vessel's lock is still needed.
      finally:
        _monitor_lock.release()
      
  except Exception:
    _debug_print(traceback.format_exc())
Пример #4
0
def _log_and_release_failed_vessels(failed_list):
  """
  <Purpose>
    Writes a failure summary and each failed vessel's log contents to the
    overlord log file, then releases the failed vessels.

  <Arguments>
    failed_list
      Vessel handles on which starting the program failed.

  <Exceptions>
    Errors from explib.get_nodeid_and_vesselname, explib.get_node_location
    and release_vessels are not caught here. A failed vessel log fetch is
    replaced by a placeholder string.

  <Side Effects>
    Writes to config['logfile'] and releases the vessels in failed_list.

  <Returns>
    None
  """
  config['logfile'].write(str(time.time()) + ': Running ' + config['program_filename'] + ' failed on ' + str(len(failed_list)) + ' vessels\n')

  # Get details about failed vessel(s) and log them
  for vh in failed_list:
    try:
      vessel_log = explib.get_vessel_log(vh, config['identity'])
    except Exception:
      # Best effort: still record which vessel failed even without its log.
      vessel_log = '[ERROR: vessel log fetch failed]'

    nodeid, _vesselname = explib.get_nodeid_and_vesselname(vh)
    nodelocation = explib.get_node_location(nodeid)

    # Log the vessel's log contents
    config['logfile'].write('Log contents of failed vessel at ' + nodelocation + ': ' + vessel_log + '\n')
    config['logfile'].flush()

  # Release the failed vessels
  release_vessels(failed_list, 'Releasing failed vessel(s)...')


def run(*args):
  """
  <Purpose>
    Starts the deployment and monitoring of a service on a number of vessels.
    Handles all acquisition of, uploading to, starting, and release of vessels.
    Contains the main loop of this program, and is thus the final function to
    call in all client programs. Requires init() to have been called prior to
    running.

  <Arguments>
    *args
      Passed through unchanged to run_on_vessels, after the program filename,
      every time the program is started on a batch of vessels.

  <Exceptions>
    None are raised deliberately. Errors from opening the log file and from
    calls that are not individually guarded below propagate to the caller.

  <Side Effects>
    Persistently writes to a log file (config['logfilename'], truncated on
    start; the open file object is stored in config['logfile']).
    Releases any vessels already acquired for config['identity'].
    Prints progress/debug output to stdout.

  <Returns>
    None. Only returns once KEEP_RUNNING stops comparing equal to True.
  """
  # Write logfile header
  config['logfile'] = open(config['logfilename'], 'w')
  config['logfile'].write('################################################\n')
  config['logfile'].write('##   Overlord Deployment and Monitoring Log   ##\n')
  config['logfile'].write('################################################\n\n')
  config['logfile'].write('GENI user:              ' + config['identity']['username'] + '\n')
  config['logfile'].write('Vessels to monitor:     ' + str(config['vesselcount']) + '\n')
  config['logfile'].write('Time of script start:   ' + str(time.time()) + '\n\n')
  config['logfile'].flush()

  # Release any preallocated vessels
  vesselhandle_list = explib.seattlegeni_get_acquired_vessels(config['identity'])
  release_vessels(vesselhandle_list, 'Releasing ' + str(len(vesselhandle_list)) + ' preallocated vessels...')

  # Acquire an initial sample of vessels; keep trying until at least one
  # vessel handle comes back.
  config['logfile'].write(str(time.time()) + ': Fetching initial batch of ' + str(config['vesselcount']) + ' vessels:\n')
  config['logfile'].flush()
  vesselhandle_list = []
  while not vesselhandle_list:
    vesselhandle_list = acquire_vessels(config['vesselcount'])

  # Upload program to vessels
  vesselhandle_list = upload_to_vessels(vesselhandle_list, config['program_filename'])

  # Run program on vessels
  vesselhandle_list, failed_list = run_on_vessels(vesselhandle_list,
                                                  config['program_filename'],
                                                  *args)

  # Release any failed vessels
  if failed_list:
    _log_and_release_failed_vessels(failed_list)

  # Initialize counter variable for loop iterations
  loop_iterations = 0
  # NOTE(review): there is no "global PREPPED" declaration in this function,
  # so this binds a function-local name only. If a module-level readiness
  # flag was intended, a global declaration is needed -- confirm with callers.
  PREPPED = True
  print("PREPPED!")
  print("Vessel Handles: %s" % vesselhandle_list)

  # Main loop
  while KEEP_RUNNING == True:
    print("Starting Loop!")
    # Check for vessels not in started state
    stopped_vessel_list = []
    for vh in vesselhandle_list:
      try:
        vessel_status = explib.get_vessel_status(vh, config['identity'])
        log = explib.get_vessel_log(vh, config['identity'])
        print("Loop Log: %s" % log)
      except Exception:
        # Either lookup failed, so treat the vessel as stopped and remove it
        # from vesselhandle_list. Exception (not a bare except) is caught so
        # that KeyboardInterrupt/SystemExit can still end the loop.
        # TODO: proper way to handle failed advertisements?
        stopped_vessel_list.append(vh)
      else:
        if vessel_status != explib.VESSEL_STATUS_STARTED:
          stopped_vessel_list.append(vh)

    # Release and replace any stopped vessels
    if stopped_vessel_list:
      # Release any stopped vessels
      release_vessels(stopped_vessel_list, 'Releasing ' + str(len(stopped_vessel_list)) + ' stopped vessel(s)...')

      # Remove released vessels from vesselhandle_list
      vesselhandle_list = list_difference(vesselhandle_list, stopped_vessel_list)

    # Ensure that enough vessels are running
    if len(vesselhandle_list) < config['vesselcount']:
      # If there aren't enough active vessels, acquire some
      config['logfile'].write(str(time.time()) + ': Only ' + str(len(vesselhandle_list)) + ' vessel(s) out of target ' + str(config['vesselcount']) + ' detected\n')
      config['logfile'].flush()
      fresh_vessels = acquire_vessels(config['vesselcount'] - len(vesselhandle_list))

      # Upload and run program to/on fresh vessels
      fresh_vessels = upload_to_vessels(fresh_vessels, config['program_filename'])
      success_list, failed_list = run_on_vessels(fresh_vessels,
                                                 config['program_filename'],
                                                 *args)

      # Release any failed vessels
      if failed_list:
        _log_and_release_failed_vessels(failed_list)

        # Remove released vessels from fresh_vessels list
        fresh_vessels = list_difference(fresh_vessels, failed_list)

      # Add fresh_vessels to vesselhandle_list
      vesselhandle_list.extend(fresh_vessels)

    # Sleep for parameterized amount of time
    time.sleep(VESSEL_POLLING_TIME)

    # Log a liveness message every certain number of iterations
    loop_iterations += 1
    if loop_iterations % LOG_AFTER_THIS_MANY_LOOPS == 0:
      config['logfile'].write(str(time.time()) + ': Still alive...\n')
      config['logfile'].flush()

    # Renew vessels according to constant period. The counter doubles as the
    # elapsed-time estimate, so it is reset after every renewal.
    if loop_iterations * VESSEL_POLLING_TIME > VESSEL_RENEWAL_PERIOD:
      explib.seattlegeni_renew_vessels(config['identity'], vesselhandle_list)
      loop_iterations = 0
Пример #5
0
def _check_vessel_status_change(vesselhandle, monitordict):
    """
  Checks the status of an individual vessel and calls the registered
  callback function for the monitor if the vessel's status has changed since
  the last time it was checked.
  """
    try:
        # When the monitor is removed/canceled, the parallelized function isn't
        # aborted and we instead just have each of these calls immediately return.
        if monitordict['canceled']:
            return

        datadict = monitordict['vessels'][vesselhandle]
        if 'status' not in datadict:
            datadict['status'] = ''

        old_data = datadict.copy()

        status = experimentlib.get_vessel_status(vesselhandle,
                                                 monitordict['identity'])
        datadict['status'] = status

        # No matter where the above try block returned from, we want to see if
        # the vessel data changed and call the user's callback if it has.
        new_data = datadict.copy()

        # Note that by not letting the lock go before we call the user's callback
        # function, the processing of all of the vessels will slow down but we
        # avoid requiring the user to handle locking to protect against another
        # call to the callback for the same vessel.
        if old_data['status'] != new_data['status']:
            try:
                # TODO: make sure that exception's from the user's code end up
                # somewhere where the user has access to them. For now, we leave it to
                # the user to make sure they handle exceptions rather than let them
                # escape their callback and this is documented in the docstring of
                # the function register_vessel_status_monitor.
                monitordict['callback'](vesselhandle, old_data['status'],
                                        new_data['status'])

            except Exception:
                _debug_print(
                    "Exception occurred in vessel status change callback:")
                _debug_print(traceback.format_exc())

        # In order to prevent repeating failures, we remove the vesselhandle
        # from the monitor's list if the status indicates a positive response.
        # This means that scripts should occasionally add their known active
        # vessels to the monitor to prevent temporary failures from causing the
        # vessel to be subsequently ignored forever.
        if status in experimentlib.VESSEL_STATUS_SET_INACTIVE:
            _monitor_lock.acquire()
            try:
                monitordict['vesselhandle_list'].remove(vesselhandle)
                # We don't "del monitordict['vessels'][vesselhandle]" because it
                # doesn't hurt anything to leave it other than taking up a bit of
                # space, and it feels safer to leave it there just in case, for
                # example, this code got changed to put the "remove" call in the
                # try block above when access to the vessel's lock is still needed.
            finally:
                _monitor_lock.release()

    except Exception:
        _debug_print(traceback.format_exc())