Пример #1
0
def process_API_call(fullrequest):
    """
    Dispatch one '|'-delimited node manager request to its handler.

    fullrequest: the raw request string; its first '|'-field is the call
    name, looked up in API_dict to obtain (numberofargs, permissiontype,
    APIfunction).

    Returns whatever the API function returns.
    Raises nmAPI.BadRequest for an unknown call, too few arguments, or an
    unknown vessel.
    """

    callname = fullrequest.split('|')[0]

    if DEBUG_MODE:
        servicelogger.log("Now handling call: " + callname)

    if callname not in API_dict:
        raise nmAPI.BadRequest("Unknown Call")

    # find the entry that describes this call...
    numberofargs, permissiontype, APIfunction = API_dict[callname]

    # we'll do the signature checks first... (the signature needs to be stripped
    # off to get the args anyways)...

    if permissiontype == 'Public':
        # There should be no signature, so this is the raw request...
        # NOTE(review): the "- 1" makes this check accept one field fewer than
        # numberofargs -- confirm this is intentional and not an off-by-one.
        if len(fullrequest.split('|')) < numberofargs - 1:
            raise nmAPI.BadRequest("Not Enough Arguments")

        # If there are 3 args, we want to split at most 3 times (the first item is
        # the callname)
        callargs = fullrequest.split('|', numberofargs)
        # return any output for the user...
        return APIfunction(*callargs[1:])

    else:
        # strip off the signature and get the requestdata
        requestdata, requestsignature = fastsigneddata.signeddata_split_signature(
            fullrequest)

        # NOTE: the first argument *must* be the vessel name!!!!!!!!!!!
        vesselname = requestdata.split('|', 2)[1]

        if vesselname not in nmAPI.vesseldict:
            raise nmAPI.BadRequest('Unknown Vessel')

        # I must have something to check...
        if permissiontype == 'Owner':
            # only the owner is allowed, so the list of keys is merely that key
            allowedkeys = [nmAPI.vesseldict[vesselname]['ownerkey']]
        else:
            # the user keys are also allowed
            allowedkeys = [nmAPI.vesseldict[vesselname]['ownerkey']
                           ] + nmAPI.vesseldict[vesselname]['userkeys']

        # I need to pass the fullrequest in here...
        # Presumably raises on a bad signature so the lines below only run on
        # success -- TODO confirm against ensure_is_correctly_signed.
        ensure_is_correctly_signed(fullrequest, allowedkeys,
                                   nmAPI.vesseldict[vesselname]['oldmetadata'])

        # If there are 3 args, we want to split at most 3 times (the first item is
        # the callname)
        callargs = requestdata.split('|', numberofargs)

        #store the request signature as old metadata
        nmAPI.vesseldict[vesselname]['oldmetadata'] = requestsignature

        # return any output for the user...
        return APIfunction(*callargs[1:])
Пример #2
0
def advertise_to_DNS(unique_id):
  """
  Advertise unique_id to the zenodotus DNS server. We strip away whatever
  follows the NAME_SERVER part of the unique_id. For instance, if our
  unique_id is abc.NAME_SERVER:1234@xyz, then we only advertise
  abc.NAME_SERVER.

  Raises:
    Exception if unique_id does not contain NAME_SERVER at all.
  """
  # IP that maps to the unique_id
  myip = emulcomm.getmyip()

  # Extract the part of unique_id up to the name server,
  # i.e. xyz.zenodotus.washington.edu, and discard whatever that follows
  name_server_pos = unique_id.find(NAME_SERVER)
  if name_server_pos > -1:
    unique_id = unique_id[0 : name_server_pos + len(NAME_SERVER)]
  else:
    raise Exception("Invalid unique_id format: '" + str(unique_id) + "'")

  advertise_success = False

  # We keep trying until successful advertisement (Fix for Ticket #956)
  while not advertise_success:
    try:
      advertise_announce(unique_id, myip, DNS_CACHE_TTL)
      servicelogger.log("[INFO]: Advertised " + str(unique_id) + " which maps to " + myip)
      advertise_success = True
    except Exception, error:
      if 'announce error' in str(error):
        # We can confidently drop the exception here. The advertisement
        # service can sometimes be flaky, yet it can guarantee advertisement
        # of our key-value pair on at least one of the three components.
        # Bug fix: the original comment promised the error would be logged
        # as a warning, but nothing was actually logged -- do it now.
        servicelogger.log("[WARNING]: Advertise error for " + str(unique_id) +
                          ": " + str(error))
        advertise_success = True
      else:
        # Unknown failure -- keep retrying the advertisement.
        advertise_success = False
Пример #3
0
def update_restrictions():
    # Create an internal handler function, takes a resource line and returns the new number of threads
    def _internal_func(lineContents):
        try:
            threads = float(lineContents[2])
            threads = threads * EVENT_SCALAR
            threads = int(threads)
            threads = max(threads, HARD_MIN)  # Set a hard minimum
            return threads
        except:
            # On failure, return the minimum
            return HARD_MIN

    # Create a task that uses our internal function
    task = ("resource", "events", _internal_func, True)
    taskList = [task]

    # Process all the resource files
    errors = nmrestrictionsprocessor.process_all_files(taskList)

    # Log any errors we encounter
    if errors != []:
        for e in errors:
            print e
            servicelogger.log(
                "[ERROR]:Unable to patch events limit in resource file " + e[0] + ", exception " + str(e[1])
            )
Пример #4
0
def update_restrictions():
    # Create an internal handler function, takes a resource line and returns the new number of threads
    def _internal_func(lineContents):
        try:
            threads = float(lineContents[2])
            threads = threads * EVENT_SCALAR
            threads = int(threads)
            threads = max(threads, HARD_MIN)  # Set a hard minimum
            return threads
        except:
            # On failure, return the minimum
            return HARD_MIN

    # Create a task that uses our internal function
    task = ("resource", "events", _internal_func, True)
    taskList = [task]

    # Process all the resource files
    errors = nmrestrictionsprocessor.process_all_files(taskList)

    # Log any errors we encounter
    if errors != []:
        for e in errors:
            print e
            servicelogger.log(
                "[ERROR]:Unable to patch events limit in resource file " +
                e[0] + ", exception " + str(e[1]))
def uninstall_nokia():
  """
  <Purpose>
    Remove the startup script and symlink to it in the /etc/init.d and
    /etc/rc2.d directories, and kill all seattle processes by using
    stop_all_seattle_processes. This requires the user to be currently on
    root access.
  <Arguments>
    None.
  <Exceptions>
    None.
  <Side Effects>
    Removes the startup script and the symlink to it, and stops seattle from
    running.
  <Returns>
    True if succeeded in uninstalling,
    False otherwise.
  """
  

  # Note to developers: If you need to change the path of the startup script or
  # the path of the symlink, make sure you keep it consistent with those in
  # seattleinstaller.py.

  startup_script_name = "nokia_seattle_startup.sh"
  # The directory where the startup script resides.
  startup_script_dir = "/etc/init.d/"
  # The full path to the startup script.
  startup_script_path = startup_script_dir + startup_script_name

  # The name of the symlink that links to the startup script.
  symlink_name = "S99startseattle"
  # The directory where the symlink to the startup script resides.
  symlink_dir = "/etc/rc2.d/"
  # The full path to the symlink.
  symlink_path = symlink_dir + symlink_name

  # Check if the startup script and the symlink exists.
  if not os.path.exists(startup_script_path) and \
        not os.path.lexists(symlink_path):
    _output("Neither the startup script nor the symlink exists.")
    return True

  # Remove the startup script.
  try:
    os.remove(startup_script_path)
  # Cannot remove the startup script due to some reason.
  except OSError, e:
    # The startup script does not exist - that is fine, we will continue 
    # and try to remove the symlink.
    if e.errno == errno.ENOENT:
      pass
    else:
      # The startup script cannot be removed.
      _output("The startup script cannot be removed. Make sure you have the " \
                + "permission to do so.")
      servicelogger.log("Seattle cannot be uninstalled because " \
                          + startup_script_path + " cannot be removed.")
      return False
  # NOTE(review): this excerpt ends here -- the symlink removal, the process
  # shutdown, and the final "return True" described in the docstring are not
  # present in the visible snippet.
Пример #6
0
def set_accepter(accepter):
    """
    Record `accepter` in the module-global accepter_thread, serialized by
    accepter_state's lock.

    Fix: the lock is now released in a finally block so it cannot be leaked
    if servicelogger.log raises while the lock is held.
    """
    global accepter_thread
    accepter_state['lock'].acquire(True)
    try:
        accepter_thread = accepter

        if DEBUG_MODE:
            servicelogger.log("[DEBUG] Accepter Thread has been set...")
    finally:
        accepter_state['lock'].release()
Пример #7
0
def set_accepter(accepter):
  """
  Record `accepter` in the module-global accepter_thread, serialized by
  accepter_state's lock.

  Fix: the lock is now released in a finally block so it cannot be leaked
  if servicelogger.log raises while the lock is held.
  """
  global accepter_thread
  accepter_state['lock'].acquire(True)
  try:
    accepter_thread = accepter

    if DEBUG_MODE:
      servicelogger.log("[DEBUG] Accepter Thread has been set...")
  finally:
    accepter_state['lock'].release()
def uninstall_nokia():
    """
    <Purpose>
      Remove the startup script and symlink to it in the /etc/init.d and
      /etc/rc2.d directories, and kill all seattle processes by using
      stop_all_seattle_processes. This requires the user to be currently on
      root access.
    <Arguments>
      None.
    <Exceptions>
      None.
    <Side Effects>
      Removes the startup script and the symlink to it, and stops seattle
      from running.
    <Returns>
      True if succeeded in uninstalling,
      False otherwise.
    """

    # Note to developers: If you need to change the path of the startup script or
    # the path of the symlink, make sure you keep it consistent with those in
    # seattleinstaller.py.

    startup_script_name = "nokia_seattle_startup.sh"
    # The directory where the startup script resides.
    startup_script_dir = "/etc/init.d/"
    # The full path to the startup script.
    startup_script_path = startup_script_dir + startup_script_name

    # The name of the symlink that links to the startup script.
    symlink_name = "S99startseattle"
    # The directory where the symlink to the startup script resides.
    symlink_dir = "/etc/rc2.d/"
    # The full path to the symlink.
    symlink_path = symlink_dir + symlink_name

    # Check if the startup script and the symlink exists.
    if not os.path.exists(startup_script_path) and \
          not os.path.lexists(symlink_path):
        _output("Neither the startup script nor the symlink exists.")
        return True

    # Remove the startup script.
    try:
        os.remove(startup_script_path)
    # Cannot remove the startup script due to some reason.
    except OSError, e:
        # The startup script does not exist - that is fine, we will continue
        # and try to remove the symlink.
        if e.errno == errno.ENOENT:
            pass
        else:
            # The startup script cannot be removed.
            _output("The startup script cannot be removed. Make sure you have the " \
                      + "permission to do so.")
            servicelogger.log("Seattle cannot be uninstalled because " \
                                + startup_script_path + " cannot be removed.")
            return False
    # NOTE(review): this excerpt ends here -- the symlink removal, the process
    # shutdown, and the final "return True" described in the docstring are not
    # present in the visible snippet.
Пример #9
0
def process_API_call(fullrequest):
  """
  Dispatch one '|'-delimited node manager request to its handler.

  fullrequest: the raw request string; its first '|'-field is the call name,
  looked up in API_dict to obtain (numberofargs, permissiontype,
  APIfunction).

  Returns whatever the API function returns.
  Raises nmAPI.BadRequest for an unknown call, too few arguments, or an
  unknown vessel.
  """

  callname = fullrequest.split('|')[0]

  if DEBUG_MODE:
    servicelogger.log("Now handling call: " + callname)

  if callname not in API_dict:
    raise nmAPI.BadRequest("Unknown Call")

  # find the entry that describes this call...
  numberofargs, permissiontype, APIfunction = API_dict[callname]
  
  # we'll do the signature checks first... (the signature needs to be stripped
  # off to get the args anyways)...

  if permissiontype == 'Public':
    # There should be no signature, so this is the raw request...
    # NOTE(review): the "-1" makes this check accept one field fewer than
    # numberofargs -- confirm this is intentional and not an off-by-one.
    if len(fullrequest.split('|')) < numberofargs-1:
      raise nmAPI.BadRequest("Not Enough Arguments")

    # If there are 3 args, we want to split at most 3 times (the first item is 
    # the callname)
    callargs = fullrequest.split('|',numberofargs)
    # return any output for the user...
    return APIfunction(*callargs[1:])

  else:
    # strip off the signature and get the requestdata
    requestdata, requestsignature = fastsigneddata.signeddata_split_signature(fullrequest)
    

    # NOTE: the first argument *must* be the vessel name!!!!!!!!!!!
    vesselname = requestdata.split('|',2)[1]

    if vesselname not in nmAPI.vesseldict:
      raise nmAPI.BadRequest('Unknown Vessel')

    # I must have something to check...
    if permissiontype == 'Owner':
      # only the owner is allowed, so the list of keys is merely that key
      allowedkeys = [ nmAPI.vesseldict[vesselname]['ownerkey'] ]
    else:
      # the user keys are also allowed
      allowedkeys = [ nmAPI.vesseldict[vesselname]['ownerkey'] ] + nmAPI.vesseldict[vesselname]['userkeys']

    # I need to pass the fullrequest in here...
    # Presumably raises on a bad signature so the lines below only run on
    # success -- TODO confirm against ensure_is_correctly_signed.
    ensure_is_correctly_signed(fullrequest, allowedkeys, nmAPI.vesseldict[vesselname]['oldmetadata'])
    
    # If there are 3 args, we want to split at most 3 times (the first item is 
    # the callname)
    callargs = requestdata.split('|',numberofargs)
    
    #store the request signature as old metadata
    nmAPI.vesseldict[vesselname]['oldmetadata'] = requestsignature
    
    # return any output for the user...
    return APIfunction(*callargs[1:])
Пример #10
0
def check_and_create_affix_object(virtual_host_name):
  """
  <Purpose>
    The purpose of this function is to check if Affix has been enabled,
    If it is enabled, we create an Affix object with the advertised
    Affix string and return the Affix object as well as whether Affix
    is enabled. 

  <Arguments>
    virtual_host_name - the zenodotus name we want to set for this
      node.

  <Exceptions>
    None

  <Return>
    Returns a Tuple in the form:
    (Boolean, AffixStackInterface, String)
  
    The first item in the tuple is whether Affix has been enabled.
    The second item is an AffixStackInterface object if Affix 
    has been enabled. Otherwise the second item is None.
    The third item is the Affix string that is being used 
    for the Affix object.
  """
  global affix_stack_string
  global affix_enabled

  # Check to see if AFFIX is enabled.
  try:
    affix_enabled_lookup = advertise_lookup(enable_affix_key)[-1]
    # Now we check if the last entry is True or False.
    if affix_enabled_lookup == 'True':
      affix_stack_string = advertise_lookup(affix_service_key)[-1]
      affix_enabled = True
      servicelogger.log("[INFO]: Current advertised Affix string: " + str(affix_stack_string))

      # If Affix is enabled, we can go ahead and create the Affix object 
      # right away so we don't have to repeatedly create it in the
      # loop below.
      affix_legacy_string = "(CoordinationAffix)" + affix_stack_string
      affix_object = AffixStackInterface(affix_legacy_string, virtual_host_name)
      
      # Return the results.
      return (affix_enabled, affix_object, affix_legacy_string)
    else:
      affix_enabled = False  
      # Affix is not enabled, so we return (False, None, None).
      return (affix_enabled, None, None)
  except (AdvertiseError, TimeoutError, ValueError, IndexError), e:
    servicelogger.log("Trying to look up Affix enabled threw " + str(type(e)) + " " + str(e))
    affix_enabled = False
    # Raise error on debug mode.
    if DEBUG_MODE:
      raise
    # Lookup failed (or debug raise skipped), so we return (False, None, None).
    return (affix_enabled, None, None)
def uninstall_Windows():
    """
  <Purpose>
    Removes seattle from the Winodws registry startup key and/or the
    startup folder should either exist, then stops all seattle processes using
    stop_all_seattle_process.py
  <Arguments>
    None.
  <Exceptions>
    Possible IOError could be caused by filepath manipulation from a
      sub-function.
    SeattleNotInstalledError if seattle was not installed prior to uninstall.
  <Side Effects>
    Removes seattle from the Windows registry key and/or the Windows startup
    folder if it exists in either place.
    Stops seattle from running.
  <Returns>
    True if the uninstall succeeded.  Currently, if uninstall fails, it must be
    because seattle was not installed prior to uninstall.  We must return a
    boolean value for the parent function.
  """
    # First see if seattle appears as a value in the Windows startup registry key,
    # and remove it if it exists.
    # removed_from_registry is used later and thus must have a value in case the
    # try: block below raises an exception.
    removed_from_registry = False
    try:
        removed_from_registry = remove_seattle_from_win_startup_registry()
    except WindowsError:
        print "The uninstaller does not have access to the Windows registry " \
            + "startup keys. This means that seattle is likely not installed in " \
            + "your Windows registry startup key, though you may want to " \
            + "manually check the following registry keys and remove seattle " \
            + "from those keys should it exist there: "
        print "HKEY_LOCAL_MACHINE\Software\Microsoft\Windows\CurrentVersion\Run"
        print "HKEY_CURRENT_USER\Software\Microsoft\Windows\CurrentVersion\Run"
        # Distinguish the above-printed text from what will be printed later by
        # by printing a blank line.
        print
        servicelogger.log(" uninstaller could not access the Windows registry " \
                            + "during this attempted uninstall.")

    # Next, see if there is a link to the seattle starter script in the startup
    # folder and remove it if it is there.
    if not WIN_STARTUP_SCRIPT_PATH == None:
        removed_from_startup_folder = \
            remove_seattle_from_win_startup_folder()

    # Check to see if uninstall actually removed seattle from the computer.
    if not removed_from_registry and not removed_from_startup_folder:
        raise SeattleNotInstalledError("Seattle could not be detected as " \
                                         + "having been installed prior to " \
                                         + "uninstall.")
    elif removed_from_registry or removed_from_startup_folder:
        # Stop all instances of seattle from running before returning.
        stop_all_seattle_processes.main()
        return True
Пример #12
0
def start_accepter():
  """
  Begin starting the accepter (excerpt).

  NOTE(review): this snippet is truncated -- it only shows the NAT
  bi-directionality probe that decides use_nat; the rest of the function is
  past the end of the visible excerpt.
  """

  if AUTO_USE_NAT == False:
    # check to see if we should use the nat layer
    try:
      # see if we can currently have a bi-directional connection
      use_nat = nat_check_bi_directional(getmyip(), configuration['ports'][0])
    except Exception,e:
      servicelogger.log("Exception occurred trying to contact forwarder to detect nat "+str(e))
      use_nat = False
Пример #13
0
def start_accepter():
  """
  Begin starting the accepter (excerpt).

  NOTE(review): this snippet is truncated -- it only shows the NAT
  bi-directionality probe that decides use_nat; the rest of the function is
  past the end of the visible excerpt.
  """

  if AUTO_USE_NAT == False:
    # check to see if we should use the nat layer
    try:
      # see if we can currently have a bi-directional connection
      use_nat = nat_check_bi_directional(getmyip(), configuration['ports'][0])
    except Exception,e:
      servicelogger.log("Exception occurred trying to contact forwarder to detect nat "+str(e))
      use_nat = False
Пример #14
0
def parse_arguments():
  """
  Parse all the arguments passed in through the command
  line for the nodemanager. This way in the future it 
  will be easy to add and remove options from the 
  nodemanager.

  Side effects: sets the module globals FOREGROUND, TEST_NM and
  default_shim according to the parsed options.
  """

  # Create the option parser
  parser = optparse.OptionParser(version="Seattle " + version)
  
  # Add the --foreground option.
  parser.add_option('--foreground', dest='foreground',
                    action='store_true', default=False,
                    help="Run the nodemanager in foreground " +
                         "instead of daemonizing it.")
                    

  # Add the --test-mode option.
  parser.add_option('--test-mode', dest='test_mode',
                    action='store_true', default=False,
                    help="Run the nodemanager in test mode.")

  
  # Add the using shim capability.
  # --shims [shim name]: Forces use of the specified shims. The shim name must
  #  conform to the format as specified in:
  #  https://seattle.cs.washington.edu/wiki/UsingShims.
  parser.add_option('--shims', type="string", dest="shim_name",
                    help="Use a user specified shim instead of the" +
                         " default (NatDeciderShim)")


  
  # Parse the arguments.
  options, args = parser.parse_args()

  # Set some global variables.
  global FOREGROUND
  global TEST_NM
  global default_shim


  # Analyze the options
  if options.foreground:
    FOREGROUND = True

  if options.test_mode:
    TEST_NM = True

  if options.shim_name:
    servicelogger.log("[INFO]: Using user-specified shims " + options.shim_name)
    default_shim = options.shim_name
def safe_log(message):
  """
  Log `message` without ever raising: attempt the servicelogger and
  swallow any failure it produces.
  """
  try:
    servicelogger.log(message)
  except:
    # Logging is best-effort; a failed log must never break the caller.
    pass
Пример #16
0
def log(*args):
    """
    Join all arguments into one space-separated string and hand it to
    servicelogger.log, first trimming a single trailing newline since
    servicelogger appends its own.
    """
    logstring = " ".join(str(piece) for piece in args)
    if logstring.endswith("\n"):
        logstring = logstring[:-1]
    servicelogger.log(logstring)
Пример #17
0
def safe_log(message):
  """
  Best-effort logging helper: forward `message` to the servicelogger and
  suppress any exception the attempt raises.
  """
  try:
    servicelogger.log(message)
  except:
    # Never let a logging failure propagate to the caller.
    pass
Пример #18
0
def log(*args):
  """
  Stringify and space-join every argument, strip one trailing newline
  (servicelogger adds its own), and pass the result to servicelogger.log.
  """
  logstring = " ".join(str(piece) for piece in args)
  if logstring.endswith("\n"):
    logstring = logstring[:-1]
  servicelogger.log(logstring)
Пример #19
0
def new_affix_listenforconnection(localip, localport, timeout=10):
  """
  Affix-aware replacement for listenforconnection (excerpt).

  NOTE(review): this snippet only shows the first step -- withdrawing the
  old zenodotus advertisement; localip, localport and timeout are consumed
  by code past the end of the visible excerpt.
  """
  global affix_enabled
  global affix_stack_string
  global zenodotus_advertise_handle

  # Similarly, stop advertising my old Zenodotus name (if any), 
  # ignoring potential errors. If any error is raised, it will
  # be recorded.
  try:
    advertisepipe.remove_from_pipe(zenodotus_advertise_handle)
  except Exception, err:
    servicelogger.log("Unexpected error when attempting to " +
      "remove old zenodotus_advertise_handle. " + str(type(err)) +
      ". " + str(err))
Пример #20
0
 def run(self):
   """Accept connections forever, handing each one to connection_handler."""
   # Run indefinitely.
   # This is on the assumption that getconnection() blocks, and so this won't consume an inordinate amount of resources.
   while True:
     try:
       ip, port, client_socket = self.serversocket.getconnection()
       connection_handler(ip, port, client_socket)
     except SocketWouldBlockError:
       sleep(0.5)
     except SocketTimeoutError:
       sleep(0.5)
     except Exception, e:
       # Anything unexpected is fatal for the accepter: log the traceback
       # and end the thread.
       servicelogger.log("FATAL error in AccepterThread: " + 
           traceback.format_exc())
       return
 def run(self):
     """Accept connections forever, handing each one to connection_handler."""
     # Run indefinitely.
     # This is on the assumption that getconnection() blocks, and so this won't consume an inordinate amount of resources.
     while True:
         try:
             ip, port, client_socket = self.serversocket.getconnection()
             connection_handler(ip, port, client_socket)
         except SocketWouldBlockError:
             sleep(0.5)
         except SocketTimeoutError:
             sleep(0.5)
         except Exception, e:
             # Anything unexpected is fatal for the accepter: log the
             # traceback and end the thread.
             servicelogger.log("FATAL error in AccepterThread: " +
                               traceback.format_exc())
             return
Пример #22
0
def safe_log(message):
  """
  Log a message in a way that cannot throw an exception. First try to log using
  the servicelogger, then just try to print the message.
  """
  try:
    servicelogger.log(message)
  except:
    # servicelogger failed; fall back to stdout, itself guarded below.
    try:
      print message
    except:
      # As the standard output streams aren't closed, it would seem that this
      # should never happen. If it does, though, what can we do to log the
      # message, other than directly write to a file?
      pass
Пример #23
0
def safe_log(message):
  """
  Log a message in a way that cannot throw an exception. First try to log using
  the servicelogger, then just try to print the message.
  """
  try:
    servicelogger.log(message)
  except:
    # servicelogger failed; fall back to stdout, itself guarded below.
    try:
      print message
    except:
      # As the standard output streams aren't closed, it would seem that this
      # should never happen. If it does, though, what can we do to log the
      # message, other than directly write to a file?
      pass
Пример #24
0
def main():
  """
  Node manager entry point (excerpt): daemonize unless FOREGROUND, take the
  "seattlenodemanager" single-instance lock, load nodeman.cfg into the
  module-global configuration, apply IP/interface restrictions, and (on
  Linux/Darwin) ensure the 2009-installer crontab entry is in place.

  NOTE(review): the function continues past the end of this excerpt.
  """

  global configuration

  if not FOREGROUND:
    # Background ourselves.
    daemon.daemonize()

  # ensure that only one instance is running at a time...
  # The branches below show getprocesslock returns True on success, and
  # otherwise a truthy pid or a falsy value identifying the other holder.
  gotlock = runonce.getprocesslock("seattlenodemanager")
  if gotlock == True:
    # I got the lock.   All is well...
    pass
  else:
    if gotlock:
      servicelogger.log("[ERROR]:Another node manager process (pid: " + str(gotlock) + 
          ") is running")
    else:
      servicelogger.log("[ERROR]:Another node manager process is running")
    return

  
  # I'll grab the necessary information first...
  servicelogger.log("[INFO]:Loading config")
  # BUG: Do this better?   Is this the right way to engineer this?
  configuration = persist.restore_object("nodeman.cfg")
  
  # Armon: initialize the network restrictions
  initialize_ip_interface_restrictions(configuration)
  
  
  
  # ZACK BOKA: For Linux and Darwin systems, check to make sure that the new
  #            seattle crontab entry has been installed in the crontab.
  #            Do this here because the "nodeman.cfg" needs to have been read
  #            into configuration via the persist module.
  if nonportable.ostype == 'Linux' or nonportable.ostype == 'Darwin':
    if 'crontab_updated_for_2009_installer' not in configuration or \
          configuration['crontab_updated_for_2009_installer'] == False:
      try:
        import update_crontab_entry
        modified_crontab_entry = \
            update_crontab_entry.modify_seattle_crontab_entry()
        # If updating the seattle crontab entry succeeded, then update the
        # 'crontab_updated_for_2009_installer' so the nodemanager no longer
        # tries to update the crontab entry when it starts up.
        if modified_crontab_entry:
          configuration['crontab_updated_for_2009_installer'] = True
          persist.commit_object(configuration,"nodeman.cfg")

      except Exception,e:
        exception_traceback_string = traceback.format_exc()
        servicelogger.log("[ERROR]: The following error occured when " \
                            + "modifying the crontab for the new 2009 " \
                            + "seattle crontab entry: " \
                            + exception_traceback_string)
Пример #25
0
def handle_request(socketobj):
  """
  Receive one session message from socketobj and dispatch it through
  process_API_call, reporting BadRequest failures back to the client.

  NOTE(review): the outer try: opened below is not closed within this
  excerpt -- the matching handler (per the comment, one that closes
  socketobj) lies past the end of the visible snippet.
  """

  # always close the socketobj
  try:


    try:
      # let's get the request...
      # BUG: Should prevent endless data / slow retrival attacks
      fullrequest = session.session_recvmessage(socketobj)
  
    # Armon: Catch a vanilla exception because repy emulated_sockets
    # will raise Exception when the socket has been closed.
    # This is changed from just passing through socket.error,
    # which we were catching previously.
    except Exception, e:

      #JAC: Fix for the exception logging observed in #992
      if 'Socket closed' in str(e) or 'timed out!' in str(e):
        servicelogger.log('Connection abruptly closed during recv')
        return
      elif 'Bad message size' in str(e):
        servicelogger.log('Received bad message size')
        return
      else:
        # I can't handle this, let's exit
        # BUG: REMOVE LOGGING IN PRODUCTION VERSION (?)
        servicelogger.log_last_exception()
        return



    # handle the request as appropriate
    try:
      retstring = process_API_call(fullrequest)

    # Bad parameters, signatures, etc.
    except nmAPI.BadRequest,e:
      session.session_sendmessage(socketobj, str(e)+"\nError")
      return
Пример #26
0
def enable_affix(affix_string):
    """
    <Purpose>
      Swap the module's timeout_listenforconnection() and getmyip() for
      Affix-backed versions when Affix is enabled.

    <Arguments>
      affix_string - description of the Affix stack to build.

    <SideEffects>
      The module-global timeout_listenforconnection and getmyip are
      overwritten.

    <Exceptions>
      None
    """
    # Nothing to do when Affix is switched off.
    if not affix_enabled:
        return

    global timeout_listenforconnection
    global getmyip

    # Build the Affix stack that backs both replacement calls.
    affix_obj = affix_stack.AffixStack(affix_string)

    def _affix_timeout_listenforconnection(localip, localport, timeout):
        # Wrap a plain Affix listening socket with the timeout layer.
        listening_sock = affix_obj.listenforconnection(localip, localport)
        return timeout_server_socket(listening_sock, timeout)

    # Install the Affix-backed replacements for later use.
    timeout_listenforconnection = _affix_timeout_listenforconnection
    getmyip = affix_obj.getmyip

    servicelogger.log('[INFO] Nodemanager now using Affix string: ' +
                      affix_string)
Пример #27
0
def enable_affix(affix_string):
  """
  <Purpose>
    Overload the listenforconnection() and getmyip() API call 
    if Affix is enabled.

  <Arguments>
    affix_string - description of the Affix stack to build.

  <SideEffects>
    Original listenforconnection() and getmyip() gets overwritten.

  <Exceptions>
    None
  """
  # If Affix is not enabled, we just return.
  if not affix_enabled:
    return

  global timeout_listenforconnection
  global getmyip

  # Create my affix object and overwrite the listenforconnection
  # and the getmyip call.
  nodemanager_affix = affix_stack.AffixStack(affix_string)

  # Create a new timeout_listenforconnection that wraps a normal
  # Affix socket with timeout_server_socket.
  def new_timeout_listenforconnection(localip, localport, timeout):
    sockobj = nodemanager_affix.listenforconnection(localip, localport)
    return timeout_server_socket(sockobj, timeout)

  # Overload the two functionalities with Affix functionalities
  # that will be used later on.
  timeout_listenforconnection = new_timeout_listenforconnection
  getmyip = nodemanager_affix.getmyip

  servicelogger.log('[INFO] Nodemanager now using Affix string: ' + affix_string)
Пример #28
0
def start_accepter():
  """
  Advertise this node's unique DNS name and start listening for node
  manager connections on one of the configured ports (excerpt).

  NOTE(review): `myname` is returned before any visible assignment on the
  already-started path, so it presumably lives in module scope -- confirm.
  The while loop's continuation appears to lie past the end of this
  excerpt.
  """
  
  unique_id = rsa_publickey_to_string(configuration['publickey'])
  unique_id = sha_hexhash(unique_id) + str(configuration['service_vessel'])
  unique_id += "." + NAME_SERVER
 
  # do this until we get the accepter started...
  while True:

    if not node_reset_config['reset_accepter'] and is_accepter_started():
      # we're done, return the name!
      return myname
    
    else:
      for possibleport in configuration['ports']:
        try:
          servicelogger.log("[INFO]: Trying to wait")

          # We advertise the unique_id first so that we can perform waitforconn
          # on it later. It's tempting to do a waitforconn directly on the
          # current IP, but IPs are not unique. If we are behind a NAT, our IP
          # can be some private address which may have duplicates registered in
          # the NAT forwarder. As a result, a client may not be able to locate
          # us within the NAT forwarder. Hence, waitforconn must occur on a unique
          # resolvable name.
          advertise_to_DNS(unique_id)

          timeout_waitforconn(unique_id, possibleport,
                              nmconnectionmanager.connection_handler,
                              timeout=10, use_shim=True, shim_string=default_shim)

        except Exception, e:
          servicelogger.log("[ERROR]: when calling waitforconn for the connection_handler: " + str(e))
          servicelogger.log_last_exception()
        else:
          # the waitforconn was completed so the accepter is started
          accepter_state['lock'].acquire()
          accepter_state['started']= True
          accepter_state['lock'].release()

          # assign the nodemanager name
          myname = unique_id + ":" + str(possibleport)
          servicelogger.log("[INFO]: Now listening as " + myname)

          break

      else:
        # for/else: every port failed without a break.
        servicelogger.log("[ERROR]: cannot find a port for waitforconn.")
Пример #29
0
                    node_reset_config['reset_accepter'] = False
                except Exception, e:
                    # print bind_ip, port, e
                    servicelogger.log(
                        "[ERROR] setting up nodemanager serversocket " +
                        "on address " + bind_ip + ":" + str(possibleport) +
                        ": " + repr(e))
                    servicelogger.log_last_exception()
                else:
                    break

            else:
                # We exhausted the list of possibleport's to no avail.
                # Pause to avoid busy-waiting for the problem to go away.
                servicelogger.log(
                    "[ERROR]: Could not create serversocket. Sleeping for 30 seconds."
                )
                time.sleep(30)

        # check infrequently
        time.sleep(configuration['pollfrequency'])


# has the thread started?
def is_worker_thread_started():
    """Return True if a 'WorkerThread' is currently alive.

    A thread counts as the worker thread when the substring
    'WorkerThread' appears in its repr(); threading.enumerate() lists
    every alive thread.
    """
    # any() short-circuits on the first match; this replaces a for/else
    # construct whose dangling `else` read as if it paired with the `if`.
    return any('WorkerThread' in str(thread)
               for thread in threading.enumerate())
Пример #30
0
def start_accepter():
    """Start (or restart) the accepter thread that services incoming
    nodemanager connections.

    Loops until a listening serversocket is created on one of the
    configured ports.  When Affix use is advertised, an Affix
    (CoordinationAffix/LegacyAffix) socket is tried first, falling back
    to a plain timeout_listenforconnection after repeated failures.
    On success an AccepterThread wraps the socket and is registered via
    set_accepter().

    Returns:
      myname_port (str): "<name or ip>:<port>" the accepter listens on,
        returned once is_accepter_started() reports success.

    NOTE(review): this fragment is truncated -- the `except` handler of
    the inner `try` (below) and the end of the `while` loop are not
    visible here.
    """
    global accepter_thread
    global affix_enabled
    global affix_stack_string
    global zenodotus_advertise_handle

    # do this until we get the accepter started...
    while True:

        if not node_reset_config['reset_accepter'] and is_accepter_started():
            # we're done, return the name!
            return myname_port

        else:
            # If we came here because a reset was initiated, kill the old
            # accepter thread server socket before starting a new one.
            try:
                accepter_thread.close_serversocket()
                servicelogger.log(
                    "Closed previous accepter thread server socket.")
            except:
                # There was no accepter_thread, or it couldn't .close_serversocket().
                # No problem -- this means nothing will be in the way of the new
                # serversocket.
                pass

            # Similarly, stop advertising my old Zenodotus name (if any),
            # ignoring potential errors.
            try:
                advertisepipe.remove_from_pipe(zenodotus_advertise_handle)
            except:
                pass

            # Just use getmyip(), this is the default behavior and will work if we have preferences set
            # We only want to call getmyip() once, rather than in the loop since this potentially avoids
            # rebuilding the allowed IP cache for each possible port
            bind_ip = emulcomm.getmyip()

            # Attempt to have the nodemanager listen on an available port.
            # Once it is able to listen, create a new thread and pass it the socket.
            # That new thread will be responsible for handling all of the incoming connections.
            for portindex in range(len(configuration['ports'])):
                possibleport = configuration['ports'][portindex]
                try:
                    # There are two possible implementations available here:
                    # 1) Use a raw (python) socket, and so we can have a timeout, as per ticket #881
                    # 2) Use a repy socket, but then possibly leak many connections.

                    # Check to see if AFFIX is enabled.
                    try:
                        affix_enabled_lookup = advertise_lookup(
                            enable_affix_key)[-1]
                        servicelogger.log("affix_enabled_lookup is " +
                                          str(affix_enabled_lookup))
                        # Now we check if the last entry is True or False.
                        if affix_enabled_lookup == 'True':
                            affix_stack_string = advertise_lookup(
                                affix_service_key)[-1]
                            affix_enabled = True
                            servicelogger.log(
                                "[INFO]: Current advertised Affix string: " +
                                str(affix_stack_string))
                        else:
                            affix_enabled = False
                    except (AdvertiseError, TimeoutError), e:
                        servicelogger.log(
                            "Trying to look up Affix enabled threw " +
                            str(type(e)) + " " + str(e))
                        affix_enabled = False
                        # Raise error on debug mode.
                        if DEBUG_MODE:
                            raise
                    # NOTE(review): the two handlers below log str(e), but `e`
                    # is never bound in them -- reaching either raises
                    # NameError instead of logging.  They should be
                    # `except ValueError, e:` / `except IndexError, e:`.
                    except ValueError:
                        servicelogger.log(
                            "Trying to look up Affix enabled threw " +
                            str(type(e)) + " " + str(e))
                        affix_enabled = False
                        # Raise error on debug mode.
                        if DEBUG_MODE:
                            raise
                    except IndexError:
                        servicelogger.log(
                            "Trying to look up Affix enabled threw " +
                            str(type(e)) + " " + str(e))
                        # This will occur if the advertise server returns an empty list.
                        affix_enabled = False
                        # Raise error on debug mode.
                        if DEBUG_MODE:
                            raise

                    # If AFFIX is enabled, then we use AFFIX to open up a tcpserversocket.
                    if affix_enabled:
                        # Here we are going to use a for loop to find a second available port
                        # for us to use for the LegacyAffix. Since the LegacyAffix opens up two
                        # tcpserversocket, it needs two available ports. The first for a normal
                        # repy listenforconnection call, the second for affix enabled
                        # listenforconnection call.

                        # We keep track of how many times we failed to listen with the Affix
                        # framework. If we exceed 3, we default to Repy V2 API. Note that we
                        # will try three times with each port, if we are unable to connect
                        # with legacy Repy V2 API as well.
                        fail_affix_count = 0
                        error_list = []

                        for affixportindex in range(
                                portindex + 1, len(configuration['ports'])):
                            affixport = configuration['ports'][affixportindex]

                            # Assign the nodemanager name to be the nodekey. We replace any whitespace in the
                            # name and append zenodotus tag at the end.
                            mypubkey = rsa_publickey_to_string(
                                configuration['publickey']).replace(" ", "")
                            myname = sha_hexhash(
                                mypubkey) + '.zenodotus.poly.edu'
                            myname_port = myname + ":" + str(possibleport)

                            # Announce my (new) Zenodotus name
                            zenodotus_advertise_handle = advertisepipe.add_to_pipe(
                                myname, getmyip())

                            affix_legacy_string = "(CoordinationAffix)(LegacyAffix," + myname + "," + str(
                                affixport) + ",0,"
                            affix_legacy_string += "(CoordinationAffix)" + affix_stack_string + ")"
                            affix_object = AffixStackInterface(
                                affix_legacy_string)

                            # Now that we have found the Affix string and have created the AffixStackInterface
                            # object, we will try to open up a listening tcp socket. If we fail to do so
                            # 3 times, we will default to legacy Repy V2 socket.
                            try:
                                serversocket = affix_object.listenforconnection(
                                    myname, possibleport)
                                servicelogger.log(
                                    "[INFO]Started accepter thread with Affix string: "
                                    + affix_legacy_string)
                                break
                            except (AddressBindingError, AlreadyListeningError,
                                    DuplicateTupleError), e:

                                servicelogger.log(
                                    "Failed to open listening socket with Affix on port: "
                                    + str(affixport) + ". Found error: " +
                                    str(e))

                                fail_affix_count += 1
                                error_list.append((type(e), str(e)))

                                # If we fail more than 2 times, we will stop attempting to try listening
                                # on a socket with the Affix framework.
                                if fail_affix_count > 2:
                                    servicelogger.log(
                                        "Failed to open socket using Affix after three attemps."
                                        +
                                        "Now resuming with legacy Repy socket. Errors were: "
                                        + str(error_list))
                                    serversocket = timeout_listenforconnection(
                                        bind_ip, possibleport, 10)
                                    # assign the nodemanager name
                                    myname_port = str(bind_ip) + ":" + str(
                                        possibleport)
                                    break
                            except Exception, e:
                                servicelogger.log(
                                    "[ERROR] Found Listenforconnection had exception: "
                                    + str(e))
                                raise

                    else:
                        # If AFFIX is not enabled, then we open up a normal tcpserversocket.
                        # For now, we'll use the second method.
                        serversocket = timeout_listenforconnection(
                            bind_ip, possibleport, 10)
                        # assign the nodemanager name
                        myname_port = str(bind_ip) + ":" + str(possibleport)
                    # If there is no error, we were able to successfully start listening.
                    # Create the thread, and start it up!
                    accepter = nmconnectionmanager.AccepterThread(serversocket)
                    accepter.start()

                    # Now that we created an accepter, let's use it!
                    set_accepter(accepter)

                    # MOSHE: Is this thread safe!?
                    # Now that waitforconn has been called, unset the accepter reset flag
                    node_reset_config['reset_accepter'] = False
Пример #31
0
  def run(self):
    """Advertise-thread main loop: repeatedly announce this node's
    address under every key that should currently be advertised.

    Each pass prunes stale entries from the advertise dict, builds the
    list of keys to announce under (the node key plus the owner/user
    keys of every vessel whose entry has 'advertise' set, skipping keys
    already recorded in lastadvertisedict), then calls
    advertise_announce() for each.  Announce failures toggle the
    offline/online bookkeeping on self; any unexpected exception is
    logged and the method returns so the thread can be restarted.
    """
    # Put everything in a try except block so that if badness happens, we can
    # log it before dying.
    try:
      while True:
        # remove stale items from the advertise dict.   This is important because
        # we're using membership in the dict to indicate a need to advertise
        clean_advertise_dict()

        # this list contains the keys we will advertise
        advertisekeylist = []

        # JAC: advertise under the node's key
        if rsa_publickey_to_string(self.nodekey) not in lastadvertisedict and self.nodekey not in advertisekeylist:
          advertisekeylist.append(self.nodekey)


        # make a copy so there isn't an issue with a race
        # NOTE(review): relies on Python 2 dict.keys() returning a list;
        # the [:] copy would fail on a Python 3 view object.
        for vesselname in self.addict.keys()[:]:

          try:
            thisentry = self.addict[vesselname].copy()
          except KeyError:
            # the entry must have been removed in the meantime.   Skip it!
            continue

          # if I advertise the vessel...
          if thisentry['advertise']:
            # add the owner key if not there already...
            if rsa_publickey_to_string(thisentry['ownerkey']) not in lastadvertisedict and thisentry['ownerkey'] not in advertisekeylist:
              advertisekeylist.append(thisentry['ownerkey'])

            # and all user keys if not there already
            for userkey in thisentry['userkeys']:
              if rsa_publickey_to_string(userkey) not in lastadvertisedict and userkey not in advertisekeylist:
                advertisekeylist.append(userkey)


        # there should be no dups.
        assert(advertisekeylist == listops_uniq(advertisekeylist))

        # now that I know who to announce to, send messages to annouce my IP and
        # port to all keys I support
        for advertisekey in advertisekeylist:
          try:
            advertise_announce(advertisekey, str(myname), adTTL)
            # mark when we advertise
            lastadvertisedict[rsa_publickey_to_string(advertisekey)] = getruntime()

            # If the announce succeeded, and node was offline, log info message
            # and switch it back to online mode.
            if self.is_offline:
              info_msg = 'Node is back online.'
              if self.error_count:
                info_msg += ' (Encountered ' + str(self.error_count) + \
                              ' advertise errors)'
              servicelogger.log('[INFO]: ' + info_msg)
              self.error_count = 0
              self.is_offline = False

          except AdvertiseError, e:
            # If all announce requests failed, assume node has
            # gone offline,
            if str(e) == "None of the advertise services could be contacted":
              self.is_offline = True
              # Log an error message after every 'N' failures
              if (self.error_count % error_skip_count == 0):
                servicelogger.log('AdvertiseError occured, continuing: '+str(e))
              self.error_count += 1
            # Log all other types of errors
            else:
              servicelogger.log('AdvertiseError occured, continuing: '+str(e))
          except Exception, e:
            servicelogger.log_last_exception()
            # an unexpected exception occured, exit and restart
            return
Пример #32
0
            # We only want to call getmyip() once, rather than in the loop since this potentially avoids
            # rebuilding the allowed IP cache for each possible port
            bind_ip = emulcomm.getmyip()

            for possibleport in configuration['ports']:
                try:

                    if use_nat:
                        # use the sha hash of the nodes public key with the vessel
                        # number as an id for this node
                        unique_id = rsa_publickey_to_string(
                            configuration['publickey'])
                        hashedunique_id = sha.new(unique_id).hexdigest()
                        advertiseid = hashedunique_id + str(
                            configuration['service_vessel'])
                        servicelogger.log("[INFO]: Trying NAT wait")
                        nat_waitforconn(advertiseid, possibleport,
                                        nmconnectionmanager.connection_handler)

                    # do a local waitforconn (not using a forwarder)
                    # this makes the node manager easily accessible locally

                    #JAC: I do a timeout waitforconn in an attempt to address #881
                    # 10 seconds should be adequate for a client to respond / communicate
                    timeout_waitforconn(bind_ip,
                                        possibleport,
                                        nmconnectionmanager.connection_handler,
                                        timeout=10)
                    # Now that waitforconn has been called, unset the accepter reset flag
                    node_reset_config['reset_accepter'] = False
                except Exception, e:
Пример #33
0
def main():
    """Nodemanager startup (fragment).

    Daemonizes unless FOREGROUND is set; in test mode writes this
    process's pid to 'nodemanager.pid', otherwise enforces
    single-instance operation via a runonce process lock.  Then logs
    platform information, loads "nodeman.cfg" into the module-global
    `configuration`, applies IP/interface restrictions, and on
    Linux/Darwin ensures the 2009 seattle crontab entry is installed.

    NOTE(review): this fragment is truncated -- the function continues
    past the crontab handling shown here.
    """
    global configuration

    if not FOREGROUND:
        # Background ourselves.
        daemon.daemonize()

    # Check if we are running in testmode.
    if TEST_NM:
        nodemanager_pid = os.getpid()
        servicelogger.log(
            "[INFO]: Running nodemanager in test mode on port 1224, " +
            "pid %s." % str(nodemanager_pid))
        nodeman_pid_file = open(os.path.join(os.getcwd(), 'nodemanager.pid'),
                                'w')

        # Write out the pid of the nodemanager process that we started to a file.
        # This is only done if the nodemanager was started in test mode.
        try:
            nodeman_pid_file.write(str(nodemanager_pid))
        finally:
            nodeman_pid_file.close()

    else:
        # ensure that only one instance is running at a time...
        gotlock = runonce.getprocesslock("seattlenodemanager")

        if gotlock == True:
            # I got the lock.   All is well...
            pass
        else:
            # gotlock holds the owning pid when available, else a falsy value.
            if gotlock:
                servicelogger.log(
                    "[ERROR]:Another node manager process (pid: " +
                    str(gotlock) + ") is running")
            else:
                servicelogger.log(
                    "[ERROR]:Another node manager process is running")
            return

    # NOTE(review): the quote characters around the version differ (opens
    # with '"', closes with "'") -- likely meant to close with '"'.
    servicelogger.log('[INFO]: This is Seattle release "' + version + "'")

    # Feature add for #1031: Log information about the system in the nm log...
    servicelogger.log('[INFO]:platform.python_version(): "' +
                      str(platform.python_version()) + '"')
    servicelogger.log('[INFO]:platform.platform(): "' +
                      str(platform.platform()) + '"')

    # uname on Android only yields 'Linux', let's be more specific.
    try:
        import android
        servicelogger.log('[INFO]:platform.uname(): Android / "' +
                          str(platform.uname()) + '"')
    except ImportError:
        servicelogger.log('[INFO]:platform.uname(): "' +
                          str(platform.uname()) + '"')

    # I'll grab the necessary information first...
    servicelogger.log("[INFO]:Loading config")
    # BUG: Do this better?   Is this the right way to engineer this?
    configuration = persist.restore_object("nodeman.cfg")

    # Armon: initialize the network restrictions
    initialize_ip_interface_restrictions(configuration)

    # ZACK BOKA: For Linux and Darwin systems, check to make sure that the new
    #            seattle crontab entry has been installed in the crontab.
    #            Do this here because the "nodeman.cfg" needs to have been read
    #            into configuration via the persist module.
    if nonportable.ostype == 'Linux' or nonportable.ostype == 'Darwin':
        if 'crontab_updated_for_2009_installer' not in configuration or \
              configuration['crontab_updated_for_2009_installer'] == False:
            try:
                # crontab may not exist on Android, therefore let's not check
                # if we are running on Android. See #1302 and #1254.
                try:
                    import android
                except ImportError:
                    import update_crontab_entry
                    modified_crontab_entry = \
                        update_crontab_entry.modify_seattle_crontab_entry()
                    # If updating the seattle crontab entry succeeded, then update the
                    # 'crontab_updated_for_2009_installer' so the nodemanager no longer
                    # tries to update the crontab entry when it starts up.
                    if modified_crontab_entry:
                        configuration[
                            'crontab_updated_for_2009_installer'] = True
                        persist.commit_object(configuration, "nodeman.cfg")

            except Exception, e:
                exception_traceback_string = traceback.format_exc()
                servicelogger.log("[ERROR]: The following error occured when " \
                                    + "modifying the crontab for the new 2009 " \
                                    + "seattle crontab entry: " \
                                    + exception_traceback_string)
Пример #34
0
def start_accepter():
  """Start (or restart) the accepter thread that services incoming
  nodemanager connections (Affix-capable variant).

  Loops until a listening serversocket is created on one of the
  configured ports.  When Affix use is advertised, a
  CoordinationAffix/LegacyAffix socket is tried first, falling back to
  a plain timeout_listenforconnection after three failures.  On
  success an AccepterThread wraps the socket and is registered via
  set_accepter().

  Returns:
    myname_port (str): "<name or ip>:<port>" the accepter listens on,
      returned once is_accepter_started() reports success.

  NOTE(review): this fragment is truncated -- the `except` handler of
  the inner `try` and the end of the `while` loop are not visible here.
  """
  global accepter_thread
  global affix_enabled
  global affix_stack_string

  # do this until we get the accepter started...
  while True:

    if not node_reset_config['reset_accepter'] and is_accepter_started():
      # we're done, return the name!
      return myname_port

    else:
      # Just use getmyip(), this is the default behavior and will work if we have preferences set
      # We only want to call getmyip() once, rather than in the loop since this potentially avoids
      # rebuilding the allowed IP cache for each possible port
      bind_ip = emulcomm.getmyip()

      # Attempt to have the nodemanager listen on an available port.
      # Once it is able to listen, create a new thread and pass it the socket.
      # That new thread will be responsible for handling all of the incoming connections.
      for portindex in range(len(configuration['ports'])):
        possibleport = configuration['ports'][portindex]
        try:
          # There are two possible implementations available here:
          # 1) Use a raw (python) socket, and so we can have a timeout, as per ticket #881
          # 2) Use a repy socket, but then possibly leak many connections.

          # Check to see if AFFIX is enabled.
          try:
            affix_enabled_lookup = advertise_lookup(enable_affix_key)[-1]
            servicelogger.log("affix_enabled_lookup is " + str(affix_enabled_lookup))
            # Now we check if the last entry is True or False.
            if affix_enabled_lookup == 'True':
              affix_stack_string = advertise_lookup(affix_service_key)[-1]
              affix_enabled = True
              servicelogger.log("[INFO]: Current advertised Affix string: " + str(affix_stack_string))
            else:
              affix_enabled = False
          except (AdvertiseError, TimeoutError), e:
            servicelogger.log("Trying to look up Affix enabled threw " + str(type(e)) + " " + str(e))
            affix_enabled = False
            # Raise error on debug mode.
            if DEBUG_MODE:
              raise
          # NOTE(review): the two handlers below log str(e), but `e` is
          # never bound in them -- reaching either raises NameError
          # instead of logging.  They should be `except ValueError, e:`
          # / `except IndexError, e:`.
          except ValueError:
            servicelogger.log("Trying to look up Affix enabled threw " + str(type(e)) + " " + str(e))
            affix_enabled = False
            # Raise error on debug mode.
            if DEBUG_MODE:
              raise
          except IndexError:
            servicelogger.log("Trying to look up Affix enabled threw " + str(type(e)) + " " + str(e))
            # This will occur if the advertise server returns an empty list.
            affix_enabled = False
            # Raise error on debug mode.
            if DEBUG_MODE:
              raise

          # If AFFIX is enabled, then we use AFFIX to open up a tcpserversocket.
          if affix_enabled:
            # Here we are going to use a for loop to find a second available port
            # for us to use for the LegacyAffix. Since the LegacyAffix opens up two
            # tcpserversocket, it needs two available ports. The first for a normal
            # repy listenforconnection call, the second for affix enabled
            # listenforconnection call.

            # We keep track of how many times we failed to listen with the Affix
            # framework. If we exceed 3, we default to Repy V2 API. Note that we
            # will try three times with each port, if we are unable to connect
            # with legacy Repy V2 API as well.
            fail_affix_count = 0
            error_list = []

            for affixportindex in range(portindex+1, len(configuration['ports'])):
              affixport = configuration['ports'][affixportindex]

              # Assign the nodemanager name to be the nodekey. We replace any whitespace in the
              # name and append zenodotus tag at the end.
              mypubkey = rsa_publickey_to_string(configuration['publickey']).replace(" ", "")
              myname = sha_hexhash(mypubkey) + '.zenodotus.poly.edu'
              myname_port = myname + ":" + str(possibleport)

              # Announce my Zenodotus name
              # XXX Save the handle, modify the announcement when my address changes!
              advertisepipe.add_to_pipe(myname, getmyip())

              affix_legacy_string = "(CoordinationAffix)(LegacyAffix," + myname + "," + str(affixport) + ",0,"
              affix_legacy_string += "(CoordinationAffix)" + affix_stack_string + ")"
              affix_object = AffixStackInterface(affix_legacy_string)

              # Now that we have found the Affix string and have created the AffixStackInterface
              # object, we will try to open up a listening tcp socket. If we fail to do so
              # 3 times, we will default to legacy Repy V2 socket.
              try:
                serversocket = affix_object.listenforconnection(myname, possibleport)
                servicelogger.log("[INFO]Started accepter thread with Affix string: " + affix_legacy_string)
                break
              except (AddressBindingError, AlreadyListeningError, DuplicateTupleError), e:

                servicelogger.log(
                  "Failed to open listening socket with Affix on port: " +
                  str(affixport) + ". Found error: " + str(e))

                fail_affix_count += 1
                error_list.append((type(e), str(e)))

                # If we fail more than 2 times, we will stop attempting to try listening
                # on a socket with the Affix framework.
                if fail_affix_count > 2:
                  servicelogger.log("Failed to open socket using Affix after three attemps." +
                                    "Now resuming with legacy Repy socket. Errors were: " +
                                    str(error_list))
                  serversocket = timeout_listenforconnection(bind_ip, possibleport, 10)
                  # assign the nodemanager name
                  myname_port = str(bind_ip) + ":" + str(possibleport)
                  break
              except Exception, e:
                servicelogger.log("[ERROR] Found Listenforconnection had exception: " + str(e))
                raise

          else:
            # If AFFIX is not enabled, then we open up a normal tcpserversocket.
            # For now, we'll use the second method.
            serversocket = timeout_listenforconnection(bind_ip, possibleport,10)
            # assign the nodemanager name
            myname_port = str(bind_ip) + ":" + str(possibleport)
          # If there is no error, we were able to successfully start listening.
          # Create the thread, and start it up!
          accepter = nmconnectionmanager.AccepterThread(serversocket)
          accepter.start()

          # Now that we created an accepter, let's use it!
          set_accepter(accepter)

          # MOSHE: Is this thread safe!?
          # Now that waitforconn has been called, unset the accepter reset flag
          node_reset_config['reset_accepter'] = False
Пример #35
0
def start_accepter():
  global accepter_thread
  global affix_enabled
  global affix_stack_string

  # do this until we get the accepter started...
  while True:

    if not node_reset_config['reset_accepter'] and is_accepter_started():
      # we're done, return the name!
      return myname
    
    else:
      # Just use getmyip(), this is the default behavior and will work if we have preferences set
      # We only want to call getmyip() once, rather than in the loop since this potentially avoids
      # rebuilding the allowed IP cache for each possible port
      bind_ip = emulcomm.getmyip()
      
      # Attempt to have the nodemanager listen on an available port.
      # Once it is able to listen, create a new thread and pass it the socket.
      # That new thread will be responsible for handling all of the incoming connections.     
      for portindex in range(len(configuration['ports'])):
        possibleport = configuration['ports'][portindex]
        try:
          # There are two possible implementations available here:
          # 1) Use a raw (python) socket, and so we can have a timeout, as per ticket #881
          # 2) Use a repy socket, but then possibly leak many connections.
          
          # Check to see if AFFIX is enabled.
          try:
            affix_enabled_lookup = advertise_lookup(enable_affix_key)[-1]
            # Now we check if the last entry is True or False.
            if affix_enabled_lookup == 'True':
              affix_stack_string = advertise_lookup(affix_service_key)[-1]
              affix_enabled = True
            else:
              affix_enabled = False
          except AdvertiseError:
            affix_enabled = False
          except ValueError:
            affix_enabled = False
          except IndexError:
            # This will occur if the advertise server returns an empty list.
            affix_enabled = False

      
          # If AFFIX is enabled, then we use AFFIX to open up a tcpserversocket.
          if affix_enabled:
            # Here we are going to use a for loop to find a second available port
            # for us to use for the LegacyShim. Since the LegacyShim opens up two
            # tcpserversocket, it needs two available ports. The first for a normal
            # repy listenforconnection call, the second for shim enabled 
            # listenforconnection call.
            for shimportindex in range(portindex+1, len(configuration['ports'])):
              shimport = configuration['ports'][shimportindex]
              affix_legacy_string = "(LegacyShim," + str(shimport) + ",0)" + affix_stack_string
              affix_object = ShimStackInterface(affix_legacy_string)
              serversocket = affix_object.listenforconnection(bind_ip, possibleport)
              servicelogger.log("[INFO]Started accepter thread with Affix string: " + affix_legacy_string)
              break
            else:
              # This is the case if we weren't able to find any port to listen on
              # With the legacy shim.
              raise ShimError("Unable to create create tcpserversocket with shims using port:" + str(possibleport))

          else:
            # If AFFIX is not enabled, then we open up a normal tcpserversocket.
            # For now, we'll use the second method.
            serversocket = listenforconnection(bind_ip, possibleport)
          
          # If there is no error, we were able to successfully start listening.
          # Create the thread, and start it up!
          accepter = nmconnectionmanager.AccepterThread(serversocket)
          accepter.start()
          
          # Now that we created an accepter, let's use it!          
          set_accepter(accepter)

          # MOSHE: Is this thread safe!?          
          # Now that waitforconn has been called, unset the accepter reset flag
          node_reset_config['reset_accepter'] = False
        except Exception, e:
          # print bind_ip, port, e
          servicelogger.log("[ERROR]: when calling listenforconnection for the connection_handler: " + str(e))
          servicelogger.log_last_exception()
        else:
          # assign the nodemanager name
          myname = str(bind_ip) + ":" + str(possibleport)
          break

      else:
        servicelogger.log("[ERROR]: cannot find a port for recvmess")
Пример #36
0
        raise
    else:
      # We succeeded in getting our external IP. Leave the loop.
      break
    time.sleep(0.1)

  vesseldict = nmrequesthandler.initialize(myip, configuration['publickey'], version)

  # Start accepter...
  myname = start_accepter()

  # Initialize the global node name inside node reset configuration dict
  node_reset_config['name'] = myname
  
  #send our advertised name to the log
  servicelogger.log('myname = '+str(myname))

  # Start worker thread...
  start_worker_thread(configuration['pollfrequency'])

  # Start advert thread...
  start_advert_thread(vesseldict, myname, configuration['publickey'])

  # Start status thread...
  start_status_thread(vesseldict,configuration['pollfrequency'])


  # we should be all set up now.   

  servicelogger.log("[INFO]:Started")
Пример #37
0
def start_accepter():
  """
  <Purpose>
    Make sure an AccepterThread is running and listening for incoming
    nodemanager connections.  When a reset has been requested via
    node_reset_config['reset_accepter'], the old server socket is closed
    and a fresh one is opened on one of the configured ports.

  <Returns>
    The node's address as an "ip:port" string (module-global myname_port).
  """
  global accepter_thread

  # do this until we get the accepter started...
  while True:

    if not node_reset_config['reset_accepter'] and is_accepter_started():
      # we're done, return the name!
      return myname_port
    
    else:
      # If we came here because a reset was initiated, kill the old 
      # accepter thread server socket before starting a new one.
      try:
        accepter_thread.close_serversocket()
        servicelogger.log("Closed previous accepter thread server socket.")
      except:
        # There was no accepter_thread, or it couldn't .close_serversocket().
        # No problem -- this means nothing will be in the way of the new 
        # serversocket.
        pass


      # Just use getmyip(), this is the default behavior and will work if we have preferences set
      # We only want to call getmyip() once, rather than in the loop since this potentially avoids
      # rebuilding the allowed IP cache for each possible port
      bind_ip = getmyip()

      # Attempt to have the nodemanager listen on an available port.
      # Once it is able to listen, create a new thread and pass it the socket.
      # That new thread will be responsible for handling all of the incoming connections.     
      for portindex in range(len(configuration['ports'])):
        possibleport = configuration['ports'][portindex]
        try:
          # There are two possible implementations available here:
          # 1) Use a raw (python) socket, and so we can have a timeout, as per ticket #881
          # 2) Use a repy socket, but then possibly leak many connections.
      
          # For now, we'll use the second method and use the sockettimeout
          # library so we can still use a timeout to ensure we don't have
          # any malicious clients that feed us endless data (or no data)
          # to tie up the connection. Note that if we are using Affix,
          # we will be using a TimeoutAffix to achieve the equivalent
          # outcome.
          serversocket = timeout_listenforconnection(bind_ip, possibleport,10)

          # assign the nodemanager name.
          # We re-retrieve our address using getmyip as we may now be using
          # a zenodotus name instead.
          myname_port = str(getmyip()) + ":" + str(possibleport)

          # If there is no error, we were able to successfully start listening.
          # Create the thread, and start it up!
          accepter = nmconnectionmanager.AccepterThread(serversocket)
          accepter.start()
          
          # Now that we created an accepter, let's use it!          
          set_accepter(accepter)

          # MOSHE: Is this thread safe!?          
          # Now that waitforconn has been called, unset the accepter reset flag
          node_reset_config['reset_accepter'] = False
        except Exception, e:
          # print bind_ip, port, e
          servicelogger.log("[ERROR]: when calling listenforconnection for the connection_handler: " + str(e))
          servicelogger.log_last_exception()
        else:
          # Port bound and accepter running; stop scanning ports.
          break

      else:
        # for/else: no port in configuration['ports'] could be bound.
        servicelogger.log("[ERROR]: cannot find a port for recvmess")
Пример #38
0
def handle_threading_error(nmAPI):
    """
  <Purpose>
    Handles a repy node failing with ThreadErr. Reduces global thread count by 50%.
    Restarts all existing vesselts

  <Arguments>
    nmAPI: the nmAPI module -- passed to the function to avoid import loops;
           see ticket #590 for more information about this.
  """
    # Make a log of this
    servicelogger.log(
        "[ERROR]:A Repy vessel has exited with ThreadErr status. Patching restrictions and reseting all vessels."
    )

    # Get the number of threads Repy has allocated
    allocatedThreads = get_allocated_threads()

    # Get the number os system threads currently
    systemThreads = nonportable.os_api.get_system_thread_count()

    # Log this information
    servicelogger.log("[ERROR]:System Threads: " + str(systemThreads) +
                      "  Repy Allocated Threads: " + str(allocatedThreads))

    # Get the NM configuration
    configuration = persist.restore_object("nodeman.cfg")

    # Check if there is a threshold configuration,
    # otherwise add the default configuration
    if NOOP_CONFIG_KEY in configuration:
        threshold = configuration[NOOP_CONFIG_KEY]
    else:
        threshold = DEFAULT_NOOP_THRESHOLD
        configuration[NOOP_CONFIG_KEY] = threshold
        persist.commit_object(configuration, "nodeman.cfg")

    # Check if we are below the threshold, if so
    # then just return, this is a noop
    if allocatedThreads < systemThreads * threshold:
        return

    # We are continuing, so we are above the threshold!
    # First, update the restrictions
    update_restrictions()

    # Then, stop the vessels
    # Get all the vessels
    vessels = nmAPI.vesseldict.keys()

    # Create the stop tuple, exit code 57 with an error message
    stoptuple = (57,
                 "Fatal system-wide threading error! Stopping all vessels.")

    # Stop each vessel
    for vessel in vessels:
        try:
            # Stop each vessel, using our stoptuple
            nmAPI.stopvessel(vessel, stoptuple)
        except Exception, exp:
            # Forge on, regardless of errors
            servicelogger.log(
                "[ERROR]:Failed to reset vessel (Handling ThreadErr). Exception: "
                + str(exp))
            servicelogger.log_last_exception()
                                + startup_script_path + " cannot be removed.")
            return False

    # Remove the symlink.
    try:
        os.remove(symlink_path)
    # Cannot remove the symlink due to some reason.
    except OSError, e:
        # The symlink does not exist - that is fine.
        if e.errno == errno.ENOENT:
            pass
        else:
            # The symlink cannot be removed.
            _output("The symlink cannot be removed. Make sure you have the " \
                      + "permission to do so.")
            servicelogger.log("Seattle cannot be uninstalled because " \
                                + symlink_path + " cannot be removed.")
            return False

    # Stop all instances of seattle from running.
    stop_all_seattle_processes.main()

    return True


def uninstall_Linux_and_Mac():
    """
  <Purpose>
    Remove the seattle entry from the crontab, and kill all seattle processes
    by using stop_all_seattle_processes.py
  <Arguments>
    None.
Пример #40
0
          # MOSHE: Is this thread safe!?          
          # Now that waitforconn has been called, unset the accepter reset flag
          node_reset_config['reset_accepter'] = False
        except Exception, e:
          # print bind_ip, port, e
          servicelogger.log("[ERROR] setting up nodemanager serversocket " + 
              "on address " + bind_ip + ":" + str(possibleport) + ": " + 
              repr(e))
          servicelogger.log_last_exception()
        else:
          break

      else:
        # We exhausted the list of possibleport's to no avail. 
        # Pause to avoid busy-waiting for the problem to go away.
        servicelogger.log("[ERROR]: Could not create serversocket. Sleeping for 30 seconds.")
        time.sleep(30)

    # check infrequently
    time.sleep(configuration['pollfrequency'])
  






# has the thread started?
def is_worker_thread_started():
  for thread in threading.enumerate():
    if 'WorkerThread' in str(thread):
Пример #41
0

  # If AFFIX is enabled, then we use AFFIX to open up a tcpserversocket.
  if affix_enabled:
    # Assign the nodemanager name to be the nodekey. We replace any whitespace in the
    # name and append zenodotus tag at the end.
    
    # Announce my (new) Zenodotus name
    zenodotus_advertise_handle = advertisepipe.add_to_pipe(my_zeno_name, emulcomm.getmyip())
    
    # Now that we have found the Affix string and have created the AffixStackInterface
    # object, we will try to open up a listening tcp socket. If we get an error, we
    # fall back to using legacy Repy API.
    try:
      serversocket = affix_object.listenforconnection(my_zeno_name, localport)
      servicelogger.log("[INFO]Started accepter thread with Affix string: " + affix_legacy_string)
    except (AddressBindingError, AlreadyListeningError, DuplicateTupleError):
      servicelogger.log("Failed to open listening socket with Affix on port: " + 
        str(localport) + ". Found error: " + str(e) + ". Trying legacy connection.")
      return old_timeout_listenforconnection(localip, localport, 10)
    else:
      # If we did not receive any error, we need to overwrite getmyip() with 
      # the new address.
      global getmyip
      getmyip = affix_object.getmyip
  else:
    # If Affix is not enaled, we do a normal timeout_listenforconnection
    # and return the socket.
    return old_timeout_listenforconnection(localip, localport, 10)

Пример #42
0
    def run(self):
        # Put everything in a try except block so that if badness happens, we can
        # log it before dying.
        try:
            while True:
                # remove stale items from the advertise dict.   This is important because
                # we're using membership in the dict to indicate a need to advertise
                clean_advertise_dict()

                # this list contains the keys we will advertise
                advertisekeylist = []

                # JAC: advertise under the node's key
                if rsa_publickey_to_string(
                        self.nodekey
                ) not in lastadvertisedict and self.nodekey not in advertisekeylist:
                    advertisekeylist.append(self.nodekey)

                # make a copy so there isn't an issue with a race
                for vesselname in self.addict.keys()[:]:

                    try:
                        thisentry = self.addict[vesselname].copy()
                    except KeyError:
                        # the entry must have been removed in the meantime.   Skip it!
                        continue

                    # if I advertise the vessel...
                    if thisentry['advertise']:
                        # add the owner key if not there already...
                        if rsa_publickey_to_string(
                                thisentry['ownerkey']
                        ) not in lastadvertisedict and thisentry[
                                'ownerkey'] not in advertisekeylist:
                            advertisekeylist.append(thisentry['ownerkey'])

                        # and all user keys if not there already
                        for userkey in thisentry['userkeys']:
                            if rsa_publickey_to_string(
                                    userkey
                            ) not in lastadvertisedict and userkey not in advertisekeylist:
                                advertisekeylist.append(userkey)

                # there should be no dups.
                assert (advertisekeylist == listops_uniq(advertisekeylist))

                # now that I know who to announce to, send messages to annouce my IP and
                # port to all keys I support
                for advertisekey in advertisekeylist:
                    try:
                        advertise_announce(advertisekey, str(myname), adTTL)
                        # mark when we advertise
                        lastadvertisedict[rsa_publickey_to_string(
                            advertisekey)] = getruntime()

                        # If the announce succeeded, and node was offline, log info message
                        # and switch it back to online mode.
                        if self.is_offline:
                            info_msg = 'Node is back online.'
                            if self.error_count:
                                info_msg += ' (Encountered ' + str(self.error_count) + \
                                              ' advertise errors)'
                            servicelogger.log('[INFO]: ' + info_msg)
                            self.error_count = 0
                            self.is_offline = False

                    except AdvertiseError, e:
                        # If all announce requests failed, assume node has
                        # gone offline,
                        if str(
                                e
                        ) == "None of the advertise services could be contacted":
                            self.is_offline = True
                            # Log an error message after every 'N' failures
                            if (self.error_count % error_skip_count == 0):
                                servicelogger.log(
                                    'AdvertiseError occured, continuing: ' +
                                    str(e))
                            self.error_count += 1
                        # Log all other types of errors
                        else:
                            servicelogger.log(
                                'AdvertiseError occured, continuing: ' +
                                str(e))
                    except Exception, e:
                        servicelogger.log_last_exception()
                        # an unexpected exception occured, exit and restart
                        return
Пример #43
0
def start_accepter():
    """
  <Purpose>
    Make sure an AccepterThread is running and listening for incoming
    nodemanager connections.  When a reset has been requested via
    node_reset_config['reset_accepter'], the old server socket is closed
    and a fresh one is opened on one of the configured ports.

  <Returns>
    The node's address as an "ip:port" string (module-global myname_port).
  """
    global accepter_thread

    # do this until we get the accepter started...
    while True:

        if not node_reset_config['reset_accepter'] and is_accepter_started():
            # we're done, return the name!
            return myname_port

        else:
            # If we came here because a reset was initiated, kill the old
            # accepter thread server socket before starting a new one.
            try:
                accepter_thread.close_serversocket()
                servicelogger.log(
                    "Closed previous accepter thread server socket.")
            except:
                # There was no accepter_thread, or it couldn't .close_serversocket().
                # No problem -- this means nothing will be in the way of the new
                # serversocket.
                pass

            # Use getmyip() to find the IP address the nodemanager should
            # listen on for incoming connections. This will work correctly
            # if IP/interface preferences have been set.
            # We only want to call getmyip() once rather than in the loop
            # since this potentially avoids rebuilding the allowed IP
            # cache for each possible port
            bind_ip = getmyip()

            # Attempt to have the nodemanager listen on an available port.
            # Once it is able to listen, create a new thread and pass it the socket.
            # That new thread will be responsible for handling all of the incoming connections.
            for possibleport in configuration['ports']:
                try:
                    # Use a Repy socket for listening. This lets us override
                    # the listenforconnection function with a version using an
                    # Affix stack easily; furthermore, we can transparently use
                    # the Repy sockettimeout library to protect against malicious
                    # clients that feed us endless data (or no data) to tie up
                    # the connection.
                    try:
                        serversocket = timeout_listenforconnection(
                            bind_ip, possibleport, 10)
                    except (AlreadyListeningError, DuplicateTupleError), e:
                        # These are rather dull errors that will result in us
                        # trying a different port. Don't print a stack trace.
                        servicelogger.log(
                            "[ERROR]: listenforconnection for address " +
                            bind_ip + ":" + str(possibleport) +
                            " failed with error '" + repr(e) + "'. Retrying.")
                        continue

                    # Assign the nodemanager name.
                    # We re-retrieve our address using getmyip as we may now be using
                    # a zenodotus name instead.
                    myname_port = str(getmyip()) + ":" + str(possibleport)

                    # If there is no error, we were able to successfully start listening.
                    # Create the thread, and start it up!
                    accepter = nmconnectionmanager.AccepterThread(serversocket)
                    accepter.start()

                    # Now that we created an accepter, let's use it!
                    set_accepter(accepter)

                    # MOSHE: Is this thread safe!?
                    # Now that waitforconn has been called, unset the accepter reset flag
                    node_reset_config['reset_accepter'] = False
                except Exception, e:
                    # print bind_ip, port, e
                    servicelogger.log(
                        "[ERROR] setting up nodemanager serversocket " +
                        "on address " + bind_ip + ":" + str(possibleport) +
                        ": " + repr(e))
                    servicelogger.log_last_exception()
                else:
                    # Port bound and accepter running; stop scanning ports.
                    break
Пример #44
0
        # It wasn't emulcomm.getmyip()'s exception. re-raise.
        raise
    else:
      # We succeeded in getting our external IP. Leave the loop.
      break
    time.sleep(0.1)

  vesseldict = nmrequesthandler.initialize(myip, configuration['publickey'], version)

  # Start accepter...
  myname = start_accepter()
  # Initialize the global node name inside node reset configuration dict
  node_reset_config['name'] = myname
  
  #send our advertised name to the log
  servicelogger.log('myname = '+str(myname))

  # Start worker thread...
  start_worker_thread(configuration['pollfrequency'])

  # Start advert thread...
  start_advert_thread(vesseldict, myname, configuration['publickey'])

  # Start status thread...
  start_status_thread(vesseldict,configuration['pollfrequency'])


  # we should be all set up now.   

  servicelogger.log("[INFO]:Started")
Пример #45
0
      return

    # Other exceptions only should happen on an internal error and should be
    # captured by servicelogger.log
    except Exception,e:
      servicelogger.log_last_exception()
      session.session_sendmessage(socketobj,"Internal Error\nError")
      return
 
    # send the output of the command...
    session.session_sendmessage(socketobj,retstring)

  except Exception, e:
    #JAC: Fix for the exception logging observed in #992
    if 'Socket closed' in str(e) or 'timed out!' in str(e):
      servicelogger.log('Connection abruptly closed in send')
      return
    else:
      raise
  
  finally:
    # Prevent leaks
    try:
      socketobj.close()
    except Exception, e:
      servicelogger.log_last_exception()
   
      
  

                          + startup_script_path + " cannot be removed.")
      return False

  # Remove the symlink.
  try:
    os.remove(symlink_path)
  # Cannot remove the symlink due to some reason.
  except OSError, e:
    # The symlink does not exist - that is fine.
    if e.errno == errno.ENOENT:
      pass
    else:
      # The symlink cannot be removed.
      _output("The symlink cannot be removed. Make sure you have the " \
                + "permission to do so.")
      servicelogger.log("Seattle cannot be uninstalled because " \
                          + symlink_path + " cannot be removed.")
      return False

  # Stop all instances of seattle from running.
  stop_all_seattle_processes.main()

  return True


def uninstall_Linux_and_Mac():
  """
  <Purpose>
    Remove the seattle entry from the crontab, and kill all seattle processes
    by using stop_all_seattle_processes.py
  <Arguments>
    None.
Пример #47
0
def main():
  """
  <Purpose>
    Nodemanager startup: daemonize (unless FOREGROUND), either write a pid
    file (TEST_NM) or enforce the single-instance process lock, log system
    information, load nodeman.cfg, initialize network restrictions and
    Affix, then repeatedly try to determine our external IP address.

  <Side Effects>
    Calls harshexit.harshexit() when nodeman.cfg is missing required
    entries; may daemonize the process.
  """
  global configuration

  if not FOREGROUND:
    # Background ourselves.
    daemon.daemonize()


  # Check if we are running in testmode.
  if TEST_NM:
    nodemanager_pid = os.getpid()
    servicelogger.log("[INFO]: Running nodemanager in test mode on port 1224, "+
                      "pid %s." % str(nodemanager_pid))
    nodeman_pid_file = open(os.path.join(os.getcwd(), 'nodemanager.pid'), 'w')
    
    # Write out the pid of the nodemanager process that we started to a file.
    # This is only done if the nodemanager was started in test mode.
    try:
      nodeman_pid_file.write(str(nodemanager_pid))
    finally:
      nodeman_pid_file.close()

  else:
    # ensure that only one instance is running at a time...
    gotlock = runonce.getprocesslock("seattlenodemanager")

    if gotlock == True:
      # I got the lock.   All is well...
      pass
    else:
      # Another instance holds the lock; gotlock is its pid when known.
      if gotlock:
        servicelogger.log("[ERROR]:Another node manager process (pid: " + str(gotlock) + 
                        ") is running")
      else:
        servicelogger.log("[ERROR]:Another node manager process is running")
      return


  # NOTE(review): this message opens the release name with a double quote
  # but closes it with a single quote -- looks like a typo in the string.
  servicelogger.log('[INFO]: This is Seattle release "' + version + "'") 

  # Feature add for #1031: Log information about the system in the nm log...
  servicelogger.log('[INFO]:platform.python_version(): "' + 
    str(platform.python_version())+'"')
  servicelogger.log('[INFO]:platform.platform(): "' + 
    str(platform.platform())+'"')

  # uname on Android only yields 'Linux', let's be more specific.
  try:
    import android
    servicelogger.log('[INFO]:platform.uname(): Android / "' + 
      str(platform.uname())+'"')
  except ImportError:
    servicelogger.log('[INFO]:platform.uname(): "'+str(platform.uname())+'"')

  # I'll grab the necessary information first...
  servicelogger.log("[INFO]:Loading config")
  # BUG: Do this better?   Is this the right way to engineer this?
  configuration = persist.restore_object("nodeman.cfg")

  # If Seattle is not installed, the nodemanager will have no vesseldict
  # and an incomplete config. Log this problem and exit.
  try:
    if configuration["seattle_installed"] is not True:
      servicelogger.log("[ERROR]:Seattle is not installed. Run the Seattle installer to create the required configuration files before starting the nodemanager. Exiting.")
      harshexit.harshexit(10)
  except KeyError:
    # There isn't even a "seattle_installed" entry in this dict!?
    servicelogger.log("[ERROR]:The nodemanager configuration, nodeman.cfg, is corrupt. Exiting.")
    harshexit.harshexit(11)
  
  
  # Armon: initialize the network restrictions
  initialize_ip_interface_restrictions(configuration)
  
  
  # Enable Affix and overload various Repy network API calls 
  # with Affix-enabled calls.
  # Use the node's publickey to generate a name for our node.
  mypubkey = rsa_publickey_to_string(configuration['publickey']).replace(" ", "")
  affix_stack_name = sha_hexhash(mypubkey)

  enable_affix('(CoordinationAffix)(MakeMeHearAffix)(NamingAndResolverAffix,' + 
      affix_stack_name + ')')

  # get the external IP address...
  myip = None
  while True:
    try:
      # Try to find our external IP.
      myip = emulcomm.getmyip()
    except Exception, e: # Replace with InternetConnectivityError ?
      # If we aren't connected to the internet, emulcomm.getmyip() raises this:
      if len(e.args) >= 1 and e.args[0] == "Cannot detect a connection to the Internet.":
        # So we try again.
        pass
      else:
        # It wasn't emulcomm.getmyip()'s exception. re-raise.
        raise
    else:
      # We succeeded in getting our external IP. Leave the loop.
      break
    # Only reached on the "no Internet connection" path above: pause
    # briefly before retrying getmyip().
    time.sleep(0.1)
Пример #48
0
def handle_threading_error():
    """
  <Purpose>
    Handles a repy node failing with ThreadErr. If repy is allowed to use
    more than 10% of the current threads, reduce the global thread count by 50%
    and stop all existing vessels

  <Arguments>
    None
  
  <Exceptions>
    None

  <Side Effects>
    May re-write all resource files and stop all vessels

  <Returns>
    None
  """
    # Make a log of this
    servicelogger.log(
        "[ERROR]:A Repy vessel has exited with ThreadErr status. Checking to determine next step"
    )

    # Get all the names of the vessels
    vesselnamelist = nmAPI.vesseldict.keys()

    # read in all of the resource files so that we can look at and possibly
    # manipulate them.
    resourcedicts = {}
    for vesselname in vesselnamelist:
        resourcedicts[
            vesselname] = resourcemanipulation.read_resourcedict_from_file(
                'resource.' + vesselname)

    # Get the number of threads Repy has allocated
    allowedthreadcount = 0
    for vesselname in vesselnamelist:
        allowedthreadcount = allowedthreadcount + resourcedicts[vesselname][
            'events']

    # Get the total number os system threads currently used
    totalusedthreads = nonportable.os_api.get_system_thread_count()

    # Log this information
    servicelogger.log("[WARNING]:System Threads: " + str(totalusedthreads) +
                      "  Repy Allocated Threads: " + str(allowedthreadcount))

    # Get the NM configuration
    configuration = persist.restore_object("nodeman.cfg")

    # Check if there is a threshold configuration,
    # otherwise add the default configuration
    if NOOP_CONFIG_KEY in configuration:
        threshold = configuration[NOOP_CONFIG_KEY]
    else:
        threshold = DEFAULT_NOOP_THRESHOLD
        configuration[NOOP_CONFIG_KEY] = threshold
        persist.commit_object(configuration, "nodeman.cfg")

    # Check if we are below the threshold, if so
    # then just return, this is a noop
    if allowedthreadcount < totalusedthreads * threshold:
        return

    servicelogger.log("[ERROR]:Reducing number of system threads!")

    #### We are above the threshold!   Let's cut everything by 1/2

    # First, update the resource files
    for vesselname in vesselnamelist:
        # cut the events by 1/2
        resourcedicts[vesselname][
            'events'] = resourcedicts[vesselname]['events'] / 2
        # write out the new resource files...
        resourcemanipulation.write_resourcedict_to_file(
            resourcedicts[vesselname], 'resource.' + vesselname)

    # Create the stop tuple, exit code 57 with an error message
    stoptuple = (57,
                 "Fatal system-wide threading error! Stopping all vessels.")

    # Stop each vessel
    for vesselname in vesselnamelist:
        try:
            # Stop each vessel, using our stoptuple
            nmAPI.stopvessel(vesselname, stoptuple)
        except Exception, exp:
            # Forge on, regardless of errors
            servicelogger.log(
                "[ERROR]:Failed to reset vessel (Handling ThreadErr). Exception: "
                + str(exp))
            servicelogger.log_last_exception()
def uninstall_Windows():
  """
  <Purpose>
    Removes seattle from the Winodws registry startup key and/or the
    startup folder should either exist, then stops all seattle processes using
    stop_all_seattle_process.py
  <Arguments>
    None.
  <Exceptions>
    Possible IOError could be caused by filepath manipulation from a
      sub-function.
    SeattleNotInstalledError if seattle was not installed prior to uninstall.
  <Side Effects>
    Removes seattle from the Windows registry key and/or the Windows startup
    folder if it exists in either place.
    Stops seattle from running.
  <Returns>
    True if the uninstall succeeded.  Currently, if uninstall fails, it must be
    because seattle was not installed prior to uninstall.  We must return a
    boolean value for the parent function.
  """
  # First see if seattle appears as a value in the Windows startup registry key,
  # and remove it if it exists.
  # removed_from_registry is used later and thus must have a value in case the
  # try: block below raises an exception.
  removed_from_registry = False
  try:
    removed_from_registry = remove_seattle_from_win_startup_registry()
  except WindowsError:
    print "The uninstaller does not have access to the Windows registry " \
        + "startup keys. This means that seattle is likely not installed in " \
        + "your Windows registry startup key, though you may want to " \
        + "manually check the following registry keys and remove seattle " \
        + "from those keys should it exist there: "
    print "HKEY_LOCAL_MACHINE\Software\Microsoft\Windows\CurrentVersion\Run"
    print "HKEY_CURRENT_USER\Software\Microsoft\Windows\CurrentVersion\Run"
    # Distinguish the above-printed text from what will be printed later by
    # by printing a blank line.
    print
    servicelogger.log(" uninstaller could not access the Windows registry " \
                        + "during this attempted uninstall.")



  # Next, see if there is a link to the seattle starter script in the startup
  # folder and remove it if it is there.
  if not WIN_STARTUP_SCRIPT_PATH == None:
    removed_from_startup_folder = \
        remove_seattle_from_win_startup_folder()



  # Check to see if uninstall actually removed seattle from the computer.
  if not removed_from_registry and not removed_from_startup_folder:
    raise SeattleNotInstalledError("Seattle could not be detected as " \
                                     + "having been installed prior to " \
                                     + "uninstall.")
  elif removed_from_registry or removed_from_startup_folder:
    # Stop all instances of seattle from running before returning.
    stop_all_seattle_processes.main()
    return True
Пример #50
0
    else:
      # Just use getmyip(), this is the default behavior and will work if we have preferences set
      # We only want to call getmyip() once, rather than in the loop since this potentially avoids
      # rebuilding the allowed IP cache for each possible port
      bind_ip = emulcomm.getmyip()
        
      for possibleport in configuration['ports']:
        try:
          
          if use_nat:
            # use the sha hash of the nodes public key with the vessel
            # number as an id for this node
            unique_id = rsa_publickey_to_string(configuration['publickey'])
            unique_id = sha_hexhash(unique_id)
            unique_id = unique_id+str(configuration['service_vessel'])
            servicelogger.log("[INFO]: Trying NAT wait")
            nat_waitforconn(unique_id, possibleport,
                    nmconnectionmanager.connection_handler)

          # do a local waitforconn (not using a forowarder)
          # this makes the node manager easily accessible locally
          waitforconn(bind_ip, possibleport, 
                    nmconnectionmanager.connection_handler)
        
        except Exception, e:
          servicelogger.log("[ERROR]: when calling waitforconn for the connection_handler: " + str(e))
          servicelogger.log_last_exception()
        else:
          # the waitforconn was completed so the acceptor is started
          acceptor_state['lock'].acquire()
          acceptor_state['started']= True
Пример #51
0
              if (self.error_count % error_skip_count == 0):
                servicelogger.log('AdvertiseError occured, continuing: '+str(e))
              self.error_count += 1
            # Log all other types of errors
            else:
              servicelogger.log('AdvertiseError occured, continuing: '+str(e))
          except Exception, e:
            servicelogger.log_last_exception()
            # an unexpected exception occured, exit and restart
            return
           

        # wait to avoid sending too frequently
        time.sleep(adsleepfrequency)
    except Exception, e:
      exceptionstring = "[ERROR]:"
      (etype, value, tb) = sys.exc_info()
    
      for line in traceback.format_tb(tb):
        exceptionstring = exceptionstring + line
  
      # log the exception that occurred.
      exceptionstring = exceptionstring + str(etype)+" "+str(value)+"\n"

      servicelogger.log(exceptionstring)
      raise e




Пример #52
0
def handle_threading_error():
  """
  <Purpose>
    Handles a repy node failing with ThreadErr. If repy is allowed to use
    more than 10% of the current threads, reduce the global thread count by 50%
    and stop all existing vessels

  <Arguments>
    None
  
  <Exceptions>
    None

  <Side Effects>
    May re-write all resource files and stop all vessels

  <Returns>
    None
  """
  # Make a log of this
  servicelogger.log("[ERROR]:A Repy vessel has exited with ThreadErr status. Checking to determine next step")

  # Get all the names of the vessels
  vesselnamelist = nmAPI.vesseldict.keys()
  
  # read in all of the resource files so that we can look at and possibly 
  # manipulate them.
  resourcedicts = {}
  for vesselname in vesselnamelist:
    resourcedicts[vesselname] = resourcemanipulation.read_resourcedict_from_file('resource.'+vesselname)
  
  # Get the number of threads Repy has allocated
  allowedthreadcount = 0
  for vesselname in vesselnamelist:
    allowedthreadcount = allowedthreadcount + resourcedicts[vesselname]['events']
  
  # Get the total number os system threads currently used 
  totalusedthreads = nonportable.os_api.get_system_thread_count()
  
  # Log this information
  servicelogger.log("[WARNING]:System Threads: "+str(totalusedthreads)+"  Repy Allocated Threads: "+str(allowedthreadcount))
  
  # Get the NM configuration
  configuration = persist.restore_object("nodeman.cfg")
  
  # Check if there is a threshold configuration,
  # otherwise add the default configuration
  if NOOP_CONFIG_KEY in configuration:
    threshold = configuration[NOOP_CONFIG_KEY]
  else:
    threshold = DEFAULT_NOOP_THRESHOLD
    configuration[NOOP_CONFIG_KEY] = threshold
    persist.commit_object(configuration, "nodeman.cfg")
  
  # Check if we are below the threshold, if so
  # then just return, this is a noop
  if allowedthreadcount < totalusedthreads * threshold:
    return
  
  servicelogger.log("[ERROR]:Reducing number of system threads!")



  #### We are above the threshold!   Let's cut everything by 1/2

  # First, update the resource files
  for vesselname in vesselnamelist:
    # cut the events by 1/2
    resourcedicts[vesselname]['events'] = resourcedicts[vesselname]['events'] / 2
    # write out the new resource files...
    resourcemanipulation.write_resourcedict_to_file(resourcedicts[vesselname], 'resource.'+vesselname)
  

  
  
  # Create the stop tuple, exit code 57 with an error message
  stoptuple = (57, "Fatal system-wide threading error! Stopping all vessels.")
  
  # Stop each vessel
  for vesselname in vesselnamelist:
    try:
      # Stop each vessel, using our stoptuple
      nmAPI.stopvessel(vesselname,stoptuple)
    except Exception, exp:
      # Forge on, regardless of errors
      servicelogger.log("[ERROR]:Failed to reset vessel (Handling ThreadErr). Exception: "+str(exp))
      servicelogger.log_last_exception()
Пример #53
0
            # assign the nodemanager name
            myname_port = str(bind_ip) + ":" + str(possibleport)
          # If there is no error, we were able to successfully start listening.
          # Create the thread, and start it up!
          accepter = nmconnectionmanager.AccepterThread(serversocket)
          accepter.start()
          
          # Now that we created an accepter, let's use it!          
          set_accepter(accepter)

          # MOSHE: Is this thread safe!?          
          # Now that waitforconn has been called, unset the accepter reset flag
          node_reset_config['reset_accepter'] = False
        except Exception, e:
          # print bind_ip, port, e
          servicelogger.log("[ERROR]: when calling listenforconnection for the connection_handler: " + str(e))
          servicelogger.log_last_exception()
        else:
          break

      else:
        servicelogger.log("[ERROR]: cannot find a port for recvmess")

    # check infrequently
    time.sleep(configuration['pollfrequency'])
  




Пример #54
0
                        myname_port = str(bind_ip) + ":" + str(possibleport)
                    # If there is no error, we were able to successfully start listening.
                    # Create the thread, and start it up!
                    accepter = nmconnectionmanager.AccepterThread(serversocket)
                    accepter.start()

                    # Now that we created an accepter, let's use it!
                    set_accepter(accepter)

                    # MOSHE: Is this thread safe!?
                    # Now that waitforconn has been called, unset the accepter reset flag
                    node_reset_config['reset_accepter'] = False
                except Exception, e:
                    # print bind_ip, port, e
                    servicelogger.log(
                        "[ERROR]: when calling listenforconnection for the connection_handler: "
                        + str(e))
                    servicelogger.log_last_exception()
                else:
                    break

            else:
                servicelogger.log("[ERROR]: cannot find a port for recvmess")

        # check infrequently
        time.sleep(configuration['pollfrequency'])


# has the thread started?
def is_worker_thread_started():
    for thread in threading.enumerate():
Пример #55
0
def main():
  """
  <Purpose>
    Start up the node manager: optionally daemonize, enforce
    single-instance (or write a pid file in test mode), log platform
    information, load the node manager configuration, apply network
    interface restrictions, and (on Linux/Darwin) ensure the 2009
    seattle crontab entry has been installed.

  <Arguments>
    None

  <Side Effects>
    Rebinds the module-level `configuration` object; may write
    'nodemanager.pid' (test mode) and 'nodeman.cfg' (crontab update).

  <Returns>
    None.  Returns early if another node manager instance holds the
    process lock.
  """
  global configuration

  if not FOREGROUND:
    # Background ourselves.
    daemon.daemonize()


  # Check if we are running in testmode.
  if TEST_NM:
    nodemanager_pid = os.getpid()
    servicelogger.log("[INFO]: Running nodemanager in test mode on port <nodemanager_port>, "+
                      "pid %s." % str(nodemanager_pid))
    nodeman_pid_file = open(os.path.join(os.getcwd(), 'nodemanager.pid'), 'w')
    
    # Write out the pid of the nodemanager process that we started to a file.
    # This is only done if the nodemanager was started in test mode.
    try:
      nodeman_pid_file.write(str(nodemanager_pid))
    finally:
      nodeman_pid_file.close()

  else:
    # ensure that only one instance is running at a time...
    gotlock = runonce.getprocesslock("seattlenodemanager")

    if gotlock == True:
      # I got the lock.   All is well...
      pass
    else:
      # The branches below treat a truthy non-True value as the pid of
      # the other node manager process, and a falsy value as "another
      # instance is running but its pid is unknown".
      if gotlock:
        servicelogger.log("[ERROR]:Another node manager process (pid: " + str(gotlock) + 
                        ") is running")
      else:
        servicelogger.log("[ERROR]:Another node manager process is running")
      return



  # Feature add for #1031: Log information about the system in the nm log...
  servicelogger.log('[INFO]:platform.python_version(): "' + 
    str(platform.python_version())+'"')
  servicelogger.log('[INFO]:platform.platform(): "' + 
    str(platform.platform())+'"')

  # uname on Android only yields 'Linux', let's be more specific.
  # The `android` module only imports successfully on Android devices.
  try:
    import android
    servicelogger.log('[INFO]:platform.uname(): Android / "' + 
      str(platform.uname())+'"')
  except ImportError:
    servicelogger.log('[INFO]:platform.uname(): "'+str(platform.uname())+'"')

  # I'll grab the necessary information first...
  servicelogger.log("[INFO]:Loading config")
  # BUG: Do this better?   Is this the right way to engineer this?
  configuration = persist.restore_object("nodeman.cfg")
  
  
  # Armon: initialize the network restrictions
  initialize_ip_interface_restrictions(configuration)
  
  
  
  # ZACK BOKA: For Linux and Darwin systems, check to make sure that the new
  #            seattle crontab entry has been installed in the crontab.
  #            Do this here because the "nodeman.cfg" needs to have been read
  #            into configuration via the persist module.
  if nonportable.ostype == 'Linux' or nonportable.ostype == 'Darwin':
    if 'crontab_updated_for_2009_installer' not in configuration or \
          configuration['crontab_updated_for_2009_installer'] == False:
      try:
        # crontab may not exist on Android, therefore let's not check
        # if we are running on Android. See #1302 and #1254.
        try:
          import android
        except ImportError:
          import update_crontab_entry
          modified_crontab_entry = \
              update_crontab_entry.modify_seattle_crontab_entry()
          # If updating the seattle crontab entry succeeded, then update the
          # 'crontab_updated_for_2009_installer' so the nodemanager no longer
          # tries to update the crontab entry when it starts up.
          if modified_crontab_entry:
            configuration['crontab_updated_for_2009_installer'] = True
            persist.commit_object(configuration,"nodeman.cfg")

      except Exception,e:
        # Log the failure but keep starting up; the crontab update is
        # retried on the next start since the flag was never committed.
        exception_traceback_string = traceback.format_exc()
        servicelogger.log("[ERROR]: The following error occured when " \
                            + "modifying the crontab for the new 2009 " \
                            + "seattle crontab entry: " \
                            + exception_traceback_string)
Пример #56
0
                            if (self.error_count % error_skip_count == 0):
                                servicelogger.log(
                                    'AdvertiseError occured, continuing: ' +
                                    str(e))
                            self.error_count += 1
                        # Log all other types of errors
                        else:
                            servicelogger.log(
                                'AdvertiseError occured, continuing: ' +
                                str(e))
                    except Exception, e:
                        servicelogger.log_last_exception()
                        # an unexpected exception occured, exit and restart
                        return

                # wait to avoid sending too frequently
                time.sleep(adsleepfrequency)
        except Exception, e:
            exceptionstring = "[ERROR]:"
            (etype, value, tb) = sys.exc_info()

            for line in traceback.format_tb(tb):
                exceptionstring = exceptionstring + line

            # log the exception that occurred.
            exceptionstring = exceptionstring + str(etype) + " " + str(
                value) + "\n"

            servicelogger.log(exceptionstring)
            raise e