def get_downtime_fd(entry_name, cmdname):
    """Return the DowntimeFile object configured for the factory.

    The downtime file location is read from the 'DowntimesFile' value of the
    glidein description; the same description is used for every entry.

    @type entry_name: string
    @param entry_name: Name of the entry (or 'factory'); only used in the
        error message
    @type cmdname: string
    @param cmdname: Name of the command being executed (not used here)

    @raise RuntimeError: if loading the glidein description fails with
        an IOError
    """
    try:
        # New style has config all in the factory file
        descript = glideFactoryConfig.GlideinDescript()
    except IOError:
        raise RuntimeError("Failed to load config for %s" % entry_name)

    downtimes_path = descript.data['DowntimesFile']
    return glideFactoryDowntimeLib.DowntimeFile(downtimes_path)
def get_downtime_fd_dict(entry_or_id, cmdname, opt_dict):
    """Build a dictionary of downtime file objects keyed by entry name.

    @type entry_or_id: string
    @param entry_or_id: A single entry name, or one of the selectors
        'entries' / 'All'
    @type cmdname: string
    @param cmdname: Name of the command being executed; passed through to
        get_downtime_fd
    @type opt_dict: dict
    @param opt_dict: Parsed options; only the presence of the "entries" key
        is checked

    @rtype: dict
    @return: For a single entry name, a one-element dictionary. For
        'entries' or 'All', one element per entry listed in the glidein
        description, plus a 'factory' element when the selector is 'All'
        and "entries" is not in opt_dict.
    """
    if entry_or_id not in ('entries', 'All'):
        return {entry_or_id: get_downtime_fd(entry_or_id, cmdname)}

    glideinDescript = glideFactoryConfig.GlideinDescript()
    out_fds = {}
    # str.split() instead of string.split(): the latter no longer exists
    # in Python 3
    for entry in glideinDescript.data['Entries'].split(','):
        out_fds[entry] = get_downtime_fd(entry, cmdname)

    if entry_or_id == 'All' and "entries" not in opt_dict:
        out_fds['factory'] = get_downtime_fd('factory', cmdname)
    return out_fds
def parse_args():
    """Parse sys.argv and return a configured GlideinDescript object.

    Expected arguments: factory_dir date [time].

    Side effect: glideFactoryConfig.factoryConfig.glidein_descript_file is
    rewritten to be prefixed with factory_dir.

    @return: The GlideinDescript object, with three extra attributes set:
        factory_dir, date_arr (parsed from the second argument) and
        time_arr (parsed from the third argument, or (0, 0, 0) when it is
        not given)

    @raise ValueError: if fewer than two arguments are given, or if the
        glidein description cannot be loaded from factory_dir
    """
    if len(sys.argv) < 3:
        raise ValueError("Not enough arguments!")

    factory_dir = sys.argv[1]
    try:
        glideFactoryConfig.factoryConfig.glidein_descript_file = os.path.join(
            factory_dir,
            glideFactoryConfig.factoryConfig.glidein_descript_file)
        glideinDescript = glideFactoryConfig.GlideinDescript()
    except Exception:
        # Deliberately not a bare except: KeyboardInterrupt and SystemExit
        # must not be reported as "not a factory"
        raise ValueError("%s is not a factory!" % factory_dir)

    glideinDescript.factory_dir = factory_dir
    glideinDescript.date_arr = gWftArgsHelper.parse_date(sys.argv[2])
    if len(sys.argv) >= 4:
        glideinDescript.time_arr = gWftArgsHelper.parse_time(sys.argv[3])
    else:
        glideinDescript.time_arr = (0, 0, 0)

    return glideinDescript
def main(parent_pid, sleep_time, advertize_rate,
         startup_dir, entry_names, group_id):
    """
    GlideinFactoryEntryGroup main function

    Setup logging, monitoring, and configuration information. Starts the Entry
    group main loop and handles cleanup at shutdown.

    @type parent_pid: int
    @param parent_pid: The pid for the Factory daemon

    @type sleep_time: int
    @param sleep_time: The number of seconds to sleep between iterations

    @type advertize_rate: int
    @param advertize_rate: The rate at which advertising should occur

    @type startup_dir: string
    @param startup_dir: The "home" directory for the entry.

    @type entry_names: string
    @param entry_names: Colon-separated names of the entries this process
        should work on

    @type group_id: string
    @param group_id: Group id

    @raise RuntimeError: if one of the requested entries is not listed in the
        'Entries' value of the glidein description
    """

    # Assume name to be group_[0,1,2] etc. Only required to create log_dir
    # where tasks common to the group will be stored. There is no other
    # significance to the group_name and number of entries supported by a group
    # can change between factory reconfigs
    group_name = "group_%s" % group_id

    os.chdir(startup_dir)

    # Setup the lock_dir
    gfi.factoryConfig.lock_dir = os.path.join(startup_dir, "lock")

    # Read information about the glidein and frontends
    glideinDescript = gfc.GlideinDescript()
    frontendDescript = gfc.FrontendDescript()

    # set factory_collector at a global level, since we do not expect it to change
    gfi.factoryConfig.factory_collector = glideinDescript.data[
        'FactoryCollector']

    # Load factory keys
    glideinDescript.load_pub_key()
    glideinDescript.load_old_rsa_key()

    # Dictionary of Entry objects this group will process
    my_entries = {}
    glidein_entries = glideinDescript.data['Entries']

    # Initiate the logs
    logSupport.log_dir = os.path.join(glideinDescript.data['LogDir'],
                                      'factory')
    # NOTE(review): eval() executes whatever is stored in 'ProcessLogs'; this
    # is only safe while that value comes from a trusted source.
    process_logs = eval(glideinDescript.data['ProcessLogs'])
    init_logs(group_name, logSupport.log_dir, process_logs)

    logSupport.log.info("Starting up")
    logSupport.log.info("Entries processed by %s: %s " % (group_name,
                                                          entry_names))

    # Check if all the entries in this group are valid.
    # The configured names are split once, outside the loop; str.split() is
    # used because string.split() no longer exists in Python 3.
    configured_entries = set(glidein_entries.split(','))
    for entry in entry_names.split(':'):
        if entry not in configured_entries:
            msg = "Entry '%s' not configured: %s" % (entry, glidein_entries)
            logSupport.log.warning(msg)
            raise RuntimeError(msg)

        # Create entry objects
        my_entries[entry] = glideFactoryEntry.Entry(entry, startup_dir,
                                                    glideinDescript,
                                                    frontendDescript)

    # Create lock file for this group and register its parent
    pid_obj = glideFactoryPidLib.EntryGroupPidSupport(startup_dir, group_name)
    pid_obj.register(parent_pid)

    # The outer try/finally guarantees the lock is released even if the
    # "Dying" log call itself fails.
    try:
        try:
            iterate(parent_pid, sleep_time, advertize_rate,
                    glideinDescript, frontendDescript,
                    group_name, my_entries)
        except KeyboardInterrupt:
            logSupport.log.info("Received signal...exit")
        except:
            # Log and propagate everything else unchanged
            logSupport.log.exception("Exception occurred in iterate: ")
            raise
        finally:
            # No need to cleanup. The parent should be doing it
            logSupport.log.info("Dying")
    finally:
        pid_obj.relinquish()
def main():
    """Submit a single test pilot on behalf of a frontend request.

    Changes to the factory working directory ($GLIDEIN_FACTORY_DIR if set,
    otherwise /var/lib/gwms-factory/work-dir/), looks up the glideclient
    classad matching the requested frontend/entry, rebuilds the submit
    credentials and pilot parameters from it and calls submitGlideins for
    one glidein.

    @rtype: int
    @return: 0 after the submission call, 1 if the working directory cannot
        be entered or no glideclient classad is found
    """
    # Move to the working directory
    work_dir = os.environ.get("GLIDEIN_FACTORY_DIR",
                              "/var/lib/gwms-factory/work-dir/")
    try:
        os.chdir(work_dir)
    except OSError as ose:
        # Report the directory that was actually attempted, which may come
        # from the environment rather than being the default
        logging.error("Cannot chdir to %s: %s", work_dir, ose)
        return 1

    # Parse command line options
    options = parse_opts()
    entry_name = options.entry_name
    wms_collector = options.wms_collector

    # Set some variables needed later on
    params = {}
    status_sf = {}
    nr_glideins = 1
    idle_lifetime = 3600 * 24
    factory_config = FactoryConfig()
    glidein_descript = gfc.GlideinDescript()
    frontend_descript = gfc.FrontendDescript()
    collector = htcondor.Collector(wms_collector)

    req_name = get_reqname(collector, options.fe_name, entry_name)
    logging.debug("Using reques name %s" % req_name)

    factory_config.submit_dir = '/var/lib/gwms-factory/work-dir'
    constraint_gc = '(MyType=="glideclient") && (Name=="%s")' % (req_name)

    ads_gc = collector.query(htcondor.AdTypes.Any, constraint_gc)
    if not ads_gc:
        logging.error("Cannot find glideclient classad using constraint %s",
                      constraint_gc)
        return 1
    ad_gc = ads_gc[0]
    log_debug(ad_gc, header='glideclient classad')

    # Load factory config and get some info that will go in the pilot classad
    glidein_descript.load_pub_key()
    sym_key_obj, frontend_sec_name = validate_frontend(
        ad_gc, frontend_descript, glidein_descript.data['PubKeyObj'])
    # GlideinSecurityClass
    security_class = sym_key_obj.decrypt_hex(
        ad_gc['GlideinEncParamSecurityClass'])
    proxyid = sym_key_obj.decrypt_hex(ad_gc['GlideinEncParamSubmitProxy'])
    user_name = frontend_descript.get_username(frontend_sec_name,
                                               security_class)

    # Prepare some values that ends up in the Arguments classad
    # of the pilot, i.e., the ClientWeb instance
    client_web_url = ad_gc['WebURL']  # -clientweb
    client_signtype = ad_gc['WebSignType']  # -signtype
    client_descript = ad_gc['WebDescriptFile']  # -clientdescript
    client_sign = ad_gc['WebDescriptSign']  # -clientsign
    client_group = ad_gc['GroupName']  # -clientgroup
    client_group_web_url = ad_gc['WebGroupURL']  # -clientwebgroup
    # -clientdescriptgroup
    client_group_descript = ad_gc['WebGroupDescriptFile']
    client_group_sign = ad_gc['WebGroupDescriptSign']  # -clientsigngroup
    client_web = ClientWeb(client_web_url, client_signtype, client_descript,
                           client_sign, client_group, client_group_web_url,
                           client_group_descript, client_group_sign)

    # Create the submit_credentials object
    credentials = SubmitCredentials(user_name, security_class)
    credentials.id = proxyid
    credentials.cred_dir = '/var/lib/gwms-factory/client-proxies/user_%s/glidein_gfactory_instance' % user_name
    credfname = '%s_%s' % (ad_gc['ClientName'], proxyid)
    if not credentials.add_security_credential('SubmitProxy', credfname):
        # Not fatal here: only reported, the submission is still attempted
        fname = os.path.join(credentials.cred_dir,
                             'credential_%s' % credfname)
        logging.info((
            "Problems getting credential file using credentials.add_security_credential."
            " Check file %s permissions"), fname)

    # Set the arguments
    # I was using escapeParam for GLIDECLIENT_ReqNode and GLIDECLIENT_Collector but turned out it's not necessary
    params['CONDOR_VERSION'] = 'default'
    params['CONDOR_OS'] = 'default'
    params['CONDOR_ARCH'] = 'default'
    params['GLIDECLIENT_ReqNode'] = ad_gc[
        'GlideinParamGLIDECLIENT_ReqNode']
    params['GLIDECLIENT_Rank'] = ad_gc.get('GlideinParamGLIDECLIENT_Rank',
                                           "1")
    params['GLIDEIN_Collector'] = ad_gc['GlideinParamGLIDEIN_Collector']
    params['USE_MATCH_AUTH'] = ad_gc['GlideinParamUSE_MATCH_AUTH']
    params['Report_Failed'] = 'NEVER'

    # Now that we have everything submit the pilot!
    logging.getLogger().setLevel(logging.DEBUG)
    submitGlideins(entry_name, "test.test", int(nr_glideins), idle_lifetime,
                   "test:test", credentials, client_web, params, status_sf,
                   log=logging.getLogger(), factoryConfig=factory_config)

    return 0
def main(startup_dir):
    """
    Reads in the configuration file and starts up the factory

    Configures the factory globals and process logging, loads (and when too
    old, recreates) the factory keys, registers the factory pid and then
    calls spawn(). On HUPException the lock is released and the process is
    replaced by reconfig_glidein via os.execv.

    Exits with status 1 (sys.exit) when no entries are enabled.

    @type startup_dir: String
    @param startup_dir: Path to glideinsubmit directory
    """

    # Force integrity checks on all condor operations
    glideFactoryLib.set_condor_integrity_checks()

    glideFactoryInterface.factoryConfig.lock_dir = os.path.join(startup_dir,
                                                                "lock")
    glideFactoryConfig.factoryConfig.glidein_descript_file = \
        os.path.join(startup_dir,
                     glideFactoryConfig.factoryConfig.glidein_descript_file)
    glideinDescript = glideFactoryConfig.GlideinDescript()
    frontendDescript = glideFactoryConfig.FrontendDescript()

    # set factory_collector at a global level, since we do not expect it to change
    glideFactoryInterface.factoryConfig.factory_collector = glideinDescript.data['FactoryCollector']

    # Setup the glideFactoryLib.factoryConfig so that we can process the
    # globals classads
    glideFactoryLib.factoryConfig.config_whoamI(
        glideinDescript.data['FactoryName'],
        glideinDescript.data['GlideinName'])
    glideFactoryLib.factoryConfig.config_dirs(
        startup_dir, glideinDescript.data['LogDir'],
        glideinDescript.data['ClientLogBaseDir'],
        glideinDescript.data['ClientProxiesBaseDir'])

    # Set the Log directory
    logSupport.log_dir = os.path.join(glideinDescript.data['LogDir'],
                                      "factory")

    # Configure factory process logging
    # NOTE(review): eval() executes whatever is stored in 'ProcessLogs'; this
    # is only safe while that value comes from a trusted source.
    process_logs = eval(glideinDescript.data['ProcessLogs'])
    for plog in process_logs:
        # Logs whose message types mention ADMIN go to a separate
        # "factoryadmin" handler with a fixed set of message types
        if 'ADMIN' in plog['msg_types'].upper():
            handler_name = "factoryadmin"
            msg_types = "DEBUG,INFO,WARN,ERR"
        else:
            handler_name = "factory"
            msg_types = plog['msg_types']
        logSupport.add_processlog_handler(handler_name,
                                          logSupport.log_dir,
                                          msg_types,
                                          plog['extension'],
                                          int(float(plog['max_days'])),
                                          int(float(plog['min_days'])),
                                          int(float(plog['max_mbytes'])),
                                          int(float(plog['backup_count'])),
                                          plog['compression'])
    logSupport.log = logging.getLogger("factory")
    logSupport.log.info("Logging initialized")

    if glideinDescript.data['Entries'].strip() in ('', ','):
        # No entries are enabled. There is nothing to do. Just exit here.
        log_msg = "No Entries are enabled. Exiting."
        logSupport.log.error(log_msg)
        sys.exit(1)

    write_descript(glideinDescript, frontendDescript,
                   os.path.join(startup_dir, 'monitor/'))

    try:
        os.chdir(startup_dir)
    except:
        # Logged and re-raised unchanged
        logSupport.log.exception("Failed starting Factory. Unable to change to startup_dir: ")
        raise

    try:
        if is_file_old(glideinDescript.default_rsakey_fname,
                       int(glideinDescript.data['OldPubKeyGraceTime'])):
            # First backup and load any existing key
            logSupport.log.info("Backing up and loading old key")
            glideinDescript.backup_and_load_old_key()
            # Create a new key for this run
            logSupport.log.info("Recreating and loading new key")
            glideinDescript.load_pub_key(recreate=True)
        else:
            # Key is recent enough. Just reuse it.
            logSupport.log.info("Key is recent enough, reusing for this run")
            glideinDescript.load_pub_key(recreate=False)
            logSupport.log.info("Loading old key")
            glideinDescript.load_old_rsa_key()
    except RSAError as e:
        logSupport.log.exception("Failed starting Factory. Exception occurred loading factory keys: ")
        # The extra details are optional attributes of the exception
        key_fname = getattr(e, 'key_fname', None)
        cwd = getattr(e, 'cwd', None)
        if key_fname and cwd:
            logSupport.log.error("Failed to load RSA key %s with current working direcotry %s", key_fname, cwd)
            logSupport.log.error("If you think the rsa key might be corrupted, try to remove it, and then reconfigure the factory to recreate it")
        raise
    except IOError as ioe:
        logSupport.log.exception("Failed starting Factory. Exception occurred loading factory keys: ")
        # errno 2 is "No such file or directory"
        if ioe.filename == 'rsa.key' and ioe.errno == 2:
            logSupport.log.error("Missing rsa.key file. Please, reconfigure the factory to recreate it")
        raise
    except:
        logSupport.log.exception("Failed starting Factory. Exception occurred loading factory keys: ")
        raise

    glideFactoryMonitorAggregator.glideFactoryMonitoring.monitoringConfig.my_name = "%s@%s" % (glideinDescript.data['GlideinName'],
                                                                                                 glideinDescript.data['FactoryName'])

    glideFactoryInterface.factoryConfig.advertise_use_tcp = (glideinDescript.data['AdvertiseWithTCP'] in ('True', '1'))
    glideFactoryInterface.factoryConfig.advertise_use_multi = (glideinDescript.data['AdvertiseWithMultiple'] in ('True', '1'))
    sleep_time = int(glideinDescript.data['LoopDelay'])
    advertize_rate = int(glideinDescript.data['AdvertiseDelay'])
    restart_attempts = int(glideinDescript.data['RestartAttempts'])
    restart_interval = int(glideinDescript.data['RestartInterval'])

    try:
        glideinwms_dir = os.path.dirname(os.path.dirname(sys.argv[0]))
        glideFactoryInterface.factoryConfig.glideinwms_version = glideinWMSVersion.GlideinWMSDistro(glideinwms_dir, 'checksum.factory').version()
    except Exception:
        # Best effort only; "except Exception" rather than a bare except so
        # that KeyboardInterrupt and SystemExit are not swallowed here
        logSupport.log.exception("Non critical Factory error. Exception occurred while trying to retrieve the glideinwms version: ")

    entries = sorted(glideinDescript.data['Entries'].split(','))

    glideFactoryMonitorAggregator.monitorAggregatorConfig.config_factory(
        os.path.join(startup_dir, "monitor"), entries,
        log=logSupport.log
    )

    # create lock file
    pid_obj = glideFactoryPidLib.FactoryPidSupport(startup_dir)

    increase_process_limit()

    # start
    try:
        pid_obj.register()
    except glideFactoryPidLib.pidSupport.AlreadyRunning as err:
        pid_obj.load_registered()
        logSupport.log.exception("Failed starting Factory. Instance with pid %s is aready running. Exception during pid registration: %s" %
                                 (pid_obj.mypid, err))
        raise

    try:
        try:
            spawn(sleep_time, advertize_rate, startup_dir, glideinDescript,
                  frontendDescript, entries, restart_attempts, restart_interval)
        except KeyboardInterrupt:
            raise
        except HUPException:
            # inside spawn(), outermost try will catch HUPException,
            # then the code within the finally will run
            # which will terminate glideFactoryEntryGroup children processes
            # and then the following 3 lines will be executed.
            logSupport.log.info("Received SIGHUP, reload config uid = %d" % os.getuid())
            # must empty the lock file so that when the thread returns from reconfig_glidein and
            # begins from the beginning, it will not error out which will happen
            # if the lock file is not empty
            pid_obj.relinquish()
            os.execv(os.path.join(FACTORY_DIR, "../creation/reconfig_glidein"),
                     ['reconfig_glidein', '-update_scripts', 'no', '-sighupreload', '-xml', '/etc/gwms-factory/glideinWMS.xml'])
        except:
            # NOTE(review): this logs and swallows every other exception,
            # SystemExit included; left unchanged to preserve the existing
            # shutdown behavior.
            logSupport.log.exception("Exception occurred spawning the factory: ")
    finally:
        pid_obj.relinquish()
def infosys_based(entry_name, opt_dict, infosys_types):
    """Bring entries up or down based on what the information systems report.

    Every selected entry that is associated with at least one of the given
    information system types is put up (via up()) when it appears among the
    production entries reported by RESS/BDII, and down (via down())
    otherwise. Entries without a compatible information system are ignored.

    @type entry_name: string
    @param entry_name: A single entry name, 'factory' (nothing is done), or
        'entries' / 'all' / 'All' for every entry in the glidein description
    @type opt_dict: dict
    @param opt_dict: Parsed options (not used here)
    @type infosys_types: sequence of strings
    @param infosys_types: Information system types to consider; only
        "RESS" and "BDII" are handled

    @rtype: int
    @return: Always 0

    @raise RuntimeError: if a selected type is neither "RESS" nor "BDII"
    """
    # find out which entries I need to look at
    # gather downtime fds for them
    config_els = {}
    if entry_name == 'factory':
        return 0  # nothing to do... the whole factory cannot be controlled by infosys
    elif entry_name in ('entries', 'all', 'All'):
        # all==entries in this case, since there is nothing to do for the factory.
        # 'All' is accepted as well because get_downtime_fd_dict in this file
        # uses that spelling for the same selector.
        glideinDescript = glideFactoryConfig.GlideinDescript()
        # str.split() instead of string.split(): the latter no longer exists
        # in Python 3
        for entry in glideinDescript.data['Entries'].split(','):
            config_els[entry] = {}
    else:
        config_els[entry_name] = {}

    # load the infosys info
    # Iterate over a snapshot of the keys: entries are deleted inside the
    # loop, which is an error on a live dict view in Python 3
    for entry in list(config_els):
        infosys_fd = cgWDictFile.InfoSysDictFile(
            cgWConsts.get_entry_submit_dir('.', entry),
            cgWConsts.INFOSYS_FILE)
        infosys_fd.load()

        # The first element of each record is the infosys type. An entry
        # with no records at all also ends up here as "not compatible".
        compatible_infosys = any(infosys_fd[k][0] in infosys_types
                                 for k in infosys_fd.keys)
        if not compatible_infosys:
            # entry not associated with a compatible infosys, cannot be managed, ignore
            del config_els[entry]
            continue

        config_els[entry]['infosys_fd'] = infosys_fd

    if not config_els:
        return 0  # nothing to do

    # all the remaining entries are handled by one of the supported infosys

    # summarize: infosys type -> server -> list of {'ref', 'entry_name'}
    infosys_data = {}
    for entry in config_els:
        infosys_fd = config_els[entry]['infosys_fd']
        for k in infosys_fd.keys:
            infosys_type = infosys_fd[k][0]
            server = infosys_fd[k][1]
            ref = infosys_fd[k][2]
            infosys_data_type = infosys_data.setdefault(infosys_type, {})
            infosys_data_type.setdefault(server, []).append(
                {'ref': ref, 'entry_name': entry})

    # get production entries
    production_entries = []
    for infosys_type in infosys_data:
        if infosys_type not in infosys_types:
            continue
        infosys_data_type = infosys_data[infosys_type]
        for server in infosys_data_type:
            infosys_data_server = infosys_data_type[server]
            if infosys_type == "RESS":
                production_entries += get_production_ress_entries(
                    server, infosys_data_server)
            elif infosys_type == "BDII":
                production_entries += get_production_bdii_entries(
                    server, infosys_data_server)
            else:
                raise RuntimeError("Unknown infosys type '%s'" % infosys_type)  # should never get here

    # Use the info to bring the entries up or down
    for entry in sorted(config_els):
        if entry in production_entries:
            print("%s up" % entry)
            up(entry, ['up'])
        else:
            print("%s down" % entry)
            down(entry, ['down'])

    return 0
def get_entries(factory_dir):
    """Return the list of entry names from the glidein description.

    @type factory_dir: string
    @param factory_dir: Factory directory (not used here; the glidein
        description is loaded through glideFactoryConfig.GlideinDescript())

    @rtype: list
    @return: The comma-separated 'Entries' value split into names
    """
    glideinDescript = glideFactoryConfig.GlideinDescript()
    # str.split() instead of string.split(): the latter no longer exists
    # in Python 3
    return glideinDescript.data['Entries'].split(',')