def _handle_aes(self, load): ''' Takes the aes encrypted load, decrypts it and runs the encapsulated instructions ''' try: data = self.crypticle.loads(load) except AuthenticationError: self.authenticate() data = self.crypticle.loads(load) # Verify that the publication is valid if 'tgt' not in data or 'jid' not in data or 'fun' not in data \ or 'arg' not in data: return # Verify that the publication applies to this minion if 'tgt_type' in data: if not getattr(self.matcher, '{0}_match'.format(data['tgt_type']))( data['tgt']): return else: if not self.matcher.glob_match(data['tgt']): return # If the minion does not have the function, don't execute, # this prevents minions that could not load a minion module # from returning a predictable exception #if data['fun'] not in self.functions: # return if 'user' in data: log.info(('User {0[user]} Executing command {0[fun]} with jid ' '{0[jid]}'.format(data))) else: log.info( ('Executing command {0[fun]} with jid {0[jid]}'.format(data))) log.debug('Command details {0}'.format(data)) self._handle_decoded_payload(data)
def ec2_tags(): boto_version = StrictVersion(boto.__version__) required_boto_version = StrictVersion('2.8.0') if boto_version < required_boto_version: log.error("%s: installed boto version %s < %s, can't find ec2_tags", __name__, boto_version, required_boto_version) return None if not _on_ec2(): log.info("%s: not an EC2 instance, skipping", __name__) return None (instance_id, region) = _get_instance_info() credentials = _get_credentials() if not credentials: log.error( "%s: no AWS credentials found, see documentation for how to provide them.", __name__) return None # Connect to EC2 and parse the Roles tags for this instance conn = boto.ec2.connect_to_region( region, aws_access_key_id=credentials['access_key'], aws_secret_access_key=credentials['secret_key']) tags = {} try: reservation = conn.get_all_instances(instance_ids=[instance_id])[0] instance = reservation.instances[0] tags = instance.tags except IndexError, e: log.error("Couldn't retrieve instance information: %s", e) return None
def ec2_tags(): boto_version = StrictVersion(boto.__version__) required_boto_version = StrictVersion('2.8.0') if boto_version < required_boto_version: log.error("Installed boto version %s < %s, can't find ec2_tags", boto_version, required_boto_version) return None if not _on_ec2(): log.info("Not an EC2 instance, skipping") return None instance_id, region = _get_instance_info() credentials = _get_credentials() # Connect to EC2 and parse the Roles tags for this instance if not (credentials['access_key'] and credentials['secret_key']): log.error("No AWS credentials found, see documentation for how to provide them.") return None try: conn = boto.ec2.connect_to_region( region, aws_access_key_id=credentials['access_key'], aws_secret_access_key=credentials['secret_key'], ) except Exception, e: log.error("Could not get AWS connection: %s", e) return None
def ec2_tags(): boto_version = StrictVersion(boto.__version__) required_boto_version = StrictVersion('2.8.0') if boto_version < required_boto_version: log.error("%s: installed boto version %s < %s, can't find ec2_tags", __name__, boto_version, required_boto_version) return None if not _on_ec2(): log.info("%s: not an EC2 instance, skipping", __name__) return None (instance_id, region) = _get_instance_info() credentials = _get_credentials() if not credentials: log.error("%s: no AWS credentials found, see documentation for how to provide them.", __name__) return None # Connect to EC2 and parse the Roles tags for this instance conn = boto.ec2.connect_to_region(region, aws_access_key_id=credentials['access_key'], aws_secret_access_key=credentials['secret_key']) tags = {} try: reservation = conn.get_all_instances(instance_ids=[ instance_id ])[0] instance = reservation.instances[0] tags = instance.tags except IndexError, e: log.error("Couldn't retrieve instance information: %s", e) return None
def send(self, enc, load, tries=1, timeout=60): """ Takes two arguments, the encryption type and the base payload """ payload = {"enc": enc} payload["load"] = load pkg = self.serial.dumps(payload) self.socket.send(pkg) self.poller.register(self.socket, zmq.POLLIN) tried = 0 while True: polled = self.poller.poll(timeout * 1000) tried += 1 if polled: break if tries > 1: log.info( "SaltReqTimeoutError: after %s seconds. (Try %s of %s)", timeout, tried, tries, ) if tried >= tries: self.clear_socket() raise SaltReqTimeoutError( "SaltReqTimeoutError: after {} seconds, ran {} " "tries".format(timeout * tried, tried)) return self.serial.loads(self.socket.recv())
def _handle_aes(self, load): ''' Takes the aes encrypted load, decrypts it and runs the encapsulated instructions ''' try: data = self.crypticle.loads(load) except AuthenticationError: self.authenticate() data = self.crypticle.loads(load) # Verify that the publication is valid if 'tgt' not in data or 'jid' not in data or 'fun' not in data \ or 'arg' not in data: return # Verify that the publication applies to this minion if 'tgt_type' in data: if not getattr(self.matcher, '{0}_match'.format(data['tgt_type']))(data['tgt']): return else: if not self.matcher.glob_match(data['tgt']): return # If the minion does not have the function, don't execute, # this prevents minions that could not load a minion module # from returning a predictable exception #if data['fun'] not in self.functions: # return if 'user' in data: log.info(('User {0[user]} Executing command {0[fun]} with jid ' '{0[jid]}'.format(data))) else: log.info(('Executing command {0[fun]} with jid {0[jid]}' .format(data))) log.debug('Command details {0}'.format(data)) self._handle_decoded_payload(data)
def send(self, enc, load, tries=1, timeout=60): ''' Takes two arguments, the encryption type and the base payload ''' payload = {'enc': enc} payload['load'] = load pkg = self.serial.dumps(payload) self.socket.send(pkg) self.poller.register(self.socket, zmq.POLLIN) tried = 0 while True: polled = self.poller.poll(timeout * 1000) tried += 1 if polled: break if tries > 1: log.info( 'SaltReqTimeoutError: after {0} seconds. (Try {1} of {2})'. format(timeout, tried, tries)) if tried >= tries: self.clear_socket() raise SaltReqTimeoutError( 'SaltReqTimeoutError: after {0} seconds, ran {1} tries'. format(timeout * tried, tried)) return self.serial.loads(self.socket.recv())
def start(self): ''' we override start() just for our log message ''' log.info("starting salt-eventsd daemon") # leave the startup to the super's daemon, that's where all # the daemonizing and double-forking takes place super(SaltEventsDaemon, self).start()
def run(self): ''' the method automatically called by start() from our parent class ''' log.info("initializing event listener") self.pid = self._get_pid() self._write_state() self.listen()
def _return_pub(self, ret, ret_cmd='_return'): ''' Return the data from the executed command to the master server ''' if self.opts['multiprocessing']: fn_ = os.path.join(self.proc_dir, ret['jid']) if os.path.isfile(fn_): try: os.remove(fn_) except (OSError, IOError): # The file is gone already pass log.info('Returning information for job: {0}'.format(ret['jid'])) sreq = salt.payload.SREQ(self.opts['master_uri']) if ret_cmd == '_syndic_return': load = {'cmd': ret_cmd, 'jid': ret['jid'], 'id': self.opts['id']} load['return'] = {} for key, value in ret.items(): if key == 'jid' or key == 'fun': continue load['return'][key] = value else: load = {'return': ret['return'], 'cmd': ret_cmd, 'jid': ret['jid'], 'id': self.opts['id']} try: if hasattr(self.functions[ret['fun']], '__outputter__'): oput = self.functions[ret['fun']].__outputter__ if isinstance(oput, string_types): load['out'] = oput except KeyError: pass try: ret_val = sreq.send('aes', self.crypticle.dumps(load)) except SaltReqTimeoutError: ret_val = '' if isinstance(ret_val, string_types) and not ret_val: # The master AES key has changed, reauth self.authenticate() ret_val = sreq.send('aes', self.crypticle.dumps(load)) if self.opts['cache_jobs']: # Local job cache has been enabled fn_ = os.path.join( self.opts['cachedir'], 'minion_jobs', load['jid'], 'return.p') jdir = os.path.dirname(fn_) if not os.path.isdir(jdir): os.makedirs(jdir) salt.utils.fopen(fn_, 'w+').write(self.serial.dumps(ret)) return ret_val
def __init__(self, config='/etc/salt/eventsd'): # retrieve current settings from the config file self.opts = None self._read_yaml(config) # make sure we have a 'general' section if 'general' in self.opts.keys(): self.gen_opts = self.opts['general'] self._init_logger() log.info("loaded config from {0}".format(config))
def _dmidecode_data(regex_dict): ''' Parse the output of dmidecode in a generic fashion that can be used for the multiple system types which have dmidecode. ''' ret = {} if 'proxyminion' in __opts__: return {} # No use running if dmidecode/smbios isn't in the path if salt.utils.which('dmidecode'): out = __salt__['cmd.run']('dmidecode') elif salt.utils.which('smbios'): out = __salt__['cmd.run']('smbios') else: log.info( 'The `dmidecode` binary is not available on the system. GPU grains ' 'will not be available.') return ret for section in regex_dict: section_found = False # Look at every line for the right section for line in out.splitlines(): if not line: continue # We've found it, woohoo! if re.match(section, line): section_found = True continue if not section_found: continue # Now that a section has been found, find the data for item in regex_dict[section]: # Examples: # Product Name: 64639SU # Version: 7LETC1WW (2.21 ) regex = re.compile(r'\s+{0}\s+(.*)$'.format(item)) grain = regex_dict[section][item] # Skip to the next iteration if this grain # has been found in the dmidecode output. if grain in ret: continue match = regex.match(line) # Finally, add the matched data to the grains returned if match: ret[grain] = match.group(1).strip() return ret
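# Illustrative only: a hypothetical regex_dict of the shape _dmidecode_data()
# above iterates over. The outer keys are regexes matched against dmidecode
# section headers; each inner key is a line label (including the trailing
# colon, so the generated r'\s+{0}\s+(.*)$' pattern can match the value after
# it), mapped to the grain name the captured value is stored under. The
# section and grain names here are assumptions, not taken from any real config.
hypothetical_regex_dict = {
    'System Information': {
        'Product Name:': 'productname',
        'Version:': 'systemversion',
    },
}
# grains = _dmidecode_data(hypothetical_regex_dict)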
def _return_pub(self, ret, ret_cmd='_return'): ''' Return the data from the executed command to the master server ''' if self.opts['multiprocessing']: fn_ = os.path.join(self.proc_dir, ret['jid']) if os.path.isfile(fn_): try: os.remove(fn_) except (OSError, IOError): # The file is gone already pass log.info('Returning information for job: {0}'.format(ret['jid'])) sreq = salt.payload.SREQ(self.opts['master_uri']) if ret_cmd == '_syndic_return': load = {'cmd': ret_cmd, 'jid': ret['jid'], 'id': self.opts['id']} load['return'] = {} for key, value in ret.items(): if key == 'jid' or key == 'fun': continue load['return'][key] = value else: load = { 'return': ret['return'], 'cmd': ret_cmd, 'jid': ret['jid'], 'id': self.opts['id'] } try: if hasattr(self.functions[ret['fun']], '__outputter__'): oput = self.functions[ret['fun']].__outputter__ if isinstance(oput, string_types): load['out'] = oput except KeyError: pass try: ret_val = sreq.send('aes', self.crypticle.dumps(load)) except SaltReqTimeoutError: ret_val = '' if isinstance(ret_val, string_types) and not ret_val: # The master AES key has changed, reauth self.authenticate() ret_val = sreq.send('aes', self.crypticle.dumps(load)) if self.opts['cache_jobs']: # Local job cache has been enabled fn_ = os.path.join(self.opts['cachedir'], 'minion_jobs', load['jid'], 'return.p') jdir = os.path.dirname(fn_) if not os.path.isdir(jdir): os.makedirs(jdir) salt.utils.fopen(fn_, 'w+').write(self.serial.dumps(ret)) return ret_val
def _init_worker(self, qdata): ''' The method dumps the data into a worker thread which handles pushing the data into different backends. ''' self.threads_cre += 1 log.info("Starting worker #{0}".format(self.threads_cre)) # make sure we pass a copy of the list worker = SaltEventsdWorker(list(qdata), self.threads_cre, self.event_map, self.backends, **self.opts) worker.start() self.running_workers.append(worker)
def ec2_tags(): boto_version = StrictVersion(boto.__version__) required_boto_version = StrictVersion('2.8.0') if boto_version < required_boto_version: log.error("%s: installed boto version %s < %s, can't find ec2_tags", __name__, boto_version, required_boto_version) return None if not _on_ec2(): log.info("%s: not an EC2 instance, skipping", __name__) return None (instance_id, region) = _get_instance_info() credentials = _get_credentials() # Connect to EC2 and parse the Roles tags for this instance try: conn = boto.ec2.connect_to_region( region, aws_access_key_id=credentials['access_key'], aws_secret_access_key=credentials['secret_key']) except: if not (credentials['access_key'] and credentials['secret_key']): log.error( "%s: no AWS credentials found, see documentation for how to provide them.", __name__) return None else: log.error( "%s: invalid AWS credentials found, see documentation for how to provide them.", __name__) return None tags = {} try: _tags = conn.get_all_tags(filters={ 'resource-type': 'instance', 'resource-id': instance_id }) for tag in _tags: tags[tag.name] = tag.value except IndexError, e: log.error("Couldn't retrieve instance information: %s", e) return None
def _init_worker(self, qdata): ''' write a collection of events to the database. every invocation of this method creates its own thread that writes into the database ''' self.threads_cre += 1 log.info("starting worker #{0}".format(self.threads_cre)) # make sure we pass a copy of the list worker = SaltEventsdWorker(list(qdata), self.threads_cre, self.event_map, self.backends, **self.opts) worker.start() self.running_workers.append(worker)
def get_id(): ''' Guess the id of the minion. - If socket.getfqdn() returns us something other than localhost, use it - Check /etc/hosts for something that isn't localhost that maps to 127.* - Look for a routeable / public IP - A private IP is better than a loopback IP - localhost may be better than killing the minion ''' log.debug('Guessing ID. The id can be explicitly set in {0}' .format('/etc/salt/minion')) fqdn = socket.getfqdn() if 'localhost' != fqdn: log.info('Found minion id from getfqdn(): {0}'.format(fqdn)) return fqdn, False # Can /etc/hosts help us? try: # TODO Add Windows host file support with open('/etc/hosts') as f: line = f.readline() while line: names = line.split() ip = names.pop(0) if ip.startswith('127.'): for name in names: if name != 'localhost': log.info('Found minion id in hosts file: {0}' .format(name)) return name, False line = f.readline() except Exception: pass # What IP addresses do we have? ip_addresses = [salt.utils.socket_util.IPv4Address(a) for a in salt.utils.socket_util.ip4_addrs() if not a.startswith('127.')] for a in ip_addresses: if not a.is_private: log.info('Using public ip address for id: {0}'.format(a)) return str(a), True if ip_addresses: a = ip_addresses.pop(0) log.info('Using private ip address for id: {0}'.format(a)) return str(a), True log.error('No id found, falling back to localhost') return 'localhost', False
def ec2_tags(): boto_version = StrictVersion(boto.__version__) required_boto_version = StrictVersion('2.8.0') if boto_version < required_boto_version: log.error("Installed boto version %s < %s, can't find ec2_tags", boto_version, required_boto_version) return None if not _on_ec2(): log.info("Not an EC2 instance, skipping") return None instance_id, region = _get_instance_info() credentials = _get_credentials() # Connect to EC2 and parse the Roles tags for this instance try: conn = boto.ec2.connect_to_region( region, aws_access_key_id=credentials['access_key'], aws_secret_access_key=credentials['secret_key'], ) except Exception as e: log.error("Could not get AWS connection: %s", e) return None ec2_tags = {} try: tags = conn.get_all_tags(filters={'resource-type': 'instance', 'resource-id': instance_id}) for tag in tags: ec2_tags[tag.name] = tag.value except Exception as e: log.error("Couldn't retrieve instance tags: %s", e) return None ret = dict(ec2_tags=ec2_tags) # Provide ec2_tags_roles functionality if 'Roles' in ec2_tags: ret['ec2_roles'] = ec2_tags['Roles'].split(',') return ret
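# A minimal sketch of how the final ec2_tags() variant above builds its
# return value from the instance tags; the tag contents are hypothetical,
# while the ec2_tags/ec2_roles keys and the comma split mirror the function.
example_tags = {'Name': 'web-01', 'Roles': 'web,db'}
ret = dict(ec2_tags=example_tags)
if 'Roles' in example_tags:
    ret['ec2_roles'] = example_tags['Roles'].split(',')
# ret == {'ec2_tags': {'Name': 'web-01', 'Roles': 'web,db'}, 'ec2_roles': ['web', 'db']}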
def _init_worker(self, qdata): ''' The method dumps the data into a worker thread which handles pushing the data into different backends. ''' self.threads_cre += 1 log.info("Starting worker #{0}".format(self.threads_cre)) # make sure we pass a copy of the list worker = SaltEventsdWorker( list(qdata), self.threads_cre, self.event_map, self.backends, **self.opts ) worker.start() self.running_workers.append(worker)
def authenticate(self): ''' Authenticate with the master, this method breaks the functional paradigm, it will update the master information from a fresh sign in, signing in can occur as often as needed to keep up with the revolving master aes key. ''' log.debug( 'Attempting to authenticate with the Salt Master at {0}'.format( self.opts['master_ip'])) auth = salt.crypt.Auth(self.opts) while True: creds = auth.sign_in() if creds != 'retry': log.info('Authentication with master successful!') break log.info('Waiting for minion key to be accepted by the master.') time.sleep(self.opts['acceptance_wait_time']) self.aes = creds['aes'] self.publish_port = creds['publish_port'] self.crypticle = salt.crypt.Crypticle(self.opts, self.aes)
def _init_events(self, events={}): ''' Creates a dict of precompiled regexes for all defined events from config for maximum performance. ''' self.event_map = events # we precompile all regexes log.info("Initialising events...") for key in events.keys(): # we compile the regex configured in the config self.event_map[key]['tag'] = compile(events[key]['tag']) log.info("Added event '{0}'".format(key)) # if subevents are configured, also update them with # regex-matching object if 'subs' in events[key]: for sub_ev in events[key]['subs'].keys(): try: self.event_map[key]['subs'][sub_ev]['fun'] = compile(events[key]['subs'][sub_ev]['fun']) except KeyError: pass try: self.event_map[key]['subs'][sub_ev]['tag'] = compile(events[key]['subs'][sub_ev]['tag']) except KeyError: pass log.info("Added sub-event '{0}->{1}'".format(key, sub_ev))
def stop(self, signal, frame): ''' we override stop() to break our main loop and have a pretty log message ''' log.info("received signal {0}".format(signal)) # if we have running workers, run through all and join() the ones # that have finished. if we still have running workers after that, # wait 5 secs for the rest and then exit. Maybe we should improve # this a little bit more if( len(self.running_workers) > 0 ): clean_workers = [] for count in range(0, 2): for worker in self.running_workers: if worker.isAlive(): clean_workers.append(worker) else: worker.join() log.debug("joined worker #{0}".format(worker.getName())) if( len(clean_workers) > 0 ): log.info("waiting 5 secs for remaining workers..") time.sleep(5) else: break log.info("salt-eventsd has shut down") # leave the cleanup to the super's stop super(SaltEventsDaemon, self).stop(signal, frame)
def _init_events(self, events={}): ''' Creates a dict of precompiled regexes for all defined events from config for maximum performance. ''' self.event_map = events # we precompile all regexes log.info("Initialising events...") for key in events.keys(): # we compile the regex configured in the config self.event_map[key]['tag'] = compile(events[key]['tag']) log.info("Added event '{0}'".format(key)) # if subevents are configured, also update them with # regex-matching object if 'subs' in events[key]: for sub_ev in events[key]['subs'].keys(): try: self.event_map[key]['subs'][sub_ev]['fun'] = compile( events[key]['subs'][sub_ev]['fun']) except KeyError: pass try: self.event_map[key]['subs'][sub_ev]['tag'] = compile( events[key]['subs'][sub_ev]['tag']) except KeyError: pass log.info("Added sub-event '{0}->{1}'".format(key, sub_ev))
def _init_events(self, events={}): ''' this is used to tell the class about the events it should handle. it has to be a dictionary with appropriate mappings in it. see the config file for examples on how to compose the dict. each entry is converted to a precompiled regex for maximum flexibility ''' self.event_map = events # we precompile all regexes log.info("initialising events...") for key in events.keys(): # we compile the regex configured in the config self.event_map[key]['tag'] = compile( events[key]['tag'] ) log.info("Added event '{0}'".format(key)) # if subevents are configured, also update them with # regex-matching object if( events[key].has_key('subs') ): for sub_ev in events[key]['subs'].keys(): try: self.event_map[key]['subs'][sub_ev]['fun'] = compile(events[key]['subs'][sub_ev]['fun']) except KeyError: pass try: self.event_map[key]['subs'][sub_ev]['tag'] = compile(events[key]['subs'][sub_ev]['tag']) except KeyError: pass log.info("Added sub-event '{0}->{1}'".format(key, sub_ev))
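# A hypothetical events mapping of the shape the _init_events() variants
# above expect: every entry carries a 'tag' regex (compiled in place), an
# optional 'prio' used later in listen(), and an optional 'subs' dict whose
# entries may carry 'fun' and/or 'tag' patterns. The key names and patterns
# below are assumptions for illustration only.
hypothetical_events = {
    'job_return': {
        'tag': r'salt/job/\d+/ret/.*',
        'prio': 0,
        'subs': {
            'state_jobs': {
                'fun': r'state\.(apply|highstate)',
                'tag': r'salt/job/\d+/ret/.*',
            },
        },
    },
}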
def _write_state(self): ''' writes a current status to the defined status-file this includes the current pid, events received/handled and threads created/joined ''' try: # write the info to the specified log statf = open(self.state_file, 'w') statf.writelines(simplejson.dumps({'events_received':self.events_rec, 'events_handled':self.events_han, 'threads_created':self.threads_cre, 'threads_joined':self.threads_join} )) # if we have the same pid as the pidfile, we are the running daemon # and also print the current counters to the logfile with 'info' if( os.getpid() == self.pid ): log.info("running with pid {0}".format(self.pid)) log.info("events (han/recv): {0}/{1}".format(self.events_han, self.events_rec)) log.info("threads (cre/joi):{0}/{1}".format(self.threads_cre, self.threads_join)) statf.write("\n") statf.close() sys.stdout.flush() except IOError as ioerr: log.critical("Failed to write state to {0}".format(self.state_file)) log.exception(ioerr) except OSError as oserr: log.critical("Failed to write state to {0}".format(self.state_file)) log.exception(oserr)
def ec2_tags(): boto_version = StrictVersion(boto.__version__) required_boto_version = StrictVersion('2.8.0') if boto_version < required_boto_version: log.error("%s: installed boto version %s < %s, can't find ec2_tags", __name__, boto_version, required_boto_version) return None if not _on_ec2(): log.info("%s: not an EC2 instance, skipping", __name__) return None (instance_id, region) = _get_instance_info() credentials = _get_credentials() # Connect to EC2 and parse the Roles tags for this instance try: conn = boto.ec2.connect_to_region(region, aws_access_key_id=credentials['access_key'], aws_secret_access_key=credentials['secret_key']) except: if not (credentials['access_key'] and credentials['secret_key']): log.error("%s: no AWS credentials found, see documentation for how to provide them.", __name__) return None else: log.error("%s: invalid AWS credentials found, see documentation for how to provide them.", __name__) return None tags = {} try: _tags = conn.get_all_tags(filters={'resource-type': 'instance', 'resource-id': instance_id}) for tag in _tags: tags[tag.name] = tag.value except IndexError, e: log.error("Couldn't retrieve instance information: %s", e) return None
def authenticate(self): ''' Authenticate with the master, this method breaks the functional paradigm, it will update the master information from a fresh sign in, signing in can occur as often as needed to keep up with the revolving master aes key. ''' log.debug( 'Attempting to authenticate with the Salt Master at {0}'.format( self.opts['master_ip'] ) ) auth = salt.crypt.Auth(self.opts) while True: creds = auth.sign_in() if creds != 'retry': log.info('Authentication with master successful!') break log.info('Waiting for minion key to be accepted by the master.') time.sleep(self.opts['acceptance_wait_time']) self.aes = creds['aes'] self.publish_port = creds['publish_port'] self.crypticle = salt.crypt.Crypticle(self.opts, self.aes)
def send(self, enc, load, tries=1, timeout=60): ''' Takes two arguments, the encryption type and the base payload ''' payload = {'enc': enc} payload['load'] = load pkg = self.serial.dumps(payload) self.socket.send(pkg) self.poller.register(self.socket, zmq.POLLIN) tried = 0 while True: polled = self.poller.poll(timeout * 1000) tried += 1 if polled: break if tries > 1: log.info('SaltReqTimeoutError: after {0} seconds. (Try {1} of {2})'.format( timeout, tried, tries)) if tried >= tries: self.clear_socket() raise SaltReqTimeoutError( 'SaltReqTimeoutError: after {0} seconds, ran {1} tries'.format(timeout * tried, tried) ) return self.serial.loads(self.socket.recv())
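# Rough timing sketch for the send() variants above, not part of the original
# code: each poll blocks for up to `timeout` seconds, so a request that never
# gets an answer raises SaltReqTimeoutError only after roughly
# timeout * tries seconds. The numbers below are just an illustration.
timeout, tries = 60, 3
worst_case_wait = timeout * tries  # about 180 seconds before the error is raised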
def run(self): ''' Main loop of the ConCache, starts updates in intervals and answers requests from the MWorkers ''' context = zmq.Context() # the socket for incoming cache requests creq_in = context.socket(zmq.REP) creq_in.setsockopt(zmq.LINGER, 100) creq_in.bind('ipc://' + self.cache_sock) # the socket for incoming cache-updates from workers cupd_in = context.socket(zmq.SUB) cupd_in.setsockopt(zmq.SUBSCRIBE, '') cupd_in.setsockopt(zmq.LINGER, 100) cupd_in.bind('ipc://' + self.update_sock) # the socket for the timer-event timer_in = context.socket(zmq.SUB) timer_in.setsockopt(zmq.SUBSCRIBE, '') timer_in.setsockopt(zmq.LINGER, 100) timer_in.connect('ipc://' + self.upd_t_sock) poller = zmq.Poller() poller.register(creq_in, zmq.POLLIN) poller.register(cupd_in, zmq.POLLIN) poller.register(timer_in, zmq.POLLIN) # our serializer serial = salt.payload.Serial(self.opts.get('serial', '')) # register a signal handler signal.signal(signal.SIGINT, self.signal_handler) # secure the sockets from the world self.secure() log.info('ConCache started') while self.running: # we check for new events with the poller try: socks = dict(poller.poll(1)) except KeyboardInterrupt: self.stop() except zmq.ZMQError as zmq_err: log.error('ConCache ZeroMQ-Error occurred') log.exception(zmq_err) self.stop() # check for next cache-request if socks.get(creq_in) == zmq.POLLIN: msg = serial.loads(creq_in.recv()) log.debug('ConCache Received request: {0}'.format(msg)) # requests to the minion list are send as str's if isinstance(msg, str): if msg == 'minions': # Send reply back to client reply = serial.dumps(self.minions) creq_in.send(reply) # check for next cache-update from workers if socks.get(cupd_in) == zmq.POLLIN: new_c_data = serial.loads(cupd_in.recv()) # tell the worker to exit #cupd_in.send(serial.dumps('ACK')) # check if the returned data is usable if not isinstance(new_c_data, list): log.error('ConCache Worker returned unusable result') del new_c_data continue # the cache will receive lists of minions # 1. if the list only has 1 item, its from an MWorker, we append it # 2. if the list contains another list, its from a CacheWorker and # the currently cached minions are replaced with that list # 3. anything else is considered malformed try: if len(new_c_data) == 0: log.debug('ConCache Got empty update from worker') continue data = new_c_data[0] if isinstance(data, str): if data not in self.minions: log.debug('ConCache Adding minion {0} to cache'.format(new_c_data[0])) self.minions.append(data) elif isinstance(data, list): log.debug('ConCache Replacing minion list from worker') self.minions = data except IndexError: log.debug('ConCache Got malformed result dict from worker') del new_c_data log.info('ConCache {0} entries in cache'.format(len(self.minions))) # check for next timer-event to start new jobs if socks.get(timer_in) == zmq.POLLIN: sec_event = serial.loads(timer_in.recv()) # update the list every 30 seconds if int(sec_event % 30) == 0: cw = CacheWorker(self.opts) cw.start() self.stop() creq_in.close() cupd_in.close() timer_in.close() context.term() log.debug('ConCache Shutting down')
def tune_in(self): ''' Lock onto the publisher. This is the main event loop for the minion ''' log.info( '{0} is starting as user \'{1}\''.format( self.__class__.__name__, getpass.getuser() ) ) log.debug('Minion "{0}" trying to tune in'.format(self.opts['id'])) self.context = zmq.Context() # Prepare the minion event system # # Start with the publish socket id_hash = hashlib.md5(self.opts['id']).hexdigest() epub_sock_path = os.path.join( self.opts['sock_dir'], 'minion_event_{0}_pub.ipc'.format(id_hash) ) epull_sock_path = os.path.join( self.opts['sock_dir'], 'minion_event_{0}_pull.ipc'.format(id_hash) ) self.epub_sock = self.context.socket(zmq.PUB) if self.opts.get('ipc_mode', '') == 'tcp': epub_uri = 'tcp://127.0.0.1:{0}'.format( self.opts['tcp_pub_port'] ) epull_uri = 'tcp://127.0.0.1:{0}'.format( self.opts['tcp_pull_port'] ) else: epub_uri = 'ipc://{0}'.format(epub_sock_path) salt.utils.check_ipc_path_max_len(epub_uri) epull_uri = 'ipc://{0}'.format(epull_sock_path) salt.utils.check_ipc_path_max_len(epull_uri) log.debug( '{0} PUB socket URI: {1}'.format( self.__class__.__name__, epub_uri ) ) log.debug( '{0} PULL socket URI: {1}'.format( self.__class__.__name__, epull_uri ) ) # Create the pull socket self.epull_sock = self.context.socket(zmq.PULL) # Bind the event sockets self.epub_sock.bind(epub_uri) self.epull_sock.bind(epull_uri) # Restrict access to the sockets if not self.opts.get('ipc_mode', '') == 'tcp': os.chmod( epub_sock_path, 448 ) os.chmod( epull_sock_path, 448 ) self.poller = zmq.Poller() self.epoller = zmq.Poller() self.socket = self.context.socket(zmq.SUB) self.socket.setsockopt(zmq.SUBSCRIBE, '') self.socket.setsockopt(zmq.IDENTITY, self.opts['id']) if hasattr(zmq, 'RECONNECT_IVL_MAX'): self.socket.setsockopt( zmq.RECONNECT_IVL_MAX, self.opts['recon_max'] ) if hasattr(zmq, 'TCP_KEEPALIVE'): self.socket.setsockopt( zmq.TCP_KEEPALIVE, self.opts['tcp_keepalive'] ) self.socket.setsockopt( zmq.TCP_KEEPALIVE_IDLE, self.opts['tcp_keepalive_idle'] ) self.socket.setsockopt( zmq.TCP_KEEPALIVE_CNT, self.opts['tcp_keepalive_cnt'] ) self.socket.setsockopt( zmq.TCP_KEEPALIVE_INTVL, self.opts['tcp_keepalive_intvl'] ) if hasattr(zmq, 'IPV4ONLY'): self.socket.setsockopt( zmq.IPV4ONLY, int(not int(self.opts.get('ipv6_enable', False))) ) self.socket.connect(self.master_pub) self.poller.register(self.socket, zmq.POLLIN) self.epoller.register(self.epull_sock, zmq.POLLIN) # Send an event to the master that the minion is live self._fire_master( 'Minion {0} started at {1}'.format( self.opts['id'], time.asctime() ), 'minion_start' ) if self.opts['multiprocessing'] and not salt.utils.is_windows(): signal.signal(signal.SIGCHLD, self.handle_sigchld) # Make sure to gracefully handle SIGUSR1 enable_sigusr1_handler() # On first startup execute a state run if configured to do so self._state_run() while True: try: self.schedule.eval() socks = dict(self.poller.poll( self.opts['loop_interval'] * 1000) ) if self.socket in socks and socks[self.socket] == zmq.POLLIN: payload = self.serial.loads(self.socket.recv()) self._handle_payload(payload) time.sleep(0.05) # Clean up the minion processes which have been executed and # have finished # Check if modules and grains need to be refreshed self.passive_refresh() # Check the event system if self.epoller.poll(1): try: package = self.epull_sock.recv(zmq.NOBLOCK) self.epub_sock.send(package) except Exception: pass except zmq.ZMQError: # This is thrown by the interrupt caused by python handling the # SIGCHLD. This is a safe error and we just start the poll # again continue except Exception: log.critical(traceback.format_exc())
def _linux_gpu_data(): ''' num_gpus: int gpus: - vendor: nvidia|amd|ati|... model: string ''' lspci = salt.utils.which('lspci') if not lspci: log.info( 'The `lspci` binary is not available on the system. GPU grains ' 'will not be available.') return {} elif __opts__.get('enable_gpu_grains', None) is False: log.info( 'Skipping lspci call because enable_gpu_grains was set to False ' 'in the config. GPU grains will not be available.') return {} # dominant gpu vendors to search for (MUST be lowercase for matching below) known_vendors = ['nvidia', 'amd', 'ati', 'intel'] devs = [] try: lspci_out = __salt__['cmd.run']('lspci -vmm') cur_dev = {} error = False # Add a blank element to the lspci_out.splitlines() list, # otherwise the last device is not evaluated as a cur_dev and ignored. lspci_list = lspci_out.splitlines() lspci_list.append('') for line in lspci_list: # check for record-separating empty lines if line == '': if cur_dev.get('Class', '') == 'VGA compatible controller': devs.append(cur_dev) # XXX; may also need to search for "3D controller" cur_dev = {} continue if re.match(r'^\w+:\s+.*', line): key, val = line.split(':', 1) cur_dev[key.strip()] = val.strip() else: error = True log.debug('Unexpected lspci output: \'{0}\''.format(line)) if error: log.warn('Error loading grains, unexpected linux_gpu_data output, ' 'check that you have a valid shell configured and ' 'permissions to run lspci command') except OSError: pass gpus = [] for gpu in devs: vendor_strings = gpu['Vendor'].lower().split() # default vendor to 'unknown', overwrite if we match a known one vendor = 'unknown' for name in known_vendors: # search for an 'expected' vendor name in the list of strings if name in vendor_strings: vendor = name break gpus.append({'vendor': vendor, 'model': gpu['Device']}) grains = {} grains['num_gpus'] = len(gpus) grains['gpus'] = gpus return grains
def run(self): ''' Main loop of the FSCache, checks schedule, retrieves result-data from the workers and answers requests with data from the cache ''' context = zmq.Context() # the socket for incoming cache requests creq_in = context.socket(zmq.REP) creq_in.setsockopt(zmq.LINGER, 100) creq_in.bind('ipc:///' + self.cache_sock) # the socket for incoming cache-updates from workers cupd_in = context.socket(zmq.REP) cupd_in.setsockopt(zmq.LINGER, 100) cupd_in.bind('ipc:///' + self.update_sock) # wait for the timer to bind to its socket log.debug('wait 2 secs for the timer') time.sleep(2) # the socket for the timer-event timer_in = context.socket(zmq.PULL) timer_in.setsockopt(zmq.LINGER, 100) timer_in.connect('ipc:///' + self.upd_t_sock) poller = zmq.Poller() poller.register(creq_in, zmq.POLLIN) poller.register(cupd_in, zmq.POLLIN) poller.register(timer_in, zmq.POLLIN) # our serializer serial = salt.payload.Serial(self.opts.get('serial', '')) # register a signal handler signal.signal(signal.SIGINT, self.signal_handler) # secure the sockets from the world self.secure() log.info('FSCache started') log.debug('FSCache started') while self.running: # we check for new events with the poller try: socks = dict(poller.poll()) except KeyboardInterrupt: self.stop() except zmq.ZMQError as t: self.stop() # check for next cache-request if socks.get(creq_in) == zmq.POLLIN: msg = serial.loads(creq_in.recv()) log.debug('Received request: {0}'.format(msg)) # we only accept requests as lists [req_id, <path>] if isinstance(msg, list): # for now only one item is assumed to be requested msgid, file_n = msg[:] log.debug('Looking for {0}:{1}'.format(msgid, file_n)) fdata = self.path_data.get(file_n, None) if fdata is not None: log.debug('Cache HIT') else: log.debug('Cache MISS') # simulate slow caches #randsleep = random.randint(0,3) #time.sleep(randsleep) # Send reply back to client reply = serial.dumps([msgid, fdata]) creq_in.send(reply) # wrong format, item not cached else: reply = serial.dumps([msgid, None]) creq_in.send(reply) # check for next cache-update from workers elif socks.get(cupd_in) == zmq.POLLIN: new_c_data = serial.loads(cupd_in.recv()) # tell the worker to exit cupd_in.send(serial.dumps('OK')) # check if the returned data is usable if not isinstance(new_c_data, dict): log.error('Worker returned unusable result') del new_c_data continue # the workers will return differing data: # 1. '{'file1': <data1>, 'file2': <data2>,...}' - a cache update # 2. '{search-path: None}' - job was not run, pre-checks failed # 3. '{}' - no files found, check the pattern if defined? # 4. anything else is considered malformed if len(new_c_data) == 0: log.debug('Got empty update from worker') elif new_c_data.values()[0] is not None: log.debug('Got cache update with {0} item(s)'.format( len(new_c_data))) self.path_data.update(new_c_data) else: log.debug('Got malformed result dict from worker') log.info('{0} entries in cache'.format(len(self.path_data))) # check for next timer-event to start new jobs elif socks.get(timer_in) == zmq.POLLIN: sec_event = serial.loads(timer_in.recv()) log.debug('Timer event: #{0}'.format(sec_event)) # loop through the jobs and start if a job's ival matches for item in self.jobs: if sec_event in self.jobs[item]['ival']: self.run_job(item) self.stop() creq_in.close() cupd_in.close() timer_in.close() context.term() log.debug('Shutting down')
def _write_state(self): ''' Writes a current status to the defined status-file this includes the current pid, events received/handled and threads created/joined ''' ev_hdl_per_s = float((float(self.events_han - self.stat_hdl_count)) / float(self.state_timer_intrvl)) ev_tot_per_s = float((float(self.events_rec - self.stat_rec_count)) / float(self.state_timer_intrvl)) if self.config['stat_worker']: stat_data = { 'events_rec': self.events_rec, 'events_hdl': self.events_han, 'events_hdl_sec': round(ev_hdl_per_s, 2), 'events_tot_sec': round(ev_tot_per_s, 2), 'threads_created': self.threads_cre, 'threads_joined': self.threads_join } self.threads_cre += 1 st_worker = SaltEventsdWorker( stat_data, self.threads_cre, None, self.backends, **self.opts ) st_worker.start() try: self.running_workers.append(st_worker) except AttributeError: log.error('self is missing running_workers') try: log.info(self) log.info(dir(self)) except Exception: log.error('Failed to dump dir(self)') try: # write the info to the specified log statf = open(self.state_file, 'w') statf.writelines( json.dumps({ 'events_rec': self.events_rec, 'events_hdl': self.events_han, 'events_hdl_sec': round(ev_hdl_per_s, 2), 'events_tot_sec': round(ev_tot_per_s, 2), 'threads_created': self.threads_cre, 'threads_joined': self.threads_join }) ) # if we have the same pid as the pidfile, we are the running daemon # and also print the current counters to the logfile with 'info' if os.getpid() == self.pid: log.info("Running with pid {0}".format(self.pid)) log.info("Events (han/recv): {0}/{1}".format( self.events_han, self.events_rec, )) log.info("Threads (cre/joi):{0}/{1}".format( self.threads_cre, self.threads_join, )) statf.write("\n") statf.close() sys.stdout.flush() except IOError as ioerr: log.critical("Failed to write state to {0}".format(self.state_file)) log.exception(ioerr) except OSError as oserr: log.critical("Failed to write state to {0}".format(self.state_file)) log.exception(oserr) self.stat_rec_count = self.events_rec self.stat_hdl_count = self.events_han
def tune_in(self): ''' Lock onto the publisher. This is the main event loop for the minion ''' log.info('{0} is starting as user \'{1}\''.format( self.__class__.__name__, getpass.getuser())) log.debug('Minion "{0}" trying to tune in'.format(self.opts['id'])) self.context = zmq.Context() # Prepare the minion event system # # Start with the publish socket id_hash = hashlib.md5(self.opts['id']).hexdigest() epub_sock_path = os.path.join( self.opts['sock_dir'], 'minion_event_{0}_pub.ipc'.format(id_hash)) epull_sock_path = os.path.join( self.opts['sock_dir'], 'minion_event_{0}_pull.ipc'.format(id_hash)) self.epub_sock = self.context.socket(zmq.PUB) if self.opts.get('ipc_mode', '') == 'tcp': epub_uri = 'tcp://127.0.0.1:{0}'.format(self.opts['tcp_pub_port']) epull_uri = 'tcp://127.0.0.1:{0}'.format( self.opts['tcp_pull_port']) else: epub_uri = 'ipc://{0}'.format(epub_sock_path) salt.utils.check_ipc_path_max_len(epub_uri) epull_uri = 'ipc://{0}'.format(epull_sock_path) salt.utils.check_ipc_path_max_len(epull_uri) log.debug('{0} PUB socket URI: {1}'.format(self.__class__.__name__, epub_uri)) log.debug('{0} PULL socket URI: {1}'.format(self.__class__.__name__, epull_uri)) # Create the pull socket self.epull_sock = self.context.socket(zmq.PULL) # Bind the event sockets self.epub_sock.bind(epub_uri) self.epull_sock.bind(epull_uri) # Restrict access to the sockets if not self.opts.get('ipc_mode', '') == 'tcp': os.chmod(epub_sock_path, 448) os.chmod(epull_sock_path, 448) self.poller = zmq.Poller() self.epoller = zmq.Poller() self.socket = self.context.socket(zmq.SUB) self.socket.setsockopt(zmq.SUBSCRIBE, '') self.socket.setsockopt(zmq.IDENTITY, self.opts['id']) if hasattr(zmq, 'RECONNECT_IVL_MAX'): self.socket.setsockopt(zmq.RECONNECT_IVL_MAX, self.opts['recon_max']) if hasattr(zmq, 'TCP_KEEPALIVE'): self.socket.setsockopt(zmq.TCP_KEEPALIVE, self.opts['tcp_keepalive']) self.socket.setsockopt(zmq.TCP_KEEPALIVE_IDLE, self.opts['tcp_keepalive_idle']) self.socket.setsockopt(zmq.TCP_KEEPALIVE_CNT, self.opts['tcp_keepalive_cnt']) self.socket.setsockopt(zmq.TCP_KEEPALIVE_INTVL, self.opts['tcp_keepalive_intvl']) if hasattr(zmq, 'IPV4ONLY'): self.socket.setsockopt( zmq.IPV4ONLY, int(not int(self.opts.get('ipv6_enable', False)))) self.socket.connect(self.master_pub) self.poller.register(self.socket, zmq.POLLIN) self.epoller.register(self.epull_sock, zmq.POLLIN) # Send an event to the master that the minion is live self._fire_master( 'Minion {0} started at {1}'.format(self.opts['id'], time.asctime()), 'minion_start') if self.opts['multiprocessing'] and not salt.utils.is_windows(): signal.signal(signal.SIGCHLD, self.handle_sigchld) # Make sure to gracefully handle SIGUSR1 enable_sigusr1_handler() # On first startup execute a state run if configured to do so self._state_run() while True: try: self.schedule.eval() socks = dict( self.poller.poll(self.opts['loop_interval'] * 1000)) if self.socket in socks and socks[self.socket] == zmq.POLLIN: payload = self.serial.loads(self.socket.recv()) self._handle_payload(payload) time.sleep(0.05) # Clean up the minion processes which have been executed and # have finished # Check if modules and grains need to be refreshed self.passive_refresh() # Check the event system if self.epoller.poll(1): try: package = self.epull_sock.recv(zmq.NOBLOCK) self.epub_sock.send(package) except Exception: pass except zmq.ZMQError: # This is thrown by the interrupt caused by python handling the # SIGCHLD. This is a safe error and we just start the poll # again continue except Exception: log.critical(traceback.format_exc())
def _linux_gpu_data(): """ num_gpus: int gpus: - vendor: nvidia|amd|ati|... model: string """ lspci = salt.utils.which("lspci") if not lspci: log.info("The `lspci` binary is not available on the system. GPU grains " "will not be available.") return {} elif __opts__.get("enable_gpu_grains", None) is False: log.info( "Skipping lspci call because enable_gpu_grains was set to False " "in the config. GPU grains will not be available." ) return {} # dominant gpu vendors to search for (MUST be lowercase for matching below) known_vendors = ["nvidia", "amd", "ati", "intel"] devs = [] try: lspci_out = __salt__["cmd.run"]("lspci -vmm") cur_dev = {} error = False # Add a blank element to the lspci_out.splitlines() list, # otherwise the last device is not evaluated as a cur_dev and ignored. lspci_list = lspci_out.splitlines() lspci_list.append("") for line in lspci_list: # check for record-separating empty lines if line == "": if cur_dev.get("Class", "") == "VGA compatible controller": devs.append(cur_dev) # XXX; may also need to search for "3D controller" cur_dev = {} continue if re.match(r"^\w+:\s+.*", line): key, val = line.split(":", 1) cur_dev[key.strip()] = val.strip() else: error = True log.debug("Unexpected lspci output: '{0}'".format(line)) if error: log.warn( "Error loading grains, unexpected linux_gpu_data output, " "check that you have a valid shell configured and " "permissions to run lspci command" ) except OSError: pass gpus = [] for gpu in devs: vendor_strings = gpu["Vendor"].lower().split() # default vendor to 'unknown', overwrite if we match a known one vendor = "unknown" for name in known_vendors: # search for an 'expected' vendor name in the list of strings if name in vendor_strings: vendor = name break gpus.append({"vendor": vendor, "model": gpu["Device"]}) grains = {} grains["num_gpus"] = len(gpus) grains["gpus"] = gpus return grains
def __init__(self): self.opts = SaltEventsdLoader().getopts() self._pre_startup(self.opts) if type(self.opts) is not dict: log.info("Received invalid config data, startup cancelled") sys.exit(1) self.config = self.opts['general'] super(SaltEventsDaemon, self).__init__(self.config['pidfile']) # the map of events is stored here, loaded in _init_events() self.event_map = None self._init_events(self.opts['events']) self.backends = self._init_backends( self.config['backends'] ) log.info(self.backends) # the socket to listen on for the events self.sock_dir = self.config['sock_dir'] # two possible values for 'node': master and minion # they do the same thing, just on different sockets self.node = self.config['node'] # the id, usually 'master' self.nodeid = self.config['id'] # the statefile where we write the daemon status self.state_file = self.config['state_file'] # how many events to handle before updating the status self.state_upd = self.config['state_upd'] # we don't know our pid (yet) self.pid = None # how many parallel workers to start max self.max_workers = self.config['max_workers'] # the number of events to collect before starting a worker self.event_limit = self.config['event_limit'] # a list to keep track of the currently running workers # this is mainly for debugging to check whether all started # workers are correctly joined over time so we don't leak memory self.running_workers = [] # setup some counters used for the status self.events_han = 0 self.events_rec = 0 self.threads_cre = 0 self.threads_join = 0 # the timer that writes data to the database every x seconds # this is used to push data into the database even if # self.event_limit is not reached regularly self.ev_timer_ev = False self.ev_timer_intrvl = self.config['dump_timer'] self.ev_timer = ResetTimer(self.ev_timer_intrvl, self)
def listen(self): ''' the main event loop where we receive the events and start the workers that dump our data into the database ''' # log on to saltstack's event-bus event = salt.utils.event.SaltEvent(self.node, self.sock_dir) # we store our events in a list, we don't really care about an order # or what kind of data is put in there. all that is configured with the # sql-template configured in the configfile event_queue = [] # start our dump_timer self.ev_timer.start() # this is for logline chronology so the timer-message always comes # _before_ the actual startup-message of the listening loop below :-) time.sleep(1) log.info("entering main event loop") log.info("listening on: {0}".format(event.puburi)) # read everything we can get our hands on while True: # the zmq-socket does not like ^C very much, make the error # a little more graceful. alright, alright, ignore the damn thing, # we're exiting anyways... try: ret = event.get_event(full=True) except zmq.ZMQError: pass if ret is None: continue # if the timer has expired, we may have not received enough # events in the queue to reach event_limit, in that case we dump # the data anyway to have it in the database if(self.ev_timer_ev): if (len(self.running_workers) < self.max_workers) and \ (len(event_queue) > 0): self._init_worker(event_queue) # reset our queue to prevent duplicate entries del event_queue[:] # we reset the timer.ev_timer_ev at the end of the loop # so we can update the stats that are logged # filter only the events we're interested in. all events have a tag # we can filter them by. we match with a precompiled regex if( 'tag' in ret ): # filter out events with an empty tag. those are special if( ret['tag'] != '' ): # run through our configured events and try to match the # current event's tag against the ones we're interested in for key in self.event_map.keys(): if( self.event_map[key]['tag'].match(ret['tag'])): log.debug("matching on {0}:{1}".format(key, ret['tag'])) prio = self.event_map[key].get('prio', 0) # push prio1-events directly into a worker if prio > 0: log.debug('Prio1 event found, pushing immediately!') self.events_han += 1 self._init_worker([ret]) else: event_queue.append(ret) self.events_han += 1 # once we reach the event_limit, start a worker that # writes that data into the database if len(event_queue) >= self.event_limit: # only start a worker if not too many workers are running if len(self.running_workers) < self.max_workers: self._init_worker(event_queue) # reset the timer self.ev_timer.reset() # reset our queue to prevent duplicate entries del event_queue[:] else: # FIXME: we need to handle this situation somehow if # too many workers are running. just flush the events? # there really is no sane way except queueing more and more # until some sort of limit is reached and we care more about # our saltmaster than about the collected events! log.critical("too many workers running, losing data!!!") # a list for the workers that are still running clean_workers = [] # run through all the workers and join() the ones # that have finished dumping their data and keep # the running ones on our list for worker in self.running_workers: if worker.isAlive(): clean_workers.append(worker) else: worker.join() log.debug("joined worker #{0}".format(worker.getName())) self.threads_join += 1 # get rid of the old reference and set a new one # FIXME: is this really necessary? del self.running_workers self.running_workers = clean_workers self.events_rec += 1 # we update the stats every time 'events received % state_upd == 0' # or if we received a timer event from our ResetTimer if( (self.events_rec % self.state_upd) == 0 ): self._write_state() elif(self.ev_timer_ev): self._write_state() self.ev_timer_ev = False log.info("listen loop ended...")
def run(self): ''' Main loop of the ConCache, starts updates in intervals and answers requests from the MWorkers ''' context = zmq.Context() # the socket for incoming cache requests creq_in = context.socket(zmq.REP) creq_in.setsockopt(zmq.LINGER, 100) creq_in.bind('ipc://' + self.cache_sock) # the socket for incoming cache-updates from workers cupd_in = context.socket(zmq.SUB) cupd_in.setsockopt(zmq.SUBSCRIBE, b'') cupd_in.setsockopt(zmq.LINGER, 100) cupd_in.bind('ipc://' + self.update_sock) # the socket for the timer-event timer_in = context.socket(zmq.SUB) timer_in.setsockopt(zmq.SUBSCRIBE, b'') timer_in.setsockopt(zmq.LINGER, 100) timer_in.connect('ipc://' + self.upd_t_sock) poller = zmq.Poller() poller.register(creq_in, zmq.POLLIN) poller.register(cupd_in, zmq.POLLIN) poller.register(timer_in, zmq.POLLIN) # our serializer serial = salt.payload.Serial(self.opts.get('serial', '')) # register a signal handler signal.signal(signal.SIGINT, self.signal_handler) # secure the sockets from the world self.secure() log.info('ConCache started') while self.running: # we check for new events with the poller try: socks = dict(poller.poll(1)) except KeyboardInterrupt: self.stop() except zmq.ZMQError as zmq_err: log.error('ConCache ZeroMQ-Error occurred') log.exception(zmq_err) self.stop() # check for next cache-request if socks.get(creq_in) == zmq.POLLIN: msg = serial.loads(creq_in.recv()) log.debug('ConCache Received request: %s', msg) # requests to the minion list are send as str's if isinstance(msg, six.string_types): if msg == 'minions': # Send reply back to client reply = serial.dumps(self.minions) creq_in.send(reply) # check for next cache-update from workers if socks.get(cupd_in) == zmq.POLLIN: new_c_data = serial.loads(cupd_in.recv()) # tell the worker to exit #cupd_in.send(serial.dumps('ACK')) # check if the returned data is usable if not isinstance(new_c_data, list): log.error('ConCache Worker returned unusable result') del new_c_data continue # the cache will receive lists of minions # 1. if the list only has 1 item, its from an MWorker, we append it # 2. if the list contains another list, its from a CacheWorker and # the currently cached minions are replaced with that list # 3. anything else is considered malformed try: if not new_c_data: log.debug('ConCache Got empty update from worker') continue data = new_c_data[0] if isinstance(data, six.string_types): if data not in self.minions: log.debug('ConCache Adding minion %s to cache', new_c_data[0]) self.minions.append(data) elif isinstance(data, list): log.debug('ConCache Replacing minion list from worker') self.minions = data except IndexError: log.debug('ConCache Got malformed result dict from worker') del new_c_data log.info('ConCache %s entries in cache', len(self.minions)) # check for next timer-event to start new jobs if socks.get(timer_in) == zmq.POLLIN: sec_event = serial.loads(timer_in.recv()) # update the list every 30 seconds if int(sec_event % 30) == 0: cw = CacheWorker(self.opts) cw.start() self.stop() creq_in.close() cupd_in.close() timer_in.close() context.term() log.debug('ConCache Shutting down')
def get_id(): """ Guess the id of the minion. - Check /etc/hostname for a value other than localhost - If socket.getfqdn() returns us something other than localhost, use it - Check /etc/hosts for something that isn't localhost that maps to 127.* - Look for a routeable / public IP - A private IP is better than a loopback IP - localhost may be better than killing the minion Returns two values: the detected ID, and a boolean value noting whether or not an IP address is being used for the ID. """ log.debug("Guessing ID. The id can be explicitly in set {0}".format(os.path.join(syspaths.CONFIG_DIR, "minion"))) # Check /etc/hostname try: with salt.utils.fopen("/etc/hostname") as hfl: name = hfl.read().strip() if re.search(r"\s", name): log.warning( "Whitespace character detected in /etc/hostname. " "This file should not contain any whitespace." ) else: if name != "localhost": return name, False except Exception: pass # Nothing in /etc/hostname or /etc/hostname not found fqdn = socket.getfqdn() if fqdn != "localhost": log.info("Found minion id from getfqdn(): {0}".format(fqdn)) return fqdn, False # Can /etc/hosts help us? try: with salt.utils.fopen("/etc/hosts") as hfl: for line in hfl: names = line.split() ip_ = names.pop(0) if ip_.startswith("127."): for name in names: if name != "localhost": log.info("Found minion id in hosts file: {0}".format(name)) return name, False except Exception: pass # Can Windows 'hosts' file help? try: windir = os.getenv("WINDIR") with salt.utils.fopen(windir + "\\system32\\drivers\\etc\\hosts") as hfl: for line in hfl: # skip commented or blank lines if line[0] == "#" or len(line) <= 1: continue # process lines looking for '127.' in first column try: entry = line.split() if entry[0].startswith("127."): for name in entry[1:]: # try each name in the row if name != "localhost": log.info("Found minion id in hosts file: {0}".format(name)) return name, False except IndexError: pass # could not split line (malformed entry?) except Exception: pass # What IP addresses do we have? ip_addresses = [ salt.utils.network.IPv4Address(addr) for addr in salt.utils.network.ip_addrs(include_loopback=True) if not addr.startswith("127.") ] for addr in ip_addresses: if not addr.is_private: log.info("Using public ip address for id: {0}".format(addr)) return str(addr), True if ip_addresses: addr = ip_addresses.pop(0) log.info("Using private ip address for id: {0}".format(addr)) return str(addr), True log.error("No id found, falling back to localhost") return "localhost", False
def get_id(): ''' Guess the id of the minion. - Check /etc/hostname for a value other than localhost - If socket.getfqdn() returns us something other than localhost, use it - Check /etc/hosts for something that isn't localhost that maps to 127.* - Look for a routeable / public IP - A private IP is better than a loopback IP - localhost may be better than killing the minion Returns two values: the detected ID, and a boolean value noting whether or not an IP address is being used for the ID. ''' log.debug('Guessing ID. The id can be explicitly in set {0}'.format( os.path.join(syspaths.CONFIG_DIR, 'minion'))) # Check /etc/hostname try: with salt.utils.fopen('/etc/hostname') as hfl: name = hfl.read().strip() if re.search(r'\s', name): log.warning('Whitespace character detected in /etc/hostname. ' 'This file should not contain any whitespace.') else: if name != 'localhost': return name, False except Exception: pass # Nothing in /etc/hostname or /etc/hostname not found fqdn = socket.getfqdn() if fqdn != 'localhost': log.info('Found minion id from getfqdn(): {0}'.format(fqdn)) return fqdn, False # Can /etc/hosts help us? try: with salt.utils.fopen('/etc/hosts') as hfl: for line in hfl: names = line.split() ip_ = names.pop(0) if ip_.startswith('127.'): for name in names: if name != 'localhost': log.info( 'Found minion id in hosts file: {0}'.format( name)) return name, False except Exception: pass # Can Windows 'hosts' file help? try: windir = os.getenv("WINDIR") with salt.utils.fopen(windir + '\\system32\\drivers\\etc\\hosts') as hfl: for line in hfl: # skip commented or blank lines if line[0] == '#' or len(line) <= 1: continue # process lines looking for '127.' in first column try: entry = line.split() if entry[0].startswith('127.'): for name in entry[1:]: # try each name in the row if name != 'localhost': log.info('Found minion id in hosts file: {0}'. format(name)) return name, False except IndexError: pass # could not split line (malformed entry?) except Exception: pass # What IP addresses do we have? ip_addresses = [ salt.utils.network.IPv4Address(addr) for addr in salt.utils.network.ip_addrs(include_loopback=True) if not addr.startswith('127.') ] for addr in ip_addresses: if not addr.is_private: log.info('Using public ip address for id: {0}'.format(addr)) return str(addr), True if ip_addresses: addr = ip_addresses.pop(0) log.info('Using private ip address for id: {0}'.format(addr)) return str(addr), True log.error('No id found, falling back to localhost') return 'localhost', False
def _linux_gpu_data(): ''' num_gpus: int gpus: - vendor: nvidia|amd|ati|... model: string ''' lspci = salt.utils.which('lspci') if not lspci: log.info( 'The `lspci` binary is not available on the system. GPU grains ' 'will not be available.' ) return {} elif __opts__.get('enable_gpu_grains', None) is False: log.info( 'Skipping lspci call because enable_gpu_grains was set to False ' 'in the config. GPU grains will not be available.' ) return {} # dominant gpu vendors to search for (MUST be lowercase for matching below) known_vendors = ['nvidia', 'amd', 'ati', 'intel'] devs = [] try: lspci_out = __salt__['cmd.run']('lspci -vmm') cur_dev = {} error = False # Add a blank element to the lspci_out.splitlines() list, # otherwise the last device is not evaluated as a cur_dev and ignored. lspci_list = lspci_out.splitlines() lspci_list.append('') for line in lspci_list: # check for record-separating empty lines if line == '': if cur_dev.get('Class', '') == 'VGA compatible controller': devs.append(cur_dev) # XXX; may also need to search for "3D controller" cur_dev = {} continue if re.match(r'^\w+:\s+.*', line): key, val = line.split(':', 1) cur_dev[key.strip()] = val.strip() else: error = True log.debug('Unexpected lspci output: \'{0}\''.format(line)) if error: log.warn( 'Error loading grains, unexpected linux_gpu_data output, ' 'check that you have a valid shell configured and ' 'permissions to run lspci command' ) except OSError: pass gpus = [] for gpu in devs: vendor_strings = gpu['Vendor'].lower().split() # default vendor to 'unknown', overwrite if we match a known one vendor = 'unknown' for name in known_vendors: # search for an 'expected' vendor name in the list of strings if name in vendor_strings: vendor = name break gpus.append({'vendor': vendor, 'model': gpu['Device']}) grains = {} grains['num_gpus'] = len(gpus) grains['gpus'] = gpus return grains
def _write_state(self): ''' Writes a current status to the defined status-file this includes the current pid, events received/handled and threads created/joined ''' ev_hdl_per_s = float((float(self.events_han - self.stat_hdl_count)) / float(self.state_timer_intrvl)) ev_tot_per_s = float((float(self.events_rec - self.stat_rec_count)) / float(self.state_timer_intrvl)) if self.config['stat_worker']: stat_data = { 'events_rec': self.events_rec, 'events_hdl': self.events_han, 'events_hdl_sec': round(ev_hdl_per_s, 2), 'events_tot_sec': round(ev_tot_per_s, 2), 'threads_created': self.threads_cre, 'threads_joined': self.threads_join } self.threads_cre += 1 st_worker = SaltEventsdWorker(stat_data, self.threads_cre, None, self.backends, **self.opts) st_worker.start() try: self.running_workers.append(st_worker) except AttributeError: log.error('self is missing running_workers') try: log.info(self) log.info(dir(self)) except Exception: log.error('Failed to dump dir(self)') try: # write the info to the specified log statf = open(self.state_file, 'w') statf.writelines( json.dumps({ 'events_rec': self.events_rec, 'events_hdl': self.events_han, 'events_hdl_sec': round(ev_hdl_per_s, 2), 'events_tot_sec': round(ev_tot_per_s, 2), 'threads_created': self.threads_cre, 'threads_joined': self.threads_join })) # if we have the same pid as the pidfile, we are the running daemon # and also print the current counters to the logfile with 'info' if os.getpid() == self.pid: log.info("Running with pid {0}".format(self.pid)) log.info("Events (han/recv): {0}/{1}".format( self.events_han, self.events_rec, )) log.info("Threads (cre/joi):{0}/{1}".format( self.threads_cre, self.threads_join, )) statf.write("\n") statf.close() sys.stdout.flush() except IOError as ioerr: log.critical("Failed to write state to {0}".format( self.state_file)) log.exception(ioerr) except OSError as oserr: log.critical("Failed to write state to {0}".format( self.state_file)) log.exception(oserr) self.stat_rec_count = self.events_rec self.stat_hdl_count = self.events_han