def __init__(self, features=None, environment='cloudsight',
             user_list='ALL', host_namespace='', plugin_places=None,
             options=None):
    """Initialize a container crawler.

    :param features: List of feature names to crawl (defaults to
        ``['os', 'cpu']``).
    :param environment: Environment plugin name to load
        (e.g. 'cloudsight').
    :param user_list: Which containers/users to crawl; 'ALL' means no
        filtering — TODO confirm against the filtering code.
    :param host_namespace: Namespace prefix identifying the host.
    :param plugin_places: Directories to search for plugins (defaults
        to ``['plugins']``).
    :param options: Tree of options passed through to the plugins.
    """
    # Use None sentinels instead of mutable default arguments so the
    # default lists/dict are not shared across instances.
    features = ['os', 'cpu'] if features is None else features
    plugin_places = ['plugins'] if plugin_places is None else plugin_places
    options = {} if options is None else options

    BaseCrawler.__init__(
        self,
        features=features,
        plugin_places=plugin_places,
        options=options)
    # Reload plugin registries so they reflect this crawler's
    # environment, feature list and plugin search path.
    plugins_manager.reload_env_plugin(environment, plugin_places)
    plugins_manager.reload_container_crawl_plugins(
        features, plugin_places, options)
    self.plugins = plugins_manager.get_container_crawl_plugins(features)
    self.environment = environment
    self.host_namespace = host_namespace
    self.user_list = user_list
def snapshot(
        urls=None,
        namespace=misc.get_host_ipaddr(),
        features=config_parser.get_config()['general']['features_to_crawl'],
        options=None,
        frequency=-1,
        crawlmode=Modes.INVM,
        format='csv',
        overwrite=False,
        first_snapshot_num=0,
        max_snapshots=-1):
    """Entrypoint for crawler functionality.

    This is the function executed by long running crawler processes. It
    just loops sleeping for `frequency` seconds at each crawl interval.
    During each interval, it collects the features listed in `features`,
    and sends them to the outputs listed in `urls`.

    :param urls: The url used as the output of the snapshot
        (defaults to ``['stdout://']``).
    :param namespace: This a pointer to a specific system (e.g. IP for
        INVM). NOTE: the default is evaluated once at import time.
    :param features: List of features to crawl.
    :param options: Tree of options with details like what config files.
    :param frequency: Target time period for iterations. -1 means just
        one run.
    :param crawlmode: What's the system we want to crawl.
    :param format: The format of the frame, defaults to csv.
    :param overwrite: Whether output files may be overwritten.
    :param first_snapshot_num: Number assigned to the first snapshot.
    :param max_snapshots: Stop when snapshot_num reaches this value;
        -1 means no limit.
    """
    # None sentinels replace mutable default arguments so the defaults
    # are not shared across calls.
    urls = ['stdout://'] if urls is None else urls
    options = {} if options is None else options

    global should_exit
    saved_args = locals()
    logger.debug('snapshot args: %s' % (saved_args))

    environment = options.get(
        'environment',
        config_parser.get_config()['general']['environment'])
    plugin_places = options.get(
        'plugin_places',
        config_parser.get_config()['general']['plugin_places'])
    plugin_mode = config_parser.get_config()['general']['plugin_mode']

    # Refresh every plugin registry so it matches this run's feature
    # list and plugin search path.
    plugins_manager.reload_env_plugin(plugin_places=plugin_places,
                                      environment=environment)
    plugins_manager.reload_container_crawl_plugins(
        plugin_places=plugin_places,
        features=features,
        plugin_mode=plugin_mode)
    plugins_manager.reload_vm_crawl_plugins(plugin_places=plugin_places,
                                            features=features,
                                            plugin_mode=plugin_mode)
    plugins_manager.reload_host_crawl_plugins(plugin_places=plugin_places,
                                              features=features,
                                              plugin_mode=plugin_mode)

    next_iteration_time = None
    snapshot_num = first_snapshot_num

    # Die if the parent dies: ask the kernel to deliver SIGHUP to this
    # process when its parent exits (Linux prctl PR_SET_PDEATHSIG).
    PR_SET_PDEATHSIG = 1
    try:
        libc.prctl(PR_SET_PDEATHSIG, signal.SIGHUP)
        signal.signal(signal.SIGHUP, signal_handler_exit)
    except AttributeError:
        # libc has no prctl symbol on non-Linux platforms.
        logger.warning('prctl is not available. MacOS is not supported.')

    containers = []

    # This is the main loop of the system, taking a snapshot and
    # sleeping at every iteration.
    while True:
        snapshot_time = int(time.time())
        if crawlmode == Modes.OUTCONTAINER:
            containers = snapshot_containers(
                containers=containers,
                urls=urls,
                snapshot_num=snapshot_num,
                features=features,
                options=options,
                format=format,
                overwrite=overwrite,
                host_namespace=namespace,
            )
        elif crawlmode == Modes.MESOS:
            snapshot_mesos(
                crawlmode=crawlmode,
                urls=urls,
                snapshot_num=snapshot_num,
                options=options,
                format=format,
                overwrite=overwrite,
                namespace=namespace,
            )
        elif crawlmode == Modes.OUTVM:
            snapshot_vms(
                urls=urls,
                snapshot_num=snapshot_num,
                features=features,
                options=options,
                format=format,
                overwrite=overwrite,
                namespace=namespace,
            )
        elif crawlmode in [Modes.INVM, Modes.MOUNTPOINT]:
            snapshot_generic(crawlmode=crawlmode,
                             urls=urls,
                             snapshot_num=snapshot_num,
                             features=features,
                             options=options,
                             format=format,
                             namespace=namespace,
                             overwrite=overwrite)
        else:
            raise NotImplementedError('Crawl mode %s is not implemented' %
                                      crawlmode)

        # Frequency < 0 means only one run; also stop on an exit signal
        # or after max_snapshots iterations.
        if (frequency < 0 or should_exit or snapshot_num == max_snapshots):
            logger.info('Bye')
            break

        time_to_sleep, next_iteration_time = _get_next_iteration_time(
            next_iteration_time, frequency, snapshot_time)
        if time_to_sleep > 0:
            time.sleep(time_to_sleep)
        snapshot_num += 1
def snapshot(
    urls=None,
    namespace=misc.get_host_ipaddr(),
    features=defaults.DEFAULT_FEATURES_TO_CRAWL,
    options=defaults.DEFAULT_CRAWL_OPTIONS,
    since='BOOT',
    frequency=-1,
    crawlmode=Modes.INVM,
    inputfile='Undefined',
    format='csv',
    overwrite=False,
):
    """Entrypoint for crawler functionality.

    This is the function executed by long running crawler processes. It
    just loops sleeping for `frequency` seconds at each crawl interval.
    During each interval, it collects the features listed in `features`,
    and sends them to the outputs listed in `urls`.

    :param urls: The url used as the output of the snapshot
        (defaults to ``['stdout://']``).
    :param namespace: This a pointer to a specific system (e.g. IP for
        INVM). NOTE: the default is evaluated once at import time.
    :param features: List of features to crawl.
    :param options: Tree of options with details like what config files.
        NOTE(review): the default is a shared module-level dict; this
        function only reads it, but callees may mutate it — confirm.
    :param since: Calculate deltas or not. XXX needs some work.
    :param frequency: Target time period for iterations. -1 means just
        one run.
    :param crawlmode: What's the system we want to crawl.
    :param inputfile: Applies to mode.FILE. The frame emitted is this
        file.
    :param format: The format of the frame, defaults to csv.
    :param overwrite: Whether output files may be overwritten.
    """
    # None sentinel replaces a mutable default argument so the default
    # list is not shared across calls.
    urls = ['stdout://'] if urls is None else urls

    global should_exit
    saved_args = locals()
    logger.debug('snapshot args: %s' % (saved_args))

    assert('metadata' in options)
    environment = options.get('environment', defaults.DEFAULT_ENVIRONMENT)
    plugin_places = options.get('plugin_places',
                                defaults.DEFAULT_PLUGIN_PLACES).split(',')
    plugins_manager.reload_env_plugin(plugin_places=plugin_places,
                                      environment=environment)

    since_timestamp, last_snapshot_time = get_initial_since_values(since)
    next_iteration_time = None
    snapshot_num = 0

    # Die if the parent dies: ask the kernel to deliver SIGHUP to this
    # process when its parent exits (Linux prctl PR_SET_PDEATHSIG).
    PR_SET_PDEATHSIG = 1
    libc.prctl(PR_SET_PDEATHSIG, signal.SIGHUP)
    signal.signal(signal.SIGHUP, signal_handler_exit)

    if crawlmode == Modes.OUTCONTAINER:
        containers = get_filtered_list_of_containers(options, namespace)

    # This is the main loop of the system, taking a snapshot and
    # sleeping at every iteration.
    while True:
        snapshot_time = int(time.time())
        if crawlmode == Modes.OUTCONTAINER:
            curr_containers = get_filtered_list_of_containers(
                options, namespace)
            # Unlink log files of containers that disappeared since the
            # previous iteration.
            deleted = [c for c in containers if c not in curr_containers]
            containers = curr_containers
            for container in deleted:
                if options.get('link_container_log_files', False):
                    try:
                        container.unlink_logfiles(options)
                    except NotImplementedError:
                        pass

            logger.debug('Crawling %d containers' % (len(containers)))

            for container in containers:
                logger.info(
                    'Crawling container %s %s %s' %
                    (container.pid, container.short_id, container.namespace))
                if options.get('link_container_log_files', False):
                    # This is a NOP if files are already linked (which is
                    # pretty much always).
                    try:
                        container.link_logfiles(options=options)
                    except NotImplementedError:
                        pass
                # no feature crawling
                if 'nofeatures' in features:
                    continue
                snapshot_container(
                    urls=urls,
                    snapshot_num=snapshot_num,
                    features=features,
                    options=options,
                    format=format,
                    inputfile=inputfile,
                    container=container,
                    since=since,
                    since_timestamp=since_timestamp,
                    overwrite=overwrite
                )
        elif crawlmode in (Modes.INVM,
                           Modes.MOUNTPOINT,
                           Modes.DEVICE,
                           Modes.FILE,
                           Modes.ISCSI):
            snapshot_generic(
                crawlmode=crawlmode,
                urls=urls,
                snapshot_num=snapshot_num,
                features=features,
                options=options,
                format=format,
                inputfile=inputfile,
                namespace=namespace,
                since=since,
                since_timestamp=since_timestamp,
                overwrite=overwrite
            )
        # BUG FIX: was `crawlmode in (Modes.MESOS)` — `(Modes.MESOS)` is
        # not a tuple, so `in` tested membership on a single mode value
        # and would raise TypeError when this branch was reached.
        elif crawlmode == Modes.MESOS:
            snapshot_mesos(
                crawlmode=crawlmode,
                urls=urls,
                snapshot_num=snapshot_num,
                options=options,
                format=format,
                inputfile=inputfile,
                overwrite=overwrite,
                namespace=namespace,
                since=since,
                since_timestamp=since_timestamp
            )
        else:
            raise RuntimeError('Unknown Mode')

        if since == 'LASTSNAPSHOT':
            # Subsequent snapshots will update this value.
            since_timestamp = snapshot_time

        # frequency < 0 means a single run; frequency == 0 loops
        # continuously with no sleep (and no snapshot_num increment).
        if frequency < 0 or should_exit:
            logger.info('Bye')
            break
        elif frequency == 0:
            continue

        # Keep iterations aligned to a fixed schedule: advance the
        # target time by `frequency`, skipping intervals we missed.
        if next_iteration_time is None:
            next_iteration_time = snapshot_time + frequency
        else:
            next_iteration_time = next_iteration_time + frequency

        while next_iteration_time + frequency < time.time():
            next_iteration_time = next_iteration_time + frequency

        time_to_sleep = next_iteration_time - time.time()
        if time_to_sleep > 0:
            time.sleep(time_to_sleep)

        snapshot_num += 1
def snapshot(
    urls=None,
    namespace=misc.get_host_ipaddr(),
    features=defaults.DEFAULT_FEATURES_TO_CRAWL,
    options=defaults.DEFAULT_CRAWL_OPTIONS,
    since='BOOT',
    frequency=-1,
    crawlmode=Modes.INVM,
    inputfile='Undefined',
    format='csv',
    overwrite=False,
):
    """Entrypoint for crawler functionality.

    This is the function executed by long running crawler processes. It
    just loops sleeping for `frequency` seconds at each crawl interval.
    During each interval, it collects the features listed in `features`,
    and sends them to the outputs listed in `urls`.

    :param urls: The url used as the output of the snapshot
        (defaults to ``['stdout://']``).
    :param namespace: This a pointer to a specific system (e.g. IP for
        INVM). NOTE: the default is evaluated once at import time.
    :param features: List of features to crawl.
    :param options: Tree of options with details like what config files.
        NOTE(review): the default is a shared module-level dict; this
        function only reads it, but callees may mutate it — confirm.
    :param since: Calculate deltas or not. XXX needs some work.
    :param frequency: Target time period for iterations. -1 means just
        one run.
    :param crawlmode: What's the system we want to crawl.
    :param inputfile: Applies to mode.FILE. The frame emitted is this
        file.
    :param format: The format of the frame, defaults to csv.
    :param overwrite: Whether output files may be overwritten.
    """
    # None sentinel replaces a mutable default argument so the default
    # list is not shared across calls.
    urls = ['stdout://'] if urls is None else urls

    global should_exit
    saved_args = locals()
    logger.debug('snapshot args: %s' % (saved_args))

    assert('metadata' in options)
    environment = options.get('environment', defaults.DEFAULT_ENVIRONMENT)
    plugin_places = options.get('plugin_places',
                                defaults.DEFAULT_PLUGIN_PLACES).split(',')
    plugins_manager.reload_env_plugin(plugin_places=plugin_places,
                                      environment=environment)

    since_timestamp, last_snapshot_time = get_initial_since_values(since)
    next_iteration_time = None
    snapshot_num = 0

    # Die if the parent dies: ask the kernel to deliver SIGHUP to this
    # process when its parent exits (Linux prctl PR_SET_PDEATHSIG).
    PR_SET_PDEATHSIG = 1
    libc.prctl(PR_SET_PDEATHSIG, signal.SIGHUP)
    signal.signal(signal.SIGHUP, signal_handler_exit)

    if crawlmode == Modes.OUTCONTAINER:
        containers = get_filtered_list_of_containers(options, namespace)

    # This is the main loop of the system, taking a snapshot and
    # sleeping at every iteration.
    while True:
        snapshot_time = int(time.time())
        if crawlmode == Modes.OUTCONTAINER:
            curr_containers = get_filtered_list_of_containers(
                options, namespace)
            # Unlink log files of containers that disappeared since the
            # previous iteration.
            deleted = [c for c in containers if c not in curr_containers]
            containers = curr_containers
            for container in deleted:
                if options.get('link_container_log_files', False):
                    try:
                        container.unlink_logfiles(options)
                    except NotImplementedError:
                        pass

            logger.debug('Crawling %d containers' % (len(containers)))

            for container in containers:
                logger.info(
                    'Crawling container %s %s %s' %
                    (container.pid, container.short_id, container.namespace))
                if options.get('link_container_log_files', False):
                    # This is a NOP if files are already linked (which is
                    # pretty much always).
                    try:
                        container.link_logfiles(options=options)
                    except NotImplementedError:
                        pass
                # no feature crawling
                if 'nofeatures' in features:
                    continue
                snapshot_container(
                    urls=urls,
                    snapshot_num=snapshot_num,
                    features=features,
                    options=options,
                    format=format,
                    inputfile=inputfile,
                    container=container,
                    since=since,
                    since_timestamp=since_timestamp,
                    overwrite=overwrite
                )
        elif crawlmode in (Modes.INVM,
                           Modes.MOUNTPOINT,
                           Modes.DEVICE,
                           Modes.FILE,
                           Modes.ISCSI):
            snapshot_generic(
                crawlmode=crawlmode,
                urls=urls,
                snapshot_num=snapshot_num,
                features=features,
                options=options,
                format=format,
                inputfile=inputfile,
                namespace=namespace,
                since=since,
                since_timestamp=since_timestamp,
                overwrite=overwrite
            )
        else:
            raise RuntimeError('Unknown Mode')

        if since == 'LASTSNAPSHOT':
            # Subsequent snapshots will update this value.
            since_timestamp = snapshot_time

        # frequency < 0 means a single run; frequency == 0 loops
        # continuously with no sleep (and no snapshot_num increment).
        if frequency < 0 or should_exit:
            logger.info('Bye')
            break
        elif frequency == 0:
            continue

        # Keep iterations aligned to a fixed schedule: advance the
        # target time by `frequency`, skipping intervals we missed.
        if next_iteration_time is None:
            next_iteration_time = snapshot_time + frequency
        else:
            next_iteration_time = next_iteration_time + frequency

        while next_iteration_time + frequency < time.time():
            next_iteration_time = next_iteration_time + frequency

        time_to_sleep = next_iteration_time - time.time()
        if time_to_sleep > 0:
            time.sleep(time_to_sleep)

        snapshot_num += 1