Example #1
 def test_containers_crawler(self, *args):
     crawler = ContainersCrawler(features=['os'])
     frames = list(crawler.crawl())
     namespaces = sorted([f.metadata['namespace'] for f in frames])
     assert namespaces == sorted(['aaa', 'bbb', 'ccc'])
     features_count = sorted([f.num_features for f in frames])
     assert features_count == sorted([2, 2, 2])
     system_types = sorted([f.metadata['system_type'] for f in frames])
     assert system_types == sorted(['container', 'container', 'container'])
     assert args[0].call_count == 1
     assert args[1].call_count == 1
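The *args parameters and the call_count assertions imply that this test is wrapped in stacked mock.patch decorators (not shown in this excerpt) that replace the Docker-facing helpers and inject one mock per decorator. A minimal sketch of that setup, with purely hypothetical patch targets:

import mock
import unittest


class ContainersCrawlerTests(unittest.TestCase):

    # Hypothetical patch targets -- the real ones are not part of this
    # excerpt. Decorators apply bottom-up, so args[0] is the mock created
    # by the decorator closest to the function.
    @mock.patch('utils.dockerutils.exec_dockerinspect')
    @mock.patch('utils.dockerutils.list_docker_containers')
    def test_containers_crawler(self, *args):
        pass  # body as in Example #1 above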
Example #2
 def test_failed_containers_crawler_with_ignore_failure(self, *args):
     crawler = ContainersCrawler(features=['os'])
     frames = list(crawler.crawl())  # ignore_plugin_exception defaults to True
     namespaces = sorted([f.metadata['namespace'] for f in frames])
     assert namespaces == sorted(['aaa', 'errorid', 'ccc'])
     features_count = sorted([f.num_features for f in frames])
     assert features_count == sorted([3, 2, 3])
     system_types = [f.metadata['system_type'] for f in frames]
     assert system_types == ['container', 'container', 'container']
     assert args[0].call_count == 1
     assert args[1].call_count == 1
Example #3
 def testCrawlContainer1(self):
     crawler = ContainersCrawler(
         features=['cpu', 'memory', 'interface', 'package'],
         environment='kubernetes')
     frames = list(crawler.crawl())
     output = str(frames[0])
     print(output)  # only printed if the test fails
     assert 'interface-lo' in output
     assert 'if_octets_tx=' in output
     assert 'cpu-0' in output
     assert 'cpu_nice=' in output
     assert 'memory' in output
     assert 'memory_buffered=' in output
     assert 'apt' in output
     assert 'pkgarchitecture=' in output
Example #4
 def testCrawlContainerAvoidSetns(self):
     options = {'avoid_setns': True}
     crawler = ContainersCrawler(
         user_list=self.container['Id'],
         features=['cpu', 'memory', 'interface', 'package'],
         options=options)
     frames = list(crawler.crawl())
     output = str(frames[0])
     print(output)  # only printed if the test fails
     # interface in avoid_setns mode is not supported
     #assert 'interface-lo' in output
     #assert 'if_octets_tx=' in output
     assert 'cpu-0' in output
     assert 'cpu_nice=' in output
     assert 'memory' in output
     assert 'memory_buffered=' in output
     assert 'apt' in output
     assert 'pkgarchitecture=' in output
Example #5
    def testCrawlContainerKafka2(self):
        emitters = EmittersManager(urls=['kafka://localhost:9092/test'])
        crawler = ContainersCrawler(
            features=['os', 'process'],
            user_list=self.container['Id'])
        worker = Worker(emitters=emitters, frequency=-1,
                        crawler=crawler)
        worker.iterate()
        kafka = pykafka.KafkaClient(hosts='localhost:9092')
        topic = kafka.topics['test']
        consumer = topic.get_simple_consumer()
        message = consumer.consume()
        assert '"cmd":"/bin/sleep 60"' in message.value

        for i in range(1, 5):
            worker.iterate()
            message = consumer.consume()
            assert '"cmd":"/bin/sleep 60"' in message.value
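This test assumes a Kafka broker listening on localhost:9092 and that self.container refers to a container started by the test fixture running '/bin/sleep 60', which is why the assertion on the emitted "cmd" field holds. A rough sketch of such a fixture using the Docker SDK for Python (hypothetical; the real setUp is not shown):

import docker


def start_sleep_container():
    # Hypothetical fixture: run a throwaway container executing
    # "/bin/sleep 60" so the crawled 'process' feature contains that cmd.
    client = docker.from_env()
    container = client.containers.run('ubuntu:16.04', '/bin/sleep 60',
                                      detach=True)
    # The tests index the container by Id (self.container['Id']), so
    # return a dict-shaped record.
    return {'Id': container.id}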
Example #6
 def test_failed_containers_crawler(self, *args):
     crawler = ContainersCrawler(features=['os'])
     with self.assertRaises(OSError):
         frames = list(crawler.crawl(ignore_plugin_exception=False))
     assert args[0].call_count == 1
     assert args[1].call_count == 1
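Here the *args mocks presumably inject the failure: with ignore_plugin_exception=False, an exception raised inside a plugin (or inside a patched Docker helper) propagates out of crawl() instead of being swallowed. A sketch of how such a failure is commonly injected with mock.patch's side_effect (targets are hypothetical placeholders, assumed to live in the same TestCase as the earlier sketch):

# Hypothetical failure injection: the patched helper raises OSError,
# which crawl(ignore_plugin_exception=False) re-raises to the caller.
@mock.patch('utils.dockerutils.exec_dockerinspect',
            side_effect=OSError('injected failure'))
@mock.patch('utils.dockerutils.list_docker_containers')
def test_failed_containers_crawler(self, *args):
    pass  # body as in Example #6 above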
Example #7
def main():

    euid = os.geteuid()
    if euid != 0:
        print('Need to run this as root.')
        exit(1)

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--options',
        dest='options',
        type=json_parser,
        default={},
        help='JSON dict of crawler options to be passed as arguments '
             'to the crawler plugins.'
    )
    parser.add_argument(
        '--url',
        dest='url',
        type=csv_list,
        default=['stdout://'],
        help='Send the snapshot data to URL. Defaults to the console.',
    )
    parser.add_argument(
        '--namespace',
        dest='namespace',
        type=str,
        nargs='?',
        default=misc.get_host_ipaddr(),
        help='Data source this crawler is associated with. Defaults to '
             'the host IP address.',
    )
    parser.add_argument(
        '--features',
        dest='features',
        type=csv_list,
        default=['os', 'cpu'],
        help='Comma-separated list of feature-types to crawl. Defaults to '
             'os,cpu',
    )
    parser.add_argument(
        '--frequency',
        dest='frequency',
        type=int,
        default=-1,
        help='Target time period for iterations. Defaults to -1, which '
             'means run only one iteration.'
    )
    parser.add_argument(
        '--compress',
        dest='compress',
        action='store_true',
        default=False,
        help='Whether to GZIP-compress the output frame data. Defaults '
             'to false.',
    )
    parser.add_argument('--logfile', dest='logfile', type=str,
                        default='crawler.log',
                        help='Logfile path. Defaults to crawler.log'
                        )
    parser.add_argument(
        '--crawlmode',
        dest='crawlmode',
        type=str,
        choices=[
            Modes.INVM,
            Modes.OUTVM,
            Modes.MOUNTPOINT,
            Modes.OUTCONTAINER,
            Modes.MESOS,
        ],
        default=Modes.INVM,
        help='The crawler mode: '
             '{INVM,OUTVM,MOUNTPOINT,OUTCONTAINER,MESOS}. '
             'Defaults to INVM.',
    )
    parser.add_argument(
        '--mountpoint',
        dest='mountpoint',
        type=str,
        default='/',
        help='Mountpoint location used as the / for features like '
             'packages, files, and config.'
    )
    parser.add_argument(
        '--format',
        dest='format',
        type=str,
        default='csv',
        choices=['csv', 'graphite', 'json', 'logstash'],
        help='Emitted data format.',
    )
    parser.add_argument(
        '--crawlContainers',
        dest='crawlContainers',
        type=str,
        nargs='?',
        default='ALL',
        help='List of containers to crawl as a list of Docker container '
             'IDs (only Docker is supported at the moment). Defaults to '
             'all running containers. Example: --crawlContainers aaa,bbb',
    )
    parser.add_argument(
        '--crawlVMs',
        dest='vm_descs_list',
        nargs='+',
        default='ALL',
        help='List of VMs to crawl. Defaults to \'ALL\' VMs. '
             'Currently each VM must be specified as '
             '\'vm_name,kernel_version_long,linux_flavour,arch\'. '
             'Automatic kernel version detection is planned; then only '
             'the VM names (\'ALL\' by default) will need to be passed. '
             'Example: --crawlVMs '
             'vm1,3.13.0-24-generic_3.13.0-24.x86_64,ubuntu,x86_64 '
             'vm2,4.0.3.x86_64,vanilla,x86_64',
    )
    parser.add_argument(
        '--environment',
        dest='environment',
        type=str,
        default='cloudsight',
        help='This specifies some environment-specific behavior, like how '
             'to name a container. The way to add a new behavior is by '
             'implementing a plugin (see plugins/cloudsight_environment.py '
             'as an example). Defaults to "cloudsight".',
    )
    parser.add_argument(
        '--plugins',
        dest='plugin_places',
        type=csv_list,
        default=['plugins'],
        help='This is a comma-separated list of directories in which to '
             'find plugins. Each path can be absolute, or relative to the '
             'location of crawler.py. Defaults to "plugins".',
    )
    parser.add_argument(
        '--numprocesses',
        dest='numprocesses',
        type=int,
        default=1,
        help='Number of processes used for container crawling. Defaults '
             'to 1. NOT SUPPORTED.'
    )
    parser.add_argument(
        '--extraMetadata',
        dest='extraMetadata',
        type=json_parser,
        default={},
        help='JSON with data to annotate all features. It can be used '
             'to append a set of system identifiers to the metadata '
             'feature.'
    )
    parser.add_argument(
        '--avoidSetns',
        dest='avoid_setns',
        action='store_true',
        default=False,
        help='Avoids the use of the setns() syscall to crawl containers. '
             'Some features like process will not work with this option. '
             'Only applies to the OUTCONTAINER mode'
    )

    args = parser.parse_args()
    misc.setup_logger('crawlutils', args.logfile)
    misc.setup_logger('yapsy', 'yapsy.log')

    options = args.options
    options['avoid_setns'] = args.avoid_setns
    options['mountpoint'] = args.mountpoint

    emitters = EmittersManager(urls=args.url,
                               format=args.format,
                               compress=args.compress,
                               extra_metadata=args.extraMetadata,
                               plugin_places=args.plugin_places)

    if args.crawlmode == 'OUTCONTAINER':
        crawler = ContainersCrawler(
            features=args.features,
            environment=args.environment,
            user_list=args.crawlContainers,
            host_namespace=args.namespace,
            plugin_places=args.plugin_places,
            options=options)
    elif args.crawlmode == 'INVM' or args.crawlmode == 'MOUNTPOINT':
        crawler = HostCrawler(
            features=args.features,
            namespace=args.namespace,
            plugin_places=args.plugin_places,
            options=options)
    elif args.crawlmode == 'OUTVM':
        crawler = VirtualMachinesCrawler(
            features=args.features,
            user_list=args.vm_descs_list,
            host_namespace=args.namespace,
            plugin_places=args.plugin_places,
            options=options)
    else:
        raise NotImplementedError('Invalid crawlmode')

    worker = Worker(emitters=emitters,
                    frequency=args.frequency,
                    crawler=crawler)

    try:
        worker.run()
    except KeyboardInterrupt:
        pass
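For reference, running this script with --crawlmode OUTCONTAINER and otherwise default arguments is roughly equivalent to wiring the objects up directly, as in the sketch below. The constructor arguments are taken from main() above; the import paths are assumptions about the project layout:

# Programmatic sketch of what main() builds for OUTCONTAINER mode with
# the argparse defaults above (import paths are assumed, not confirmed).
from emitters_manager import EmittersManager
from containers_crawler import ContainersCrawler
from worker import Worker

emitters = EmittersManager(urls=['stdout://'],         # --url default
                           format='csv',               # --format default
                           compress=False,             # --compress default
                           extra_metadata={},          # --extraMetadata default
                           plugin_places=['plugins'])  # --plugins default
crawler = ContainersCrawler(features=['os', 'cpu'],    # --features default
                            environment='cloudsight',
                            user_list='ALL',             # all running containers
                            host_namespace='localhost',  # normally the host IP
                            plugin_places=['plugins'],
                            options={'avoid_setns': False, 'mountpoint': '/'})
worker = Worker(emitters=emitters, frequency=-1, crawler=crawler)
worker.run()  # frequency=-1 means a single iteration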