Пример #1
0
    def run(self):
        """Serve requests that arrive through etcd until ``self.gtfo`` is set.

        Each cycle lists the entries stored under this server's request
        prefix, parses every request, stores the serialized response under
        the corresponding response key and deletes the request key. When a
        cycle finishes early, the rest of ``cfg.interval`` is spent waiting
        on ``self.event``.
        """
        utils.set_etcd_client(self, True)
        bind_notice = 'Parser server "{}" binded to queue on "{}:{}"'.format(
            self.cfg.name, self.cfg.host, self.cfg.port)
        self.log.info(bind_notice)

        # Both prefixes are rendered once from the configured key formats.
        request_prefix = self.cfg.etcd_format_key_request.format(
            name=self.cfg.name)
        response_prefix = self.cfg.etcd_format_key_response.format(
            name=self.cfg.name)

        while not self.gtfo:
            cycle_started = utils.time_ns()
            utils.set_etcd_client(self, False)

            for entry in self.client.get_prefix(request_prefix):
                received_at = utils.time_ns()

                # entry[0] carries the serialized request payload.
                incoming = nudnik.entity_pb2.Request()
                incoming.ParseFromString(entry[0])
                fields = utils.parse_request(self, incoming, received_at)
                reply = nudnik.entity_pb2.Response(**fields)

                # The response key is the request key with its leading
                # request prefix swapped for the response prefix.
                source_key = entry[1].key
                target_key = source_key.replace(
                    request_prefix, response_prefix, 1)

                self.stats.append(
                    nudnik.stats.Stat(incoming, reply, received_at))
                self.client.put(target_key, reply.SerializeToString())
                self.client.delete(source_key)

            spent = utils.diff_seconds(cycle_started, utils.time_ns())
            if spent < self.cfg.interval:
                remainder = self.cfg.interval - spent
                self.event.wait(timeout=remainder)
Пример #2
0
    def run(self):
        """Generate every configured load once per cycle until stopped.

        A cycle calls ``utils.generate_load`` for each entry of
        ``cfg.load_list``; if that took less than ``cfg.interval`` the thread
        waits on ``self.event`` for the remainder. The loop ends once
        ``self.gtfo`` is set.
        """
        started_notice = 'Load thread {} started'.format(self.name)
        self.log.debug(started_notice)

        while not self.gtfo:
            cycle_started = utils.time_ns()

            for load_spec in self.cfg.load_list:
                utils.generate_load(self.log, load_spec, self.cfg.meta)

            spent = utils.diff_seconds(cycle_started, utils.time_ns())
            if spent < self.cfg.interval:
                remainder = self.cfg.interval - spent
                self.event.wait(timeout=remainder)
Пример #3
0
 def __init__(self, node):
     """Create a metric sample for ``node`` stamped with the current time.

     The parent initializer receives ``utils.time_ns()`` as ``timestamp``;
     one sub-metric object each is then created for CPU, memory, disk and
     network.
     """
     created_at = utils.time_ns()
     super(Metric, self).__init__(timestamp=created_at)

     self.node = node

     # Sub-metrics are instantiated in a fixed order: cpu, mem, disk, net.
     self.cpu = MetricCpu()
     self.mem = MetricMemory()
     self.disk = MetricDisk()
     self.net = MetricNet()
Пример #4
0
    def run(self):
        """Sample node metrics once per ``cfg.metrics_interval`` and publish them.

        Every cycle builds one ``Metric`` for ``self.node``, logs its
        formatted lines (at debug level when ``cfg.debug`` is set, otherwise
        at info level when 'stdout' is listed in ``cfg.metrics``), starts one
        output thread per enabled sink ('file', 'influxdb', 'prometheus') and
        blocks until all of those threads have finished. The loop ends once
        ``self.gtfo`` is set.
        """
        self.log.debug('Running {}'.format(self.name))
        mode = 'servermetrics' if self.cfg.server else 'clientmetrics'

        while not self.gtfo:
            time_start = utils.time_ns()

            metric = Metric(self.node)

            # Debug mode takes precedence over the plain 'stdout' sink; both
            # use the stdout format and differ only in the log level.
            if self.cfg.debug:
                emit = self.log.debug
            elif 'stdout' in self.cfg.metrics:
                emit = self.log.info
            else:
                emit = None
            if emit is not None:
                for strmetric in _parse_metrics(
                        self.log, mode, [metric],
                        self.cfg.metrics_format_stdout):
                    emit(strmetric)

            if 'file' in self.cfg.metrics:
                thread = MetricsFileOutput(
                    self.log, self.cfg.metrics_file_path, mode, [metric],
                    self.cfg.metrics_format_file)
                thread.start()
                self.workers.append(thread)
            if 'influxdb' in self.cfg.metrics:
                thread = MetricsInfluxdbOutput(
                    self.log, self.cfg.influxdb_url_metrics, mode, [metric],
                    self.cfg.metrics_format_influxdb)
                thread.start()
                self.workers.append(thread)
            if 'prometheus' in self.cfg.metrics:
                thread = MetricsPrometheusOutput(
                    self.log, self.cfg.prometheus_url_metrics, mode, [metric],
                    self.cfg.metrics_format_prometheus)
                thread.start()
                self.workers.append(thread)

            # Wait for every output thread. Iterate over a snapshot so that
            # removing finished threads does not shift the list under the
            # loop and make it skip the element following each removal.
            while self.workers:
                for worker in list(self.workers):
                    if worker.is_alive():
                        worker.join(0.25)
                    else:
                        self.workers.remove(worker)

            elapsed = utils.diff_seconds(time_start, utils.time_ns())
            if elapsed < self.cfg.metrics_interval:
                self.event.wait(timeout=(self.cfg.metrics_interval - elapsed))
Пример #5
0
    def set_grpc_client(self, force):
        """Rebuild the gRPC client when the host resolution is stale.

        Returns without doing anything while the time since
        ``self.host_resolved_at`` is below ``cfg.dns_ttl``, unless ``force``
        is ``True``. Otherwise the host is re-resolved and a ``ParserClient``
        is created, retrying until construction succeeds; the pause before
        each retry grows with the number of failed attempts.

        Args:
            force: ``True`` to rebuild the client regardless of its age.
        """
        age = utils.diff_seconds(self.host_resolved_at, utils.time_ns())
        still_fresh = age < self.cfg.dns_ttl
        if still_fresh and force is False:
            return

        utils.resolv_host(self, True)
        self.client = None

        attempt = 0
        while self.client is None:
            try:
                self.client = ParserClient(
                    self.host_address, self.cfg.port, self.cfg.timeout)
            except Exception as error:
                notice = 'Reinitializing gRPC client due to {}'.format(error)
                self.log.warn(notice)
                # No pause after the first failure; afterwards the wait
                # grows linearly with the attempt counter.
                pause = (attempt * 100) / 1000
                self.event.wait(timeout=pause)
                attempt += 1

        if self.cfg.vvv:
            self.log.debug('gRPC Client to {} initialized, {}'.format(
                self.host_address, self.client))
Пример #6
0
    def run(self):
        """Flush collected stats to every configured sink once per interval.

        Every cycle takes a snapshot of ``self.stats``. When the snapshot is
        not empty it is logged (at debug level when ``cfg.debug`` is set,
        otherwise at info level when 'stdout' is listed in ``cfg.stats``),
        handed to one output thread per enabled sink ('file', 'influxdb',
        'prometheus'), and as many items as were snapshotted are popped from
        the front of ``self.stats``. The cycle then blocks until all output
        threads have finished. The loop ends once ``self.gtfo`` is set.
        """
        self.log.debug('Running {}'.format(self.name))
        mode = 'serverstats' if self.cfg.server else 'clientstats'

        while not self.gtfo:
            time_start = utils.time_ns()

            # Work on a snapshot so that exactly the items captured here are
            # reported and later popped.
            current_report = list(self.stats)
            current_report_length = len(current_report)
            if current_report_length > 0:
                if self.cfg.vvv:
                    self.log.debug('Reporting {}/{} items'.format(
                        current_report_length, len(self.stats)))

                # Debug mode takes precedence over the plain 'stdout' sink;
                # both use the stdout formats and differ only in log level.
                if self.cfg.debug:
                    emit = self.log.debug
                elif 'stdout' in self.cfg.stats:
                    emit = self.log.info
                else:
                    emit = None
                if emit is not None:
                    for stat in _parse_stats(
                            self.log, mode, current_report,
                            self.cfg.stats_format_stdout,
                            self.cfg.stats_format_retransmit_stdout):
                        emit(stat)

                if 'file' in self.cfg.stats:
                    thread = FileStats(self.log, self.cfg.stats_file_path,
                                       mode, current_report,
                                       self.cfg.stats_format_file,
                                       self.cfg.stats_format_retransmit_file)
                    thread.start()
                    self.workers.append(thread)
                if 'influxdb' in self.cfg.stats:
                    thread = InfluxdbStats(
                        self.log, self.cfg.influxdb_url_stats, mode,
                        current_report, self.cfg.stats_format_influxdb,
                        self.cfg.stats_format_retransmit_influxdb)
                    thread.start()
                    self.workers.append(thread)
                if 'prometheus' in self.cfg.stats:
                    thread = PrometheusStats(
                        self.log, self.cfg.prometheus_url_stats, mode,
                        current_report, self.cfg.stats_format_prometheus,
                        self.cfg.stats_format_retransmit_prometheus)
                    thread.start()
                    self.workers.append(thread)

                # Drop the reported items from the front of the collection.
                for i in range(0, current_report_length):
                    if self.cfg.vvvvv:
                        self.log.debug('Popping {}/{} items'.format(
                            i, current_report_length))
                    self.stats.pop(0)

                # Wait for every output thread. Iterate over a snapshot so
                # that removing finished threads does not shift the list
                # under the loop and make it skip the following element.
                while self.workers:
                    for worker in list(self.workers):
                        if worker.is_alive():
                            worker.join(0.25)
                        else:
                            self.workers.remove(worker)

            elif self.cfg.vvvv:
                self.log.debug('Nothing to report')

            elapsed = utils.diff_seconds(time_start, utils.time_ns())
            if elapsed < self.cfg.stats_interval:
                self.event.wait(timeout=(self.cfg.stats_interval - elapsed))
Пример #7
0
    def run(self):
        """Start the sender workers, then enqueue ``cfg.rate`` requests per cycle.

        ``cfg.workers`` daemon ``MessageSender`` threads are started and the
        method blocks on a shared semaphore until each of them has released
        it once. Afterwards every cycle puts ``cfg.rate`` requests on
        ``self.queue`` (protobuf requests for 'grpc'/'etcd', prepared HTTP
        requests otherwise). The stream exits once ``cfg.count`` messages
        have been produced (when ``cfg.count`` is positive), or raises a
        ``ChaosException`` when the random chaos check fires.
        """
        per_second = self.cfg.rate / float(self.cfg.interval)
        self.log.debug(
            'Stream {} started, sending {} messages per second'.format(
                self.name, per_second))

        sequence_id = 0

        # Take one permit per worker up front; the permits are taken a
        # second time below, which can only succeed after releases.
        active_workers = threading.Semaphore(self.cfg.workers)
        for slot in range(self.cfg.workers):
            active_workers.acquire()
            sender = MessageSender(self.cfg, self.log, self.stream_id, slot,
                                   active_workers, self.queue, self.stats)
            sender.daemon = True
            self.workers.append(sender)
            sender.start()

        # Wait for all workers to initialize clients
        for _ in range(self.cfg.workers):
            active_workers.acquire()

        while not self.gtfo:
            cycle_started = utils.time_ns()

            for offset in range(self.cfg.rate):
                message_id = (sequence_id * self.cfg.rate) + offset

                # A positive cfg.count caps the total number of messages.
                if 0 < self.cfg.count <= message_id:
                    self.exit()
                    return

                if self.cfg.protocol in ('grpc', 'etcd'):
                    request = nudnik.entity_pb2.Request(
                        name=self.cfg.name,
                        stream_id=self.stream_id,
                        sequence_id=sequence_id,
                        message_id=message_id,
                        ctime=utils.time_ns(),
                        load=self.cfg.load_list)
                else:
                    headers = {
                        str(pair[0]): str(pair[1])
                        for pair in self.cfg.headers
                    }
                    body = self.cfg.request_format.format(
                        name=self.cfg.name,
                        stream_id=self.stream_id,
                        sequence_id=sequence_id,
                        message_id=message_id,
                        ctime=utils.time_ns(),
                        load=self.cfg.load_list)

                    # The URL is a placeholder at this point.
                    request = requests.Request(
                        self.cfg.method, 'http://place_holder',
                        data=body, headers=headers).prepare()

                    # Attach the bookkeeping fields that the protobuf
                    # request carries natively.
                    request.name = self.cfg.name
                    request.stream_id = self.stream_id
                    request.sequence_id = sequence_id
                    request.message_id = message_id
                    request.ctime = utils.time_ns()
                    request.load = self.cfg.load_list

                self.queue.put(request)

            if self.cfg.vvv:
                self.log.debug('Active workers/tasks: {}/{}'.format(
                    threading.active_count(), self.queue.qsize()))

            sequence_id += 1

            if self.cfg.chaos > 0:
                roll = random.randint(0, self.cfg.cycle_per_hour)
                if roll <= self.cfg.chaos:
                    chaos_exception = utils.ChaosException(
                        self.cfg.chaos_string)
                    self.log.fatal(chaos_exception)
                    self.exit()
                    raise chaos_exception

            spent = utils.diff_seconds(cycle_started, utils.time_ns())
            if spent < self.cfg.interval:
                remainder = self.cfg.interval - spent
                self.event.wait(timeout=remainder)
Пример #8
0
    def run(self):
        """Take requests off the queue and send them until ``self.gtfo`` is set.

        After initialising the protocol client ('grpc' or 'etcd') the worker
        releases ``self.active_workers`` once and then loops: refresh the
        client / host resolution, fetch one request from ``self.queue``, and
        send it over the configured protocol. A request is re-sent until it
        succeeds or the retry budget (``cfg.retry_count``, negative meaning
        no limit) is used up; every attempt is recorded in ``self.stats``.
        """
        if self.cfg.protocol == 'grpc':
            self.set_grpc_client(True)
        elif self.cfg.protocol == 'etcd':
            # These two prefixes are only bound for the etcd protocol; they
            # are only read inside the etcd branch of the send loop below.
            request_prefix = self.cfg.etcd_format_key_request.format(
                name=self.cfg.name)
            response_prefix = self.cfg.etcd_format_key_response.format(
                name=self.cfg.name)
            utils.set_etcd_client(self, True)

        # Presumably tells the creator of this worker that client
        # initialisation is finished - confirm against the caller.
        self.active_workers.release()

        if self.cfg.vv and self.client:
            self.log.debug('MessageSender {} initiated'.format(self.name))

        while not self.gtfo:
            # Stays None when no send attempt was made in this iteration.
            timestamp = None

            # Non-forced refresh of the client (grpc/etcd) or of the host
            # resolution (any other protocol).
            if self.cfg.protocol == 'grpc':
                self.set_grpc_client(False)
            elif self.cfg.protocol == 'etcd':
                utils.set_etcd_client(self, False)
            else:
                utils.resolv_host(self, False)

            # Poll the queue with a short timeout so that a shutdown request
            # (self.gtfo) is noticed while the queue is empty.
            request = None
            while request is None:
                if self.gtfo:
                    return
                try:
                    request = self.queue.get(block=True, timeout=0.2)
                except queue.Empty:
                    pass

            request.worker_id = self.worker_id

            if self.cfg.vvvvv:
                self.log.debug('Handling message_id {}'.format(
                    request.message_id))

            # try_count is the remaining budget (first attempt + retries);
            # retry_count counts the failed attempts made so far.
            retry_count = 0
            try_count = 1 + self.cfg.retry_count
            send_was_successful = False
            # Keep sending until success, shutdown, or the budget is spent;
            # a negative cfg.retry_count disables the budget check.
            while not self.gtfo and (not send_was_successful and
                                     ((self.cfg.retry_count < 0) or
                                      (try_count > 0))):
                request.stime = utils.time_ns()

                meta = self.cfg.meta.format(
                    req=request, node=nudnik.metrics.MetricNode(
                    )) if self.cfg.meta is not None else None
                request.meta = utils.get_meta(meta, self.cfg.meta_size)

                # Generate the loads attached to the request, if any, before
                # every send attempt.
                if getattr(request, 'load', None) is not None:
                    for load in request.load:
                        utils.generate_load(self.log, load, meta)

                response = None
                if self.cfg.protocol == 'grpc':
                    try:
                        response = self.client.get_response_for_request(
                            request)
                    except grpc._channel._Rendezvous as e:
                        # A failed call is turned into a synthetic
                        # status-500 response and the client is rebuilt.
                        resp = {'status_code': 500}
                        response = nudnik.entity_pb2.Response(**resp)
                        self.log.warn(
                            'Reinitializing gRPC client due to {}'.format(e))
                        self.set_grpc_client(True)

                elif self.cfg.protocol == 'etcd':
                    try:
                        if self.cfg.vvv:
                            self.log.debug('Etcd request: {}'.format(request))
                        # Request key layout:
                        # <request prefix>/<sequence_id>/<message_id>
                        key = '{}/{}/{}'.format(
                            self.cfg.etcd_format_key_request.format(
                                name=request.name), request.sequence_id,
                            request.message_id)
                        value = request.SerializeToString()
                        if self.cfg.vvvvv:
                            self.log.debug('Writing {} => {}'.format(
                                key, value))
                        self.client.put(key, value)
                        response_key = key.replace(request_prefix,
                                                   response_prefix, 1)
                        watch_id = self.client.add_watch_callback(
                            response_key, self.watch_callback_key_release)
                        # NOTE(review): the lock is taken here and taken
                        # again by the `with` below, so the `with` presumably
                        # blocks until watch_callback_key_release releases
                        # the lock - confirm. If an exception is raised
                        # before the `with` is entered the lock stays held.
                        self.lock.acquire()
                        if self.cfg.vvvvv:
                            self.log.debug(
                                'Waiting for response at "{}"'.format(
                                    response_key))
                        with self.lock:
                            self.client.cancel_watch(watch_id)
                            resp = self.client.get(response_key)
                            response = nudnik.entity_pb2.Response()
                            response.ParseFromString(resp[0])
                            self.client.delete(response_key)
                    except Exception as e:
                        # Any failure is turned into a synthetic status-500
                        # response and the etcd client is rebuilt.
                        resp = {'status_code': 500}
                        response = nudnik.entity_pb2.Response(**resp)
                        self.log.warn(
                            'Reinitializing Etcd client due to "{}"'.format(e))
                        utils.set_etcd_client(self, True)

                else:
                    try:
                        request.url = '{}://{}:{}{}'.format(
                            self.cfg.protocol, self.host_address,
                            self.cfg.port, self.cfg.path)
                        response = self.session.send(request)
                        # Map every 2xx status to 0 so the success check
                        # below is the same for all protocols.
                        if response.status_code >= 200 and response.status_code < 300:
                            response.status_code = 0
                    except Exception as e:
                        response = None
                        self.log.warn('Resending request due to {}'.format(e))
                        utils.resolv_host(self, True)

                if self.cfg.vvvvv:
                    self.log.debug(response)

                timestamp = utils.time_ns()

                # Success needs a response with status 0 and a current fail
                # ratio of at least cfg.fail_ratio (presumably so that a
                # configured share of sends is reported as failed - confirm).
                send_was_successful = (
                    (response is not None) and (response.status_code == 0)
                    and (self.stats.get_fail_ratio() >= self.cfg.fail_ratio))

                if send_was_successful:
                    if self.cfg.vvvvv:
                        self.log.debug('Request was successful')
                    self.stats.add_success()
                    stat = nudnik.stats.Stat(request, response, timestamp)
                    self.stats.append(stat)
                else:
                    self.log.warn('Request was not successful')
                    self.stats.add_failure()
                    # Spend one try and stamp the retry time/count on the
                    # request.
                    try_count -= 1
                    retry_count += 1
                    request.rtime = utils.time_ns()
                    request.rcount = retry_count

            # Warn when the time from request creation to the last attempt,
            # scaled by the configured rate, exceeds the interval.
            if self.cfg.vv and timestamp is not None:
                total_rtt = utils.diff_seconds(request.ctime,
                                               timestamp) * self.cfg.rate
                if total_rtt > self.cfg.interval:
                    self.log.warn(
                        'Predicted total rtt {} for rate {} exceeds interval {}'
                        .format(total_rtt, self.cfg.rate, self.cfg.interval))

        self.log.debug('{} has left the building'.format(self))