class VoltaBox500Hz(VoltaBox):
    """VoltaBox500Hz - works with the plain-text 500 Hz box.

    Reads lines from the data source opened by ``source_opener``, chops
    them with ``TimeChopper`` and drains the chunks into the ``currents``
    metric created from the core's data session.
    """

    def __init__(self, config, core):
        """Read the sample/baud rates from ``config`` and open the source.

        Args:
            config: object answering ``get_option(section, name, default)``.
            core: passed through to ``VoltaBox.__init__``.
        """
        VoltaBox.__init__(self, config, core)
        self.sample_rate = config.get_option('volta', 'sample_rate', 500)
        self.baud_rate = config.get_option('volta', 'baud_rate', 115200)
        # initialize data source
        self.source_opener.baud_rate = self.baud_rate
        self.source_opener.read_timeout = self.grab_timeout
        self.data_source = self.source_opener()
        logger.debug('Data source initialized: %s', self.data_source)
        self.my_metrics = {}
        # start_test() indexes my_metrics['currents'], so the metric has to
        # exist before the test starts (VoltaBoxBinary does the same).
        self.__create_my_metrics()

    def __create_my_metrics(self):
        """Register the ``currents`` metric in the core's data session."""
        self.my_metrics['currents'] = self.core.data_session.new_metric()

    def start_test(self, results):
        """Grab stage - clean the dirty buffer and start the grabber thread.

        pipeline
            read source data ->
            chop by samplerate w/ ratio ->
            drain chunks into ``self.my_metrics['currents']``

        Args:
            results: stored as ``self.grabber_q``; only ``get_info`` reads
                it (for ``qsize()``), the pipeline does not write to it.
        """
        logger.info('volta start test')
        self.grabber_q = results

        # clean up dirty buffer: discard ``sample_rate`` lines
        for _ in range(self.sample_rate):
            self.data_source.readline()

        logger.info('reader init?')
        self.reader = BoxPlainTextReader(self.data_source, self.sample_rate)
        logger.info('reader init!')
        self.pipeline = Drain(
            TimeChopper(self.reader, self.sample_rate, self.chop_ratio),
            self.my_metrics['currents'])
        logger.info('Starting grab thread...')
        self.pipeline.start()
        logger.debug('Waiting grabber thread finish...')

    def end_test(self):
        """Close the reader and pipeline, wait up to 10s, close the source."""
        self.reader.close()
        self.pipeline.close()
        self.pipeline.join(10)
        self.data_source.close()

    def get_info(self):
        """Return a dict with grabber liveness and queue size when available."""
        data = {}
        if self.pipeline:
            # Thread.isAlive() was removed in Python 3.9; is_alive() works
            # on every supported interpreter.
            data['grabber_alive'] = self.pipeline.is_alive()
        if self.grabber_q:
            data['grabber_queue_size'] = self.grabber_q.qsize()
        return data
def test_interrupt_and_wait(self):
    """
    Drain a million-item range into a queue and wait for it to finish.

    The drain is started and joined without being closed, then the
    destination queue must hold every item of the source.
    """
    expected_items = 1000000
    sink = Queue()
    worker = Drain(range(expected_items), sink)
    worker.start()
    worker.join()
    assert sink.qsize() == expected_items
class Plugin(AbstractPlugin, AggregateResultListener, MonitoringDataListener):
    """Uploader plugin: pushes test data to a Lunapark/Overload backend.

    Aggregated data, monitoring data and (when the core has an error log)
    events are put on queues and sent by daemon threads through ``LPJob``.
    """

    RC_STOP_FROM_WEB = 8
    VERSION = '3.0'
    SECTION = 'uploader'

    def __init__(self, core, cfg, name):
        AbstractPlugin.__init__(self, core, cfg, name)
        self.data_queue = Queue()
        self.monitoring_queue = Queue()
        if self.core.error_log:
            # events are read from the error log and drained into a queue
            self.events_queue = Queue()
            self.events_reader = EventsReader(self.core.error_log)
            self.events_processing = Drain(self.events_reader, self.events_queue)
            self.add_cleanup(self.stop_events_processing)
            self.events_processing.start()
            self.events = threading.Thread(target=self.__events_uploader)
            self.events.daemon = True

        self.retcode = -1
        self._target = None
        self.task_name = ''
        self.token_file = None
        self.version_tested = None
        self.send_status_period = 10

        self.status_sender = threading.Thread(target=self.__send_status)
        self.status_sender.daemon = True

        self.upload = threading.Thread(target=self.__data_uploader)
        self.upload.daemon = True

        self.monitoring = threading.Thread(target=self.__monitoring_uploader)
        self.monitoring.daemon = True

        self._is_telegraf = None
        self.backend_type = BackendTypes.identify_backend(self.cfg['api_address'], self.cfg_section_name)
        self._task = None
        # '' means "not resolved yet"; None is a valid resolved value
        self._api_token = ''
        self._lp_job = None
        self._lock_duration = None
        self._info = None
        self.locked_targets = []
        self.web_link = None
        self.finished = False

    def set_option(self, option, value):
        """Store ``value`` under ``option`` in the ``meta`` config section."""
        self.cfg.setdefault('meta', {})[option] = value

    @staticmethod
    def get_key():
        return __file__

    @property
    def lock_duration(self):
        """Generator-reported duration, else ``target_lock_duration`` option."""
        if self._lock_duration is None:
            info = self.get_generator_info()
            if info.duration:
                self._lock_duration = info.duration
            else:
                self._lock_duration = expand_to_seconds(
                    self.get_option("target_lock_duration"))
        return self._lock_duration

    def get_available_options(self):
        """Return the list of option names this plugin understands."""
        opts = [
            "api_address",
            "writer_endpoint",
            "task",
            "job_name",
            "job_dsc",
            "notify",
            "ver",
            "component",
            "operator",
            "jobno_file",
            "ignore_target_lock",
            "target_lock_duration",
            "lock_targets",
            "jobno",
            "upload_token",
            'connection_timeout',
            'network_attempts',
            'api_attempts',
            'maintenance_attempts',
            'network_timeout',
            'api_timeout',
            'maintenance_timeout',
            'strict_lock',
            'send_status_period',
            'log_data_requests',
            'log_monitoring_requests',
            'log_status_requests',
            'log_other_requests',
            'threads_timeout',
            'chunk_size'
        ]
        return opts

    def configure(self):
        pass

    def check_task_is_open(self):
        """Verify the configured task is in ``Open`` status.

        Skipped for the Overload backend.

        Raises:
            RuntimeError: the task is not open, the backend returned an
                error or an unknown format, or the request failed with
                HTTP 400/404/500.
            requests.exceptions.HTTPError: any other HTTP failure.
        """
        if self.backend_type == BackendTypes.OVERLOAD:
            return
        TASK_TIP = 'The task should be connected to Lunapark.' \
                   'Open startrek task page, click "actions" -> "load testing".'

        logger.debug("Check if task %s is open", self.task)
        try:
            task_data = self.lp_job.get_task_data(self.task)[0]
            try:
                task_status = task_data['status']
                if task_status == 'Open':
                    logger.info("Task %s is ok", self.task)
                    self.task_name = str(task_data['name'])
                else:
                    logger.info("Task %s:" % self.task)
                    logger.info(task_data)
                    raise RuntimeError("Task is not open")
            except KeyError:
                # no 'status' (or 'name'): look for an error description
                try:
                    error = task_data['error']
                    raise RuntimeError(
                        "Task %s error: %s\n%s" %
                        (self.task, error, TASK_TIP))
                except KeyError:
                    raise RuntimeError(
                        'Unknown task data format:\n{}'.format(task_data))
        except requests.exceptions.HTTPError as ex:
            logger.error(
                "Failed to check task status for '%s': %s", self.task, ex)
            status_code = ex.response.status_code
            if status_code == 404:
                raise RuntimeError("Task not found: %s\n%s" % (self.task, TASK_TIP))
            elif status_code == 500 or status_code == 400:
                raise RuntimeError(
                    "Unable to check task staus, id: %s, error code: %s" %
                    (self.task, status_code))
            raise

    @staticmethod
    def search_task_from_cwd(cwd):
        """Find a JIRA-like issue key in ``cwd`` or one of its parents.

        Args:
            cwd: directory path to start from.

        Returns:
            The upper-cased issue key (e.g. ``ABC-123``) of the closest
            directory whose name starts with one.

        Raises:
            RuntimeError: no directory up to the filesystem root matches.
        """
        issue = re.compile("^([A-Za-z]+-[0-9]+)(-.*)?")
        start_dir = cwd
        while cwd:
            logger.debug("Checking if dir is named like JIRA issue: %s", cwd)
            found = issue.match(os.path.basename(cwd))
            if found:
                return found.group(1).upper()

            newdir = os.path.abspath(os.path.join(cwd, os.path.pardir))
            if newdir == cwd:
                # reached the filesystem root
                break
            cwd = newdir

        # report the directory the search started from, not the process cwd
        raise RuntimeError(
            "task=dir requested, but no JIRA issue name in cwd: %s" %
            start_dir)

    def prepare_test(self):
        """Lock targets, create or attach to the job and send meta info.

        When the backend is unreachable or the job cannot be created, the
        data-handling entry points of this instance are replaced with
        no-ops and the method returns early.
        """
        info = self.get_generator_info()
        port = info.port
        instances = info.instances
        if info.ammo_file is not None:
            if info.ammo_file.startswith(("http://", "https://")):
                ammo_path = info.ammo_file
            else:
                ammo_path = os.path.realpath(info.ammo_file)
        else:
            logger.warning('Failed to get info about ammo path')
            ammo_path = 'Undefined'
        loop_count = int(info.loop_count)

        try:
            lp_job = self.lp_job
            self.add_cleanup(self.unlock_targets)
            self.locked_targets = self.check_and_lock_targets(strict=self.get_option('strict_lock'),
                                                              ignore=self.get_option('ignore_target_lock'))
            self.add_cleanup(self.close_job)
            if lp_job._number:
                self.make_symlink(lp_job._number)
                self.check_task_is_open()
            else:
                self.check_task_is_open()
                lp_job.create()
                self.make_symlink(lp_job.number)
            self.publish('job_no', lp_job.number)
        except (APIClient.JobNotCreated, APIClient.NotAvailable, APIClient.NetworkError) as e:
            # exceptions have no ``.message`` on Python 3; log the exception
            logger.error('%s', e)
            logger.error(
                'Failed to connect to Lunapark, disabling DataUploader')
            self.start_test = lambda *a, **kw: None
            self.post_process = lambda *a, **kw: None
            self.on_aggregated_data = lambda *a, **kw: None
            self.monitoring_data = lambda *a, **kw: None
            return

        cmdline = ' '.join(sys.argv)
        lp_job.edit_metainfo(
            instances=instances,
            ammo_path=ammo_path,
            loop_count=loop_count,
            regression_component=self.get_option("component"),
            cmdline=cmdline,
        )

        self.core.job.subscribe_plugin(self)

        try:
            console = self.core.get_plugin_of_type(ConsolePlugin)
        except KeyError:
            logger.debug("Console plugin not found", exc_info=True)
            console = None

        if console:
            console.add_info_widget(JobInfoWidget(self))

        self.set_option('target_host', self.target)
        self.set_option('target_port', port)
        self.set_option('cmdline', cmdline)
        self.set_option('ammo_path', ammo_path)
        self.set_option('loop_count', loop_count)
        self.__save_conf()

    def start_test(self):
        """Start sender threads, publish the job link, save the job number."""
        self.add_cleanup(self.join_threads)
        self.status_sender.start()
        self.upload.start()
        self.monitoring.start()
        if self.core.error_log:
            self.events.start()

        self.web_link = urljoin(self.lp_job.api_client.base_url, str(self.lp_job.number))
        logger.info("Web link: %s", self.web_link)

        self.publish("jobno", self.lp_job.number)
        self.publish("web_link", self.web_link)

        jobno_file = self.get_option("jobno_file", '')
        if jobno_file:
            logger.debug("Saving jobno to: %s", jobno_file)
            with open(jobno_file, 'w') as fdes:
                fdes.write(str(self.lp_job.number))
            self.core.add_artifact_file(jobno_file)
        self.__save_conf()

    def is_test_finished(self):
        """Return the current retcode (``-1`` until something sets it)."""
        return self.retcode

    def end_test(self, retcode):
        """Interrupt the job on failure, send configs and unlock targets."""
        if retcode != 0:
            self.lp_job.interrupted.set()
        self.__save_conf()
        self.unlock_targets()
        return retcode

    def close_job(self):
        self.lp_job.close(self.retcode)

    def join_threads(self):
        """Set the interrupt flag and wait for the uploader threads."""
        self.lp_job.interrupted.set()
        if self.monitoring.is_alive():
            self.monitoring.join()
        if self.upload.is_alive():
            self.upload.join()

    def stop_events_processing(self):
        """Stop the events reader/drain and wait for the events uploader."""
        self.events_queue.put(None)
        self.events_reader.close()
        self.events_processing.close()
        if self.events_processing.is_alive():
            self.events_processing.join()
        if self.events.is_alive():
            self.lp_job.interrupted.set()
            self.events.join()

    def post_process(self, rc):
        """Flush the queues, join the senders and report autostop imbalance.

        A ``None`` sentinel is put on every queue so the uploader threads
        stop after sending what is already queued.
        """
        self.retcode = rc
        self.monitoring_queue.put(None)
        self.data_queue.put(None)
        if self.core.error_log:
            self.events_queue.put(None)
            self.events_reader.close()
            self.events_processing.close()
            self.events.join()
        logger.info("Waiting for sender threads to join.")
        if self.monitoring.is_alive():
            self.monitoring.join()
        if self.upload.is_alive():
            self.upload.join()
        self.finished = True
        logger.info(
            "Web link: %s", self.web_link)
        autostop = None
        try:
            autostop = self.core.get_plugin_of_type(AutostopPlugin)
        except KeyError:
            logger.debug("No autostop plugin loaded", exc_info=True)

        if autostop and autostop.cause_criterion:
            rps = 0
            cause_second = autostop.cause_criterion.cause_second
            if cause_second:
                rps = cause_second[1]["metrics"]["reqps"]
                if not rps:
                    rps = cause_second[0]["overall"]["interval_real"]["len"]
            self.lp_job.set_imbalance_and_dsc(
                int(rps), autostop.cause_criterion.explain())

        else:
            logger.debug("No autostop cause detected")
        self.__save_conf()
        return rc

    def on_aggregated_data(self, data, stats):
        """
        Queue aggregated data for upload unless the job is interrupted.

        @data: aggregated data
        @stats: stats about gun
        """
        if not self.lp_job.interrupted.is_set():
            self.data_queue.put((data, stats))

    def monitoring_data(self, data_list):
        """Queue monitoring data in ``chunk_size`` pieces unless interrupted."""
        if not self.lp_job.interrupted.is_set():
            if len(data_list) > 0:
                for chunk in chop(data_list, self.get_option("chunk_size")):
                    self.monitoring_queue.put(chunk)

    def __send_status(self):
        """Thread body: send the core status periodically until stopped."""
        logger.info('Status sender thread started')
        lp_job = self.lp_job

        while not lp_job.interrupted.is_set():
            try:
                self.lp_job.send_status(self.core.status)
                time.sleep(self.get_option('send_status_period'))
            except (APIClient.NetworkError, APIClient.NotAvailable) as e:
                logger.warning('Failed to send status')
                logger.debug('%s', e)
                break
            except APIClient.StoppedFromOnline:
                logger.info("Test stopped from Lunapark")
                self.retcode = self.RC_STOP_FROM_WEB
                break
            if self.finished:
                break
        logger.info("Closing Status sender thread")

    def __uploader(self, queue, sender_method, name='Uploader'):
        """Thread body: send queue entries until ``None`` or interruption.

        Args:
            queue: queue to read entries from; ``None`` is the stop sentinel.
            sender_method: callable invoked with every entry.
            name: label used in log messages.
        """
        logger.info('{} thread started'.format(name))
        while not self.lp_job.interrupted.is_set():
            try:
                entry = queue.get(timeout=1)
                if entry is None:
                    logger.info("{} queue returned None".format(name))
                    break
                sender_method(entry)
            except Empty:
                continue
            except APIClient.StoppedFromOnline:
                logger.warning("Lunapark is rejecting {} data".format(name))
                break
            except (APIClient.NetworkError, APIClient.NotAvailable, APIClient.UnderMaintenance) as e:
                logger.warning('Failed to push {} data'.format(name))
                logger.warning('%s', e)
                # the loop condition ends the thread on the next iteration
                self.lp_job.interrupted.set()
            except Exception:
                exc_type, exc_value, exc_traceback = sys.exc_info()
                # one argument per %s: a single tuple would make the logging
                # call itself fail to format and the message would be lost
                logger.error("Mysterious exception:\n%s\n%s\n%s",
                             exc_type, exc_value, exc_traceback)
                break
        # purge queue
        while not queue.empty():
            if queue.get_nowait() is None:
                break
        logger.info("Closing {} thread".format(name))

    def __data_uploader(self):
        self.__uploader(self.data_queue,
                        lambda entry: self.lp_job.push_test_data(*entry),
                        'Data Uploader')

    def __monitoring_uploader(self):
        self.__uploader(self.monitoring_queue,
                        self.lp_job.push_monitoring_data,
                        'Monitoring Uploader')

    def __events_uploader(self):
        self.__uploader(self.events_queue,
                        self.lp_job.push_events_data,
                        'Events Uploader')

    # TODO: why we do it here? should be in core
    def __save_conf(self):
        """Send every artifact listed by the core to the job."""
        for requisites, content in self.core.artifacts_to_send:
            self.lp_job.send_config(requisites, content)

    def parse_lock_targets(self):
        """Return the list of non-empty hosts that should be locked.

        With ``lock_targets`` set to ``'auto'`` (the default) the detected
        target is used; a warning is logged when there is none.
        """
        # prepare target lock list
        locks_list_cfg = self.get_option('lock_targets', 'auto')

        def no_target():
            logger.warning("Target lock set to 'auto', but no target info available")
            return set()

        if locks_list_cfg == 'auto':
            # test the target itself: a one-element set is always truthy, so
            # ``{target} or no_target()`` could never reach the warning
            target = self.target
            locks_set = {target} if target else no_target()
        else:
            locks_set = set(locks_list_cfg)
        targets_to_lock = [host for host in locks_set if host]
        return targets_to_lock

    def lock_targets(self, targets_to_lock, ignore, strict):
        """Lock every target and return those the job reports as locked."""
        locked_targets = [target for target in targets_to_lock
                          if self.lp_job.lock_target(target, self.lock_duration, ignore, strict)]
        return locked_targets

    def unlock_targets(self):
        logger.info("Unlocking targets: %s", self.locked_targets)
        for target in self.locked_targets:
            logger.info(target)
            self.lp_job.api_client.unlock_target(target)

    def check_and_lock_targets(self, strict, ignore):
        """Resolve the lock list, lock it and return the locked targets."""
        targets_list = self.parse_lock_targets()
        logger.info('Locking targets: %s', targets_list)
        locked_targets = self.lock_targets(targets_list, ignore=ignore, strict=strict)
        logger.info('Locked targets: %s', locked_targets)
        return locked_targets

    def make_symlink(self, name):
        """Symlink the artifacts dir as ``<artifacts_base_dir>/lunapark/<name>``."""
        PLUGIN_DIR = os.path.join(self.core.artifacts_base_dir, 'lunapark')
        if not os.path.exists(PLUGIN_DIR):
            os.makedirs(PLUGIN_DIR)
        try:
            os.symlink(
                os.path.relpath(
                    self.core.artifacts_dir,
                    PLUGIN_DIR),
                os.path.join(
                    PLUGIN_DIR,
                    str(name)))
        # this exception catch for filesystems w/o symlinks
        except OSError:
            logger.warning('Unable to create symlink for artifact: %s', name)

    def _get_user_agent(self):
        plugin_agent = 'Uploader/{}'.format(self.VERSION)
        return ' '.join((plugin_agent,
                         self.core.get_user_agent()))

    def __get_operator(self):
        """Return the ``operator`` option or the effective OS user name."""
        try:
            return self.get_option(
                'operator') or pwd.getpwuid(
                os.geteuid())[0]
        except:  # noqa: E722
            # log a hint and re-raise whatever went wrong
            logger.error(
                "Couldn't get username from the OS. Please, set the 'meta.operator' option explicitly in your config "
                "file.")
            raise

    def __get_api_client(self):
        """Build the API client matching the detected backend type.

        Raises:
            RuntimeError: the backend type is neither Lunapark nor Overload.
        """
        logger.info('Using {} backend'.format(self.backend_type))
        if self.backend_type == BackendTypes.LUNAPARK:
            client = APIClient
            self._api_token = None
        elif self.backend_type == BackendTypes.OVERLOAD:
            client = OverloadClient
            self._api_token = self.read_token(self.get_option("token_file"))
        else:
            raise RuntimeError("Backend type doesn't match any of the expected")

        return client(base_url=self.get_option('api_address'),
                      writer_url=self.get_option('writer_endpoint'),
                      network_attempts=self.get_option('network_attempts'),
                      api_attempts=self.get_option('api_attempts'),
                      maintenance_attempts=self.get_option('maintenance_attempts'),
                      network_timeout=self.get_option('network_timeout'),
                      api_timeout=self.get_option('api_timeout'),
                      maintenance_timeout=self.get_option('maintenance_timeout'),
                      connection_timeout=self.get_option('connection_timeout'),
                      user_agent=self._get_user_agent(),
                      api_token=self.api_token,
                      core_interrupted=self.interrupted)

    @property
    def lp_job(self):
        """
        Lazily created job; its number and web link are published once.

        :rtype: LPJob
        """
        if self._lp_job is None:
            self._lp_job = self.__get_lp_job()
            self.core.publish(self.SECTION, 'job_no', self._lp_job.number)
            self.core.publish(self.SECTION, 'web_link', self._lp_job.web_link)
        return self._lp_job

    def __get_lp_job(self):
        """
        Create the LPJob from options and send the initial config to it.

        :rtype: LPJob
        """
        api_client = self.__get_api_client()

        info = self.get_generator_info()
        port = info.port
        # a str/dict schedule is replaced with an empty load scheme
        loadscheme = [] if isinstance(info.rps_schedule, (str, dict)) else info.rps_schedule

        lp_job = LPJob(client=api_client,
                       target_host=self.target,
                       target_port=port,
                       number=self.cfg.get('jobno', None),
                       token=self.get_option('upload_token'),
                       person=self.__get_operator(),
                       task=self.task,
                       name=self.get_option('job_name', 'none'),
                       description=self.get_option('job_dsc'),
                       tank=self.core.job.tank,
                       notify_list=self.get_option("notify"),
                       load_scheme=loadscheme,
                       version=self.get_option('ver'),
                       log_data_requests=self.get_option('log_data_requests'),
                       log_monitoring_requests=self.get_option('log_monitoring_requests'),
                       log_status_requests=self.get_option('log_status_requests'),
                       log_other_requests=self.get_option('log_other_requests'))
        lp_job.send_config(LPRequisites.CONFIGINITIAL, yaml.dump(self.core.configinitial))
        return lp_job

    @property
    def task(self):
        """Task id; the value ``'dir'`` is resolved from the working dir."""
        if self._task is None:
            task = self.get_option('task')
            if task == 'dir':
                task = self.search_task_from_cwd(os.getcwd())
            self._task = task
        return self._task

    @property
    def api_token(self):
        """API token: ``None`` for Lunapark, file contents for Overload."""
        if self._api_token == '':
            if self.backend_type == BackendTypes.LUNAPARK:
                self._api_token = None
            elif self.backend_type == BackendTypes.OVERLOAD:
                self._api_token = self.read_token(self.get_option("token_file", ""))
            else:
                raise RuntimeError("Backend type doesn't match any of the expected")
        return self._api_token

    @staticmethod
    def read_token(filename):
        """Read and strip the API token stored in ``filename``.

        Raises:
            RuntimeError: ``filename`` is empty or the file cannot be read.
        """
        if filename:
            logger.debug("Trying to read token from %s", filename)
            try:
                with open(filename, 'r') as handle:
                    data = handle.read().strip()
                    logger.info(
                        "Read authentication token from %s, "
                        "token length is %d bytes", filename, len(str(data)))
            except IOError:
                logger.error(
                    "Failed to read Overload API token from %s", filename)
                logger.info(
                    "Get your Overload API token from https://overload.yandex.net and provide it via 'overload.token_file' parameter"
                )
                raise RuntimeError("API token error")
            return data
        else:
            logger.error("Overload API token filename is not defined")
            logger.info(
                "Get your Overload API token from https://overload.yandex.net and provide it via 'overload.token_file' parameter"
            )
            raise RuntimeError("API token error")

    def get_generator_info(self):
        return self.core.job.generator_plugin.get_info()

    @property
    def target(self):
        """Target address reported by the generator, cached once known."""
        if self._target is None:
            self._target = self.get_generator_info().address
            # log the cached value: reading ``self.target`` here would
            # re-enter this property forever when the address is None
            logger.info("Detected target: %s", self._target)
        return self._target
class VoltaBoxBinary(VoltaBox):
    """VoltaBoxBinary - works with the binary box.

    Performs a text handshake with the device, then reads binary samples
    with ``BoxBinaryReader``, chops them with ``TimeChopper`` and drains
    the chunks into the ``current`` metric.
    """

    def __init__(self, config, core):
        """Read the sample/baud rates from ``config`` and open the source.

        Args:
            config: object answering ``get_option(section, name, default)``.
            core: passed through to ``VoltaBox.__init__``.
        """
        VoltaBox.__init__(self, config, core)
        self.sample_rate = config.get_option('volta', 'sample_rate', 10000)
        self.baud_rate = config.get_option('volta', 'baud_rate', 230400)
        self.source_opener.baud_rate = self.baud_rate
        self.source_opener.read_timeout = self.grab_timeout
        self.data_source = self.source_opener()
        logger.debug('Data source initialized: %s', self.data_source)
        self.my_metrics = {}
        self.__create_my_metrics()

    def __create_my_metrics(self):
        """Register the ``current`` metric in the core's data session."""
        self.my_metrics['current'] = self.core.data_session.new_metric({
            'type': 'metrics',
            'name': 'current',
            'source': 'voltabox'
        })

    def start_test(self, results):
        """Grab stage - handshake with the device and start the grabber.

        pipeline
            read source data ->
            chop by samplerate w/ ratio ->
            drain chunks into ``self.my_metrics['current']``

        Args:
            results: stored as ``self.grabber_q``; only ``get_info`` reads
                it (for ``qsize()``), the pipeline does not write to it.
        """
        self.grabber_q = results

        # handshake: skip everything until the greeting line
        logger.info('Awaiting handshake')
        while self.data_source.readline() != "VOLTAHELLO\n":
            pass

        # the line after the greeting is JSON carrying the real sample rate
        volta_spec = json.loads(self.data_source.readline())
        self.sample_rate = volta_spec["sps"]
        logger.info('Sample rate handshake success: %s', self.sample_rate)

        while self.data_source.readline() != "DATASTART\n":
            pass

        self.reader = BoxBinaryReader(
            self.data_source,
            self.sample_rate,
            self.slope,
            self.offset,
            self.power_voltage,
            self.precision,
            sample_swap=self.sample_swap)
        self.pipeline = Drain(
            TimeChopper(self.reader, self.sample_rate, self.chop_ratio),
            self.my_metrics['current'])
        logger.info('Starting grab thread...')
        self.pipeline.start()
        logger.debug('Waiting grabber thread finish...')

    def end_test(self):
        """Close reader, pipeline and data source, tolerating a failed start.

        A missing reader or pipeline (``AttributeError``) is logged as a
        warning instead of being raised.
        """
        try:
            self.reader.close()
        except AttributeError:
            logger.warning(
                'VoltaBox has no Reader. Seems like VoltaBox initialization failed'
            )
            logger.debug(
                'VoltaBox has no Reader. Seems like VoltaBox initialization failed',
                exc_info=True)
        try:
            self.pipeline.close()
        except AttributeError:
            logger.warning(
                'VoltaBox has no Pipeline. Seems like VoltaBox initialization failed'
            )
        else:
            self.pipeline.join(10)
        self.data_source.close()

    def get_info(self):
        """Return a dict with grabber liveness and queue size when available."""
        data = {}
        if self.pipeline:
            # Thread.isAlive() was removed in Python 3.9; is_alive() works
            # on every supported interpreter.
            data['grabber_alive'] = self.pipeline.is_alive()
        if self.grabber_q:
            data['grabber_queue_size'] = self.grabber_q.qsize()
        return data
class Plugin(AbstractPlugin, AggregateResultListener, MonitoringDataListener): RC_STOP_FROM_WEB = 8 VERSION = '3.0' SECTION = 'uploader' def __init__(self, core, cfg, name): AbstractPlugin.__init__(self, core, cfg, name) self.data_queue = Queue() self.monitoring_queue = Queue() if self.core.error_log: self.events_queue = Queue() self.events_reader = EventsReader(self.core.error_log) self.events_processing = Drain(self.events_reader, self.events_queue) self.add_cleanup(self.stop_events_processing) self.events_processing.start() self.events = threading.Thread(target=self.__events_uploader) self.events.daemon = True self.retcode = -1 self._target = None self.task_name = '' self.token_file = None self.version_tested = None self.send_status_period = 10 self.status_sender = threading.Thread(target=self.__send_status) self.status_sender.daemon = True self.upload = threading.Thread(target=self.__data_uploader) self.upload.daemon = True self.monitoring = threading.Thread(target=self.__monitoring_uploader) self.monitoring.daemon = True self._is_telegraf = None self.backend_type = BackendTypes.identify_backend( self.cfg['api_address'], self.cfg_section_name) self._task = None self._api_token = '' self._lp_job = None self._lock_duration = None self._info = None self.locked_targets = [] self.web_link = None self.finished = False def set_option(self, option, value): self.cfg.setdefault('meta', {})[option] = value self.core.publish(self.SECTION, 'meta.{}'.format(option), value) @staticmethod def get_key(): return __file__ @property def lock_duration(self): if self._lock_duration is None: info = self.get_generator_info() self._lock_duration = info.duration if info.duration else \ expand_to_seconds(self.get_option("target_lock_duration")) return self._lock_duration def get_available_options(self): opts = [ "api_address", "writer_endpoint", "task", "job_name", "job_dsc", "notify", "ver", "component", "operator", "jobno_file", "ignore_target_lock", "target_lock_duration", 
"lock_targets", "jobno", "upload_token", 'connection_timeout', 'network_attempts', 'api_attempts', 'maintenance_attempts', 'network_timeout', 'api_timeout', 'maintenance_timeout', 'strict_lock', 'send_status_period', 'log_data_requests', 'log_monitoring_requests', 'log_status_requests', 'log_other_requests', 'threads_timeout', 'chunk_size' ] return opts def configure(self): pass def check_task_is_open(self): if self.backend_type == BackendTypes.OVERLOAD: return TASK_TIP = 'The task should be connected to Lunapark.' \ 'Open startrek task page, click "actions" -> "load testing".' logger.debug("Check if task %s is open", self.task) try: task_data = self.lp_job.get_task_data(self.task)[0] try: task_status = task_data['status'] if task_status == 'Open': logger.info("Task %s is ok", self.task) self.task_name = str(task_data['name']) else: logger.info("Task %s:" % self.task) logger.info(task_data) raise RuntimeError("Task is not open") except KeyError: try: error = task_data['error'] raise RuntimeError("Task %s error: %s\n%s" % (self.task, error, TASK_TIP)) except KeyError: raise RuntimeError( 'Unknown task data format:\n{}'.format(task_data)) except requests.exceptions.HTTPError as ex: logger.error("Failed to check task status for '%s': %s", self.task, ex) if ex.response.status_code == 404: raise RuntimeError("Task not found: %s\n%s" % (self.task, TASK_TIP)) elif ex.response.status_code == 500 or ex.response.status_code == 400: raise RuntimeError( "Unable to check task staus, id: %s, error code: %s" % (self.task, ex.response.status_code)) raise ex @staticmethod def search_task_from_cwd(cwd): issue = re.compile("^([A-Za-z]+-[0-9]+)(-.*)?") while cwd: logger.debug("Checking if dir is named like JIRA issue: %s", cwd) if issue.match(os.path.basename(cwd)): res = re.search(issue, os.path.basename(cwd)) return res.group(1).upper() newdir = os.path.abspath(os.path.join(cwd, os.path.pardir)) if newdir == cwd: break else: cwd = newdir raise RuntimeError( "task=dir requested, but 
no JIRA issue name in cwd: %s" % os.getcwd()) def prepare_test(self): info = self.get_generator_info() port = info.port instances = info.instances if info.ammo_file is not None: if info.ammo_file.startswith( "http://") or info.ammo_file.startswith("https://"): ammo_path = info.ammo_file else: ammo_path = os.path.realpath(info.ammo_file) else: logger.warning('Failed to get info about ammo path') ammo_path = 'Undefined' loop_count = int(info.loop_count) try: lp_job = self.lp_job self.add_cleanup(self.unlock_targets) self.locked_targets = self.check_and_lock_targets( strict=self.get_option('strict_lock'), ignore=self.get_option('ignore_target_lock')) self.add_cleanup(self.close_job) if lp_job._number: self.make_symlink(lp_job._number) self.check_task_is_open() else: self.check_task_is_open() lp_job.create() self.make_symlink(lp_job.number) self.publish('job_no', lp_job.number) except (APIClient.JobNotCreated, APIClient.NotAvailable, APIClient.NetworkError) as e: logger.error(e.message) logger.error( 'Failed to connect to Lunapark, disabling DataUploader') self.start_test = lambda *a, **kw: None self.post_process = lambda *a, **kw: None self.on_aggregated_data = lambda *a, **kw: None self.monitoring_data = lambda *a, **kw: None return cmdline = ' '.join(sys.argv) lp_job.edit_metainfo( instances=instances, ammo_path=ammo_path, loop_count=loop_count, regression_component=self.get_option("component"), cmdline=cmdline, ) self.core.job.subscribe_plugin(self) try: console = self.core.get_plugin_of_type(ConsolePlugin) except KeyError: logger.debug("Console plugin not found", exc_info=True) console = None if console: console.add_info_widget(JobInfoWidget(self)) self.set_option('target_host', self.target) self.set_option('target_port', port) self.set_option('cmdline', cmdline) self.set_option('ammo_path', ammo_path) self.set_option('loop_count', loop_count) self.__save_conf() def start_test(self): self.add_cleanup(self.join_threads) self.status_sender.start() 
self.upload.start() self.monitoring.start() if self.core.error_log: self.events.start() self.web_link = urljoin(self.lp_job.api_client.base_url, str(self.lp_job.number)) logger.info("Web link: %s", self.web_link) self.publish("jobno", self.lp_job.number) self.publish("web_link", self.web_link) jobno_file = self.get_option("jobno_file", '') if jobno_file: logger.debug("Saving jobno to: %s", jobno_file) with open(jobno_file, 'w') as fdes: fdes.write(str(self.lp_job.number)) self.core.add_artifact_file(jobno_file) self.__save_conf() def is_test_finished(self): return self.retcode def end_test(self, retcode): if retcode != 0: self.lp_job.interrupted.set() self.__save_conf() self.unlock_targets() return retcode def close_job(self): self.lp_job.close(self.retcode) def join_threads(self): self.lp_job.interrupted.set() if self.monitoring.is_alive(): self.monitoring.join() if self.upload.is_alive(): self.upload.join() def stop_events_processing(self): self.events_queue.put(None) self.events_reader.close() self.events_processing.close() if self.events_processing.is_alive(): self.events_processing.join() if self.events.is_alive(): self.lp_job.interrupted.set() self.events.join() def post_process(self, rc): self.retcode = rc self.monitoring_queue.put(None) self.data_queue.put(None) if self.core.error_log: self.events_queue.put(None) self.events_reader.close() self.events_processing.close() self.events.join() logger.info("Waiting for sender threads to join.") if self.monitoring.is_alive(): self.monitoring.join() if self.upload.is_alive(): self.upload.join() self.finished = True logger.info("Web link: %s", self.web_link) autostop = None try: autostop = self.core.get_plugin_of_type(AutostopPlugin) except KeyError: logger.debug("No autostop plugin loaded", exc_info=True) if autostop and autostop.cause_criterion: rps = 0 if autostop.cause_criterion.cause_second: rps = autostop.cause_criterion.cause_second[1]["metrics"][ "reqps"] if not rps: rps = 
autostop.cause_criterion.cause_second[0]["overall"][ "interval_real"]["len"] self.lp_job.set_imbalance_and_dsc( int(rps), autostop.cause_criterion.explain()) else: logger.debug("No autostop cause detected") self.__save_conf() return rc def on_aggregated_data(self, data, stats): """ @data: aggregated data @stats: stats about gun """ if not self.lp_job.interrupted.is_set(): self.data_queue.put((data, stats)) def monitoring_data(self, data_list): if not self.lp_job.interrupted.is_set(): if len(data_list) > 0: [ self.monitoring_queue.put(chunk) for chunk in chop(data_list, self.get_option("chunk_size")) ] def __send_status(self): logger.info('Status sender thread started') lp_job = self.lp_job while not lp_job.interrupted.is_set(): try: self.lp_job.send_status(self.core.status) time.sleep(self.get_option('send_status_period')) except (APIClient.NetworkError, APIClient.NotAvailable) as e: logger.warn('Failed to send status') logger.debug(e.message) break except APIClient.StoppedFromOnline: logger.info("Test stopped from Lunapark") self.retcode = self.RC_STOP_FROM_WEB break if self.finished: break logger.info("Closing Status sender thread") def __uploader(self, queue, sender_method, name='Uploader'): logger.info('{} thread started'.format(name)) while not self.lp_job.interrupted.is_set(): try: entry = queue.get(timeout=1) if entry is None: logger.info("{} queue returned None".format(name)) break sender_method(entry) except Empty: continue except APIClient.StoppedFromOnline: logger.warning("Lunapark is rejecting {} data".format(name)) break except (APIClient.NetworkError, APIClient.NotAvailable, APIClient.UnderMaintenance) as e: logger.warn('Failed to push {} data'.format(name)) logger.warn(e.message) self.lp_job.interrupted.set() except Exception: exc_type, exc_value, exc_traceback = sys.exc_info() logger.error("Mysterious exception:\n%s\n%s\n%s", (exc_type, exc_value, exc_traceback)) break # purge queue while not queue.empty(): if queue.get_nowait() is None: break 
logger.info("Closing {} thread".format(name)) def __data_uploader(self): self.__uploader(self.data_queue, lambda entry: self.lp_job.push_test_data(*entry), 'Data Uploader') def __monitoring_uploader(self): self.__uploader(self.monitoring_queue, self.lp_job.push_monitoring_data, 'Monitoring Uploader') def __events_uploader(self): self.__uploader(self.events_queue, self.lp_job.push_events_data, 'Events Uploader') # TODO: why we do it here? should be in core def __save_conf(self): for requisites, content in self.core.artifacts_to_send: self.lp_job.send_config(requisites, content) def parse_lock_targets(self): # prepare target lock list locks_list_cfg = self.get_option('lock_targets', 'auto') def no_target(): logging.warn( "Target lock set to 'auto', but no target info available") return {} locks_set = { self.target } or no_target() if locks_list_cfg == 'auto' else set(locks_list_cfg) targets_to_lock = [host for host in locks_set if host] return targets_to_lock def lock_targets(self, targets_to_lock, ignore, strict): locked_targets = [ target for target in targets_to_lock if self.lp_job.lock_target( target, self.lock_duration, ignore, strict) ] return locked_targets def unlock_targets(self): logger.info("Unlocking targets: %s", self.locked_targets) for target in self.locked_targets: logger.info(target) self.lp_job.api_client.unlock_target(target) def check_and_lock_targets(self, strict, ignore): targets_list = self.parse_lock_targets() logger.info('Locking targets: %s', targets_list) locked_targets = self.lock_targets(targets_list, ignore=ignore, strict=strict) logger.info('Locked targets: %s', locked_targets) return locked_targets def make_symlink(self, name): PLUGIN_DIR = os.path.join(self.core.artifacts_base_dir, 'lunapark') if not os.path.exists(PLUGIN_DIR): os.makedirs(PLUGIN_DIR) try: os.symlink(os.path.relpath(self.core.artifacts_dir, PLUGIN_DIR), os.path.join(PLUGIN_DIR, str(name))) # this exception catch for filesystems w/o symlinks except OSError: 
logger.warning('Unable to create symlink for artifact: %s', name) def _get_user_agent(self): plugin_agent = 'Uploader/{}'.format(self.VERSION) return ' '.join((plugin_agent, self.core.get_user_agent())) def __get_operator(self): try: return self.get_option('operator') or pwd.getpwuid(os.geteuid())[0] except: # noqa: E722 logger.error( "Couldn't get username from the OS. Please, set the 'meta.operator' option explicitly in your config " "file.") raise def __get_api_client(self): logging.info('Using {} backend'.format(self.backend_type)) if self.backend_type == BackendTypes.LUNAPARK: client = APIClient self._api_token = None elif self.backend_type == BackendTypes.OVERLOAD: client = OverloadClient self._api_token = self.read_token(self.get_option("token_file")) else: raise RuntimeError( "Backend type doesn't match any of the expected") return client( base_url=self.get_option('api_address'), writer_url=self.get_option('writer_endpoint'), network_attempts=self.get_option('network_attempts'), api_attempts=self.get_option('api_attempts'), maintenance_attempts=self.get_option('maintenance_attempts'), network_timeout=self.get_option('network_timeout'), api_timeout=self.get_option('api_timeout'), maintenance_timeout=self.get_option('maintenance_timeout'), connection_timeout=self.get_option('connection_timeout'), user_agent=self._get_user_agent(), api_token=self.api_token, core_interrupted=self.interrupted) @property def lp_job(self): """ :rtype: LPJob """ if self._lp_job is None: self._lp_job = self.__get_lp_job() self.core.publish(self.SECTION, 'job_no', self._lp_job.number) self.core.publish(self.SECTION, 'web_link', self._lp_job.web_link) self.core.publish(self.SECTION, 'job_name', self._lp_job.name) self.core.publish(self.SECTION, 'job_dsc', self._lp_job.description) self.core.publish(self.SECTION, 'person', self._lp_job.person) self.core.publish(self.SECTION, 'task', self._lp_job.task) self.core.publish(self.SECTION, 'version', self._lp_job.version) return self._lp_job 
def __get_lp_job(self):
    """
    Create the job object for this test and send the initial config to it.

    :rtype: LPJob
    """
    api_client = self.__get_api_client()
    info = self.get_generator_info()
    port = info.port
    # a schedule given as str or dict is replaced by an empty load scheme
    loadscheme = [] if isinstance(info.rps_schedule, (str, dict)) else info.rps_schedule
    lp_job = LPJob(
        client=api_client,
        target_host=self.target,
        target_port=port,
        number=self.cfg.get('jobno', None),
        token=self.get_option('upload_token'),
        person=self.__get_operator(),
        task=self.task,
        name=self.get_option('job_name', 'none'),
        description=self.get_option('job_dsc'),
        tank=self.core.job.tank,
        notify_list=self.get_option("notify"),
        load_scheme=loadscheme,
        version=self.get_option('ver'),
        log_data_requests=self.get_option('log_data_requests'),
        log_monitoring_requests=self.get_option('log_monitoring_requests'),
        log_status_requests=self.get_option('log_status_requests'),
        log_other_requests=self.get_option('log_other_requests'))
    lp_job.send_config(LPRequisites.CONFIGINITIAL, yaml.dump(self.core.configinitial))
    return lp_job


@property
def task(self):
    """
    Task identifier taken from the ``task`` option, resolved on first access.

    The special value ``'dir'`` is replaced by the result of
    ``search_task_from_cwd`` for the current working directory.
    """
    if self._task is None:
        task = self.get_option('task')
        if task == 'dir':
            task = self.search_task_from_cwd(os.getcwd())
        self._task = task
    return self._task


@property
def api_token(self):
    """
    API token for the backend, resolved while ``_api_token`` still holds ``''``.

    Lunapark uses no token (``None``); Overload reads it from the file named
    by the ``token_file`` option.

    :raises RuntimeError: if the backend type is neither Lunapark nor Overload
    """
    if self._api_token == '':
        if self.backend_type == BackendTypes.LUNAPARK:
            self._api_token = None
        elif self.backend_type == BackendTypes.OVERLOAD:
            self._api_token = self.read_token(
                self.get_option("token_file", ""))
        else:
            raise RuntimeError(
                "Backend type doesn't match any of the expected")
    return self._api_token


@staticmethod
def read_token(filename):
    """
    Read the API token from *filename* and return it with surrounding whitespace stripped.

    :param filename: path to the token file
    :raises RuntimeError: if *filename* is empty or the file cannot be read
    """
    if filename:
        logger.debug("Trying to read token from %s", filename)
        try:
            with open(filename, 'r') as handle:
                data = handle.read().strip()
                logger.info(
                    "Read authentication token from %s, "
                    "token length is %d bytes", filename, len(str(data)))
        except IOError:
            logger.error("Failed to read Overload API token from %s", filename)
            logger.info(
                "Get your Overload API token from https://overload.yandex.net and provide it via 'overload.token_file' parameter"
            )
            raise RuntimeError("API token error")
        return data
    else:
        logger.error("Overload API token filename is not defined")
        logger.info(
            "Get your Overload API token from https://overload.yandex.net and provide it via 'overload.token_file' parameter"
        )
        raise RuntimeError("API token error")


def get_generator_info(self):
    """Return whatever the generator plugin's ``get_info()`` reports."""
    return self.core.job.generator_plugin.get_info()


@property
def target(self):
    """
    Address reported by the generator plugin, looked up on first access.

    The log call reads ``self._target`` rather than ``self.target``: going
    through the property again while the address is still ``None`` would
    re-enter this getter without end.
    """
    if self._target is None:
        self._target = self.get_generator_info().address
        logger.info("Detected target: %s", self._target)
    return self._target
class TankAggregator(object):
    """
    Plugin that manages aggregation and stats collection.

    ``start_test`` starts two drains: one fills ``results`` with aggregated
    generator data, the other fills ``stats_results`` with generator stats.
    ``_collect_data`` pairs items from both queues by their ``'ts'`` key and
    passes each pair to the registered listeners.
    """

    SECTION = 'aggregator'

    @staticmethod
    def get_key():
        return __file__

    def __init__(self, generator):
        """
        :type generator: GeneratorPlugin
        """
        self.generator = generator
        self.listeners = []
        self.results = q.Queue()
        self.stats_results = q.Queue()
        # items waiting for their counterpart with the same 'ts'
        self.data_cache = {}
        self.stat_cache = {}
        self.reader = None
        self.stats_reader = None
        # both stay None until start_test finds a reader and a stats reader
        self.drain = None
        self.stats_drain = None

    @staticmethod
    def load_config():
        """Load the aggregator config from the packaged ``config/phout.json`` resource."""
        return json.loads(
            resource_string(__name__, 'config/phout.json').decode('utf8'))

    def start_test(self, poll_period=1):
        """
        Start the drains that feed ``results`` and ``stats_results``.

        If the generator does not provide both a reader and a stats reader,
        nothing is started and a warning is logged.

        :param poll_period: forwarded to both ``DataPoller`` instances
        """
        self.reader = self.generator.get_reader()
        self.stats_reader = self.generator.get_stats_reader()
        aggregator_config = self.load_config()
        verbose_histogram = True
        if verbose_histogram:
            logger.info("using verbose histogram")
        if self.reader and self.stats_reader:
            pipeline = Aggregator(
                TimeChopper(
                    DataPoller(source=self.reader, poll_period=poll_period),
                    cache_size=3),
                aggregator_config,
                verbose_histogram)
            self.drain = Drain(pipeline, self.results)
            self.drain.start()
            self.stats_drain = Drain(
                Chopper(
                    DataPoller(source=self.stats_reader, poll_period=poll_period)),
                self.stats_results)
            self.stats_drain.start()
        else:
            logger.warning(
                "Generator not found. Generator must provide a reader and a stats_reader interface"
            )

    def _collect_data(self, end=False):
        """
        Collect data, cache it and send to listeners.

        An item whose counterpart has not arrived yet waits in ``data_cache``
        or ``stat_cache`` until an item with the same ``'ts'`` shows up.

        :param end: when true, data items still waiting for stats are sent
            together with ``StatsReader.stats_item(ts, 0, 0)`` and removed
            from the cache, so a repeated call cannot deliver them twice
        """
        data = get_nowait_from_queue(self.results)
        stats = get_nowait_from_queue(self.stats_results)
        logger.debug("Data timestamps: %s", [d.get('ts') for d in data])
        logger.debug("Stats timestamps: %s", [d.get('ts') for d in stats])
        for item in data:
            ts = item['ts']
            if ts in self.stat_cache:
                # matching stats already arrived: send the pair
                self.__notify_listeners(item, self.stat_cache.pop(ts))
            else:
                self.data_cache[ts] = item
        for item in stats:
            ts = item['ts']
            if ts in self.data_cache:
                # matching data already arrived: send the pair
                self.__notify_listeners(self.data_cache.pop(ts), item)
            else:
                self.stat_cache[ts] = item
        if end and self.data_cache:
            logger.info('Timestamps without stats:')
            # sorted() snapshots the keys, so popping inside the loop is safe
            for ts in sorted(self.data_cache):
                logger.info(ts)
                self.__notify_listeners(
                    self.data_cache.pop(ts), StatsReader.stats_item(ts, 0, 0))

    def is_aggr_finished(self):
        """
        Tell whether both drains have finished.

        A drain that was never started (still ``None``) has nothing left to
        deliver and counts as finished.
        """
        return all(
            drain is None or drain._finished.is_set()
            for drain in (self.drain, self.stats_drain))

    def is_test_finished(self):
        """Deliver whatever is currently queued to the listeners; always returns -1."""
        self._collect_data()
        return -1

    def end_test(self, retcode):
        """
        Finish the generator, wait for the drains and deliver the remaining data.

        :param retcode: passed to the generator's ``end_test``
        :return: the retcode returned by the generator's ``end_test``
        """
        retcode = self.generator.end_test(retcode)
        if self.stats_reader:
            logger.debug('Closing stats reader')
            self.stats_reader.close()
        if self.drain:
            logger.debug('Waiting for gun drain to finish')
            self.drain.join()
            logger.debug('Waiting for stats drain to finish')
            self.stats_drain.join()
        logger.debug('Collecting remaining data')
        self._collect_data(end=True)
        return retcode

    def add_result_listener(self, listener):
        """Register *listener*; its ``on_aggregated_data(data, stats)`` is called for every pair."""
        self.listeners.append(listener)

    def __notify_listeners(self, data, stats):
        """ notify all listeners about aggregate data and stats """
        for listener in self.listeners:
            listener.on_aggregated_data(data, stats)
class TankAggregator(object):
    """
    Plugin that manages aggregation and stats collection.

    ``start_test`` starts two drains: one fills ``results`` with aggregated
    generator data, the other fills ``stats_results`` with generator stats.
    ``_collect_data`` pairs items from both queues by their ``'ts'`` key and
    passes each pair to the registered listeners.
    """

    SECTION = 'aggregator'

    @staticmethod
    def get_key():
        return __file__

    def __init__(self, generator):
        """
        :type generator: GeneratorPlugin
        """
        self.generator = generator
        self.listeners = []
        self.results = q.Queue()
        self.stats_results = q.Queue()
        # items waiting for their counterpart with the same 'ts'
        self.data_cache = {}
        self.stat_cache = {}
        self.reader = None
        self.stats_reader = None
        # both stay None until start_test finds a reader and a stats reader
        self.drain = None
        self.stats_drain = None

    @staticmethod
    def load_config():
        """Load the aggregator config from the packaged ``config/phout.json`` resource."""
        return json.loads(resource_string(__name__, 'config/phout.json').decode('utf8'))

    def start_test(self, poll_period=1):
        """
        Start the drains that feed ``results`` and ``stats_results``.

        If the generator does not provide both a reader and a stats reader,
        nothing is started and a warning is logged.

        :param poll_period: forwarded to both ``DataPoller`` instances
        """
        self.reader = self.generator.get_reader()
        self.stats_reader = self.generator.get_stats_reader()
        aggregator_config = self.load_config()
        verbose_histogram = True
        if verbose_histogram:
            logger.info("using verbose histogram")
        if self.reader and self.stats_reader:
            pipeline = Aggregator(
                TimeChopper(
                    DataPoller(source=self.reader, poll_period=poll_period),
                    cache_size=3),
                aggregator_config,
                verbose_histogram)
            self.drain = Drain(pipeline, self.results)
            self.drain.start()
            self.stats_drain = Drain(
                Chopper(DataPoller(
                    source=self.stats_reader, poll_period=poll_period)),
                self.stats_results)
            self.stats_drain.start()
        else:
            logger.warning("Generator not found. Generator must provide a reader and a stats_reader interface")

    def _collect_data(self, end=False):
        """
        Collect data, cache it and send to listeners.

        An item whose counterpart has not arrived yet waits in ``data_cache``
        or ``stat_cache`` until an item with the same ``'ts'`` shows up.

        :param end: when true, data items still waiting for stats are sent
            together with ``StatsReader.stats_item(ts, 0, 0)`` and removed
            from the cache, so a repeated call cannot deliver them twice
        """
        data = get_nowait_from_queue(self.results)
        stats = get_nowait_from_queue(self.stats_results)
        logger.debug("Data timestamps: %s", [d.get('ts') for d in data])
        logger.debug("Stats timestamps: %s", [d.get('ts') for d in stats])
        for item in data:
            ts = item['ts']
            if ts in self.stat_cache:
                # matching stats already arrived: send the pair
                self.__notify_listeners(item, self.stat_cache.pop(ts))
            else:
                self.data_cache[ts] = item
        for item in stats:
            ts = item['ts']
            if ts in self.data_cache:
                # matching data already arrived: send the pair
                self.__notify_listeners(self.data_cache.pop(ts), item)
            else:
                self.stat_cache[ts] = item
        if end and self.data_cache:
            logger.info('Timestamps without stats:')
            # sorted() snapshots the keys, so popping inside the loop is safe
            for ts in sorted(self.data_cache):
                logger.info(ts)
                self.__notify_listeners(self.data_cache.pop(ts), StatsReader.stats_item(ts, 0, 0))

    def is_aggr_finished(self):
        """
        Tell whether both drains have finished.

        A drain that was never started (still ``None``) has nothing left to
        deliver and counts as finished.
        """
        return all(
            drain is None or drain._finished.is_set()
            for drain in (self.drain, self.stats_drain))

    def is_test_finished(self):
        """Deliver whatever is currently queued to the listeners; always returns -1."""
        self._collect_data()
        return -1

    def end_test(self, retcode):
        """
        Finish the generator, wait for the drains and deliver the remaining data.

        :param retcode: passed to the generator's ``end_test``
        :return: the retcode returned by the generator's ``end_test``
        """
        retcode = self.generator.end_test(retcode)
        if self.stats_reader:
            logger.debug('Closing stats reader')
            self.stats_reader.close()
        if self.drain:
            logger.debug('Waiting for gun drain to finish')
            self.drain.join()
            logger.debug('Waiting for stats drain to finish')
            self.stats_drain.join()
        logger.debug('Collecting remaining data')
        self._collect_data(end=True)
        return retcode

    def add_result_listener(self, listener):
        """Register *listener*; its ``on_aggregated_data(data, stats)`` is called for every pair."""
        self.listeners.append(listener)

    def __notify_listeners(self, data, stats):
        """ notify all listeners about aggregate data and stats """
        for listener in self.listeners:
            listener.on_aggregated_data(data, stats)