def _setup_grafana(self):
    """Enable the ``grafana-server`` service and (re)start it.

    Raises:
        RuntimeError: if enabling or starting the service reports an error.
    """

    def abort_on(problem):
        # The service control calls return a truthy value to signal failure.
        if problem:
            log.error(problem)
            raise RuntimeError(problem)

    # grafana needs daemon-reload before enable and start
    ServiceControlEL7.daemon_reload()

    grafana = ServiceControl.create("grafana-server")
    abort_on(grafana.enable())

    # Stop an already running instance first; the result of stop() is not checked.
    if grafana.running:
        grafana.stop()

    abort_on(grafana.start())
def _setup_rabbitmq_service(self):
    """Enable the ``rabbitmq-server`` service, then stop and start it.

    Raises:
        RuntimeError: if enabling, stopping or starting the service reports
            an error.
    """

    def bail_if(problem):
        # A truthy return value from the service control calls is an error.
        if problem:
            log.error(problem)
            raise RuntimeError(problem)

    log.info("Starting RabbitMQ...")

    # special case where service requires legacy service control
    rabbit = ServiceControlEL7("rabbitmq-server")
    bail_if(rabbit.enable())

    # FIXME: HYD_640: there's really no sane reason to have to set the stderr and
    #        stdout to None here except that subprocess.PIPE ends up
    #        blocking subprocess.communicate().
    #        we need to figure out why
    # FIXME: this should also be converted to use the common Shell utility class
    # self.try_shell(["service", "rabbitmq-server", "restart"],
    #                mystderr=None, mystdout=None)

    # ServiceControlEL7 really needs a _restart() method
    bail_if(rabbit._stop())
    bail_if(rabbit._start())
def setUp(self):
    """Start every service named in ``self.SERVICES``.

    If anything goes wrong while starting, ``self.tearDown()`` is run before
    the original exception is re-raised.
    """
    try:
        for unit_name in self.SERVICES:
            log.info("Starting service '%s'" % unit_name)
            controller = ServiceControlEL7(unit_name)
            controller.start(retry_time=0.1, validate_time=0)
    except:  # noqa: E722 - catch-all is deliberate; the exception is re-raised below
        # Ensure we don't leave a systemd unit up
        self.tearDown()
        raise
def _setup_rabbitmq_service(self):
    """Enable the ``rabbitmq-server`` service and restart it via systemctl.

    Raises:
        RuntimeError: if enabling the service reports an error.
        CommandError: propagated unchanged (after being logged) when
            ``self.try_shell`` raises it for the restart command.
    """
    log.info("Starting RabbitMQ...")

    # special case where service requires legacy service control
    rabbit_service = ServiceControlEL7("rabbitmq-server")

    error = rabbit_service.enable()
    if error:
        log.error(error)
        raise RuntimeError(error)

    try:
        self.try_shell(["systemctl", "restart", "rabbitmq-server"])
    except CommandError as exc:
        log.error(exc)
        # A bare ``raise`` re-raises the active exception with its original
        # traceback rather than raising it afresh from this frame.
        raise
def tearDown(self):
    """Shut down RPC client threads if they were started, then stop every
    service named in ``self.SERVICES``."""
    # You can't import this globally because DJANGO_SETTINGS_MODULE is not initialized yet for some
    # reason, but maybe by the time the code meanders its way to here it will work.
    from chroma_core.services.rpc import RpcClientFactory

    # Shutdown any RPC Threads if they were started. Bit of horrible insider knowledge here.
    # NOTE(review): the three attribute resets below are assumed to belong to
    # this branch together with shutdown_threads() - confirm against the
    # original layout.
    if RpcClientFactory._lightweight is False:
        RpcClientFactory.shutdown_threads()
        RpcClientFactory._lightweight = True
        RpcClientFactory._available = True
        RpcClientFactory._instances = {}

    # Stop the services regardless of whether RPC threads had been started.
    for service in self.SERVICES:
        log.info("Stopping service '%s'" % service)
        ServiceControlEL7(service).stop(retry_time=0.1, validate_time=0)
def _setup_rabbitmq_service(self):
    """Enable the ``rabbitmq-server`` service, then stop and start it.

    Raises:
        RuntimeError: if any of the enable, stop or start steps reports an
            error.
    """
    log.info("Starting RabbitMQ...")

    # special case where service requires legacy service control
    rabbit = ServiceControlEL7("rabbitmq-server")

    # Run the steps in order; each returns a truthy value on failure.
    # The method is looked up only when its turn comes.
    for step_name in ("enable", "_stop", "_start"):
        failure = getattr(rabbit, step_name)()
        if failure:
            log.error(failure)
            raise RuntimeError(failure)
def _setup_influxdb(self):
    """Configure, restart and provision the local InfluxDB instance.

    Writes ``/etc/default/influxdb``, enables and restarts the ``influxdb``
    service, waits until the ``influx`` CLI answers, then creates the
    databases, retention policies and continuous queries named in
    ``settings``.

    Raises:
        RuntimeError: if enabling, stopping or starting the service reports
            an error.
    """

    def influx(*cli_args):
        # Every provisioning step is an ``influx`` CLI call made via try_shell.
        return self.try_shell(["influx"] + list(cli_args))

    influxdb = ServiceControlEL7("influxdb")

    env_overrides = (
        "INFLUXDB_DATA_QUERY_LOG_ENABLED=false\n",
        # Disable reporting
        "INFLUXDB_REPORTING_DISABLED=true\n",
        # Disable influx http logging (of every write and every query)
        "INFLUXDB_HTTP_LOG_ENABLED=false\n",
    )
    with open("/etc/default/influxdb", "w") as env_file:
        for entry in env_overrides:
            env_file.write(entry)

    log.info("Starting InfluxDB...")

    # Each step returns a truthy value on failure; look the method up lazily
    # so the steps run strictly one after another.
    for step_name in ("enable", "_stop", "_start"):
        failure = getattr(influxdb, step_name)()
        if failure:
            log.error(failure)
            raise RuntimeError(failure)

    # Wait for influx to finish starting
    wait_for_result(
        lambda: influx("-execute", "exit"),
        logger=log,
        timeout=60,
        expected_exception_classes=[CommandError],
    )

    # When changing any of the following also change: docker/influxdb/setup-influxdb.sh
    log.info("Creating InfluxDB database...")

    influx("-execute", "CREATE DATABASE {}".format(settings.INFLUXDB_IML_DB))

    scan_db = settings.INFLUXDB_STRATAGEM_SCAN_DB
    influx("-execute", "CREATE DATABASE {}".format(scan_db))
    influx(
        "-database",
        scan_db,
        "-execute",
        'ALTER RETENTION POLICY "autogen" ON "{}" DURATION 90d SHARD DURATION 9d'.format(scan_db),
    )

    stats_db = settings.INFLUXDB_IML_STATS_DB
    influx("-execute", "CREATE DATABASE {}".format(stats_db))

    long_duration = settings.INFLUXDB_IML_STATS_LONG_DURATION
    try:
        influx(
            "-database",
            stats_db,
            "-execute",
            'CREATE RETENTION POLICY "long_term" ON "{}" DURATION {} REPLICATION 1 SHARD DURATION 5d'.format(
                stats_db, long_duration
            ),
        )
    except CommandError:
        # CREATE failed (presumably because the policy already exists), so
        # update the policy in place instead.
        influx(
            "-database",
            stats_db,
            "-execute",
            'ALTER RETENTION POLICY "long_term" ON "{}" DURATION {} REPLICATION 1 SHARD DURATION 5d'.format(
                stats_db, long_duration
            ),
        )

    # Drop and re-create the continuous queries in a single influx invocation.
    continuous_queries = [
        'DROP CONTINUOUS QUERY "downsample_means" ON "{}"'.format(stats_db),
        'DROP CONTINUOUS QUERY "downsample_lnet" ON "{}"'.format(stats_db),
        'DROP CONTINUOUS QUERY "downsample_samples" ON "{}"'.format(stats_db),
        'DROP CONTINUOUS QUERY "downsample_sums" ON "{}"'.format(stats_db),
        'CREATE CONTINUOUS QUERY "downsample_means" ON "{}" BEGIN SELECT mean(*) INTO "{}"."long_term".:MEASUREMENT FROM "{}"."autogen"."target","{}"."autogen"."host","{}"."autogen"."node" GROUP BY time(30m),* END'.format(
            stats_db, stats_db, stats_db, stats_db, stats_db
        ),
        'CREATE CONTINUOUS QUERY "downsample_lnet" ON "{}" BEGIN SELECT (last("send_count") - first("send_count")) / count("send_count") AS "mean_diff_send", (last("recv_count") - first("recv_count")) / count("recv_count") AS "mean_diff_recv" INTO "{}"."long_term"."lnet" FROM "lnet" WHERE "nid" != \'"0@lo"\' GROUP BY time(30m),"host","nid" END'.format(
            stats_db, stats_db
        ),
        'CREATE CONTINUOUS QUERY "downsample_samples" ON "{}" BEGIN SELECT (last("samples") - first("samples")) / count("samples") AS "mean_diff_samples" INTO "{}"."long_term"."target" FROM "target" GROUP BY time(30m),* END'.format(
            stats_db, stats_db
        ),
        'CREATE CONTINUOUS QUERY "downsample_sums" ON "{}" BEGIN SELECT (last("sum") - first("sum")) / count("sum") AS "mean_diff_sum" INTO "{}"."long_term"."target" FROM "target" WHERE "units"=\'"bytes"\' GROUP BY time(30m),* END'.format(
            stats_db, stats_db
        ),
    ]
    influx("-database", stats_db, "-execute", "; ".join(continuous_queries))

    influx(
        "-database",
        stats_db,
        "-execute",
        'ALTER RETENTION POLICY "autogen" ON "{}" DURATION 1d REPLICATION 1 SHARD DURATION 2h DEFAULT'.format(
            stats_db
        ),
    )
def restart(self, program):
    """Restart the service named ``program`` through ``ServiceControlEL7``."""
    controller = ServiceControlEL7(program)
    controller.restart(retry_time=0.1, validate_time=0)
def stop(self, program):
    """Stop the service named ``program`` through ``ServiceControlEL7``."""
    controller = ServiceControlEL7(program)
    controller.stop(retry_time=0.1, validate_time=0)
def start(self, program):
    """Start the service named ``program`` and wait on its ports.

    After the service is started, ``self._wait_for_port`` is called for each
    port listed for ``program`` in ``self.PORTS`` (none if it has no entry).
    """
    controller = ServiceControlEL7(program)
    controller.start(retry_time=0.1, validate_time=0)

    ports_to_await = self.PORTS.get(program, [])
    for port_number in ports_to_await:
        self._wait_for_port(port_number)