def run(self):
    """Refresh the statistics and charts of every service, forever.

    Each iteration looks up the event scheduled for the current time in ``env.events``, then, for every
    ``(service, stats)`` pair of ``env.statistics``:

    * collects the units, either from the units API, from ``env.get_units()`` or, when
      ``env.enable_units_status`` is false, from a synthetic mapping holding one started unit per planned unit;
    * collects the tasks from the tasks API when ``env.enable_tasks_status`` is true (``None`` otherwise);
    * calls ``stats.update()``, the three ``stats.generate_*_chart*()`` methods with ``env.charts_path`` and
      finally ``stats.write()``.

    ``ConnectionError`` and ``Timeout`` are reported on standard output and the loop carries on; any other
    exception propagates to the caller. ``self.sleep()`` is called at the end of every iteration.
    """
    while True:
        env = self.environment
        # Get current time to retrieve state
        now, now_string = datetime_now(format=None), datetime_now()
        try:
            env.auto = True  # Really better like that ;-)
            index, event = env.events.get(now, default_value={})
            print(u'[{0}] Update charts at index {1}.'.format(self.name, index))
            for service, stats in env.statistics.iteritems():
                units_api, tasks_api = SERVICE_TO_UNITS_API[service], SERVICE_TO_TASKS_API[service]
                # None when the current event does not mention this service
                planned = event.get(service, None)
                api_client = env.api_client
                api_client.auth = env.daemons_auth
                if env.enable_units_status:
                    if env.enable_units_api:
                        units = getattr(api_client, units_api).list()
                    else:
                        units = env.get_units(service)
                else:
                    # Status retrieval is disabled: report every planned unit as started.
                    # range(None) raises TypeError, so a missing plan yields no synthetic unit at all.
                    units = {k: {u'agent-state': py_juju.STARTED} for k in range(planned or 0)}
                tasks = getattr(api_client, tasks_api).list(head=True) if env.enable_tasks_status else None
                stats.update(now_string, planned, units, tasks)
                stats.generate_units_pie_chart_by_status(env.charts_path)
                stats.generate_units_line_chart(env.charts_path)
                stats.generate_tasks_line_chart(env.charts_path)
                stats.write()
        except (ConnectionError, Timeout) as e:
            # FIXME do something here ...
            print(u'[{0}] WARNING! Communication error, details: {1}.'.format(self.name, e))
        self.sleep()
def run(self):
    """Keep the number of units of every service in line with the scheduled events, forever.

    Each iteration looks up the event scheduled for the current time in ``env.events``, then, for every
    service of ``env.statistics``:

    * lists the units, through the units API when ``env.enable_units_api`` is true, through
      ``env.get_units()`` otherwise;
    * calls ``env.ensure_num_units()`` followed by ``env.cleanup_machines()`` when the number of units
      differs from the planned one;
    * runs ``juju resolved --retry`` on every listed unit whose ``agent-state`` is in ``ERROR_STATES``.

    ``ConnectionError`` and ``Timeout`` are reported on standard output and the loop carries on; any other
    exception propagates to the caller. ``self.sleep()`` is called at the end of every iteration.
    """
    while True:
        # Get current time to retrieve state
        env = self.environment
        now = datetime_now(format=None)
        # NOTE(review): this formatted timestamp is never read in this loop - confirm it can be dropped.
        now_string = datetime_now()
        try:
            env.auto = True  # Really better like that ;-)
            index, event = env.events.get(now, default_value={})
            print(u'[{0}] Handle scaling at index {1}.'.format(self.name, index))
            for service, stats in env.statistics.iteritems():
                label = SERVICE_TO_LABEL.get(service, service)
                units_api = SERVICE_TO_UNITS_API[service]
                planned = event.get(service, None)

                if not env.enable_units_api:
                    units = env.get_units(service)
                else:
                    api_client = env.api_client
                    api_client.auth = env.daemons_auth
                    units = getattr(api_client, units_api).list()

                if len(units) == planned:
                    print(u'[{0}] Nothing to do !'.format(self.name))
                else:
                    scaling_message = u'[{0}] Ensure {1} instances of service {2}'.format(self.name, planned, label)
                    print(scaling_message)
                    env.ensure_num_units(service, service, num_units=planned)
                    env.cleanup_machines()  # Safer way to terminate machines !

                # Recover faulty units
                # FIXME only once and then destroy and warn admin by mail ...
                for unit_number, unit_status in units.iteritems():
                    if unit_status.get(u'agent-state') not in ERROR_STATES:
                        continue
                    unit_name = u'{0}/{1}'.format(service, unit_number)
                    juju_do(u'resolved', environment=env.name, options=[u'--retry', unit_name], fail=False)
        except (ConnectionError, Timeout) as error:
            # FIXME do something here ...
            print(u'[{0}] WARNING! Communication error, details: {1}.'.format(self.name, error))
        self.sleep()
def read_or_default(environment, service, **kwargs):
    """Return the statistics of ``service`` in ``environment``, as given by ``ServiceStatistics.read()``.

    The file read is ``<STATISTICS_PATH>/<environment>_<label>.pkl`` where ``label`` is the entry of
    ``SERVICE_TO_LABEL`` for ``service``, or the service name itself when it has no entry. The read is done
    with ``store_filename=True`` and ``create_if_error=True`` (presumably a fresh instance is built when the
    file cannot be loaded - confirm against ``ServiceStatistics.read``); ``environment``, ``service`` and any
    extra ``kwargs`` are forwarded to it.

    A one-line summary is printed, followed by a warning when the statistics report unknown states.
    """
    pickle_name = u'{0}_{1}.pkl'.format(environment, SERVICE_TO_LABEL.get(service, service))
    pickle_path = os.path.join(STATISTICS_PATH, pickle_name)
    stats = ServiceStatistics.read(
        pickle_path, store_filename=True, create_if_error=True, environment=environment, service=service,
        **kwargs)
    summary = u'Read {0} with {1} measures.'.format(os.path.basename(pickle_path), len(stats.units_planned))
    print(summary)
    if stats.unknown_states:
        warning = u'[WARNING] Unknown states: {0}.'.format(stats.unknown_states)
        print(warning)
    return stats
def service_label(self):
    """Return the entry of ``SERVICE_TO_LABEL`` for ``self.service``, or ``self.service`` when it has none."""
    service = self.service
    return SERVICE_TO_LABEL.get(service, service)