def _is_server_for_update(self, server, status):
    repo_url = status['repo_url']
    devel_branch = server.get('user-data.scm_branch', None)
    ver_info = self.get_szr_ver_from_repo(devel_branch=devel_branch)
    try:
        szr_ver_repo = ver_info[repo_url]
    except KeyError:
        pkg_type = helper.pkg_type_by_name(status['dist'].split()[0])
        szr_ver_repo = ver_info[status['repository']][pkg_type]
    if parse_version(server['scalarizr.version']) >= parse_version(szr_ver_repo):
        return False
    if 'in-progress' in status['state']:
        # skip in-progress server
        return False
    if status['executed_at']:
        last_update_dt = datetime.datetime.strptime(
            status['executed_at'], '%a %d %b %Y %H:%M:%S %Z')
        last_update_dt = last_update_dt.replace(minute=0, second=0, microsecond=0)
        utcnow_dt = datetime.datetime.utcnow()
        utcnow_dt = utcnow_dt.replace(minute=0, second=0, microsecond=0)
        if last_update_dt == utcnow_dt and status['state'] == 'error':
            # skip failed server
            LOG.debug(
                'Skip server: {0}, reason: server in error state'.format(
                    server['server_id']))
            return False
    return True
def __call__(self):
    self.change_permissions()
    while True:
        try:
            self.iteration_timestamp = time.time()
            self.before_iteration()
            g = self._do_iteration()
            try:
                g.get(timeout=self.iteration_timeout)
            except:
                self.on_iteration_error()
                raise
            finally:
                if not g.ready():
                    g.kill()
            self.after_iteration()
            iteration_time = time.time() - self.iteration_timestamp
            msg = 'End iteration: {0:.1f} seconds'.format(iteration_time)
            LOG.debug(msg)
        except:
            LOG.exception('Iteration failed')
            time.sleep(self.error_sleep)
        finally:
            if self.config['interval']:
                next_iteration_time = self.iteration_timestamp + self.config['interval']
                time.sleep(next_iteration_time - time.time())
def __init__(self, record=None):
    Table.__init__(self)
    self._types = {
        'usage_id': UUIDType,
        'dtime': QuoteType,
        'platform': QuoteType,
        'url': QuoteType,
        'cloud_location': QuoteType,
        'instance_type': QuoteType,
        'os': NoQuoteType,
        'num': NoQuoteType,
        'cost': NoQuoteType,
    }
    self._fill(record)
    if 'usage_id' not in self:
        try:
            formatted = self._format()
            unique = '; '.join([
                str(formatted['dtime']).strip(),
                str(formatted['platform']).strip(),
                str(formatted['url']).strip(),
                str(formatted['cloud_location']).strip(),
                str(formatted['instance_type']).strip(),
                str(formatted['os']).strip(),
            ])
            self['usage_id'] = uuid.uuid5(UUID, unique).hex
        except KeyError:
            msg = "Can't set not managed usage_id for record: {record}, reason: {error}"
            msg = msg.format(record=record, error=helper.exc_info())
            LOG.warning(msg)
def do_iteration(self):
    while len(self._processing_messages) > self.max_processing_messages:
        time.sleep(1)
    messages = self.get_messages()
    if not messages:
        time.sleep(self.nothing_todo_sleep)
        return
    servers = self.get_servers(messages)
    servers_map = dict((server['server_id'], server) for server in servers)
    for message in messages:
        try:
            if message['messageid'] in self._processing_messages:
                continue
            self._processing_messages.add(message['messageid'])
            if message['server_id'] not in servers_map:
                msg = (
                    "Server '{server_id}' doesn't exist or is not in the right status, "
                    "setting message status to 3"
                ).format(server_id=message['server_id'])
                LOG.warning(msg)
                message['status'] = 3
                self._pool.wait()
                self._pool.apply_async(self.update, (message,))
            else:
                server = servers_map[message['server_id']]
                self._pool.wait()
                self._pool.apply_async(self.process_message, (message, server))
        except:
            msg = "Unable to process message: {message_id}, reason: {error}"
            msg = msg.format(message_id=message['messageid'], error=helper.exc_info())
            LOG.warning(msg)
def download_aws_billing_file(self, cred, bucket_name, date=None):
    if date is None:
        date = datetime.datetime.utcnow().date()
    conn = get_s3_conn(cred)
    bucket = conn.get_bucket(bucket_name)
    account_id = cryptotool.decrypt_scalr(app.crypto_key, cred['account_id'])
    file_name = get_aws_csv_file_name(account_id, date)
    key = bucket.get_key(file_name)
    if not key:
        msg = "AWS detailed billing CSV file {0} wasn't found in bucket {1}"
        msg = msg.format(file_name, bucket_name)
        if datetime.datetime.utcnow().day == 1:
            LOG.warning(msg)
            return None
        else:
            raise Exception(msg)
    last_modified_dt = datetime.datetime.strptime(
        key.last_modified, self.last_modified_format)
    update_interval = self.config['interval']
    utcnow = datetime.datetime.utcnow()
    delta = datetime.timedelta(seconds=update_interval)
    condition1 = utcnow > last_modified_dt and utcnow < last_modified_dt + delta
    condition2 = ((utcnow - last_modified_dt).seconds / 3600) % 8 == 0
    if condition1 or condition2:
        local_file_path = os.path.join(self.tmp_dir, file_name)
        LOG.debug('Downloading {0}'.format(file_name))
        key.get_contents_to_filename(local_file_path)
        return local_file_path
    else:
        return None
def ver_from_rpm_repo(self, repo, branch=None): out = {} rpm_repo_url_template = repo["rpm_repo_url"] if rpm_repo_url_template: rpm_repo_url_template = rpm_repo_url_template.strip() if branch: rpm_repo_url_template = rpm_repo_url_template % branch for release in ["5", "6", "7"]: rpm_repo_url = rpm_repo_url_template.replace("$releasever", release) rpm_repo_url = rpm_repo_url.replace("$basearch", "x86_64") url = os.path.join(rpm_repo_url, "repodata/primary.xml.gz") try: r = requests.get(url) r.raise_for_status() assert r.text, "Empty primary.xml file" s = StringIO.StringIO(r.content) f = gzip.GzipFile(fileobj=s, mode="r") f.seek(0) xml = minidom.parse(f) try: out[rpm_repo_url_template] = self.rpm_pattern_1.findall(xml.toxml())[0].strip() except: out[rpm_repo_url_template] = self.rpm_pattern_2.findall(xml.toxml())[0].strip() except (requests.exceptions.HTTPError, requests.exceptions.InvalidSchema): msg = "RPM repository {0} failed, file not found: {1}" msg = msg.format(repo["rpm_repo_url"], url) LOG.warning(msg) return out
def _is_server_for_update(self, server, status):
    repo_url = status["repo_url"]
    devel_branch = server.get("user-data.scm_branch", None)
    ver_info = self.get_szr_ver_from_repo(devel_branch=devel_branch)
    try:
        szr_ver_repo = ver_info[repo_url]
    except KeyError:
        pkg_type = helper.pkg_type_by_name(status["dist"].split()[0])
        szr_ver_repo = ver_info[status["repository"]][pkg_type]
    if parse_version(server["scalarizr.version"]) >= parse_version(szr_ver_repo):
        return False
    if "in-progress" in status["state"]:
        # skip in-progress server
        return False
    if status["executed_at"]:
        last_update_dt = datetime.datetime.strptime(status["executed_at"], "%a %d %b %Y %H:%M:%S %Z")
        last_update_dt = last_update_dt.replace(minute=0, second=0, microsecond=0)
        utcnow_dt = datetime.datetime.utcnow()
        utcnow_dt = utcnow_dt.replace(minute=0, second=0, microsecond=0)
        if last_update_dt == utcnow_dt and status["state"] == "error":
            # skip failed server
            LOG.debug("Skip server: {0}, reason: server in error state".format(server["server_id"]))
            return False
    return True
def get_metrics(host, port, key, api_type, metrics, headers=None, timeout=5):
    assert host, 'host'
    assert port, 'port'
    assert key, 'key'
    assert api_type, 'api_type'
    assert metrics, 'metrics'
    data = dict()
    endpoint = 'http://%s:%s' % (host, port)
    security = rpc.Security(key)
    hsp = rpc.HttpServiceProxy(endpoint, security=security, headers=headers)
    getters = {
        'cpu': get_cpu_stat,
        'la': get_la_stat,
        'mem': get_mem_info,
        'net': get_net_stat,
        'io': get_io_stat,
    }
    for metric in metrics:
        try:
            data.update({metric: getters[metric](hsp, api_type, timeout=timeout)})
        except (urllib2.URLError, urllib2.HTTPError, socket.timeout):
            raise
        except:
            msg = "Endpoint: %s, metric '%s' failed: %s" % (endpoint, metric, helper.exc_info())
            LOG.warning(msg)
            continue
    return data
def ver_from_rpm_repo(self, repo, branch=None):
    out = {}
    rpm_repo_url_template = repo['rpm_repo_url']
    if rpm_repo_url_template:
        rpm_repo_url_template = rpm_repo_url_template.strip()
        if branch:
            rpm_repo_url_template = rpm_repo_url_template % branch
        for release in ['5', '6']:
            rpm_repo_url = rpm_repo_url_template.replace('$releasever', release)
            rpm_repo_url = rpm_repo_url.replace('$basearch', 'x86_64')
            url = os.path.join(rpm_repo_url, 'repodata/primary.xml.gz')
            try:
                r = requests.get(url)
                r.raise_for_status()
            except (requests.exceptions.HTTPError, requests.exceptions.InvalidSchema):
                msg = 'RPM repository {0} failed, file: {1} not found'.format(repo['rpm_repo_url'], url)
                LOG.warning(msg)
                return out
            s = StringIO.StringIO(r.content)
            f = gzip.GzipFile(fileobj=s, mode='r')
            f.seek(0)
            xml = minidom.parse(f)
            try:
                out[rpm_repo_url_template] = self.rpm_pattern_1.findall(xml.toxml())[0].strip()
            except:
                out[rpm_repo_url_template] = self.rpm_pattern_2.findall(xml.toxml())[0].strip()
    return out
def get_prices(self, servers):
    """
    :returns: dict {account_id: {platform_url: {cloud_location: {instance_type: {os: cost}}}}}
    """
    prices = dict()
    for raw_prices in self._get_raw_prices(servers):
        for raw_price in raw_prices:
            try:
                account_id = raw_price['account_id']
                platform = raw_price['platform']
                url = raw_price['url']
                platform_url = '%s;%s' % (platform, url)
                cloud_location = raw_price['cloud_location']
                instance_type = raw_price['instance_type']
                os_type = raw_price['os']
                cost = raw_price['cost']
                prices.setdefault(account_id, dict())
                prices[account_id].setdefault(platform_url, dict())
                prices[account_id][platform_url].setdefault(cloud_location, dict())
                prices[account_id][platform_url][cloud_location].setdefault(instance_type, dict())
                prices[account_id][platform_url][cloud_location][instance_type][os_type] = cost
            except KeyError:
                msg = "Unable to get price from raw price, reason: {error}"
                msg = msg.format(error=helper.exc_info())
                LOG.warning(msg)
    return prices
def execute(self, query, retries=0, retry_timeout=10):
    while True:
        try:
            if self._autocommit or not self._connection:
                self._local.connection = self._connection_pool.get(timeout=10)
                self._local.connection.autocommit(self._autocommit)
            self._local.cursor = self._connection.cursor()
            try:
                start_time = time.time()
                self._local.cursor.execute(query)
                end_time = time.time()
                if end_time - start_time > 1:
                    LOG.debug('Query too slow: %s\n%s...' % (end_time - start_time, query[:150]))
                results = self._local.cursor.fetchall()
                if results is not None:
                    results = tuple(results)
                return results
            finally:
                if self._autocommit:
                    self._local.cursor.close()
                    self._connection_pool.put(self._local.connection)
                    self._local.connection = None
                    self._local.cursor = None
        except (pymysql.err.OperationalError, pymysql.err.InternalError, socket.timeout):
            if not retries:
                raise
            retries -= 1
            time.sleep(retry_timeout)
def _serve_forever(self):
    LOG.debug('Starting plotter')
    try:
        cherrypy.quickstart(self, '/', {'/': {}})
    except:
        LOG.error(helper.exc_info())
        thread.interrupt_main()
def sorted_data_update(sorted_data):
    for region_data in sorted_data:
        for server in region_data['managed']:
            if server.get('os', None) is not None:
                continue
            query = (
                "SELECT os_type os "
                "FROM servers "
                "WHERE server_id='{server_id}'"
            ).format(server_id=server['server_id'])
            result = app.scalr_db.execute(query, retries=1)
            if not result:
                query = (
                    "SELECT value AS os "
                    "FROM server_properties "
                    "WHERE server_id='{server_id}' "
                    "AND name='os_type'"
                ).format(server_id=server['server_id'])
                result = app.scalr_db.execute(query, retries=1)
            if not result:
                server['os'] = 'linux'
                msg = "Can't detect OS type for server: {0}, set 'linux'".format(
                    server['server_id'])
                LOG.warning(msg)
            else:
                server['os'] = result[0]['os']
        for server in region_data['managed']:
            server['os'] = analytics.os_map[server.get('os', None)]
        for server in region_data['not_managed']:
            server['os'] = analytics.os_map[server.get('os', None)]
def get_metrics(host, port, key, api_type, metrics, headers=None, timeout=5):
    assert host, 'host'
    assert port, 'port'
    assert key, 'key'
    assert api_type, 'api_type'
    assert metrics, 'metrics'
    data = dict()
    endpoint = 'http://%s:%s' % (host, port)
    security = rpc.Security(key)
    hsp = rpc.HttpServiceProxy(endpoint, security=security, headers=headers)
    getters = {
        'cpu': get_cpu_stat,
        'la': get_la_stat,
        'mem': get_mem_info,
        'net': get_net_stat,
        'io': get_io_stat,
    }
    for metric in metrics:
        try:
            data.update({metric: getters[metric](hsp, api_type, timeout=timeout)})
        except (urllib2.URLError, urllib2.HTTPError, socket.timeout):
            msg = "Endpoint: {endpoint}, headers: {headers}, metric: '{metric}', reason: {err}"
            msg = msg.format(
                endpoint=endpoint, headers=headers, metric=metric, err=helper.exc_info())
            raise Exception(msg)
        except:
            msg = "Endpoint: {endpoint}, headers: {headers}, metric '{metric}' failed, reason: {err}"
            msg = msg.format(
                endpoint=endpoint, headers=headers, metric=metric, err=helper.exc_info())
            LOG.warning(msg)
            continue
    return data
def get_net_stat(hsp, api_type='linux', timeout=5):
    net = hsp.sysinfo.net_stats(timeout=timeout)
    if api_type == 'linux':
        ret = {
            'in': float(net['eth0']['receive']['bytes']),
            'out': float(net['eth0']['transmit']['bytes']),
        }
    elif api_type == 'windows':
        for key in net:
            if re.match(r'^.*Ethernet Adapter.*$', key) \
                    or re.match(r'^.*AWS PV Network Device.*$', key):
                ret = {
                    'in': float(net[key]['receive']['bytes']),
                    'out': float(net[key]['transmit']['bytes']),
                }
                break
        else:
            msg = (
                "Can't find ['^.*Ethernet Adapter.*$', '^.*AWS PV Network Device.*$'] "
                "pattern in api response for endpoint: {0}, available: {1}, use {2}"
            ).format(hsp.endpoint, net.keys(), net.keys()[0])
            LOG.warning(msg)
            first_key = net.keys()[0]
            ret = {
                'in': float(net[first_key]['receive']['bytes']),
                'out': float(net[first_key]['transmit']['bytes']),
            }
    else:
        raise APIError("Unsupported API type '%s' for NET stat" % api_type)
    return ret
def _plot(self, kwds, tz, metric):
    img_dir = self._get_image_dir(kwds)
    url_dir = self._get_url_dir(kwds)
    rrd_files = self._get_rrd_files(kwds, metric)
    if not rrd_files:
        msg = "Couldn't find rrd file(s) for request: {0}, metric: {1}"
        msg = msg.format(kwds, metric)
        LOG.warning(msg)
        raise Plotter.IOError('Statistics are not available')
    if metric == 'io':
        url = dict()
        options = rrd.GRAPH_OPT[kwds['period']]
        for rrd_file in rrd_files:
            dev = os.path.basename(rrd_file)[:-4]
            url[dev] = dict()
            img_file = os.path.join(img_dir, 'io_bits_%s_%s.png' % (dev, kwds['period']))
            url[dev]['bits_per_sec'] = os.path.join(
                url_dir, 'io_bits_%s_%s.png' % (dev, kwds['period']))
            rrd.plot_io_bits(str(img_file), str(rrd_file), options, tz=tz)
            img_file = os.path.join(img_dir, 'io_ops_%s_%s.png' % (dev, kwds['period']))
            url[dev]['operations_per_sec'] = os.path.join(
                url_dir, 'io_ops_%s_%s.png' % (dev, kwds['period']))
            rrd.plot_io_ops(str(img_file), str(rrd_file), options, tz=tz)
    else:
        rrd_file = rrd_files[0]
        img_file = os.path.join(img_dir, '%s_%s.png' % (metric, kwds['period']))
        if not os.path.exists(img_file) or os.path.getmtime(img_file) + 60 < time.time():
            options = rrd.GRAPH_OPT[kwds['period']]
            self._plotters[metric](str(img_file), str(rrd_file), options)
        url = os.path.join(url_dir, '%s_%s.png' % (metric, kwds['period']))
    return url
def __call__(self):
    self.change_permissions()
    while True:
        try:
            self.iteration_timestamp = time.time()
            g = self._do_iteration()
            try:
                g.get(timeout=self.iteration_timeout)
            except gevent.Timeout:
                raise exceptions.IterationTimeoutError()
            finally:
                if not g.ready():
                    g.kill()
        except (SystemExit, KeyboardInterrupt):
            raise
        except exceptions.NothingToDoError:
            time_to_sleep = self.nothing_to_do_sleep
        except exceptions.QuitError:
            sys.exit(0)
        except:
            LOG.error('Iteration failed, reason: {0}'.format(helper.exc_info()))
            self.on_iteration_error()
            time_to_sleep = self.error_sleep
        else:
            time_to_sleep = 0.1
        LOG.debug('End iteration: {0:.1f} seconds'.format(time.time() - self.iteration_timestamp))
        if self.config['interval']:
            time_to_sleep = self.iteration_timestamp + self.config['interval'] - time.time()
        time.sleep(time_to_sleep)
def do_iteration(self):
    for envs in self.analytics.load_envs():
        try:
            self.analytics.load_env_credentials(envs)
            unique = {}
            for env in envs:
                try:
                    credentials = self.analytics.get_credentials([env])
                    for cred in credentials:
                        if cred.platform == 'ec2' and env.get('ec2.detailed_billing.enabled', '0') == '1':
                            continue
                        unique.setdefault(cred.unique, {'envs_ids': [], 'cred': cred})
                        unique[cred.unique]['envs_ids'].append(env['id'])
                except:
                    msg = 'Processing environment: {} failed'.format(env['id'])
                    LOG.exception(msg)
            for data in unique.values():
                while len(self.pool) > self.config['pool_size'] * 5 / 10:
                    gevent.sleep(0.1)
                self.pool.apply_async(process_credential,
                                      args=(data['cred'],),
                                      kwds={'envs_ids': data['envs_ids']})
                gevent.sleep(0)  # force switch
        except:
            msg = 'Processing environments: {} failed'.format([env['id'] for env in envs])
            LOG.exception(msg)
    self.pool.join()
def _average(self, results, ra=None, fa=None, rs=None, fs=None):
    ra = ra or dict()
    fa = fa or dict()
    rs = rs or dict()
    fs = fs or dict()
    for result in results:
        try:
            r_key, f_key = self._get_rf_keys(result)
            if 'snum' in self.config['metrics']:
                rs.setdefault(r_key, {'snum': {'s_running': 0}})
                fs.setdefault(f_key, {'snum': {'s_running': 0}})
                rs[r_key]['snum']['s_running'] += 1
                fs[f_key]['snum']['s_running'] += 1
            if not result['data']:
                continue
            for metrics_group_name, metrics_data in result['data'].iteritems():
                if not metrics_data or metrics_group_name == 'io':
                    continue
                for metric_name, value in metrics_data.iteritems():
                    try:
                        ra.setdefault(r_key, {})
                        ra[r_key].setdefault(metrics_group_name, {})
                        ra[r_key][metrics_group_name].setdefault(metric_name, Average())
                        fa.setdefault(f_key, {})
                        fa[f_key].setdefault(metrics_group_name, {})
                        fa[f_key][metrics_group_name].setdefault(metric_name, Average())
                        ra[r_key][metrics_group_name][metric_name] += value
                        fa[f_key][metrics_group_name][metric_name] += value
                    except:
                        LOG.error(helper.exc_info())
        except:
            LOG.error(helper.exc_info())
    return ra, fa, rs, fs
def daemonize(stdin='/dev/null', stdout='/dev/null', stderr='/dev/null'):
    LOG.debug("Daemonize")
    # first fork
    pid = os.fork()
    if pid > 0:
        sys.exit(0)
    os.chdir('/')
    os.setsid()
    os.umask(0)
    # second fork
    pid = os.fork()
    if pid > 0:
        sys.exit(0)
    # redirect standard file descriptors
    sys.stdout.flush()
    sys.stderr.flush()
    si = file(stdin, 'r')
    so = file(stdout, 'a+')
    se = file(stderr, 'a+', 0)
    os.dup2(si.fileno(), sys.stdin.fileno())
    os.dup2(so.fileno(), sys.stdout.fileno())
    os.dup2(se.fileno(), sys.stderr.fileno())
def _get_szr_conn_info(server, port, instances_connection_policy):
    ip = {
        'public': server['remote_ip'],
        'local': server['local_ip'],
        'auto': server['remote_ip'] if server['remote_ip'] else server['local_ip'],
    }[instances_connection_policy]
    headers = {}
    if server['platform'] == 'ec2' and 'ec2.vpc.id' in server and 'router.vpc.ip' in server:
        if server['remote_ip']:
            ip = server['remote_ip']
        else:
            headers.update({
                'X-Receiver-Host': server['local_ip'],
                'X-Receiver-Port': port,
            })
            ip = server['router.vpc.ip']
            port = 80
    # Start - Added by Chen Leji
    if not server['remote_ip']:
        LOG.info("=============Apply FLOATINGIP_PROXY Patch=============")
        ip = "localhost"
        proxy = floatingip_proxy.szrProxy(port, server['server_id'])
        port = proxy.get_proxy_port()
    return ip, port, headers
def get_servers(self, limit=500):
    for servers in self._get_servers(limit=limit):
        prop = ['scalarizr.api_port', 'scalarizr.key']
        self._db.load_server_properties(servers, prop)
        for server in servers:
            if 'scalarizr.api_port' not in server:
                server['scalarizr.api_port'] = 8010
            if 'scalarizr.key' not in server:
                server['scalarizr.key'] = None
        self._db.load_vpc_settings(servers)
        out = []
        for server in servers:
            try:
                if server['os_type'] == 'linux':
                    exclude = ['snum']
                elif server['os_type'] == 'windows':
                    exclude = ['la', 'io', 'snum']
                else:
                    msg = "Wrong os type for server: '%s'" % server['server_id']
                    raise Exception(msg)
                metrics = [m for m in self.config['metrics'] if m not in exclude]
                server['metrics'] = metrics
                out.append(server)
            except:
                LOG.error(helper.exc_info())
                continue
        yield out
def _get_statuses(self, servers):
    async_results = {}
    for server in servers:
        if 'scalarizr.key' not in server:
            msg = "Server: {0}, reason: Missing scalarizr key".format(server['server_id'])
            LOG.warning(msg)
            continue
        if 'scalarizr.updc_port' not in server:
            api_port = self.scalr_config['scalarizr_update'].get('api_port', 8008)
            server['scalarizr.updc_port'] = api_port
        self._pool.wait()
        async_results[server['server_id']] = self._pool.apply_async(self._get_status, (server,))
        gevent.sleep(0)  # force switch
    statuses = {}
    timeout = self.config['instances_connection_timeout']
    for server in servers:
        try:
            server_id = server['server_id']
            statuses[server_id] = async_results[server_id].get(timeout=timeout)
        except:
            msg = 'Unable to get update client status, server: {0}, reason: {1}'
            msg = msg.format(server['server_id'], helper.exc_info())
            LOG.warning(msg)
    return statuses
def do_iteration(self):
    messages = self.get_messages()
    if not messages:
        raise exceptions.NothingToDoError()
    servers = self.get_servers(messages)
    servers_map = dict((server['server_id'], server) for server in servers)
    for message in messages:
        try:
            if message['server_id'] not in servers_map:
                msg = (
                    "Server '{server_id}' doesn't exist or is not in the right status, "
                    "setting message status to 3"
                ).format(server_id=message['server_id'])
                LOG.warning(msg)
                message['status'] = 3
                self._pool.wait()
                self._pool.apply_async(self.update, (message,))
            else:
                server = servers_map[message['server_id']]
                self._pool.wait()
                self._pool.apply_async(self.process_message, (message, server))
        except:
            msg = "Unable to process message: {message_id}, reason: {error}"
            msg = msg.format(message_id=message['messageid'], error=helper.exc_info())
            LOG.warning(msg)
    self._pool.join()
def get_servers(self):
    for servers in self._get_servers():
        prop = ['scalarizr.api_port', 'scalarizr.key']
        self._db.load_server_properties(servers, prop)
        for server in servers:
            if 'scalarizr.api_port' not in server:
                server['scalarizr.api_port'] = 8010
            if 'scalarizr.key' not in server:
                server['scalarizr.key'] = None
        self._db.load_vpc_settings(servers)
        out = []
        for server in servers:
            try:
                if server['os_type'] == 'linux':
                    exclude = ['snum']
                elif server['os_type'] == 'windows':
                    exclude = ['la', 'io', 'snum']
                else:
                    msg = "Wrong os type for server: '%s'" % server['server_id']
                    raise Exception(msg)
                metrics = [m for m in self.config['metrics'] if m not in exclude]
                server['metrics'] = metrics
                out.append(server)
            except:
                LOG.error(helper.exc_info())
                continue
        yield out
def create_pid_file(pid_file):
    pid = str(os.getpid())
    msg = "Creating pid file: %s" % pid_file
    LOG.debug(msg)
    if not os.path.exists(os.path.dirname(pid_file)):
        os.makedirs(os.path.dirname(pid_file), mode=0o755)
    file(pid_file, 'w+').write('%s\n' % pid)
def handle_error(message=None, level='exception'):
    c, e, t = sys.exc_info()
    if message:
        message = message.rstrip().rstrip('.') + '. Reason: {}'.format(exc_info())
    else:
        message = exc_info()
    if isinstance(e, (
            KeyboardInterrupt,
            GeneratorExit,
            greenlet_mod.GreenletExit,
            gevent.Timeout,
    )):
        LOG.debug(message)
        raise
    if isinstance(e, SystemExit) and sys.exc_info()[1].args[0] == 0:
        raise
    logging_map = {
        'debug': LOG.debug,
        'info': LOG.info,
        'warning': LOG.warning,
        'error': LOG.error,
        'critical': LOG.critical,
        'exception': LOG.exception,
    }
    #if isinstance(e, pymysql.err.Error):
    #    logging_map[min(level, 'error')](message)
    #else:
    #    logging_map[level](message)
    logging_map[level](message)
def gce(cred):
    """
    :returns: list [{'region': str, 'timestamp': int, 'nodes': list}]
    """
    result = list()
    project_id = cryptotool.decrypt_scalr(app.crypto_key, cred['project_id'])
    key = _gce_key(cred)
    conn, http = _gce_conn(cred, key=key)
    request = conn.zones().list(project=project_id)
    resp = request.execute(http=http)
    zones = [_['name'] for _ in resp['items']] if 'items' in resp else []
    app.pool.wait()
    async_results = dict(
        (zone, app.pool.apply_async(_gce_zone, args=(zone, key, cred,)))
        for zone in zones
    )
    gevent.sleep(0)  # force switch
    for zone, async_result in async_results.iteritems():
        try:
            zone_nodes = async_result.get(timeout=app.config['cloud_connection_timeout'] + 1)
            if zone_nodes:
                result.append(zone_nodes)
        except gevent.timeout.Timeout:
            async_result.kill()
            msg = 'platform: {platform}, zone: {zone}, env_id: {env_id}, reason: timeout'
            msg = msg.format(platform=cred.platform, zone=zone, env_id=cred.env_id)
            LOG.warning(msg)
    return result
def _stop(self):
    LOG.debug(self._stopping_msg)
    try:
        if not os.path.exists(self.config['pid_file']):
            msg = "Can't stop, pid file %s doesn't exist\n" % self.config['pid_file']
            sys.stderr.write(helper.colorize(helper.Color.FAIL, msg))
            return
        with file(self.config['pid_file'], 'r') as pf:
            pid = int(pf.read().strip())
        for ps in psutil.process_iter():
            if ps.name() == self.name[0:15]:
                # TODO
                # SIGINT
                helper.kill_children(pid)
                helper.kill(pid)
                break
        else:
            msg = "Process with name {0} doesn't exist".format(self.name)
            raise Exception(msg)
        LOG.info('Stopped')
        helper.delete_file(self.config['pid_file'])
    except:
        msg = "Can't stop, reason: {error}".format(error=helper.exc_info())
        raise Exception(msg)
def __call__(self):
    self.change_permissions()
    while True:
        try:
            self.iteration_timestamp = time.time()
            self.before_iteration()
            g = self._do_iteration()
            try:
                g.get(timeout=self.iteration_timeout)
            except:
                self.on_iteration_error()
                raise
            finally:
                if not g.ready():
                    g.kill()
            self.after_iteration()
        except:
            try:
                helper.handle_error(message='Iteration failed')
            except (SystemExit, KeyboardInterrupt):
                return
            except:
                pass
            time.sleep(self.error_sleep)
        finally:
            iteration_time = time.time() - self.iteration_timestamp
            msg = 'End iteration: {0:.1f} seconds'.format(iteration_time)
            LOG.debug(msg)
            if self.config['interval']:
                next_iteration_time = self.iteration_timestamp + self.config['interval']
                sleep_time = next_iteration_time - time.time()
                if sleep_time:
                    time.sleep(sleep_time)
def _ec2_region(region, cred):
    try:
        access_key = cryptotool.decrypt_scalr(app.crypto_key, cred["access_key"])
        secret_key = cryptotool.decrypt_scalr(app.crypto_key, cred["secret_key"])
        kwds = {"aws_access_key_id": access_key, "aws_secret_access_key": secret_key}
        proxy_settings = app.proxy_settings.get(cred.platform, {})
        kwds["proxy"] = proxy_settings.get("host")
        kwds["proxy_port"] = proxy_settings.get("port")
        kwds["proxy_user"] = proxy_settings.get("user")
        kwds["proxy_pass"] = proxy_settings.get("pass")
        msg = "List nodes for platform: 'ec2', region: '{}', envs_ids: {}"
        msg = msg.format(region, cred.envs_ids)
        LOG.debug(msg)
        conn = boto.ec2.connect_to_region(region, **kwds)
        cloud_nodes = _ec2_get_only_instances(conn)
        timestamp = int(time.time())
        nodes = list()
        for cloud_node in cloud_nodes:
            node = {
                "instance_id": cloud_node.id,
                "instance_type": cloud_node.instance_type,
                "os": cloud_node.platform if cloud_node.platform else "linux",
            }
            nodes.append(node)
        return {"region": region, "timestamp": timestamp, "nodes": nodes} if nodes else dict()
    except:
        e = sys.exc_info()[1]
        msg = "platform: '{platform}', region: '{region}', envs_ids: {envs_ids}. Reason: {error}"
        msg = msg.format(
            platform=cred.platform, region=region, envs_ids=cred.envs_ids,
            error=helper.exc_info(where=False))
        _handle_exception(e, msg)
def handle_error(message=None, level='exception'):
    c, e, t = sys.exc_info()
    if message:
        message = message.rstrip().rstrip('.') + '. Reason: {}'.format(exc_info())
    else:
        message = exc_info()
    if isinstance(e, (
            KeyboardInterrupt,
            GeneratorExit,
            greenlet_mod.GreenletExit,
            gevent.Timeout,
    )):
        LOG.debug(message)
        raise
    if isinstance(e, SystemExit) and sys.exc_info()[1].args[0] == 0:
        raise
    logging_map = {
        'debug': LOG.debug,
        'info': LOG.info,
        'warning': LOG.warning,
        'error': LOG.error,
        'critical': LOG.critical,
        'exception': LOG.exception,
    }
    if isinstance(e, pymysql.err.Error):
        logging_map[min(level, 'error')](message)
    else:
        logging_map[level](message)
def delete_file(file_path):
    msg = "Deleting file: %s" % file_path
    LOG.debug(msg)
    if os.path.exists(file_path):
        try:
            os.remove(file_path)
        except:
            LOG.warning(exc_info())
def delete_file(file_path):
    msg = "Deleting file: %s" % file_path
    LOG.debug(msg)
    if os.path.exists(file_path):
        try:
            os.remove(file_path)
        except:
            handle_error()
def get_proxy_port(self):
    for szlr in self.szlrs:
        #if (szlr['uuid'] == uuid) and\
        #   (szlr['client_port'] == self.ctl_port):
        if szlr['client_port'] == str(self.ctl_port):
            return szlr['proxy_port']
    LOG.error("==================get_proxy_port failed! --Chen Leji==================")
    return None
def wrapper(*args, **kwds):
    start_time = time.time()
    try:
        return f(*args, **kwds)
    finally:
        end_time = time.time()
        msg = 'TIMEIT %s.%s: %s' % (f.__module__, f.__name__, end_time - start_time)
        LOG.debug(msg)
def execute(self, query, retries=0, retry_timeout=10):
    while True:
        try:
            if self._autocommit or not self._connection:
                self._local.connection = self._get_connection_from_pool(timeout=30)
                self._local.connection.autocommit(self._autocommit)
            self._local.cursor = self._connection.cursor()
            start_time = time.time()
            if len(query) > 2000:
                msg = '%s...' % query[:2000]
            else:
                msg = query
            LOG.debug(msg)
            try:
                self._local.cursor.execute(query)
                results = self._local.cursor.fetchall()
            finally:
                end_time = time.time()
                try:
                    if self._autocommit:
                        self._connection_pool.put(self._local.connection)
                        self._local.cursor.close()
                        self._local.connection = None
                        self._local.cursor = None
                except:
                    msg = 'MySQL finalize connection error'
                    helper.handle_error(message=msg, level='error')
            if end_time - start_time > 5:
                LOG.debug('Query too slow: %s\n%s...' % (end_time - start_time, query[:250]))
            if results is not None:
                results = tuple(results)
            else:
                results = tuple()
            return results
        except exceptions.TimeoutError as e:
            LOG.warning(e)
        except (pymysql.err.InternalError, pymysql.err.OperationalError, socket.timeout) as e:
            if isinstance(e, pymysql.err.InternalError) and e.args[0] == 1213:
                LOG.warning('MySQL 1213 error, retry')
                time.sleep(random.randint(0, 20) / 100.0)
                continue
            if isinstance(e, pymysql.err.OperationalError) and e.args[0] == 2013:
                LOG.warning('MySQL 2013 error during query: %s' % query[0:150])
                if self._local.connection:
                    self._connection_pool.remove(self._local.connection)
                    self._local.connection.close()
                    self._local.connection = None
            if not retries:
                raise
            retries -= 1
            time.sleep(retry_timeout)
def update_nm_usage_h(self, record):
    try:
        query = NM_usage_h(record).update_query()
        self.analytics_db.execute(query, retries=1)
    except:
        msg = 'Unable to update nm_usage_h record: {record}, reason: {error}'
        msg = msg.format(record=record, error=helper.exc_info())
        LOG.error(msg)
        raise
def _set_usage_cost(self, records):
    prices = self.analytics.get_prices(records)
    for record in records:
        cost = self.analytics.get_cost_from_prices(record, prices) or 0
        try:
            record['cost'] = float(cost) * int(record['num'])
        except:
            msg = 'Unable to update usage cost, reason: {error}'
            msg = msg.format(error=helper.exc_info())
            LOG.error(msg)
def process_aws_billing(self):
    if self.args['--recalculate']:
        return
    dtime_from, dtime_to = self.get_aws_billing_interval()
    msg = 'AWS billing interval: {0} - {1}'
    msg = msg.format(dtime_from, dtime_to)
    LOG.debug(msg)
    with self._lock:
        if not self.aws_billing_dtime_from:
            self.aws_billing_dtime_from = dtime_from
        else:
            self.aws_billing_dtime_from = min(self.aws_billing_dtime_from, dtime_from)
    for envs in self.analytics.load_envs():
        unique = {}
        for env in envs:
            if env.get('ec2.detailed_billing.enabled', '0') != '1':
                continue
            bucket_name = env['ec2.detailed_billing.bucket']
            creds = self.analytics.get_creds([env])
            cred = next(cred for cred in creds if cred.platform == 'ec2')
            unique.setdefault(cred.unique,
                              {'envs_ids': [], 'cred': cred, 'bucket_name': bucket_name})
            unique[cred.unique]['envs_ids'].append(env['id'])
        for data in unique.values():
            while len(self.pool) > self.config['pool_size'] * 5 / 10:
                gevent.sleep(0.1)
            self.pool.apply_async(self.process_aws_account, args=(data, dtime_from, dtime_to))
    self.pool.join()
    if not self.aws_billing_dtime_from:
        return
    dtime_from = self.aws_billing_dtime_from
    if self.config['dtime_to']:
        dtime_to = self.config['dtime_to']
    else:
        dtime_hour_ago = datetime.datetime.utcnow() - datetime.timedelta(hours=1)
        dtime_to = dtime_hour_ago.replace(minute=59, second=59, microsecond=999999)
    # fill farm_usage_d
    dtime_cur = dtime_from
    while dtime_cur <= dtime_to:
        date, hour = dtime_cur.date(), dtime_cur.hour
        try:
            self.analytics.fill_farm_usage_d(date, hour)
        except:
            msg = 'Unable to fill farm_usage_d table for date {0}, hour {1}'.format(date, hour)
            LOG.exception(msg)
        dtime_cur += datetime.timedelta(hours=1)