def process_envs(self, envs, dtime_from, dtime_to):
    """Import Azure billing records for the given environments.

    Downloads usage rows for the [dtime_from, dtime_to] interval, prices
    each record against the subscription's month-to-date meter usage, and
    inserts the records into the analytics DB through the worker pool.
    Errors are reported via helper.handle_error instead of propagating.

    :param envs: environment dicts; all are assumed to share one Azure
        subscription (credentials are taken from the first env)
    :param dtime_from: interval start (datetime)
    :param dtime_to: interval end (datetime)
    """
    envs_ids = list(set([env['id'] for env in envs]))
    try:
        # credentials of the first env are used for the whole batch
        self.load_access_token(envs[0])
        subscription_id = envs[0]['azure.subscription_id']
        access_token = envs[0]['azure.access_token']
        # month-to-date usage per meter is needed because pricing is
        # tiered: a record's cost depends on the quantity already
        # consumed this month on the same meter
        begin_of_month = dtime_from.replace(day=1, hour=0)
        meters_ids_usage = self.get_meters_ids_usage(
            subscription_id, access_token, begin_of_month, dtime_from)
        for rows in self.get_usage(subscription_id, access_token, dtime_from, dtime_to):
            records = self.get_records(rows)
            # drop records already bound to a foreign env; records without
            # an env_id are kept so load_records_data can resolve them
            records = [record for record in records
                       if not (record.get('env_id') and record['env_id'] not in envs_ids)]
            self.load_records_data(records)
            # after resolution keep only records belonging to our envs
            records = [record for record in records if record['env_id'] in envs_ids]
            for record in records:
                # running month-to-date quantity per (meter_id, month);
                # the record's cost must be computed BEFORE its own
                # quantity is folded into the accumulator
                meters_ids_usage.setdefault(record['meter_id'], {}).setdefault(
                    record['dtime'].month, 0.0)
                self.set_cost(record, subscription_id, access_token,
                              meters_ids_usage[record['meter_id']][record['dtime'].month])
                meters_ids_usage[record['meter_id']][record['dtime'].month] += record['quantity']
            for chunk in helper.chunks(records, insert_chunk_size):
                self.pool.wait()
                self.pool.apply_async(self.analytics.insert_records, (chunk,),
                                      {'callback': self.on_insert_records})
                gevent.sleep(0)  # force switch
    except:
        msg = 'Azure billing for environments {} failed'
        msg = msg.format(envs_ids)
        helper.handle_error(message=msg)
def __call__(self):
    """Entry point of the AWS detailed billing task.

    Determines the billing interval, schedules ``process_envs`` jobs on
    the worker pool for regular AWS accounts and for payer (consolidated
    billing) accounts, waits for completion, and always clears the
    download locks and removes the CSV cache directory afterwards.

    Raises whatever the processing raised after killing the pool and
    reporting the error.
    """
    try:
        dtime_from, dtime_to = self.get_billing_interval()
        msg = 'AWS billing interval: {} - {}'.format(dtime_from, dtime_to)
        LOG.info(msg)
        self._create_cache_dir()

        # regular accounts: only envs that pay their own bill
        # (no payer account configured)
        self._schedule_processing(
            self.analytics.load_aws_accounts_ids(),
            self.analytics.load_aws_accounts_ids_envs,
            lambda env: env.get('ec2.detailed_billing.payer_account') in (None, ''),
            dtime_from, dtime_to)

        # payer (consolidated billing) accounts: no extra filtering
        self._schedule_processing(
            self.analytics.load_aws_payers_accounts(),
            self.analytics.load_aws_payers_accounts_envs,
            lambda env: True,
            dtime_from, dtime_to)

        self.pool.join()
    except:
        self.pool.kill()
        helper.handle_error(message='AWS billing failed')
        raise
    finally:
        self.downloading_locks = {}
        try:
            self._remove_cache_dir()
        except:
            msg = 'Unable to remove cache dir {}'
            msg = msg.format(self.cache_dir)
            helper.handle_error(message=msg, level='error')

def _schedule_processing(self, accounts_ids, load_envs, extra_filter,
                         dtime_from, dtime_to):
    """Schedule process_envs jobs for accounts_ids, 100 accounts at a time.

    :param accounts_ids: AWS account ids to process
    :param load_envs: callable mapping a chunk of account ids to env dicts
    :param extra_filter: predicate applied to each env after credentials
        are loaded (in addition to the detailed-billing-enabled check)
    :param dtime_from: interval start passed through to process_envs
    :param dtime_to: interval end passed through to process_envs
    """
    for chunk in helper.chunks(accounts_ids, 100):
        envs = load_envs(chunk)
        envs = [env for env in envs if env.get('ec2.is_enabled', '0') == '1']
        self.analytics.load_env_credentials(envs, platform='ec2')
        envs = [env for env in envs
                if env.get('ec2.detailed_billing.enabled', '0') == '1'
                and extra_filter(env)]
        if not envs:
            continue
        self._wait_pool()
        self.pool.apply_async(self.process_envs, args=(envs, dtime_from, dtime_to))
def sort_nodes(cloud_data, cred, envs_ids):
    """Split every region's 'nodes' into 'managed' and 'not_managed'.

    A node is managed when it maps to a known Scalr server belonging to
    one of envs_ids. The 'nodes' key is removed from each region entry.
    Returns the mutated cloud_data.
    """
    platform = cred.platform

    if platform == 'gce':
        # GCE nodes already carry a server_id; validate each region's ids
        # against servers_history with a single IN(...) query
        history_query = ("SELECT server_id "
                         "FROM servers_history "
                         "WHERE server_id IN ({})")
        for region_data in cloud_data:
            region_data['managed'] = list()
            region_data['not_managed'] = list()
            candidates = [str(node['server_id'])
                          for node in region_data['nodes']
                          if node.get('server_id')]
            if candidates:
                # str(list)[1:-1] renders a quoted, comma-separated id list
                rows = app.scalr_db.execute(history_query.format(str(candidates)[1:-1]))
                known_ids = [row['server_id'] for row in rows]
            else:
                known_ids = []
            for node in region_data['nodes']:
                is_managed = (node.get('server_id')
                              and node['server_id'] in known_ids
                              and node['env_id'] in envs_ids)
                if is_managed:
                    region_data['managed'].append(node)
                else:
                    region_data['not_managed'].append(node)
            del region_data['nodes']
        return cloud_data

    # every platform except gce: resolve server ids via instance-id lookups
    url_key = analytics.url_key_map[platform]
    url = cred[url_key] if url_key else ''
    for region_data in cloud_data:
        cloud_location = region_data['region']
        for chunk in helper.chunks(region_data['nodes'], 200):
            app.analytics.get_server_id_by_instance_id(
                chunk, platform, cloud_location, envs_ids=envs_ids, url=url)
        region_data['managed'] = list()
        region_data['not_managed'] = list()
        for node in region_data['nodes']:
            if 'server_id' in node:
                region_data['managed'].append(node)
            else:
                region_data['not_managed'].append(node)
        del region_data['nodes']
    return cloud_data
def sort_nodes(cloud_data, cred, envs_ids):
    """Partition each region's 'nodes' into 'managed'/'not_managed' lists.

    Managed nodes correspond to known Scalr servers in envs_ids; all
    others are not_managed. Deletes the 'nodes' key from every region
    and returns the mutated cloud_data.
    """
    platform = cred.platform

    if platform == 'gce':
        # one IN(...) lookup per region against servers_history
        sql = ("SELECT server_id "
               "FROM servers_history "
               "WHERE server_id IN ({})")
        for region in cloud_data:
            region['managed'] = list()
            region['not_managed'] = list()
            ids = [str(n['server_id']) for n in region['nodes'] if n.get('server_id')]
            found = []
            if ids:
                # str(list)[1:-1] produces a quoted, comma-separated id list
                found = [r['server_id']
                         for r in app.scalr_db.execute(sql.format(str(ids)[1:-1]))]
            for n in region['nodes']:
                if n.get('server_id') and n['server_id'] in found and n['env_id'] in envs_ids:
                    region['managed'].append(n)
                else:
                    region['not_managed'].append(n)
            del region['nodes']
        return cloud_data

    # non-gce platforms: server ids are resolved by instance id
    url_key = analytics.url_key_map[platform]
    url = cred[url_key] if url_key else ''
    for region in cloud_data:
        location = region['region']
        for chunk in helper.chunks(region['nodes'], 200):
            app.analytics.get_server_id_by_instance_id(
                chunk, platform, location, envs_ids=envs_ids, url=url)
        region['managed'] = list()
        region['not_managed'] = list()
        for n in region['nodes']:
            if 'server_id' in n:
                region['managed'].append(n)
            else:
                region['not_managed'].append(n)
        del region['nodes']
    return cloud_data
def sort_nodes(cloud_data, cred, envs_ids):
    """Split every region's 'nodes' into 'managed' and 'not_managed'.

    A node is managed when its server_id exists in both `servers` and
    `servers_history` and its env_id is in envs_ids. Removes the 'nodes'
    key from each region entry and returns the mutated cloud_data.
    """
    platform = cred.platform
    # gce
    if platform == 'gce':
        # Previously this issued one EXISTS query per node (N+1 pattern).
        # Batch each region's server ids into a single IN(...) query with
        # the same semantics: the id must be present in both `servers`
        # and `servers_history`.
        query = ("SELECT s.server_id AS server_id "
                 "FROM servers s "
                 "JOIN servers_history h "
                 "ON s.server_id=h.server_id "
                 "WHERE s.server_id IN ({})")
        for region_data in cloud_data:
            region_data['managed'] = list()
            region_data['not_managed'] = list()
            servers_ids = [str(node['server_id'])
                           for node in region_data['nodes']
                           if node.get('server_id', '')]
            if servers_ids:
                # str(list)[1:-1] yields a quoted, comma-separated id list
                results = set(
                    result['server_id']
                    for result in app.scalr_db.execute(query.format(str(servers_ids)[1:-1])))
            else:
                results = set()
            for node in region_data['nodes']:
                if node.get('server_id', '') and \
                        node['server_id'] in results and \
                        node['env_id'] in envs_ids:
                    region_data['managed'].append(node)
                else:
                    region_data['not_managed'].append(node)
            del region_data['nodes']
        return cloud_data
    # all platforms exclude gce
    url_key = analytics.url_key_map[platform]
    url = cred[url_key] if url_key else ''
    for region_data in cloud_data:
        cloud_location = region_data['region']
        for chunk in helper.chunks(region_data['nodes'], 200):
            app.analytics.get_server_id_by_instance_id(
                chunk, platform, cloud_location, envs_ids=envs_ids, url=url)
        region_data['managed'] = list()
        region_data['not_managed'] = list()
        for node in region_data['nodes']:
            if 'server_id' in node:
                region_data['managed'].append(node)
            else:
                region_data['not_managed'].append(node)
        del region_data['nodes']
    return cloud_data
def sort_nodes(cloud_data, cred, envs_ids):
    """Split every region's 'nodes' into 'managed' and 'not_managed'.

    A node is managed when its server_id exists in both `servers` and
    `servers_history` and its env_id is in envs_ids. Removes the 'nodes'
    key from each region entry and returns the mutated cloud_data.
    """
    platform = cred.platform
    # gce
    if platform == 'gce':
        # Previously this issued one EXISTS query per node (N+1 pattern).
        # Batch each region's server ids into a single IN(...) query with
        # the same semantics: the id must be present in both `servers`
        # and `servers_history`.
        query = ("SELECT s.server_id AS server_id "
                 "FROM servers s "
                 "JOIN servers_history h "
                 "ON s.server_id=h.server_id "
                 "WHERE s.server_id IN ({})")
        for region_data in cloud_data:
            region_data['managed'] = list()
            region_data['not_managed'] = list()
            servers_ids = [str(node['server_id'])
                           for node in region_data['nodes']
                           if node.get('server_id', '')]
            if servers_ids:
                # str(list)[1:-1] yields a quoted, comma-separated id list
                results = set(
                    result['server_id']
                    for result in app.scalr_db.execute(query.format(str(servers_ids)[1:-1])))
            else:
                results = set()
            for node in region_data['nodes']:
                if node.get('server_id', '') and \
                        node['server_id'] in results and \
                        node['env_id'] in envs_ids:
                    region_data['managed'].append(node)
                else:
                    region_data['not_managed'].append(node)
            del region_data['nodes']
        return cloud_data
    # all platforms exclude gce
    url_key = analytics.url_key_map[platform]
    url = cred[url_key] if url_key else ''
    for region_data in cloud_data:
        cloud_location = region_data['region']
        for chunk in helper.chunks(region_data['nodes'], 200):
            # NOTE(review): this variant passes arguments positionally, in a
            # different order than the keyword-based call sites elsewhere in
            # this file — preserved as-is; confirm against the callee.
            app.analytics.get_server_id_by_instance_id(chunk, envs_ids, platform,
                                                       cloud_location, url)
        region_data['managed'] = list()
        region_data['not_managed'] = list()
        for node in region_data['nodes']:
            if 'server_id' in node:
                region_data['managed'].append(node)
            else:
                region_data['not_managed'].append(node)
        del region_data['nodes']
    return cloud_data
def process_csv_file(self, csv_file, envs, dtime_from=None, dtime_to=None):
    """Parse an AWS detailed billing CSV and insert its records.

    Reads the CSV in batches, filters records down to the given
    environments, de-duplicates them by record_id, and inserts them into
    the analytics DB through the worker pool.

    :param csv_file: path to the detailed billing CSV
    :param envs: environment dicts the data may belong to
    :param dtime_from: optional interval start for the CSV reader
    :param dtime_to: optional interval end for the CSV reader
    """
    envs_ids = list(set(int(env['id']) for env in envs))

    def acceptable(rec):
        # must be tied to a server, must not belong to a foreign env, and
        # usage records (cost_distr_type == 1) must not already exist
        if not rec.get('server_id'):
            return False
        if rec.get('env_id') and rec['env_id'] not in envs_ids:
            return False
        if rec['cost_distr_type'] == 1 and self.analytics.record_exists(rec):
            return False
        return True

    for rows in self.csv_reader(csv_file, envs, dtime_from=dtime_from, dtime_to=dtime_to):
        batch = self.get_records(rows)
        self.fix_records_with_missing_server_id(batch)
        batch = [rec for rec in batch if acceptable(rec)]
        self.load_records_data(batch)
        batch = [rec for rec in batch if rec['env_id'] in envs_ids]
        # remove duplicates record with same record_id
        batch = {rec['record_id']: rec for rec in batch}.values()
        for chunk in helper.chunks(batch, insert_chunk_size):
            self.pool.wait()
            self.pool.apply_async(self.analytics.insert_records, (chunk,),
                                  {'callback': self.on_insert_records})
            gevent.sleep(0)  # force switch
def __call__(self):
    """Entry point of the Azure billing task.

    Determines the billing interval, schedules ``process_envs`` jobs on
    the worker pool per chunk of Azure subscriptions, and waits for all
    jobs to finish. On failure the pool is killed, the error is reported,
    and the exception is re-raised.
    """
    try:
        dtime_from, dtime_to = self.get_billing_interval()
        LOG.info('Azure billing interval: {} - {}'.format(dtime_from, dtime_to))
        subscriptions_ids = self.analytics.load_azure_subscriptions_ids()
        for chunk in helper.chunks(subscriptions_ids, 100):
            envs = self.analytics.load_azure_subscriptions_ids_envs(chunk)
            self.analytics.load_env_credentials(envs, platform='azure')
            if not envs:
                continue
            self._wait_pool()
            self.pool.apply_async(self.process_envs, args=(envs, dtime_from, dtime_to))
        self.pool.join()
    except:
        self.pool.kill()
        helper.handle_error(message='Azure billing failed')
        raise
def __call__(self):
    """Entry point of the Scalr Poller billing task.

    Walks the billing interval hour by hour and, for every configured
    platform, prices the poller's server records and inserts them into
    the analytics DB through the worker pool. Per-platform failures are
    reported and skipped; a failure outside that loop kills the pool and
    re-raises.
    """
    try:
        dtime_from, dtime_to = self.get_billing_interval()
        LOG.info('Scalr Poller billing interval: {} - {}'.format(dtime_from, dtime_to))
        one_hour = datetime.timedelta(hours=1)
        dtime_cur = dtime_from
        while dtime_cur <= dtime_to:
            date, hour = dtime_cur.date(), dtime_cur.hour
            for platform in self.config['platform']:
                try:
                    LOG.debug("Process Scalr Poller data, date {}, hour {}, platform '{}'"
                              .format(date, hour, platform))
                    for records in self.analytics.get_poller_servers(date, hour,
                                                                     platform=platform):
                        LOG.debug('Scalr Poller records for processing: {}'.format(len(records)))
                        prices = self.analytics.get_prices(records)
                        for record in records:
                            # missing price falls back to zero cost
                            record['cost'] = self.analytics.get_cost_from_prices(record, prices) or 0
                            record['num'] = 1.0
                            record['cost_distr_type'] = 1
                        for chunk in helper.chunks(records, insert_chunk_size):
                            self.pool.wait()
                            self.pool.apply_async(self.analytics.insert_records, (chunk,),
                                                  {'callback': self.on_insert_records})
                            gevent.sleep(0)  # force switch
                except:
                    helper.handle_error(
                        message="Scalr Poller billing unable to process date {}, hour {}, platform '{}'"
                        .format(date, hour, platform))
            self.pool.join()
            dtime_cur += one_hour
    except:
        self.pool.kill()
        helper.handle_error(message='Scalr Poller billing failed')
        raise
def delete_data(self, csv_file, envs, period):
    """Delete previously imported AWS detailed billing data.

    Removes matching rows from aws_billing_records, usage_h /
    usage_servers_h, usage_d and farm_usage_d for the given environments
    and period, all inside a single explicit transaction: everything is
    rolled back if any statement fails.

    :param csv_file: path to the billing CSV the records came from
    :param envs: environment dicts whose data is removed
    :param period: (dtime_from, dtime_to) tuple bounding the deletion
    """
    envs_ids = list(set(int(env['id']) for env in envs))
    dtime_from, dtime_to = period
    msg = 'Deleting AWS detailed billing data for environments: {}, period: {} - {}'
    msg = msg.format(envs_ids, dtime_from, dtime_to)
    LOG.info(msg)
    with self.analytics.lock:
        # run all deletes in one explicit transaction
        self.analytics.analytics_db.autocommit(False)
        try:
            # aws_billing_records
            for rows in self.csv_reader(csv_file, envs, dtime_from=dtime_from, dtime_to=dtime_to):
                records_ids = [row['RecordId'] for row in rows]
                for chunk in helper.chunks(records_ids, 1000):
                    if chunk:
                        # str(list)[1:-1] yields a quoted, comma-separated id list
                        query = (
                            "DELETE FROM aws_billing_records "
                            "WHERE record_id IN ({record_id})"
                        ).format(record_id=str(chunk)[1:-1])
                        self.analytics.analytics_db.execute(query)
            _dtime_from = dtime_from
            # delete aggregated usage in 15-day windows so each DELETE
            # statement stays bounded in size
            step_days = 15
            while _dtime_from < dtime_to:
                _dtime_to = min(_dtime_from + datetime.timedelta(days=step_days), dtime_to)
                # usage_servers_h, usage_h (multi-table DELETE via LEFT JOIN)
                query = (
                    "DELETE uh, us "
                    "FROM usage_h uh "
                    "LEFT JOIN usage_servers_h us ON uh.usage_id=us.usage_id "
                    "WHERE uh.platform='ec2' "
                    "AND uh.dtime BETWEEN '{dtime_from}' AND '{dtime_to}' "
                    "AND uh.env_id IN ({env_id})"
                ).format(env_id=str(envs_ids)[1:-1], dtime_from=_dtime_from, dtime_to=_dtime_to)
                self.analytics.analytics_db.execute(query)
                # usage_d
                query = (
                    "DELETE FROM usage_d "
                    "WHERE platform='ec2' "
                    "AND date BETWEEN '{date_from}' AND '{date_to}' "
                    "AND env_id IN ({env_id})"
                ).format(env_id=str(envs_ids)[1:-1], date_from=_dtime_from.date(),
                         date_to=_dtime_to.date())
                self.analytics.analytics_db.execute(query)
                # farm_usage_d
                query = (
                    "DELETE FROM farm_usage_d "
                    "WHERE platform='ec2' "
                    "AND date BETWEEN '{date_from}' AND '{date_to}' "
                    "AND env_id IN ({env_id})"
                ).format(env_id=str(envs_ids)[1:-1], date_from=_dtime_from.date(),
                         date_to=_dtime_to.date())
                self.analytics.analytics_db.execute(query)
                _dtime_from += datetime.timedelta(days=step_days)
            self.analytics.analytics_db.commit()
        except:
            self.analytics.analytics_db.rollback()
            raise
        finally:
            # restore autocommit even when the transaction failed
            self.analytics.analytics_db.autocommit(True)