def get_model_stats_from_es(cfg, model_name, model_version):
    '''
    Returns the single model-stats document for model_name/model_version, e.g.:

    [{'date': '2020-01-17',
      'model': {'name': 's32', 'version': 1},
      'stats': {'g_g_m': [0.32095959595959594, 0.4668649491714752],
                'g_g_f': [0.3654040404040404, 0.4815635452904544],
                'g_g_x': [0.31363636363636366, 0.46398999646418304],
                'a_1': [0.198989898989899, 0.3992572317838901],
                'a_2': [0.2474747474747475, 0.4315630593164027],
                'a_3': [0.295959595959596, 0.45649211860504146],
                'a_4': [0.25757575757575757, 0.43731748751040456],
                't_3G': [0.0, 1.0],
                't_4G': [0.0, 1.0],
                'si_1': [0.37424242424242427, 0.4839470491115894],
                'si_2': [0.4042929292929293, 0.49077533664980666],
                'si_3': [0.22146464646464648, 0.4152500106648333],
                'price_cat_0': [0.0, 1.0],
                'price_cat_1': [0.3333333333333333, 0.4714243623012701],
                'price_cat_2': [0.3333333333333333, 0.47142436230126994],
                'price_cat_3': [0.3333333333333333, 0.47142436230126994],
                'holiday_stats': [0.044444444444444446, 0.20723493215097805]}}]
    '''
    es = ESClient(cfg['es_host'], cfg['es_port'],
                  cfg['es_model_index'], cfg['es_model_type'])
    body = {
        "query": {
            "bool": {
                "must": [
                    {"match": {"model.name": model_name}},
                    {"match": {"model.version": model_version}}
                ]
            }
        }
    }
    doc = es.search(body)
    if doc is None or len(doc) != 1:
        raise Exception('model/version {}/{} not valid'.format(
            model_name, model_version))
    return doc[0]
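# Hedged usage sketch, not part of the original module: judging by the
# docstring above, each entry in the returned 'stats' map is a two-element
# [mean, std] pair per feature, so the document can be consumed like this
# (demo_print_model_stats and the 's32'/1 arguments are illustrative only):
def demo_print_model_stats(cfg):
    doc = get_model_stats_from_es(cfg, 's32', 1)
    for feature, (mean, std) in doc['stats'].items():
        print('{}: mean={:.4f} std={:.4f}'.format(feature, mean, std))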
def test_es_predictions_search(self):
    es_client_predictions = ESClient(self.cfg['es_host'], self.cfg['es_port'],
                                     self.cfg['es_predictions_index'],
                                     self.cfg['es_predictions_type'])
    predictions = es_client_predictions.search({"size": 100})
    # a single assertion suffices; >= 40 already implies the result is non-empty
    self.assertTrue(len(predictions) >= 40)
def test_4(cfg):
    es_client_booking = ESClient(cfg['es_host'], cfg['es_port'],
                                 cfg['es_booking_index'], cfg['es_booking_type'])
    bookings = es_client_booking.search({})  # get at most 1000 results for now
    bookings = optimizer.util.adjust_booking_dates(bookings)
    bookings_map = optimizer.util.get_bookings_map(bookings)
    ands = ['b1', 'b2']
    day = cfg['today']
    # get_bb_count returns a counting function, called here with (ands, minus, day)
    r = optimizer.main.get_bb_count(cfg, bookings_map)(ands, [], day)
    print(r)
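# Usage note (hedged sketch, not part of the original tests): the curried form
# of get_bb_count binds cfg and bookings_map once, so the returned counter can
# be reused across many (ands, minus, day) triples. demo_bb_counts and its
# 'days' parameter are illustrative only:
def demo_bb_counts(cfg, bookings_map, days):
    count_fn = optimizer.main.get_bb_count(cfg, bookings_map)
    for d in days:
        print(d, count_fn(['b1', 'b2'], [], d))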
def test_2(cfg):
    es_client_tbr = ESClient(cfg['es_host'], cfg['es_port'],
                             cfg['es_tbr_index'], cfg['es_tbr_type'])
    es_client_booking = ESClient(cfg['es_host'], cfg['es_port'],
                                 cfg['es_booking_index'], cfg['es_booking_type'])
    bookings = es_client_booking.search({})  # get at most 1000 results for now
    bookings_map = optimizer.util.get_bookings_map(bookings)
    ands = ['b1', 'b2']
    query = optimizer.dao.query_builder.get_tbr_ratio(ands, bookings_map, es_client_tbr)
    print(query)
def test_3(cfg):
    es_client_predictions = ESClient(cfg['es_host'], cfg['es_port'],
                                     cfg['es_predictions_index'],
                                     cfg['es_predictions_type'])
    es_client_booking = ESClient(cfg['es_host'], cfg['es_port'],
                                 cfg['es_booking_index'], cfg['es_booking_type'])
    bookings = es_client_booking.search({})  # get at most 1000 results for now
    bookings_map = optimizer.util.get_bookings_map(bookings)
    ands = ['b1', 'b2']
    day = cfg['today']
    (query, result) = optimizer.dao.query_builder.get_prediction_count(
        ands, [], bookings_map, day, es_client_predictions)
    print(query)
    print(result)
def test_1(cfg):
    es_client_predictions = ESClient(cfg['es_host'], cfg['es_port'],
                                     cfg['es_predictions_index'],
                                     cfg['es_predictions_type'])
    es_client_booking = ESClient(cfg['es_host'], cfg['es_port'],
                                 cfg['es_booking_index'], cfg['es_booking_type'])
    bookings = es_client_booking.search({})  # get at most 1000 results for now
    bookings_map = optimizer.util.get_bookings_map(bookings)
    ands = ['b6']
    ors = ['b7']
    day = optimizer.util.convert_date(cfg['today'])
    # get_prediction_count returns a (query, result) pair (see test_3);
    # unpack it so the query prints cleanly rather than the whole tuple
    query, _ = optimizer.dao.query_builder.get_prediction_count(
        ands, ors, bookings_map, day, es_client_predictions)
    print(query)
def get_model_stats(cfg, model_name, model_version):
    es = ESClient(cfg['es_host'], cfg['es_port'],
                  cfg['es_model_index'], cfg['es_model_type'])
    body = {
        "query": {
            "bool": {
                "must": [
                    {"match": {"model.name": model_name}},
                    {"match": {"model.version": model_version}}
                ]
            }
        }
    }
    doc = es.search(body)
    if doc is None or len(doc) != 1:
        raise Exception(
            'model/version {}/{} not valid'.format(model_name, model_version))
    return doc[0]
def test_5(cfg):
    es_client_booking = ESClient(cfg['es_host'], cfg['es_port'],
                                 cfg['es_booking_index'], cfg['es_booking_type'])
    bookings = es_client_booking.search({})  # get at most 1000 results for now
    bookings = optimizer.util.adjust_booking_dates(bookings)
    bookings_map = optimizer.util.get_bookings_map(bookings)
    ands = ['b1', 'b2']
    day = cfg['today']
    df_day = day
    df_ands = ['b1']
    df_allocated = {}
    df_amount = 4000  # inventory of bb (df row)
    booking = bookings[0]
    total_inventory = 10000  # total inventory of connected resources
    h, _ = optimizer.algo.hwm.update_allocation_for_booking(
        None, day, booking, total_inventory)
    r = h(df_day, df_ands, df_allocated, df_amount)
    print(r)
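# Toy illustration (an assumption; optimizer.algo.hwm's internals are not
# shown in this section): a high-water-mark allocator typically walks bookings
# in priority order and gives each booking as much of a bucket's inventory as
# its remaining demand allows. toy_hwm is a hypothetical stand-in:
def toy_hwm(demand, bucket_amount, already_allocated):
    # amount the booking still needs, capped by what the bucket holds
    remaining = max(demand - already_allocated, 0)
    return min(remaining, bucket_amount)

# e.g. a booking needing 5000 impressions against a bucket of 4000,
# with nothing allocated yet: toy_hwm(5000, 4000, 0) -> 4000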
import time

from imscommon.es.ims_esclient import ESClient
from pyspark import SparkContext
from pyspark.sql import HiveContext
from pyspark.sql.functions import explode, udf, collect_list
from pyspark.sql.types import MapType, StringType, IntegerType

import optimizer.util

# generate_resources, hwm_allocation, lock_booking, remove_booking_buckets,
# save_booking_buckets_in_es, add_ucdoc_bb_allocation_map and
# agg_allocation_maps are defined elsewhere in this module.


def run(cfg):
    global hive_context
    sc = SparkContext()
    hive_context = HiveContext(sc)
    sc.setLogLevel('WARN')

    # ESClient requires host ip
    es_client_booking = ESClient(cfg['es_host'], cfg['es_port'],
                                 cfg['es_booking_index'], cfg['es_booking_type'])
    bookings = es_client_booking.search({})  # get at most 1000 results for now
    bookings = optimizer.util.filter_valid_bookings(bookings)
    # adjust dates in bookings
    bookings = optimizer.util.adjust_booking_dates(bookings)
    bookings_map = optimizer.util.get_bookings_map(bookings)

    df = hive_context.createDataFrame(sc.emptyRDD(),
                                      optimizer.util.get_common_pyspark_schema())
    today = cfg['today']  # YYYY-MM-DD
    days = optimizer.util.get_days_from_bookings(today, bookings)

    df = generate_resources(cfg, df, bookings_map, days, bookings, hive_context)
    # Row(day='2018-04-02', ands=['b1', 'b3', 'b2'], minus=[], allocated={}, amount=43562)
    print('defining resources')
    df.cache()
    print(df.take(1))

    # run the allocation
    df = hwm_allocation(df, bookings, days)
    # Row(day='2018-04-02', ands=['b1', 'b3', 'b2'], minus=[], amount=43562,
    #     allocated={'b2': 800, 'b3': 1000, 'b1': 500})
    print('bb-bookings allocation')
    df.cache()
    print(df.take(1))

    # lock bookings, remove old bbs, save the new booking-buckets into es, unlock
    lock_booking(es_client_booking, True)
    remove_booking_buckets(cfg, days)
    df = save_booking_buckets_in_es(cfg, df)
    print('bbs saved')
    df.cache()
    print(df.take(1))
    lock_booking(es_client_booking, False)

    # use only tomorrow to create the allocation plan
    day = days[-1]
    tomorrow = optimizer.util.get_next_date(day)
    df = df.filter(df.day == tomorrow)

    # this method adds the per-ucdoc allocation_map to the bbs
    df = add_ucdoc_bb_allocation_map(cfg, df, bookings_map)
    # [Row(day='2018-04-02', ands=['b1', 'b3', 'b2'], minus=[], amount=43562,
    #      allocated={'b2': 800, 'b3': 1000, 'b1': 500},
    #      allocation_map={'minusonepage,3,5G,g_x,2,pt,1002,icc': {'b2': 1, 'b3': 2, 'b1': 1},
    #                      'magazinelock,2,3G,g_x,3,pt,1005,icc': {'b2': 56, 'b3': 70, 'b1': 35},
    #                      'magazinelock,2,4G,g_x,3,pt,1005,icc': {'b2': 56, 'b3': 70, 'b1': 35},
    #                      'minusonepage,3,5G,g_x,2,pt,1003,icc': {'b2': 6, 'b3': 8, 'b1': 4},
    #                      'minusonepage,1,4G,g_x,2,pt,1003,icc': {'b2': 16, 'b3': 20, 'b1': 10},
    #                      'minusonepage,2,4G,g_f,4,pt,1002,icc': {'b2': 12, 'b3': 15, 'b1': 8},
    #                      'cloudFolder,2,5G,g_x,3,pt,1005,icc': {'b2': 57, 'b3': 72, 'b1': 36},
    #                      'minusonepage,2,3G,g_x,3,pt,1002,icc': {'b2': 3, 'b3': 4, 'b1': 2},
    #                      'minusonepage,1,3G,g_x,1,pt,1005,icc': {'b2': 27, 'b3': 33, 'b1': 17},
    #                      'minusonepage,1,3G,g_x,4,pt,1004,icc': {'b2': 72, 'b3': 90, 'b1': 45},
    #                      'magazinelock,2,5G,g_x,4,pt,1004,icc': {'b2': 32, 'b3': 40, 'b1': 20},
    #                      'cloudFolder,2,3G,g_f,3,pt,1002,icc': {'b2': 16, 'b3': 20, 'b1': 10},
    #                      'cloudFolder,3,5G,g_f,2,pt,1004,icc': {'b2': 27, 'b3': 34, 'b1': 17}})]
    print('ucdocs-bookings allocation')
    df.cache()
    print(df.take(1))

    # at this point we have a df which is an allocation of bookings to bbs
    df = df.select(df.day, explode(df.allocation_map))
    # Row(day='2018-04-02', key='magazinelock,3,5G,g_x,2,pt,1004,icc',
    #     value={'b2': 14, 'b3': 18, 'b1': 9})
    print('exploded')
    df.cache()
    print(df.take(1))

    # agg all the allocation maps for a ucdoc
    _map_type = MapType(StringType(), IntegerType())
    _audf = udf(agg_allocation_maps, _map_type)
    df = df.groupBy('key').agg(_audf(collect_list('value')).alias('allmap'))
    # [Row(key='cloudFolder,3,5G,g_f,2,pt,1004,icc', allmap={'b2': 27, 'b3': 34, 'b1': 17})]
    print('final aggregation')
    df.cache()
    print(df.take(1))

    # writing into hdfs
    filename = 'allmap-{}-{}'.format(
        optimizer.util.convert_date_remove_dash(day), str(int(time.time())))
    df.write.save(filename, format='json')
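# Hedged sketch (an assumption, not the original implementation): the
# agg_allocation_maps udf used in run() is not shown in this section. Based
# on the sample rows above, it merges a list of {booking_id: count} maps by
# summing counts per booking id; a minimal version could look like this:
def agg_allocation_maps(maps):
    # e.g. [{'b1': 2}, {'b1': 3, 'b2': 1}] -> {'b1': 5, 'b2': 1}
    merged = {}
    for m in maps:
        if not m:
            continue
        for booking_id, count in m.items():
            merged[booking_id] = merged.get(booking_id, 0) + count
    return merged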
from imscommon.es.ims_esclient import ESClient
from pyspark import SparkContext, SparkConf, Row
from pyspark.sql.functions import concat_ws, count, lit, col, udf, expr, collect_list
from pyspark.sql import HiveContext
from pyspark.sql.types import IntegerType, StringType
import math

# read es
es_host = '10.193.217.111'
es_port = '9200'
es_index = 'predictions_02052020'
es_type = 'doc'
es = ESClient(es_host, es_port, es_index, es_type)
hits = es.search({"size": 1000})

# index the hourly prediction values by (uckey, day, hour, price_cat)
es_records = {}
for ucdoc in hits:
    uckey = ucdoc['uckey']
    predictions = ucdoc['ucdoc']['predictions']
    for day, hours in predictions.items():
        for hour, hour_doc in enumerate(hours):
            es_records[(uckey, day, hour, '0')] = hour_doc['h0']
            es_records[(uckey, day, hour, '1')] = hour_doc['h1']
            es_records[(uckey, day, hour, '2')] = hour_doc['h2']
            es_records[(uckey, day, hour, '3')] = hour_doc['h3']

# print(next(iter(es_records.items())))
# print('************')
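# Hedged continuation sketch (an assumption; this step is not in the original
# snippet): the Spark imports above suggest the records are next loaded into a
# DataFrame. One way to do that with the already-imported Row and HiveContext:
sc = SparkContext()
hive_context = HiveContext(sc)
rows = [Row(uckey=k[0], day=k[1], hour=k[2], price_cat=k[3], value=v)
        for k, v in es_records.items()]
df = hive_context.createDataFrame(rows)
df.show(5)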