示例#1
0
def data_process(ecs_lines, input_lines):
    """Parse the ECS history records and the prediction input description.

    Line terminators (``\\r\\n`` or ``\\n``) are stripped from every line
    before parsing, so the result does not depend on the platform the files
    were written on.

    Args:
        ecs_lines: iterable of tab-separated history records. Field 1 is a
            flavor name whose numeric id starts at character 6 (presumably
            ``flavorN``); field 2 starts with a ``YYYY-MM-DD`` date.
        input_lines: iterable of the input file lines, read by position:
            line 0 holds space-separated integers describing the physical
            server, line 2 the number of virtual machine flavors, the
            following ``virtual_num`` lines hold ``name cpu mem`` triples,
            the 4th line from the end is the optimisation target
            (``CPU`` or anything else), and the last two lines start with
            the ``YYYY-MM-DD`` start and end dates. Other lines are not read.

    Returns:
        A 7-tuple ``(ecs_data_lr, phy_server, virtual_num, virtual_info,
        cpu_if, start_date, end_date)`` where

        * ``ecs_data_lr`` maps ``'YYYYMMDD'`` to a per-flavor counter built
          with ``Tool.zeros(NUM_OF_FLAVOR)`` (index ``flavor - 1``),
        * ``phy_server`` is the list of integers from line 0,
        * ``virtual_info`` maps flavor id to ``[cpu, mem // 1024]``,
        * ``cpu_if`` is 1 when the target line equals ``'CPU'``, else 0,
        * ``start_date`` / ``end_date`` are the ``'YYYY-MM-DD'`` strings.
    """

    def _strip_eol(text):
        # Remove trailing CR/LF only; inner whitespace is significant.
        return text.rstrip('\r\n')

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('-----start data processing-----')
    started_at = time.time()

    # ---- ECS history: count requests per day and per flavor ----
    ecs_data_lr = {}
    for line in ecs_lines:
        fields = line.split('\t')
        flavor = int(fields[1][6:])
        # Only flavor ids up to 15 are tracked.
        # NOTE(review): 15 is hard-coded while the counter length is
        # NUM_OF_FLAVOR - confirm the two are meant to agree.
        if flavor > 15:
            continue
        date = _strip_eol(fields[2])[:10].replace('-', '')
        if date not in ecs_data_lr:
            ecs_data_lr[date] = Tool.zeros(NUM_OF_FLAVOR)
        # The record that creates the day's counter is counted as well.
        ecs_data_lr[date][flavor - 1] += 1

    # ---- prediction input description ----
    input_contents = [_strip_eol(line) for line in input_lines]
    phy_server = [int(x) for x in input_contents[0].split(' ')]

    virtual_num = int(input_contents[2])
    virtual_info = {}
    for offset in range(virtual_num):
        parts = input_contents[3 + offset].split(' ')
        # Memory is divided by 1024 (presumably MB -> GB).
        virtual_info[int(parts[0][6:])] = [int(parts[1]), int(parts[2]) // 1024]

    cpu_if = 1 if input_contents[-4] == 'CPU' else 0

    start_date_temp = input_contents[-2][:10]
    end_date_temp = input_contents[-1][:10]
    # The [year, month, day] lists are only used for the log output below;
    # callers receive the 'YYYY-MM-DD' strings.
    start_date = [int(start_date_temp[:4]), int(start_date_temp[5:7]), int(start_date_temp[8:])]
    end_date = [int(end_date_temp[:4]), int(end_date_temp[5:7]), int(end_date_temp[8:])]

    print('physical server info : %s' % phy_server)
    print('number of virtual machine : %s' % virtual_num)
    print('info of virtual machine: %s' % virtual_info)
    print('cpu & mem switch : %s' % cpu_if)
    print("start date is: {}\nend date is: {}".format(start_date, end_date))

    print('data prcess time using: {}'.format(time.time() - started_at))
    print('-----end data processing-----')

    return ecs_data_lr, phy_server, virtual_num, virtual_info, \
           cpu_if, start_date_temp, end_date_temp
示例#2
0
def magic(ecs_data, start, early_round, interval):
    """Aggregate per-flavor counts over a series of sliding date windows.

    Round ``i`` (0-based) sums every entry of ``ecs_data`` whose date lies
    between ``start - (interval + i)`` days and ``start - (i + 1)`` days,
    both bounds inclusive. Each round therefore shifts the window one day
    further into the past.

    Args:
        ecs_data: dict mapping ``'YYYYMMDD'`` strings to a sequence of
            per-flavor counts.
        start: ``datetime.datetime`` marking the reference point; it is
            compared directly against the parsed (midnight) dates.
        early_round: number of windows to produce.
        interval: number of days used to place the initial lower bound.

    Returns:
        A list with ``early_round`` rows, each created by
        ``Tool.zeros(NUM_OF_FLAVOR)`` and holding the summed counts per
        flavor for that window (row 0 is the most recent window).
    """
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('-----start magic-----')

    # Parse every date key once instead of once per round.
    dated_counts = [
        (datetime.datetime.strptime(day_key, '%Y%m%d'), counts)
        for day_key, counts in ecs_data.items()
    ]

    one_day = datetime.timedelta(days=1)
    window_end = start
    window_start = start - datetime.timedelta(days=interval)

    count_list = []
    for _ in range(early_round):
        window_end -= one_day
        totals = Tool.zeros(NUM_OF_FLAVOR)
        for day, counts in dated_counts:
            if window_end >= day >= window_start:
                # enumerate (not zip) so that an over-long counts row still
                # raises IndexError instead of being silently truncated.
                for idx, amount in enumerate(counts):
                    totals[idx] += amount
        count_list.append(totals)
        window_start -= one_day

    print('-----end magic-----')
    return count_list
示例#3
0
def predict(lg_count_list,
            lr_count_list,
            rf_count_list,
            es_count_list,
            virtual_info,
            lg_round,
            mix_rf,
            mix_lr,
            mix_es,
            rf_day_gap,
            lr_day_gap,
            es=3,
            alpha=0.5,
            seed=1000,
            floor=0.0,
            rf_diff=0):
    """Blend four per-flavor predictions into one integer forecast.

    For every flavor id in ``virtual_info`` four estimates are produced
    (labelled in the original comments as Lagrange interpolation, random
    forest, linear regression and exponential smoothing) and combined as::

        int(mix_lr * LR + mix_rf * RF + mix_es * ES
            + (1 - mix_lr - mix_rf - mix_es) * LG + floor)

    Args:
        lg_count_list: rows of per-flavor counts; rows ``0..lg_round - 1``
            are read for the Lagrange estimate.
        lr_count_list: rows of per-flavor counts for the linear regression.
        rf_count_list: rows of per-flavor counts for the random forest.
        es_count_list: passed through to ``es_predict``.
        virtual_info: dict keyed by flavor id; only the keys are used here,
            the dict itself is passed to ``es_predict``.
        lg_round: number of rows of ``lg_count_list`` to use.
        mix_rf, mix_lr, mix_es: blend weights; the Lagrange estimate gets
            the remainder ``1 - mix_lr - mix_rf - mix_es``.
        rf_day_gap: length of each random-forest training sample.
        lr_day_gap: length of each linear-regression training row.
        es, alpha: passed through to ``es_predict``.
        seed: passed through to ``rf_predict``.
        floor: constant added before ``int()`` truncation (e.g. 0.5 turns
            the truncation into round-half-up for positive values).
        rf_diff: 0 trains the random forest on the raw series; any other
            value trains on ``Tool.line_diff`` of the series and adds the
            last raw value back to the result.

    Returns:
        dict mapping flavor id to the blended integer prediction.
    """
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('-----start predict-----')
    lg_predict_result = {}
    rf_predict_result = {}
    lr_predict_result = {}
    es_predict_result = {}

    for flavor in virtual_info:
        col = flavor - 1

        # ----------- Lagrange interpolation ----------
        # Smooth each day with Tool.mid over rows i-3 .. i+2 (clipped to the
        # valid range), then interpolate through three window means.
        lg_window = []
        for i in range(lg_round):
            lg_window.append(
                Tool.mid([
                    lg_count_list[j][col] for j in range(i - 3, i + 3)
                    if 0 <= j < lg_round
                ]))
        window_list = [
            Tool.mean(lg_window[4:7]),
            Tool.mean(lg_window[0:7]),
            Tool.mean(lg_window[0:3])
        ]
        lg_predict_result[flavor] = max(
            int(Tool.LG(3, list(range(0, 3)), window_list)), 0)

        # ----------- random forest ----------
        temp_list = [day_counts[col] for day_counts in rf_count_list]
        # NOTE(review): the query sample is always taken from the raw
        # series, even when training uses the differenced series - confirm.
        test = temp_list[:rf_day_gap - 1][::-1]

        diff_list = Tool.line_diff(temp_list)

        rf_source = temp_list if rf_diff == 0 else diff_list
        rf_predict_list = [
            rf_source[j:j + rf_day_gap][::-1]
            for j in range(len(rf_count_list) - rf_day_gap)
        ]
        my_labels = list(range(rf_day_gap))
        rf_result = rf_predict(rf_predict_list, my_labels, test, seed)
        if rf_diff == 0:
            rf_predict_result[flavor] = rf_result
        else:
            # Undo the differencing by adding the last observed raw value.
            rf_predict_result[flavor] = rf_result + temp_list[-1]

        # ----------- linear regression ----------
        # Each training row holds lr_day_gap - 1 smoothed values followed by
        # the raw value of row i.
        # NOTE(review): the 20 below is hard-coded (presumably the number of
        # usable rows in lr_count_list) - confirm.
        lr_data = []
        for i in range(20 - lr_day_gap + 1):
            lr_window = []
            for j in range(1, lr_day_gap):
                lr_window.append(
                    Tool.mid([
                        lr_count_list[i + jj][int(flavor) - 1]
                        for jj in range(j - 3, j + 3) if 0 <= jj < lr_day_gap
                    ]))
            lr_window.append(lr_count_list[i][int(flavor) - 1])
            lr_data.append(lr_window)

        w = Tool.zeros(lr_day_gap)
        w = linear_regression(w, lr_data, 0.02, 700)  # 0.03 500
        x = [lr_data[0][lr_day_gap - 1]] + lr_data[0][:lr_day_gap - 2]
        # Append squared terms, then mean/range normalise with a bias of 1.
        x = x + [u * u for u in x]
        max_x = max(x)
        min_x = min(x)
        if max_x - min_x > 0:
            # Compute the mean once rather than once per element.
            mean_x = sum(x) / len(x)
            x = [1] + [(value - mean_x) / (max_x - min_x) for value in x]
        else:
            x = [1] + x
        lr_predict_result[flavor] = max(w_mul_x(w, x), 0)

    # ---------- exponential smoothing -------------
    # es_predict receives no per-flavor argument, so one call after the loop
    # replaces the former call per flavor (assumes es_predict has no side
    # effects). It is skipped when there are no flavors, as before.
    if virtual_info:
        es_predict_result = es_predict(es_count_list, virtual_info, es, alpha)

    print('LG预测结果:{}'.format(lg_predict_result))
    print('LR预测结果:{}'.format(lr_predict_result))
    print('RF预测结果:{}'.format(rf_predict_result))
    print('ES预测结果:{}'.format(es_predict_result))

    predict_result = {}
    for key in lg_predict_result:
        # int() truncates; `floor` lets the caller bias the rounding.
        predict_result[key] = int(
            mix_lr * lr_predict_result[key] + mix_rf * rf_predict_result[key] +
            mix_es * es_predict_result[key] +
            (1 - mix_lr - mix_rf - mix_es) * lg_predict_result[key] +
            floor)

    print('最终预测结果:{}'.format(predict_result))
    print('-----end predict-----')
    return predict_result