def process4f(filename, date):
    # read target day's total logs: 2017-09-01.log
    with open(filename, 'r') as f:
        lines = f.readlines()
    data = np.array(lines)

    global res
    for i in range(len(data)):
        # data[i]: 2017-08-27(ds[0])  00:13:0(ds[1])  3997(ds[2])    0(ds[3])
        ds = str.split(data[i], '\t')

        tran = []

        # index: 当前分钟索引
        tran.append(i)

        # count: 交易量
        tran.append(float(ds[2]))

        # isWeekday: 工作日
        if func.isWeekday(ds[0]) == 1:
            tran.append(1)
        else:
            tran.append(0)

        # 每周的第几天
        func.appendWeekDayIndex(tran, ds[0])

        res.append(tran)
    return
def process8f(filename, date, sum_data, last_day):
    # read target day's total logs: 2017-09-01.log
    with open(filename, 'r') as f:
        lines = f.readlines()
    data = np.array(lines)

    global res
    for i in range(len(data)):
        # data[i]: 2017-08-27(ds[0])  00:13:0(ds[1])  3997(ds[2])    0(ds[3])
        ds = str.split(data[i], '\t')

        tran = []

        #1 index: 当前分钟索引
        tran.append(i)

        #2 count: 交易量
        tran.append(float(ds[2]))

        #3 isWeekday: 工作日
        if func.isWeekday(ds[0]) == 1:
            tran.append(1)
        else:
            tran.append(0)

        #4 每天的第几个小时
        func.appendHourDayIndex(tran, ds[1])

        #5 每周的第几天
        func.appendWeekDayIndex(tran, ds[0])

        #6 计算相应index分钟的5天或者2天的访问量的平均值
        mean_index = 0.0
        if sum_data is not None:
            if func.isWeekday(date) == 1:  # 前5个工作日的相应index分钟的平均访问量
                mean_index = sum_data[i] / 5.0
            else:  # 前2个周末日的相应index分钟的平均访问量
                mean_index = sum_data[i] / 2.0
            tran.append(float(mean_index))
        else:
            tran.append(float(ds[2]))

        #7 上一周该天相应index分钟的访问量
        if last_day is not None:
            tran.append(float(last_day[i]))
        else:
            tran.append(float(ds[2]))

        #8 是否是节假日
        holiday = func.isHoliday(ds[0])
        tran.append(holiday)

        res.append(tran)
    return
Пример #3
0
def create_next_min_8feature_vector(index, prev_seq, predict_count, count_mean,
                                    count_std, sum_data, last_weekday_data):

    today = datetime.datetime.now().strftime('%Y-%m-%d')
    s = str.split(today, '-')
    # 创造一天的开始时间, eg: 2018-01-01 00:00:0
    d0 = datetime.datetime(int(s[0]), int(s[1]), int(s[2]), 0, 0, 0)
    # 增加index分钟
    d1 = d0 + datetime.timedelta(minutes=index)
    ss = str.split((str)(d1), ' ')  # 2018-01-01(ss[0]) 00:00:0(ss[1])

    # 开始构造
    tran = []
    # index: 当前分钟索引
    tran.append(index)

    # count: 交易量
    tran.append(predict_count * count_std + count_mean)

    # isWeekday: 工作日
    if func.isWeekday(today) == 1:
        tran.append(1)
    else:
        tran.append(0)

    # 每天的第几个小时
    func.appendHourDayIndex(tran, ss[1])

    # 每周的第几天
    func.appendWeekDayIndex(tran, today)

    # 平均值
    mean_index = 0.0
    if func.isWeekday(today) == 1:  # 前5个工作日的相应index分钟的平均访问量
        mean_index = sum_data[index] / 5.0
    else:  # 前2个周末日的相应index分钟的平均访问量
        mean_index = sum_data[index] / 2.0
    tran.append(float(mean_index[0]))

    # 上一周该天相应index分钟的访问量
    if last_weekday_data is not None:
        tran.append(float(last_weekday_data[index]))
    else:
        print('haha')
        tran.append(float(data[index]['count']))

    # 是否是节假日
    holiday = func.isHoliday(today)
    tran.append(holiday)

    return tran
Пример #4
0
def create_features_vector(data, date, sum_data, last_weekday_data):
    res = []

    for i in range(len(data)):
        tran = []
        # index: 当前分钟索引
        index = min2index(data[i]['time'])
        tran.append(index)

        # count: 交易量
        tran.append(data[i]['count'])

        # isWeekday: 工作日
        if func.isWeekday(data[i]['date']) == 1:
            tran.append(1)
        else:
            tran.append(0)

        # 每天的第几个小时
        func.appendHourDayIndex(tran, data[i]['time'])
        # 每天的第几个两小时
        # func.handle12_1(tran, ds[1])

        # 每周的第几天
        func.appendWeekDayIndex(tran, data[i]['date'])

        # 计算相应index的访问量的平均值
        mean_index = 0.0
        if func.isWeekday(date) == 1:  # 前5个工作日的相应index分钟的平均访问量
            mean_index = sum_data[index] / 5.0
        else:  # 前2个周末日的相应index分钟的平均访问量
            mean_index = sum_data[index] / 2.0
        tran.append(mean_index[0])

        # 上一周该天相应index分钟的访问量
        if last_weekday_data is not None:
            tran.append(float(last_weekday_data[index]))
        else:
            tran.append(float(data[i]['count']))
            print('haha')

        # 是否是节假日
        holiday = func.isHoliday(data[i]['date'])
        tran.append(holiday)

        res.append(tran)
    return res
def process5f(filename, date, sum_data):
    # read target day's total logs: 2017-09-01.log
    with open(filename, 'r') as f:
        lines = f.readlines()
    data = np.array(lines)

    global res
    for i in range(len(data)):
        # data[i]: 2017-08-27(ds[0])  00:13:0(ds[1])  3997(ds[2])    0(ds[3])
        ds = str.split(data[i], '\t')

        tran = []

        # index: 当前分钟索引
        tran.append(i)

        # count: 交易量
        tran.append(float(ds[2]))

        # isWeekday: 工作日
        if func.isWeekday(ds[0]) == 1:
            tran.append(1)
        else:
            tran.append(0)

        # weekday_index:每周的第几天
        func.appendWeekDayIndex(tran, ds[0])

        # mean_index_count:计算相应index分钟的5天或者2天的访问量的平均值
        mean_index = 0.0
        if sum_data is not None:
            if func.isWeekday(date) == 1:  # 前5个工作日的相应index分钟的平均访问量
                mean_index = sum_data[i] / 5.0
            else:  # 前2个周末日的相应index分钟的平均访问量
                mean_index = sum_data[i] / 2.0
            tran.append(float(mean_index))
        else:
            tran.append(float(ds[2]))

        res.append(tran)
    return
Пример #6
0
def create_features_vector(data, date, sum_data):
    res = []

    for i in range(len(data)):
        tran = []
        # index: 当前分钟索引
        index = min2index(data[i]['time'])
        tran.append(index)
        # count: 交易量
        tran.append(data[i]['count'])
        # 星期几: isMon, isTue, isWed, isThu, isFri, isSat, isSun
        func.appendWeekDay(tran, data[i]['date'])

        # isWeekday: 工作日
        if func.isWeekday(data[i]['date']) == 1:
            tran.append(1)
        else:
            tran.append(0)

        # 时点24维: is24_0,is24_1,is24_2,is24_3,is24_4,is24_5,is24_6,
        # is24_7,is24_8,is24_9,is24_10,is24_11,is24_12,is24_13,is24_14,
        # is24_15,is24_16,is24_17,is24_18,is24_19,is24_20,is24_21,is24_22,is24_23
        func.handle24(tran, data[i]['time'])
        # 时点8维:is8_0,is8_3,is8_6,is8_9,is8_12,is8_15,is8_18,is8_21
        func.handle8(tran, data[i]['time'])
        # 时点4维: is4_0,is4_6,is4_12,is4_18
        func.handle4(tran, data[i]['time'])
        # 每天的第几个小时
        func.appendHourDayIndex(tran, data[i]['time'])
        # 每周的第几天
        func.appendWeekDayIndex(tran, data[i]['date'])
        # 时点12维:is12_0,is12_2,is12_4,is12_6,is12_8,is12_10,is12_12,is12_14,is12_16,is12_18,is12_20,is12_22
        func.handle12(tran, data[i]['time'])
        # 时点6维:is6_0,is6_4,is6_8,is6_12,is6_16,is6_20
        func.handle6(tran, data[i]['time'])
        '''
        # 前3分钟访问量
        if i >= 3:
            mean_3min = 0.0
            for j in range(3):
                mean_3min += float(data[i - j - 1]['count'])
            mean_3min /= 3.0
            tran.append(mean_3min)
        else:
            tran.append(float(data[i]['count']))
        # 前5分钟访问量
        if i >= 5:
            mean_5min = 0.0
            for j in range(5):
                mean_5min += float(data[i - j - 1]['count'])
            mean_5min /= 5.0
            tran.append(mean_5min)
        else:
            tran.append(float(data[i]['count']))
        # 前8分钟访问量
        if i >= 8:
            mean_8min = 0.0
            for j in range(8):
                mean_8min += float(data[i - j - 1]['count'])
            mean_8min /= 8.0
            tran.append(mean_8min)
        else:
            tran.append(float(data[i]['count']))
        # 前10分钟访问量
        if i >= 10:
            mean_10min = 0.0
            for j in range(10):
                mean_10min += float(data[i - j - 1]['count'])
            mean_10min /= 10.0
            tran.append(mean_10min)
        else:
            tran.append(float(data[i]['count']))
        '''
        # 计算相应index的访问量的平均值
        mean_index = 0.0
        if func.isWeekday(date) == 1:  # 前5个工作日的相应index分钟的平均访问量
            mean_index = sum_data[index] / 5.0
        else:  # 前2个周末日的相应index分钟的平均访问量
            mean_index = sum_data[index] / 2.0
        tran.append(mean_index[0])
        '''
        # 每月的第几周
        # 每月的第几天
        # isHoliday: 节假日
        # if func.isHoliday(ds[0]) == 1:
        #     tran.append(1)
        # else:
        #     tran.append(0)
        # isSpecial: 特殊日
        # if func.isSpecialDay(ds[0]) == 1:
        #     tran.append(1)
        # else:
        #     tran.append(0)
        '''

        res.append(tran)
    return res
Пример #7
0
def create_next_min_feature_vector(index, prev_seq, predict_count, count_mean,
                                   count_std, sum_data):

    datenow = datetime.datetime.now()
    date = datenow.strftime('%Y-%m-%d')
    s = str.split(date, '-')
    # 一天的开始, eg: 2018-01-01 00:00:0
    d0 = datetime.datetime(int(s[0]), int(s[1]), int(s[2]), 0, 0, 0)
    d1 = d0 + datetime.timedelta(minutes=index)  # 增加index分钟

    ss = str.split((str)(d1), ' ')  # 2018-01-01 00:00:0

    # 开始构造
    tran = []
    # index: 当前分钟索引
    tran.append(index)
    # count: 交易量
    tran.append(predict_count * count_std + count_mean)
    # 星期几: isMon, isTue, isWed, isThu, isFri, isSat, isSun
    func.appendWeekDay(tran, date)

    # isWeekday: 工作日
    if func.isWeekday(date) == 1:
        tran.append(1)
    else:
        tran.append(0)

    # 时点24维: is24_0,is24_1,is24_2,is24_3,is24_4,is24_5,is24_6,is24_7,is24_8,is24_9,is24_10,is24_11,is24_12,is24_13,is24_14,is24_15,is24_16,is24_17,is24_18,is24_19,is24_20,is24_21,is24_22,is24_23
    func.handle24(tran, ss[1])
    # 时点8维:is8_0,is8_3,is8_6,is8_9,is8_12,is8_15,is8_18,is8_21
    func.handle8(tran, ss[1])
    # 时点4维: is4_0,is4_6,is4_12,is4_18
    func.handle4(tran, ss[1])
    # 每天的第几个小时
    func.appendHourDayIndex(tran, ss[1])
    # 每周的第几天
    func.appendWeekDayIndex(tran, date)

    # 时点12维:is12_0,is12_2,is12_4,is12_6,is12_8,is12_10,is12_12,is12_14,is12_16,is12_18,is12_20,is12_22
    func.handle12(tran, ss[1])
    # 时点6维:is6_0,is6_4,is6_8,is6_12,is6_16,is6_20
    func.handle6(tran, ss[1])
    '''
    # mean_3min, mean_5min, mean_8min, mean_10min
    len_prev_seq = len(prev_seq) # 用来计算mean_3min, mean_5min, mean_8min, mean_10min 4个feature

    # 前3分钟访问量
    mean_3min = 0.0
    for j in range(3):
        mean_3min += (float(prev_seq[len_prev_seq - j - 1][1]) * count_std + count_mean)
    mean_3min /= 3.0
    tran.append(mean_3min)

    # 前5分钟访问量
    mean_5min = 0.0
    for j in range(5):
        mean_5min += (float(prev_seq[len_prev_seq - j - 1][1]) * count_std + count_mean)
    mean_5min /= 5.0
    tran.append(mean_5min)

    # 前8分钟访问量
    mean_8min = 0.0
    for j in range(8):
        mean_8min += (float(prev_seq[len_prev_seq - j - 1][1]) * count_std + count_mean)
    mean_8min /= 8.0
    tran.append(mean_8min)

    # 前10分钟访问量
    mean_10min = 0.0
    for j in range(10):
        mean_10min += (float(prev_seq[len_prev_seq - j - 1][1]) * count_std + count_mean)
    mean_10min /= 10.0
    tran.append(mean_10min)
    '''

    mean_index = 0.0
    if func.isWeekday(date) == 1:  # 前5个工作日的相应index分钟的平均访问量
        mean_index = sum_data[index] / 5.0
    else:  # 前2个周末日的相应index分钟的平均访问量
        mean_index = sum_data[index] / 2.0
    tran.append(mean_index[0])

    return tran
def process(filename, date, has_mean_index):
    with open(filename, 'r') as f:
        lines = f.readlines()
    data = np.array(lines)

    if has_mean_index is True:
        weekday_group = []
        weekend_group = []
        for eachnum in range(1, 8):
            datetmp = (datetime.datetime.strptime(date, "%Y-%m-%d") +
                       datetime.timedelta(days=0 - eachnum))
            if datetmp.weekday() < 5:
                weekday_group.append(datetmp.strftime('%Y-%m-%d'))
            else:
                weekend_group.append(datetmp.strftime('%Y-%m-%d'))
        sum_data = np.zeros([1440, 1])
        if func.isWeekday(date) == 1:
            # read 5 weekday
            for m in range(len(weekday_group)):
                filename_temp = data_dir + weekday_group[m] + '.log'
                with open(filename_temp, 'r') as f:
                    lines_temp = f.readlines()
                dtemp = np.array(lines_temp)
                for n in range(len(dtemp)):
                    stemp = str.split(dtemp[n], '\t')
                    sum_data[n] += float(stemp[2])
        else:
            # read 2 weekend
            for m in range(len(weekend_group)):
                filename_temp = data_dir + weekend_group[m] + '.log'
                with open(filename_temp, 'r') as f:
                    lines_temp = f.readlines()
                dtemp = np.array(lines_temp)
                for n in range(len(dtemp)):
                    stemp = str.split(dtemp[n], '\t')
                    sum_data[n] += float(stemp[2])

    global res
    for i in range(len(data)):
        # data[i]: 2017-08-27      00:13:0 3997    0
        ds = str.split(data[i], '\t')
        tran = []
        # index: 当前分钟索引
        tran.append(i)
        # count: 交易量
        tran.append(float(ds[2]))
        # 星期几: isMon, isTue, isWed, isThu, isFri, isSat, isSun
        # func.appendWeekDay(tran, ds[0])

        # isWeekday: 工作日
        if func.isWeekday(ds[0]) == 1:
            tran.append(1)
        else:
            tran.append(0)

        # 时点24维: is24_0,is24_1,is24_2,is24_3,is24_4,is24_5,is24_6,
        # is24_7,is24_8,is24_9,is24_10,is24_11,is24_12,is24_13,is24_14,
        # is24_15,is24_16,is24_17,is24_18,is24_19,is24_20,is24_21,is24_22,is24_23
        # func.handle24(tran, ds[1])
        # 时点8维:is8_0,is8_3,is8_6,is8_9,is8_12,is8_15,is8_18,is8_21
        func.handle8(tran, ds[1])
        # 时点4维: is4_0,is4_6,is4_12,is4_18
        func.handle4(tran, ds[1])
        # 每天的第几个小时
        func.appendHourDayIndex(tran, ds[1])
        # 每周的第几天
        func.appendWeekDayIndex(tran, ds[0])

        # 时点12维:is12_0,is12_2,is12_4,is12_6,is12_8,is12_10,is12_12,is12_14,is12_16,is12_18,is12_20,is12_22
        func.handle12(tran, ds[1])
        # 时点6维:is6_0,is6_4,is6_8,is6_12,is6_16,is6_20
        func.handle6(tran, ds[1])
        '''
        # mean_3min, mean_5min, mean_8min, mean_10min
        # 得到上一天的最后10个数据
        lastday_10data = []
        # 前3分钟访问量
        if i >= 3:
            mean_3min = 0.0
            for j in range(3):
                temp_data = str.split(data[i - j - 1], '\t')
                mean_3min += float(temp_data[2])
            mean_3min /= 3.0
            tran.append(float(mean_3min))
        else:
            tran.append(float(ds[2]))
        # 前5分钟访问量
        if i >= 5:
            mean_5min = 0.0
            for j in range(5):
                temp_data = str.split(data[i - j - 1], '\t')
                mean_5min += float(temp_data[2])
            mean_5min /= 5.0
            tran.append(float(mean_5min))
        else:
            tran.append(float(ds[2]))
        # 前8分钟访问量
        if i >= 8:
            mean_8min = 0.0
            for j in range(8):
                temp_data = str.split(data[i - j - 1], '\t')
                mean_8min += float(temp_data[2])
            mean_8min /= 8.0
            tran.append(float(mean_8min))
        else:
            tran.append(float(ds[2]))
        # 前10分钟访问量
        if i >= 10:
            mean_10min = 0.0
            for j in range(10):
                temp_data = str.split(data[i - j - 1], '\t')
                mean_10min += float(temp_data[2])
            mean_10min /= 10.0
            tran.append(float(mean_10min))
        else:
            tran.append(float(ds[2]))
        '''
        # 计算相应index的访问量的平均值
        mean_index = 0.0
        if has_mean_index is True:
            if func.isWeekday(date) == 1:  # 前5个工作日的相应index分钟的平均访问量
                mean_index = sum_data[i] / 5.0
            else:  # 前2个周末日的相应index分钟的平均访问量
                mean_index = sum_data[i] / 2.0
            tran.append(float(mean_index))
        else:
            tran.append(float(ds[2]))

        # 1. 每月的第几周: 该特征待加入
        # 2. 每月的第几天: 该特征待加入
        # 3. isHoliday: 节假日: 该特征待加入
        # if func.isHoliday(ds[0]) == 1:
        #     tran.append(1)
        # else:
        #     tran.append(0)
        # 4. isSpecial: 特殊日: 该特征待加入
        # if func.isSpecialDay(ds[0]) == 1:
        #     tran.append(1)
        # else:
        #     tran.append(0)

        res.append(tran)
    return