Exemplo n.º 1
0
def getShopFeatures(filename, gap):
    """Write per-group max/min/median aggregates of a shop feature file.

    The feature file ``filename`` is loaded through ``loadShopFeature``
    together with the mapping returned by ``getCityGroup()``. Every
    (group, date) feature collection is reduced along axis 0 to its max, min
    and median, and the result is written to the feature file named
    ``'locate_' + filename``.

    Output layout: one header line holding the ``locate_max_*``,
    ``locate_min_*`` and ``locate_median_*`` column names, then, for every
    shop id 1..2000 and every date available for that shop's group, one line
    ``<aggregates>,<shop>,<date>``. The trailing shop and date columns are
    not named in the header.

    Args:
        filename: Name of the source feature file; resolved with
            ``helper.get_feature_path``.
        gap: Forwarded unchanged to ``helper.get_feature_path``.

    Raises:
        KeyError: If a shop id in 1..2000 is missing from the group mapping.
    """
    group = getCityGroup()
    # The second value returned by loadShopFeature is not needed here.
    res, _, names = loadShopFeature(helper.get_feature_path(gap, filename),
                                    group)

    grp_feature = collections.defaultdict(dict)
    for grp, item in res.items():
        for date, features in item.items():
            # Reduce along axis 0 and concatenate the three summaries in the
            # same order as the header below: max, min, median.
            grp_feature[grp][date] = (list(np.max(features, axis=0)) +
                                      list(np.min(features, axis=0)) +
                                      list(np.median(features, axis=0)))

    header = (['locate_max_' + name for name in names] +
              ['locate_min_' + name for name in names] +
              ['locate_median_' + name for name in names])

    with open(helper.get_feature_path(gap, 'locate_' + filename), 'w') as fout:
        # The header does not depend on the shop, so write it once up front.
        fout.write("%s\n" % (",".join(header)))
        for shop_id in xrange(1, 2001):
            shop = str(shop_id)
            by_date = grp_feature[group[shop]]
            for date in by_date:
                row = ",".join(map(str, by_date[date]))
                fout.write("%s,%s,%s\n" % (row, shop, date))
Exemplo n.º 2
0
def getShopFeatures(gap):
    """Dump the per-shop record from ``shopInfo()`` to the 'shop_s' file.

    Writes the header ``per_pay,score,comment_cnt,shop_level,id`` followed by
    one line per shop id 1..2000: the shop's record values joined by commas,
    then the shop id twice (once for the ``id`` column, once as a trailing
    unnamed column).

    Args:
        gap: Forwarded unchanged to ``helper.get_feature_path``.
    """
    info_by_shop = shopInfo()
    out_path = helper.get_feature_path(gap, 'shop_s')
    with open(out_path, 'w') as fout:
        fout.write("per_pay,score,comment_cnt,shop_level,id\n")
        for shop_id in xrange(1, 2001):
            # Records are keyed by the shop id as a string.
            fields = [str(value) for value in info_by_shop[str(shop_id)]]
            fout.write("%s,%d,%d\n" % (",".join(fields), shop_id, shop_id))
Exemplo n.º 3
0
def getShopFeatures(gap):
    """Write one air value per (shop, date) to the 'air_feature' file.

    The header line is ``air``; every following line is
    ``<value>,<shop>,<YYYY-MM-DD>`` for shop ids 1..2000 and every date
    produced by ``util.getDate(start=config.LABELSTARTDATE)``.

    Args:
        gap: Forwarded unchanged to ``helper.get_feature_path``.
    """
    # NOTE(review): airdata is presumably a pandas DataFrame with a `Date`
    # column and one column per city -- confirm against loadAirData().
    airdata = loadAirData()
    info_by_shop = helper.loadShopInfo()

    out_path = helper.get_feature_path(gap, 'air_feature')
    with open(out_path, 'w') as fout:
        fout.write("air\n")
        for shop_id in xrange(1, 2001):
            shop = str(shop_id)
            # Element 0 of a shop record is used as the city column name.
            city = info_by_shop[shop][0]
            for date in util.getDate(start=config.LABELSTARTDATE):
                rows_for_date = airdata[airdata.Date == date]
                value = rows_for_date[city]
                day = date.strftime("%Y-%m-%d")
                # NOTE(review): "%d" is applied to the selection itself; this
                # assumes exactly one row matches the date -- TODO confirm.
                fout.write("%d,%s,%s\n" % (value, shop, day))
Exemplo n.º 4
0
def getDateFeatures(gap):
    """Write calendar features for every date to the 'date_d' feature file.

    The header line is ``if_work,is_fes,week_day``; every following line is
    ``<if_work>,<is_fes>,<week_day>,<YYYY-MM-DD>`` for each date produced by
    ``util.getDate(start=config.LABELSTARTDATE)``. The trailing date column
    is not named in the header.

    Columns:
        if_work: 1 when ``date.weekday()`` is below 5, else 0.
        is_fes: 1 when the date is contained in ``helper.loadFestival()``,
            else 0.
        week_day: the value of ``date.weekday()``.

    Args:
        gap: Forwarded unchanged to ``helper.get_feature_path``.
    """
    festivals = helper.loadFestival()

    with open(helper.get_feature_path(gap, 'date_d'), 'w') as fout:
        fout.write("if_work,is_fes,week_day\n")
        for date in util.getDate(start=config.LABELSTARTDATE):
            week_day = date.weekday()
            is_fes = 1 if date in festivals else 0
            if_work = 1 if week_day < 5 else 0
            fout.write("%d,%d,%d,%s\n" %
                       (if_work, is_fes, week_day, date.strftime("%Y-%m-%d")))
Exemplo n.º 5
0
def getShopFeatures(gap):
    """Write encoded categorical shop attributes to the 'shop_cate_s' file.

    The header line is ``city,locate,cate1,cate2,cate3``; every following
    line holds the five encoded values and then the shop id, for shop ids
    1..2000. A raw value missing from its lookup table is encoded as -1.

    Args:
        gap: Forwarded unchanged to ``helper.get_feature_path``.
    """
    shop_info, lis = shopInfo()

    # (index into `lis`, index into the shop record) for each output column,
    # in header order.
    column_sources = ((0, 0), (1, 1), (2, 6), (3, 7), (4, 8))

    with open(helper.get_feature_path(gap, 'shop_cate_s'), 'w') as fout:
        fout.write("city,locate,cate1,cate2,cate3\n")
        for shop_id in xrange(1, 2001):
            shop = str(shop_id)
            record = shop_info[shop]
            codes = [str(lis[table].get(record[col], -1))
                     for table, col in column_sources]
            fout.write("%s,%s\n" % (",".join(codes), shop))
Exemplo n.º 6
0
def getShopFeatures(gap):
    """Write weather features per (shop, date) to the 'weather_feature' file.

    For shop ids 1..2000 and every date produced by
    ``util.getDate(start=config.LABELSTARTDATE)``, the weather entry of the
    shop's city for that date is passed to ``getFeature``. The column names
    returned by the first ``getFeature`` call become the header line; each
    row is ``<features>,<shop>,<YYYY-MM-DD>``.

    Args:
        gap: Forwarded unchanged to ``helper.get_feature_path``.
    """
    weatherdata = loadWeatherData()
    info_by_shop = helper.loadShopInfo()

    out_path = helper.get_feature_path(gap, 'weather_feature')
    header_pending = True
    with open(out_path, 'w') as fout:
        for shop_id in xrange(1, 2001):
            shop = str(shop_id)
            # Element 0 of a shop record is used as the key into weatherdata.
            city = info_by_shop[shop][0]
            for date in util.getDate(start=config.LABELSTARTDATE):
                feature, msg = getFeature(weatherdata[city][date])
                if header_pending:
                    # Column names are only known after the first call.
                    fout.write("%s\n" % (",".join(msg)))
                    header_pending = False
                values = ",".join(str(value) for value in feature)
                day = date.strftime("%Y-%m-%d")
                fout.write("%s,%s,%s\n" % (values, shop, day))
Exemplo n.º 7
0
def run(gap):
    """Generate the 'same_day_stat' feature file.

    For shop ids 1..2000, the shop's pay data is converted with
    ``util.toArray`` and ``getFeature(date, arr, gap)`` is evaluated for every
    date produced by ``util.getDate(start_date[shop])``. The column names
    returned by the first ``getFeature`` call are written as the header line;
    each feature row is emitted through ``util.outputFeature``. Each shop id
    is printed as a progress indicator.

    Args:
        gap: Forwarded to ``helper.get_feature_path`` and ``getFeature``.
    """
    data = helper.loadShopPay()
    start_date = util.getLabelStartDate(data)

    header_written = False
    # `with` guarantees the file is closed even if a helper raises midway.
    with open(helper.get_feature_path(gap, 'same_day_stat'), 'w') as fout:
        for shop_id in xrange(1, 2001):
            shop = str(shop_id)
            print(shop)
            arr = util.toArray(data[shop])
            for date in util.getDate(start_date[shop]):
                feature, msg = getFeature(date, arr, gap)
                if not header_written:
                    # Column names are only known after the first call.
                    fout.write('%s\n' % (','.join(msg)))
                    header_written = True
                util.outputFeature(fout, date, shop, feature)
Exemplo n.º 8
0
def run(gap):
    """Generate the 'day_feature' feature file (one row per shop).

    For shop ids 1..2000, ``getFeature`` is called with the shop's entries
    from the two datasets returned by ``loadAllData()``. The column names
    returned for the first shop are written as the header line; each row is
    ``<features>,<shop>``. Each shop id is printed as a progress indicator.

    Args:
        gap: Forwarded unchanged to ``helper.get_feature_path``.
    """
    shopdata, shop1data = loadAllData()

    header_written = False
    # `with` guarantees the file is closed even if a helper raises midway.
    with open(helper.get_feature_path(gap, 'day_feature'), 'w') as fout:
        for shop_id in xrange(1, 2001):
            shop = str(shop_id)
            print(shop)
            feature, msg = getFeature(shopdata[shop], shop1data[shop])
            if not header_written:
                # Column names are only known after the first call.
                fout.write('%s\n' % (','.join(msg)))
                header_written = True
            fout.write("%s,%s\n" % (",".join(map(str, feature)), shop))
Exemplo n.º 9
0
def run(gap):
    """Generate a 'time_win_diff_<a>_<b>_<c>' feature file.

    The three name components are taken from ``sys.argv[2]``, ``sys.argv[3]``
    and ``sys.argv[4]``. For shop ids 1..2000, the shop's pay data is
    converted with ``util.toArray`` and ``getFeature(date, arr, gap)`` is
    evaluated for every date produced by ``util.getDate(start_date[shop])``.
    The column names returned by the first ``getFeature`` call are written as
    the header line; each feature row is emitted through
    ``util.outputFeature``. Each shop id is printed as a progress indicator.

    Args:
        gap: Forwarded to ``helper.get_feature_path`` and ``getFeature``.

    Raises:
        IndexError: If fewer than five command-line arguments are present.
    """
    data = helper.loadShopPay()
    start_date = util.getLabelStartDate(data)

    out_name = 'time_win_diff_%s_%s_%s' % (sys.argv[2], sys.argv[3],
                                           sys.argv[4])
    header_written = False
    # `with` guarantees the file is closed even if a helper raises midway.
    with open(helper.get_feature_path(gap, out_name), 'w') as fout:
        for shop_id in xrange(1, 2001):
            shop = str(shop_id)
            print(shop)

            arr = util.toArray(data[shop])
            for date in util.getDate(start_date[shop]):
                feature, msg = getFeature(date, arr, gap)
                if not header_written:
                    # Column names are only known after the first call.
                    fout.write('%s\n' % (','.join(msg)))
                    header_written = True
                util.outputFeature(fout, date, shop, feature)
Exemplo n.º 10
0
def run(gap):
    """Join the configured feature files into one table and write it out.

    Three kinds of feature files are loaded for ``gap``:

    * date features (``loadSingleFeature``), looked up by date,
    * shop features (``loadSingleFeature``), looked up by shop,
    * mix features (``loadMixFeature``), looked up by shop and then date.

    Each loader result is used as a triple: element 0 is the lookup mapping,
    element 1 the number of feature columns, element 2 the column names.

    The rows of the first mix feature ('label') form the base table. Every
    other feature set is appended to each (shop, date) row in place, in the
    order mix -> date -> shop; a row with no matching entry receives that
    many ``np.nan`` values instead. The joined table is handed to ``output``
    and the accumulated column names to ``outputnames``.

    Args:
        gap: Forwarded to ``helper.get_feature_path``, ``output`` and
            ``outputnames``.
    """
    helper.log_time('start loading...')
    date_feature = ['date_d']
    shop_feature = ['shop_s', 'shop_cate_s', 'day_feature']
    mix_feature = [
        'label', 'same_day_hist', 'time_win_1_1_21', 'time_win_log_1_1_21',
        'time_win_2_2_7', 'time_win_3_6_7', 'time_win_7_14_7', 'weekend_stat',
        'weekday_stat', 'same_day_stat', 'min_max_stat', 'air_feature',
        'weather_feature',
    ]

    date_tables = [loadSingleFeature(helper.get_feature_path(gap, d))
                   for d in date_feature]
    shop_tables = [loadSingleFeature(helper.get_feature_path(gap, s))
                   for s in shop_feature]
    mix_tables = [loadMixFeature(helper.get_feature_path(gap, m))
                  for m in mix_feature]
    helper.log_time('finish loading...')

    helper.log_time('start mix join...')
    # The first mix feature supplies the base rows; they are extended in place.
    data_join = mix_tables[0][0]
    # The slice copies the list (so the loader's own name list is not
    # mutated) and drops its first entry.
    names = mix_tables[0][2][1:]
    for table in mix_tables[1:]:
        m, width = table[0], table[1]
        names += table[2]
        for shop, shop_data in data_join.iteritems():
            for date, fea in shop_data.iteritems():
                if shop in m and date in m[shop]:
                    fea += m[shop][date]
                else:
                    # Pad so every row keeps the same number of columns.
                    fea += [np.nan] * width

    helper.log_time('start date join...')
    for d, width, n in date_tables:
        names += n
        for shop, shop_data in data_join.iteritems():
            for date, fea in shop_data.iteritems():
                if date in d:
                    fea += d[date]
                else:
                    fea += [np.nan] * width

    helper.log_time('start shop join...')
    for s, width, n in shop_tables:
        names += n
        print(n)
        for shop, shop_data in data_join.iteritems():
            for date, fea in shop_data.iteritems():
                if shop in s:
                    fea += s[shop]
                else:
                    fea += [np.nan] * width

    helper.log_time('start write...')
    output(data_join, gap)
    outputnames(names, gap)