def app_dlist(dlist, appID, clickTime, correlate, ad: Advertisement):
    """Return the best ``get_prod`` score among earlier entries of ``dlist``.

    Entries are scanned in order and the scan stops at the first entry whose
    ``'clickTime'`` is not strictly before ``clickTime``. For every entry
    scanned, the entry's app (looked up through ``ad`` by ``'creativeID'``)
    is scored against ``appID`` with ``get_prod``.

    Args:
        dlist: Iterable of mappings with ``'clickTime'``, ``'creativeID'``
            and ``'label'`` keys.
            NOTE(review): presumably ordered by ``'clickTime'``, since the
            scan stops at the first late entry; confirm with the caller.
        appID: App identifier each scanned entry is scored against.
        clickTime: Exclusive upper bound on the entries considered.
        correlate: Passed through unchanged as the first ``get_prod`` argument.
        ad: Lookup used to resolve a creative ID to its ``'appID'``.

    Returns:
        The largest score seen, or ``-1`` when no score exceeds ``-1``
        (including when no entry is scanned at all).
    """
    best = -1
    for click in dlist:
        if click['clickTime'] >= clickTime:
            break
        other_app = ad.get_value(click['creativeID'])['appID']
        score = get_prod(correlate, appID, other_app, click['label'])
        # max() keeps its first argument unless the second is strictly
        # greater, so ties leave the running best untouched.
        best = max(best, score)
    return best
import xgboost as xgb from conf.configure import Configure from reading.advertisement import Advertisement from reading.app import App from reading.dataset import Dataset from reading.position import Position from reading.user import User from reading.user_app_actions import User_App_Actions import matplotlib.pyplot as plt import numpy as np if __name__ == '__main__': debug = False print('reading data...') ad = Advertisement(Configure.ad_path, debug=debug) app = App(Configure.app_categories_path, debug=debug) data_set = Dataset(Configure.train_path, Configure.test_path, debug=debug) position = Position(Configure.position_path, debug=debug) user = User(Configure.user_path, debug=debug) user_app_actions = User_App_Actions(Configure.user_app_actions_path, debug=debug) days = [0] * 32 hours = [0] * 24 minutes = [0] * 60 conver_days = [0] * 32 conver_hours = [0] * 24 conver_minutes = [0] * 60 click_in_con_days= [0] * 32
def add_to_app_cat(self, ad: Advertisement, app: App):
    """Register every record of ``self.data_list`` with ``app`` by category.

    For each record, the creative ID is resolved to an app ID through ``ad``,
    the app ID to an app category through ``app``, and the record is then
    handed to ``app.add_dataset`` together with that category.

    Args:
        ad: Lookup whose ``get_value(creativeID)`` result exposes ``appID``.
        app: Lookup whose ``get_value(appID)`` result exposes ``appCategory``;
            it also receives the records via ``add_dataset``.
    """
    for rec in self.data_list:
        app_id = ad.get_value(rec.creativeID).appID
        category = app.get_value(app_id).appCategory
        app.add_dataset(rec, category)
def add_to_advertisement(self, ad: Advertisement):
    """Feed every record of ``self.data_list`` into ``ad``, then refresh it.

    Args:
        ad: Receiver of the records; ``add_dataset`` is called once per record
            and ``fresh`` once afterwards.
    """
    for rec in self.data_list:
        ad.add_dataset(rec)
    # NOTE(review): fresh() is taken to run once after all records are added;
    # the source layout did not make its nesting explicit, so confirm.
    ad.fresh()
def app_dlist(d, ad: Advertisement):
    """Build the ``common_part`` result for ``d`` keyed on the record's app.

    Args:
        d: Mapping with at least a ``'creativeID'`` key; also passed to
            ``common_part``.
        ad: Lookup used to resolve the creative ID to its ``'appID'``.

    Returns:
        Whatever ``common_part(d)`` returned, with its ``'condition'`` entry
        set to the app ID of the record's creative.
    """
    features = common_part(d)
    creative_id = d['creativeID']
    features['condition'] = ad.get_value(creative_id)['appID']
    return features
from conf.configure import Configure from reading.advertisement import Advertisement from reading.app import App from reading.dataset import Dataset from reading.position import Position from reading.user import User from reading.user_app_actions import User_App_Actions from reading.user_app_installed import User_App_Installed from feature.dmatrix import DMatrix from model.xgb_func import * from handle.handle import * if __name__ == '__main__': debug = False print('reading data...') ad = Advertisement(Configure.ad_path, debug=debug) data_set = Dataset(Configure.train_path, Configure.test_path, debug=debug) user = User(Configure.user_path, debug=debug) from_t = 290000 end_t = 310000 result = dict() for d in data_set.get_data_list(): if d['clickTime'] < from_t or d['clickTime'] >= end_t: continue userID = d['userID'] baby = user.get_value(userID)['haveBaby'] creativeID = d['creativeID'] appID = ad.get_value(creativeID)['appID'] label = d['label'] if baby not in result:
def logloss(act, pred):
    """Return the mean binary cross-entropy (log loss) of ``pred`` vs ``act``.

    Predictions are clipped to ``[1e-15, 1 - 1e-15]`` before taking logs so
    that a prediction of exactly 0 or 1 yields a large finite penalty instead
    of an infinite one.

    Args:
        act: Sequence or array of true labels.
        pred: Sequence or array of predicted probabilities, aligned with
            ``act``.

    Returns:
        The negated sum of the per-element log-likelihoods divided by
        ``len(act)``.

    Raises:
        ZeroDivisionError: If ``act`` is empty (the mean is undefined).
    """
    # Local import keeps the block self-contained. The NumPy aliases formerly
    # exposed on the top-level ``scipy`` namespace are deprecated, so NumPy is
    # called directly.
    import numpy as np

    n = len(act)
    if n == 0:
        # Same exception type the division used to raise, with a clear message.
        raise ZeroDivisionError('logloss is undefined for empty input')

    epsilon = 1e-15
    act = np.asarray(act, dtype=float)
    pred = np.clip(np.asarray(pred, dtype=float), epsilon, 1 - epsilon)

    log_likelihood = act * np.log(pred) + (1 - act) * np.log(1 - pred)
    # axis=0 matches the reduction the builtin sum() performed over the array.
    return -np.sum(log_likelihood, axis=0) / n
import pandas as pd

from conf.configure import Configure
from reading.advertisement import Advertisement
from reading.dataset import Dataset
from reading.user import User
from reading.app import App

# Script: print, for every age index 0..99, how many labelled records fall on
# that age in total, with label 0, and with any other non-negative label.
if __name__ == '__main__':
    ad = Advertisement(Configure.ad_path)
    app = App(Configure.app_categories_path)
    data_set = Dataset(Configure.train_path, Configure.test_path)
    user = User(Configure.user_path)

    # Per-age counters, indexed directly by the user's age value.
    result0 = [0] * 100  # records whose label is 0
    result1 = [0] * 100  # records whose label is non-negative and not 0

    for record in data_set.get_data_list():
        user_id = record['userID']
        age = user.get_value(user_id)['age']
        label = record['label']
        # Records with a negative label are left out of both counters.
        if label < 0:
            continue
        bucket = result0 if label == 0 else result1
        bucket[age] += 1

    # One output line per age: index, total, label-0 count, other count.
    for age_index, (zero_count, other_count) in enumerate(zip(result0, result1)):
        print(age_index, zero_count + other_count, zero_count, other_count)
from feature_role.lda_role import Lda_Role from reading.dataset import Dataset from conf.configure import Configure from reading.advertisement import Advertisement import numpy as np if __name__ == '__main__': data_set = Dataset(Configure.train_path, Configure.test_path) ad = Advertisement(Configure.ad_path) param = {'userID': 311729, 'appID': 420} role = Lda_Role() result = dict() for d in data_set.get_data_list(): param = { 'userID': d['userID'], 'appID': ad.get_value(d['creativeID'])['appID'] } vec = role.run(param) clickDay = d['clickTime'] // 10000 label = d['label'] if label < 0: label = 0 if clickDay not in result: result[clickDay] = dict() result[clickDay][0] = dict() result[clickDay][1] = dict() result[clickDay][0]['vec'] = np.zeros([len(vec)]) result[clickDay][0]['include_c'] = 0 result[clickDay][0]['exclude_c'] = 0 result[clickDay][1]['vec'] = np.zeros([len(vec)]) result[clickDay][1]['include_c'] = 0