def GET(self):
    """Render the Edit Profile page, pre-filling the form from the stored bio."""
    user = users.get_current_user()
    if user:
        e = util.get_user(user=user)
        f = profile_form()
        # Only pre-populate when the user already has a bio record.
        if e.bio:
            f = profile_form(
                nickname=e.nickname,
                first_name=e.bio.first_name,
                middle_name=e.bio.middle_name,
                last_name=e.bio.last_name,
                city=e.bio.city,
                state=e.bio.state,
                postal_code=e.bio.postal_code,
                country=e.bio.country,
                bio=e.bio.bio,
            )
        # NOTE(review): `t` is presumably a module-level template object — confirm.
        return t.render(
            util.data(
                form=f,
                title="Edit Profile",
                instructions="""Please enter whatever information you feel comfortable sharing. (Please note that your information is not public until you grant us permission to share it in your Preferences)""",
            )
        )
    else:
        return t.render(util.data(title="Not Logged In!", instructions="Please Log in to Edit Your Profile"))
def POST(self, user_id):
    """Queue a contact email from the signed-in user to ``user_id``.

    The special recipient id "us" routes the message to the site owners.
    Delivery happens asynchronously via the "email-throttle" task queue.
    """
    user = users.get_current_user()
    if user:
        d = web.input()
        f = contact_form(message=d.message)
        if f.validate() and util.user_exists(user_id.lower()):
            # Real recipient: hand off to the mail task worker.
            taskqueue.add(
                url="/task/send_mail",
                queue_name="email-throttle",
                params={"sender_id": user.user_id(), "recipient_id": user_id, "message": f.message.data},
            )
            raise web.seeother("/" + user_id + "#message_sent")
        elif f.validate() and user_id.lower() == "us":
            # Site-owner mailbox.
            taskqueue.add(
                url="/task/send_mail",
                queue_name="email-throttle",
                params={"sender_id": user.user_id(), "recipient_id": "us", "message": f.message.data},
            )
            raise web.seeother("/" + user_id + "#message_sent")
        else:
            # Validation failed (or recipient unknown): redisplay the form.
            return t.render(
                util.data(
                    title="Get in touch!",
                    instructions="""You will always reveal your email address when you send a message!""",
                    form=f,
                    subject=" ".join([user.nickname(), "wants to get in touch!"]),
                )
            )
    else:
        return t.render(util.data(title="Not allowed!", instructions="You must be signed in to send messages!"))
def GET(self, name):
    """Render the index of user lists (``name`` is None) or one list's members.

    ``list_list`` is a flat [name, description, name, description, ...]
    sequence.  An unrecognized list name raises 404.
    """
    t = template.env.get_template('user_list.html')
    list_list = [
        # list_list should be generated somehow
        'global', 'Lists all users from all locales.',
    ]
    user_list = []
    if name is None:
        # FIX: the previous map(lambda (li): ...) relied on Python-2-only
        # parenthesized lambda parameters; this comprehension is behaviorally
        # identical and valid on Python 3 as well.
        list_pairs = [
            {'name': list_name, 'scope': scope}
            for list_name, scope in zip(list_list[::2], list_list[1::2])
        ]
        return t.render(util.data(
            title='User Lists',
            instructions='''Users are grouped into "user lists" that group them geographically. These lists are automatically generated and will change based upon the relative size of various user populations.''',
            list_list=list_pairs,
        ))
    elif name.lower() in list_list[::2]:
        # Collect every member whose record survives the privacy filter.
        for i in User.all():
            x = util.strip_private_data(i)
            if x is not None:
                user_list.append(x)
    else:
        raise web.notfound()
    return t.render(util.data(
        title='Display all members',
        instructions='''Public member listing''',
        users=user_list,
    ))
def GET(self):
    """Show the sharing-preferences form, reflecting the user's current choices."""
    user = users.get_current_user()
    if not user:
        return t.render(util.data(title="Not Logged In", instructions="Please log in to have preferences!"))
    e = util.get_user(user=user)
    field_names = (
        "first_name", "middle_name", "last_name", "city",
        "state", "postal_code", "country", "bio",
    )
    if e.shared is not None:
        # A field's checkbox is ticked iff its name is listed as public.
        flags = {name: name in e.shared.public for name in field_names}
    else:
        # No sharing record yet: everything defaults to private.
        flags = {name: False for name in field_names}
    f = prefs_form(**flags)
    return t.render(
        util.data(
            form=f,
            title="Preferences",
            instructions="Please indicate which items you wish to make public."
        )
    )
def GET(self):
    """Render the member search page; show matches when a query is present."""
    q = web.input()
    t = template.env.get_template('search.html')
    f = search_form()
    try:
        if q.query:
            results = []
            user_list = []
            query = q.query.split(' ')
            # Only search members whose data passed the privacy filter.
            for i in User.all():
                x = util.strip_private_data(i)
                if x is not None:
                    user_list.append(x)
            # A profile matches when any query token equals one of its field
            # values.  NOTE(review): a profile is appended once per matching
            # token, so it can appear multiple times in results — confirm the
            # template deduplicates, or this is a latent display bug.
            for p in user_list:
                for i in query:
                    if i in dict(p).values():
                        results.append(p)
            return t.render(util.data(
                title='Find who you\'re looking for!',
                form=f,
                results=results if results else None,
            ))
        else:
            web.debug('q.query doesn\'t exist and it didn\'t thow an exception!')
            raise Warning('Odd, huh?')
    except:
        # NOTE(review): bare except deliberately funnels both the missing
        # q.query attribute and the Warning above into the empty search page,
        # but it also hides real errors — consider narrowing the exception.
        return t.render(util.data(
            title='Find who you\'re looking for!',
            form=f,
        ))
def GET(self, user_id):
    """Render a public profile page for ``user_id``.

    The special id "us" shows a built-in placeholder profile; a missing user
    shows a "[deleted]" placeholder rather than an error page.
    """
    page = template.env.get_template("profile.html")
    if user_id.lower() == "us":
        user_info = {
            "nickname": "Mr. Roboto",
            "first_name": "The",
            "middle_name": "Connection",
            "last_name": "Machine",
            "city": "Google App Engine",
            "state": "The Internet",
        }
    else:
        try:
            entity = util.get_user(user_id=user_id.lower())
            user_info = util.strip_private_data(entity)
        except AttributeError:
            # Lookup failed: present placeholder data for an unknown user.
            user_info = {
                "nickname": "[deleted]",
                "first_name": "No",
                "middle_name": "such",
                "last_name": "user",
                "city": "reddit.com",
                "state": "The Internet",
            }
    return page.render(util.data(info=user_info, user_id=user_id))
def main():
    """Shard stream rows and search-session info into 12 TSV files.

    Each search session is routed to shard ``UserID % 12``.  Writers (and
    their header rows) are created lazily on first use per shard.
    """
    sz = 12
    dt_fh_list = [None for i in range(sz)]
    si_fh_list = [None for i in range(sz)]
    close_fn = []
    try:
        for t, (data_type, rows, sinfo) in enumerate(data()):
            uid = int(sinfo["UserID"])
            m = uid % sz
            for row in rows:
                # Normalize rows: test rows lack IsClick, train rows carry ID.
                if "IsClick" not in row:
                    row["IsClick"] = 0
                if "ID" in row:
                    del row["ID"]
            if dt_fh_list[m] is None:
                fh = open("data/stream_%s.tsv" % m, "w")
                close_fn.append(fh)
                dt_fh_list[m] = DictWriter(fh, delimiter='\t', fieldnames=rows[0].keys())
                dt_fh_list[m].writeheader()
            if si_fh_list[m] is None:
                fh = open("data/sinfo_%s.tsv" % m, "w")
                close_fn.append(fh)
                si_fh_list[m] = DictWriter(fh, delimiter='\t', fieldnames=sinfo.keys())
                si_fh_list[m].writeheader()
            dt_fh, si_fh = dt_fh_list[m], si_fh_list[m]
            si_fh.writerow(sinfo)
            dt_fh.writerows(rows)
    finally:
        # FIX: previously the shard files leaked if data() or a write raised
        # mid-stream; always close whatever was opened.
        for fh in close_fn:
            fh.close()
def main():
    """Read the puzzle input, solve both parts in one pass, and print answers."""
    puzzle_input = data()
    part1, part2 = bothParts(puzzle_input, debug=PRINT_ASM)
    print(f"Solution for part 1:\n{part1}")
    print(f"Solution for part 2:\n{part2}")
def main():
    # Shard stream rows and search-session info into 12 TSV files keyed by
    # UserID % 12.  Writers (and their header rows) are created lazily on
    # first use per shard.
    sz = 12
    dt_fh_list = [None for i in range(sz)]
    si_fh_list = [None for i in range(sz)]
    close_fn = []
    for t, (data_type, rows, sinfo) in enumerate(data()):
        uid = int(sinfo["UserID"])
        m = uid % sz
        for row in rows:
            # Normalize rows: test rows lack IsClick, train rows carry ID.
            if "IsClick" not in row:
                row["IsClick"] = 0
            if "ID" in row:
                del row["ID"]
        if dt_fh_list[m] is None:
            fh = open("data/stream_%s.tsv"%m, "w")
            close_fn.append(fh)
            dt_fh_list[m] = DictWriter(fh, delimiter='\t', fieldnames=rows[0].keys())
            dt_fh_list[m].writeheader()
        if si_fh_list[m] is None:
            fh = open("data/sinfo_%s.tsv"%m, "w")
            close_fn.append(fh)
            si_fh_list[m] = DictWriter(fh, delimiter='\t', fieldnames=sinfo.keys())
            si_fh_list[m].writeheader()
        dt_fh, si_fh = dt_fh_list[m], si_fh_list[m]
        si_fh.writerow(sinfo)
        dt_fh.writerows(rows)
    # NOTE(review): files are not closed if an exception is raised above;
    # consider a try/finally around the loop.
    for fh in close_fn:
        fh.close()
def POST(self):
    """Save the current user's sharing preferences and redirect back."""
    user = users.get_current_user()
    # Unchecked checkboxes are absent from the POST body, so every flag
    # defaults to False here.
    d = web.input(
        first_name=False,
        middle_name=False,
        last_name=False,
        city=False,
        state=False,
        postal_code=False,
        country=False,
        bio=False,
    )
    # NOTE(review): this builds profile_form, while the preferences GET
    # handler uses prefs_form — looks like a copy-paste; confirm which form
    # class these boolean fields belong to.
    f = profile_form(
        first_name=d.first_name,
        middle_name=d.middle_name,
        last_name=d.last_name,
        city=d.city,
        state=d.state,
        postal_code=d.postal_code,
        country=d.country,
        bio=d.bio,
    )
    if not f.validate():
        return t.render(
            util.data(
                form=f,
                title="Preferences",
                instructions="Please indicate which items you wish to make public."
            )
        )
    else:
        # Persist the names of all checked fields as the public list.
        prefs = [i.name for i in f if i.data]
        e = util.get_user(user=user)
        e.shared.public = prefs
        e.shared.put()
        # Invalidate the cached profile data for this user.
        mdel(key=user.user_id(), namespace="profile_data")
        raise web.seeother("/preferences")
def main():
    """Load the puzzle input, run both solvers, and print their answers."""
    puzzle_input = util.data()
    part1_answer = p1(puzzle_input, debug=PRINT_ASM)
    part2_answer = p2(puzzle_input, debug=PRINT_ASM)
    print(f"Solution for part 1:\n{part1_answer}")
    print(f"Solution for part 2:\n{part2_answer}")
def main():
    # Build impression-frequency tables for users, IPs, ads, query words,
    # titles and params over the filtered stream, then dump each table.
    # Python 2 code: print statements, unicode(), list-returning map/filter.
    random.seed(args.seed)
    data_iter = data(args.test, maxlines=args.maxl)
    print "sr: %s" % args.sr
    uid_cnt = defaultdict(int)
    ipid_cnt = defaultdict(int)
    adid_cnt = defaultdict(int)
    query_cnt = defaultdict(int)
    title_cnt = defaultdict(int)
    query_param_cnt = defaultdict(int)
    ad_param_cnt = defaultdict(int)
    for line_cnt, (data_type, rows, sinfo) in enumerate(data_iter):
        # Apply the sampling-rate row filter; skip sessions left empty.
        rows = filter(lambda x: filter_row(x, data_type, sr=args.sr), rows)
        if not rows:
            continue
        ipid, uid = map(int, (sinfo["IPID"], sinfo["UserID"]))
        uid_cnt[uid] += len(rows)
        ipid_cnt[ipid] += len(rows)
        query = unicode(sinfo["SearchQuery"], "utf-8")
        # Hash each query token into feature slot 0.
        val = map(lambda x: hash_val(0, x), query.split())
        for v in val:
            query_cnt[v] += len(rows)
        sid = int(sinfo["SearchID"])
        for v in get_se_param(sid):
            query_param_cnt[v] += len(rows)
        for row in rows:
            aid = int(row["AdID"])
            adid_cnt[aid] += 1
            ad_info = get_ad_info(aid)
            for v in ad_info["Params"]:
                ad_param_cnt[v] += 1
            title = ad_info["Title"]
            title_val = map(lambda x: hash_val(0, x), title.split())
            for v in title_val:
                title_cnt[v] += 1
        if line_cnt % 100000 == 0:
            # Periodic progress report: distinct-key counts so far.
            print "uid_cnt: %s" % len(uid_cnt)
            print "ipid_cnt: %s" % len(ipid_cnt)
            print "adid_cnt: %s" % len(adid_cnt)
            print "query_cnt: %s" % len(query_cnt)
            print "title_cnt: %s" % len(title_cnt)
            print "query_param_cnt: %s" % len(query_param_cnt)
            print "ad_param_cnt: %s" % len(ad_param_cnt)
    write_dump("data/uid_cnt.dump", uid_cnt)
    write_dump("data/ipid_cnt.dump", ipid_cnt)
    write_dump("data/adid_cnt.dump", adid_cnt)
    write_dump("data/query_cnt.dump", query_cnt)
    write_dump("data/title_cnt.dump", title_cnt)
    write_dump("data/query_param_cnt.dump", query_param_cnt)
    write_dump("data/ad_param_cnt.dump", ad_param_cnt)
def main():
    # Build impression-frequency tables for users, IPs, ads, query words,
    # titles and params over the filtered stream, then dump each table.
    # Python 2 code: print statements, unicode(), list-returning map/filter.
    random.seed(args.seed)
    data_iter = data(args.test, maxlines=args.maxl)
    print "sr: %s"%args.sr
    uid_cnt = defaultdict(int)
    ipid_cnt = defaultdict(int)
    adid_cnt = defaultdict(int)
    query_cnt = defaultdict(int)
    title_cnt = defaultdict(int)
    query_param_cnt = defaultdict(int)
    ad_param_cnt = defaultdict(int)
    for line_cnt, (data_type, rows, sinfo) in enumerate(data_iter):
        # Apply the sampling-rate row filter; skip sessions left empty.
        rows = filter(lambda x: filter_row(x, data_type, sr=args.sr), rows)
        if not rows:
            continue
        ipid, uid = map(int, (sinfo["IPID"], sinfo["UserID"]))
        uid_cnt[uid] += len(rows)
        ipid_cnt[ipid] += len(rows)
        query = unicode(sinfo["SearchQuery"], "utf-8")
        # Hash each query token into feature slot 0.
        val = map(lambda x : hash_val(0, x), query.split())
        for v in val:
            query_cnt[v] += len(rows)
        sid = int(sinfo["SearchID"])
        for v in get_se_param(sid):
            query_param_cnt[v] += len(rows)
        for row in rows:
            aid = int(row["AdID"])
            adid_cnt[aid] += 1
            ad_info = get_ad_info(aid)
            for v in ad_info["Params"]:
                ad_param_cnt[v] += 1
            title = ad_info["Title"]
            title_val = map(lambda x : hash_val(0, x), title.split())
            for v in title_val:
                title_cnt[v] += 1
        if line_cnt % 100000 == 0:
            # Periodic progress report: distinct-key counts so far.
            print "uid_cnt: %s"%len(uid_cnt)
            print "ipid_cnt: %s"%len(ipid_cnt)
            print "adid_cnt: %s"%len(adid_cnt)
            print "query_cnt: %s"%len(query_cnt)
            print "title_cnt: %s"%len(title_cnt)
            print "query_param_cnt: %s"%len(query_param_cnt)
            print "ad_param_cnt: %s"%len(ad_param_cnt)
    write_dump("data/uid_cnt.dump", uid_cnt)
    write_dump("data/ipid_cnt.dump", ipid_cnt)
    write_dump("data/adid_cnt.dump", adid_cnt)
    write_dump("data/query_cnt.dump", query_cnt)
    write_dump("data/title_cnt.dump", title_cnt)
    write_dump("data/query_param_cnt.dump", query_param_cnt)
    write_dump("data/ad_param_cnt.dump", ad_param_cnt)
def GET(self, user_id):
    """Show the contact form for an existing user (or "us"); 404 otherwise."""
    is_known_recipient = util.user_exists(user_id.lower()) or user_id.lower() == "us"
    if not is_known_recipient:
        raise web.notfound()
    page = template.env.get_template("contact.html")
    form = contact_form()
    user = users.get_current_user()
    if not user:
        return page.render(util.data(title="Not allowed!", instructions="You must be signed in to send messages!"))
    return page.render(
        util.data(
            title="Get in touch!",
            instructions="""You will always reveal your email address when you send a message!""",
            form=form,
            subject=" ".join([user.nickname(), "wants to get in touch!"]),
        )
    )
def GET(self):
    """Render the login page, or redirect straight to a chosen OpenID provider.

    When a ``provider`` query parameter is present, bounce the browser to that
    provider's federated login URL; otherwise list the known providers.
    """
    # FIX: renamed the local from `input`, which shadowed the builtin.
    params = web.input(provider=None)
    if params.provider:
        url = users.create_login_url('/', federated_identity=params.provider)
        raise web.redirect(web.ctx.homedomain + url)
    t = template.env.get_template('login.html')
    providers = {}
    # Key each provider by its short name (e.g. "google" from "Google.com").
    for p in openid_providers:
        providers[p.split('.')[0].lower()] = users.create_login_url('/', federated_identity=p.lower())
    return t.render(util.data(
        login=providers,
    ))
def main():
    # Rebuild each user's chronologically sorted search history for one shard
    # (args.sz) and emit per-user and per-(user, ad) count features as
    # SearchID-sorted CSVs.  Python 2 code (print statements, list filter).
    train_iter = next_row(read_tsv("data/stream_%s.tsv"%args.sz))
    test_iter = iter([])
    sinfo_iter = read_tsv("data/sinfo_%s.tsv"%args.sz)
    del_keys_set = ["HistCTR", "SearchID", "ObjectType"]
    for t, (data_type, rows, sinfo) in enumerate(data(train_iter=train_iter, test_iter=test_iter, sinfo_iter=sinfo_iter)):
        uid = int(sinfo["UserID"])
        date_str = sinfo["SearchDate"]
        ts = convert_ts(datetime.strptime(date_str, "%Y-%m-%d %H:%M:%S.0"))
        # Keep only rows with ObjectType == 3 (contextual ads — TODO confirm).
        rows = filter(lambda x : int(x["ObjectType"]) == 3, rows)
        for row in rows:
            for key in del_keys_set:
                del row[key]
            for key in row:
                # Remaining fields are numeric; empty string means 0.
                row[key] = int(row[key]) if row[key] != "" else 0
        item = (
            ts,
            int(sinfo["SearchID"]),
            tuple([(row["AdID"], row["IsClick"], row["Position"]) for row in rows]),
        )
        # NOTE(review): uid_sid appears to be a module-level defaultdict(list);
        # it is not defined in this function — confirm.
        uid_sid[uid].append(item)
    print "uid_sid: %s"%len(uid_sid)
    # Order each user's searches by (timestamp, SearchID).
    for uid in uid_sid:
        uid_sid[uid].sort()
    print "start user_cnt."
    file_name = "data/user_cnt_%s.csv"%args.sz
    with open(file_name, "w") as f:
        writer = DictWriter(f, fieldnames=["SearchID", "t_cnt", "bf_cnt", "af_cnt", "bf_3h_cnt", "af_3h_cnt", "bf_clk_cnt", "bag2", "bag1"])
        writer.writeheader()
        for uid in uid_sid:
            all_se = uid_sid[uid]
            writer.writerows(get_rows(all_se))
    # External numeric sort by SearchID so downstream joins can stream it.
    os.system('sort -t"," -k1 -g -S 2G %s -o %s_sorted'%(file_name, file_name))
    print "start user_aid_cnt."
    file_name = "data/user_aid_cnt_%s.csv"%args.sz
    with open(file_name, "w") as f:
        writer = DictWriter(f, fieldnames=["SearchID", "AdID", "clk_cnt", "show_cnt", "t_show_cnt", "pos_clk_cnt", "pos_show_cnt"])
        writer.writeheader()
        for uid in uid_sid:
            all_se = uid_sid[uid]
            writer.writerows(get_aid_rows(uid, all_se))
    os.system('sort -t"," -k1 -g -S 2G %s -o %s_sorted'%(file_name, file_name))
def scrape(pid, yid='', export=False):
    """
    Scrapes and parses play by play data for player associated with player_id
    and, optionally, by season

    Parameters
    ----------
    pid : str
        The player ID assigned by basketball-reference.com
    yid : str (optional)
        The year ID for a specific season
        Example: yid for 2019/20 season is '2020'
    export : bool (optional)
        When True, also write the resulting data frame to
        'data/<pid><yid>.csv'

    Returns
    -------
    DataFrame
        A data frame containing play by play data for player corresponding
        to pid
    """
    # Build the shot-finder query URL for this player (and season, if given).
    href = '/play-index/shot_finder.cgi?request=1&match=play&player_id=' + pid + '&year_id=' + yid
    href += '&order_by=date_game'
    html = requests.get(url + href)
    soup = BeautifulSoup(html.content, 'html.parser')
    # Print retrieval info
    name = soup.find('div', id='pi').find('h1').getText().split(' ')[:2]
    season = 'all seasons' if yid == '' else 'the ' + str(
        int(yid) - 1) + '/' + yid + ' season'
    print('Retrieving data for ' + ' '.join(name) + " from " + season)
    # Collect tables
    tables = util.collect_tables(url, soup)
    # Create data frame
    df = pd.DataFrame(util.data(tables))
    df.columns = ['home', 'distance', 'type', 'assisted', 'result']
    # Export if needed
    if export:
        df.to_csv('data/' + pid + yid + '.csv')
    return df
def POST(self):
    """Persist the submitted profile form for the current user, then redirect."""
    user = users.get_current_user()
    d = web.input()
    f = profile_form(
        nickname=d.nickname,
        first_name=d.first_name,
        middle_name=d.middle_name,
        last_name=d.last_name,
        city=d.city,
        state=d.state,
        postal_code=d.postal_code,
        country=d.country,
        bio=d.bio,
    )
    if not f.validate():
        return t.render(
            util.data(
                form=f,
                title="Edit Profile",
                instructions="""Please enter whatever information you feel comfortable sharing. (Please note that your information is not shared.public until you grant us permission to share it in your Preferences)""",
            )
        )
    else:
        e = util.get_user(user=user)
        # NOTE(review): the nickname is only written when one already exists —
        # presumably intentional (nickname created at signup), but verify.
        if e.nickname:
            e.nickname = f.nickname.data
            db.put(e)
        # Store cleared fields as empty strings rather than None.
        e.bio.first_name = f.first_name.data or ""
        e.bio.middle_name = f.middle_name.data or ""
        e.bio.last_name = f.last_name.data or ""
        e.bio.city = f.city.data or ""
        e.bio.state = f.state.data or ""
        e.bio.postal_code = f.postal_code.data or ""
        e.bio.country = f.country.data or ""
        e.bio.bio = f.bio.data or ""
        e.bio.put()
        # Invalidate the cached profile data for this user.
        mdel(key=user.user_id(), namespace="profile_data")
        raise web.seeother("/profile")
def main():
    """Load the puzzle input and print the answers for both parts."""
    puzzle_input = data()
    part1_answer = p1(puzzle_input, debug=PRINT_ASM)
    part2_answer = p2(puzzle_input, debug=PRINT_ASM)
    print(f"Solution for part 1:\n{part1_answer}")
    print(f"Solution for part 2:\n{part2_answer}")
def main():
    # Convert the raw stream into hashed feature files (tr/cv/te) for either
    # the linear pipeline (args.type without "xgb") or the xgboost pipelines.
    # Python 2 code: print statements, unicode(), list-returning filter.
    random.seed(args.seed)
    # Dense numeric features hashed with the "xgb" scheme.
    xgb_set = set([
        "price_pos", "ot1_cnt", "bf_cnt", "bf_clk_cnt", "u_aid_ctr",
        "record_cnt", "show_cnt", "clk_cnt", "t_cnt", "qe_w_pos", "HistCTR",
        "qe_ng_min_pos", "t_show_cnt", "bf_ctr", "ot2_cnt", "Price",
        "qe_ng_cnt", "title_len", "hl_ucnt", "price_ratio", "hl_lcnt",
        "t_match", "qe_w_ratio", "qe_ng_ratio", "Position", "bf_3h_cnt",
        "qe_w_cnt", "af_cnt", "ot3_cnt", "af_3h_cnt", "adid_cnt",
        "IsUserLoggedOn",
    ])
    # Categorical/sparse features hashed with the "xgb2" scheme.
    xgb_sparse_set = set([
        "pos_ot_type", "pos_type", "ca_match", "ca_pid_match", "CategoryID",
        "s_LocationID", "s_CategoryID", "UserAgentFamilyID", "UserAgentOSID",
        "UserDeviceID", "UserAgentID", "UserID", "IPID", "AdID",
        "SearchParams", "Params", "Title", "SearchQuery"
    ])
    # One output file per split: index 0 = train, 1 = cv, 2 = test.
    if args.test:
        fh_list = [
            open("data/tr_%s.%s" % (args.test, args.type), "w"),
            open("data/cv_%s.%s" % (args.test, args.type), "w"),
            open("data/te_%s.%s" % (args.test, args.type), "w")]
    else:
        fh_list = [open("data/tr.%s" % (args.type), "w"),
                   open("data/cv.%s" % (args.type), "w"),
                   open("data/te.%s" % (args.type), "w")]
    data_iter = data(args.test, maxlines=args.maxl)
    print "sr: %s" % args.sr
    # Per-split (clicks, impressions) accumulator for the final CTR report.
    avg_ctr = defaultdict(lambda: [0, 0])
    for line_cnt, (data_type, rows, sinfo) in enumerate(data_iter):
        sinfo["s_LocationID"] = int(sinfo["LocationID"])
        sinfo["s_CategoryID"] = int(sinfo["CategoryID"])
        extract_slot_feas(rows, sinfo)
        rows = filter(lambda x: filter_row(x, data_type, sr=args.sr), rows)
        if not rows:
            continue
        feature_map = get_features(sinfo, rows, data_type > 0)
        instances = extract(feature_map)
        if line_cnt == 0:
            # First session: dump the feature layout and persist the ordered
            # feature-name list.  feas_name is defined here and reused below.
            for k, feas in feature_map.items():
                print "-" * 80
                print k
                print feas[0].keys()
            feas_name = sorted(instances[0].keys())
            print len(feas_name), feas_name
            if args.sz is not None:
                write_dump("feas_name.dump", feas_name)
            elif args.test:
                write_dump("feas_name%s.dump" % args.test, feas_name)
            else:
                write_dump("feas_name.dump", feas_name)
        # date_str = sinfo["SearchDate"]
        # ts = convert_ts(datetime.strptime(date_str, "%Y-%m-%d %H:%M:%S.0"))
        fh = fh_list[data_type]
        for ins_map, row in zip(instances, rows):
            y = int(row.get("IsClick", 0))
            avg_ctr[data_type][0] += y
            avg_ctr[data_type][1] += 1
            ins = []
            for kt, k in enumerate(feas_name):
                if "xgb" in args.type:
                    # Route each feature to its hashing scheme; drop features
                    # in neither set.
                    if k in xgb_set:
                        hash_type = "xgb"
                    elif k in xgb_sparse_set:
                        hash_type = "xgb2"
                    else:
                        if line_cnt == 0:
                            print "drop %s" % k
                        continue
                else:
                    hash_type = ""
                feas = ins_map[k]
                if line_cnt == 0:
                    print kt, k, type(feas), feas
                # Multi-valued features contribute one hashed entry per value.
                if isinstance(feas, list) or isinstance(feas, tuple):
                    for f in feas:
                        ins.append(hash_val(kt + 1, f, hash_type))
                else:
                    ins.append(hash_val(kt + 1, feas, hash_type))
            fh.write(unicode(y) + " " + " ".join(map(unicode, ins)) + "\n")
    for key, value in avg_ctr.items():
        print "%s, %s" % (key, value[0] * 1. / value[1])
    for fh in fh_list:
        fh.close()
def main():
    # Convert the raw stream into hashed feature files (tr/cv/te).  For the
    # xgboost pipeline (args.type == "xgb") only the whitelisted numeric
    # features are kept.  Python 2 code: print statements, unicode().
    random.seed(args.seed)
    # Features allowed into the xgboost output.
    xgb_set = set([
        "pos_type", "price_pos", "ot1_cnt", "pos_ot_type", "bf_cnt",
        "bf_clk_cnt", "u_aid_ctr", "record_cnt", "show_cnt", "clk_cnt",
        "t_cnt", "qe_w_pos", "HistCTR", "qe_ng_min_pos", "t_show_cnt",
        "bf_ctr", "ot2_cnt", "Price", "qe_ng_cnt", "title_len", "hl_ucnt",
        "price_ratio", "hl_lcnt", "t_match", "qe_w_ratio", "qe_ng_ratio",
        "ca_match", "Position", "bf_3h_cnt", "qe_w_cnt", "af_cnt", "ot3_cnt",
        "ca_pid_match", "af_3h_cnt",
    ])
    # One output file per split: index 0 = train, 1 = cv, 2 = test.
    if args.test:
        fh_list = [
            open("data/tr_%s.%s" % (args.test, args.type), "w"),
            open("data/cv_%s.%s" % (args.test, args.type), "w"),
            open("data/te_%s.%s" % (args.test, args.type), "w")
        ]
    else:
        fh_list = [
            open("data/tr.%s" % (args.type), "w"),
            open("data/cv.%s" % (args.type), "w"),
            open("data/te.%s" % (args.type), "w")
        ]
    data_iter = data(args.test, maxlines=args.maxl)
    # Per-split (clicks, impressions) accumulator for the final CTR report.
    avg_ctr = defaultdict(lambda: [0, 0])
    for line_cnt, (data_type, rows, sinfo) in enumerate(data_iter):
        sinfo["s_LocationID"] = int(sinfo["LocationID"])
        sinfo["s_CategoryID"] = int(sinfo["CategoryID"])
        extract_slot_feas(rows, sinfo)
        rows = filter(lambda x: filter_row(x, data_type), rows)
        if not rows:
            continue
        feature_map = get_features(sinfo, rows, data_type > 0)
        instances = extract(feature_map)
        if line_cnt == 0:
            # First session: dump the feature layout and persist the ordered
            # feature-name list.  feas_name is defined here and reused below.
            for k, feas in feature_map.items():
                print "-" * 80
                print k
                print feas[0].keys()
            feas_name = sorted(instances[0].keys())
            print len(feas_name), feas_name
            if args.sz is not None:
                write_dump("feas_name.dump", feas_name)
            elif args.test:
                write_dump("feas_name%s.dump" % args.test, feas_name)
            else:
                write_dump("feas_name.dump", feas_name)
        fh = fh_list[data_type]
        for ins_map, row in zip(instances, rows):
            y = int(row.get("IsClick", 0))
            avg_ctr[data_type][0] += y
            avg_ctr[data_type][1] += 1
            ins = []
            for kt, k in enumerate(feas_name):
                if args.type == "xgb" and k not in xgb_set:
                    continue
                feas = ins_map[k]
                if line_cnt == 0:
                    print kt, k, type(feas), feas
                # Multi-valued features contribute one hashed entry per value.
                if isinstance(feas, list) or isinstance(feas, tuple):
                    for f in feas:
                        ins.append(hash_val(kt + 1, f, args.type))
                else:
                    ins.append(hash_val(kt + 1, feas, args.type))
            fh.write(unicode(y) + " " + " ".join(map(unicode, ins)) + "\n")
    for key, value in avg_ctr.items():
        print "%s, %s" % (key, value[0] * 1. / value[1])
    for fh in fh_list:
        fh.close()
def main():
    # Convert the raw stream (or a single user shard when args.sz is set)
    # into hashed feature files (tr/cv/te) for the linear or xgboost
    # pipelines.  Python 2 code: print statements, unicode().
    random.seed(args.seed)
    # Dense numeric features hashed with the "xgb" scheme.
    xgb_set = set([
        "price_pos", "ot1_cnt", "bf_cnt", "bf_clk_cnt", "u_aid_ctr",
        "record_cnt", "show_cnt", "clk_cnt", "t_cnt", "qe_w_pos", "HistCTR",
        "qe_ng_min_pos", "t_show_cnt", "bf_ctr", "ot2_cnt", "Price",
        "qe_ng_cnt", "title_len", "hl_ucnt", "price_ratio", "hl_lcnt",
        "t_match", "qe_w_ratio", "qe_ng_ratio", "Position", "bf_3h_cnt",
        "qe_w_cnt", "af_cnt", "ot3_cnt", "af_3h_cnt", "adid_cnt",
        "IsUserLoggedOn",
    ])
    # Categorical/sparse features hashed with the "xgb2" scheme.
    xgb_sparse_set = set([
        "pos_ot_type", "pos_type", "ca_match", "ca_pid_match", "CategoryID",
        "s_LocationID", "s_CategoryID", "UserAgentFamilyID", "UserAgentOSID",
        "UserDeviceID", "UserAgentID", "UserID", "IPID", "AdID",
        "SearchParams", "Params", "Title", "SearchQuery"
    ])
    # One output file per split: index 0 = train, 1 = cv, 2 = test.
    if args.test:
        fh_list = [
            open("data/tr_%s.%s" % (args.test, args.type), "w"),
            open("data/cv_%s.%s" % (args.test, args.type), "w"),
            open("data/te_%s.%s" % (args.test, args.type), "w")
        ]
    else:
        fh_list = [
            open("data/tr.%s" % (args.type), "w"),
            open("data/cv.%s" % (args.type), "w"),
            open("data/te.%s" % (args.type), "w")
        ]
    if args.sz is not None:
        # Shard mode: read one pre-sharded stream/sinfo pair instead of the
        # full dataset; there is no test stream in this mode.
        train_iter = next_row(read_tsv("data/stream_%s.tsv" % args.sz))
        test_iter = iter([])
        sinfo_iter = read_tsv("data/sinfo_%s.tsv" % args.sz)
        data_iter = data(args.test, train_iter=train_iter, test_iter=test_iter, sinfo_iter=sinfo_iter, maxlines=args.maxl)
    else:
        data_iter = data(args.test, maxlines=args.maxl)
    print "sr: %s" % args.sr
    # Per-split (clicks, impressions) accumulator for the final CTR report.
    avg_ctr = defaultdict(lambda: [0, 0])
    for line_cnt, (data_type, rows, sinfo) in enumerate(data_iter):
        sinfo["s_LocationID"] = int(sinfo["LocationID"])
        sinfo["s_CategoryID"] = int(sinfo["CategoryID"])
        extract_slot_feas(rows, sinfo)
        rows = filter(lambda x: filter_row(x, data_type, sr=args.sr), rows)
        if not rows:
            continue
        feature_map = get_features(sinfo, rows, data_type > 0)
        instances = extract(feature_map)
        if line_cnt == 0:
            # First session: dump the feature layout and persist the ordered
            # feature-name list.  feas_name is defined here and reused below.
            for k, feas in feature_map.items():
                print "-" * 80
                print k
                print feas[0].keys()
            feas_name = sorted(instances[0].keys())
            print len(feas_name), feas_name
            if args.sz is not None:
                write_dump("feas_name.dump", feas_name)
            elif args.test:
                write_dump("feas_name%s.dump" % args.test, feas_name)
            else:
                write_dump("feas_name.dump", feas_name)
        # date_str = sinfo["SearchDate"]
        # ts = convert_ts(datetime.strptime(date_str, "%Y-%m-%d %H:%M:%S.0"))
        fh = fh_list[data_type]
        for ins_map, row in zip(instances, rows):
            y = int(row.get("IsClick", 0))
            avg_ctr[data_type][0] += y
            avg_ctr[data_type][1] += 1
            ins = []
            for kt, k in enumerate(feas_name):
                if "xgb" in args.type:
                    # Route each feature to its hashing scheme; drop features
                    # in neither set.
                    if k in xgb_set:
                        hash_type = "xgb"
                    elif k in xgb_sparse_set:
                        hash_type = "xgb2"
                    else:
                        if line_cnt == 0:
                            print "drop %s" % k
                        continue
                else:
                    hash_type = ""
                feas = ins_map[k]
                if line_cnt == 0:
                    print kt, k, type(feas), feas
                # Multi-valued features contribute one hashed entry per value.
                if isinstance(feas, list) or isinstance(feas, tuple):
                    for f in feas:
                        ins.append(hash_val(kt + 1, f, hash_type))
                else:
                    ins.append(hash_val(kt + 1, feas, hash_type))
            fh.write(unicode(y) + " " + " ".join(map(unicode, ins)) + "\n")
    for key, value in avg_ctr.items():
        print "%s, %s" % (key, value[0] * 1. / value[1])
    for fh in fh_list:
        fh.close()
def main():
    """Load the puzzle input and print the answer for each part."""
    puzzle_input = data()
    part1_answer = p1(puzzle_input)
    part2_answer = p2(puzzle_input)
    print(f"Solution for part 1:\n{part1_answer}")
    print(f"Solution for part 2:\n{part2_answer}")
def main():
    """Parse the input digits into an int8 array and print both answers."""
    digits = lmap(int, data().strip())
    puzzle_input = np.array(digits, dtype=np.int8)
    print(f"Solution for part 1:\n{p1(puzzle_input)}")
    print(f"Solution for part 2:\n{p2(puzzle_input)}")
def main():
    """Run both solvers and print the last element of each result sequence."""
    puzzle_input = data()
    first = p1(puzzle_input)
    second = p2(puzzle_input)
    print(f"Solution for part 1:\n{first[-1]}")
    print(f"Solution for part 2:\n{second[-1]}")
def GET(self, name):
    """Render the user-map page with the shared template context."""
    page = template.env.get_template('user_map.html')
    return page.render(util.data())
def main():
    # Convert the raw stream into hashed feature files (tr/cv/te).  For the
    # xgboost pipeline (args.type == "xgb") only the whitelisted features are
    # kept.  Python 2 code: print statements, unicode().
    random.seed(args.seed)
    # Features allowed into the xgboost output.
    xgb_set = set([
        "pos_type", "price_pos", "ot1_cnt", "pos_ot_type", "bf_cnt",
        "bf_clk_cnt", "u_aid_ctr", "record_cnt", "show_cnt", "clk_cnt",
        "t_cnt", "qe_w_pos", "HistCTR", "qe_ng_min_pos", "t_show_cnt",
        "bf_ctr", "ot2_cnt", "Price", "qe_ng_cnt", "title_len", "hl_ucnt",
        "price_ratio", "hl_lcnt", "t_match", "qe_w_ratio", "qe_ng_ratio",
        "ca_match", "Position", "bf_3h_cnt", "qe_w_cnt", "af_cnt", "ot3_cnt",
        "ca_pid_match", "af_3h_cnt",
    ])
    # One output file per split: index 0 = train, 1 = cv, 2 = test.
    if args.test:
        fh_list = [
            open("data/tr_%s.%s"%(args.test, args.type), "w"),
            open("data/cv_%s.%s"%(args.test, args.type), "w"),
            open("data/te_%s.%s"%(args.test, args.type), "w")]
    else:
        fh_list = [open("data/tr.%s"%(args.type), "w"),
                   open("data/cv.%s"%(args.type), "w"),
                   open("data/te.%s"%(args.type), "w")]
    data_iter = data(args.test, maxlines=args.maxl)
    # Per-split (clicks, impressions) accumulator for the final CTR report.
    avg_ctr = defaultdict(lambda : [0, 0])
    for line_cnt, (data_type, rows, sinfo) in enumerate(data_iter):
        sinfo["s_LocationID"] = int(sinfo["LocationID"])
        sinfo["s_CategoryID"] = int(sinfo["CategoryID"])
        extract_slot_feas(rows, sinfo)
        rows = filter(lambda x: filter_row(x, data_type), rows)
        if not rows:
            continue
        feature_map = get_features(sinfo, rows, data_type > 0)
        instances = extract(feature_map)
        if line_cnt == 0:
            # First session: dump the feature layout and persist the ordered
            # feature-name list.  feas_name is defined here and reused below.
            for k, feas in feature_map.items():
                print "-" * 80
                print k
                print feas[0].keys()
            feas_name = sorted(instances[0].keys())
            print len(feas_name), feas_name
            if args.sz is not None:
                write_dump("feas_name.dump", feas_name)
            elif args.test:
                write_dump("feas_name%s.dump"%args.test, feas_name)
            else:
                write_dump("feas_name.dump", feas_name)
        fh = fh_list[data_type]
        for ins_map, row in zip(instances, rows):
            y = int(row.get("IsClick", 0))
            avg_ctr[data_type][0] += y
            avg_ctr[data_type][1] += 1
            ins = []
            for kt, k in enumerate(feas_name):
                if args.type == "xgb" and k not in xgb_set:
                    continue
                feas = ins_map[k]
                if line_cnt == 0:
                    print kt, k, type(feas), feas
                # Multi-valued features contribute one hashed entry per value.
                if isinstance(feas, list) or isinstance(feas, tuple):
                    for f in feas:
                        ins.append(hash_val(kt + 1, f, args.type))
                else:
                    ins.append(hash_val(kt + 1, feas, args.type))
            fh.write(unicode(y) + " " + " ".join(map(unicode, ins)) + "\n")
    for key, value in avg_ctr.items():
        print "%s, %s"%(key, value[0] * 1. / value[1])
    for fh in fh_list:
        fh.close()
def handle_read(self):
    # Read one command line from the control socket, dispatch it, and queue
    # response strings in self.senddata.
    # Command format: <mode> <target> <cmd> [params...], where mode is
    # 'router' or 'domain' and target is the router/domain name.
    data = self.recv(1024)
    if data == '':
        # Peer closed the connection.
        self.handle_close()
        return
    log.l.LogIt('RTC006', 'D', 'cmd i: %s', (data))
    args = string.split(data)
    if len(args) == 0:
        return;
    self.mode = args[0]
    self.target = args[1]
    args = args[2:]
    log.l.LogIt('RTC007', '1', '%s', (str(args)))
    if len(args) == 0:
        return
    #--------------------
    if args[0] == 'close':
        self.senddata.append(util.close(self.mode == 'router', self.target, args[1:]))
    #--------------------
    elif args[0] == 'data':
        # Router-only command.
        if self.mode == 'domain':
            self.senddata.append('command: '+args[0]+': address only to router')
        if self.mode == 'router':
            self.senddata.append(util.data(args[1:]))
    elif args[0] == 'dump':
        if self.mode == 'domain':
            self.senddata.append('command: '+args[0]+': address only to router')
        if self.mode == 'router':
            self.senddata.append(util.dump())
    #--------------------
    elif args[0] == 'event':
        if self.mode == 'domain':
            self.senddata.append('command: '+args[0]+': address only to router')
        if self.mode == 'router':
            self.senddata.append(util.event(args[1:]))
    #--------------------
    elif args[0] == 'fb' or args[0] == 'fallback' or args[0] == 'secondary':
        # Switch to the secondary link; optional 'auto' enables automatic mode.
        if len(args) > 1 and args[1] == 'auto':
            auto = 1
        else:
            auto = 0
        if self.mode == 'domain':
            cfg.domain[self.target]['fallback'] = 1
            self.senddata.append(util.switch_secondary(self.target, auto))
        if self.mode == 'router':
            # Router mode applies the switch to every domain this router owns.
            for i in cfg.domain.keys():
                if cfg.domain[i]['apr'] == cfg.name:
                    cfg.domain[i]['fallback'] = 1
                    self.senddata.append(i+' '+util.switch_secondary(i, auto))
    #--------------------
    elif args[0] == 'ff' or args[0] == 'fallforward' or args[0] == 'primary':
        # Switch back to the primary link; optional 'auto' as above.
        if len(args) > 1 and args[1] == 'auto':
            auto = 1
        else:
            auto = 0
        if self.mode == 'domain':
            cfg.domain[self.target]['fallback'] = 0
            self.senddata.append(util.switch_primary(self.target, auto))
        if self.mode == 'router':
            for i in cfg.domain.keys():
                if cfg.domain[i]['apr'] == cfg.name:
                    cfg.domain[i]['fallback'] = 0
                    self.senddata.append(i+' '+util.switch_primary(i, auto))
    #--------------------
    elif args[0] == 'pvc':
        if self.mode == 'domain':
            self.senddata.append('command: '+args[0]+': address only to router')
        if self.mode == 'router':
            self.senddata.append(util.pvc(args[1:]))
    #--------------------
    elif args[0] == 'refresh':
        evt_hdlr.refresh()
        self.senddata.append('status refreshed')
    #--------------------
    elif args[0] == 'set':
        # NOTE(review): util.set replaces (not appends to) self.senddata here.
        self.senddata = util.set(self.mode == 'router', self.target, args[1:])
    #--------------------
    elif args[0] == 'sna':
        if self.mode == 'domain':
            self.senddata.append('command: '+args[0]+': address only to router')
        if self.mode == 'router':
            self.senddata.append(util.sna(args[1:]))
    #--------------------
    elif args[0] == 'status':
        if self.mode == 'domain':
            self.senddata.append(util.status(self.target))
        if self.mode == 'router':
            for i in cfg.domain.keys():
                if cfg.domain[i]['apr'] == cfg.name:
                    self.senddata.append(i+' '+util.status(i))
            if len(self.senddata) == 0:
                self.senddata.append('not active')
    #--------------------
    elif args[0] == 'stop':
        if self.mode == 'domain':
            self.senddata.append('command: '+args[0]+': address only to router')
        if self.mode == 'router':
            # Router shutdown: flag termination and close every domain.
            log.l.LogIt('RTC008', 'I', 'command termination', ())
            cfg.stopping = 1
            msg = '%s terminating' % self.target
            self.senddata.append(msg)
            for i in cfg.domain.keys():
                util.closeall(i)
    #--------------------
    elif args[0] == 'trace':
        if self.mode == 'domain':
            self.senddata.append('command: '+args[0]+': address only to router')
        if self.mode == 'router':
            # With an argument: set trace level; without: report current level.
            if len(args) > 1:
                log.l.SetTraceLevel(int(args[1]))
                self.senddata.append('trace level %s' % (args[1]))
                log.l.LogIt('RTC009', 'I', 'command trace %s', (args[1]))
            else:
                level = log.l.GetTraceLevel()
                self.senddata.append('trace level %d' % (level))
                log.l.LogIt('RTC010', 'I', 'command get trace: %d', (level))
    #--------------------
    elif args[0] == 'version':
        msg = ver.getVersion()
        if cfg.snasrv_version != '':
            msg = msg + ' snasrv: ' + cfg.snasrv_version
        self.senddata.append(msg)
    #--------------------
    else:
        self.senddata.append('command: '+args[0]+': not implemented')
def GET(self):
    """Render the FAQ page with the shared template context."""
    page = template.env.get_template('faq.html')
    return page.render(util.data())