def process(path): clicked, lines_evaluated, lines_total = 0, 0, 0 with file(sys.argv[2]) as inf: for line in inf: lines_total += 1 # Parsing the log line. logline = line.strip().split() chosen = int(logline.pop(7)) user_action = int(logline.pop(7)) time = int(logline[0]) user_features = [float(x) for x in logline[1:7]] articles = [int(x) for x in logline[7:]] # Getting the recommended article. calculated = policy.reccomend(time, user_features, articles) if not calculated in articles: raise Exception("Article was not in the list.") # Updating the policy. if calculated == chosen: policy.update(user_action) clicked += user_action lines_evaluated += 1 else: policy.update(-1) print "Evaluated %d/%d lines." % (lines_evaluated, lines_total) print "CTR=%f" % (float(clicked) / lines_evaluated)
def process(path): clicked, lines_evaluated, lines_total, time, positive_clicked, positive_reward = 0, 0, 0, 0, 0, 0 for i in range(10): with gzip.open(path % (i+1), 'rb') as inf: for line in inf: lines_total += 1 # Parsing the log line. logline = line.strip().split() chosen = int(logline.pop(1)) user_action = int(logline.pop(1)) time = int(logline[0]) if chosen == 109528: # doesn't exist! continue user_features = [None] * 6 for feat in logline[2:8]: user_features[int(feat[0]) - 1] = float(feat[2:]) articles = [] for feat in logline[8:]: if feat[0] == '|': article_id = int(feat[1:]) if article_id != 109528: articles.append(article_id) # Getting the recommended article. calculated = policy.reccomend(time, user_features, articles) if not calculated in articles: raise Exception("Article was not in the list.") # Updating the policy. if calculated == chosen: policy.update(user_action) clicked += user_action lines_evaluated += 1 positive_reward += user_action else: policy.update(-1) positive_clicked += user_action if lines_total % 10000 == 0: tstamp = datetime.datetime.fromtimestamp(time).strftime('%Y-%m-%d %H:%M:%S') print "%s\t\t+clicked %d/%d\t\tEvaluated %d/%d lines.\tCTR = %f" % (tstamp, positive_reward, positive_clicked, lines_evaluated, lines_total, float(clicked) / lines_evaluated) positive_clicked = 0 positive_reward = 0 print "Evaluated %d/%d lines." % ( lines_evaluated, lines_total) print "CTR=%f" % (float(clicked) / lines_evaluated)