def evaluate(threadfile, model, extractor, window_size=1, bandwidth=1000000,
             LAG_TIME=10, offset=0, sliding_window_size=120):
    """Simulate visits to one thread and score them with a sliding window.

    Posts are replayed from ``windowed([threadfile], window_size, offset)``,
    which yields ``(window, d_t)`` pairs; the simulated clock advances by
    ``d_t`` after each post.  Visits are scheduled from the intervals
    returned by ``model.predict`` / ``model.repredict``.  Every post and
    visit time is written to its timestamp log.

    Parameters:
        threadfile: thread passed to ``windowed`` and recorded with the
            experiment result.
        model: object providing ``predict(feature_vec, d_t)``,
            ``repredict()``, ``add_experiment(name, threadfile, value)``
            and ``save()``.
        extractor: object providing ``extract(window)``.
        window_size, offset: forwarded unchanged to ``windowed``.
        bandwidth: accepted for interface compatibility; not read here.
        LAG_TIME: delay between the start of the simulation and the first
            visit.
        sliding_window_size: width ``k`` of the scoring window, in the same
            units as the simulated clock.

    Returns:
        ``(Pr_error, visits)`` where ``Pr_error = 0.5 * Pr_miss +
        0.5 * Pr_fa`` and ``visits`` is the number of simulated visits.
        When the simulation is not longer than the scoring window there is
        nothing to score and the corresponding rate is reported as 0.0.
    """
    posts_log, visit_log, result_log = timestamp_log(
        'posts', 'visit', 'sliding_window')
    try:
        time = 0
        # The first visit happens LAG_TIME after the simulation starts.
        time_visit = time + LAG_TIME
        post_buffer = []
        visits = 0
        visit_times = []
        posts_times = []

        for window, d_t in windowed([threadfile], window_size, offset):
            # A post is made at `time`.
            print("%d\t-->" % time)
            posts_log.write("%d\n" % time)
            posts_times.append(time)
            # The pending visit must always lie after the current post.
            assert time_visit - time > 0
            time_post = time + d_t
            post_buffer.append(window)
            last_post_time = time

            while time_visit <= time_post:
                # A visit is made at `time_visit`.
                time = time_visit
                print("%d\t<--" % time)
                visits += 1
                visit_log.write("%d\n" % time)
                visit_times.append(time)

                if post_buffer:
                    # First visit since the post: predict from its features.
                    feature_vec = extractor.extract(post_buffer[-1])
                    d_visit = model.predict(feature_vec, d_t)
                    post_buffer = []
                else:
                    d_visit = model.repredict()

                # The predicted interval is measured from the last post; if
                # that moment has already passed, re-predict from now.
                p_from_last_post = last_post_time + d_visit
                if time < p_from_last_post:
                    time_visit = p_from_last_post
                else:
                    d_visit = model.repredict()
                    time_visit = time + d_visit

            time = time_post

        # --- sliding-window scoring -------------------------------------
        k = sliding_window_size
        last_times = [times[-1] for times in (visit_times, posts_times)
                      if times]
        N = int(max(last_times)) if last_times else 0
        n_windows = max(N - k, 0)

        sum_Phi = 0  # windows holding more posts than visits (misses)
        sum_Psi = 0  # windows holding more visits than posts (false alarms)
        sum_ref = 0  # windows holding at least one post
        for i in range(n_windows):
            r = sum(1 for t in posts_times if i <= t < i + k)
            h = sum(1 for t in visit_times if i <= t < i + k)
            if r > 0:
                sum_ref += 1
            # NOTE(review): the source layout was lost; false alarms are
            # assumed to be counted over every window, not only the
            # windows without posts -- confirm against the metric used.
            if r > h:
                sum_Phi += 1
            elif r < h:
                sum_Psi += 1

        # Guard the divisions: a thread no longer than the window (or with
        # no events at all) used to abort with ZeroDivisionError/IndexError.
        Pr_miss = float(sum_Phi) / sum_ref if sum_ref else 0.0
        Pr_fa = float(sum_Psi) / n_windows if n_windows else 0.0
        Pr_error = 0.5 * Pr_miss + 0.5 * Pr_fa

        result_log.write(str(Pr_miss) + ' , ' + str(Pr_fa) + '\n')
        model.add_experiment('prerror_test', threadfile, Pr_error)
        model.save()
        return Pr_error, visits
    finally:
        posts_log.close()
        visit_log.close()
        result_log.close()
def evaluate(threadfile, model, extractor, window_size=1, bandwidth=1000000,
             LAG_TIME=10, offset=0, sliding_window_size=120):
    """Replay a thread, simulate model-driven visits and return the error.

    ``windowed([threadfile], window_size, offset)`` supplies ``(window,
    d_t)`` pairs; each pair is one post followed by a gap of ``d_t`` on the
    simulated clock.  Visit intervals come from ``model.predict`` (first
    visit after a post) and ``model.repredict`` (subsequent visits).  Post
    and visit times are written to their timestamp logs as they occur.

    Parameters:
        threadfile: thread passed to ``windowed`` and stored with the
            experiment result.
        model: object providing ``predict(feature_vec, d_t)``,
            ``repredict()``, ``add_experiment(name, threadfile, value)``
            and ``save()``.
        extractor: object providing ``extract(window)``.
        window_size, offset: forwarded unchanged to ``windowed``.
        bandwidth: accepted for interface compatibility; not read here.
        LAG_TIME: delay before the first simulated visit.
        sliding_window_size: width ``k`` of the scoring window, in the same
            units as the simulated clock.

    Returns:
        ``(Pr_error, visits)``: the equally weighted mean of the miss and
        false-alarm rates, and the number of simulated visits.  A rate
        whose denominator would be zero (simulation not longer than the
        scoring window, or no events) is reported as 0.0.
    """
    posts_log, visit_log, result_log = timestamp_log(
        'posts', 'visit', 'sliding_window')
    try:
        time = 0
        # Schedule the first visit LAG_TIME after the start.
        time_visit = time + LAG_TIME
        post_buffer = []
        visits = 0
        visit_times = []
        posts_times = []

        for window, d_t in windowed([threadfile], window_size, offset):
            # A post is made at `time`.
            print("%d\t-->" % time)
            posts_log.write("%d\n" % time)
            posts_times.append(time)
            # Invariant: the next visit is strictly after this post.
            assert time_visit - time > 0
            time_post = time + d_t
            post_buffer.append(window)
            last_post_time = time

            while time_visit <= time_post:
                # A visit is made at `time_visit`.
                time = time_visit
                print("%d\t<--" % time)
                visits += 1
                visit_log.write("%d\n" % time)
                visit_times.append(time)

                if post_buffer:
                    # Unseen post available: predict from its features.
                    feature_vec = extractor.extract(post_buffer[-1])
                    d_visit = model.predict(feature_vec, d_t)
                    post_buffer = []
                else:
                    d_visit = model.repredict()

                # Intervals are anchored at the last post; when that target
                # is already in the past, ask again and anchor at `time`.
                p_from_last_post = last_post_time + d_visit
                if time < p_from_last_post:
                    time_visit = p_from_last_post
                else:
                    d_visit = model.repredict()
                    time_visit = time + d_visit

            time = time_post

        # --- sliding-window scoring -------------------------------------
        k = sliding_window_size
        last_times = [times[-1] for times in (visit_times, posts_times)
                      if times]
        N = int(max(last_times)) if last_times else 0
        n_windows = max(N - k, 0)

        sum_Phi = 0  # windows with more posts than visits (misses)
        sum_Psi = 0  # windows with more visits than posts (false alarms)
        sum_ref = 0  # windows containing at least one post
        for i in range(n_windows):
            r = sum(1 for t in posts_times if i <= t < i + k)
            h = sum(1 for t in visit_times if i <= t < i + k)
            if r > 0:
                sum_ref += 1
            # NOTE(review): the source layout was lost; false alarms are
            # assumed to be counted over every window, not only the
            # windows without posts -- confirm against the metric used.
            if r > h:
                sum_Phi += 1
            elif r < h:
                sum_Psi += 1

        # Short or empty simulations previously raised ZeroDivisionError or
        # IndexError here; report a rate of 0.0 when there is no window.
        Pr_miss = float(sum_Phi) / sum_ref if sum_ref else 0.0
        Pr_fa = float(sum_Psi) / n_windows if n_windows else 0.0
        Pr_error = 0.5 * Pr_miss + 0.5 * Pr_fa

        result_log.write(str(Pr_miss) + ' , ' + str(Pr_fa) + '\n')
        model.add_experiment('prerror_test', threadfile, Pr_error)
        model.save()
        return Pr_error, visits
    finally:
        posts_log.close()
        visit_log.close()
        result_log.close()
def evaluate(threadfile, model, extractor, window_size=1, bandwidth=1000000,
             LAG_TIME=10, offset=0, sliding_window_size=120, verbose=False):
    """Simulate visits to one thread and collect several quality scores.

    Posts are replayed from ``windowed([threadfile], window_size, offset)``,
    which yields ``(window, d_t)`` pairs; the simulated clock advances by
    ``d_t`` after each post.  On every visit the model is asked for the
    interval, measured from the last post, until the next visit.  Each post
    and visit is written to its timestamp log and fed to a ``SlidingWindow``
    and a ``PairwiseScoring`` accumulator.

    Parameters:
        threadfile: thread passed to ``windowed`` and stored with each
            experiment result.
        model: object providing ``predict(feature_vec, d_t, current_d_t=...,
            unseen=...)``, ``add_experiment(name, threadfile, value)`` and
            ``save()``.
        extractor: object providing ``extract(window)``.
        window_size, offset: forwarded unchanged to ``windowed``.
        bandwidth: accepted for interface compatibility; not read here.
        LAG_TIME: delay before the first simulated visit.
        sliding_window_size: accepted but not read here (see the note at the
            ``SlidingWindow`` construction).
        verbose: when true, print every post (``-->``) and visit (``<--``).

    Returns:
        dict with keys ``'T-score'`` (mean delay between a post and the
        visit scheduled after it; 0.0 when there were no posts),
        ``'Pr_error'`` (the ``(Pr_miss, Pr_fa, Pr_error)`` triple from the
        sliding window), ``'Visits'``, ``'Posts'`` and ``'Pairwise'``.
    """
    posts_log, visit_log, result_log_tscore, result_log_window = timestamp_log(
        'posts', 'visit', 't_score', 'sliding_window')
    try:
        time = 0
        # The first visit happens LAG_TIME after the simulation starts.
        time_visit = time + LAG_TIME
        post_buffer = []
        t_score_cum = 0
        count = 0
        visits = 0
        # NOTE(review): `sliding_window_size` is never forwarded; whether it
        # was meant to replace K is not visible from here -- confirm.
        w = SlidingWindow(K=20, alpha=0.5)
        ps = PairwiseScoring()

        for window, d_t in windowed([threadfile], window_size, offset):
            # A post is made at `time`.
            if verbose:
                print("%d\t-->" % time)
            posts_log.write("%d\n" % time)
            w.event('post', time)
            ps.event('post', time)
            # The pending visit must always lie after the current post.
            assert time_visit - time > 0
            t_score_cum += time_visit - time
            count += 1
            time_post = time + d_t
            post_buffer.append((extractor.extract(window), d_t))
            last_post_time = time

            while time_visit <= time_post:
                # A visit is made at `time_visit`.
                time = time_visit
                if verbose:
                    print("%d\t<--" % time)
                visits += 1
                visit_log.write("%d\n" % time)
                w.event('visit', time)
                ps.event('visit', time)

                # NOTE(review): the source layout was lost; the prediction
                # is assumed to run on every visit, reusing the features of
                # the most recent post once the buffer has been consumed.
                if post_buffer:
                    feature_vec, _ = post_buffer[-1]
                d_visit = model.predict(
                    feature_vec, d_t,
                    current_d_t=time - last_post_time,
                    unseen=post_buffer[:-1])
                post_buffer = []
                time_visit = last_post_time + d_visit
                # The model must schedule the next visit in the future.
                assert time < time_visit

            time = time_post

        Pr_miss, Pr_fa, Pr_error = w.pr_error()
        result_log_window.write(str(Pr_miss) + ' , ' + str(Pr_fa) + '\n')
        model.add_experiment('prerror_test', threadfile, Pr_error)

        # Computed once so the stored and the returned value always agree.
        pairwise = ps.score()
        model.add_experiment('pairwise_scoring', threadfile, pairwise)

        # No posts means no delays to average; avoid dividing by zero.
        t_score = t_score_cum / float(count) if count else 0.0
        result_log_tscore.write(str(t_score) + '\n')
        model.add_experiment('t-score_test', threadfile, t_score)

        model.save()
        return {
            'T-score': t_score,
            'Pr_error': (Pr_miss, Pr_fa, Pr_error),
            'Visits': visits,
            'Posts': count,
            'Pairwise': pairwise,
        }
    finally:
        posts_log.close()
        visit_log.close()
        result_log_tscore.close()
        result_log_window.close()