def read_log(log_file):
    """Parse a block-structured experiment log into per-block measurement vectors.

    Every line is decoded with ``interpret_log_line`` into
    ``(time, linetype, linename, value, unit_id, treatment_id)`` and handled
    according to ``linetype`` / ``linename``:

    * ``meta`` / ``agents``: number of agents per block.
    * ``meta`` / ``treatnames``: treatment names, separated by ``@|``.
    * ``meta`` / ``block_id start``: allocates one empty ``AdVector``,
      ``Interests`` and ``NewsVector`` per agent for the new block.
    * ``meta`` / ``assignment``: integer assignment values, separated by ``@|``.
    * ``meta`` / ``block_id end``: hands the block's vectors to
      ``apply_labels_to_vecs`` and appends the block record to the result.
    * ``measurement`` / ``ad`` | ``interest`` | ``news``: stored in the vector
      of the agent given by ``unit_id``.
    * ``treatment`` and ``error`` lines are ignored.

    NOTE(review): this file also contains a later ``read_log`` definition
    (a ``||``-delimited parser). If both are module-level, the later one
    rebinds the name -- confirm which one callers are meant to get.

    Args:
        log_file: Path of the log file to read.

    Returns:
        A tuple ``(par_adv, treatnames)``. ``par_adv`` is a list with one dict
        per completed block, with keys ``'advector'``, ``'newsvector'``,
        ``'assignment'`` and ``'intvector'``. ``treatnames`` is the list of
        treatment names, or ``[]`` when the log never declares them.

    Raises:
        UnboundLocalError: if a block starts before an ``agents`` line was
            seen, or ends before an ``assignment`` line was seen.
        ValueError: if a numeric field cannot be parsed as an integer.
    """
    par_adv = []
    # Default so that a log without a 'treatnames' line (e.g. an empty file)
    # returns cleanly instead of failing on an unbound local at the end.
    treatnames = []
    sys.stdout.write("Reading log")
    # The context manager guarantees the handle is closed, also on errors.
    with open(log_file, "r") as fo:
        for line in fo:
            (tim, linetype, linename, value,
             unit_id, treatment_id) = interpret_log_line(line)
            if linetype == 'meta':
                if linename == 'agents':
                    num_agents = int(value)
                elif linename == 'treatnames':
                    treatnames = re.split(r"\@\|", value)
                elif linename == 'block_id start':
                    # One progress dot per block.
                    sys.stdout.write(".")
                    sys.stdout.flush()
                    # Only useful for debugging output, but parsing it also
                    # rejects a malformed block id early.
                    block_id = int(value)
                    adv = []
                    ints = []
                    newsv = []
                    for _ in range(num_agents):
                        adv.append(adVector.AdVector())
                        ints.append(interest.Interests())
                        newsv.append(news.NewsVector())
                elif linename == 'assignment':
                    assignment = [int(x) for x in re.split(r"\@\|", value)]
                elif linename == 'block_id end':
                    apply_labels_to_vecs(adv, ints, newsv, assignment,
                                         num_agents, len(treatnames))
                    par_adv.append({
                        'advector': adv,
                        'newsvector': newsv,
                        'assignment': assignment,
                        'intvector': ints,
                    })
            elif linetype == 'measurement':
                # The three measurement kinds are mutually exclusive.
                if linename == 'ad':
                    adv[int(unit_id)].add(ad.Ad(value, treatment_id))
                elif linename == 'interest':
                    ints[int(unit_id)].set_from_string(value)
                elif linename == 'news':
                    newsv[int(unit_id)].add(news.News(value, treatment_id))
            # 'treatment' and 'error' lines carry nothing that is collected.
    sys.stdout.write(".Reading complete\n")
    # Single-argument print(): same output under Python 2 and Python 3.
    print("Treatments:  %s" % (treatnames,))
    return par_adv, treatnames
def interest_vectors(list):
    """Express every entry of ``list`` as a vector over their combined interests.

    All entries are first merged, one ``union`` call at a time, starting from
    an empty ``interest.Interests()``. Each entry is then converted with the
    merged object's ``gen_int_vec`` (a frequency vector, per the original
    author's comment) and its ``label`` attribute is collected alongside.

    Args:
        list: Iterable of interest objects; it is traversed twice, so it must
            be re-iterable (e.g. a real list) for the vectors to be produced.

    Returns:
        A tuple ``(vectors, labels, combined)``: the per-entry vectors, the
        per-entry labels in the same order, and the merged interests object.
    """
    combined = interest.Interests()
    for entry in list:
        combined = combined.union(entry)

    # Vector and label are taken together per entry, in input order.
    pairs = [(combined.gen_int_vec(entry), entry.label) for entry in list]
    vectors = [vec for vec, _ in pairs]
    labels = [label for _, label in pairs]
    return vectors, labels, combined
def _new_legacy_block(samples):
    """Return an empty block record holding one fresh vector/counter per agent."""
    adv = []
    ints = []
    newsv = []
    for _ in range(samples):
        adv.append(adVector.AdVector())
        ints.append(interest.Interests())
        newsv.append(news.NewsVector())
    return {
        'adv': adv,
        'newsv': newsv,
        'ass': [],
        'xf': [],
        'interests': ints,
        'break': False,
        'loadtimes': [timedelta(minutes=0)] * samples,
        'reloads': [0] * samples,
        'errors': [0] * samples,
    }


def read_log(log_file):
    """Parse a ``||``-delimited experiment log into one record per block.

    The file is read twice. The first pass inspects the header:

    * If the first field of the first line is ``'g'`` (old format), there are
      two treatments named ``'0'`` and ``'1'``, assignment lines start with
      ``'g'``, and the number of agents is the number of remaining fields.
    * Otherwise fields 1 and 2 of the first line give the number of agents
      and of treatments, the second line lists the treatment names (from
      field 1 on), and assignment lines start with ``'assign'``.

    The second pass walks every line. An assignment line opens a block (and
    closes the previous one, if any); the other recognised lines update the
    current block:

    * ``Xvfbfailure||<treatment>||...``: treatment appended to ``'xf'``.
    * field 1 ``breakingout``: sets ``'break'``.
    * field 1 ``loadtime``: adds the h/m/s of field 2 to the agent in field 3.
    * field 1 ``reload`` / ``errorcollecting``: counts per agent (field 2).
    * field 1 ``prepref`` / ``pref``: clears / sets the interests of the
      agent in field 4.
    * field 0 ``news`` / ``ad``: adds an item to the agent in field 1.
    * anything else is tried as an old-format ad line and skipped when it
      does not parse.

    Lines seen before the first assignment line are accumulated into the
    initial block record.

    NOTE(review): an earlier ``read_log`` is defined in this file as well; if
    both are module-level this definition is the one that remains bound.

    Args:
        log_file: Path of the log file to read.

    Returns:
        A tuple ``(par_adv, treatnames)``. ``par_adv`` has one dict per block
        with keys ``'adv'``, ``'newsv'``, ``'ass'``, ``'xf'``,
        ``'interests'``, ``'break'``, ``'loadtimes'``, ``'reloads'`` and
        ``'errors'``; the block in progress at end of file is always appended.
        ``treatnames`` is the list of treatment names.

    Raises:
        IndexError, ValueError: if the header line lacks the expected fields
            (this includes an empty file) or a recognised line is malformed.
        AssertionError: if the number of treatment names or the length of an
            assignment does not match the header.
    """
    # ---- Pass 1: header (format, number of agents, treatment names) ----
    treatnames = []
    with open(log_file, "r") as fo:
        chunks = re.split(r"\|\|", fo.readline())
        old = chunks[0] == 'g'
        if old:
            gmarker = 'g'
            treatments = 2
            treatnames = ['0', '1']
            samples = len(chunks) - 1
        else:
            gmarker = 'assign'
            treatments = int(chunks[2])
            samples = int(chunks[1])
            chunks = re.split(r"\|\|", fo.readline())
            treatnames = [name.strip() for name in chunks[1:]]
    assert treatments == len(treatnames)
    for i in range(treatments):
        # Single-argument print(): same output under Python 2 and Python 3.
        print("Treatment  %s  =  %s" % (i, treatnames[i]))

    # ---- Pass 2: collect measurements block by block ----
    par_adv = []
    block = _new_legacy_block(samples)
    seen_assignment = False
    sys.stdout.write("Scanning ads")
    with open(log_file, "r") as fo:
        for line in fo:
            chunks = re.split(r"\|\|", line)
            chunks[-1] = chunks[-1].rstrip()
            head = chunks[0]
            # Single-field lines (e.g. blank lines) have no second field;
            # they must reach the best-effort fallback instead of raising.
            kind = chunks[1] if len(chunks) > 1 else None

            if head == gmarker:
                if seen_assignment:
                    # A further assignment line closes the running block.
                    par_adv.append(block)
                    sys.stdout.write(".")
                    sys.stdout.flush()
                    block = _new_legacy_block(samples)
                seen_assignment = True
                block['ass'] = chunks[1:] if old else chunks[2:]
                assert len(block['ass']) == samples
                apply_labels_to_vecs(block['adv'], block['interests'],
                                     block['newsv'], block['ass'],
                                     samples, treatments)
            elif head == 'Xvfbfailure':
                block['xf'].append(chunks[1])
            elif kind == 'breakingout':
                block['break'] = True
            elif kind == 'loadtime':
                t = datetime.strptime(chunks[2], "%H:%M:%S.%f")
                agent = int(chunks[3])
                # NOTE(review): the parsed microseconds are not added --
                # unchanged from the original; confirm this is intended.
                block['loadtimes'][agent] += timedelta(
                    hours=t.hour, minutes=t.minute, seconds=t.second)
            elif kind == 'reload':
                block['reloads'][int(chunks[2])] += 1
            elif kind == 'errorcollecting':
                block['errors'][int(chunks[2])] += 1
            elif kind == 'prepref':
                block['interests'][int(chunks[4])].remove_interest()
            elif kind == 'pref':
                agent = int(chunks[4])
                block['interests'][agent].set_from_string(chunks[3])
            elif head == 'news':
                item = news.News({
                    'Time': datetime.strptime(chunks[3],
                                              "%Y-%m-%d %H:%M:%S.%f"),
                    'Title': chunks[4],
                    'Agency': chunks[5],
                    'Ago': chunks[6],
                    'Body': chunks[7].rstrip(),
                    'Label': chunks[2],
                })
                block['newsv'][int(chunks[1])].add(item)
            elif head == 'ad':
                item = ad.Ad({
                    'Time': datetime.strptime(chunks[3],
                                              "%Y-%m-%d %H:%M:%S.%f"),
                    'Title': chunks[4],
                    'URL': chunks[5],
                    'Body': chunks[6].rstrip(),
                    'cat': "",
                    'Label': chunks[2],
                })
                block['adv'][int(chunks[1])].add(item)
            else:
                # Old log files carry ads without a leading 'ad' tag. This is
                # deliberately best-effort: header lines, blank lines and
                # other noise also land here and are skipped. Catching
                # Exception (not a bare except) keeps Ctrl-C working.
                # NOTE(review): this dict uses the key 'label' while the
                # tagged branches use 'Label' -- confirm against ad.Ad.
                try:
                    item = ad.Ad({
                        'Time': datetime.strptime(chunks[2],
                                                  "%Y-%m-%d %H:%M:%S.%f"),
                        'Title': chunks[3],
                        'URL': chunks[4],
                        'Body': chunks[5].rstrip(),
                        'cat': "",
                        'label': chunks[1],
                    })
                    block['adv'][int(chunks[0])].add(item)
                except Exception:
                    pass

    # The block still in progress at end of file is always recorded.
    par_adv.append(block)
    sys.stdout.write(".Scanning complete\n")
    sys.stdout.flush()
    return par_adv, treatnames