def test_update_visit(self):
    """Insert a visit, then update its duration/incomplete flags and
    verify that the UPDATE_VISIT command persists the new values."""
    test_url = "http://example.com"
    start_time = strftime("%Y%m%d-%H%M%S")
    browser_event = cm.BrowserEvent()
    browser_event.event_type = cm.EVENT_NEW_VISIT
    visit = cm.VisitInfo()
    visit.url = test_url
    visit.start_time = start_time
    visit.out_db = self.test_db
    visit.duration = 0
    visit.incomplete = 1
    visit.visit_id = dbu.insert_to_db(dbu.DBCmd.ADD_VISIT, browser_event,
                                      visit)
    # Read back the freshly inserted row and confirm the initial values.
    stored = dbu.get_db_entry(self.test_db, dbu.DBCmd.VISIT_BY_ID,
                              visit.visit_id)
    self.assertEqual(visit.duration, stored.duration)
    self.assertEqual(visit.incomplete, stored.incomplete)
    # Update the visit and make sure the new values are stored.
    visit.duration = 33
    visit.incomplete = 0
    dbu.insert_to_db(dbu.DBCmd.UPDATE_VISIT, browser_event, visit)
    stored = dbu.get_db_entry(self.test_db, dbu.DBCmd.VISIT_BY_ID,
                              visit.visit_id)
    self.assertEqual(visit.duration, stored.duration)
    self.assertEqual(visit.incomplete, stored.incomplete)
def get_flash_evercookies(db1, db2, other_prof_dbs, seed_prof): seed_prof_cookie_db = j(seed_prof, "cookies.sqlite") lsos_by_visit = defaultdict(list) lsos_db1 = split_lsos(get_distinct_items(db1, ("content", "flash_cookies"))) lsos_db2 = split_lsos(get_distinct_items(db2, ("content", "flash_cookies"))) lsos_db3 = get_flash_cookies_from_dbs(other_prof_dbs) # lsos_db3 = get_distinct_items(db3, ("content", "flash_cookies")) print len(lsos_db2), "lsos in db 2", len(lsos_db3), "lsos in db 3" print len(lsos_db1 & lsos_db2), "common lsos in db 1 & 2" for lso_item in dbu.get_db_entry(db1, dbu.DBCmd.GET_FLASH_COOKIES, None): content = lso_item[7] v_id = lso_item[1] lso_id = lso_item[0] splitted = split_lso(content) for item in splitted: if (item and len(item) > 5 and item in lsos_db2 and item not in lsos_db3): if "es|utmccn" in content: print "*******", content, lso_id, splitted lsos_by_visit[(v_id, lso_id)].append({"item": item, "path": lso_item[5], "content": content, "key": lso_item[6], "domain": lso_item[3]}) print len(lsos_by_visit), "lsos in db 1" return grep_in_visit_and_profile_data(seed_prof_cookie_db, (db1, db2), lsos_by_visit, dbu.DBTable.LSO, other_prof_dbs)
def grep_in_visit_and_profile_data(seed_cookie_db, visit_dbs, lsos_by_visit, exclude_table, other_prof_dbs): looked_items = Set() for (v_id, _), lso_dicts in lsos_by_visit.iteritems(): for lso_dict in lso_dicts: match = lso_dict["item"] if match in looked_items: continue looked_items.add(match) # find the cookies in the original seeded profile that match the common LSO IDs. cookies = dbu.get_db_entry(seed_cookie_db, dbu.DBCmd.GREP_IN_PROFILE_DATA, (match, v_id, exclude_table)) # since the cookies are removed from the profile before seeding, # the cookies in found in the subsequent visits must have been # respawned. By "seeding" we mean copying the LSOs from a profile # to another computer to allow sites to exploit LSOs but nothing # else (e.g. cookies). See Section 4 of the paper for a detailed # explanation of the method. # https://securehomes.esat.kuleuven.be/~gacar/persistent/the_web_never_forgets.pdf if len(cookies["cookie"]): vis1_cookies = dbu.get_db_entry(visit_dbs[0], dbu.DBCmd.GREP_IN_VISIT_COOKIES, match) vis2_cookies = dbu.get_db_entry(visit_dbs[1], dbu.DBCmd.GREP_IN_VISIT_COOKIES, match) if len(vis1_cookies) and len(vis2_cookies): # the cookie should not be found in visit data from an # unrelated profile. other_vis_cookies = dbu.get_db_entry(other_prof_dbs[0], dbu.DBCmd.GREP_IN_VISIT_COOKIES, match) if not len(other_vis_cookies): prof_cookies_ul =\ get_html_from_moz_cookies(cookies["cookie"]) visit1_cookies_ul =\ get_html_from_visit_cookies(vis1_cookies) visit2_cookies_ul =\ get_html_from_visit_cookies(vis2_cookies) yield match, lso_dict["key"], lso_dict["content"],\ lso_dict["domain"], lso_dict["path"],\ prof_cookies_ul, visit1_cookies_ul,\ visit2_cookies_ul else: print "Found in other db", match
def grep_in_visit_and_profile_data(seed_cookie_db, visit_dbs, lsos_by_visit,
                                   exclude_table, other_prof_dbs):
    """Yield a tuple of evidence for each LSO item that respawned a cookie.

    For every unique LSO item string, look for matching cookies in the
    seeded profile and in both visit databases; skip items also present
    in an unrelated profile's visit data.
    """
    looked_items = Set()
    for (v_id, _), lso_dicts in lsos_by_visit.iteritems():
        for lso_dict in lso_dicts:
            match = lso_dict["item"]
            # each candidate item string is only checked once
            if match in looked_items:
                continue
            looked_items.add(match)
            # find the cookies in the original seeded profile that match
            # the common LSO IDs.
            cookies = dbu.get_db_entry(seed_cookie_db,
                                       dbu.DBCmd.GREP_IN_PROFILE_DATA,
                                       (match, v_id, exclude_table))
            # since the cookies are removed from the profile before seeding,
            # the cookies in found in the subsequent visits must have been
            # respawned. By "seeding" we mean copying the LSOs from a profile
            # to another computer to allow sites to exploit LSOs but nothing
            # else (e.g. cookies). See Section 4 of the paper for a detailed
            # explanation of the method.
            # https://securehomes.esat.kuleuven.be/~gacar/persistent/the_web_never_forgets.pdf
            if len(cookies["cookie"]):
                vis1_cookies = dbu.get_db_entry(
                    visit_dbs[0], dbu.DBCmd.GREP_IN_VISIT_COOKIES, match)
                vis2_cookies = dbu.get_db_entry(
                    visit_dbs[1], dbu.DBCmd.GREP_IN_VISIT_COOKIES, match)
                # the item must match cookies in BOTH visit databases
                if len(vis1_cookies) and len(vis2_cookies):
                    # the cookie should not be found in visit data from an
                    # unrelated profile.
                    other_vis_cookies = dbu.get_db_entry(
                        other_prof_dbs[0], dbu.DBCmd.GREP_IN_VISIT_COOKIES,
                        match)
                    if not len(other_vis_cookies):
                        # render the cookie evidence as HTML fragments for
                        # the report
                        prof_cookies_ul =\
                            get_html_from_moz_cookies(cookies["cookie"])
                        visit1_cookies_ul =\
                            get_html_from_visit_cookies(vis1_cookies)
                        visit2_cookies_ul =\
                            get_html_from_visit_cookies(vis2_cookies)
                        yield match, lso_dict["key"], lso_dict["content"],\
                            lso_dict["domain"], lso_dict["path"],\
                            prof_cookies_ul, visit1_cookies_ul,\
                            visit2_cookies_ul
                    else:
                        print "Found in other db", match
def check_localstorage_db_ops(self, ls_items):
    """Assert that exactly one localStorage row was stored for the visit
    and that its fields match the expected test constants."""
    db_rows = dbu.get_db_entry(self.vi.out_db,
                               dbu.DBCmd.LOCALSTORAGE_BY_VISIT_ID,
                               self.vi.visit_id).fetchall()
    self.assertEqual(len(db_rows), 1)
    # row layout: (_, _, url, scope, key, value)
    for _, _, url, scope, key, value in db_rows:
        self.assertEqual(scope, EXPECTED_LS_ORIGIN)
        self.assertEqual(key, EXPECTED_LS_KEY)
        self.assertEqual(value, EXPECTED_LS_VALUE)
        self.assertEqual(url, LS_TEST_URL)
def count_inclusion(db_file, domain): # % of sites that include a domain includers = set() db_rows = dbu.get_db_entry(db_file, dbu.DBCmd.GREP_IN_REQ_URLS, domain) for db_row in db_rows: rank = db_row[9] if rank not in includers: if get_tld(db_row[3]) == domain: includers.add(rank) # rank # print rank, db_row[3] print len(includers), "includes", domain return includers
def test_r_w_visit_to_db(self):
    """Write a visit row to the DB, read it back, and check that the URL
    and start time round-trip unchanged."""
    browser_event = cm.BrowserEvent()
    browser_event.event_type = cm.EVENT_NEW_VISIT
    visit = cm.VisitInfo()
    visit.url = "http://example.com"
    visit.start_time = strftime("%Y%m%d-%H%M%S")
    visit.out_db = self.test_db
    visit_id = dbu.insert_to_db(dbu.DBCmd.ADD_VISIT, browser_event, visit)
    stored = dbu.get_db_entry(self.test_db, dbu.DBCmd.VISIT_BY_ID, visit_id)
    self.assertEqual(visit.url, stored.url)
    self.assertEqual(visit.start_time, stored.start_time)
def test_lso_db_ops(self):
    """Parse LSO events from the strace logs, store them, and verify that
    the single stored row matches the expected test constants."""
    lso_events = lso.parse_strace_logs(self.vi, test_lso=self.lso_file)
    dbu.insert_to_db(dbu.DBCmd.ADD_LSO_ITEMS, lso_events, self.vi)
    rows = dbu.get_db_entry(self.vi.out_db, dbu.DBCmd.GET_FLASH_COOKIES,
                            self.vi.visit_id).fetchall()
    self.assertEqual(len(rows), 1)
    # columns 2..7: page_url, domain, filename, local_path, key, content
    page_url, domain, filename, local_path, key, content = rows[0][2:8]
    self.assertEqual(page_url, self.vi.url)
    self.assertEqual(domain, cm.ONLINE_TEST_HOST)
    self.assertEqual(filename, TEST_LSO_FILENAME)
    self.assertEqual(local_path, TEST_LSO_REL_PATH)
    self.assertEqual(key, TEST_LSO_KEYNAME)
    self.assertEqual(content, TEST_LSO_VALUE)
def test_r_w_canvas_to_db(self):
    """Insert a canvas (toDataURL) browser event and verify that every
    stored field is read back correctly."""
    event = cm.BrowserEvent()
    event.event_type = cm.EVENT_TODATAURL
    event.url = "http://example.com"
    event.js_file = "http://example.com/fp.js"
    event.js_line = 5
    event.txt = "data:asdsads"
    visit = cm.VisitInfo()
    visit.visit_id = 1
    visit.out_db = self.test_db
    canvas_ev_id = dbu.insert_to_db(dbu.DBCmd.ADD_CANVAS, event, visit)
    self.assertGreater(canvas_ev_id, 0)
    visit_id, data_url_id, event_time, stored_event = dbu.get_db_entry(
        self.test_db, dbu.DBCmd.CANVAS_BY_ID, canvas_ev_id)
    self.assertEqual(visit.visit_id, visit_id)
    self.assertEqual(data_url_id, 1)
    self.assertEqual(stored_event.event_type, event.event_type)
    self.assertEqual(stored_event.url, event.url)
    self.assertEqual(stored_event.js_file, event.js_file)
    self.assertEqual(stored_event.js_line, event.js_line)
    self.assertEqual(event_time, 0)
def get_flash_evercookies(db1, db2, other_prof_dbs, seed_prof):
    """Collect LSO items present in both crawl DBs (db1 and db2) but not
    in the unrelated-profile DBs, then search for matching cookies via
    grep_in_visit_and_profile_data (returns its generator)."""
    seed_prof_cookie_db = j(seed_prof, "cookies.sqlite")
    lsos_by_visit = defaultdict(list)
    # distinct flash-cookie contents of each DB, split into item strings
    lsos_db1 = split_lsos(get_distinct_items(db1, ("content",
                                                   "flash_cookies")))
    lsos_db2 = split_lsos(get_distinct_items(db2, ("content",
                                                   "flash_cookies")))
    lsos_db3 = get_flash_cookies_from_dbs(other_prof_dbs)
    # lsos_db3 = get_distinct_items(db3, ("content", "flash_cookies"))
    print len(lsos_db2), "lsos in db 2", len(lsos_db3), "lsos in db 3"
    print len(lsos_db1 & lsos_db2), "common lsos in db 1 & 2"
    for lso_item in dbu.get_db_entry(db1, dbu.DBCmd.GET_FLASH_COOKIES, None):
        content = lso_item[7]
        v_id = lso_item[1]
        lso_id = lso_item[0]
        splitted = split_lso(content)
        for item in splitted:
            # keep items longer than 5 chars that appear in db2 but are
            # absent from the unrelated-profile DBs
            if (item and len(item) > 5 and item in lsos_db2 and
                    item not in lsos_db3):
                if "es|utmccn" in content:
                    print "*******", content, lso_id, splitted
                lsos_by_visit[(v_id, lso_id)].append({
                    "item": item,
                    "path": lso_item[5],
                    "content": content,
                    "key": lso_item[6],
                    "domain": lso_item[3]
                })
    print len(lsos_by_visit), "lsos in db 1"
    return grep_in_visit_and_profile_data(seed_prof_cookie_db, (db1, db2),
                                          lsos_by_visit, dbu.DBTable.LSO,
                                          other_prof_dbs)
def gen_crawl_report(db_file, db_pass2=None, db_other_profs=None, prof_dir=None): """ visits_cnt, cookies, localstorage, flash cookies, cache, indexeddb, http reqs/resps canvas: list distinct FPers, linked to the sites that include this FPer evercookie: list potential evercookies by searching ID-like common strings among different vectors""" out_dir = os.path.dirname(db_file) crawl_name = os.path.basename(os.path.dirname(db_file)) figs = [] # figures to be plotted, removed for now. respawned = [] if db_pass2 and db_other_profs and prof_dir: respawned = ev.get_flash_evercookies(db_file, db_pass2, db_other_profs, prof_dir) start, end = dbu.get_db_entry(db_file, dbu.DBCmd.GET_VISIT_DATES, False) visits_cnt = dbu.get_db_entry(db_file, dbu.DBCmd.COUNT_VISITS, False)[0] completed_visits_cnt = dbu.get_db_entry(db_file, dbu.DBCmd.COUNT_VISITS, True)[0] cookies = dbu.get_db_entry(db_file, dbu.DBCmd.COUNT_COOKIES, 0) localstorage = dbu.get_db_entry(db_file, dbu.DBCmd.COUNT_LOCALSTORAGE, 0) print "genreport len(localstorage)", len(localstorage) xsite_flash_cookies = get_xsite_flash_cookies(db_file) xsite_local_storage = get_xsite_local_storage(db_file) try: flash_cookie_count = dbu.get_db_entry(db_file, dbu.DBCmd.COUNT_LSO, 0) except: flash_cookie_count = [""] canvas_meta_rows = dbu.get_db_entry(db_file, dbu.DBCmd.GET_CANVAS_META, 0) canvas_scr_domains = {} canvas_events_per_script = {} canvas_url_counts = {} canvas_domain_counts = {} canvas_script_urls = dbu.get_db_entry(db_file, dbu.DBCmd.GET_CANVAS_SCRIPTS, 0) false_positives = [] for canvas_script_url_tup in canvas_script_urls: canvas_script_url = canvas_script_url_tup[0] canvas_events = dbu.get_db_entry(db_file, dbu.DBCmd.GET_CANVAS_EVENTS_BY_SCRIPT, canvas_script_url) if not ca.is_canvas_false_positive(canvas_events): scr_evs = dbu.get_db_entry(db_file, dbu.DBCmd.GET_CANVAS_EVENTS_BY_SCRIPT, canvas_script_url) canvas_events_per_script[canvas_script_url] = scr_evs url_cnts = dbu.get_db_entry(db_file, 
dbu.DBCmd.COUNT_SITES_BY_CANVAS_SCRIPT, canvas_script_url) canvas_url_counts[canvas_script_url] = url_cnts domain = cu.extract_domain(canvas_script_url) if domain in canvas_scr_domains: canvas_scr_domains[domain].append(canvas_script_url) else: canvas_scr_domains[domain] = [canvas_script_url] else: false_positives.append(canvas_script_url_tup) # print canvas_script_url_tup # Remove false positives for false_positive in false_positives: canvas_script_urls.remove(false_positive) # total_canvas_fp_count = sum() all_canvasfp_ranks = {} all_canvasfp_ranks_urls = {} for canvas_scr_domain, canvas_scr_urls in canvas_scr_domains.iteritems(): script_ranks_and_urls =\ dbu.get_db_entry(db_file, dbu.DBCmd.GET_RANK_AND_URLS_BY_CANVAS_SCRIPTS, canvas_scr_urls) canvas_domain_counts[canvas_scr_domain] = len(script_ranks_and_urls) all_canvasfp_ranks[canvas_scr_domain] = map(lambda x: x[0], script_ranks_and_urls) all_canvasfp_ranks_urls[canvas_scr_domain] = script_ranks_and_urls # print all_canvasfp_ranks # fu.write_to_file(j(out_dir, "%s-canvas.json" % crawl_name), # json.dumps(all_canvasfp_ranks)) total_canvas_fp_count = sum(canvas_domain_counts.itervalues()) # print "Total canvas FP count", total_canvas_fp_count rank_set = set() for _, v in all_canvasfp_ranks.iteritems(): for rank in v: rank_set.add(rank) # print "Total canvas FP count - uniq", len(rank_set) nameSpace = { 'title': "Crawl Report", 'visits_cnt': visits_cnt, 'completed_visits_cnt': completed_visits_cnt, 'cookies': cookies[0], 'localstorage': localstorage[0], 'flash_cookie_count': flash_cookie_count[0], 'canvas_meta_rows': canvas_meta_rows, 'start': start, 'end': end, 'canvas_domain_counts': canvas_domain_counts, 'canvas_url_counts': canvas_url_counts, 'canvas_events_per_script': canvas_events_per_script, 'canvas_scr_domains': canvas_scr_domains, 'total_canvas_fp_count': total_canvas_fp_count, 'canvas_script_urls': canvas_script_urls, 'get_tld': cu.extract_domain, 'xsite_flash_cookies': xsite_flash_cookies, 
'xsite_local_storages': xsite_local_storage, 'respawned': respawned, 'figs': figs, 'canvasfp_ranks_urls': all_canvasfp_ranks_urls, # '3rdp_cookies': 3rdp_cookies, } report_template = Template(template_str, searchList=[nameSpace]) fu.write_to_file(j(out_dir, "%s-report.html" % crawl_name), str(report_template))
def get_xsite_local_storage(db_file):
    """Return the localStorage entries that appear across different sites."""
    return dbu.get_db_entry(db_file, dbu.DBCmd.GET_XSITE_LOCALSTORAGE, False)
def get_xsite_flash_cookies(db_file): ec_candidates = dbu.get_db_entry(db_file, dbu.DBCmd.GET_XSITE_FLASH_COOKIES, False) for ec_candidate in ec_candidates: print list(ec_candidate) return ec_candidates
def gen_crawl_report(db_file, db_pass2=None, db_other_profs=None,
                     prof_dir=None):
    """ visits_cnt, cookies, localstorage, flash cookies, cache, indexeddb,
    http reqs/resps
    canvas: list distinct FPers, linked to the sites that include this FPer
    evercookie: list potential evercookies by searching ID-like common
    strings among different vectors

    Writes an HTML report next to *db_file*; the evercookie analysis runs
    only when db_pass2, db_other_profs and prof_dir are all given."""
    out_dir = os.path.dirname(db_file)
    crawl_name = os.path.basename(os.path.dirname(db_file))
    figs = []  # figures to be plotted, removed for now.
    respawned = []
    if db_pass2 and db_other_profs and prof_dir:
        respawned = ev.get_flash_evercookies(db_file, db_pass2,
                                             db_other_profs, prof_dir)
    start, end = dbu.get_db_entry(db_file, dbu.DBCmd.GET_VISIT_DATES, False)
    visits_cnt = dbu.get_db_entry(db_file, dbu.DBCmd.COUNT_VISITS, False)[0]
    completed_visits_cnt = dbu.get_db_entry(db_file, dbu.DBCmd.COUNT_VISITS,
                                            True)[0]
    cookies = dbu.get_db_entry(db_file, dbu.DBCmd.COUNT_COOKIES, 0)
    localstorage = dbu.get_db_entry(db_file, dbu.DBCmd.COUNT_LOCALSTORAGE, 0)
    print "genreport len(localstorage)", len(localstorage)
    xsite_flash_cookies = get_xsite_flash_cookies(db_file)
    xsite_local_storage = get_xsite_local_storage(db_file)
    try:
        flash_cookie_count = dbu.get_db_entry(db_file, dbu.DBCmd.COUNT_LSO, 0)
    # NOTE(review): bare except swallows all errors including
    # KeyboardInterrupt/SystemExit — consider narrowing to Exception.
    except:
        flash_cookie_count = [""]
    canvas_meta_rows = dbu.get_db_entry(db_file, dbu.DBCmd.GET_CANVAS_META, 0)
    canvas_scr_domains = {}
    canvas_events_per_script = {}
    canvas_url_counts = {}
    canvas_domain_counts = {}
    canvas_script_urls = dbu.get_db_entry(db_file,
                                          dbu.DBCmd.GET_CANVAS_SCRIPTS, 0)
    false_positives = []
    for canvas_script_url_tup in canvas_script_urls:
        canvas_script_url = canvas_script_url_tup[0]
        canvas_events = dbu.get_db_entry(db_file,
                                         dbu.DBCmd.GET_CANVAS_EVENTS_BY_SCRIPT,
                                         canvas_script_url)
        if not ca.is_canvas_false_positive(canvas_events):
            # re-query: canvas_events may have been consumed by the
            # false-positive check above — TODO confirm
            scr_evs = dbu.get_db_entry(db_file,
                                       dbu.DBCmd.GET_CANVAS_EVENTS_BY_SCRIPT,
                                       canvas_script_url)
            canvas_events_per_script[canvas_script_url] = scr_evs
            url_cnts = dbu.get_db_entry(db_file,
                                        dbu.DBCmd.COUNT_SITES_BY_CANVAS_SCRIPT,
                                        canvas_script_url)
            canvas_url_counts[canvas_script_url] = url_cnts
            domain = cu.extract_domain(canvas_script_url)
            if domain in canvas_scr_domains:
                canvas_scr_domains[domain].append(canvas_script_url)
            else:
                canvas_scr_domains[domain] = [canvas_script_url]
        else:
            false_positives.append(canvas_script_url_tup)
            # print canvas_script_url_tup
    # Remove false positives
    for false_positive in false_positives:
        canvas_script_urls.remove(false_positive)
    # total_canvas_fp_count = sum()
    all_canvasfp_ranks = {}
    all_canvasfp_ranks_urls = {}
    for canvas_scr_domain, canvas_scr_urls in canvas_scr_domains.iteritems():
        script_ranks_and_urls =\
            dbu.get_db_entry(db_file,
                             dbu.DBCmd.GET_RANK_AND_URLS_BY_CANVAS_SCRIPTS,
                             canvas_scr_urls)
        canvas_domain_counts[canvas_scr_domain] = len(script_ranks_and_urls)
        all_canvasfp_ranks[canvas_scr_domain] = map(lambda x: x[0],
                                                    script_ranks_and_urls)
        all_canvasfp_ranks_urls[canvas_scr_domain] = script_ranks_and_urls
    # print all_canvasfp_ranks
    # fu.write_to_file(j(out_dir, "%s-canvas.json" % crawl_name),
    #                  json.dumps(all_canvasfp_ranks))
    total_canvas_fp_count = sum(canvas_domain_counts.itervalues())
    # print "Total canvas FP count", total_canvas_fp_count
    # collect the distinct site ranks across all fingerprinting domains
    rank_set = set()
    for _, v in all_canvasfp_ranks.iteritems():
        for rank in v:
            rank_set.add(rank)
    # print "Total canvas FP count - uniq", len(rank_set)
    # template search namespace for the Cheetah report template
    nameSpace = {'title': "Crawl Report",
                 'visits_cnt': visits_cnt,
                 'completed_visits_cnt': completed_visits_cnt,
                 'cookies': cookies[0],
                 'localstorage': localstorage[0],
                 'flash_cookie_count': flash_cookie_count[0],
                 'canvas_meta_rows': canvas_meta_rows,
                 'start': start,
                 'end': end,
                 'canvas_domain_counts': canvas_domain_counts,
                 'canvas_url_counts': canvas_url_counts,
                 'canvas_events_per_script': canvas_events_per_script,
                 'canvas_scr_domains': canvas_scr_domains,
                 'total_canvas_fp_count': total_canvas_fp_count,
                 'canvas_script_urls': canvas_script_urls,
                 'get_tld': cu.extract_domain,
                 'xsite_flash_cookies': xsite_flash_cookies,
                 'xsite_local_storages': xsite_local_storage,
                 'respawned': respawned,
                 'figs': figs,
                 'canvasfp_ranks_urls': all_canvasfp_ranks_urls,
                 # '3rdp_cookies': 3rdp_cookies,
                 }
    report_template = Template(template_str, searchList=[nameSpace])
    fu.write_to_file(j(out_dir, "%s-report.html" % crawl_name),
                     str(report_template))
def get_distinct_items(db, item_type):
    """Return the distinct values for *item_type* (column, table) in *db*
    as a Set of the first column of each row."""
    rows = dbu.get_db_entry(db, dbu.DBCmd.GET_DISTINCT_FROM_DB, item_type)
    return Set(row[0] for row in rows)
def get_distinct_items(db, item_type):
    """Return the distinct values for *item_type* (column, table) in *db*
    as a Set of the first column of each row."""
    distinct = Set()
    for row in dbu.get_db_entry(db, dbu.DBCmd.GET_DISTINCT_FROM_DB,
                                item_type):
        distinct.add(row[0])
    return distinct