示例#1
0
    def test_rand_str(self):
        # Checks the length and the alphabet of strings built by ut.rand_str.

        # Default parameters: default size and default character set.
        random_str = ut.rand_str()
        self.assertEqual(ut.DEFAULT_RAND_STR_SIZE, len(random_str),
                         "rand_str does not return string with default size!")
        self.assertFalse(set(random_str) - set(ut.DEFAULT_RAND_STR_CHARS),
                         "Unexpected characters in string!")

        # Explicit sizes and character sets.
        sizes = [1, 2, 10, 100]
        charsets = (ut.DEFAULT_RAND_STR_CHARS, ut.DIGITS)
        for size in sizes:
            for charset in charsets:
                random_str = ut.rand_str(size, charset)
                self.assertEqual(len(random_str), size,
                                 "Unexpected random string size!")
                # Compare against the charset that was requested (not the
                # default one), otherwise a digits-only request would pass
                # even if rand_str ignored its charset argument.
                self.assertFalse(set(random_str) - set(charset),
                                 "Unexpected characters in string!")
示例#2
0
 def test_disable_flash(self):
     # Visit the LSO-setting test page with Flash disabled and expect that
     # no Flash cookie is reported for the visit.
     random_value = rand_str()
     query = '?lso_test_key=%s&lso_test_value=%s' % ("test_key", random_value)
     page_url = cm.BASE_TEST_URL + '/evercookie/lso/setlso.html' + query
     visit_results = ffm.visit_page(page_url,
                                    wait_on_site=3,
                                    flash_support=cm.FLASH_DISABLE)
     flash_cookies = visit_results["flash_cookies"]
     self.assertEqual(len(flash_cookies), 0)
示例#3
0
 def test_disable_flash(self):
     # With Flash support switched off, visiting the page that sets an LSO
     # must yield an empty list of Flash cookies.
     token = rand_str()
     params = '?lso_test_key=%s&lso_test_value=%s' % ("test_key", token)
     target = cm.BASE_TEST_URL + '/evercookie/lso/setlso.html' + params
     outcome = ffm.visit_page(
         target, wait_on_site=3, flash_support=cm.FLASH_DISABLE)
     cookies_seen = outcome["flash_cookies"]
     self.assertEqual(len(cookies_seen), 0)
示例#4
0
def get_ff_cache(profile_dir, store_body=False):
    """Collect long-lived entries from a Firefox profile's disk cache.

    The cache map under ``<profile_dir>/Cache`` is dumped into a temporary
    directory by the external Perl script (CACHE_PERL_SCRIPT). Every dumped
    ``*_metadata`` file is parsed and paired with the body file that has the
    same name minus the ``_metadata`` suffix.

    Args:
        profile_dir: Path of the Firefox profile directory.
        store_body: When true the raw body is kept under the "Body" key,
            otherwise "Body" is set to an empty string.

    Returns:
        A list of dicts, one per metadata file that was not skipped. Entries
        whose computed lifetime is below DELTA_MONTH are skipped. An empty
        list is returned when the profile has no "Cache" directory.
    """
    cache_dir = os.path.join(profile_dir, "Cache")
    if not os.path.isdir(cache_dir):
        return []  # Firefox updated the cache dir structure since our study
    cache_map = os.path.join(cache_dir, "_CACHE_MAP_")
    cache_dump = os.path.join(BASE_TMP_DIR,
                              append_timestamp("cache") + rand_str())
    create_dir(cache_dump)
    cache_items = []
    db_items = ("Etag", "Request String", "Expires", "Cache-Control")
    metadata_suffix = "_metadata"
    try:
        subprocess.call([PERL_PATH, CACHE_PERL_SCRIPT, cache_map,
                         "--recover=" + cache_dump])
        for fname in glob(os.path.join(cache_dump, "*" + metadata_suffix)):
            item = {}
            try:
                with open(fname) as f:
                    item = parse_metadata(f.read())
                # Make sure every column expected downstream is present.
                for db_item in db_items:
                    if db_item not in item:
                        item[db_item] = ""

                # If a response includes both an Expires header and a max-age
                # directive, the max-age directive overrides the Expires header
                # (http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html)
                expiry_delta_sec = 0
                if "Expires" in item:
                    # parse expiry date
                    expiry = parse_date(item["Expires"])
                    if expiry:
                        expiry_delta = expiry - datetime.now()
                        expiry_delta_sec = expiry_delta.total_seconds()
                # The key has no trailing colon; it is always present here
                # because of the defaults above, so test for a non-empty value.
                if item["Cache-Control"]:
                    # parse max-age directive
                    cache_directives = parse_cache_control_header(
                        item["Cache-Control"], cls=ResponseCacheControl)
                    if "max-age" in cache_directives:
                        try:
                            # Item access may hand back the raw header text,
                            # so convert before the numeric comparison below.
                            expiry_delta_sec = int(
                                cache_directives["max-age"])
                        except (TypeError, ValueError):
                            # Malformed max-age: keep the Expires-based value.
                            pass
                if expiry_delta_sec < DELTA_MONTH:
                    continue
                item["Expiry-Delta"] = expiry_delta_sec

                # The body lives next to the metadata file, without suffix.
                # Read it as bytes: it is hashed/stored as a binary blob.
                with open(fname[:-len(metadata_suffix)], "rb") as f:
                    data = f.read()
                item["Body"] = data if store_body else ""  # store as BLOB
                item["Hash"] = hash_text(base64.b64encode(data))
            except IOError as exc:
                print("Error processing cache: %s: %s" %
                      (exc, traceback.format_exc()))

            # NOTE(review): an entry that hit an IOError is still appended,
            # possibly without "Expiry-Delta"/"Body"/"Hash"; this matches the
            # previous behavior - confirm that callers expect it.
            cache_items.append(item)
    finally:
        # Always remove the temporary dump, even if parsing blew up.
        if os.path.isdir(cache_dump):
            shutil.rmtree(cache_dump)
    return cache_items
示例#5
0
def get_ff_cache(profile_dir, store_body=False):
    """Collect long-lived entries from a Firefox profile's disk cache.

    The cache map under ``<profile_dir>/Cache`` is dumped into a temporary
    directory by the external Perl script (CACHE_PERL_SCRIPT). Every dumped
    ``*_metadata`` file is parsed and paired with the body file that has the
    same name minus the ``_metadata`` suffix.

    Args:
        profile_dir: Path of the Firefox profile directory.
        store_body: When true the raw body is kept under the "Body" key,
            otherwise "Body" is set to an empty string.

    Returns:
        A list of dicts, one per metadata file that was not skipped. Entries
        whose computed lifetime is below DELTA_MONTH are skipped. An empty
        list is returned when the profile has no "Cache" directory.
    """
    cache_dir = os.path.join(profile_dir, "Cache")
    if not os.path.isdir(cache_dir):
        return []  # Firefox updated the cache dir structure since our study
    cache_map = os.path.join(cache_dir, "_CACHE_MAP_")
    cache_dump = os.path.join(BASE_TMP_DIR,
                              append_timestamp("cache") + rand_str())
    create_dir(cache_dump)
    cache_items = []
    db_items = ("Etag", "Request String", "Expires", "Cache-Control")
    metadata_suffix = "_metadata"
    try:
        subprocess.call(
            [PERL_PATH, CACHE_PERL_SCRIPT, cache_map,
             "--recover=" + cache_dump])
        for fname in glob(os.path.join(cache_dump, "*" + metadata_suffix)):
            item = {}
            try:
                with open(fname) as f:
                    item = parse_metadata(f.read())
                # Make sure every column expected downstream is present.
                for db_item in db_items:
                    if db_item not in item:
                        item[db_item] = ""

                # If a response includes both an Expires header and a max-age
                # directive, the max-age directive overrides the Expires header
                # (http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html)
                expiry_delta_sec = 0
                if "Expires" in item:
                    # parse expiry date
                    expiry = parse_date(item["Expires"])
                    if expiry:
                        expiry_delta = expiry - datetime.now()
                        expiry_delta_sec = expiry_delta.total_seconds()
                # The key has no trailing colon; it is always present here
                # because of the defaults above, so test for a non-empty value.
                if item["Cache-Control"]:
                    # parse max-age directive
                    cache_directives = parse_cache_control_header(
                        item["Cache-Control"], cls=ResponseCacheControl)
                    if "max-age" in cache_directives:
                        try:
                            # Item access may hand back the raw header text,
                            # so convert before the numeric comparison below.
                            expiry_delta_sec = int(
                                cache_directives["max-age"])
                        except (TypeError, ValueError):
                            # Malformed max-age: keep the Expires-based value.
                            pass
                if expiry_delta_sec < DELTA_MONTH:
                    continue
                item["Expiry-Delta"] = expiry_delta_sec

                # The body lives next to the metadata file, without suffix.
                # Read it as bytes: it is hashed/stored as a binary blob.
                with open(fname[:-len(metadata_suffix)], "rb") as f:
                    data = f.read()
                item["Body"] = data if store_body else ""  # store as BLOB
                item["Hash"] = hash_text(base64.b64encode(data))
            except IOError as exc:
                print("Error processing cache: %s: %s" %
                      (exc, traceback.format_exc()))

            # NOTE(review): an entry that hit an IOError is still appended,
            # possibly without "Expiry-Delta"/"Body"/"Hash"; this matches the
            # previous behavior - confirm that callers expect it.
            cache_items.append(item)
    finally:
        # Always remove the temporary dump, even if parsing blew up.
        if os.path.isdir(cache_dump):
            shutil.rmtree(cache_dump)
    return cache_items
示例#6
0
    def test_rand_str(self):
        # Checks the length and the alphabet of strings built by ut.rand_str.

        # Default parameters: default size and default character set.
        random_str = ut.rand_str()
        self.assertEqual(ut.DEFAULT_RAND_STR_SIZE, len(random_str),
                         "rand_str does not return string with default size!")
        self.assertFalse(
            set(random_str) - set(ut.DEFAULT_RAND_STR_CHARS),
            "Unexpected characters in string!")

        # Explicit sizes and character sets.
        sizes = [1, 2, 10, 100]
        charsets = (ut.DEFAULT_RAND_STR_CHARS, ut.DIGITS)
        for size in sizes:
            for charset in charsets:
                random_str = ut.rand_str(size, charset)
                self.assertEqual(len(random_str), size,
                                 "Unexpected random string size!")
                # Compare against the charset that was requested (not the
                # default one), otherwise a digits-only request would pass
                # even if rand_str ignored its charset argument.
                self.assertFalse(
                    set(random_str) - set(charset),
                    "Unexpected characters in string!")
示例#7
0
    def test_get_lso_from_visit(self):
        # Visit the page that sets a Flash LSO with a random value, then make
        # sure the visit results contain an LSO event carrying that value.
        lso_found = False
        lso_value = rand_str()
        qry_str = '?lso_test_key=%s&lso_test_value=%s' % ("test_key",
                                                          lso_value)
        test_url = cm.BASE_TEST_URL + '/evercookie/lso/setlso.html' + qry_str
        results = ffm.visit_page(test_url, wait_on_site=3)
        lso_items = results["flash_cookies"]
        # assertTrue replaces the deprecated failUnless alias.
        self.assertTrue(len(lso_items))

        for test_lso in lso_items:
            # Every reported item must be an LSO event from the test host.
            self.assertEqual(test_lso.event_type, cm.EVENT_FLASH_LSO)
            self.assertIn(cm.ONLINE_TEST_HOST, test_lso.initiator)
            if TEST_LSO_KEYNAME == test_lso.key:
                self.assertEqual(lso_value, test_lso.log_text)
                lso_found = True
        self.assertTrue(lso_found, "Cannot find LSO with the value %s in %s" %
                        (lso_value, lso_items))
示例#8
0
    def test_get_lso_from_visit(self):
        # Visit the page that sets a Flash LSO with a random value, then make
        # sure the visit results contain an LSO event carrying that value.
        lso_found = False
        lso_value = rand_str()
        qry_str = '?lso_test_key=%s&lso_test_value=%s' % ("test_key",
                                                          lso_value)
        test_url = cm.BASE_TEST_URL + '/evercookie/lso/setlso.html' + qry_str
        results = ffm.visit_page(test_url, wait_on_site=3)
        lso_items = results["flash_cookies"]
        # assertTrue replaces the deprecated failUnless alias.
        self.assertTrue(len(lso_items))

        for test_lso in lso_items:
            # Every reported item must be an LSO event from the test host.
            self.assertEqual(test_lso.event_type, cm.EVENT_FLASH_LSO)
            self.assertIn(cm.ONLINE_TEST_HOST, test_lso.initiator)
            if TEST_LSO_KEYNAME == test_lso.key:
                self.assertEqual(lso_value, test_lso.log_text)
                lso_found = True
        self.assertTrue(
            lso_found,
            "Cannot find LSO with the value %s in %s" % (lso_value, lso_items))
示例#9
0
def open_log_file(out_dir, url):
    """Return the path of a fresh browser log file for `url` in `out_dir`.

    `out_dir` is created when it does not exist yet. Despite the name, no
    file is opened here; only the path is built and returned.
    """
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    # A random component is passed along with the URL to build the basename.
    log_prefix = "ff-%s" % ut.rand_str()
    log_name = '%s.log' % get_basename_from_url(url, log_prefix)
    return join(out_dir, log_name)
示例#10
0
def visit_page(url_tuple,
               timeout=cm.HARD_TIME_OUT,
               wait_on_site=cm.WAIT_ON_SITE,
               pre_crawl_sleep=False,
               out_dir=cm.BASE_TMP_DIR,
               flash_support=cm.FLASH_ENABLE,
               cookie_support=cm.COOKIE_ALLOW_ALL):
    """Visit one page in an instrumented browser and return the parsed logs.

    Args:
        url_tuple: Either a ``(rank, url)`` pair or a bare URL; a value that
            cannot be unpacked into two parts is visited with rank 0.
        timeout: Not used in this function body; kept for callers.
        wait_on_site: Seconds to sleep after the page load, before the
            browser is closed.
        pre_crawl_sleep: Not used in this function body; kept for callers.
        out_dir: Directory that receives the per-visit log and dump files.
        flash_support: Passed to get_browser and process_crawler_output;
            syscall logging is only started when it is truthy.
        cookie_support: Passed to get_browser.

    Returns:
        The value returned by process_crawler_output on success, or None
        when the visit timed out or raised any other exception (the error is
        logged and clean_up is called in both cases).
    """
    driver = None
    visit_info = cm.VisitInfo()
    try:
        visit_info.rank, visit_info.url = url_tuple
    except (TypeError, ValueError):
        # When rank of the page is not provided, we'll use rank=0.
        # Only unpacking failures are caught, so KeyboardInterrupt and
        # SystemExit are no longer swallowed here.
        visit_info.rank, visit_info.url = 0, url_tuple

    # Per-visit output files; random suffixes keep the names distinct.
    visit_info.sys_log = join(
        out_dir, "syscall-%s-%s.log" % (visit_info.rank, ut.rand_str()))
    visit_info.http_log = join(
        out_dir, "http-%s-%s.log" % (visit_info.rank, ut.rand_str()))
    visit_info.http_dump = join(
        out_dir, "mitm-%s-%s.dmp" % (visit_info.rank, ut.rand_str()))
    visit_info.start_time = strftime("%Y%m%d-%H%M%S")
    visit_info.out_dir = out_dir
    visit_info.out_db = join(visit_info.out_dir, cm.DB_FILENAME)
    visit_info.err_log = join(out_dir, "error.log")
    visit_info.debug_log = join(out_dir, "debug.log")

    be = cm.BrowserEvent()
    be.event_type = cm.EVENT_NEW_VISIT

    # The log file name is derived from the URL as given, i.e. before a
    # scheme is prepended below.
    visit_info.ff_log = open_log_file(out_dir, visit_info.url)

    # Prepend a scheme when the URL does not start with a known one.
    if visit_info.url[:5] not in ('data:', 'http:', 'https', 'file:'):
        visit_info.url = 'http://' + visit_info.url

    try:
        visit_info.visit_id = dbu.insert_to_db(dbu.DBCmd.ADD_VISIT, be,
                                               visit_info)
        cm.print_debug(
            visit_info, "Visiting: %s %s (%s)" %
            (visit_info.visit_id, visit_info.url, visit_info.rank))
        setup_nspr_logging(visit_info.http_log)
        visit_info.vdisplay = start_xvfb()
        port, visit_info.mitm_proc = start_mitm_capture(visit_info.http_dump)
        driver, visit_info.profile_dir, visit_info.sel_proc =\
            get_browser(visit_info.ff_log, port, flash_support, cookie_support)
        if flash_support:
            visit_info.strace_proc = log_syscalls(visit_info.sel_proc,
                                                  visit_info.sys_log)

        #############################################################
        driver_get(driver, visit_info, cm.SOFT_TIMEOUT)  # real visit
        #############################################################
        time.sleep(wait_on_site)
        close_driver(driver, timeout=10)
        stop_strace(visit_info.strace_proc)
        result_dict = process_crawler_output(visit_info.ff_log, visit_info,
                                             flash_support)
        cm.print_debug(
            visit_info, "Visit OK: %s %s (%s)" %
            (visit_info.visit_id, visit_info.url, visit_info.rank))
        # Mark the visit as complete in the database before tearing down.
        visit_info.incomplete = 0
        dbu.insert_to_db(dbu.DBCmd.UPDATE_VISIT, be, visit_info)
        quit_driver(driver)
        stop_xvfb(visit_info.vdisplay)
        remove_visit_files(visit_info)
    except (cm.TimeExceededError, sel_exceptions.TimeoutException) as texc:
        err_str = "Visit to %s(%s) timed out %s" % \
            (visit_info.url, visit_info.rank, texc)
        cm.print_error(visit_info, err_str)
        clean_up(visit_info, driver)
        return None
    except Exception as exc:
        err_str = "Exception visiting %s(%s) %s %s" % \
            (visit_info.url, visit_info.rank, exc, traceback.format_exc())
        cm.print_error(visit_info, err_str)
        clean_up(visit_info, driver)
        return None
    else:
        return result_dict
 def test_write_to_file(self):
     # Write a random 100-character string to a temp file and expect
     # fu.read_file to hand back exactly the same string.
     target_path = self.new_temp_file('write_test.txt')
     expected_text = ut.rand_str(100)
     fu.write_to_file(target_path, expected_text)
     actual_text = fu.read_file(target_path)
     self.assertEqual(expected_text, actual_text)
示例#12
0
def open_log_file(out_dir, url):
    """Build the path of the browser log file used for a visit to `url`.

    Creates `out_dir` first when it is missing. Nothing is opened here; the
    function only returns the ``<basename>.log`` path inside `out_dir`.
    """
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    # The random part is handed to get_basename_from_url with the URL.
    random_tag = "ff-%s" % ut.rand_str()
    file_name = '%s.log' % get_basename_from_url(url, random_tag)
    return join(out_dir, file_name)
示例#13
0
def visit_page(url_tuple, timeout=cm.HARD_TIME_OUT,
               wait_on_site=cm.WAIT_ON_SITE, pre_crawl_sleep=False,
               out_dir=cm.BASE_TMP_DIR, flash_support=cm.FLASH_ENABLE,
               cookie_support=cm.COOKIE_ALLOW_ALL):
    """Visit one page in an instrumented browser and return the parsed logs.

    Args:
        url_tuple: Either a ``(rank, url)`` pair or a bare URL; a value that
            cannot be unpacked into two parts is visited with rank 0.
        timeout: Not used in this function body; kept for callers.
        wait_on_site: Seconds to sleep after the page load, before the
            browser is closed.
        pre_crawl_sleep: Not used in this function body; kept for callers.
        out_dir: Directory that receives the per-visit log and dump files.
        flash_support: Passed to get_browser and process_crawler_output;
            syscall logging is only started when it is truthy.
        cookie_support: Passed to get_browser.

    Returns:
        The value returned by process_crawler_output on success, or None
        when the visit timed out or raised any other exception (the error is
        logged and clean_up is called in both cases).
    """
    driver = None
    visit_info = cm.VisitInfo()
    try:
        visit_info.rank, visit_info.url = url_tuple
    except (TypeError, ValueError):
        # When rank of the page is not provided, we'll use rank=0.
        # Only unpacking failures are caught, so KeyboardInterrupt and
        # SystemExit are no longer swallowed here.
        visit_info.rank, visit_info.url = 0, url_tuple

    # Per-visit output files; random suffixes keep the names distinct.
    visit_info.sys_log = join(out_dir, "syscall-%s-%s.log" %
                              (visit_info.rank, ut.rand_str()))
    visit_info.http_log = join(out_dir, "http-%s-%s.log" %
                               (visit_info.rank, ut.rand_str()))
    visit_info.http_dump = join(out_dir, "mitm-%s-%s.dmp" %
                                (visit_info.rank, ut.rand_str()))
    visit_info.start_time = strftime("%Y%m%d-%H%M%S")
    visit_info.out_dir = out_dir
    visit_info.out_db = join(visit_info.out_dir, cm.DB_FILENAME)
    visit_info.err_log = join(out_dir, "error.log")
    visit_info.debug_log = join(out_dir, "debug.log")

    be = cm.BrowserEvent()
    be.event_type = cm.EVENT_NEW_VISIT

    # The log file name is derived from the URL as given, i.e. before a
    # scheme is prepended below.
    visit_info.ff_log = open_log_file(out_dir, visit_info.url)

    # Prepend a scheme when the URL does not start with a known one.
    if visit_info.url[:5] not in ('data:', 'http:', 'https', 'file:'):
        visit_info.url = 'http://' + visit_info.url

    try:
        visit_info.visit_id = dbu.insert_to_db(dbu.DBCmd.ADD_VISIT, be,
                                               visit_info)
        cm.print_debug(visit_info, "Visiting: %s %s (%s)" %
                       (visit_info.visit_id, visit_info.url, visit_info.rank))
        setup_nspr_logging(visit_info.http_log)
        visit_info.vdisplay = start_xvfb()
        port, visit_info.mitm_proc = start_mitm_capture(visit_info.http_dump)
        driver, visit_info.profile_dir, visit_info.sel_proc =\
            get_browser(visit_info.ff_log, port, flash_support, cookie_support)
        if flash_support:
            visit_info.strace_proc = log_syscalls(visit_info.sel_proc,
                                                  visit_info.sys_log)

        #############################################################
        driver_get(driver, visit_info, cm.SOFT_TIMEOUT)  # real visit
        #############################################################
        time.sleep(wait_on_site)
        close_driver(driver, timeout=10)
        stop_strace(visit_info.strace_proc)
        result_dict = process_crawler_output(visit_info.ff_log, visit_info,
                                             flash_support)
        cm.print_debug(visit_info, "Visit OK: %s %s (%s)" %
                       (visit_info.visit_id, visit_info.url, visit_info.rank))
        # Mark the visit as complete in the database before tearing down.
        visit_info.incomplete = 0
        dbu.insert_to_db(dbu.DBCmd.UPDATE_VISIT, be, visit_info)
        quit_driver(driver)
        stop_xvfb(visit_info.vdisplay)
        remove_visit_files(visit_info)
    except (cm.TimeExceededError, sel_exceptions.TimeoutException) as texc:
        err_str = "Visit to %s(%s) timed out %s" % \
            (visit_info.url, visit_info.rank, texc)
        cm.print_error(visit_info, err_str)
        clean_up(visit_info, driver)
        return None
    except Exception as exc:
        err_str = "Exception visiting %s(%s) %s %s" % \
            (visit_info.url, visit_info.rank, exc, traceback.format_exc())
        cm.print_error(visit_info, err_str)
        clean_up(visit_info, driver)
        return None
    else:
        return result_dict