Exemplo n.º 1
0
def get_ff_cache(profile_dir, store_body=False):
    """Recover long-lived entries from a Firefox profile's disk cache.

    The cache map under ``<profile_dir>/Cache`` is dumped into a temporary
    directory by the external Perl script. Every ``*_metadata`` file in the
    dump is parsed and paired with its body file (the same path without the
    ``_metadata`` suffix). The temporary directory is always removed before
    returning, even when processing fails.

    Args:
        profile_dir: Path of the Firefox profile directory.
        store_body: When true, keep the raw body under ``item["Body"]``;
            otherwise an empty string is stored there.

    Returns:
        A list of dicts, one per cache entry whose expiry delta (in seconds)
        is not below ``DELTA_MONTH``. Each dict holds the parsed metadata
        (with "Etag", "Request String", "Expires" and "Cache-Control"
        defaulting to ""), plus "Expiry-Delta", "Body" and "Hash". Entries
        whose files cannot be read are reported and left out. An empty list
        is returned when the profile has no ``Cache`` directory.
    """
    cache_dir = os.path.join(profile_dir, "Cache")
    if not os.path.isdir(cache_dir):
        return []  # Firefox updated the cache dir structure since our study
    cache_map = os.path.join(cache_dir, "_CACHE_MAP_")
    cache_dump = os.path.join(BASE_TMP_DIR, append_timestamp("cache") +
                              rand_str())
    create_dir(cache_dump)
    cache_items = []
    db_items = ("Etag", "Request String", "Expires", "Cache-Control")
    meta_suffix = "_metadata"
    try:
        subprocess.call([PERL_PATH, CACHE_PERL_SCRIPT, cache_map,
                         "--recover=" + cache_dump])
        for fname in glob(os.path.join(cache_dump, "*" + meta_suffix)):
            try:
                with open(fname) as f:
                    item = parse_metadata(f.read())
                # Make sure every column the DB expects is present.
                for db_item in db_items:
                    if db_item not in item:
                        item[db_item] = ""

                # If a response includes both an Expires header and a max-age
                # directive, the max-age directive overrides the Expires header
                # (http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html)
                expiry_delta_sec = 0
                expiry = parse_date(item["Expires"])
                if expiry:
                    # NOTE(review): datetime.now() is naive local time;
                    # confirm parse_date returns a comparable value.
                    expiry_delta = expiry - datetime.now()
                    expiry_delta_sec = expiry_delta.total_seconds()
                if item["Cache-Control"]:
                    cache_directives = parse_cache_control_header(
                        item["Cache-Control"], cls=ResponseCacheControl)
                    if "max-age" in cache_directives:
                        # The raw directive value may be a string (or None
                        # for a bare "max-age"); only use it when numeric.
                        try:
                            expiry_delta_sec = int(
                                cache_directives["max-age"])
                        except (TypeError, ValueError):
                            pass  # malformed max-age: keep Expires value
                if expiry_delta_sec < DELTA_MONTH:
                    continue  # short-lived entry, not of interest
                item["Expiry-Delta"] = expiry_delta_sec

                # The body lives next to the metadata file, minus the suffix.
                # Read as bytes: it is hashed and stored as a BLOB.
                with open(fname[:-len(meta_suffix)], "rb") as f:
                    data = f.read()
                item["Body"] = data if store_body else ""  # store as BLOB
                item["Hash"] = hash_text(base64.b64encode(data))
            except IOError as exc:
                print("Error processing cache: %s: %s" %
                      (exc, traceback.format_exc()))
                continue  # do not return a partially populated item

            cache_items.append(item)
    finally:
        # Remove the temporary dump even if recovery or parsing raised.
        if os.path.isdir(cache_dump):
            shutil.rmtree(cache_dump)
    return cache_items
Exemplo n.º 2
0
def get_ff_cache(profile_dir, store_body=False):
    """Collect long-lived Firefox disk-cache entries for a profile.

    Runs the external Perl script on ``<profile_dir>/Cache/_CACHE_MAP_`` to
    dump the cache into a temporary directory, then reads each
    ``*_metadata`` file together with its body file (the same path without
    the ``_metadata`` suffix). The temporary directory is removed before
    returning, whether or not processing succeeded.

    Args:
        profile_dir: Path of the Firefox profile directory.
        store_body: If true, the raw body is stored under ``item["Body"]``;
            otherwise that field is an empty string.

    Returns:
        A list of metadata dicts for entries whose expiry delta (in seconds)
        is not below ``DELTA_MONTH``. "Etag", "Request String", "Expires"
        and "Cache-Control" default to "" when missing; "Expiry-Delta",
        "Body" and "Hash" are added. Entries that hit an ``IOError`` are
        reported and omitted. Returns ``[]`` when there is no ``Cache``
        directory in the profile.
    """
    cache_dir = os.path.join(profile_dir, "Cache")
    if not os.path.isdir(cache_dir):
        return []  # Firefox updated the cache dir structure since our study
    cache_map = os.path.join(cache_dir, "_CACHE_MAP_")
    cache_dump = os.path.join(BASE_TMP_DIR,
                              append_timestamp("cache") + rand_str())
    create_dir(cache_dump)
    cache_items = []
    db_items = ("Etag", "Request String", "Expires", "Cache-Control")
    meta_suffix = "_metadata"
    try:
        subprocess.call(
            [PERL_PATH, CACHE_PERL_SCRIPT, cache_map,
             "--recover=" + cache_dump])
        for fname in glob(os.path.join(cache_dump, "*" + meta_suffix)):
            try:
                with open(fname) as f:
                    item = parse_metadata(f.read())
                # Fill in the columns the DB expects when they are absent.
                for db_item in db_items:
                    if db_item not in item:
                        item[db_item] = ""

                # If a response includes both an Expires header and a max-age
                # directive, the max-age directive overrides the Expires header
                # (http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html)
                expiry_delta_sec = 0
                expiry = parse_date(item["Expires"])
                if expiry:
                    # NOTE(review): datetime.now() is naive local time;
                    # confirm parse_date returns a comparable value.
                    expiry_delta = expiry - datetime.now()
                    expiry_delta_sec = expiry_delta.total_seconds()
                if item["Cache-Control"]:
                    cache_directives = parse_cache_control_header(
                        item["Cache-Control"], cls=ResponseCacheControl)
                    if "max-age" in cache_directives:
                        # The raw value may be a string or None; only a
                        # numeric max-age replaces the Expires-based delta.
                        try:
                            expiry_delta_sec = int(
                                cache_directives["max-age"])
                        except (TypeError, ValueError):
                            pass  # malformed max-age: keep Expires value
                if expiry_delta_sec < DELTA_MONTH:
                    continue  # short-lived entry, not of interest
                item["Expiry-Delta"] = expiry_delta_sec

                # Body file = metadata path without the suffix; read bytes
                # because the content is hashed and stored as a BLOB.
                with open(fname[:-len(meta_suffix)], "rb") as f:
                    data = f.read()
                item["Body"] = data if store_body else ""  # store as BLOB
                item["Hash"] = hash_text(base64.b64encode(data))
            except IOError as exc:
                print("Error processing cache: %s: %s" %
                      (exc, traceback.format_exc()))
                continue  # skip entries that could not be fully read

            cache_items.append(item)
    finally:
        # Always clean up the temporary dump directory.
        if os.path.isdir(cache_dump):
            shutil.rmtree(cache_dump)
    return cache_items
Exemplo n.º 3
0
def insert_canvas_event(cursor, visit_info, be):
    """Insert one row into the ``canvas`` table and return its row id.

    When ``be.event_type`` is one of ``cm.CANVAS_ALL_EVENTS``, the event's
    ``log_text`` is hashed and looked up in the ``metadata`` table by
    (hash, event_type); a new metadata row is inserted only when no match
    exists. For any other event type the metadata id written is 0.

    Args:
        cursor: DB cursor used for all statements.
        visit_info: Object providing ``visit_id``.
        be: Event object providing ``event_type``, ``log_text``, ``url``,
            ``js_file`` and ``js_line``.

    Returns:
        ``cursor.lastrowid`` after the ``canvas`` insert.
    """
    metadata_row_id = 0
    if be.event_type in cm.CANVAS_ALL_EVENTS:
        text_hash = hash_text(be.log_text)
        lookup_params = {"Hash": text_hash, "event_type": be.event_type}
        cursor.execute("SELECT id FROM metadata WHERE hash=:Hash and "
                       "event_type=:event_type",
                       lookup_params)
        existing = cursor.fetchone()

        if existing is not None:
            # fetchone returns a tuple; the id is its only column
            metadata_row_id = existing[0]
        else:
            new_metadata = (None, be.event_type, be.log_text, text_hash)
            cursor.execute("INSERT INTO metadata VALUES (?, ?, ?, ?)",
                           new_metadata)
            metadata_row_id = cursor.lastrowid

    canvas_row = (None, visit_info.visit_id, be.url, be.js_file,
                  be.js_line, be.event_type, 0, metadata_row_id)
    cursor.execute("INSERT INTO canvas VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
                   canvas_row)
    return cursor.lastrowid
Exemplo n.º 4
0
def insert_canvas_event(cursor, visit_info, be):
    """Store a canvas event and return the id of the new ``canvas`` row.

    Events whose type is in ``cm.CANVAS_ALL_EVENTS`` are linked to a row of
    the ``metadata`` table keyed by the hash of ``be.log_text`` and the
    event type; that row is created when the lookup finds nothing. Other
    event types are stored with a metadata id of 0.

    Args:
        cursor: DB cursor on which the statements are executed.
        visit_info: Object exposing ``visit_id``.
        be: Event object exposing ``event_type``, ``log_text``, ``url``,
            ``js_file`` and ``js_line``.

    Returns:
        The value of ``cursor.lastrowid`` after inserting into ``canvas``.
    """
    select_sql = ("SELECT id FROM metadata WHERE hash=:Hash and "
                  "event_type=:event_type")
    insert_meta_sql = "INSERT INTO metadata VALUES (?, ?, ?, ?)"
    insert_canvas_sql = "INSERT INTO canvas VALUES (?, ?, ?, ?, ?, ?, ?, ?)"

    linked_meta_id = 0
    if be.event_type in cm.CANVAS_ALL_EVENTS:
        log_hash = hash_text(be.log_text)
        cursor.execute(select_sql,
                       {"Hash": log_hash, "event_type": be.event_type})
        found = cursor.fetchone()

        if found is None:
            # No matching metadata yet: create it and use the new id.
            cursor.execute(
                insert_meta_sql,
                (None, be.event_type, be.log_text, log_hash))
            linked_meta_id = cursor.lastrowid
        else:
            linked_meta_id = found[0]  # fetchone returns a tuple

    cursor.execute(
        insert_canvas_sql,
        (None, visit_info.visit_id, be.url, be.js_file, be.js_line,
         be.event_type, 0, linked_meta_id))
    return cursor.lastrowid