예제 #1
0
def dump_profile(browser_profile_folder,
                 tar_location,
                 close_webdriver,
                 webdriver=None,
                 browser_settings=None,
                 save_flash=False,
                 full_profile=True):
    """
    Dump the browser profile stored in <browser_profile_folder> into a
    gzipped tar archive inside <tar_location>.

    <browser_profile_folder> and <tar_location> are folder paths; a trailing
    slash is appended when missing and <tar_location> is created if needed.
    Any archive of the same name already present is deleted first.

    <close_webdriver> if true, <webdriver>.quit() is called and the function
        waits on sleep_until_sqlite_checkpoint() before archiving.
    <webdriver> the driver to quit; required when <close_webdriver> is true.
    <browser_settings> if not None, handed to save_browser_settings().
    <save_flash> specifies whether to dump flash files (save_flash_files()).
    <full_profile> if true the archive is 'full_profile.tar.gz' and holds
        cookies, history, localStorage and the 'webapps'/'storage' folders;
        otherwise it is 'profile.tar.gz' and holds cookies and history only.

    Files and folders that do not exist in the profile are silently omitted.
    Raises ValueError if <close_webdriver> is true but <webdriver> is None.
    """
    # Fail early with a clear message, before the previous archive is removed
    if close_webdriver and webdriver is None:
        raise ValueError("close_webdriver=True requires a webdriver instance")

    # ensures that folder paths end with slashes
    if not browser_profile_folder.endswith("/"):
        browser_profile_folder += "/"
    if not tar_location.endswith("/"):
        tar_location += "/"

    if not os.path.exists(tar_location):
        os.makedirs(tar_location)

    tar_name = 'full_profile.tar.gz' if full_profile else 'profile.tar.gz'
    tar_path = tar_location + tar_name

    # delete a stale archive before saving the current session; os.remove is
    # portable and reports failures, unlike spawning an external `rm`
    if os.path.isfile(tar_path):
        os.remove(tar_path)

    # if this is a dump on close, close the webdriver and wait for checkpoint
    if close_webdriver:
        webdriver.quit()
        sleep_until_sqlite_checkpoint(browser_profile_folder)

    # cookies and history are always backed up
    storage_vector_files = [
        'cookies.sqlite',
        'cookies.sqlite-shm',
        'cookies.sqlite-wal',  # cookies
        'places.sqlite',
        'places.sqlite-shm',
        'places.sqlite-wal',  # history
    ]
    storage_vector_dirs = []
    if full_profile:  # backup all storage vectors
        storage_vector_files += [
            'webappsstore.sqlite',
            'webappsstore.sqlite-shm',
            'webappsstore.sqlite-wal',  # localStorage
        ]
        storage_vector_dirs = [
            'webapps',  # related to localStorage?
            'storage',  # directory for IndexedDB
        ]

    # backup and tar profile; the context manager closes the archive even if
    # adding a member fails part-way through
    with tarfile.open(tar_path, 'w:gz') as tar:
        for item in storage_vector_files:
            full_path = os.path.join(browser_profile_folder, item)
            if os.path.isfile(full_path):
                tar.add(full_path, arcname=item)
        for item in storage_vector_dirs:
            full_path = os.path.join(browser_profile_folder, item)
            if os.path.isdir(full_path):
                tar.add(full_path, arcname=item)

    # save flash cookies
    if save_flash:
        save_flash_files(tar_location)

    # save the browser settings
    if browser_settings is not None:
        save_browser_settings(tar_location, browser_settings)
def dump_profile(browser_profile_folder, tar_location, close_webdriver, webdriver=None, browser_settings=None,
                 save_flash=False, full_profile=True):
    """
    Archive the browser profile found in <browser_profile_folder> as a
    gzipped tarball placed in <tar_location>.

    Both folder arguments get a trailing slash appended when it is missing,
    and <tar_location> is created when it does not exist. A previous archive
    with the same name is removed before the new one is written.

    <close_webdriver> when true, <webdriver>.close() is called and the
        function then waits on sleep_until_sqlite_checkpoint().
    <webdriver> driver to close; must be given when <close_webdriver> is true.
    <browser_settings> if not None they are saved via save_browser_settings().
    <save_flash> specifies whether to dump flash files via save_flash_files().
    <full_profile> true -> 'full_profile.tar.gz' with cookies, history,
        localStorage and the 'webapps'/'storage' directories;
        false -> 'profile.tar.gz' with cookies and history only.

    Profile entries that are absent on disk are left out without error.
    Raises ValueError when <close_webdriver> is true and <webdriver> is None.
    """
    # Reject the impossible combination up front so the old archive survives
    if close_webdriver and webdriver is None:
        raise ValueError("close_webdriver=True requires a webdriver instance")

    # ensures that folder paths end with slashes
    if not browser_profile_folder.endswith("/"):
        browser_profile_folder = browser_profile_folder + "/"
    if not tar_location.endswith("/"):
        tar_location = tar_location + "/"

    if not os.path.exists(tar_location):
        os.makedirs(tar_location)

    if full_profile:
        tar_name = 'full_profile.tar.gz'
    else:
        tar_name = 'profile.tar.gz'
    archive_path = tar_location + tar_name

    # see if this file exists first
    # if it does, delete it before we try to save the current session
    # (os.remove instead of an external `rm`: portable, and errors surface)
    if os.path.isfile(archive_path):
        os.remove(archive_path)

    # if this is a dump on close, close the webdriver and wait for checkpoint
    if close_webdriver:
        # NOTE(review): Selenium's close() only closes the current window,
        # whereas quit() ends the whole session -- confirm close() is intended.
        webdriver.close()
        sleep_until_sqlite_checkpoint(browser_profile_folder)

    cookie_and_history_dbs = [
        'cookies.sqlite', 'cookies.sqlite-shm', 'cookies.sqlite-wal',  # cookies
        'places.sqlite', 'places.sqlite-shm', 'places.sqlite-wal',  # history
    ]
    if full_profile:  # backup all storage vectors
        wanted_files = cookie_and_history_dbs + [
            'webappsstore.sqlite', 'webappsstore.sqlite-shm', 'webappsstore.sqlite-wal',  # localStorage
        ]
        wanted_dirs = [
            'webapps',  # related to localStorage?
            'storage',  # directory for IndexedDB
        ]
    else:  # only backup cookies and history
        wanted_files = cookie_and_history_dbs
        wanted_dirs = []

    # backup and tar profile; `with` guarantees the archive handle is closed
    # even when adding a member raises
    with tarfile.open(archive_path, 'w:gz') as tar:
        for item in wanted_files:
            full_path = os.path.join(browser_profile_folder, item)
            if os.path.isfile(full_path):
                tar.add(full_path, arcname=item)
        for item in wanted_dirs:
            full_path = os.path.join(browser_profile_folder, item)
            if os.path.isdir(full_path):
                tar.add(full_path, arcname=item)

    # save flash cookies
    if save_flash:
        save_flash_files(tar_location)

    # save the browser settings
    if browser_settings is not None:
        save_browser_settings(tar_location, browser_settings)
def dump_profile(browser_profile_folder, manager_params, browser_params, tar_location,
                 close_webdriver, webdriver=None, browser_settings=None, save_flash=False,
                 compress=False):
    """
    Dump the browser profile stored in <browser_profile_folder> into a tar
    archive inside <tar_location>.

    Both folder arguments get a trailing slash appended when it is missing,
    and <tar_location> is created when it does not exist. A previous archive
    with the same name is removed before the new one is written.

    <manager_params> mapping; 'logger_address' is unpacked into loggingclient().
    <browser_params> mapping; 'crawl_id' is used (as an integer) in log lines
        and the mapping is forwarded to save_flash_files().
    <close_webdriver> when true, <webdriver>.close() is called and the
        function then waits on sleep_until_sqlite_checkpoint().
    <webdriver> driver to close; must be given when <close_webdriver> is true.
    <browser_settings> if not None they are saved via save_browser_settings().
    <save_flash> specifies whether to dump flash files.
    <compress> true -> gzipped 'profile.tar.gz'; false -> plain 'profile.tar'.

    Missing '-shm'/'-wal' checkpoint files are skipped silently, a missing
    main database is logged as critical and skipped, and a missing storage
    directory is logged as a warning and skipped.
    Raises ValueError when <close_webdriver> is true and <webdriver> is None.
    """
    # Reject the impossible combination up front so the old archive survives
    if close_webdriver and webdriver is None:
        raise ValueError("close_webdriver=True requires a webdriver instance")

    # Connect to logger
    logger = loggingclient(*manager_params['logger_address'])

    # ensures that folder paths end with slashes
    if not browser_profile_folder.endswith("/"):
        browser_profile_folder = browser_profile_folder + "/"
    if not tar_location.endswith("/"):
        tar_location = tar_location + "/"

    if not os.path.exists(tar_location):
        os.makedirs(tar_location)

    if compress:
        tar_name = 'profile.tar.gz'
        tar_mode = 'w:gz'
    else:
        tar_name = 'profile.tar'
        tar_mode = 'w'
    tar_path = tar_location + tar_name

    # see if this file exists first
    # if it does, delete it before we try to save the current session
    if os.path.isfile(tar_path):
        os.remove(tar_path)

    # if this is a dump on close, close the webdriver and wait for checkpoint
    if close_webdriver:
        webdriver.close()
        sleep_until_sqlite_checkpoint(browser_profile_folder)

    storage_vector_files = [
        'cookies.sqlite', 'cookies.sqlite-shm', 'cookies.sqlite-wal',  # cookies
        'places.sqlite', 'places.sqlite-shm', 'places.sqlite-wal',  # history
        'webappsstore.sqlite', 'webappsstore.sqlite-shm', 'webappsstore.sqlite-wal',  # localStorage
    ]
    storage_vector_dirs = [
        'webapps',  # related to localStorage?
        'storage'  # directory for IndexedDB
    ]

    # backup and tar profile; `with` guarantees the archive handle is closed
    # even when adding a member raises
    with tarfile.open(tar_path, tar_mode, errorlevel=1) as tar:
        logger.debug("BROWSER %i: Backing up full profile from %s to %s" % (browser_params['crawl_id'], browser_profile_folder, tar_path))
        for item in storage_vector_files:
            full_path = os.path.join(browser_profile_folder, item)
            if not os.path.isfile(full_path):
                # -shm / -wal are just checkpoint files, so their absence is
                # not worth reporting; anything else missing is logged
                if not full_path.endswith(('shm', 'wal')):
                    logger.critical("BROWSER %i: %s NOT FOUND IN profile folder, skipping." % (browser_params['crawl_id'], full_path))
                # Really skip the entry: previously a missing main database
                # fell through to tar.add() and raised on the absent path
                continue
            tar.add(full_path, arcname=item)
        for item in storage_vector_dirs:
            full_path = os.path.join(browser_profile_folder, item)
            if not os.path.isdir(full_path):
                logger.warning("BROWSER %i: %s NOT FOUND IN profile folder, skipping." % (browser_params['crawl_id'], full_path))
                continue
            tar.add(full_path, arcname=item)

    # save flash cookies
    if save_flash:
        save_flash_files(logger, browser_params, tar_location)

    # save the browser settings
    if browser_settings is not None:
        save_browser_settings(tar_location, browser_settings)