def backup_folder(folder_to_backup: str, cache_folder: str):
    """Backup a folder to the user project.

    Packages ``folder_to_backup`` into a tar archive (skipping files larger
    than ``MAX_FILE_SIZE_IN_MB`` and the ``environment`` directory) and
    uploads it to the connected Lab instance. A JSON metadata file in
    ``cache_folder`` records the folder size at the last backup so that an
    unchanged folder is not re-uploaded.

    Args:
        folder_to_backup: Path of the folder to back up.
        cache_folder: Folder where the backup metadata JSON file is stored.
    """
    if not os.path.isdir(folder_to_backup):
        log.warning("Folder does not exist: " + folder_to_backup)
        # BUG FIX: original fell through and tried to size/tar a
        # nonexistent folder — nothing to back up, so stop here.
        return

    # Check if dir size has changed since the last backup
    metadata_dict = {
        "folder_size": 0,
        "last_backup": 0,
        "folder": folder_to_backup
    }
    metadata_file = os.path.join(cache_folder, os.path.basename(folder_to_backup) + ".json")
    if os.path.isfile(metadata_file):
        metadata_dict = file_utils.load_dict_json(metadata_file)

    current_folder_size = file_utils.folder_size(folder_to_backup)
    if metadata_dict["folder_size"] == current_folder_size:
        log.info("No Backup since folder size has not changed.")
        return

    metadata_dict["folder_size"] = current_folder_size
    metadata_dict["last_backup"] = int(round(time.time() * 1000))  # epoch millis

    # Initialize environment with user project and in temp directory
    env = Environment(project=None, root_folder=Environment._TEMP_ROOT_FOLDER)
    # Only backup if environment is connected
    if not env.is_connected():
        log.warning("Failed to connect to Lab Instance. Cannot backup folder.")
        env.print_info()
        return

    archive_file_path = None
    backup_key = None
    try:
        # Package folder to temp dir but ignore files larger than
        # MAX_FILE_SIZE_IN_MB and the environment directory
        archive_file_path = file_handler_utils.tar_folder(
            folder_to_backup,
            max_file_size=MAX_FILE_SIZE_IN_MB,
            exclude=["**/environment/*"])
        backup_key = env.upload_file(
            archive_file_path,
            data_type=env.DataType.BACKUP,
            track_event=False)
    except Exception as e:
        # Failsafe backup: never let a failed upload crash the caller
        print("Failed to backup workspace")
        print(e)

    if archive_file_path:
        os.remove(archive_file_path)  # remove archive after upload attempt

    if backup_key:
        print(backup_key)
        # Backup successful -> persist folder metadata so an unchanged
        # folder is skipped on the next run
        file_utils.save_dict_json(metadata_file, metadata_dict)
        # Delete old backups, keeping only the N latest versions
        env.file_handler.delete_remote_file(
            env.file_handler.remove_version_from_key(backup_key),
            keep_latest_versions=KEEP_N_LATEST_BACKUPS)
try: # Wait for random time (up to 1 hour) so that not all workspaces check at the same time time.sleep(random.randint(0, 60) * 60) log.info("Run storage cleanup check.") max_disk_storage_gb = int(WORKSPACE_STORAGE_LIMIT) inactive_days = jupyterdiskcheck_plugin.get_inactive_days() size_in_gb = jupyterdiskcheck_plugin.get_workspace_size() if inactive_days <= 1: # Backup workspace metadata if user is active -> used in Lab for tracking of activity try: from lab_client import Environment env = Environment(project=None, root_folder=Environment._TEMP_ROOT_FOLDER) # Only backup if environment is connected if not env.is_connected(): log.warning("Failed to connect to Lab Instance. Cannot upload metadata backup file.") env.print_info() else: env.upload_file(os.path.join(WORKSPACE_CONFIG_FOLDER , "metadata.json"), data_type=env.DataType.BACKUP, track_event=False) except Exception as e: # Failsafe backup print("Failed to backup workspace metadata.") print(e) pass # only use inactive cleanup if more than 50% of actual limit if size_in_gb and size_in_gb > (max_disk_storage_gb * STORAGE_CLEANUP_THRESHOLD) and inactive_days and inactive_days > LAST_USER_ACTIVITY: # Automatic cleanup log.info("Automatic storage cleanup. Workspace size: " + str(round(size_in_gb)) + " GB. " "Max size: " + str(max_disk_storage_gb) + " GB. Last activity: " + str(inactive_days) + " days ago.")