def complete_batch(cls, labbook: LabBook, txid: str,
                   cancel: bool = False, rollback: bool = False) -> None:
    """Indicate a batch upload is finished and sweep all new files.

    If the upload was cancelled and a rollback was requested, local changes
    are discarded with ``git reset --hard``. In every other case the
    uncommitted changes are swept into the labbook; when the upload was
    cancelled the sweep message is prefixed with a note naming the
    transaction.

    Args:
        labbook: Subject labbook
        txid: Transaction id (correlator)
        cancel: Indicate transaction finished but due to cancellation
        rollback: Undo all local changes if cancelled (default False)

    Returns:
        None
    """
    if cancel and rollback:
        logger.warning(f"Cancelled tx {txid}, doing git reset")
        call_subprocess(['git', 'reset', '--hard'], cwd=labbook.root_dir)
        return

    logger.info(f"Done batch upload {txid}, cancelled={cancel}")
    if cancel:
        logger.warning("Sweeping aborted batch upload.")

    # Must be an f-string: without the prefix the literal text "{txid}"
    # ended up in the sweep message instead of the transaction id.
    extra_msg = f"Cancelled upload `{txid}`. " if cancel else ''
    labbook.sweep_uncommitted_changes(upload=True, extra_msg=extra_msg,
                                      show=True)
def delete_files(cls, labbook: LabBook, section: str,
                 relative_paths: List[str]) -> None:
    """Delete files (or directories) from inside a labbook section.

    Paths are removed one after another. Only pass "parent" nodes of the file
    tree: removing a directory removes all of its children, so a later delete
    of one of those children would fail because it no longer exists.

    Args:
        labbook(LabBook): Subject LabBook
        section(str): Section name (code, input, output)
        relative_paths(list(str)): Paths to delete; each is made relative and
            resolved under `<labbook root>/<section>`

    Returns:
        None

    Raises:
        ValueError: If `relative_paths` is not a list, or a target path does
            not exist.
        IOError: If a path still exists after git removed it.
    """
    labbook.validate_section(section)
    if not isinstance(relative_paths, list):
        raise ValueError("Must provide list of paths to remove")

    for requested_path in relative_paths:
        cleaned = LabBook.make_path_relative(requested_path)
        target_path = os.path.join(labbook.root_dir, section, cleaned)

        # Guard clause: refuse to "delete" something that is not there.
        if not os.path.exists(target_path):
            raise ValueError(
                f"Attempted to delete non-existent path at `{target_path}`")

        labbook.git.remove(target_path, force=True, keep_file=False)

        # Verify the removal really took the path off disk.
        if os.path.exists(target_path):
            raise IOError(f"Failed to delete path: {target_path}")

    labbook.sweep_uncommitted_changes(show=True)
def stop_container(cls, labbook: LabBook,
                   username: Optional[str] = None) -> Tuple[LabBook, bool]:
    """Stop the container of the given labbook.

    The labbook's uncommitted changes are swept whether or not stopping the
    container succeeds (the sweep runs in a `finally` clause); an error
    raised while stopping still propagates afterwards.

    Args:
        labbook: Subject labbook
        username: Optional username of active user

    Returns:
        A tuple of (labbook, stopped). `stopped` is whatever
        `stop_labbook_container` returned: True if a container was stopped,
        otherwise False (which can simply mean none was running).
    """
    owner = InventoryManager().query_owner(labbook)
    image_name = infer_docker_image_name(labbook_name=labbook.name,
                                         owner=owner,
                                         username=username)
    logger.info(f"Stopping {str(labbook)} ({image_name})")

    try:
        stopped = stop_labbook_container(image_name)
    finally:
        # Save state of the labbook once its container is turned off.
        labbook.sweep_uncommitted_changes()

    return labbook, stopped
def process(self, result_obj: ActivityRecord, data: List[ExecutionData],
            status: Dict[str, Any], metadata: Dict[str, Any]) -> ActivityRecord:
    """Add one detail record per untracked/unstaged file to `result_obj`.

    Args:
        result_obj(ActivityRecord): Record that receives the detail objects
        data(list): A list of ExecutionData instances (not read here)
        status(dict): Git status; `status['untracked']` is iterated as
            filenames and `status['unstaged']` as (filename, change) pairs
        metadata(dict): Dev Env specific or other developer defined data
            (not read here)

    Returns:
        ActivityRecord: The same `result_obj`, with detail objects added
    """
    # --- Untracked files: always reported as newly created -----------------
    for position, filename in enumerate(status['untracked']):
        # skip any file in .git or .gigantum dirs
        if ".git" in filename or ".gigantum" in filename:
            continue

        _, detail_type, section = LabBook.infer_section_from_relative_path(filename)
        # Importance falls off with position (skipped files still consume a
        # position) and is floored at 0.
        detail = ActivityDetailRecord(detail_type, show=False,
                                      importance=max(255 - position, 0),
                                      action=ActivityAction.CREATE)

        # We use a "private" attribute here, but it's better than the silent
        # breakage that happened before
        # cf. https://github.com/gigantum/gigantum-client/issues/436
        if section == LabBook._default_activity_section:
            msg = (f'Created new file `{filename}` in the Project Root. '
                   'Note, it is best practice to use the Code, '
                   'Input, and Output sections exclusively. \n')
        else:
            msg = f"Created new {section} file `{filename}`"

        detail.add_value('text/markdown', msg)
        result_obj.add_detail_object(detail)

    # --- Unstaged changes: action depends on the reported change type -------
    action_for_change = {
        "deleted": ActivityAction.DELETE,
        "added": ActivityAction.CREATE,
        "modified": ActivityAction.EDIT,
        "renamed": ActivityAction.EDIT,
    }

    recorded = 0  # only files that are actually recorded lower the importance
    for filename, change in status['unstaged']:
        # skip any file in .git or .gigantum dirs
        if ".git" in filename or ".gigantum" in filename:
            continue

        _, detail_type, section = LabBook.infer_section_from_relative_path(filename)
        action = action_for_change.get(change, ActivityAction.NOACTION)

        detail = ActivityDetailRecord(detail_type, show=False,
                                      importance=max(255 - recorded, 0),
                                      action=action)
        detail.add_value('text/markdown',
                         f"{change[0].upper() + change[1:]} {section} file `{filename}`")
        result_obj.add_detail_object(detail)
        recorded += 1

    return result_obj
def walkdir(cls, labbook: LabBook, section: str,
            show_hidden: bool = False) -> List[Dict[str, Any]]:
    """Recursively list every file and directory in a labbook section.

    The .git and .gigantum directories are never traversed or returned.

    Args:
        labbook: Subject LabBook
        section(str): The labbook section (code, input, output) to walk
        show_hidden(bool): If True, include entries that have a path
            component starting with '.' (EXCLUDING .git and .gigantum)

    Returns:
        List[Dict[str, Any]]: One `get_file_info` result per listed path

    Raises:
        ValueError: If the section directory does not exist.
    """
    labbook.validate_section(section)

    # base_dir is the root directory to search, to account for relative paths
    # inside the labbook.
    base_dir = os.path.join(labbook.root_dir, section)
    if not os.path.isdir(base_dir):
        raise ValueError(
            f"Labbook walkdir base_dir {base_dir} not an existing directory")

    keys: List[str] = []
    for root, dirs, files in os.walk(base_dir):
        # Prune in place so os.walk never descends into ignored directories.
        for ignored in ('.git', '.gigantum'):
            if ignored in dirs:
                dirs.remove(ignored)

        # Sorting in place also fixes the traversal order, which keeps the
        # response deterministic.
        dirs.sort()

        # Directories first, then files, so pagination loads in an intuitive
        # order.
        prefix = root.replace(base_dir, '')
        keys.extend(sorted(os.path.join(prefix, name) for name in dirs))
        keys.extend(sorted(os.path.join(prefix, name) for name in files))

    # Create stats
    stats: List[Dict[str, Any]] = []
    for key in keys:
        if not show_hidden and any(
                part.startswith('.') for part in key.split(os.path.sep)):
            continue
        stats.append(cls.get_file_info(labbook, section, key))

    return stats
def put_file(cls, labbook: LabBook, section: str, src_file: str,
             dst_path: str, txid: Optional[str] = None) -> Dict[str, Any]:
    """Move the file at `src_file` to `dst_path` inside a labbook section.

    This operation does NOT commit or create an activity record.

    Args:
        labbook: Subject LabBook
        section: Section name (code, input, output)
        src_file: Absolute path of the file to insert
        dst_path: Path within section to insert `src_file`
        txid: Optional transaction id (not read here)

    Returns:
        dict: The `get_file_info` result for the inserted file

    Raises:
        ValueError: If `src_file` is not an absolute path or is not an
            existing file.
        FileOperationsException: If the destination name matches a
            gitignore pattern.
    """
    # os.path.abspath() always returns a non-empty string, so it can never
    # reject anything; isabs() is the actual "is this absolute?" predicate.
    if not os.path.isabs(src_file):
        raise ValueError(f"Source file `{src_file}` not an absolute path")

    if not os.path.isfile(src_file):
        raise ValueError(f"Source file does not exist at `{src_file}`")

    labbook.validate_section(section)

    # Refuse to place files that git would ignore.
    dst_name = os.path.basename(dst_path)
    r = call_subprocess(['git', 'check-ignore', dst_name],
                        cwd=labbook.root_dir, check=False)
    if dst_path and r and dst_name in r:
        logger.warning(f"File {dst_path} matches gitignore; "
                       f"not put into {str(labbook)}")
        raise FileOperationsException(f"`{dst_path}` matches "
                                      f"ignored pattern")

    mdst_dir = _make_path_relative(dst_path)
    full_dst = os.path.join(labbook.root_dir, section, mdst_dir)
    # NOTE(review): stripping happens on the joined path on purpose -- doing
    # it on the relative part before joining could leave a leading separator
    # and make os.path.join discard the labbook root.
    full_dst = full_dst.replace('..', '')
    full_dst = full_dst.replace('~', '')

    # Force overwrite if file already exists
    existing_target = os.path.join(full_dst, os.path.basename(src_file))
    if os.path.isfile(existing_target):
        os.remove(existing_target)

    parent_dir = os.path.dirname(full_dst)
    if not os.path.isdir(parent_dir):
        os.makedirs(parent_dir, exist_ok=True)

    fdst = shutil.move(src_file, full_dst)
    relpath = fdst.replace(os.path.join(labbook.root_dir, section), '')
    return cls.get_file_info(labbook, section, relpath)
def migrate_labbook_untracked_space(labbook: LabBook) -> None:
    """Ensure the labbook has a git-ignored `output/untracked` directory.

    If no line of the labbook's `.gitignore` mentions `output/untracked`, an
    entry is appended and the change is swept into the labbook. The
    directory itself is then created when it does not exist yet.

    Args:
        labbook: Subject LabBook

    Returns:
        None
    """
    gitignore_path = os.path.join(labbook.root_dir, '.gitignore')

    # Context manager so the handle is closed deterministically (the previous
    # bare open(...).readlines() left that to the garbage collector).
    with open(gitignore_path) as gi_file:
        has_untracked_dir = any('output/untracked' in line.strip()
                                for line in gi_file)

    if not has_untracked_dir:
        with open(gitignore_path, 'a') as gi_file:
            gi_file.write('\n\n# Migrated - allow untracked area\n'
                          'output/untracked\n')
        labbook.sweep_uncommitted_changes(extra_msg="Added untracked area")

    # Make the untracked directory. The exists() check is kept so that a
    # non-directory already at this path is left alone rather than raising.
    untracked_path = os.path.join(labbook.root_dir, 'output/untracked')
    if not os.path.exists(untracked_path):
        os.makedirs(untracked_path, exist_ok=True)
def insert_file(cls, labbook: LabBook, section: str, src_file: str,
                dst_path: str = '') -> Dict[str, Any]:
    """Move `src_file` into the labbook, commit it, and record the activity.

    The file is placed with `FileOperations.put_file`, then added and
    committed in git, and finally an activity record linked to that commit
    is stored.

    Args:
        labbook: Subject labbook
        section: Section name (code, input, output)
        src_file: Full path of file to insert
        dst_path: Relative path within the section where `src_file` should
            be placed

    Returns:
        dict: The inserted file's info

    Raises:
        FileOperationsException: If adding or committing the file fails.
    """
    finfo = FileOperations.put_file(labbook=labbook, section=section,
                                    src_file=src_file, dst_path=dst_path)

    rel_path = os.path.join(section, finfo['key'])

    activity_type, activity_detail_type, section_str = \
        labbook.get_activity_type_from_section(section)

    commit_msg = f"Added new {section_str} file {rel_path}"
    try:
        labbook.git.add(rel_path)
        commit = labbook.git.commit(commit_msg)
    except Exception as x:
        logger.error(x)
        # Clean up the file that was just inserted. The old code removed
        # `dst_path`, which is a section-relative directory (often ''), so
        # the cleanup itself failed and masked the real error.
        inserted_path = os.path.join(labbook.root_dir, rel_path)
        if os.path.isfile(inserted_path):
            os.remove(inserted_path)
        raise FileOperationsException(x) from x

    # Create Activity record and detail.
    # splitext() always returns a 2-tuple, so the fallback has to be applied
    # to the extension itself, not to the tuple.
    ext = os.path.splitext(rel_path)[1] or 'file'
    adr = ActivityDetailRecord(activity_detail_type, show=False,
                               importance=0, action=ActivityAction.CREATE)
    adr.add_value('text/plain', commit_msg)

    ar = ActivityRecord(activity_type, message=commit_msg, show=True,
                        importance=255, linked_commit=commit.hexsha,
                        tags=[ext])
    ar.add_detail_object(adr)

    ars = ActivityStore(labbook)
    ars.create_activity_record(ar)

    return finfo
def write_function(filename: str, delay: int, value: str,
                   labbook: LabBook) -> None:
    """Test helper: after `delay` seconds, append `value` to `filename`.

    The append happens while holding `labbook.lock()`.

    Args:
        filename: Path of the file to append to
        delay: Seconds to sleep before trying to take the lock
        value: Text to append
        labbook: Labbook whose lock guards the write

    Returns:
        None
    """
    time.sleep(delay)

    with labbook.lock():
        with open(filename, mode='at') as out_file:
            out_file.write(value)
        # NOTE(review): the pause is kept inside the lock on the assumption
        # that it exists to hold the lock for a while -- confirm against the
        # original layout.
        time.sleep(2)
def listdir(cls, labbook: LabBook, section: str,
            base_path: Optional[str] = None,
            show_hidden: bool = False) -> List[Dict[str, Any]]:
    """List the files and directories directly inside one directory.

    The .git and .gigantum entries are never included.

    Args:
        labbook: Subject labbook
        section(str): the labbook section to start from
        base_path(str): Relative base path, if not listing from the section's
            root
        show_hidden(bool): If True, include entries whose name starts with
            '.' (EXCLUDING .git and .gigantum)

    Returns:
        List[Dict[str, Any]]: `get_file_info` results sorted by their 'key'

    Raises:
        ValueError: If the directory to list does not exist.
    """
    labbook.validate_section(section)

    # base_dir is the root directory to search, to account for relative paths
    # inside the labbook.
    sub_path = base_path or ''
    base_dir = os.path.join(labbook.root_dir, section, sub_path)
    if not os.path.isdir(base_dir):
        raise ValueError(
            f"Labbook listdir base_dir {base_dir} not an existing directory")

    never_listed = ('.git', '.gigantum')
    stats: List[Dict[str, Any]] = []
    for entry in os.listdir(base_dir):
        if entry in never_listed:
            continue
        # os.listdir yields bare names, so one prefix test covers the entry.
        if entry.startswith('.') and not show_hidden:
            continue
        stats.append(
            cls.get_file_info(labbook, section,
                              os.path.join(base_path or "", entry)))

    # For more deterministic responses, sort resulting paths alphabetically.
    stats.sort(key=lambda info: info['key'])
    return stats
def test_make_path_relative(self):
    """Check `LabBook.make_path_relative` against known input/output pairs."""
    # Each entry is (input, expected output).
    cases = (
        (None, None),
        ('', ''),
        ('/', ''),
        ('//', ''),
        ('/////cats', 'cats'),
        ('//cats///', 'cats///'),
        ('cats', 'cats'),
        ('/cats/', 'cats/'),
        ('complex/.path/.like/this', 'complex/.path/.like/this'),
        ('//complex/.path/.like/this', 'complex/.path/.like/this'),
    )

    for raw_path, expected in cases:
        actual = LabBook.make_path_relative(raw_path)
        assert actual == expected
def _update_branch_description(cls, lb: LabBook, description: str):
    """Set the labbook description, commit it, and store an activity record.

    Args:
        lb: Labbook whose description is updated
        description: New description text

    Returns:
        None
    """
    # Update the description on branch creation
    lb.description = description
    lb.git.add(lb.config_path)
    description_commit = lb.git.commit('Updating description')

    # The detail carries the new description text; neither the detail nor
    # the record is flagged to be shown.
    detail = ActivityDetailRecord(ActivityDetailType.LABBOOK, show=False)
    detail.add_value('text/plain', description)

    record = ActivityRecord(ActivityType.LABBOOK,
                            message="Updated description of Project",
                            linked_commit=description_commit.hexsha,
                            tags=["labbook"],
                            show=False)
    record.add_detail_object(detail)

    ActivityStore(lb).create_activity_record(record)
def test_makedir_simple(self, mock_labbook):
    """Check that `FO.makedir` creates directories, each with a .gitkeep."""
    lb = mock_labbook[2]
    long_dir = "code/non/existant/dir/should/now/be/made"
    requested_dirs = ["code/cat_dir", "code/dog_dir", "code/mouse_dir/",
                      "code/mouse_dir/new_dir", long_dir]

    for rel_dir in requested_dirs:
        FO.makedir(lb, rel_dir)
        made_dir = os.path.join(lb.root_dir, rel_dir)
        assert os.path.isdir(made_dir)
        assert os.path.isfile(os.path.join(lb.root_dir, rel_dir, '.gitkeep'))

    # Count the .gitkeep files found anywhere below code/non.
    walk_root = os.path.join(lb.root_dir, 'code', 'non')
    gitkeep_count = sum(names.count('.gitkeep')
                        for _, _, names in os.walk(walk_root))

    # Ensure that count of .gitkeep files equals the number of subdirs,
    # excluding the code dir.
    expected = len(LabBook.make_path_relative(long_dir).split(os.sep)) - 1
    assert gitkeep_count == expected
def mutate_and_get_payload(cls, root, info, owner, labbook_name, remote_url,
                           client_mutation_id=None):
    """Dispatch a background job that imports a labbook from `remote_url`.

    Git credentials for the default remote are configured from the request's
    bearer token before the job is dispatched.

    Args:
        root: Not read here
        info: Resolver info; `info.context.headers.environ` must carry
            "HTTP_AUTHORIZATION"
        owner: Not read here
        labbook_name: Not read here
        remote_url: URL of the remote labbook to import
        client_mutation_id: Not read here

    Returns:
        ImportRemoteLabbook: Payload carrying the dispatched job's key string

    Raises:
        ValueError: If no authorization header is present.
    """
    username = get_logged_in_username()
    logger.info(f"Importing remote labbook from {remote_url}")
    lb = LabBook(author=get_logged_in_author())

    # Look up the admin service configured for the default remote, if any.
    git_config = lb.client_config.config['git']
    default_remote = git_config['default_remote']
    remotes = git_config['remotes']
    admin_service = None
    if default_remote in remotes:
        admin_service = remotes[default_remote]['admin_service']

    # Extract valid Bearer token
    has_auth_header = (hasattr(info.context, 'headers')
                       and "HTTP_AUTHORIZATION" in info.context.headers.environ)
    if not has_auth_header:
        raise ValueError(
            "Authorization header not provided. Must have a valid session to query for collaborators"
        )
    token = parse_token(info.context.headers.environ["HTTP_AUTHORIZATION"])

    gl_mgr = GitLabManager(default_remote, admin_service=admin_service,
                           access_token=token)
    gl_mgr.configure_git_credentials(default_remote, username)

    job_key = Dispatcher().dispatch_task(
        jobs.import_labbook_from_remote,
        metadata={'method': 'import_labbook_from_remote'},
        kwargs={'remote_url': remote_url, 'username': username})
    logger.info(
        f"Dispatched import_labbook_from_remote({remote_url}) to Job {job_key}"
    )

    return ImportRemoteLabbook(job_key=job_key.key_str)
def mutate_and_get_payload(cls, root, info, owner, labbook_name, remote_url,
                           client_mutation_id=None):
    """Import a labbook from `remote_url` and return it as a connection edge.

    Git credentials for the default remote are configured from the request's
    bearer token, then the import runs in this call via
    `LabbookWorkflow.import_from_remote`.

    Args:
        root: Not read here
        info: Resolver info; `info.context.headers.environ` must carry
            "HTTP_AUTHORIZATION"
        owner: Not read here
        labbook_name: Not read here
        remote_url: URL of the remote labbook to import
        client_mutation_id: Not read here

    Returns:
        ImportRemoteLabbook: Payload carrying the edge of the new labbook

    Raises:
        ValueError: If no authorization header is present.
    """
    username = get_logged_in_username()
    logger.info(f"Importing remote labbook from {remote_url}")
    lb = LabBook(author=get_logged_in_author())

    # Look up the admin service configured for the default remote, if any.
    git_config = lb.client_config.config['git']
    default_remote = git_config['default_remote']
    remotes = git_config['remotes']
    admin_service = None
    if default_remote in remotes:
        admin_service = remotes[default_remote]['admin_service']

    # Extract valid Bearer token
    has_auth_header = (hasattr(info.context, 'headers')
                       and "HTTP_AUTHORIZATION" in info.context.headers.environ)
    if not has_auth_header:
        raise ValueError(
            "Authorization header not provided. Must have a valid session to query for collaborators"
        )
    token = parse_token(info.context.headers.environ["HTTP_AUTHORIZATION"])

    gl_mgr = GitLabManager(default_remote, admin_service=admin_service,
                           access_token=token)
    gl_mgr.configure_git_credentials(default_remote, username)

    workflow = LabbookWorkflow.import_from_remote(remote_url, username=username)
    imported = workflow.labbook
    import_owner = InventoryManager().query_owner(imported)

    # TODO: Fix cursor implementation, this currently doesn't make sense
    cursor = base64.b64encode(f"{0}".encode('utf-8'))
    node = Labbook(owner=import_owner, name=imported.name)
    lbedge = LabbookConnection.Edge(node=node, cursor=cursor)
    return ImportRemoteLabbook(new_labbook_edge=lbedge)
def _make_move_activity_record(cls, labbook: LabBook, section: str,
                               dst_abs_path: str, commit_msg: str) -> None:
    """Stage and commit a moved path, then store a matching activity record.

    Args:
        labbook: Subject LabBook
        section: Section name used to look up the activity types
        dst_abs_path: Absolute path of the move destination
        commit_msg: Used as the commit message, the record message and the
            detail text

    Returns:
        None
    """
    # A directory has everything beneath it staged; a file is staged alone.
    stage = labbook.git.add_all if os.path.isdir(dst_abs_path) else labbook.git.add
    stage(dst_abs_path)
    move_commit = labbook.git.commit(commit_msg)

    activity_type, detail_type, _ = labbook.get_activity_type_from_section(section)

    detail = ActivityDetailRecord(detail_type, show=False, importance=0,
                                  action=ActivityAction.EDIT)
    detail.add_value('text/markdown', commit_msg)

    record = ActivityRecord(activity_type,
                            message=commit_msg,
                            linked_commit=move_commit.hexsha,
                            show=True,
                            importance=255,
                            tags=['file-move'])
    record.add_detail_object(detail)

    ActivityStore(labbook).create_activity_record(record)
def move_file(cls, labbook: LabBook, section: str, src_rel_path: str,
              dst_rel_path: str) -> List[Dict[str, Any]]:
    """Move a file or directory within a labbook section.

    The source is removed from git (keeping the file on disk), moved with
    `shutil.move`, and the move is then committed and recorded as an
    activity.

    Args:
        labbook: Subject LabBook
        section(str): Section name (code, input, output)
        src_rel_path(str): Source file or directory
        dst_rel_path(str): Target file name and/or directory

    Returns:
        List[Dict[str, Any]]: File info of the moved path; for a directory
        this is followed by info for everything beneath it (.gitkeep files
        excluded)

    Raises:
        ValueError: If `src_rel_path` is empty/None, `dst_rel_path` is None,
            or the source does not exist.
    """
    # Start with Validations
    labbook.validate_section(section)
    if not src_rel_path:
        raise ValueError("src_rel_path cannot be None or empty")

    if dst_rel_path is None:
        raise ValueError("dst_rel_path cannot be None or empty")

    src_rel_path = LabBook.make_path_relative(src_rel_path)
    dst_rel_path = LabBook.make_path_relative(dst_rel_path)

    section_root = os.path.join(labbook.root_dir, section)
    src_abs_path = os.path.join(section_root, src_rel_path.replace('..', ''))
    dst_abs_path = os.path.join(section_root, dst_rel_path.replace('..', ''))

    if not os.path.exists(src_abs_path):
        raise ValueError(f"No src file exists at `{src_abs_path}`")

    try:
        src_type = 'directory' if os.path.isdir(src_abs_path) else 'file'
        logger.info(
            f"Moving {src_type} `{src_abs_path}` to `{dst_abs_path}`")

        labbook.git.remove(src_abs_path, keep_file=True)
        final_dest = shutil.move(src_abs_path, dst_abs_path)
        commit_msg = f"Moved {src_type} `{src_rel_path}` to `{dst_rel_path}`"
        cls._make_move_activity_record(labbook, section, dst_abs_path,
                                       commit_msg)

        moved_is_file = os.path.isfile(final_dest)
        moved_key = final_dest.replace(section_root, '')
        moved_files = [cls.get_file_info(labbook, section, moved_key or "/")]
        if moved_is_file:
            return moved_files

        # A directory was moved: report everything that came along with it.
        for root, dirs, files in os.walk(final_dest):
            rel_root = _make_path_relative(root.replace(section_root, ''))
            moved_files.extend(
                cls.get_file_info(labbook, section, os.path.join(rel_root, name))
                for name in sorted(dirs))
            moved_files.extend(
                cls.get_file_info(labbook, section, os.path.join(rel_root, name))
                for name in sorted(files) if name != '.gitkeep')
        return moved_files
    except Exception as e:
        logger.critical(
            "Failed moving file in labbook. Repository may be in corrupted state."
        )
        logger.exception(e)
        raise
def makedir(cls, labbook: LabBook, relative_path: str,
            make_parents: bool = True,
            create_activity_record: bool = False) -> None:
    """Make a new directory inside the labbook directory.

    Nothing happens if the path already exists. Otherwise the directory is
    created and a `.gitkeep` file is written (and added to git) at every
    level of `relative_path` that does not have one yet.

    Args:
        labbook: Subject LabBook
        relative_path(str): Path within the labbook to make directory
        make_parents(bool): If true, create intermediary directories
        create_activity_record(bool): If true, create commit and activity
            record

    Returns:
        None

    Raises:
        ValueError: If `relative_path` is None or empty.
        FileNotFoundError: If `make_parents` is False and a parent directory
            is missing.
    """
    if not relative_path:
        raise ValueError("relative_path argument cannot be None or empty")

    relative_path = LabBook.make_path_relative(relative_path)
    new_directory_path = os.path.join(labbook.root_dir, relative_path)

    if os.path.exists(new_directory_path):
        return

    logger.info(f"Making new directory in `{new_directory_path}`")
    # `make_parents` used to be passed as `exist_ok`, which has nothing to
    # do with parent creation: os.makedirs always creates intermediates.
    # Honour the flag by using os.mkdir when parents must not be created.
    if make_parents:
        os.makedirs(new_directory_path, exist_ok=True)
    else:
        os.mkdir(new_directory_path)

    # Walk down the path and drop a .gitkeep into every level lacking one.
    new_dir = ''
    for d in relative_path.split(os.sep):
        new_dir = os.path.join(new_dir, d)
        full_new_dir = os.path.join(labbook.root_dir, new_dir)

        gitkeep_path = os.path.join(full_new_dir, '.gitkeep')
        if not os.path.exists(gitkeep_path):
            with open(gitkeep_path, 'w') as gitkeep:
                gitkeep.write(
                    "This file is necessary to keep this directory tracked by Git"
                    " and archivable by compression tools. Do not delete or modify!"
                )
            labbook.git.add(gitkeep_path)

    if create_activity_record:
        # Create detail record
        activity_type, activity_detail_type, section_str = \
            labbook.infer_section_from_relative_path(relative_path)
        adr = ActivityDetailRecord(activity_detail_type, show=False,
                                   importance=0,
                                   action=ActivityAction.CREATE)

        msg = f"Created new {section_str} directory `{relative_path}`"
        commit = labbook.git.commit(msg)
        adr.add_value('text/markdown', msg)

        # Create activity record
        ar = ActivityRecord(activity_type,
                            message=msg,
                            linked_commit=commit.hexsha,
                            show=True,
                            importance=255,
                            tags=['directory-create'])
        ar.add_detail_object(adr)

        # Store
        ars = ActivityStore(labbook)
        ars.create_activity_record(ar)