def backend_config(self, data: dict) -> None:
    """Save storage backend configuration data.

    Merges `data` into the in-memory configuration of the loaded backend (when one is set),
    writes the persistable subset of `data` to `.gigantum/backend.json`, commits that file,
    and stores an activity record describing the change.

    Args:
        data: Configuration values to apply. The dictionary passed in is not modified.

    Returns:
        None
    """
    if self._backend:
        self._backend.configuration = {**self._backend.configuration, **data}

    # Remove defaults set at runtime that shouldn't be persisted. This filters into a new
    # dictionary instead of deleting keys from `data`, so the caller's object stays intact.
    runtime_only_keys = ("username", "gigantum_bearer_token", "gigantum_id_token")
    persisted = {key: value for key, value in data.items() if key not in runtime_only_keys}

    config_file = os.path.join(self.root_dir, ".gigantum", "backend.json")
    with open(config_file, 'wt') as sf:
        json.dump(persisted, sf, indent=2)

    self.git.add(config_file)
    cm = self.git.commit("Updating backend config")

    ar = ActivityRecord(ActivityType.DATASET,
                        message="Updated Dataset storage backend configuration",
                        show=True,
                        importance=255,
                        linked_commit=cm.hexsha,
                        tags=['config'])
    adr = ActivityDetailRecord(ActivityDetailType.DATASET, show=False, importance=255,
                               action=ActivityAction.EDIT)
    d = json.dumps(persisted, indent=2)
    adr.add_value('text/markdown', f"Updated dataset storage backend configuration:\n\n ```{d}```")
    ar.add_detail_object(adr)
    ars = ActivityStore(self)
    ars.create_activity_record(ar)
def remove_docker_snippet(self, name: str) -> None:
    """Delete a custom docker snippet from the environment.

    The snippet's YAML file is removed through git, the removal is committed, and a
    hidden activity record linked to that commit is stored.

    Args:
        name: Name or identifier of the snippet to remove

    Returns:
        None

    Raises:
        ValueError: If no snippet file exists for `name`
    """
    snippet_dir = os.path.join(self.labbook.root_dir, '.gigantum', 'env', 'docker')
    snippet_path = os.path.join(snippet_dir, f'{name}.yaml')
    if not os.path.exists(snippet_path):
        raise ValueError(f'Docker snippet name `{name}` does not exist')

    self.labbook.git.remove(snippet_path, keep_file=False)

    summary = f"Removed custom Docker snippet `{name}`"
    logger.info(summary)
    removal_commit = self.labbook.git.commit(summary)

    detail = ActivityDetailRecord(ActivityDetailType.ENVIRONMENT,
                                  show=False,
                                  action=ActivityAction.DELETE)
    detail.add_value('text/plain', summary)

    record = ActivityRecord(ActivityType.ENVIRONMENT,
                            message=summary,
                            show=False,
                            linked_commit=removal_commit.hexsha,
                            tags=["environment", "docker", "snippet"])
    record.add_detail_object(detail)

    ActivityStore(self.labbook).create_activity_record(record)
def mutate_and_get_payload(cls, root, info, name, description, repository, base_id, revision,
                           is_untracked=False, client_mutation_id=None):
    """Create a new LabBook for the logged-in user, record the creation, and add its base.

    When `is_untracked` is truthy the LabBook is created with LFS disabled, and its `input`
    and `output` sections are set to untracked and verified before the activity record is written.

    Args:
        root: Unused here
        info: Unused here
        name: Name of the new LabBook
        description: Description of the new LabBook
        repository: Base repository passed to `ComponentManager.add_base`
        base_id: Base identifier passed to `ComponentManager.add_base`
        revision: Base revision passed to `ComponentManager.add_base`
        is_untracked: Create the LabBook with untracked input/output sections
        client_mutation_id: Unused here

    Returns:
        CreateLabbook wrapping the new LabBook

    Raises:
        ValueError: If the untracked sections could not be verified, or the repository is
                    not clean after configuring them
    """
    username = get_logged_in_username()
    inv_manager = InventoryManager()
    creation_args = dict(username=username,
                         owner=username,
                         labbook_name=name,
                         description=description,
                         author=get_logged_in_author())

    if not is_untracked:
        lb = inv_manager.create_labbook(**creation_args)
    else:
        lb = inv_manager.create_labbook_disabled_lfs(**creation_args)

        untracked_sections = ('input', 'output')
        for section in untracked_sections:
            FileOperations.set_untracked(lb, section)

        # Query both sections before judging, exactly one check per section
        verified = [FileOperations.is_set_untracked(lb, section) for section in untracked_sections]
        if not all(verified):
            raise ValueError(f'{str(lb)} untracking for input/output in malformed state')
        if not lb.is_repo_clean:
            raise ValueError(f'{str(lb)} should have clean Git state after setting for untracked')

    created_message = f"Created new LabBook: {username}/{name}"

    detail = ActivityDetailRecord(ActivityDetailType.LABBOOK, show=False, importance=0)
    detail.add_value('text/plain', created_message)

    # Create activity record
    record = ActivityRecord(ActivityType.LABBOOK,
                            message=created_message,
                            show=True,
                            importance=255,
                            linked_commit=lb.git.commit_hash)
    record.add_detail_object(detail)
    ActivityStore(lb).create_activity_record(record)

    ComponentManager(lb).add_base(repository, base_id, revision)

    return CreateLabbook(labbook=Labbook(owner=username, name=lb.name))
def remove_all_bases(self, base_paths: List[Path], detail_records: List[ActivityDetailRecord]) -> str:
    """Remove every file listed in `base_paths` and append one detail record per file.

    Deleting the files is the easy part. This method mainly exists to produce detail
    records that make sense in the context of a misconfigured project.

    Args:
        base_paths: List of matched YAML files for base images
        detail_records: List that receives the new detail records; the caller adds them
                        to an ActivityRecord

    Returns:
        The short message to use for the git commit, etc.
    """
    for yaml_path in base_paths:
        self.labbook.git.remove(str(yaml_path), keep_file=False)

        # The repository's slash is stored as an underscore in the file name, e.g.
        # .gigantum/env/base/gigantum_base-images_r-tidyverse.yaml
        # so only the LAST underscore separates the repository from the base name.
        repo_name, component = yaml_path.stem.rsplit('_', 1)

        removal_detail = ActivityDetailRecord(ActivityDetailType.ENVIRONMENT,
                                              show=False,
                                              action=ActivityAction.DELETE)
        removal_detail.add_value('text/plain', f"Removing base from {repo_name}: {component}")
        detail_records.append(removal_detail)

    base_count = len(base_paths)
    return f"Removing all bases from project with {base_count} base configuration files."
def remove_bundled_app(self, name: str) -> None:
    """Remove a bundled app from this labbook.

    Rewrites the bundled app file without the entry for `name`, commits the file, and
    stores an activity record describing the removal.

    Args:
        name(str): name of the bundled app

    Returns:
        None

    Raises:
        ValueError: If no bundled app called `name` exists
    """
    data = self.get_bundled_apps()
    if name not in data:
        raise ValueError(f"App {name} does not exist. Cannot remove.")

    del data[name]

    with open(self.bundled_app_file, 'wt') as baf:
        json.dump(data, baf)

    # Commit the changes
    self.labbook.git.add(self.bundled_app_file)
    commit = self.labbook.git.commit("Committing bundled app")

    # This is a removal, so the detail is recorded as a DELETE action (it was previously
    # recorded as CREATE, which contradicted the "Removed ..." message).
    adr = ActivityDetailRecord(ActivityDetailType.ENVIRONMENT,
                               show=False,
                               action=ActivityAction.DELETE)
    adr.add_value('text/plain', f"Removed bundled application: {name}")
    ar = ActivityRecord(ActivityType.ENVIRONMENT,
                        message=f"Removed bundled application: {name}",
                        show=True,
                        linked_commit=commit.hexsha,
                        tags=["environment", "docker", "bundled_app"])
    ar.add_detail_object(adr)
    ars = ActivityStore(self.labbook)
    ars.create_activity_record(ar)
def mutate_and_get_payload(cls, root, info, owner, labbook_name, description_content,
                           client_mutation_id=None):
    """Set a new description on a LabBook, commit it, and record the activity.

    Args:
        root: Unused here
        info: Unused here
        owner: Owner (namespace) of the LabBook
        labbook_name: Name of the LabBook
        description_content: New description text
        client_mutation_id: Unused here

    Returns:
        SetLabbookDescription with `success=True`
    """
    current_user = get_logged_in_username()
    lb = InventoryManager().load_labbook(current_user, owner, labbook_name,
                                         author=get_logged_in_author())
    lb.description = description_content

    activity_message = "Updated description of Project"
    with lb.lock():
        lb.git.add(os.path.join(lb.config_path))
        description_commit = lb.git.commit('Updating description')

        detail = ActivityDetailRecord(ActivityDetailType.LABBOOK, show=False)
        detail.add_value('text/plain', activity_message)

        record = ActivityRecord(ActivityType.LABBOOK,
                                message=activity_message,
                                linked_commit=description_commit.hexsha,
                                tags=["labbook"],
                                show=False)
        record.add_detail_object(detail)
        ActivityStore(lb).create_activity_record(record)

    return SetLabbookDescription(success=True)
def migrate_labbook_schema(labbook: LabBook) -> None:
    """Migrate a labbook to the current schema, commit the result, and record the activity.

    If the migration raises, the exception is logged, the repository is hard-reset to the
    commit it was on before the migration started, and the exception is re-raised.

    Args:
        labbook: LabBook to migrate

    Returns:
        None
    """
    # Remember where we started so a failed migration can be rolled back
    rollback_commit = labbook.git.commit_hash

    try:
        migrate_schema_to_current(labbook.root_dir)
    except Exception as err:
        logger.exception(err)
        reset_command = f'git reset --hard {rollback_commit}'.split()
        call_subprocess(reset_command, cwd=labbook.root_dir)
        raise

    summary = f"Migrate schema to {CURRENT_LABBOOK_SCHEMA}"
    labbook.git.add(labbook.config_path)
    migration_commit = labbook.git.commit(summary, author=labbook.author, committer=labbook.author)

    detail = ActivityDetailRecord(ActivityDetailType.LABBOOK,
                                  show=True,
                                  importance=100,
                                  action=ActivityAction.EDIT)
    detail.add_value('text/plain', summary)

    record = ActivityRecord(ActivityType.LABBOOK,
                            message=summary,
                            show=True,
                            importance=255,
                            linked_commit=migration_commit.hexsha,
                            tags=['schema', 'update', 'migration'])
    record.add_detail_object(detail)

    ActivityStore(labbook).create_activity_record(record)
def process(self, result_obj: ActivityRecord, data: List[ExecutionData], status: Dict[str, Any],
            metadata: Dict[str, Any]) -> ActivityRecord:
    """Limit the number of detail records carried by an activity record.

    If the record holds more than 255 detail objects, it is trimmed to 255 and a visible
    note explaining the truncation is then added to it.

    Args:
        result_obj: The activity record to inspect and update
        data: Unused by this processor
        status: Unused by this processor
        metadata: Unused by this processor

    Returns:
        ActivityRecord: the same `result_obj` that was passed in
    """
    detail_limit = 255

    with result_obj.inspect_detail_objects() as _detail_objs:
        original_total = result_obj.num_detail_objects

        if result_obj.num_detail_objects > detail_limit:
            result_obj.trim_detail_objects(detail_limit)

            notice = ActivityDetailRecord(ActivityDetailType.NOTE,
                                          show=True,
                                          importance=0,
                                          action=ActivityAction.NOACTION)
            notice_text = (f"This activity produced {original_total} detail records, "
                           "but was truncated to the top 255 items. Inspect your code to make "
                           "sure that this was not accidental. In Jupyter for example, you can"
                           " use a `;` at the end of a line to suppress output from functions"
                           " that print excessively.")
            notice.add_value('text/markdown', notice_text)
            result_obj.add_detail_object(notice)

    return result_obj
def mutate_and_get_payload(cls, root, info, owner, dataset_name, description,
                           client_mutation_id=None):
    """Set a new description on a Dataset, commit it, and record the activity.

    Args:
        root: Unused here
        info: Unused here
        owner: Owner (namespace) of the Dataset
        dataset_name: Name of the Dataset
        description: New description text
        client_mutation_id: Unused here

    Returns:
        SetDatasetDescription wrapping the updated Dataset
    """
    username = get_logged_in_username()
    ds = InventoryManager().load_dataset(username, owner, dataset_name,
                                         author=get_logged_in_author())
    ds.description = description
    with ds.lock():
        ds.git.add(os.path.join(ds.root_dir, '.gigantum/gigantum.yaml'))
        commit = ds.git.commit('Updating description')

        # Use the DATASET activity types, consistent with the other Dataset operations
        # (backend configuration, directory creation, dataset creation). These records were
        # previously tagged as LABBOOK activity even though the subject is a Dataset.
        adr = ActivityDetailRecord(ActivityDetailType.DATASET, show=False)
        adr.add_value('text/plain', f"Updated Dataset description: {description}")
        ar = ActivityRecord(ActivityType.DATASET,
                            message="Updated Dataset description",
                            linked_commit=commit.hexsha,
                            tags=["dataset"],
                            show=False)
        ar.add_detail_object(adr)
        ars = ActivityStore(ds)
        ars.create_activity_record(ar)

    return SetDatasetDescription(updated_dataset=Dataset(owner=owner, name=dataset_name))
def remove_base(self, base_fname: Path, detail_records: List[ActivityDetailRecord]) -> str:
    """Remove the base stored in `base_fname` and append a detail record describing it.

    Deleting the file is the easy part. This method mainly exists to produce a detail
    record that makes sense for a properly configured project with a single base.

    Args:
        base_fname: Matched YAML file for the base image
        detail_records: List that receives the new detail record; the caller adds it to
                        an ActivityRecord

    Returns:
        The short message to use for the git commit, etc.
    """
    base_info = self.base_fields
    base_revision = base_info['revision']

    # The repository's slash is stored as an underscore in the file name, e.g.
    # .gigantum/env/base/gigantum_base-images_r-tidyverse.yaml
    # so only the LAST underscore separates the repository from the base name.
    repository, component = base_fname.stem.rsplit('_', 1)

    self.labbook.git.remove(str(base_fname), keep_file=False)

    # Create detail record
    message_lines = (f"Removed base {component}\n",
                     f"{base_info['description']}\n",
                     f"  - repository: {repository}",
                     f"  - component: {component}",
                     f"  - revision: {base_revision}\n")
    removal_detail = ActivityDetailRecord(ActivityDetailType.ENVIRONMENT,
                                          show=False,
                                          action=ActivityAction.DELETE)
    removal_detail.add_value('text/plain', "\n".join(message_lines))
    detail_records.append(removal_detail)

    return f"Removed base from {repository}: {component} r{base_revision}"
def insert_file(cls, labbook: LabBook, section: str, src_file: str, dst_path: str = '') -> Dict[str, Any]:
    """Insert the file at `src_file` into a labbook section and record the activity.

    The file is placed with `FileOperations.put_file()`, then added and committed in git,
    and an activity record linked to that commit is stored.

    Args:
        labbook: Subject labbook
        section: Section name (code, input, output)
        src_file: Full path of file to insert
        dst_path: Relative path within labbook where `src_file` should be copied to

    Returns:
        dict: The inserted file's info, as returned by `FileOperations.put_file()`

    Raises:
        FileOperationsException: If adding or committing the inserted file fails
    """
    finfo = FileOperations.put_file(labbook=labbook, section=section, src_file=src_file, dst_path=dst_path)

    rel_path = os.path.join(section, finfo['key'])
    activity_type, activity_detail_type, section_str = labbook.get_activity_type_from_section(section)

    commit_msg = f"Added new {section_str} file {rel_path}"
    try:
        labbook.git.add(rel_path)
        commit = labbook.git.commit(commit_msg)
    except Exception as x:
        logger.error(x)
        # Clean up the file that was just inserted. `rel_path` is the path handed to git, so it
        # is resolved against the labbook root here (assumes `labbook.root_dir` is the repository
        # root). The previous code removed `dst_path`, which is only the requested destination
        # (possibly '' or a directory, and relative to the process working directory).
        inserted_path = os.path.join(labbook.root_dir, rel_path)
        try:
            os.remove(inserted_path)
        except OSError as cleanup_error:
            # Never let a failed cleanup hide the original error
            logger.error(cleanup_error)
        raise FileOperationsException(x) from x

    # Create Activity record and detail.
    # `os.path.splitext` always returns a (non-empty, truthy) 2-tuple, so the fallback has to be
    # applied to the extension itself for files that have none.
    ext = os.path.splitext(rel_path)[1] or 'file'
    adr = ActivityDetailRecord(activity_detail_type, show=False, importance=0,
                               action=ActivityAction.CREATE)
    adr.add_value('text/plain', commit_msg)
    ar = ActivityRecord(activity_type,
                        message=commit_msg,
                        show=True,
                        importance=255,
                        linked_commit=commit.hexsha,
                        tags=[ext])
    ar.add_detail_object(adr)
    ars = ActivityStore(labbook)
    ars.create_activity_record(ar)

    return finfo
def add_docker_snippet(self, name: str, docker_content: List[str], description: Optional[str] = None) -> None:
    """Write a custom docker snippet into the environment and record the activity.

    The snippet is stored as `.gigantum/env/docker/<name>.yaml`, added and committed in
    git, and an activity record linked to that commit is stored.

    Args:
        name: Name or identifier of the custom docker snippet
        docker_content: Lines of docker content; a falsy value is treated as an empty list
        description: Human-readable verbose description of what the snippet is intended
                     to accomplish

    Returns:
        None

    Raises:
        ValueError: If `name` is empty, or contains characters other than letters,
                    digits, `-` and `_`
    """
    if not name:
        raise ValueError('Argument `name` cannot be None or empty')

    name_without_separators = name.replace('_', '').replace('-', '')
    if not name_without_separators.isalnum():
        raise ValueError('Argument `name` must be alphanumeric string (- and _ accepted)')

    docker_content = docker_content or []

    snippet = {
        'name': name,
        'timestamp_utc': datetime.datetime.utcnow().isoformat(),
        'description': description or "",
        'content': docker_content
    }

    snippet_dir = os.path.join(self.labbook.root_dir, '.gigantum', 'env', 'docker')
    snippet_path = os.path.join(snippet_dir, f'{name}.yaml')
    os.makedirs(snippet_dir, exist_ok=True)

    serialized = yaml.safe_dump(snippet, default_flow_style=False)
    with open(snippet_path, 'w') as snippet_file:
        snippet_file.write(serialized)

    logger.info(f"Wrote custom Docker snippet `{name}` to {str(self.labbook)}")

    summary = f"Wrote custom Docker snippet `{name}`"
    self.labbook.git.add(snippet_path)
    snippet_commit = self.labbook.git.commit(summary)

    detail = ActivityDetailRecord(ActivityDetailType.ENVIRONMENT,
                                  show=False,
                                  action=ActivityAction.CREATE)
    detail.add_value('text/plain', '\n'.join(docker_content))

    record = ActivityRecord(ActivityType.ENVIRONMENT,
                            message=summary,
                            show=True,
                            linked_commit=snippet_commit.hexsha,
                            tags=["environment", "docker", "snippet"])
    record.add_detail_object(detail)

    ActivityStore(self.labbook).create_activity_record(record)
def process(self, result_obj: ActivityRecord, data: List[ExecutionData], status: Dict[str, Any],
            metadata: Dict[str, Any]) -> ActivityRecord:
    """Add a detail record for every untracked and unstaged file reported in `status`.

    Any file whose path contains `.git` or `.gigantum` is skipped.

    Args:
        result_obj: The activity record to add detail records to
        data: Unused by this processor
        status: Git status; `status['untracked']` is iterated as file names and
                `status['unstaged']` as `(filename, change)` pairs
        metadata: Unused by this processor

    Returns:
        ActivityRecord: the same `result_obj` that was passed in
    """
    for cnt, filename in enumerate(status['untracked']):
        # skip any file in .git or .gigantum dirs
        if ".git" in filename or ".gigantum" in filename:
            continue

        activity_type, activity_detail_type, section = LabBook.infer_section_from_relative_path(filename)

        adr = ActivityDetailRecord(activity_detail_type, show=False, importance=max(255-cnt, 0),
                                   action=ActivityAction.CREATE)

        # We use a "private" attribute here, but it's better than the silent breakage that happened before
        # cf. https://github.com/gigantum/gigantum-client/issues/436
        if section == LabBook._default_activity_section:
            # The message is a plain single-line string. A stray line-break character (and the
            # whitespace before it) used to sit at the end of this literal, splitting it across
            # two physical source lines.
            msg = f'Created new file `{filename}` in the Project Root. Note, it is best practice to use the Code, ' \
                  'Input, and Output sections exclusively.'
        else:
            msg = f"Created new {section} file `{filename}`"
        adr.add_value('text/markdown', msg)
        result_obj.add_detail_object(adr)

    # Map of git change names to the action recorded for them; anything else is NOACTION
    change_actions = {
        "deleted": ActivityAction.DELETE,
        "added": ActivityAction.CREATE,
        "modified": ActivityAction.EDIT,
        "renamed": ActivityAction.EDIT,
    }

    cnt = 0
    for filename, change in status['unstaged']:
        # skip any file in .git or .gigantum dirs
        if ".git" in filename or ".gigantum" in filename:
            continue

        activity_type, activity_detail_type, section = LabBook.infer_section_from_relative_path(filename)

        action = change_actions.get(change, ActivityAction.NOACTION)

        adr = ActivityDetailRecord(activity_detail_type, show=False, importance=max(255-cnt, 0), action=action)
        adr.add_value('text/markdown', f"{change[0].upper() + change[1:]} {section} file `{filename}`")
        result_obj.add_detail_object(adr)
        # Only files that produced a record lower the importance of the next one
        cnt += 1

    return result_obj
def create_directory(self, path: str) -> Dict[str, Any]:
    """Create an empty directory in a dataset, commit it, and record the activity.

    Args:
        path: Relative path to the directory

    Returns:
        dict: file info for the new directory, from `gen_file_info()`

    Raises:
        ValueError: If the directory already exists, its parent directory does not exist,
                    or the directory is missing from the manifest after the update
    """
    relative_path = self.dataset.make_path_relative(path)
    new_directory_path = os.path.join(self.cache_mgr.cache_root, self.dataset_revision, relative_path)
    # Captured before the commit below so the old revision directory can be cleaned up afterwards
    previous_revision = self.dataset_revision

    if os.path.exists(new_directory_path):
        raise ValueError(f"Directory already exists: `{relative_path}`")

    logger.info(f"Creating new empty directory in `{new_directory_path}`")

    parent_directory = Path(new_directory_path).parent
    if os.path.isdir(parent_directory) is False:
        raise ValueError(f"Parent directory does not exist. Failed to create `{new_directory_path}` ")

    # create dir
    os.makedirs(new_directory_path)
    self.update()
    if relative_path not in self.manifest:
        raise ValueError("Failed to add directory to manifest")

    # Create detail record
    summary = f"Created new empty directory `{relative_path}`"
    detail = ActivityDetailRecord(ActivityDetailType.DATASET,
                                  show=False,
                                  importance=0,
                                  action=ActivityAction.CREATE)
    detail.add_value('text/markdown', summary)

    directory_commit = self.dataset.git.commit(summary)

    # Create activity record
    record = ActivityRecord(ActivityType.DATASET,
                            message=summary,
                            linked_commit=directory_commit.hexsha,
                            show=True,
                            importance=255,
                            tags=['directory-create'])
    record.add_detail_object(detail)

    # Store
    ActivityStore(self.dataset).create_activity_record(record)

    # Relink after the commit
    self.link_revision()
    if os.path.isdir(os.path.join(self.cache_mgr.cache_root, previous_revision)):
        shutil.rmtree(os.path.join(self.cache_mgr.cache_root, previous_revision))

    return self.gen_file_info(relative_path)
def process(self, result_obj: ActivityRecord, data: List[ExecutionData], status: Dict[str, Any],
            metadata: Dict[str, Any]) -> ActivityRecord:
    """Add a detail record for each non-empty plain-text result found in `data`.

    Entries whose metadata source is "display_data" are ignored. Text longer than 64,000
    characters is cut off and marked as truncated, and a record is only shown by default
    when its text is shorter than 280 characters.

    Args:
        result_obj: The activity record to add detail records to
        data: Execution data; each item's `result` entries are scanned in reverse order
        status: Unused by this processor
        metadata: Unused by this processor

    Returns:
        ActivityRecord: the same `result_obj` that was passed in
    """
    # Only store up to 64kB of plain text result data (if the user printed a TON don't save it all)
    truncate_at = 64 * 1000
    max_show_len = 280

    result_cnt = 0
    for cell in data:
        for result_entry in reversed(cell.result):
            # Don't save plain-text representations of displayed data by default.
            is_display_data = ('metadata' in result_entry
                               and 'source' in result_entry['metadata']
                               and result_entry['metadata']['source'] == "display_data")
            if is_display_data:
                continue

            if 'data' not in result_entry or 'text/plain' not in result_entry['data']:
                continue

            text_data = result_entry['data']['text/plain']
            if len(text_data) <= 0:
                continue

            detail = ActivityDetailRecord(ActivityDetailType.RESULT,
                                          show=len(text_data) < max_show_len,
                                          action=ActivityAction.CREATE,
                                          importance=max(255 - result_cnt - 100, 0))

            if len(text_data) > truncate_at:
                stored_text = text_data[:truncate_at] + " ...\n\n <result truncated>"
            else:
                stored_text = text_data
            detail.add_value("text/plain", stored_text)

            # Set cell data to tag
            detail.tags = cell.tags
            result_obj.add_detail_object(detail)

            result_cnt += 1

    return result_obj
def write_readme(self, contents: str) -> None:
    """Write a string to the README.md file within the repository and record the activity.

    The ENTIRE document must be written at once. The file is committed, and an activity
    record is stored that is tagged as an edit when the README already existed and as a
    creation otherwise.

    Args:
        contents(str): entire readme document in markdown format

    Returns:
        None

    Raises:
        TypeError: If `contents` is not a string
        ValueError: If `contents` is longer than 5,000,000 characters
    """
    # Validate readme data. The type is checked first so that non-string input always gets
    # the intended error instead of failing (or being measured) inside `len()`.
    if not isinstance(contents, str):
        raise TypeError("Invalid content. Must provide string")

    if len(contents) > (1000000 * 5):
        raise ValueError("Readme file is larger than the 5MB limit")

    readme_file = os.path.join(self.root_dir, 'README.md')
    readme_exists = os.path.exists(readme_file)

    # Write file to disk
    with open(readme_file, 'wt') as rf:
        rf.write(contents)

    # Create commit
    if readme_exists:
        commit_msg = "Updated README file"
        action = ActivityAction.EDIT
    else:
        commit_msg = "Added README file"
        action = ActivityAction.CREATE

    self.git.add(readme_file)
    commit = self.git.commit(commit_msg)

    # Create detail record
    adr = ActivityDetailRecord(self._default_activity_detail_type, show=False, importance=0, action=action)
    adr.add_value('text/plain', commit_msg)

    # Create activity record
    ar = ActivityRecord(self._default_activity_type,
                        message=commit_msg,
                        show=False,
                        importance=255,
                        linked_commit=commit.hexsha,
                        tags=['readme'])
    ar.add_detail_object(adr)

    # Store
    ars = ActivityStore(self)
    ars.create_activity_record(ar)
def unlink_dataset_from_labbook(self, dataset_namespace: str, dataset_name: str, labbook: LabBook) -> None:
    """Remove a linked dataset reference from a labbook and record the activity.

    Runs `git rm -f` on the dataset's submodule directory, deletes the leftover git module
    and submodule directories if they still exist, commits, and stores an activity record.

    Args:
        dataset_namespace: owner (namespace) of the dataset
        dataset_name: name of the dataset
        labbook: labbook instance the dataset is linked to

    Returns:
        None
    """
    relative_submodule_dir = os.path.join('.gigantum', 'datasets', dataset_namespace, dataset_name)
    call_subprocess(['git', 'rm', '-f', relative_submodule_dir], cwd=labbook.root_dir)

    # Clean up whatever `git rm` left behind: first git's module directory, then the checkout
    git_module_dir = os.path.join(labbook.root_dir, '.git', 'modules',
                                  f"{dataset_namespace}&{dataset_name}")
    if os.path.exists(git_module_dir):
        shutil.rmtree(git_module_dir)

    checkout_dir = os.path.join(labbook.root_dir, relative_submodule_dir)
    if os.path.exists(checkout_dir):
        shutil.rmtree(checkout_dir)

    labbook.git.add_all()
    unlink_commit = labbook.git.commit("removing submodule ref")

    # Add Activity Record
    detail = ActivityDetailRecord(ActivityDetailType.DATASET,
                                  show=False,
                                  action=ActivityAction.DELETE)
    detail.add_value('text/markdown',
                     f"Unlinked Dataset `{dataset_namespace}/{dataset_name}` from project")

    record = ActivityRecord(ActivityType.DATASET,
                            message=f"Unlinked Dataset {dataset_namespace}/{dataset_name} from project.",
                            linked_commit=unlink_commit.hexsha,
                            tags=["dataset"],
                            show=True)
    record.add_detail_object(detail)

    ActivityStore(labbook).create_activity_record(record)
def update_linked_dataset_reference(self, dataset_namespace: str, dataset_name: str, labbook: LabBook) -> Dataset:
    """Update a linked dataset reference to the latest revision.

    Pulls the dataset checked out inside the labbook. If the head revision changed, the
    labbook is committed and an activity record describing the update is stored.

    Args:
        dataset_namespace: owner (namespace) of the dataset
        dataset_name: name of the dataset
        labbook: labbook instance to which the dataset is linked

    Returns:
        Dataset: the linked dataset, loaded from inside the labbook directory
    """
    # Load dataset from inside Project directory
    submodule_dir = os.path.join(labbook.root_dir, '.gigantum', 'datasets', dataset_namespace, dataset_name)
    ds = self.load_dataset_from_directory(submodule_dir, author=labbook.author)
    ds.namespace = dataset_namespace

    # Update the submodule reference with the latest changes
    original_revision = ds.git.repo.head.object.hexsha
    ds.git.pull()
    revision = ds.git.repo.head.object.hexsha

    # If the submodule has changed, commit the changes.
    if original_revision != revision:
        labbook.git.add_all()
        commit = labbook.git.commit("Updating submodule ref")

        # Add Activity Record. The link is being updated, not removed, so the detail is
        # recorded as an EDIT action (it was previously recorded as DELETE).
        adr = ActivityDetailRecord(ActivityDetailType.DATASET, show=False, action=ActivityAction.EDIT)
        adr.add_value('text/markdown',
                      f"Updated Dataset `{dataset_namespace}/{dataset_name}` link to {revision}")
        msg = f"Updated Dataset `{dataset_namespace}/{dataset_name}` link to version {revision[0:8]}"
        ar = ActivityRecord(ActivityType.DATASET,
                            message=msg,
                            linked_commit=commit.hexsha,
                            tags=["dataset"],
                            show=True)
        ar.add_detail_object(adr)
        ars = ActivityStore(labbook)
        ars.create_activity_record(ar)

    return ds
def _update_branch_description(cls, lb: LabBook, description: str):
    """Set the labbook description, commit it, and store a hidden activity record.

    Args:
        lb: LabBook whose description is updated
        description: New description text; also stored as the detail record's value
    """
    # Update the description on branch creation
    lb.description = description
    lb.git.add(lb.config_path)
    description_commit = lb.git.commit('Updating description')

    detail = ActivityDetailRecord(ActivityDetailType.LABBOOK, show=False)
    detail.add_value('text/plain', description)

    record = ActivityRecord(ActivityType.LABBOOK,
                            message="Updated description of Project",
                            linked_commit=description_commit.hexsha,
                            tags=["labbook"],
                            show=False)
    record.add_detail_object(detail)

    ActivityStore(lb).create_activity_record(record)
def _create_user_note(cls, lb, title, body, tags):
    """Store a user-authored note as an activity record.

    Args:
        lb: Repository object handed to `ActivityStore`
        title: Used as the activity record's message
        body: Markdown text of the note; only added to the detail record when truthy
        tags: Tags for the activity record

    Returns:
        Whatever `ActivityStore.create_activity_record()` returns for the new record
    """
    store = ActivityStore(lb)

    note_detail = ActivityDetailRecord(ActivityDetailType.NOTE, show=True, importance=255)
    if body:
        note_detail.add_value('text/markdown', body)

    # User notes are not tied to a commit, so a placeholder value is recorded instead
    note_record = ActivityRecord(ActivityType.NOTE,
                                 message=title,
                                 linked_commit="no-linked-commit",
                                 importance=255,
                                 tags=tags)
    note_record.add_detail_object(note_detail)

    return store.create_activity_record(note_record)
def process(self, result_obj: ActivityRecord, data: List[ExecutionData], status: Dict[str, Any],
            metadata: Dict[str, Any]) -> ActivityRecord:
    """Add a detail record for each supported image found in the results in `data`.

    Whenever an image is captured, the activity record's message is also set to say that
    a result was generated in the notebook at `metadata['path']`.

    Args:
        result_obj: The activity record to add detail records to
        data: Execution data; each item's `result` entries are scanned in reverse order
        status: Unused by this processor
        metadata: Must contain `path` if any image is found

    Returns:
        ActivityRecord: the same `result_obj` that was passed in
    """
    supported_image_types = ('image/png', 'image/jpeg', 'image/jpg', 'image/gif', 'image/bmp')

    # If a supported image exists in the result, grab it and create a detail record
    result_cnt = 0
    for cell in data:
        for result_entry in reversed(cell.result):
            if 'data' not in result_entry:
                continue

            for mime_type in result_entry['data']:
                if mime_type not in supported_image_types:
                    continue

                # You got an image
                image_detail = ActivityDetailRecord(ActivityDetailType.RESULT,
                                                    show=True,
                                                    action=ActivityAction.CREATE,
                                                    importance=max(255 - result_cnt, 0))
                image_detail.add_value(mime_type, result_entry['data'][mime_type])
                image_detail.tags = cell.tags
                result_obj.add_detail_object(image_detail)

                # Set Activity Record Message
                result_obj.message = f"Executed cell in notebook {metadata['path']} and generated a result"

                result_cnt += 1

    return result_obj
def _record_remove_activity(cls, secret_store, filename, lb):
    """Commit the secrets registry and store an activity record for a removed secret.

    Args:
        secret_store: Object whose `secret_path` is added to git
        filename: Name of the secrets file whose entry was removed
        lb: LabBook the secret belonged to
    """
    lb.git.add(secret_store.secret_path)
    lb.git.commit("Removed entry from secrets registry.")
    # The hash is read back from the repository after committing
    head_commit = lb.git.commit_hash

    summary = f"Removed entry for secrets file {filename}"

    detail = ActivityDetailRecord(ActivityDetailType.LABBOOK,
                                  show=True,
                                  action=ActivityAction.DELETE)
    detail.add_value('text/markdown', summary)

    record = ActivityRecord(ActivityType.LABBOOK,
                            message=summary,
                            linked_commit=head_commit,
                            tags=["labbook", "secrets"],
                            show=True)
    record.add_detail_object(detail)

    ActivityStore(lb).create_activity_record(record)
def _record_insert_activity(cls, secret_store, filename, lb, mount_path):
    """Commit the secrets registry and store an activity record for an inserted secret.

    Args:
        secret_store: Object whose `secret_path` is added to git
        filename: Name of the secrets file that was registered
        lb: LabBook the secret belongs to
        mount_path: Path the secrets file is mapped to
    """
    lb.git.add(secret_store.secret_path)
    lb.git.commit("Updated secrets registry.")
    commit = lb.git.commit_hash
    adr = ActivityDetailRecord(ActivityDetailType.LABBOOK,
                               show=True,
                               action=ActivityAction.CREATE)
    # The two adjacent literals are concatenated, so the separating space must be explicit;
    # without it the text read "...secrets file <filename>to map to <mount_path>".
    adr.add_value('text/markdown',
                  f"Created new entry for secrets file {filename} "
                  f"to map to {mount_path}")
    ar = ActivityRecord(ActivityType.LABBOOK,
                        message=f"Created entry for secrets file {filename}",
                        linked_commit=commit,
                        tags=["labbook", "secrets"],
                        show=True)
    ar.add_detail_object(adr)
    ars = ActivityStore(lb)
    ars.create_activity_record(ar)
def process(self, result_obj: ActivityRecord, data: List[ExecutionData], status: Dict[str, Any],
            metadata: Dict[str, Any]) -> ActivityRecord:
    """Add a detail record for each executed code entry and set the activity message.

    Args:
        result_obj: The activity record to update
        data: Execution data; each item's `code` entries are scanned in reverse order
        status: Unused by this processor
        metadata: Must contain `path`, the notebook named in the message

    Returns:
        ActivityRecord: the same `result_obj` that was passed in
    """
    # If there was some code, assume a cell was executed
    result_cnt = 0

    # Count cells starting from 1 so that, after the loop, `cell_cnt` is the number of cells
    # processed rather than the index of the last one (which under-reported by one and
    # described two cells as "cell"). Initialising it also keeps it defined for empty `data`.
    cell_cnt = 0
    for cell_cnt, cell in enumerate(data, start=1):
        for result_entry in reversed(cell.code):
            if result_entry.get('code'):
                # Create detail record to capture executed code
                adr_code = ActivityDetailRecord(ActivityDetailType.CODE_EXECUTED,
                                                show=False,
                                                action=ActivityAction.EXECUTE,
                                                importance=max(255 - result_cnt, 0))

                adr_code.add_value('text/markdown', f"```\n{result_entry.get('code')}\n```")
                adr_code.tags = cell.tags

                result_obj.add_detail_object(adr_code)

                result_cnt += 1

    # Set Activity Record Message
    cell_str = f"{cell_cnt} cells" if cell_cnt > 1 else "cell"
    result_obj.message = f"Executed {cell_str} in notebook {metadata['path']}"

    return result_obj
def mutate_and_get_payload(cls, root, info, name, description, repository, base_id, revision,
                           is_untracked=False, client_mutation_id=None):
    """Create a new LabBook for the logged-in user, record the creation, and add its base.

    Args:
        root: Unused here
        info: Unused here
        name: Name of the new LabBook
        description: Description of the new LabBook
        repository: Base repository passed to `ComponentManager.add_base`
        base_id: Base identifier passed to `ComponentManager.add_base`
        revision: Base revision passed to `ComponentManager.add_base`
        is_untracked: Accepted but not used by this implementation
        client_mutation_id: Unused here

    Returns:
        CreateLabbook wrapping the new LabBook
    """
    username = get_logged_in_username()
    lb = InventoryManager().create_labbook(username=username,
                                           owner=username,
                                           labbook_name=name,
                                           description=description,
                                           author=get_logged_in_author())

    created_message = f"Created new LabBook: {username}/{name}"

    detail = ActivityDetailRecord(ActivityDetailType.LABBOOK, show=False, importance=0)
    detail.add_value('text/plain', created_message)

    # Create activity record
    record = ActivityRecord(ActivityType.LABBOOK,
                            message=created_message,
                            show=True,
                            importance=255,
                            linked_commit=lb.git.commit_hash)
    record.add_detail_object(detail)
    ActivityStore(lb).create_activity_record(record)

    ComponentManager(lb).add_base(repository, base_id, revision)

    return CreateLabbook(labbook=Labbook(owner=username, name=lb.name))
def _make_move_activity_record(cls, labbook: LabBook, section: str, dst_abs_path: str,
                               commit_msg: str) -> None:
    """Stage and commit a moved file or directory, then store an activity record for it.

    Args:
        labbook: Subject labbook
        section: Section name used to look up the activity types
        dst_abs_path: Absolute destination path of the move; directories are staged with
                      `add_all`, anything else with `add`
        commit_msg: Used for the commit, the record message, and the detail text

    Returns:
        None
    """
    stage = labbook.git.add_all if os.path.isdir(dst_abs_path) else labbook.git.add
    stage(dst_abs_path)

    move_commit = labbook.git.commit(commit_msg)

    activity_type, activity_detail_type, section_str = labbook.get_activity_type_from_section(section)

    detail = ActivityDetailRecord(activity_detail_type,
                                  show=False,
                                  importance=0,
                                  action=ActivityAction.EDIT)
    detail.add_value('text/markdown', commit_msg)

    record = ActivityRecord(activity_type,
                            message=commit_msg,
                            linked_commit=move_commit.hexsha,
                            show=True,
                            importance=255,
                            tags=['file-move'])
    record.add_detail_object(detail)

    ActivityStore(labbook).create_activity_record(record)
def process_sweep_status(self, result_obj: ActivityRecord,
                         status: Dict[str, Any]) -> Tuple[ActivityRecord, int, int, int]:
    """Add detail records for all new, changed, and deleted files, and refine the record type.

    One detail record is added per untracked file and per unstaged/staged change. When every
    file involved belongs to the same one of the Code / Input Data / Output Data sections,
    `result_obj.type` is set to the matching activity type; when new and changed files
    disagree, it is set back to the default activity type.

    Args:
        result_obj: The activity record to update
        status: Git status; `status['untracked']` is iterated as file names, while
                `status['unstaged']` and `status['staged']` are iterated as
                `(filename, change)` pairs. The dictionary is not modified.

    Returns:
        Tuple of (`result_obj`, number of new files, number of added/modified/renamed/other
        changes, number of deleted files)
    """
    def _section_activity_type(section_set):
        """Return the ActivityType for the known section in `section_set`, or None."""
        if "Code" in section_set:
            return ActivityType.CODE
        if "Input Data" in section_set:
            return ActivityType.INPUT_DATA
        if "Output Data" in section_set:
            return ActivityType.OUTPUT_DATA
        return None

    sections = []
    ncnt = 0
    for filename in status['untracked']:
        # skip any file in .git or .gigantum dirs
        if ".git" in filename or ".gigantum" in filename:
            continue

        activity_type, activity_detail_type, section = self.infer_section_from_relative_path(filename)
        adr = ActivityDetailRecord(activity_detail_type, show=False, importance=max(255 - ncnt, 0),
                                   action=ActivityAction.CREATE)
        sections.append(section)

        if section == self._default_activity_section:
            msg = f"Created new file `{filename}` in the {self._default_activity_section}."
            # A space separates the two sentences; it was previously missing ("...Root.Note, ...")
            msg = f"{msg} Note, it is best practice to use the Code, Input, and Output sections exclusively."
        else:
            msg = f"Created new {section} file `{filename}`"
        adr.add_value('text/markdown', msg)
        result_obj.add_detail_object(adr)
        ncnt += 1

    # If all modifications were of same section
    new_section_set = set(sections)
    if ncnt > 0 and len(new_section_set) == 1:
        new_type = _section_activity_type(new_section_set)
        if new_type is not None:
            result_obj.type = new_type

    mcnt = 0
    dcnt = 0
    msections = []
    # Build a fresh list rather than extending `status['unstaged']` in place, which silently
    # grew the caller's status dictionary with the staged entries on every call.
    changes = list(status['unstaged']) + list(status['staged'])
    for filename, change in changes:
        # skip any file in .git or .gigantum dirs
        if (".git" in filename and ".gitkeep" not in filename) or ".gigantum" in filename:
            continue

        activity_type, activity_detail_type, section = self.infer_section_from_relative_path(filename)
        msections.append(section)

        if change == "deleted":
            action = ActivityAction.DELETE
            dcnt += 1
        elif change == "added":
            action = ActivityAction.CREATE
            mcnt += 1
        elif change == "modified":
            action = ActivityAction.EDIT
            mcnt += 1
        elif change == "renamed":
            action = ActivityAction.EDIT
            mcnt += 1
        else:
            action = ActivityAction.NOACTION
            mcnt += 1

        adr = ActivityDetailRecord(activity_detail_type, show=False,
                                   importance=max(255 - mcnt, 0), action=action)
        if ".gitkeep" in filename:
            # A change to a `.gitkeep` file is reported as a change to its directory
            directory_name, _ = filename.split('.gitkeep')
            adr.add_value('text/markdown',
                          f"{change[0].upper() + change[1:]} {section} directory `{directory_name}`")
        else:
            adr.add_value('text/markdown', f"{change[0].upper() + change[1:]} {section} file `{filename}`")
        result_obj.add_detail_object(adr)

    modified_section_set = set(msections)
    if result_obj.type == self._default_activity_type:
        # If new files are from different sections or no new files, you'll still be LABBOOK or DATASET type
        if (mcnt+dcnt) > 0 and len(modified_section_set) == 1:
            # If there have been modified files and they are all from the same section
            if len(new_section_set) == 0 or new_section_set == modified_section_set:
                # If there have been only modified files from a single section,
                # or new files are from the same section
                modified_type = _section_activity_type(modified_section_set)
                if modified_type is not None:
                    result_obj.type = modified_type
    elif (mcnt+dcnt) > 0:
        if len(modified_section_set) > 1 or new_section_set != modified_section_set:
            # Mismatch between new and modify or within modify, just use catchall LABBOOK or DATASET type
            result_obj.type = self._default_activity_type

    # Return additionally new file cnt (ncnt) and modified (mcnt)
    return result_obj, ncnt, mcnt, dcnt
def create_dataset(self, username: str, owner: str, dataset_name: str, storage_type: str,
                   description: Optional[str] = None, author: Optional[GitAuthor] = None) -> Dataset:
    """Create a new Dataset in this Gigantum working directory.

    Args:
        username: Active username
        owner: Namespace in which to place this Dataset
        dataset_name: Name of the Dataset
        storage_type: String identifying the type of Dataset to instantiate
        description: Optional brief description of Dataset
        author: Optional Git Author

    Returns:
        Newly created Dataset instance

    Raises:
        ValueError: If `storage_type` is not in `storage.SUPPORTED_STORAGE_BACKENDS`, or if a
            directory for `dataset_name` already exists under this user and owner
    """
    dataset = Dataset(config_file=self.config_file, author=author, namespace=owner)

    if storage_type not in storage.SUPPORTED_STORAGE_BACKENDS:
        raise ValueError(f"Unsupported Dataset storage type: {storage_type}")

    try:
        build_info = Configuration(self.config_file).config['build_info']
    except KeyError:
        logger.warning("Could not obtain build_info from config")
        build_info = None

    # Build data file contents
    dataset._data = {
        "schema": DATASET_CURRENT_SCHEMA,
        "id": uuid.uuid4().hex,
        "name": dataset_name,
        "storage_type": storage_type,
        "description": description or '',
        "created_on": datetime.datetime.utcnow().isoformat(),
        "build_info": build_info
    }
    dataset._validate_gigantum_data()

    logger.info(f"Creating new Dataset on disk for {username}/{owner}/{dataset_name}")

    # lock while creating initial directory
    with dataset.lock(lock_key=f"new_dataset_lock|{username}|{owner}|{dataset_name}"):
        # Make sure you expand a user dir string
        starting_dir = os.path.expanduser(dataset.client_config.config["git"]["working_directory"])

        # Verify or create the user and owner subdirectories in one step:
        # working dir > logged in user > owner
        namespace_dir = os.path.join(starting_dir, username, owner)
        os.makedirs(namespace_dir, exist_ok=True)

        # Datasets live in the `datasets` subdir of the owner dir
        new_root_dir = os.path.join(namespace_dir, "datasets", dataset_name)

        # Verify name not already in use
        if os.path.isdir(new_root_dir):
            raise ValueError(f"Dataset `{dataset_name}` already exists locally. Choose a new Dataset name")

        # Create Dataset subdirectory (also creates `datasets` if it is missing)
        os.makedirs(new_root_dir)
        dataset._set_root_dir(new_root_dir)

        # Init repository
        dataset.git.initialize()

        # Create .gitignore default file
        shutil.copyfile(os.path.join(resource_filename('gtmcore', 'dataset'), 'gitignore.default'),
                        os.path.join(dataset.root_dir, ".gitignore"))

        # Create Directory Structure, placing a .gitkeep file in each directory
        dirs = [
            'manifest',
            'metadata',
            '.gigantum',
            os.path.join('.gigantum', 'activity'),
            os.path.join('.gigantum', 'activity', 'log')
        ]
        for d in dirs:
            p = os.path.join(dataset.root_dir, d, '.gitkeep')
            os.makedirs(os.path.dirname(p), exist_ok=True)
            with open(p, 'w') as gk:
                gk.write("This file is necessary to keep this directory tracked by Git"
                         " and archivable by compression tools. Do not delete or modify!")

        dataset._save_gigantum_data()

        # Create an empty storage.json file
        dataset.backend_config = {}

        # Commit
        dataset.git.add_all()

        # NOTE: this string is used to indicate there are no more activity records to get. Changing the string will
        # break activity paging.
        # TODO: Improve method for detecting the first activity record
        dataset.git.commit(f"Creating new empty Dataset: {dataset_name}")

        # Create Activity Record
        # NOTE(review): the message is built from `username`, not `owner` - confirm that is intended
        adr = ActivityDetailRecord(ActivityDetailType.DATASET, show=False, importance=0)
        adr.add_value('text/plain', f"Created new Dataset: {username}/{dataset_name}")
        ar = ActivityRecord(ActivityType.DATASET,
                            message=f"Created new Dataset: {username}/{dataset_name}",
                            show=True,
                            importance=255,
                            linked_commit=dataset.git.commit_hash)
        ar.add_detail_object(adr)

        store = ActivityStore(dataset)
        store.create_activity_record(ar)

        # Initialize file cache and link revision
        m = Manifest(dataset, username)
        m.link_revision()

        return dataset
def test_get_recent_activity(self, fixture_working_dir, snapshot, fixture_test_file):
    """Test the `recentActivity` field of the labbook overview after a mix of file and fake activity"""
    temp_file = '/tmp/test_file.txt'

    def _write_temp_file() -> None:
        # Use a context manager so the handle is closed before the file is inserted
        with open(temp_file, 'w') as tf:
            tf.write("xxx" * 50)

    im = InventoryManager(fixture_working_dir[0])
    lb = im.create_labbook("default", "default", "labbook11", description="my test description",
                           author=GitAuthor(name="tester", email="*****@*****.**"))
    FileOperations.insert_file(lb, "code", fixture_test_file)

    # fake activity
    store = ActivityStore(lb)
    adr1 = ActivityDetailRecord(ActivityDetailType.CODE)
    adr1.show = False
    adr1.importance = 100
    adr1.add_value("text/plain", "first")

    ar = ActivityRecord(ActivityType.CODE,
                        show=False,
                        message="ran some code",
                        importance=50,
                        linked_commit="asdf")
    ar.add_detail_object(adr1)

    # Create Activity Records
    for _ in range(4):
        store.create_activity_record(ar)

    # Interleave real file activity in the input section
    _write_temp_file()
    FileOperations.insert_file(lb, "input", temp_file)
    FileOperations.makedir(lb, "input/test")
    _write_temp_file()
    FileOperations.insert_file(lb, "input", temp_file, "test")
    FileOperations.makedir(lb, "input/test2")
    _write_temp_file()
    FileOperations.insert_file(lb, "input", temp_file, "test2")

    for _ in range(5):
        store.create_activity_record(ar)

    _write_temp_file()
    FileOperations.insert_file(lb, "output", temp_file)

    # Get all records at once with no pagination args and verify cursors look OK directly
    query = """
    {
      labbook(owner: "default", name: "labbook11") {
        overview {
          recentActivity {
            message
            type
            show
            importance
            tags
          }
        }
      }
    }
    """
    snapshot.assert_match(fixture_working_dir[2].execute(query))
def link_dataset_to_labbook(self, dataset_url: str, dataset_namespace: str, dataset_name: str,
                            labbook: LabBook) -> Dataset:
    """Link a Dataset to a LabBook by adding it as a git submodule of the LabBook repository.

    If the first `git submodule add` fails, any leftover submodule state is removed and the add is
    retried once; a failure of the retry propagates to the caller.

    Args:
        dataset_url: Repository location passed to `git submodule add`
        dataset_namespace: Namespace of the Dataset, used in the submodule path and name
        dataset_name: Name of the Dataset, used in the submodule path and name
        labbook: LabBook whose repository receives the submodule reference

    Returns:
        Dataset loaded from the submodule directory inside the LabBook
    """
    submodule_name = f"{dataset_namespace}&{dataset_name}"
    dataset_ref = f"{dataset_namespace}/{dataset_name}"

    relative_submodule_dir = os.path.join('.gigantum', 'datasets', dataset_namespace, dataset_name)
    absolute_submodule_dir = os.path.join(labbook.root_dir, relative_submodule_dir)
    absolute_submodule_root = os.path.join(labbook.root_dir, '.gigantum', 'datasets', dataset_namespace)
    git_module_dir = os.path.join(labbook.root_dir, '.git', 'modules', submodule_name)

    def _add_submodule():
        """Helper method to add the dataset as a submodule of the labbook repository"""
        add_cmd = ['git', 'submodule', 'add', '--name', submodule_name, dataset_url, relative_submodule_dir]
        call_subprocess(add_cmd, cwd=labbook.root_dir)

    def _clean_submodule():
        """Helper method to clean a submodule reference from a repository"""
        if os.path.exists(absolute_submodule_dir):
            logger.warning(f"Cleaning {relative_submodule_dir} from parent git repo")
            try:
                call_subprocess(['git', 'rm', '-f', '--cached', relative_submodule_dir],
                                cwd=labbook.root_dir)
            except subprocess.CalledProcessError:
                # Best effort only: the directories are still removed below
                logger.warning(f"git rm on {relative_submodule_dir} failed. Continuing...")

        # Remove the checked-out submodule first, then its entry under .git/modules
        for leftover in (absolute_submodule_dir, git_module_dir):
            if os.path.exists(leftover):
                logger.warning(f"Removing {leftover} directory")
                shutil.rmtree(leftover)

    if not os.path.exists(absolute_submodule_root):
        pathlib.Path(absolute_submodule_root).mkdir(parents=True, exist_ok=True)

    stale_link_present = os.path.exists(absolute_submodule_dir) and os.path.exists(git_module_dir)
    if stale_link_present:
        # Seem to be trying to link a dataset after a reset removed the dataset. Clean up first.
        _clean_submodule()

    try:
        # Link dataset via submodule reference
        _add_submodule()
    except subprocess.CalledProcessError:
        logger.warning("Failed to link dataset. Attempting to repair repository and link again.")
        _clean_submodule()

        # Try to add again 1 more time, allowing a failure to raise an exception
        _add_submodule()

        # If you got here, repair worked and link OK
        logger.info("Repository repair and linking retry successful.")

    labbook.git.add_all()
    commit = labbook.git.commit(f"adding submodule ref to link dataset {dataset_ref}")
    labbook.git.update_submodules(init=True)

    ds = self.load_dataset_from_directory(absolute_submodule_dir)
    dataset_revision = ds.git.repo.head.commit.hexsha

    # Add Activity Record
    detail = ActivityDetailRecord(ActivityDetailType.DATASET, show=False, action=ActivityAction.CREATE)
    detail.add_value('text/markdown',
                     f"Linked Dataset `{dataset_ref}` to "
                     f"project at revision `{dataset_revision}`")
    record = ActivityRecord(ActivityType.DATASET,
                            message=f"Linked Dataset {dataset_ref} to project.",
                            linked_commit=commit.hexsha,
                            tags=["dataset"],
                            show=True)
    record.add_detail_object(detail)
    ActivityStore(labbook).create_activity_record(record)

    return ds