def _pull(repository: Repository, branch_name: str, override: str,
          feedback_cb: Callable, username: Optional[str] = None) -> None:
    """Run `git pull` in the repository, resolving merge conflicts per `override`.

    Args:
        repository: Repository whose `root_dir` is the git working directory.
        branch_name: Branch name, used only in the user-facing feedback message.
        override: Conflict resolution method. 'abort' resets to the commit that
            was checked out before the pull and raises `MergeConflict`; any other
            value is handed to git as `git checkout --<override>` for the
            conflicted files.
        feedback_cb: Callback receiving user-facing progress messages.
        username: Not used by this function.

    Raises:
        MergeConflict: The pull conflicted and `override` is 'abort'.
        subprocess.CalledProcessError: A git command failed for a reason other
            than an automatic-merge failure.
    """
    # TODO(billvb) Refactor to BranchManager
    feedback_cb(f"Pulling from remote branch \"{branch_name}\"...")
    # Commit to fall back to if a conflicting pull has to be aborted.
    checkpoint = repository.git.commit_hash
    try:
        call_subprocess(['git', 'pull'], cwd=repository.root_dir)
    except subprocess.CalledProcessError as cp_error:
        pull_output = cp_error.stdout.decode()
        if 'Automatic merge failed' not in pull_output:
            # Not a merge conflict - nothing to resolve here.
            raise

        feedback_cb(
            f"Detected merge conflict, resolution method = {override}")
        bm = BranchManager(repository, username='')
        conflicted_files = bm._infer_conflicted_files(pull_output)
        if override == 'abort':
            call_subprocess(f'git reset --hard {checkpoint}'.split(),
                            cwd=repository.root_dir)
            raise MergeConflict('Merge conflict pulling upstream',
                                conflicted_files)

        # Each path is passed as its own argument (after `--`) so that file
        # names containing whitespace are not broken into several arguments.
        call_subprocess(
            ['git', 'checkout', f'--{override}', '--', *conflicted_files],
            cwd=repository.root_dir)
        call_subprocess(['git', 'add', '.'], cwd=repository.root_dir)
        # NOTE: the commit message deliberately stays `"Merge"` including the
        # double quotes, which is what the previous split-based command produced.
        call_subprocess(['git', 'commit', '-m', '"Merge"'],
                        cwd=repository.root_dir)
        feedback_cb("Resolved merge conflict")
def publish_to_remote(repository: Repository, username: str, remote: str,
                      feedback_callback: Callable) -> None:
    """Push the workspace branch of `repository` to `remote` for the first time.

    Args:
        repository: Repository to publish.
        username: Username used to construct the BranchManager.
        remote: Name of the git remote to fetch from and push to.
        feedback_callback: Callback receiving user-facing progress messages.

    Raises:
        ValueError: If the active branch is not the workspace branch, or if the
            remote could not be fetched after five attempts.
    """
    # TODO(billvb) - Refactor all (or part) to BranchManager
    bm = BranchManager(repository, username=username)
    if bm.workspace_branch != bm.active_branch:
        raise ValueError(f'Must be on branch {bm.workspace_branch} to publish')

    feedback_callback(f"Preparing to publish {repository.name}")
    git_garbage_collect(repository)

    # Try five attempts to fetch - the remote repo could have been created just milliseconds
    # ago, so may need a few moments to settle before it supports all the git operations.
    max_attempts = 5
    for attempt in range(1, max_attempts + 1):
        try:
            repository.git.fetch(remote=remote)
            break
        except Exception as e:
            logger.warning(
                f"Fetch attempt {attempt}/{max_attempts} failed for {str(repository)}: {e}")
            if attempt < max_attempts:
                # No point in waiting after the final attempt.
                time.sleep(1)
    else:
        raise ValueError(
            f"Timed out trying to fetch repo for {str(repository)}")

    feedback_callback("Pushing up regular objects...")
    # Push to the same remote that was just fetched, rather than a hard-coded 'origin'.
    call_subprocess(
        ['git', 'push', '--set-upstream', remote, bm.workspace_branch],
        cwd=repository.root_dir)
    feedback_callback("Publish complete.")
    repository.git.clear_checkout_context()
def clone_repo(remote_url: str, username: str, owner: str,
               load_repository: Callable[[str], Any],
               put_repository: Callable[[str, str, str], Any],
               make_owner: bool = False) -> Repository:
    """Clone a remote repository into a scratch directory and import it.

    The clone happens in a uniquely named directory under
    `Configuration().upload_dir`. That directory is removed when this function
    exits, whether the import succeeded or raised.

    Args:
        remote_url: URL handed to `_clone`.
        username: Active username, forwarded to `put_repository`.
        owner: Repository owner, forwarded to `put_repository`.
        load_repository: Callable that builds a repository object from the
            cloned path.
        put_repository: Callable taking (path, username, owner); its return
            value is returned to the caller.
        make_owner: Not used by this function.

    Returns:
        The value returned by `put_repository`.
    """
    tempdir = os.path.join(
        Configuration().upload_dir,
        f"{username}_{owner}_clone_{uuid.uuid4().hex[0:10]}")
    os.makedirs(tempdir)
    try:
        path = _clone(remote_url=remote_url, working_dir=tempdir)
        candidate_repo = load_repository(path)

        if os.environ.get('WINDOWS_HOST'):
            logger.warning("Imported on Windows host - set fileMode to false")
            call_subprocess("git config core.fileMode false".split(),
                            cwd=candidate_repo.root_dir)

        return put_repository(candidate_repo.root_dir, username, owner)
    finally:
        # Clean up on every exit path; previously a failure in any step above
        # left the scratch directory behind.
        shutil.rmtree(tempdir, ignore_errors=True)
def _set_upstream_branch(repository: Repository, branch_name: str,
                         feedback_cb: Callable):
    """Push `branch_name` to `origin` with `--set-upstream`.

    Args:
        repository: Repository whose `root_dir` is the git working directory.
        branch_name: Name of the branch to push.
        feedback_cb: Accepted for signature parity; not invoked here.
    """
    # TODO(billvb) - Refactor to BranchManager
    call_subprocess(['git', 'push', '--set-upstream', 'origin', branch_name],
                    cwd=repository.root_dir)
def _clone(remote_url: str, working_dir: str) -> str:
    """Clone `remote_url` inside `working_dir` and return the path of the clone.

    Args:
        remote_url: URL (or local path) of the repository to clone.
        working_dir: Directory in which `git clone` is executed; it must end up
            containing exactly one entry.

    Returns:
        Path of the freshly cloned repository.

    Raises:
        GigantumException: If `working_dir` does not contain exactly one entry
            after cloning, or the expected path does not exist.
    """
    # The URL is passed as one argument so that a remote containing spaces
    # (e.g. a local path) is not broken into several arguments. Surrounding
    # whitespace is dropped, as the previous split-based command did.
    call_subprocess(['git', 'clone', remote_url.strip()], cwd=working_dir)

    # Affirm there is only one directory created
    dirs = os.listdir(working_dir)
    if len(dirs) != 1:
        raise GigantumException('Git clone produced extra directories')

    p = os.path.join(working_dir, dirs[0])
    if not os.path.exists(p):
        raise GigantumException(
            'Could not find expected path of repo after clone')

    try:
        # This is for backward compatibility -- old projects will clone to
        # branch "gm.workspace" by default -- even if it has already been migrated.
        # This will therefore set the user to the proper branch if the project has been
        # migrated, and will have no effect if it hasn't
        call_subprocess(['git', 'checkout', 'master'], cwd=p)
    except Exception as e:
        # Best effort only: a failed checkout is logged, not raised.
        logger.error(e)

    return p
def migrate_labbook_schema(labbook: LabBook) -> None:
    """Migrate the labbook's schema to the current version and record the change.

    If the migration raises, the working copy is hard-reset to the commit that
    was checked out on entry and the exception is re-raised. On success the
    labbook config file is committed and an activity record linked to that
    commit is created.

    Args:
        labbook: LabBook to migrate.
    """
    # Fallback point in case of a problem
    rollback_commit = labbook.git.commit_hash

    try:
        migrate_schema_to_current(labbook.root_dir)
    except Exception as e:
        logger.exception(e)
        reset_tokens = f'git reset --hard {rollback_commit}'.split()
        call_subprocess(reset_tokens, cwd=labbook.root_dir)
        raise

    msg = f"Migrate schema to {CURRENT_LABBOOK_SCHEMA}"
    labbook.git.add(labbook.config_path)
    migration_commit = labbook.git.commit(msg,
                                          author=labbook.author,
                                          committer=labbook.author)

    detail = ActivityDetailRecord(ActivityDetailType.LABBOOK,
                                  show=True,
                                  importance=100,
                                  action=ActivityAction.EDIT)
    detail.add_value('text/plain', msg)

    record = ActivityRecord(ActivityType.LABBOOK,
                            message=msg,
                            show=True,
                            importance=255,
                            linked_commit=migration_commit.hexsha,
                            tags=['schema', 'update', 'migration'])
    record.add_detail_object(detail)

    ActivityStore(labbook).create_activity_record(record)
def _export_zip(
        cls,
        repo: Repository,
        export_directory: str,
        config_file: Optional[str] = None,
) -> str:
    """Zip the repository directory into `export_directory`.

    The archive is named `<repo.name>-<first 6 chars of latest commit>.zip` and
    contains the repository's top-level directory.

    Args:
        repo: Repository to export.
        export_directory: Directory that receives the archive; created if needed.
        config_file: Not used by this function.

    Returns:
        Path of the archive that was written.

    Raises:
        AssertionError: If the `zip` command returned but no archive exists.
    """
    os.makedirs(export_directory, exist_ok=True)

    repo_dir, _ = repo.root_dir.rsplit(os.path.sep, 1)

    repo_zip_name = f'{repo.name}-' \
                    f'{repo.git.log()[0]["commit"][:6]}'
    zip_path = f'{repo_zip_name}.zip'
    exported_path = os.path.join(export_directory, zip_path)

    try:
        # zip data using subprocess - NOTE! Python zipfile library does not work correctly.
        call_subprocess(
            ['zip', '-r', exported_path,
             os.path.basename(repo.root_dir)],
            cwd=repo_dir,
            check=True)
        # Explicit check (rather than `assert`) so it still runs under `python -O`.
        if not os.path.exists(exported_path):
            raise AssertionError(
                f'zip did not produce an archive at {exported_path}')
        return exported_path
    except BaseException:
        # Remove a partial archive on any failure, then propagate the error.
        try:
            os.remove(exported_path)
        except OSError:
            # Nothing to clean up (or not removable); the original error matters more.
            pass
        raise
def sync_branch(repository: Repository, username: Optional[str],
                override: str, pull_only: bool,
                feedback_callback: Callable) -> int:
    """Synchronize the active branch with `origin`.

    Local changes are swept (committed) and the remote is fetched first. If the
    active branch does not exist remotely it is pushed with upstream tracking
    (unless `pull_only`). Otherwise the branch is pulled and, unless
    `pull_only`, pushed back.

    Args:
        repository: Repository to synchronize.
        username: Not used by this function.
        override: Merge-conflict resolution method, forwarded to `_pull`.
        pull_only: When True, never push to the remote.
        feedback_callback: Callback receiving user-facing progress messages.

    Returns:
        The value of `BranchManager.get_commits_behind()` taken just before the
        pull, or 0 when there is no remote or no remote branch to pull from.
    """
    if not repository.has_remote:
        return 0

    repository.sweep_uncommitted_changes()
    repository.git.fetch()

    bm = BranchManager(repository)
    branch_name = bm.active_branch

    if branch_name not in bm.branches_remote:
        if pull_only:
            # Cannot pull when remote branch doesn't exist.
            feedback_callback("Pull complete - nothing to pull")
        else:
            # Branch does not exist, so push it to remote.
            _set_upstream_branch(repository, branch_name, feedback_callback)
        return 0

    pulled_updates_count = bm.get_commits_behind()
    _pull(repository, branch_name, override, feedback_callback)

    if pull_only:
        # Skip pushing back up if set to pull_only
        feedback_callback("Pull complete")
    else:
        # The remote branch is known to exist at this point, so no
        # `--set-upstream` is needed (the old check for it was unreachable).
        call_subprocess(['git', 'push', 'origin', branch_name],
                        cwd=repository.root_dir)
        feedback_callback("Sync complete")

    return pulled_updates_count
def complete_batch(cls, labbook: LabBook, txid: str,
                   cancel: bool = False, rollback: bool = False) -> None:
    """Indicate a batch upload is finished and sweep all new files.

    Args:
        labbook: Subject labbook
        txid: Transaction id (correlator)
        cancel: Indicate transaction finished but due to cancellation
        rollback: Undo all local changes if cancelled (default False)

    Returns:
        None
    """
    if cancel and rollback:
        logger.warning(f"Cancelled tx {txid}, doing git reset")
        call_subprocess(['git', 'reset', '--hard'],
                        cwd=labbook.root_dir)
    else:
        logger.info(f"Done batch upload {txid}, cancelled={cancel}")
        if cancel:
            logger.warning("Sweeping aborted batch upload.")
        # This must be an f-string: without the prefix the commit message
        # contained the literal text `{txid}` instead of the transaction id.
        m = f"Cancelled upload `{txid}`. " if cancel else ''
        labbook.sweep_uncommitted_changes(upload=True,
                                          extra_msg=m,
                                          show=True)
def git_garbage_collect(repository: Repository) -> None:
    """Run `git gc` in the repository's working directory.

    The call is skipped when the `WINDOWS_HOST` environment variable is set.
    A failing `git gc` is logged as a warning and otherwise ignored.

    Note!! This method assumes the subject repository has already been locked!

    TODO(billvb): Refactor into BranchManager

    Args:
        repository: Subject Repository

    Returns:
        None
    """
    logger.info(f"Running git gc (Garbage Collect) in {str(repository)}...")

    on_windows_host = os.environ.get('WINDOWS_HOST')
    if on_windows_host:
        logger.warning(
            f"Avoiding `git gc` in {str(repository)} on Windows host fs")
        return

    gc_tokens = ['git', 'gc']
    try:
        call_subprocess(gc_tokens, cwd=repository.root_dir)
    except subprocess.CalledProcessError:
        # Best effort: an unpruned repo is not an error for the caller.
        logger.warning(
            f"Ignore `git gc` error - {str(repository)} repo remains unpruned")
def remove_lfs_remotes(self) -> None:
    """Remove all LFS endpoints.

    Each LFS endpoint has its own entry in the git config. It takes the form of the following:

    ```
    [lfs "https://repo.location.whatever"]
        access = basic
    ```

    In order to get the section name, which is "lfs.https://repo.location.whatever", we need to search
    by all LFS fields and remove them (and in order to get the section need to strip the variables off the end).

    A section may hold several variables; each section is removed exactly once.

    Returns:
        None
    """
    lfs_entries = call_subprocess(
        ['git', 'config', '--get-regexp', 'lfs.http*'],
        cwd=self.root_dir).split('\n')
    logger.info(f"LFS entries to delete are {lfs_entries}")

    # Collect section names, not raw entries: de-duplicating the entries alone
    # still yields the same section once per variable, and removing a section
    # a second time fails.
    sections = set()
    for entry in lfs_entries:
        if not entry:
            continue
        # Entry looks like "<section>.<variable> <value>".
        variable = entry.split(' ')[0]
        sections.add(variable.rpartition('.')[0])

    for section in sorted(sections):
        call_subprocess(['git', 'config', '--remove-section', section],
                        cwd=self.root_dir)
def merge_from(self, other_branch: str) -> None:
    """Pulls/merges `other_branch` into current branch. If in the event of a conflict, it resets to
    the point prior to merge.

    Uncommitted local changes are swept (committed) before the merge, and the
    rollback point is taken after that sweep so a failed merge does not discard them.

    Args:
        other_branch: Name of other branch to merge from

    Raises:
        InvalidBranchName: If `other_branch` is not a local branch.
        MergeConflict: If `git merge` fails; carries the conflicted files.
    """
    if other_branch not in self.branches_local:
        raise InvalidBranchName(f'Branch {other_branch} not found')

    # Sweep first, then record the checkpoint. Recording it before the sweep
    # meant a rollback hard-reset past the sweep commit and lost local work.
    self.repository.sweep_uncommitted_changes()
    checkpoint = self.repository.git.commit_hash
    try:
        try:
            call_subprocess(f'git merge {other_branch}'.split(),
                            cwd=self.repository.root_dir)
        except subprocess.CalledProcessError as merge_error:
            logger.warning(
                f"Merge conflict syncing {str(self.repository)}")
            # TODO - This should be cleaned up (The UI attempts to match on the token "Merge conflict")
            conflicted_files = self._infer_conflicted_files(
                merge_error.stdout.decode())
            raise MergeConflict(f"Merge conflict - {merge_error}",
                                file_conflicts=conflicted_files) from merge_error
        self.repository.git.commit(f'Merged from branch `{other_branch}`')
    except Exception:
        # Undo the partial merge, then let the original error propagate.
        call_subprocess(f'git reset --hard {checkpoint}'.split(),
                        cwd=self.repository.root_dir)
        raise
def update_linked_dataset(labbook: LabBook, username: str,
                          init: bool = False) -> None:
    """Update the dataset submodules of a labbook and prune stale dataset dirs.

    Every directory matching `.gigantum/datasets/*/*` that does not correspond
    to a submodule reported by `labbook.git.list_submodules()` is deleted at
    the end. A failure while processing one submodule is logged and does not
    stop the others.

    Args:
        labbook: LabBook whose linked datasets are updated.
        username: Username passed to `Manifest`.
        init: When True, run `git submodule init` before `git submodule update`.
    """
    # List all existing linked datasets IN this repository
    datasets_pattern = os.path.join(labbook.root_dir, '.gigantum', 'datasets',
                                    "*/*")
    unclaimed_dirs = glob.glob(datasets_pattern)

    if len(labbook.git.repo.submodules) > 0:
        for submodule in labbook.git.list_submodules():
            try:
                namespace, dataset_name = submodule['name'].split("&")
                rel_dir = os.path.join('.gigantum', 'datasets', namespace,
                                       dataset_name)
                abs_dir = os.path.join(labbook.root_dir, rel_dir)

                # If submodule is currently present, init/update it, don't remove it!
                if abs_dir in unclaimed_dirs:
                    unclaimed_dirs.remove(abs_dir)

                # `init` is optional; `update` always runs.
                actions = ['init', 'update'] if init else ['update']
                for action in actions:
                    call_subprocess(['git', 'submodule', action, rel_dir],
                                    cwd=labbook.root_dir,
                                    check=True)

                ds = InventoryManager().load_dataset_from_directory(abs_dir)
                ds.namespace = namespace
                Manifest(ds, username).link_revision()
            except Exception as err:
                logger.error(
                    f"Failed to initialize linked Dataset (submodule reference): {submodule['name']}. "
                    f"This may be an actual error or simply due to repository permissions"
                )
                logger.exception(err)
                continue

    # Clean out lingering dataset files if you previously had a dataset linked, but now don't
    for leftover_dir in unclaimed_dirs:
        shutil.rmtree(leftover_dir)
def merge_use_theirs(self, other_branch: str):
    """Merge `other_branch` into the current branch, taking "theirs" on conflict.

    Local changes are swept first. If the merge output indicates conflicted
    files, those files are checked out with `--theirs`; the result is then
    swept into a commit.

    Args:
        other_branch: Name of the branch to merge from.
    """
    self.repository.sweep_uncommitted_changes()
    # check=False: a conflicting merge is expected here and handled below.
    merge_output = call_subprocess(f'git merge {other_branch}'.split(),
                                   cwd=self.repository.root_dir,
                                   check=False)
    conf_files = self._infer_conflicted_files(merge_output)
    if conf_files:
        # One argument per path (after `--`) so file names containing
        # whitespace are not broken into several arguments.
        call_subprocess(['git', 'checkout', '--theirs', '--', *conf_files],
                        cwd=self.repository.root_dir)
    self.repository.sweep_uncommitted_changes(
        extra_msg=f"Merged {other_branch} using theirs.")
def reset(self, username: str):
    """Hard-reset the active branch to its `origin` counterpart.

    Does nothing unless a remote is set and the active branch exists remotely.
    Otherwise: fetch, sweep local changes, `git reset --hard origin/<branch>`,
    then `git clean -fd`.

    Args:
        username: Username used to construct the BranchManager.
    """
    bm = BranchManager(self.repository, username)
    if not (self.remote and bm.active_branch in bm.branches_remote):
        return

    self.repository.git.fetch()
    self.repository.sweep_uncommitted_changes()

    reset_tokens = ['git', 'reset', '--hard', f'origin/{bm.active_branch}']
    call_subprocess(reset_tokens, cwd=self.repository.root_dir)

    clean_tokens = ['git', 'clean', '-fd']
    call_subprocess(clean_tokens, cwd=self.repository.root_dir)
def publish(self, username: str, access_token: Optional[str] = None,
            remote: str = "origin", public: bool = False,
            feedback_callback: Callable = lambda _: None,
            id_token: Optional[str] = None) -> None:
    """Publish this repository to the remote GitLab instance.

    Local changes are swept, the remote repository is created and the
    workspace branch is pushed. If any of that fails, the working copy is
    hard-reset and the exception is re-raised.

    Args:
        username: Subject username
        access_token: Temp token/password to gain permissions on GitLab instance
        remote: Name of Git remote (always "origin" for now).
        public: Allow public read access
        feedback_callback: Callback to give user-facing realtime feedback
        id_token: Dataset credentials (not read by this method)

    Returns:
        None

    Raises:
        GitWorkflowException: If a remote is already set, or the active branch
            is not the workspace branch.
    """
    logger.info(
        f"Publishing {str(self.repository)} for user {username} to remote {remote}"
    )

    if self.remote:
        raise GitWorkflowException(
            "Cannot publish Labbook when remote already set.")

    branch_mgr = BranchManager(self.repository, username=username)
    on_workspace_branch = \
        branch_mgr.active_branch == branch_mgr.workspace_branch
    if not on_workspace_branch:
        raise GitWorkflowException(
            f"Must be on branch {branch_mgr.workspace_branch} to publish")

    try:
        self.repository.sweep_uncommitted_changes()

        if public is True:
            vis = "public"
        else:
            vis = "private"

        gitworkflows_utils.create_remote_gitlab_repo(
            repository=self.repository,
            username=username,
            access_token=access_token,
            visibility=vis)
        gitworkflows_utils.publish_to_remote(
            repository=self.repository,
            username=username,
            remote=remote,
            feedback_callback=feedback_callback)
    except Exception as e:
        # Unsure what specific exception add_remote creates, so make a catchall.
        logger.error(
            f"Publish failed {e}: {str(self.repository)} may be in corrupted Git state!"
        )
        hard_reset = ['git', 'reset', '--hard']
        call_subprocess(hard_reset, cwd=self.repository.root_dir)
        raise e
def test_merge_conflict_basic(self, mock_labbook_lfs_disabled):
    """ Test a basic merge-conflict scenario with a conflict on one file.
        First, assert that a MergeConflict is raised when the conflict is detected
        Second, test the forced merges: `merge_use_ours` keeps the feature-branch
        content, `merge_use_theirs` takes the workspace-branch content."""
    lb = mock_labbook_lfs_disabled[2]

    def read_s1() -> str:
        # Read through a context manager so the file handle is always closed.
        with open(os.path.join(lb.root_dir, 'code', 's1.txt')) as merged:
            return merged.read(1000)

    # Insert a text file into the master branch of lb
    with open('/tmp/s1.txt', 'w') as s1:
        s1.write('original-file\ndata')
    FileOperations.insert_file(lb, section='code', src_file=s1.name)

    # Create a new branch from this point and make a change to s1.txt
    bm = BranchManager(lb, username=TEST_USER)
    feature_name = bm.create_branch("example-feature-branch")
    with open('/tmp/s1.txt', 'w') as s1:
        s1.write('new-changes-in\nfeature-branch')
    FileOperations.insert_file(lb, section='code', src_file=s1.name)

    # Switch back to the main branch and make a new, conflicting change.
    bm.workon_branch(bm.workspace_branch)
    assert lb.is_repo_clean
    assert not os.path.exists(os.path.join(lb.root_dir, 'output/sample'))
    with open('/tmp/s1.txt', 'w') as s1:
        s1.write('upstream-changes-from-workspace')
    FileOperations.insert_file(lb, section='code', src_file=s1.name,
                               dst_path='')

    # Switch back to feature branch -- make sure that failed merges rollback to state before merge.
    bm.workon_branch(feature_name)
    cp = bm.repository.git.commit_hash
    try:
        bm.merge_from(bm.workspace_branch)
        assert False, "merge_from should have thrown conflict"
    except MergeConflict as m:
        # Assert that the conflicted file(s) are as expected
        assert m.file_conflicts == ['code/s1.txt']
    assert lb.is_repo_clean

    # Now force the merge keeping our side: the feature-branch content must win
    bm.merge_use_ours(bm.workspace_branch)
    assert read_s1() == 'new-changes-in\nfeature-branch'
    assert lb.is_repo_clean

    # Reset this branch
    call_subprocess(f'git reset --hard {cp}'.split(),
                    cwd=bm.repository.root_dir)

    # Force the merge taking their side: the workspace-branch content must win
    bm.merge_use_theirs(bm.workspace_branch)
    assert read_s1() == 'upstream-changes-from-workspace'
    assert lb.is_repo_clean
def remove_remote_branch(self, target_branch) -> None:
    """Delete `target_branch` on `origin`.

    Does nothing when the repository has no remote.

    Args:
        target_branch: Name of the remote branch to delete.

    Raises:
        InvalidBranchName: If the branch is not among the remote branches.
        BranchWorkflowViolation: If the branch is the currently active branch.
    """
    # If no remote, do nothing.
    if not self.repository.has_remote:
        return

    known_remotely = target_branch in self.branches_remote
    if not known_remotely:
        raise InvalidBranchName(
            f'Cannot delete `{target_branch}`; does not exist')

    if target_branch == self.active_branch:
        raise BranchWorkflowViolation(
            f'Cannot delete current active branch `{target_branch}`')

    logger.info(
        f'Removing remote branch {target_branch} from {str(self.repository)}')
    delete_tokens = f'git push origin --delete {target_branch}'.split()
    call_subprocess(delete_tokens, cwd=self.repository.root_dir)
def put_labbook(self, path: str, username: str, owner: str) -> LabBook:
    """Insert a candidate labbook into the file system and link its datasets.

    After the labbook is placed via `_put_labbook`, every git submodule it
    lists is initialised and updated, and the dataset revision is linked. A
    failure on one submodule is logged and the remaining ones are still
    processed.

    Args:
        path: Path to a given labbook
        username: Active username
        owner: Intended owner of labbook

    Returns:
        LabBook

    Raises:
        InventoryException: Wraps any exception raised while placing the labbook.
    """
    try:
        lb = self._put_labbook(path, username, owner)

        # Init dataset submodules if present
        has_submodules = len(lb.git.repo.submodules) > 0
        if has_submodules:
            # Link datasets
            for submodule in lb.git.list_submodules():
                try:
                    namespace, dataset_name = submodule['name'].split("&")
                    rel_dir = os.path.join('.gigantum', 'datasets',
                                           namespace, dataset_name)
                    abs_dir = os.path.join(lb.root_dir, rel_dir)

                    for action in ('init', 'update'):
                        call_subprocess(
                            ['git', 'submodule', action, rel_dir],
                            cwd=lb.root_dir,
                            check=True)

                    ds = InventoryManager().load_dataset_from_directory(
                        abs_dir)
                    ds.namespace = namespace
                    Manifest(ds, username).link_revision()
                except Exception as err:
                    logger.exception(
                        f"Failed to import submodule: {submodule['name']}")
                    continue

        return lb
    except Exception as e:
        logger.error(e)
        raise InventoryException(e)
def unlink_dataset_from_labbook(self, dataset_namespace: str,
                                dataset_name: str,
                                labbook: LabBook) -> None:
    """Remove a dataset reference (git submodule) from a labbook.

    Runs `git rm -f` on the submodule directory, deletes the matching
    `.git/modules` entry and any remaining submodule directory, commits, and
    stores an activity record linked to that commit.

    Args:
        dataset_namespace: Namespace component of the dataset's path.
        dataset_name: Name component of the dataset's path.
        labbook: LabBook the dataset is unlinked from.

    Returns:
        None
    """
    rel_submodule_dir = os.path.join('.gigantum', 'datasets',
                                     dataset_namespace, dataset_name)
    call_subprocess(['git', 'rm', '-f', rel_submodule_dir],
                    cwd=labbook.root_dir)

    # Directories that can be left behind after `git rm`.
    leftovers = (
        os.path.join(labbook.root_dir, '.git', 'modules',
                     f"{dataset_namespace}&{dataset_name}"),
        os.path.join(labbook.root_dir, '.gigantum', 'datasets',
                     dataset_namespace, dataset_name),
    )
    for leftover in leftovers:
        if os.path.exists(leftover):
            shutil.rmtree(leftover)

    labbook.git.add_all()
    commit = labbook.git.commit("removing submodule ref")

    # Add Activity Record
    detail = ActivityDetailRecord(ActivityDetailType.DATASET,
                                  show=False,
                                  action=ActivityAction.DELETE)
    detail.add_value(
        'text/markdown',
        f"Unlinked Dataset `{dataset_namespace}/{dataset_name}` from project"
    )
    record = ActivityRecord(
        ActivityType.DATASET,
        message=f"Unlinked Dataset {dataset_namespace}/{dataset_name} from project.",
        linked_commit=commit.hexsha,
        tags=["dataset"],
        show=True)
    record.add_detail_object(detail)
    ActivityStore(labbook).create_activity_record(record)
def _import_zip(cls, archive_path: str, username: str, owner: str,
                fetch_method: Callable, put_method: Callable,
                update_meta: Callable = lambda _: None) -> Repository:
    """Import a repository from a zip archive.

    The archive is unzipped into a temporary directory, loaded with
    `fetch_method`, stripped of its `origin` remote if it has one, and handed
    to `put_method`. `update_meta` receives a cumulative status message after
    each stage.

    Args:
        archive_path: Path of the archive; must contain `.zip` or `.lbk`.
        username: Forwarded to `put_method`.
        owner: Forwarded to `put_method`.
        fetch_method: Callable loading a repository from the unzipped path.
        put_method: Callable taking (path, username=, owner=).
        update_meta: Callback receiving status text.

    Returns:
        The value returned by `put_method`.

    Raises:
        ValueError: If the archive is missing, lacks a recognised extension
            token, or does not unzip to exactly one directory.
    """
    if not os.path.isfile(archive_path):
        raise ValueError(
            f'Archive at {archive_path} is not a file or does not exist')

    if not any(token in archive_path for token in ('.zip', '.lbk')):
        raise ValueError(
            f'Archive at {archive_path} does not have .zip (or legacy .lbk) extension'
        )

    status_lines = []

    def report(line: str) -> None:
        # Status messages are cumulative, one stage per line.
        status_lines.append(line)
        update_meta('\n'.join(status_lines))

    report('Unzipping Repository Archive...')

    # Unzip into a temporary directory and cleanup if fails
    with TemporaryDirectory() as temp_dir:
        call_subprocess(['unzip', archive_path, '-d', 'project'],
                        cwd=temp_dir,
                        check=True)

        project_dir = os.path.join(temp_dir, 'project')
        pdirs = os.listdir(project_dir)
        if len(pdirs) != 1:
            raise ValueError("Expected only one directory unzipped")
        unzipped_path = os.path.join(temp_dir, 'project', pdirs[0])

        repo = fetch_method(unzipped_path)
        report('Creating workspace branch...')

        # Also, remove any lingering remotes.
        # If it gets re-published, it will be to a new remote.
        if repo.has_remote:
            repo.git.remove_remote('origin')

        # Ignore execution bit changes (due to moving between windows/mac/linux)
        call_subprocess("git config core.fileMode false".split(),
                        cwd=repo.root_dir)

        repo = put_method(unzipped_path, username=username, owner=owner)

        report('Import Complete')

        return repo
def test_reset__reset_local_change_same_owner(self,
                                              mock_labbook_lfs_disabled,
                                              mock_config_file):
    """ test reset discards a local commit and an untracked file after publish """
    username = '******'
    lb = mock_labbook_lfs_disabled[2]
    wf = LabbookWorkflow(lb)
    wf.publish(username=username)
    published_hash = lb.git.commit_hash

    # Make some change locally and commit
    committed_file = os.path.join(lb.root_dir, 'input', 'testfile')
    with open(committed_file, 'w') as f:
        f.write('filedata')
    lb.sweep_uncommitted_changes()
    assert lb.git.commit_hash != published_hash

    # Make an UNTRACKED change locally, make sure it gets cleared up
    untracked_file = os.path.join(lb.root_dir, 'output', 'untracked-file')
    with open(untracked_file, 'w') as f:
        f.write('untracked data')

    # Do a reset and make sure state resets appropriately
    wf.reset(username=username)
    assert lb.git.commit_hash == published_hash
    for discarded in (committed_file, untracked_file):
        assert not os.path.exists(discarded)

    log_tokens = 'git log -n 1 --oneline'.split()
    remote_hash = call_subprocess(log_tokens, cwd=wf.remote).split()[0]
    assert remote_hash in lb.git.commit_hash
def run_fetch(key: str):
    """Run `git fetch` for the repository identified by `key`.

    Args:
        key: Four `&`-separated fields: repository type ('labbook' or
            'dataset'), username, owner name and repository name.

    Returns:
        None

    Raises:
        ValueError: If the repository type is neither 'labbook' nor 'dataset'.
    """
    # Get identifying info from key
    repository_type, username, owner_name, repository_name = key.split('&')

    if repository_type not in ('labbook', 'dataset'):
        raise ValueError(f"Unsupported repository type: {repository_type}")

    inventory = InventoryManager()
    if repository_type == 'labbook':
        load = inventory.load_labbook
    else:
        load = inventory.load_dataset
    repo = load(username, owner_name, repository_name)

    # If no remote, can't fetch!
    if repo.remote:
        call_subprocess(['git', 'fetch'], cwd=repo.root_dir).strip()

    return None
def reset(self, username: str):
    """Hard-reset the active branch to `origin` and refresh linked datasets.

    When a remote is set and the active branch exists remotely: fetch, sweep
    local changes, `git reset --hard origin/<branch>`, `git clean -fd`, and
    clear the checkout context. Independently of that, a LabBook repository
    gets its linked datasets updated (with init).

    Args:
        username: Username for the BranchManager and the dataset update.
    """
    bm = BranchManager(self.repository, username)
    tracks_remote = self.remote and bm.active_branch in bm.branches_remote
    if tracks_remote:
        self.repository.git.fetch()
        self.repository.sweep_uncommitted_changes()

        reset_tokens = ['git', 'reset', '--hard',
                        f'origin/{bm.active_branch}']
        call_subprocess(reset_tokens, cwd=self.repository.root_dir)

        clean_tokens = ['git', 'clean', '-fd']
        call_subprocess(clean_tokens, cwd=self.repository.root_dir)

        self.repository.git.clear_checkout_context()

    # update dataset references on reset
    if isinstance(self.repository, LabBook):
        inventory = InventoryManager()
        inventory.update_linked_dataset(self.repository, username, init=True)
def clone_repo(remote_url: str, username: str, owner: str,
               load_repository: Callable[[str], Any],
               put_repository: Callable[[str, str, str], Any],
               make_owner: bool = False) -> Repository:
    """Clone a remote repository into a temporary directory and import it.

    Args:
        remote_url: URL handed to `_clone`.
        username: Active username, forwarded to `put_repository`.
        owner: Repository owner, forwarded to `put_repository`.
        load_repository: Callable that builds a repository object from the
            cloned path.
        put_repository: Callable taking (path, username, owner).
        make_owner: Not used by this function.

    Returns:
        The value returned by `put_repository`.
    """
    # Clone into a temporary directory, such that if anything
    # gets messed up, then this directory will be cleaned up.
    with tempfile.TemporaryDirectory() as scratch_dir:
        cloned_path = _clone(remote_url=remote_url, working_dir=scratch_dir)
        candidate = load_repository(cloned_path)

        on_windows_host = os.environ.get('WINDOWS_HOST')
        if on_windows_host:
            logger.warning("Imported on Windows host - set fileMode to false")
            file_mode_tokens = "git config core.fileMode false".split()
            call_subprocess(file_mode_tokens, cwd=candidate.root_dir)

        return put_repository(candidate.root_dir, username, owner)
def test_migrate(self, mock_config_file):
    """Migrate an old-schema LabBook archive and check it reaches CURRENT_SCHEMA."""
    p = resource_filename('gtmcore', 'labbook')
    p2 = os.path.join(p, 'tests', 'test.zip')

    with tempfile.TemporaryDirectory() as td:
        # Arguments are passed as a list so that an install or temp path
        # containing spaces is not split into several arguments.
        call_subprocess(['unzip', p2, '-d', td], cwd=td)
        temp_lb_path = os.path.join(td, 'test')

        # Tests backwards compatibility (test.zip is a very old schema 1 LabBook)
        lb = InventoryManager(
            mock_config_file[0]).load_labbook_from_directory(temp_lb_path)
        assert lb.schema < CURRENT_SCHEMA

        # Test schema migration -- migrate and then refresh.
        migrate_schema_to_current(lb.root_dir)
        lb = InventoryManager(
            mock_config_file[0]).load_labbook_from_directory(lb.root_dir)
        assert validate_labbook_schema(CURRENT_SCHEMA, lb_data=lb.data)
        assert lb.schema == CURRENT_SCHEMA
def put_file(cls, labbook: LabBook, section: str, src_file: str,
             dst_path: str, txid: Optional[str] = None) -> Dict[str, Any]:
    """Move the file at `src_file` to `dst_path` inside a labbook section.

    This operation does NOT commit or create an activity record.

    Args:
        labbook: Subject LabBook
        section: Section name (code, input, output)
        src_file: Absolute path of the file to insert
        dst_path: Path within section to insert `src_file`
        txid: Optional transaction id (not read by this method)

    Returns:
        The result of `cls.get_file_info` for the inserted file.

    Raises:
        ValueError: If `src_file` is not an absolute path or is not a file.
        FileOperationsException: If `dst_path` matches a git-ignored pattern.
    """
    # `os.path.abspath()` always returns a non-empty string, so it can never
    # detect a relative path; `isabs` is the check the error message describes.
    if not os.path.isabs(src_file):
        raise ValueError(f"Source file `{src_file}` not an absolute path")

    if not os.path.isfile(src_file):
        raise ValueError(f"Source file does not exist at `{src_file}`")

    labbook.validate_section(section)
    # check=False: the command's output, not its exit status, is inspected below.
    r = call_subprocess(
        ['git', 'check-ignore',
         os.path.basename(dst_path)],
        cwd=labbook.root_dir,
        check=False)
    if dst_path and r and os.path.basename(dst_path) in r:
        logger.warning(f"File {dst_path} matches gitignore; "
                       f"not put into {str(labbook)}")
        raise FileOperationsException(f"`{dst_path}` matches "
                                      f"ignored pattern")

    mdst_dir = _make_path_relative(dst_path)
    full_dst = os.path.join(labbook.root_dir, section, mdst_dir)
    # Strip parent-directory and home-directory tokens from the destination.
    full_dst = full_dst.replace('..', '')
    full_dst = full_dst.replace('~', '')

    # Force overwrite if file already exists
    existing_target = os.path.join(full_dst, os.path.basename(src_file))
    if os.path.isfile(existing_target):
        os.remove(existing_target)

    if not os.path.isdir(os.path.dirname(full_dst)):
        os.makedirs(os.path.dirname(full_dst), exist_ok=True)

    fdst = shutil.move(src_file, full_dst)
    relpath = fdst.replace(os.path.join(labbook.root_dir, section), '')
    return cls.get_file_info(labbook, section, relpath)
def _calc_disk_free() -> Tuple[float, float]:
    """Return (size, available) of the root filesystem from `df -h /`.

    Both numbers are scaled to the "G" unit that `df -h` prints, using a
    factor of 1000 between units.

    Returns:
        Tuple of (disk size, disk available).
    """

    def _to_g(token: str) -> float:
        # Convert one `df -h` size token such as "1.8T", "465G" or "512M".
        token = token.rstrip('iB')
        unit = token[-1:]
        if not unit.isalpha():
            # No unit suffix (e.g. "0"): the value is a plain byte count.
            return float(token or 0) / 1000000000.0
        value = float(token[:-1])
        if unit == 'K':
            return value / 1000000.0
        if unit == 'M':
            return value / 1000.0
        if unit == 'T':
            return value * 1000.0
        if unit == 'P':
            return value * 1000000.0
        return value

    disk_results = call_subprocess("df -h /".split(), cwd='/').split('\n')
    # Line 0 is the header; line 1 holds the figures for "/".
    _, disk_size, _disk_used, disk_avail, _use_pct, _ = disk_results[1].split()

    # The size was previously parsed from the "used" column by mistake, and
    # only the "M" unit was scaled.
    return _to_g(disk_size), _to_g(disk_avail)
def _clean_submodule():
    """Remove a submodule reference and its directories from the parent repo.

    Relies on `absolute_submodule_dir`, `relative_submodule_dir`,
    `git_module_dir` and `labbook` from the enclosing scope. A failing
    `git rm` is logged and ignored.
    """
    if os.path.exists(absolute_submodule_dir):
        logger.warning(
            f"Cleaning {relative_submodule_dir} from parent git repo")
        rm_tokens = ['git', 'rm', '-f', '--cached', relative_submodule_dir]
        try:
            call_subprocess(rm_tokens, cwd=labbook.root_dir)
        except subprocess.CalledProcessError:
            logger.warning(
                f"git rm on {relative_submodule_dir} failed. Continuing..."
            )

    # Delete whatever is left on disk: the checkout, then git's module store.
    for stale_dir in (absolute_submodule_dir, git_module_dir):
        if os.path.exists(stale_dir):
            logger.warning(f"Removing {stale_dir} directory")
            shutil.rmtree(stale_dir)
def test_migrate_old_schema_1_project(self, mock_config_file):
    """ Test migrating a very old schema 1/gm.workspace LabBook """
    p = resource_filename('gtmcore', 'workflows')
    p2 = os.path.join(p, 'tests', 'snappy.zip')

    with tempfile.TemporaryDirectory() as td:
        # Arguments are passed as a list so that an install or temp path
        # containing spaces is not split into several arguments.
        call_subprocess(['unzip', p2, '-d', td], cwd=td)
        temp_lb_path = os.path.join(td, 'snappy')

        # Tests backwards compatibility (snappy.zip is a very old schema 1 LabBook)
        lb = InventoryManager(
            mock_config_file[0]).load_labbook_from_directory(temp_lb_path)
        wf = LabbookWorkflow(lb)

        wf.labbook.remove_remote()
        wf.migrate()

        # Test that current branch is as appropriate
        assert wf.labbook.active_branch == 'master'

        # Test that there is an activity record indicate migration
        assert any([
            'Migrate schema to 2' in c['message']
            for c in wf.labbook.git.log()[:5]
        ])

        # Test schema has successfully rolled to 2
        assert wf.labbook.schema == 2

        # Test that untracked space exists (if we add something to untracked space)
        assert wf.labbook.is_repo_clean
        with open(
                os.path.join(lb.root_dir, 'output/untracked',
                             'untracked-file'), 'wb') as fb:
            fb.write(b'cat' * 100)
        assert wf.labbook.is_repo_clean