示例#1
0
    def backend_config(self, data: dict) -> None:
        """Save storage backend configuration data

        When a backend is loaded, `data` is merged over its current configuration. The persistable part of
        `data` is then written to `.gigantum/backend.json`, committed, and noted in an activity record.

        Args:
            data: configuration values to apply; the dict passed in is left unmodified

        Returns:
            None
        """
        if self._backend:
            self._backend.configuration = {**self._backend.configuration, **data}

        # Defaults set at runtime that shouldn't be persisted. Build a filtered copy instead of deleting
        # keys in place, so the caller's dict is not mutated as a side effect of saving.
        runtime_only_keys = ("username", "gigantum_bearer_token", "gigantum_id_token")
        persisted = {key: value for key, value in data.items() if key not in runtime_only_keys}

        config_file = os.path.join(self.root_dir, ".gigantum", "backend.json")
        with open(config_file, 'wt') as sf:
            json.dump(persisted, sf, indent=2)

        self.git.add(config_file)
        cm = self.git.commit("Updating backend config")

        ar = ActivityRecord(ActivityType.DATASET,
                            message="Updated Dataset storage backend configuration",
                            show=True,
                            importance=255,
                            linked_commit=cm.hexsha,
                            tags=['config'])
        adr = ActivityDetailRecord(ActivityDetailType.DATASET, show=False, importance=255,
                                   action=ActivityAction.EDIT)
        # The detail shows exactly what was persisted (i.e. without the runtime-only values)
        d = json.dumps(persisted, indent=2)
        adr.add_value('text/markdown', f"Updated dataset storage backend configuration:\n\n ```{d}```")
        ar.add_detail_object(adr)
        ars = ActivityStore(self)
        ars.create_activity_record(ar)
    def remove_docker_snippet(self, name: str) -> None:
        """Delete a custom docker snippet, commit the removal, and record it as activity

        Args:
            name: Name or identifier of the snippet to remove

        Returns:
            None

        Raises:
            ValueError: If no `{name}.yaml` snippet file exists in the labbook's docker directory
        """
        snippet_dir = os.path.join(self.labbook.root_dir, '.gigantum', 'env',
                                   'docker')
        snippet_path = os.path.join(snippet_dir, f'{name}.yaml')
        if not os.path.exists(snippet_path):
            raise ValueError(f'Docker snippet name `{name}` does not exist')

        # Remove the file from both git and disk
        self.labbook.git.remove(snippet_path, keep_file=False)

        summary = f"Removed custom Docker snippet `{name}`"
        logger.info(summary)
        removal_commit = self.labbook.git.commit(summary)

        # The same summary is used for the commit, the detail, and the record
        detail = ActivityDetailRecord(ActivityDetailType.ENVIRONMENT,
                                      show=False,
                                      action=ActivityAction.DELETE)
        detail.add_value('text/plain', summary)

        record = ActivityRecord(ActivityType.ENVIRONMENT,
                                message=summary,
                                show=False,
                                linked_commit=removal_commit.hexsha,
                                tags=["environment", "docker", "snippet"])
        record.add_detail_object(detail)

        ActivityStore(self.labbook).create_activity_record(record)
示例#3
0
    def mutate_and_get_payload(cls,
                               root,
                               info,
                               name,
                               description,
                               repository,
                               base_id,
                               revision,
                               is_untracked=False,
                               client_mutation_id=None):
        """Create a new LabBook for the logged-in user, record its creation, and add its base

        Args:
            root: not used by this method
            info: not used by this method
            name: name of the LabBook to create
            description: description of the LabBook to create
            repository: passed to `ComponentManager.add_base()`
            base_id: passed to `ComponentManager.add_base()`
            revision: passed to `ComponentManager.add_base()`
            is_untracked: if True, create the LabBook with the LFS-disabled variant and mark the
                          `input` and `output` sections as untracked
            client_mutation_id: not used by this method

        Returns:
            CreateLabbook

        Raises:
            ValueError: If untracking was requested but did not take effect for both sections, or left
                        the repository in an unclean state
        """
        username = get_logged_in_username()
        inv_manager = InventoryManager()

        # Both creation methods take the same keyword arguments; only the method itself differs
        if is_untracked:
            create = inv_manager.create_labbook_disabled_lfs
        else:
            create = inv_manager.create_labbook
        lb = create(username=username,
                    owner=username,
                    labbook_name=name,
                    description=description,
                    author=get_logged_in_author())

        if is_untracked:
            untracked_sections = ('input', 'output')
            for section in untracked_sections:
                FileOperations.set_untracked(lb, section)

            # Query every section before deciding (list, not generator, so nothing is short-circuited)
            confirmed = [FileOperations.is_set_untracked(lb, section) for section in untracked_sections]
            if not all(confirmed):
                raise ValueError(
                    f'{str(lb)} untracking for input/output in malformed state'
                )
            if not lb.is_repo_clean:
                raise ValueError(
                    f'{str(lb)} should have clean Git state after setting for untracked'
                )

        created_msg = f"Created new LabBook: {username}/{name}"
        detail = ActivityDetailRecord(ActivityDetailType.LABBOOK,
                                      show=False,
                                      importance=0)
        detail.add_value('text/plain', created_msg)

        # Create activity record, linked to the LabBook's current commit
        record = ActivityRecord(ActivityType.LABBOOK,
                                message=created_msg,
                                show=True,
                                importance=255,
                                linked_commit=lb.git.commit_hash)
        record.add_detail_object(detail)
        ActivityStore(lb).create_activity_record(record)

        # The base is added only after the creation record has been stored
        ComponentManager(lb).add_base(repository, base_id, revision)

        return CreateLabbook(labbook=Labbook(owner=username, name=lb.name))
    def remove_all_bases(self, base_paths: List[Path],
                         detail_records: List[ActivityDetailRecord]) -> str:
        """Remove every file listed in `base_paths` and append one record per file to `detail_records`

        Removing files isn't hard. The main point of this method is to provide detail records that make
        sense in the context of a misconfigured project.

        Args:
            base_paths: List of matched YAML files for base images
            detail_records: list that is extended in place with one ActivityDetailRecord per removed file;
                            the caller is expected to add these to an ActivityRecord

        Returns:
            the short_message for the git commit, etc.
        """
        for yaml_path in base_paths:
            self.labbook.git.remove(str(yaml_path), keep_file=False)

            # File names look like gigantum_base-images_r-tidyverse.yaml: the repository part itself
            # contains an underscore (in place of the slash), so split on the LAST underscore only
            repo_part, base_part = yaml_path.stem.rsplit('_', 1)

            detail = ActivityDetailRecord(ActivityDetailType.ENVIRONMENT,
                                          show=False,
                                          action=ActivityAction.DELETE)
            detail.add_value('text/plain', f"Removing base from {repo_part}: {base_part}")
            detail_records.append(detail)

        removed_count = len(base_paths)
        return f"Removing all bases from project with {removed_count} base configuration files."
示例#5
0
    def remove_bundled_app(self, name: str) -> None:
        """Remove a bundled app from this labbook

        The app is dropped from the bundled app file, the file is committed, and an activity record
        describing the removal is stored.

        Args:
            name(str): name of the bundled app

        Returns:
            None

        Raises:
            ValueError: If no bundled app called `name` exists
        """
        data = self.get_bundled_apps()
        if name not in data:
            raise ValueError(f"App {name} does not exist. Cannot remove.")

        del data[name]

        with open(self.bundled_app_file, 'wt') as baf:
            json.dump(data, baf)

        # Commit the changes
        self.labbook.git.add(self.bundled_app_file)
        commit = self.labbook.git.commit("Committing bundled app")

        # This is a removal, so the detail is tagged as DELETE (consistent with the other remove methods)
        adr = ActivityDetailRecord(ActivityDetailType.ENVIRONMENT,
                                   show=False,
                                   action=ActivityAction.DELETE)
        adr.add_value('text/plain', f"Removed bundled application: {name}")
        ar = ActivityRecord(ActivityType.ENVIRONMENT,
                            message=f"Removed bundled application: {name}",
                            show=True,
                            linked_commit=commit.hexsha,
                            tags=["environment", "docker", "bundled_app"])
        ar.add_detail_object(adr)
        ars = ActivityStore(self.labbook)
        ars.create_activity_record(ar)
示例#6
0
    def mutate_and_get_payload(cls,
                               root,
                               info,
                               owner,
                               labbook_name,
                               description_content,
                               client_mutation_id=None):
        """Set a Project's description, commit the config change, and store an activity record

        Args:
            root: not used by this method
            info: not used by this method
            owner: owner of the Project to load
            labbook_name: name of the Project to load
            description_content: the new description
            client_mutation_id: not used by this method

        Returns:
            SetLabbookDescription
        """
        logged_in_user = get_logged_in_username()
        project = InventoryManager().load_labbook(logged_in_user,
                                                  owner,
                                                  labbook_name,
                                                  author=get_logged_in_author())
        project.description = description_content

        summary = "Updated description of Project"
        # The add/commit and the activity record are all written while holding the Project lock
        with project.lock():
            config_file = os.path.join(project.config_path)
            project.git.add(config_file)
            description_commit = project.git.commit('Updating description')

            detail = ActivityDetailRecord(ActivityDetailType.LABBOOK, show=False)
            detail.add_value('text/plain', summary)

            record = ActivityRecord(ActivityType.LABBOOK,
                                    message=summary,
                                    linked_commit=description_commit.hexsha,
                                    tags=["labbook"],
                                    show=False)
            record.add_detail_object(detail)

            ActivityStore(project).create_activity_record(record)
        return SetLabbookDescription(success=True)
def migrate_labbook_schema(labbook: LabBook) -> None:
    """Migrate a labbook to the current schema, commit the result, and store an activity record

    If the migration raises, the repository is hard-reset to the commit it was on when this function
    was called and the exception is re-raised.

    Args:
        labbook: the labbook to migrate

    Returns:
        None
    """
    # Remember the starting commit so a failed migration can be rolled back
    rollback_commit = labbook.git.commit_hash

    try:
        migrate_schema_to_current(labbook.root_dir)
    except Exception as err:
        logger.exception(err)
        reset_cmd = f'git reset --hard {rollback_commit}'.split()
        call_subprocess(reset_cmd, cwd=labbook.root_dir)
        raise

    summary = f"Migrate schema to {CURRENT_LABBOOK_SCHEMA}"

    # Commit the config file, attributing the commit to the labbook's author
    labbook.git.add(labbook.config_path)
    migration_commit = labbook.git.commit(summary,
                                          author=labbook.author,
                                          committer=labbook.author)

    detail = ActivityDetailRecord(ActivityDetailType.LABBOOK,
                                  show=True,
                                  importance=100,
                                  action=ActivityAction.EDIT)
    detail.add_value('text/plain', summary)

    record = ActivityRecord(ActivityType.LABBOOK,
                            message=summary,
                            show=True,
                            importance=255,
                            linked_commit=migration_commit.hexsha,
                            tags=['schema', 'update', 'migration'])
    record.add_detail_object(detail)

    ActivityStore(labbook).create_activity_record(record)
示例#8
0
    def process(self, result_obj: ActivityRecord, data: List[ExecutionData],
                status: Dict[str, Any], metadata: Dict[str, Any]) -> ActivityRecord:
        """Cap the number of detail records on an activity record, adding a note when records are dropped

        Args:
            result_obj(ActivityRecord): The record whose detail objects are checked and possibly trimmed
            data(list): A list of ExecutionData instances (not used by this processor)
            status(dict): A dict containing the result of git status (not used by this processor)
            metadata(dict): Dev Env specific or other developer defined data (not used by this processor)

        Returns:
            ActivityRecord: the same `result_obj` that was passed in
        """
        max_details = 255

        with result_obj.inspect_detail_objects():
            total_details = result_obj.num_detail_objects
            if total_details <= max_details:
                return result_obj

            result_obj.trim_detail_objects(max_details)

            # Tell the user that output was dropped and how to avoid that in the future
            notice = (f"This activity produced {total_details} detail records, "
                      f"but was truncated to the top 255 items. Inspect your code to make "
                      f"sure that this was not accidental. In Jupyter for example, you can"
                      f" use a `;` at the end of a line to suppress output from functions"
                      f" that print excessively.")
            note = ActivityDetailRecord(ActivityDetailType.NOTE, show=True, importance=0,
                                        action=ActivityAction.NOACTION)
            note.add_value('text/markdown', notice)
            result_obj.add_detail_object(note)

        return result_obj
示例#9
0
    def mutate_and_get_payload(cls,
                               root,
                               info,
                               owner,
                               dataset_name,
                               description,
                               client_mutation_id=None):
        """Set a Dataset's description, commit the config change, and store an activity record

        Args:
            root: not used by this method
            info: not used by this method
            owner: owner of the Dataset to load
            dataset_name: name of the Dataset to load
            description: the new description
            client_mutation_id: not used by this method

        Returns:
            SetDatasetDescription
        """
        username = get_logged_in_username()
        ds = InventoryManager().load_dataset(username,
                                             owner,
                                             dataset_name,
                                             author=get_logged_in_author())
        ds.description = description
        with ds.lock():
            ds.git.add(os.path.join(ds.root_dir, '.gigantum', 'gigantum.yaml'))
            commit = ds.git.commit('Updating description')

            # This activity belongs to a Dataset, so use the DATASET types (as the other dataset
            # operations do) rather than the LABBOOK ones
            adr = ActivityDetailRecord(ActivityDetailType.DATASET, show=False)
            adr.add_value('text/plain',
                          f"Updated Dataset description: {description}")
            ar = ActivityRecord(ActivityType.DATASET,
                                message="Updated Dataset description",
                                linked_commit=commit.hexsha,
                                tags=["dataset"],
                                show=False)
            ar.add_detail_object(adr)
            ars = ActivityStore(ds)
            ars.create_activity_record(ar)
        return SetDatasetDescription(
            updated_dataset=Dataset(owner=owner, name=dataset_name))
    def remove_base(self, base_fname: Path,
                    detail_records: List[ActivityDetailRecord]) -> str:
        """Remove the base file `base_fname` and append a descriptive record to `detail_records`

        Removing files isn't hard. The main point of this method is to provide detail records that make
        sense in the context of a properly configured project with a single base.

        Args:
            base_fname: Matched YAML file for base image
            detail_records: list that is extended in place with one ActivityDetailRecord; the caller is
                            expected to add it to an ActivityRecord

        Returns:
            the short_message for the git commit, etc.
        """
        # Base info is fetched BEFORE the file is removed
        # NOTE(review): presumably `base_fields` is backed by this YAML file - confirm
        fields = self.base_fields
        base_revision = fields['revision']

        # File names look like gigantum_base-images_r-tidyverse.yaml: the repository part itself
        # contains an underscore (in place of the slash), so split on the LAST underscore only
        repository_part, component = base_fname.stem.rsplit('_', 1)

        self.labbook.git.remove(str(base_fname), keep_file=False)

        # Create detail record
        message_lines = [
            f"Removed base {component}\n",
            f"{fields['description']}\n",
            f"  - repository: {repository_part}",
            f"  - component: {component}",
            f"  - revision: {base_revision}\n",
        ]
        detail_text = "\n".join(message_lines)

        detail = ActivityDetailRecord(ActivityDetailType.ENVIRONMENT,
                                      show=False,
                                      action=ActivityAction.DELETE)
        detail.add_value('text/plain', detail_text)
        detail_records.append(detail)

        return f"Removed base from {repository_part}: {component} r{base_revision}"
示例#11
0
    def insert_file(cls,
                    labbook: LabBook,
                    section: str,
                    src_file: str,
                    dst_path: str = '') -> Dict[str, Any]:
        """Insert the file at `src_file` into a labbook section, commit it, and create an activity record.

        The file is placed with `FileOperations.put_file()`. If adding or committing the file fails, the
        inserted file is removed again and a `FileOperationsException` is raised.

        Args:
            labbook: Subject labbook
            section: Section name (code, input, output)
            src_file: Full path of file to insert into
            dst_path: Relative path within labbook where `src_file`
                      should be copied to

        Returns:
            dict: The inserted file's info, as returned by `FileOperations.put_file()`

        Raises:
            FileOperationsException: If the inserted file could not be added or committed
        """

        finfo = FileOperations.put_file(labbook=labbook,
                                        section=section,
                                        src_file=src_file,
                                        dst_path=dst_path)

        rel_path = os.path.join(section, finfo['key'])

        # Look up the activity types and display name that go with this section
        activity_type, activity_detail_type, section_str = \
            labbook.get_activity_type_from_section(section)

        commit_msg = f"Added new {section_str} file {rel_path}"
        try:
            labbook.git.add(rel_path)
            commit = labbook.git.commit(commit_msg)
        except Exception as x:
            logger.error(x)
            # Roll back the file that was just inserted. `dst_path` is only the (possibly empty)
            # destination argument, so removing it would fail and mask the original error.
            # Assumes `rel_path` is relative to `labbook.root_dir`, as it is when given to `git.add()`.
            inserted_path = os.path.join(labbook.root_dir, rel_path)
            if os.path.isfile(inserted_path):
                os.remove(inserted_path)
            raise FileOperationsException(x) from x

        # Create Activity record and detail. The record is tagged with the file extension, falling
        # back to 'file' when the name has none (`splitext` returns a tuple, which is always truthy,
        # so the fallback has to be applied to the extension itself).
        ext = os.path.splitext(rel_path)[1] or 'file'
        adr = ActivityDetailRecord(activity_detail_type,
                                   show=False,
                                   importance=0,
                                   action=ActivityAction.CREATE)
        adr.add_value('text/plain', commit_msg)
        ar = ActivityRecord(activity_type,
                            message=commit_msg,
                            show=True,
                            importance=255,
                            linked_commit=commit.hexsha,
                            tags=[ext])
        ar.add_detail_object(adr)
        ars = ActivityStore(labbook)
        ars.create_activity_record(ar)

        return finfo
示例#12
0
    def add_docker_snippet(self,
                           name: str,
                           docker_content: List[str],
                           description: Optional[str] = None) -> None:
        """ Add a custom docker snippet to the environment (replacing custom dependency).

        The snippet is written as `{name}.yaml` in the labbook's docker directory, committed, and
        recorded as activity.

        Args:
            name: Name or identifier of the custom docker snippet
            docker_content: Lines of docker material; a falsy value is treated as an empty list
            description: Human-readable verbose description of what the snippet is intended to accomplish.

        Returns:
            None

        Raises:
            ValueError: If `name` is empty, or contains anything other than alphanumerics, `-` and `_`
        """
        if not name:
            raise ValueError('Argument `name` cannot be None or empty')

        # Dashes and underscores are allowed, so ignore them for the alphanumeric check
        name_without_separators = name.replace('-', '').replace('_', '')
        if not name_without_separators.isalnum():
            raise ValueError(
                'Argument `name` must be alphanumeric string (- and _ accepted)'
            )

        docker_content = docker_content or []

        snippet = {
            'name': name,
            'timestamp_utc': datetime.datetime.utcnow().isoformat(),
            'description': description or "",
            'content': docker_content
        }

        snippet_dir = os.path.join(self.labbook.root_dir, '.gigantum', 'env',
                                   'docker')
        snippet_path = os.path.join(snippet_dir, f'{name}.yaml')
        os.makedirs(snippet_dir, exist_ok=True)

        # Serialize before opening the file, so a serialization error can't leave a truncated file
        serialized = yaml.safe_dump(snippet, default_flow_style=False)
        with open(snippet_path, 'w') as snippet_file:
            snippet_file.write(serialized)

        logger.info(
            f"Wrote custom Docker snippet `{name}` to {str(self.labbook)}")

        summary = f"Wrote custom Docker snippet `{name}`"
        self.labbook.git.add(snippet_path)
        snippet_commit = self.labbook.git.commit(summary)

        # The detail carries the snippet content itself, one entry per line
        detail = ActivityDetailRecord(ActivityDetailType.ENVIRONMENT,
                                      show=False,
                                      action=ActivityAction.CREATE)
        detail.add_value('text/plain', '\n'.join(docker_content))

        record = ActivityRecord(ActivityType.ENVIRONMENT,
                                message=summary,
                                show=True,
                                linked_commit=snippet_commit.hexsha,
                                tags=["environment", "docker", "snippet"])
        record.add_detail_object(detail)

        ActivityStore(self.labbook).create_activity_record(record)
示例#13
0
    def process(self, result_obj: ActivityRecord, data: List[ExecutionData],
                status: Dict[str, Any], metadata: Dict[str, Any]) -> ActivityRecord:
        """Add a detail record to `result_obj` for each untracked or unstaged file in `status`

        Files whose path contains `.git` or `.gigantum` are ignored.

        Args:
            result_obj(ActivityRecord): The record that detail objects are added to
            data(list): A list of ExecutionData instances (not used by this processor)
            status(dict): git status; `untracked` holds filenames and `unstaged` holds
                          (filename, change) pairs
            metadata(dict): Dev Env specific or other developer defined data (not used by this processor)

        Returns:
            ActivityRecord: the same `result_obj` that was passed in
        """
        ignored_markers = (".git", ".gigantum")

        # Untracked files are new files. Importance drops with the position in the untracked list
        # (positions of ignored files count too).
        for position, filename in enumerate(status['untracked']):
            if any(marker in filename for marker in ignored_markers):
                continue

            _, detail_type, section = LabBook.infer_section_from_relative_path(filename)

            detail = ActivityDetailRecord(detail_type, show=False, importance=max(255-position, 0),
                                          action=ActivityAction.CREATE)
            # We use a "private" attribute here, but it's better than the silent breakage that happened before
            # cf. https://github.com/gigantum/gigantum-client/issues/436
            if section != LabBook._default_activity_section:
                msg = f"Created new {section} file `{filename}`"
            else:
                msg = f'Created new file `{filename}` in the Project Root. Note, it is best practice to use the Code, ' \
                    'Input, and Output sections exclusively. '
            detail.add_value('text/markdown', msg)
            result_obj.add_detail_object(detail)

        # Unstaged files carry the kind of change. Here importance drops only with the number of
        # records actually added.
        recorded = 0
        for filename, change in status['unstaged']:
            if any(marker in filename for marker in ignored_markers):
                continue

            _, detail_type, section = LabBook.infer_section_from_relative_path(filename)

            if change == "deleted":
                action = ActivityAction.DELETE
            elif change == "added":
                action = ActivityAction.CREATE
            elif change in ("modified", "renamed"):
                action = ActivityAction.EDIT
            else:
                action = ActivityAction.NOACTION

            # Upper-case only the first letter of the change for display
            change_label = change[0].upper() + change[1:]
            detail = ActivityDetailRecord(detail_type, show=False, importance=max(255-recorded, 0), action=action)
            detail.add_value('text/markdown', f"{change_label} {section} file `{filename}`")
            result_obj.add_detail_object(detail)
            recorded += 1

        return result_obj
示例#14
0
    def create_directory(self, path: str) -> Dict[str, Any]:
        """Create an empty directory in a dataset, commit it, and store an activity record

        Args:
            path: Relative path to the directory

        Returns:
            dict: the result of `gen_file_info()` for the new directory

        Raises:
            ValueError: If the directory already exists, its parent directory does not exist, or the
                        directory is missing from the manifest after the update
        """
        relative_path = self.dataset.make_path_relative(path)

        # Captured up front: the revision is used again after the commit to clean up the old cache dir
        revision_before = self.dataset_revision
        target_dir = os.path.join(self.cache_mgr.cache_root, revision_before, relative_path)

        if os.path.exists(target_dir):
            raise ValueError(f"Directory already exists: `{relative_path}`")

        logger.info(f"Creating new empty directory in `{target_dir}`")

        if not os.path.isdir(Path(target_dir).parent):
            raise ValueError(f"Parent directory does not exist. Failed to create `{target_dir}` ")

        # create dir, then refresh and verify the manifest picked it up
        os.makedirs(target_dir)
        self.update()
        if relative_path not in self.manifest:
            raise ValueError("Failed to add directory to manifest")

        # Create detail record
        summary = f"Created new empty directory `{relative_path}`"
        detail = ActivityDetailRecord(ActivityDetailType.DATASET, show=False, importance=0,
                                      action=ActivityAction.CREATE)
        detail.add_value('text/markdown', summary)

        directory_commit = self.dataset.git.commit(summary)

        # Create activity record
        record = ActivityRecord(ActivityType.DATASET,
                                message=summary,
                                linked_commit=directory_commit.hexsha,
                                show=True,
                                importance=255,
                                tags=['directory-create'])
        record.add_detail_object(detail)

        # Store
        ActivityStore(self.dataset).create_activity_record(record)

        # Relink after the commit, then drop the directory that belonged to the previous revision
        self.link_revision()
        stale_revision_dir = os.path.join(self.cache_mgr.cache_root, revision_before)
        if os.path.isdir(stale_revision_dir):
            shutil.rmtree(stale_revision_dir)

        return self.gen_file_info(relative_path)
示例#15
0
    def process(self, result_obj: ActivityRecord, data: List[ExecutionData],
                status: Dict[str, Any], metadata: Dict[str,
                                                       Any]) -> ActivityRecord:
        """Add a detail record to `result_obj` for each non-empty plain-text result in `data`

        Args:
            result_obj(ActivityRecord): The record that detail objects are added to
            data(list): A list of ExecutionData instances containing the data for this record
            status(dict): A dict containing the result of git status (not used by this processor)
            metadata(dict): Dev Env specific or other developer defined data (not used by this processor)

        Returns:
            ActivityRecord: the same `result_obj` that was passed in
        """
        # Only store up to 64kB of plain text result data (if the user printed a TON don't save it all)
        truncate_at = 64 * 1000
        # Results this long or longer are stored but not shown
        max_show_len = 280

        saved = 0
        for cell in data:
            for entry in reversed(cell.result):
                # Don't save plain-text representations of displayed data by default.
                from_display_data = ('metadata' in entry
                                     and 'source' in entry['metadata']
                                     and entry['metadata']['source'] == "display_data")
                if from_display_data:
                    continue

                if 'data' not in entry or 'text/plain' not in entry['data']:
                    continue

                text = entry['data']['text/plain']
                if len(text) == 0:
                    continue

                # Importance starts at 155 and drops by one per saved result
                detail = ActivityDetailRecord(
                    ActivityDetailType.RESULT,
                    show=len(text) < max_show_len,
                    action=ActivityAction.CREATE,
                    importance=max(155 - saved, 0))

                if len(text) > truncate_at:
                    stored_text = text[:truncate_at] + " ...\n\n <result truncated>"
                else:
                    stored_text = text
                detail.add_value("text/plain", stored_text)

                # Set cell data to tag
                detail.tags = cell.tags
                result_obj.add_detail_object(detail)

                saved += 1

        return result_obj
示例#16
0
    def write_readme(self, contents: str) -> None:
        """Method to write a string to the readme file within the repository. Must write ENTIRE document at once.

        The file is written to `README.md` in the repository root, committed, and recorded as activity
        (as an edit if the file already existed, otherwise as a creation).

        Args:
            contents(str): entire readme document in markdown format

        Returns:
            None

        Raises:
            TypeError: If `contents` is not a string
            ValueError: If `contents` is longer than the 5MB limit
        """
        # Validate readme data. The type is checked first so that non-string input (e.g. None) gets
        # the intended error rather than whatever `len()` happens to raise for it.
        if not isinstance(contents, str):
            raise TypeError("Invalid content. Must provide string")

        if len(contents) > (1000000 * 5):
            raise ValueError("Readme file is larger than the 5MB limit")

        readme_file = os.path.join(self.root_dir, 'README.md')
        # Must be checked before writing, since writing creates the file
        readme_exists = os.path.exists(readme_file)

        # Write file to disk
        with open(readme_file, 'wt') as rf:
            rf.write(contents)

        # Create commit
        if readme_exists:
            commit_msg = "Updated README file"
            action = ActivityAction.EDIT
        else:
            commit_msg = "Added README file"
            action = ActivityAction.CREATE

        self.git.add(readme_file)
        commit = self.git.commit(commit_msg)

        # Create detail record
        adr = ActivityDetailRecord(self._default_activity_detail_type,
                                   show=False,
                                   importance=0,
                                   action=action)
        adr.add_value('text/plain', commit_msg)

        # Create activity record
        ar = ActivityRecord(self._default_activity_type,
                            message=commit_msg,
                            show=False,
                            importance=255,
                            linked_commit=commit.hexsha,
                            tags=['readme'])
        ar.add_detail_object(adr)

        # Store
        ars = ActivityStore(self)
        ars.create_activity_record(ar)
示例#17
0
    def unlink_dataset_from_labbook(self, dataset_namespace: str,
                                    dataset_name: str,
                                    labbook: LabBook) -> None:
        """Remove a linked dataset (git submodule) reference from a labbook and record the change

        Args:
            dataset_namespace: namespace directory of the dataset under `.gigantum/datasets`
            dataset_name: name of the dataset
            labbook: labbook instance the dataset is removed from

        Returns:
            None
        """
        dataset_ref = f"{dataset_namespace}/{dataset_name}"

        # `git rm` runs inside the labbook, so it is given the path relative to the labbook root
        relative_submodule_dir = os.path.join('.gigantum', 'datasets',
                                              dataset_namespace, dataset_name)
        call_subprocess(['git', 'rm', '-f', relative_submodule_dir],
                        cwd=labbook.root_dir)

        # Delete whatever is left on disk: the module data under .git and the submodule checkout
        leftovers = (
            os.path.join(labbook.root_dir, '.git', 'modules',
                         f"{dataset_namespace}&{dataset_name}"),
            os.path.join(labbook.root_dir, '.gigantum', 'datasets',
                         dataset_namespace, dataset_name),
        )
        for leftover in leftovers:
            if os.path.exists(leftover):
                shutil.rmtree(leftover)

        labbook.git.add_all()
        unlink_commit = labbook.git.commit("removing submodule ref")

        # Add Activity Record
        detail = ActivityDetailRecord(ActivityDetailType.DATASET,
                                      show=False,
                                      action=ActivityAction.DELETE)
        detail.add_value('text/markdown',
                         f"Unlinked Dataset `{dataset_ref}` from project")

        record = ActivityRecord(ActivityType.DATASET,
                                message=f"Unlinked Dataset {dataset_ref} from project.",
                                linked_commit=unlink_commit.hexsha,
                                tags=["dataset"],
                                show=True)
        record.add_detail_object(detail)

        ActivityStore(labbook).create_activity_record(record)
示例#18
0
    def update_linked_dataset_reference(self, dataset_namespace: str,
                                        dataset_name: str,
                                        labbook: LabBook) -> Dataset:
        """Method to update a linked dataset reference to the latest revision

        The dataset is loaded from its submodule directory inside the labbook and pulled. If the pull
        moved the dataset to a different revision, the change is committed in the labbook and an
        activity record is stored.

        Args:
            dataset_namespace: owner (namespace) of the dataset
            dataset_name: name of the dataset
            labbook: labbook instance to which the dataset is linked

        Returns:
            Dataset: the dataset loaded from the labbook's submodule directory
        """
        # Load dataset from inside Project directory
        submodule_dir = os.path.join(labbook.root_dir, '.gigantum', 'datasets',
                                     dataset_namespace, dataset_name)
        ds = self.load_dataset_from_directory(submodule_dir,
                                              author=labbook.author)
        ds.namespace = dataset_namespace

        # Update the submodule reference with the latest changes
        original_revision = ds.git.repo.head.object.hexsha
        ds.git.pull()
        revision = ds.git.repo.head.object.hexsha

        # If the submodule has changed, commit the changes.
        if original_revision != revision:
            labbook.git.add_all()
            commit = labbook.git.commit("Updating submodule ref")

            # Add Activity Record. The link is being updated, not removed, so the action is EDIT.
            adr = ActivityDetailRecord(ActivityDetailType.DATASET,
                                       show=False,
                                       action=ActivityAction.EDIT)
            adr.add_value(
                'text/markdown',
                f"Updated Dataset `{dataset_namespace}/{dataset_name}` link to {revision}"
            )
            # The record message only shows the first 8 characters of the revision
            msg = f"Updated Dataset `{dataset_namespace}/{dataset_name}` link to version {revision[0:8]}"
            ar = ActivityRecord(ActivityType.DATASET,
                                message=msg,
                                linked_commit=commit.hexsha,
                                tags=["dataset"],
                                show=True)
            ar.add_detail_object(adr)
            ars = ActivityStore(labbook)
            ars.create_activity_record(ar)

        return ds
示例#19
0
    def _update_branch_description(cls, lb: LabBook, description: str):
        """Set a new description on the Project, commit it, and record the change.

        Args:
            lb: Project whose description is updated
            description: new description text

        Returns:
            None
        """
        # Apply the description, then stage and commit `lb.config_path`
        lb.description = description
        lb.git.add(lb.config_path)
        description_commit = lb.git.commit('Updating description')

        # Detail record carries the raw description text
        detail = ActivityDetailRecord(ActivityDetailType.LABBOOK, show=False)
        detail.add_value('text/plain', description)

        # The activity record itself is created hidden (show=False)
        record = ActivityRecord(ActivityType.LABBOOK,
                                message="Updated description of Project",
                                linked_commit=description_commit.hexsha,
                                tags=["labbook"],
                                show=False)
        record.add_detail_object(detail)

        ActivityStore(lb).create_activity_record(record)
示例#20
0
    def _create_user_note(cls, lb, title, body, tags):
        """Create and store a user note as an activity record.

        Args:
            lb: repository object the ActivityStore is opened on
            title: used as the activity record message
            body: markdown body; only attached to the detail record when truthy
            tags: tags passed through to the activity record

        Returns:
            The value returned by `ActivityStore.create_activity_record`
        """
        activity_store = ActivityStore(lb)

        note_detail = ActivityDetailRecord(ActivityDetailType.NOTE,
                                           show=True,
                                           importance=255)
        if body:
            note_detail.add_value('text/markdown', body)

        # Notes are not tied to a git commit, so a placeholder string is used
        note_record = ActivityRecord(ActivityType.NOTE,
                                     message=title,
                                     linked_commit="no-linked-commit",
                                     importance=255,
                                     tags=tags)
        note_record.add_detail_object(note_detail)

        return activity_store.create_activity_record(note_record)
示例#21
0
    def process(self, result_obj: ActivityRecord, data: List[ExecutionData],
                status: Dict[str, Any], metadata: Dict[str,
                                                       Any]) -> ActivityRecord:
        """Attach a detail record to the activity record for every supported image result

        Args:
            result_obj(ActivityRecord): the record that detail objects are added to
            data(list): A list of ExecutionData instances; each cell's `result` and `tags` are read
            status(dict): A dict containing the result of git status (not used here)
            metadata(dict): A dictionary of developer defined data; `metadata['path']` is read
                            when an image is found

        Returns:
            ActivityRecord: the same `result_obj` that was passed in
        """
        supported_image_types = ('image/png', 'image/jpeg', 'image/jpg',
                                 'image/gif', 'image/bmp')

        # Running count of images found; used to rank earlier images as more important
        images_found = 0
        for cell in data:
            # Results are walked last-to-first
            for entry in reversed(cell.result):
                if 'data' not in entry:
                    continue

                payload = entry['data']
                for mime_type in payload:
                    if mime_type not in supported_image_types:
                        continue

                    # Supported image: capture it in its own detail record
                    image_detail = ActivityDetailRecord(
                        ActivityDetailType.RESULT,
                        show=True,
                        action=ActivityAction.CREATE,
                        importance=max(255 - images_found, 0))
                    image_detail.add_value(mime_type, payload[mime_type])
                    image_detail.tags = cell.tags
                    result_obj.add_detail_object(image_detail)

                    # Set Activity Record Message
                    result_obj.message = "Executed cell in notebook {} and generated a result".format(
                        metadata['path'])

                    images_found += 1

        return result_obj
示例#22
0
 def _record_remove_activity(cls, secret_store, filename, lb):
     """Commit the secrets registry and log an activity record for a removed secret.

     Args:
         secret_store: object whose `secret_path` is staged in git
         filename: name of the secrets file that was removed (used in messages)
         lb: repository object used for git operations and the ActivityStore
     """
     lb.git.add(secret_store.secret_path)
     lb.git.commit("Removed entry from secrets registry.")
     # Hash is read back from the git wrapper after the commit
     commit_hash = lb.git.commit_hash

     detail = ActivityDetailRecord(ActivityDetailType.LABBOOK,
                                   show=True,
                                   action=ActivityAction.DELETE)
     detail.add_value('text/markdown',
                      f"Removed entry for secrets file {filename}")

     record = ActivityRecord(ActivityType.LABBOOK,
                             message=f"Removed entry for secrets file {filename}",
                             linked_commit=commit_hash,
                             tags=["labbook", "secrets"],
                             show=True)
     record.add_detail_object(detail)

     ActivityStore(lb).create_activity_record(record)
示例#23
0
 def _record_insert_activity(cls, secret_store, filename, lb, mount_path):
     """Make an activity record for the insertion of the secret.

     Args:
         secret_store: object whose `secret_path` is staged in git
         filename: name of the secrets file that was added (used in messages)
         lb: repository object used for git operations and the ActivityStore
         mount_path: path the secrets file maps to (used in the detail message)
     """
     lb.git.add(secret_store.secret_path)
     lb.git.commit("Updated secrets registry.")
     commit = lb.git.commit_hash
     adr = ActivityDetailRecord(ActivityDetailType.LABBOOK,
                                show=True,
                                action=ActivityAction.CREATE)
     # Note the explicit space before "to": adjacent string literals are joined
     # with no separator, which previously glued the filename to "to map to".
     adr.add_value(
         'text/markdown', f"Created new entry for secrets file {filename} "
         f"to map to {mount_path}")
     ar = ActivityRecord(
         ActivityType.LABBOOK,
         message=f"Created entry for secrets file {filename}",
         linked_commit=commit,
         tags=["labbook", "secrets"],
         show=True)
     ar.add_detail_object(adr)
     ars = ActivityStore(lb)
     ars.create_activity_record(ar)
示例#24
0
    def process(self, result_obj: ActivityRecord, data: List[ExecutionData],
                status: Dict[str, Any], metadata: Dict[str,
                                                       Any]) -> ActivityRecord:
        """Method to update a result object based on code and result data

        A detail record is added for every non-empty code entry, and the record message is set
        to describe how many cells were executed.

        Args:
            result_obj(ActivityRecord): the record that detail objects are added to
            data(list): A list of ExecutionData instances; each cell's `code` and `tags` are read
            status(dict): A dict containing the result of git status (not used here)
            metadata(dict): A dictionary of developer defined data; `metadata['path']` is read
                            for the message

        Returns:
            ActivityRecord: the same `result_obj` that was passed in
        """
        # If there was some code, assume a cell was executed
        result_cnt = 0
        for cell in data:
            # Code entries are walked last-to-first
            for code_entry in reversed(cell.code):
                code = code_entry.get('code')
                if not code:
                    continue

                # Create detail record to capture executed code
                adr_code = ActivityDetailRecord(
                    ActivityDetailType.CODE_EXECUTED,
                    show=False,
                    action=ActivityAction.EXECUTE,
                    importance=max(255 - result_cnt, 0))

                adr_code.add_value('text/markdown', f"```\n{code}\n```")
                adr_code.tags = cell.tags

                result_obj.add_detail_object(adr_code)

                result_cnt += 1

        # Set Activity Record Message. Use the real number of cells: the loop index
        # from enumerate() is zero-based (off by one) and is undefined when `data` is empty.
        cell_total = len(data)
        cell_str = f"{cell_total} cells" if cell_total > 1 else "cell"
        result_obj.message = f"Executed {cell_str} in notebook {metadata['path']}"

        return result_obj
示例#25
0
    def mutate_and_get_payload(cls,
                               root,
                               info,
                               name,
                               description,
                               repository,
                               base_id,
                               revision,
                               is_untracked=False,
                               client_mutation_id=None):
        """Create a new LabBook for the logged-in user, record it, and add its base.

        Args:
            root: not used in this body
            info: not used in this body
            name: name of the LabBook to create
            description: description of the LabBook
            repository: base repository passed to `ComponentManager.add_base`
            base_id: base identifier passed to `ComponentManager.add_base`
            revision: base revision passed to `ComponentManager.add_base`
            is_untracked: accepted but not used in this body
            client_mutation_id: accepted but not used in this body

        Returns:
            CreateLabbook wrapping a Labbook keyed by the owner and created name
        """
        username = get_logged_in_username()
        inventory = InventoryManager()
        new_labbook = inventory.create_labbook(username=username,
                                               owner=username,
                                               labbook_name=name,
                                               description=description,
                                               author=get_logged_in_author())

        # The same text is used for the detail value and the record message
        created_text = f"Created new LabBook: {username}/{name}"

        detail = ActivityDetailRecord(ActivityDetailType.LABBOOK,
                                      show=False,
                                      importance=0)
        detail.add_value('text/plain', created_text)

        # Create activity record
        record = ActivityRecord(ActivityType.LABBOOK,
                                message=created_text,
                                show=True,
                                importance=255,
                                linked_commit=new_labbook.git.commit_hash)
        record.add_detail_object(detail)
        ActivityStore(new_labbook).create_activity_record(record)

        # The base is added only after the creation record has been stored
        ComponentManager(new_labbook).add_base(repository, base_id, revision)

        return CreateLabbook(labbook=Labbook(owner=username, name=new_labbook.name))
示例#26
0
    def _make_move_activity_record(cls, labbook: LabBook, section: str,
                                   dst_abs_path: str, commit_msg: str) -> None:
        """Stage and commit a moved path, then store an activity record for the move.

        Args:
            labbook: Project in which the move happened
            section: section name used to look up the activity types
            dst_abs_path: absolute destination path of the move
            commit_msg: text used as commit message, record message, and detail value

        Returns:
            None
        """
        # Directories are staged with add_all, single files with add
        stage = labbook.git.add_all if os.path.isdir(dst_abs_path) else labbook.git.add
        stage(dst_abs_path)
        move_commit = labbook.git.commit(commit_msg)

        # The third element of the lookup result is not needed here
        record_type, detail_type, _ = labbook.get_activity_type_from_section(section)

        detail = ActivityDetailRecord(detail_type,
                                      show=False,
                                      importance=0,
                                      action=ActivityAction.EDIT)
        detail.add_value('text/markdown', commit_msg)

        record = ActivityRecord(record_type,
                                message=commit_msg,
                                linked_commit=move_commit.hexsha,
                                show=True,
                                importance=255,
                                tags=['file-move'])
        record.add_detail_object(detail)

        ActivityStore(labbook).create_activity_record(record)
示例#27
0
    def process_sweep_status(self, result_obj: ActivityRecord,
                             status: Dict[str, Any]) -> Tuple[ActivityRecord, int, int, int]:
        """Add detail records to an activity record from a git status sweep and pick its type

        One detail record is added per untracked file and per staged/unstaged change. If all
        affected files belong to a single section, `result_obj.type` is narrowed to that
        section's type; on a mismatch it is reset to the default activity type.

        Args:
            result_obj: the activity record that detail objects are added to
            status: dict with an 'untracked' iterable of filenames and 'unstaged' / 'staged'
                    sequences of (filename, change) pairs. It is not modified.

        Returns:
            tuple: (result_obj, count of new files, count of non-deleted changes,
                    count of deleted files)
        """
        def _apply_section_type(section_set) -> None:
            """Narrow the record type when the set names a known section."""
            if "Code" in section_set:
                result_obj.type = ActivityType.CODE
            elif "Input Data" in section_set:
                result_obj.type = ActivityType.INPUT_DATA
            elif "Output Data" in section_set:
                result_obj.type = ActivityType.OUTPUT_DATA

        sections = []
        ncnt = 0
        for filename in status['untracked']:
            # skip any file in .git or .gigantum dirs
            if ".git" in filename or ".gigantum" in filename:
                continue
            _, activity_detail_type, section = self.infer_section_from_relative_path(filename)
            adr = ActivityDetailRecord(activity_detail_type, show=False, importance=max(255 - ncnt, 0),
                                       action=ActivityAction.CREATE)
            sections.append(section)
            if section == self._default_activity_section:
                # Two sentences; the separating space was previously missing
                msg = (f"Created new file `{filename}` in the {self._default_activity_section}. "
                       "Note, it is best practice to use the Code, Input, and Output sections exclusively.")
            else:
                msg = f"Created new {section} file `{filename}`"
            adr.add_value('text/markdown', msg)
            result_obj.add_detail_object(adr)
            ncnt += 1

        # If all new files were of same section
        new_section_set = set(sections)
        if ncnt > 0 and len(new_section_set) == 1:
            _apply_section_type(new_section_set)

        mcnt = 0
        dcnt = 0
        msections = []
        # Build a fresh list so the caller's status['unstaged'] list is not extended in place
        changes = [*status['unstaged'], *status['staged']]
        for filename, change in changes:
            # skip any file in .git or .gigantum dirs (but keep .gitkeep placeholders)
            if (".git" in filename and ".gitkeep" not in filename) or ".gigantum" in filename:
                continue

            _, activity_detail_type, section = self.infer_section_from_relative_path(filename)
            msections.append(section)

            # Deletions are counted separately; every other change counts as a modification
            if change == "deleted":
                action = ActivityAction.DELETE
                dcnt += 1
            elif change == "added":
                action = ActivityAction.CREATE
                mcnt += 1
            elif change in ("modified", "renamed"):
                action = ActivityAction.EDIT
                mcnt += 1
            else:
                action = ActivityAction.NOACTION
                mcnt += 1

            adr = ActivityDetailRecord(activity_detail_type, show=False, importance=max(255 - mcnt, 0), action=action)
            change_label = change[0].upper() + change[1:]
            if ".gitkeep" in filename:
                # A .gitkeep entry stands for its directory; keep the path before the marker
                directory_name = filename.partition('.gitkeep')[0]
                adr.add_value('text/markdown', f"{change_label} {section} directory `{directory_name}`")
            else:
                adr.add_value('text/markdown', f"{change_label} {section} file `{filename}`")
            result_obj.add_detail_object(adr)

        modified_section_set = set(msections)
        changed_cnt = mcnt + dcnt
        if result_obj.type == self._default_activity_type:
            # If new files are from different sections or no new files, you'll still be the default type
            if changed_cnt > 0 and len(modified_section_set) == 1:
                # There have been modified files and they are all from the same section
                if len(new_section_set) == 0 or new_section_set == modified_section_set:
                    # Only modified files from a single section, or new files are from the same section
                    _apply_section_type(modified_section_set)
        elif changed_cnt > 0:
            if len(modified_section_set) > 1 or new_section_set != modified_section_set:
                # Mismatch between new and modify or within modify, just use the catchall default type
                result_obj.type = self._default_activity_type

        # Return additionally new file cnt (ncnt), modified (mcnt) and deleted (dcnt)
        return result_obj, ncnt, mcnt, dcnt
示例#28
0
    def create_dataset(self,
                       username: str,
                       owner: str,
                       dataset_name: str,
                       storage_type: str,
                       description: Optional[str] = None,
                       author: Optional[GitAuthor] = None) -> Dataset:
        """Create a new Dataset in this Gigantum working directory.

        The Dataset is created on disk under
        `<working_directory>/<username>/<owner>/datasets/<dataset_name>`, initialized as a git
        repository, committed, and given an initial activity record.

        Args:
            username: Active username
            owner: Namespace in which to place this Dataset
            dataset_name: Name of the Dataset
            storage_type: String identifying the type of Dataset to instantiate
            description: Optional brief description of Dataset
            author: Optional Git Author

        Returns:
            Newly created Dataset instance

        Raises:
            ValueError: if `storage_type` is not supported, or a Dataset directory with this
                        name already exists locally

        """
        dataset = Dataset(config_file=self.config_file,
                          author=author,
                          namespace=owner)

        if storage_type not in storage.SUPPORTED_STORAGE_BACKENDS:
            raise ValueError(
                f"Unsupported Dataset storage type: {storage_type}")

        try:
            build_info = Configuration(self.config_file).config['build_info']
        except KeyError:
            logger.warning("Could not obtain build_info from config")
            build_info = None

        # Build data file contents
        dataset._data = {
            "schema": DATASET_CURRENT_SCHEMA,
            "id": uuid.uuid4().hex,
            "name": dataset_name,
            "storage_type": storage_type,
            "description": description or '',
            "created_on": datetime.datetime.utcnow().isoformat(),
            "build_info": build_info
        }
        dataset._validate_gigantum_data()

        logger.info("Creating new Dataset on disk for {}/{}/{}".format(
            username, owner, dataset_name))
        # lock while creating initial directory
        with dataset.lock(
                lock_key=f"new_dataset_lock|{username}|{owner}|{dataset_name}"
        ):
            # Verify or Create user subdirectory
            # Make sure you expand a user dir string
            starting_dir = os.path.expanduser(
                dataset.client_config.config["git"]["working_directory"])
            user_dir = os.path.join(starting_dir, username)
            os.makedirs(user_dir, exist_ok=True)

            # Create owner dir - Datasets live in working dir > logged in user > owner > datasets
            owner_root = os.path.join(user_dir, owner)
            os.makedirs(owner_root, exist_ok=True)

            # The `datasets` subdir itself is created below, together with the Dataset directory
            owner_dir = os.path.join(owner_root, "datasets")

            # Verify name not already in use
            new_root_dir = os.path.join(owner_dir, dataset_name)
            if os.path.isdir(new_root_dir):
                raise ValueError(
                    f"Dataset `{dataset_name}` already exists locally. Choose a new Dataset name"
                )

            # Create Dataset subdirectory
            os.makedirs(new_root_dir)
            dataset._set_root_dir(new_root_dir)

            # Init repository
            dataset.git.initialize()

            # Create Directory Structure
            dirs = [
                'manifest', 'metadata', '.gigantum',
                os.path.join('.gigantum', 'activity'),
                os.path.join('.gigantum', 'activity', 'log')
            ]

            # Create .gitignore default file
            shutil.copyfile(
                os.path.join(resource_filename('gtmcore', 'dataset'),
                             'gitignore.default'),
                os.path.join(dataset.root_dir, ".gitignore"))

            # Each directory gets a .gitkeep placeholder file
            for d in dirs:
                p = os.path.join(dataset.root_dir, d, '.gitkeep')
                os.makedirs(os.path.dirname(p), exist_ok=True)
                with open(p, 'w') as gk:
                    gk.write(
                        "This file is necessary to keep this directory tracked by Git"
                        " and archivable by compression tools. Do not delete or modify!"
                    )

            dataset._save_gigantum_data()

            # Initialize the storage backend configuration with an empty dict
            dataset.backend_config = {}

            # Commit
            dataset.git.add_all()

            # NOTE: this string is used to indicate there are no more activity records to get. Changing the string will
            # break activity paging.
            # TODO: Improve method for detecting the first activity record
            dataset.git.commit(f"Creating new empty Dataset: {dataset_name}")

            # Create Activity Record
            # NOTE(review): messages use `username`, while the Dataset namespace is `owner` - confirm intended
            adr = ActivityDetailRecord(ActivityDetailType.DATASET,
                                       show=False,
                                       importance=0)
            adr.add_value('text/plain',
                          f"Created new Dataset: {username}/{dataset_name}")
            ar = ActivityRecord(
                ActivityType.DATASET,
                message=f"Created new Dataset: {username}/{dataset_name}",
                show=True,
                importance=255,
                linked_commit=dataset.git.commit_hash)
            ar.add_detail_object(adr)
            store = ActivityStore(dataset)
            store.create_activity_record(ar)

            # Initialize file cache and link revision
            m = Manifest(dataset, username)
            m.link_revision()

            return dataset
    def test_get_recent_activity(self, fixture_working_dir, snapshot,
                                 fixture_test_file):
        """Test the recentActivity overview query against a snapshot

        A labbook is populated with a mix of file operations and hand-made activity records,
        then the `recentActivity` field is queried and compared with the stored snapshot.
        """
        def _write_tmp_file() -> str:
            """(Re)write the scratch input file, closing the handle before it is used."""
            tmp_path = '/tmp/test_file.txt'
            with open(tmp_path, 'w') as tf:
                tf.write("xxx" * 50)
            return tmp_path

        im = InventoryManager(fixture_working_dir[0])
        lb = im.create_labbook("default",
                               "default",
                               "labbook11",
                               description="my test description",
                               author=GitAuthor(name="tester",
                                                email="*****@*****.**"))

        FileOperations.insert_file(lb, "code", fixture_test_file)

        # fake activity
        store = ActivityStore(lb)
        adr1 = ActivityDetailRecord(ActivityDetailType.CODE)
        adr1.show = False
        adr1.importance = 100
        adr1.add_value("text/plain", "first")

        ar = ActivityRecord(ActivityType.CODE,
                            show=False,
                            message="ran some code",
                            importance=50,
                            linked_commit="asdf")

        ar.add_detail_object(adr1)

        # Create Activity Records (the same record object is stored repeatedly)
        for _ in range(4):
            store.create_activity_record(ar)

        FileOperations.insert_file(lb, "input", _write_tmp_file())
        FileOperations.makedir(lb, "input/test")
        FileOperations.insert_file(lb, "input", _write_tmp_file(), "test")
        FileOperations.makedir(lb, "input/test2")
        FileOperations.insert_file(lb, "input", _write_tmp_file(), "test2")

        for _ in range(5):
            store.create_activity_record(ar)

        FileOperations.insert_file(lb, "output", _write_tmp_file())

        # Query recent activity from the overview and compare with the snapshot
        query = """
                    {
                      labbook(owner: "default", name: "labbook11") {
                        overview {
                          recentActivity {
                            message
                            type
                            show
                            importance
                            tags
                          }
                        }
                      }
                    }
                    """
        snapshot.assert_match(fixture_working_dir[2].execute(query))
示例#30
0
    def link_dataset_to_labbook(self, dataset_url: str, dataset_namespace: str,
                                dataset_name: str,
                                labbook: LabBook) -> Dataset:
        """Link a dataset to a Project as a git submodule and record the activity

        The submodule is added under `.gigantum/datasets/<namespace>/<name>`. If the first
        attempt fails, leftover submodule state is cleaned and the add is retried once; a
        failure of the retry propagates to the caller.

        Args:
            dataset_url: git URL of the dataset passed to `git submodule add`
            dataset_namespace: owner (namespace) of the dataset
            dataset_name: name of the dataset
            labbook: Project to which the dataset is linked

        Returns:
            Dataset: the dataset loaded from the new submodule directory
        """
        submodule_name = f"{dataset_namespace}&{dataset_name}"
        relative_submodule_dir = os.path.join('.gigantum', 'datasets',
                                              dataset_namespace, dataset_name)
        absolute_submodule_dir = os.path.join(labbook.root_dir,
                                              relative_submodule_dir)
        absolute_submodule_root = os.path.join(labbook.root_dir, '.gigantum',
                                               'datasets', dataset_namespace)
        git_module_dir = os.path.join(labbook.root_dir, '.git', 'modules',
                                      submodule_name)

        def _clean_submodule():
            """Helper method to clean a submodule reference from a repository"""
            if os.path.exists(absolute_submodule_dir):
                logger.warning(
                    f"Cleaning {relative_submodule_dir} from parent git repo")
                try:
                    call_subprocess(
                        ['git', 'rm', '-f', '--cached', relative_submodule_dir],
                        cwd=labbook.root_dir)
                except subprocess.CalledProcessError:
                    # Best effort: keep going and remove the directories below
                    logger.warning(
                        f"git rm on {relative_submodule_dir} failed. Continuing..."
                    )

            if os.path.exists(absolute_submodule_dir):
                logger.warning(f"Removing {absolute_submodule_dir} directory")
                shutil.rmtree(absolute_submodule_dir)

            if os.path.exists(git_module_dir):
                logger.warning(f"Removing {git_module_dir} directory")
                shutil.rmtree(git_module_dir)

        def _add_submodule():
            """Helper method to link the dataset via a submodule reference"""
            call_subprocess([
                'git', 'submodule', 'add', '--name', submodule_name,
                dataset_url, relative_submodule_dir
            ],
                            cwd=labbook.root_dir)

        if not os.path.exists(absolute_submodule_root):
            pathlib.Path(absolute_submodule_root).mkdir(parents=True,
                                                        exist_ok=True)

        # Both the working copy and the git module dir already exist: seem to be trying to
        # link a dataset after a reset removed the dataset. Clean up first.
        if os.path.exists(absolute_submodule_dir) and os.path.exists(git_module_dir):
            _clean_submodule()

        try:
            _add_submodule()
        except subprocess.CalledProcessError:
            logger.warning(
                "Failed to link dataset. Attempting to repair repository and link again."
            )
            _clean_submodule()

            # Try to add again 1 more time, allowing a failure to raise an exception
            _add_submodule()

            # If you got here, repair worked and link OK
            logger.info("Repository repair and linking retry successful.")

        labbook.git.add_all()
        link_commit = labbook.git.commit(
            f"adding submodule ref to link dataset {dataset_namespace}/{dataset_name}"
        )
        labbook.git.update_submodules(init=True)

        ds = self.load_dataset_from_directory(absolute_submodule_dir)
        dataset_revision = ds.git.repo.head.commit.hexsha

        # Add Activity Record
        detail = ActivityDetailRecord(ActivityDetailType.DATASET,
                                      show=False,
                                      action=ActivityAction.CREATE)
        detail.add_value(
            'text/markdown',
            f"Linked Dataset `{dataset_namespace}/{dataset_name}` to "
            f"project at revision `{dataset_revision}`")
        record = ActivityRecord(
            ActivityType.DATASET,
            message=f"Linked Dataset {dataset_namespace}/{dataset_name} to project.",
            linked_commit=link_commit.hexsha,
            tags=["dataset"],
            show=True)
        record.add_detail_object(detail)
        ActivityStore(labbook).create_activity_record(record)

        return ds