示例#1
0
def clone(repository_url, folder=None, track=False):
    """Clone the minimal ml-git configuration files from a git repository.

    The repository at ``repository_url`` is expected to hold a valid
    ``.ml-git/config.yaml``; after cloning, the metadata is initialized
    according to that configuration.

    Example:
        clone('https://git-host.example/mlgit-repository')

    Args:
        repository_url (str): The git repository that will be cloned.
        folder (str, optional): Directory that can be created to execute the clone command [default: current path].
        track (bool, optional): Set if the tracking of the cloned repository should be kept [default: False].

    """

    project_repo = Repository(config_load(), 'project')

    # An explicit destination needs no staging: clone straight into it.
    if folder is not None:
        project_repo.clone_config(repository_url, folder, track)
        return

    # No destination given: clone into a throw-away directory created inside
    # the current directory, then move only '.ml-git' next to the caller's files.
    base_dir = os.getcwd()
    with tempfile.TemporaryDirectory(dir=base_dir) as scratch_dir:
        clone_target = os.path.join(scratch_dir, 'mlgit')
        project_repo.clone_config(repository_url, clone_target, track)

        local_config_dir = os.path.join(base_dir, '.ml-git')
        if not os.path.exists(local_config_dir):
            shutil.move(os.path.join(clone_target, '.ml-git'), base_dir)

        # Go back to the starting directory before the temporary directory is
        # removed (presumably clone_config may change the working directory).
        os.chdir(base_dir)
示例#2
0
def get_repository_instance(repo_type):
    """Return a Repository for ``repo_type`` built from the loaded config.

    ``repo_type`` is accepted when it is listed in ``EntityType.to_list()``
    or when it equals ``'project'``.

    Raises:
        RuntimeError: If ``repo_type`` is neither a listed entity type nor 'project'.
    """
    project_repo_type = 'project'
    is_accepted = repo_type in EntityType.to_list() or repo_type == project_repo_type
    if not is_accepted:
        error_message = output_messages['ERROR_INVALID_ENTITY_TYPE'] % EntityType.to_list()
        raise RuntimeError(error_message)
    return Repository(config_load(), repo_type)
示例#3
0
def create(entity, entity_name, categories, mutability, **kwargs):
    """This command will create the workspace structure with data and spec file for an entity and set the store configurations.

    Example:
        create('dataset', 'dataset-ex', categories=['computer-vision', 'images'], mutability='strict')

    Args:
        entity (str): The type of an ML entity. (dataset, labels or model).
        entity_name (str): An ml-git entity name to identify a ML entity.
        categories (list): Artifact's category name.
        mutability (str): Mutability type. The mutability options are strict, flexible and mutable.
        store_type (str, optional): Data store type [default: StoreType.S3H.value].
        version (int, optional): Version number of the artifact being created [default: 1].
        import_path (str, optional): Path to be imported to the project.
        bucket_name (str, optional): Bucket name.
        import_url (str, optional): Import data from a google drive url.
        credentials_path (str, optional): Directory of credentials.json.
        unzip (bool, optional): Unzip imported zipped files [default: False].
    """

    # Translate the public keyword names into the keys that Repository.create
    # receives; 'wizard_config' is always passed as False from this entry point.
    args = {
        'artifact_name': entity_name,
        'category': categories,
        'mutability': mutability,
        'version_number': kwargs.get('version', 1),
        'import': kwargs.get('import_path'),
        'store_type': kwargs.get('store_type', StoreType.S3H.value),
        'bucket_name': kwargs.get('bucket_name'),
        'unzip': kwargs.get('unzip', False),
        'import_url': kwargs.get('import_url'),
        'credentials_path': kwargs.get('credentials_path'),
        'wizard_config': False,
    }

    repo = Repository(config_load(), entity)
    repo.create(args)
示例#4
0
 def test_paths(self):
     """Check that every path getter yields a non-empty value for the loaded config."""
     config = config_load()
     path_getters = (get_index_path, get_objects_path, get_cache_path, get_metadata_path)
     for path_getter in path_getters:
         self.assertTrue(len(path_getter(config)) > 0)
     # The refs path must be located under the '.ml-git' directory name.
     self.assertTrue('.ml-git' in get_refs_path(config))
示例#5
0
def config(**kwargs):
    """Pretty-print a configuration selected by the 'global' / 'local' flags.

    The merged configuration from ``config_load()`` is always loaded first.
    When ``kwargs['global']`` is truthy the result of ``global_config_load()``
    is shown instead; otherwise, when ``kwargs['local']`` is truthy, the result
    of ``mlgit_config_load()`` is shown. Both keys must be present in ``kwargs``
    when they are consulted.
    """
    shown_config = config_load()

    if kwargs['global']:
        override_loader = global_config_load
    elif kwargs['local']:
        override_loader = mlgit_config_load
    else:
        override_loader = None

    if override_loader is not None:
        shown_config = override_loader()

    print('config:')
    pprint(shown_config)
示例#6
0
 def test_init_refs(self):
     """Constructing Refs must create the spec directory under the refs directory."""
     spec_name = 'dataset-ex'
     mlgit_dir = os.path.join(self.tmp_dir, config_load()['mlgit_path'])
     os.mkdir(mlgit_dir)
     refs_root = os.path.join(mlgit_dir, 'dataset', 'refs')

     refs = Refs(refs_root, spec_name, 'dataset')

     self.assertIsNotNone(refs)
     expected_spec_dir = os.path.join(refs_root, spec_name)
     self.assertTrue(os.path.exists(expected_spec_dir))
示例#7
0
 def repo_remote_add(self, repo_type, mlgit_remote, global_conf=False):
     """Register ``mlgit_remote`` for ``repo_type`` and point the metadata at it.

     The configuration is reloaded after ``remote_add`` so that the Metadata
     object is built from the refreshed config. Any exception is logged and
     swallowed; the method returns None in every case.
     """
     try:
         remote_add(repo_type, mlgit_remote, global_conf)
         self.__config = config_load()
         metadata = Metadata('', get_metadata_path(self.__config), self.__config, self.__repo_type)
         metadata.remote_set_url(mlgit_remote)
     except Exception as error:
         log.error(error, class_name=REPOSITORY_CLASS_NAME)
示例#8
0
 def test_get_batch_size(self):
     """get_batch_size returns BATCH_SIZE_VALUE by default and rejects invalid values."""
     config = config_load()
     self.assertEqual(get_batch_size(config), BATCH_SIZE_VALUE)

     # Both a zero and a non-numeric batch size must be rejected.
     for invalid_value in (0, 'string'):
         config[BATCH_SIZE] = invalid_value
         with self.assertRaises(Exception):
             get_batch_size(config)

     # With the key removed, the expected value is BATCH_SIZE_VALUE again.
     del config[BATCH_SIZE]
     self.assertEqual(get_batch_size(config), BATCH_SIZE_VALUE)
示例#9
0
def remote_add(entity, remote_url, global_configuration=False):
    """Add a remote that stores the metadata of this ml-git project.

    Examples:
        remote_add('dataset', 'https://git-host.example/mlgit-datasets')

    Args:
        entity (str): The type of an ML entity. (repository, dataset, labels or model).
        remote_url (str): URL of an existing remote git repository.
        global_configuration (bool, optional): Use this option to set configuration at global level [default: False].
    """

    entity_repo = Repository(config_load(), entity)
    entity_repo.repo_remote_add(
        entity,
        remote_url,
        global_configuration,
    )
示例#10
0
 def test_head(self):
     """head() returns the stored (tag, sha) pair, or (None, None) once HEAD is deleted."""
     spec_name = 'dataset-ex'
     expected_tag = 'images__dataset_ex__1'
     expected_sha = 'b569b7e4cd82206b451315123669057ef5f1ac3b'

     mlgit_dir = os.path.join(self.tmp_dir, config_load()['mlgit_path'])
     os.mkdir(mlgit_dir)
     refs_root = os.path.join(mlgit_dir, 'dataset', 'refs')
     refs = Refs(refs_root, spec_name)

     refs.update_head(expected_tag, expected_sha)
     self.assertEqual((expected_tag, expected_sha), refs.head())

     # Without the HEAD file there is nothing to report.
     os.remove(os.path.join(refs_root, spec_name, 'HEAD'))
     self.assertEqual((None, None), refs.head())
示例#11
0
 def test_update_head(self):
     """update_head must write a HEAD file that maps the tag to its sha."""
     spec_name = 'dataset-ex'
     expected_tag = 'images__dataset_ex__1'
     expected_sha = 'b569b7e4cd82206b451315123669057ef5f1ac3b'

     mlgit_dir = os.path.join(self.tmp_dir, config_load()['mlgit_path'])
     os.mkdir(mlgit_dir)
     refs_root = os.path.join(mlgit_dir, 'dataset', 'refs')
     refs = Refs(refs_root, spec_name)

     refs.update_head(expected_tag, expected_sha)

     head_file = os.path.join(refs_root, spec_name, 'HEAD')
     self.assertTrue(os.path.exists(head_file))
     head_content = yaml_load(head_file)
     self.assertEqual(head_content[expected_tag], expected_sha)
示例#12
0
def push(entity, entity_name, retries=2, clear_on_fail=False):
    """Push the data of a specific version of an ML entity.

    Example:
        push('dataset', 'dataset-ex')

    Args:
        entity (str): The type of an ML entity. (dataset, labels or model).
        entity_name (str): An ml-git entity name to identify a ML entity.
        retries (int, optional): Number of retries to upload the files to the storage [default: 2].
        clear_on_fail (bool, optional): Remove the files from the store in case of failure during the push operation [default: False].
    """

    entity_repo = Repository(config_load(), entity)
    entity_repo.push(
        entity_name,
        retries,
        clear_on_fail,
    )
示例#13
0
 def __init_manager(self, type_entity):
     """Create ``self._manager`` for ``type_entity`` and initialize it when needed.

     When the config has no 'git' value for the entity type, a warning is
     logged and no manager is created. Any exception raised along the way is
     logged as an error and swallowed.
     """
     try:
         # Called only for its effect; the return value is not used here
         # (presumably it raises when no project root can be found).
         get_root_path()
         config = config_load()
         if config[type_entity]['git']:
             manager = MetadataManager(config, repo_type=type_entity)
             self._manager = manager
             if not manager.check_exists():
                 manager.init()
         else:
             warning = output_messages['WARN_REPOSITORY_NOT_FOUND_FOR_ENTITY'] % type_entity
             log.warn(warning, class_name=LocalEntityManager.__name__)
     except Exception as error:
         log.error(error, class_name=LocalEntityManager.__name__)
示例#14
0
def add(entity_type, entity_name, bumpversion=False, fsck=False, file_path=None):
    """This command will add all the files under the directory into the ml-git index/staging area.

    Example:
        add('dataset', 'dataset-ex', bumpversion=True)

    Args:
        entity_type (str): The type of an ML entity. (dataset, labels or model)
        entity_name (str): The name of the ML entity you want to add the files.
        bumpversion (bool, optional): Increment the entity version number when adding more files [default: False].
        fsck (bool, optional): Run fsck after command execution [default: False].
        file_path (list, optional): List of files that must be added by the command.
            ``None`` (the default) is treated as an empty list, which selects all files.
    """

    # A fresh list per call: a literal ``[]`` default would be one shared object,
    # so any mutation downstream would leak into later calls.
    if file_path is None:
        file_path = []

    repo = Repository(config_load(), entity_type)
    repo.add(entity_name, file_path, bumpversion, fsck)
示例#15
0
def init(entity):
    """Start an ml-git entity.

    Examples:
        init('repository')
        init('dataset')

    Args:
        entity (str): The type of entity that will be initialized (repository, dataset, labels or model).

    An unrecognized ``entity`` is reported through ``log.error``; nothing is raised.
    """

    # 'repository' initializes the ml-git project itself rather than an entity.
    if entity == 'repository':
        init_mlgit()
        return

    if entity not in EntityType.to_list():
        log.error('The type of entity entered is invalid. Valid types are: [repository, dataset, labels or model]')
        return

    Repository(config_load(), entity).init()
示例#16
0
def checkout(entity, tag, sampling=None, retries=2, force=False, dataset=False, labels=False, version=-1):
    """Retrieve the data of a specific version of an ML entity.

    Example:
        checkout('dataset', 'computer-vision__images3__imagenet__1')

    Args:
        entity (str): The type of an ML entity. (dataset, labels or model)
        tag (str): An ml-git tag to identify a specific version of an ML entity.
        sampling (dict): group: <amount>:<group> The group sample option consists of amount and group used to
                                 download a sample.
                         range: <start:stop:step> The range sample option consists of start, stop and step used
                                to download a sample. The start parameter can be equal or greater than zero. The
                                stop parameter can be 'all', -1 or any integer above zero.
                         random: <amount:frequency> The random sample option consists of amount and frequency
                                used to download a sample.
                         seed: The seed is used to initialize the pseudorandom numbers.
        retries (int, optional): Number of retries to download the files from the storage [default: 2].
        force (bool, optional): Force checkout command to delete untracked/uncommitted files from the local repository [default: False].
        dataset (bool, optional): If exist a dataset related with the model or labels, this one must be downloaded [default: False].
        labels (bool, optional): If exist labels related with the model, they must be downloaded [default: False].
        version (int, optional): Forwarded to the repository checkout as the 'version' option [default: -1].

    Returns:
        str or None: The path where the data was checked out, or None when the
        sampling is invalid or the expected path does not exist afterwards.

    """

    repo = Repository(config_load(), entity)
    repo.update()

    sampling_is_invalid = sampling is not None and not validate_sample(sampling)
    if sampling_is_invalid:
        return None

    options = {
        'with_dataset': dataset,
        'with_labels': labels,
        'retry': retries,
        'force': force,
        'bare': False,
        'version': version,
    }
    repo.checkout(tag, sampling, options)

    # The checkout location is derived from the tag: every '__'-separated
    # component except the last one, placed under the entity directory.
    tag_components = tag.split('__')
    data_path = os.path.join(entity, *tag_components[:-1])
    return data_path if os.path.exists(data_path) else None
示例#17
0
文件: utils.py 项目: tspthomas/ml-git
def init_repository(entity_type=DATASETS):
    """Return a Repository for ``entity_type`` built from the loaded configuration."""
    loaded_config = config_load()
    return Repository(loaded_config, entity_type)
示例#18
0
def init_repository(entity_type='dataset'):
    """Return a Repository for ``entity_type`` built from the loaded configuration."""
    loaded_config = config_load()
    return Repository(loaded_config, entity_type)
示例#19
0
def restore_config():
    """Snapshot the loaded config, yield, then write the snapshot values back.

    Only keys present in the snapshot are reassigned after the yield; keys
    added to the config in the meantime are left in place. (Presumably used as
    a fixture or context manager; the decorator is not visible here.)
    """
    live_config = config_load()
    snapshot = deepcopy(live_config)
    yield
    for key, saved_value in snapshot.items():
        live_config[key] = saved_value
示例#20
0
def get_repository_instance(repo_type):
    """Return a Repository for ``repo_type`` built from the loaded configuration."""
    loaded_config = config_load()
    return Repository(loaded_config, repo_type)
示例#21
0
            index_path = get_index_path(self.__config, repo_type)

            log_info = metadata.get_log_info(spec, fullstat)

        except Exception as e:
            log.error(e, class_name=REPOSITORY_CLASS_NAME)
            return
        fidx = FullIndex(spec, index_path)
        if stat or fullstat:
            workspace_size = fidx.get_total_size()

            amount_message = 'Total of files: %s' % fidx.get_total_count()
            size_message = 'Workspace size: %s' % size(workspace_size,
                                                       system=alternative)

            workspace_info = '------------------------------------------------- \n{}\t{}' \
                .format(amount_message, size_message)

            log_info = '{}\n{}'.format(log_info, workspace_info)

        log.info(log_info, class_name=REPOSITORY_CLASS_NAME)


# Manual smoke run, executed only when this module is run as a script:
# initialize a repository, then add, commit and show the status of 'dataset-ex'.
if __name__ == '__main__':
    config = config_load()
    # NOTE(review): Repository is built from the config alone here, with no
    # entity type argument - confirm this matches the current constructor.
    r = Repository(config)
    r.init()
    r.add('dataset-ex')
    r.commit('dataset-ex')
    r.status('dataset-ex')
示例#22
0
def init_repository(entity_type=DATASETS):
    """Return a Repository for ``entity_type``, loading the config with ``hide_logs=True``."""
    quiet_config = config_load(hide_logs=True)
    return Repository(quiet_config, entity_type)
示例#23
0
 def clone_config(self, url, folder=None, track=False):
     """Clone the configuration repository and, on success, clone its metadata config repo.

     Nothing further happens when ``clone_config_repository`` returns a falsy
     value. On success the configuration is reloaded before the Metadata
     object is built from it.
     """
     cloned = clone_config_repository(url, folder, track)
     if not cloned:
         return
     self.__config = config_load()
     metadata_path = get_metadata_path(self.__config)
     metadata = Metadata('', metadata_path, self.__config)
     metadata.clone_config_repo()