Пример #1
0
    def test_independent_experiment_creation_with_run_triggers_experiment_building_scheduling(self):
        """Creating an independent experiment with a run section schedules its build.

        Creation must trigger exactly one `experiments_build.apply_async` call and
        record only the CREATED status. Running `build_experiment` with
        `dockerizer.builders.experiments.build_experiment` patched must then leave
        the statuses CREATED, BUILDING, SCHEDULED, FAILED, in that order.
        """
        spec = ExperimentSpecification.read(exec_experiment_spec_content)
        # The experiment's project needs a repo for this scenario.
        project_repo = RepoFactory()

        with patch('scheduler.tasks.experiments.experiments_build.apply_async') as build_task:
            experiment = ExperimentFactory(config=spec.parsed_data,
                                           project=project_repo.project)

        assert build_task.call_count == 1
        assert experiment.project.repo is not None
        assert experiment.is_independent is True

        # Before the build runs, only the initial status is recorded.
        statuses = ExperimentStatus.objects.filter(experiment=experiment)
        assert statuses.count() == 1
        assert list(statuses.values_list('status', flat=True)) == [
            ExperimentLifeCycle.CREATED,
        ]

        with patch('dockerizer.builders.experiments.build_experiment') as docker_build:
            build_experiment(experiment_id=experiment.id)

        assert docker_build.call_count == 1

        expected_statuses = [
            ExperimentLifeCycle.CREATED,
            ExperimentLifeCycle.BUILDING,
            ExperimentLifeCycle.SCHEDULED,
            ExperimentLifeCycle.FAILED,
        ]
        statuses = ExperimentStatus.objects.filter(experiment=experiment)
        assert statuses.count() == 4
        assert list(statuses.values_list('status', flat=True)) == expected_statuses

        experiment.refresh_from_db()
        assert experiment.last_status == ExperimentLifeCycle.FAILED
Пример #2
0
    def test_create_experiment_with_resources_spec(self, spawner_mock):
        """Creating an experiment from the resources spec starts it with three jobs.

        `spawner_mock` is configured so that `start_experiment` returns
        `start_experiment_value` and `spec` is the parsed specification.
        NOTE(review): `spawner_mock` is presumably injected by a patch decorator
        outside this block -- confirm.
        """
        spec = ExperimentSpecification.read(exec_experiment_resources_content)
        spawner = spawner_mock.return_value
        spawner.start_experiment.return_value = start_experiment_value
        spawner.spec = spec

        experiment = ExperimentFactory(config=spec.parsed_data)
        assert experiment.is_independent is True

        expected_statuses = [
            ExperimentLifeCycle.CREATED,
            ExperimentLifeCycle.SCHEDULED,
            ExperimentLifeCycle.STARTING,
        ]
        statuses = ExperimentStatus.objects.filter(experiment=experiment)
        assert statuses.count() == 3
        assert list(statuses.values_list('status', flat=True)) == expected_statuses

        experiment.refresh_from_db()
        assert experiment.last_status == ExperimentLifeCycle.STARTING

        # Three jobs, each with its own resources, must have been created.
        assert ExperimentJob.objects.filter(experiment=experiment).count() == 3
        assert JobResources.objects.count() == 3
        recorded_job_statuses = set(
            ExperimentJob.objects.values_list('statuses__status', flat=True))
        assert recorded_job_statuses == {JobLifeCycle.CREATED}
        experiment_jobs = ExperimentJob.objects.filter(experiment=experiment)
        assert experiment.calculated_status == ExperimentLifeCycle.STARTING

        # Every job must still be in its initial status.
        for experiment_job in experiment_jobs:
            assert experiment_job.last_status == JobLifeCycle.CREATED
Пример #3
0
    def test_set_metrics(self):
        """`experiments_set_metrics` accepts a single payload and a list of payloads.

        One dict adds one metric record; a list of two dicts adds two more.
        """
        spec = ExperimentSpecification.read(experiment_spec_content)
        experiment = ExperimentFactory(config=spec.parsed_data)
        assert experiment.metrics.count() == 0

        timestamp = timezone.now()

        def metric_payload():
            # Build a fresh dict on every call so no payload object is shared.
            return {
                'created_at': timestamp,
                'values': {
                    'accuracy': 0.9,
                    'precision': 0.9
                }
            }

        # A single payload passed as a dict.
        experiments_set_metrics(experiment_id=experiment.id, data=metric_payload())
        assert experiment.metrics.count() == 1

        # Several payloads passed as a list.
        experiments_set_metrics(experiment_id=experiment.id,
                                data=[metric_payload(), metric_payload()])
        assert experiment.metrics.count() == 3
Пример #4
0
def experiments_stop(project_name,
                     project_uuid,
                     experiment_name,
                     experiment_group_name,
                     experiment_group_uuid,
                     experiment_uuid,
                     specification,
                     update_status=True):
    """Ask the experiment scheduler to stop an experiment.

    `specification` is parsed with `ExperimentSpecification.read` before being
    handed to `experiment_scheduler.stop_experiment`. When `update_status` is
    true and the experiment can still be fetched, its status is set to STOPPED.
    """
    experiment_scheduler.stop_experiment(
        project_name=project_name,
        project_uuid=project_uuid,
        experiment_name=experiment_name,
        experiment_group_name=experiment_group_name,
        experiment_group_uuid=experiment_group_uuid,
        experiment_uuid=experiment_uuid,
        specification=ExperimentSpecification.read(specification),
    )

    if not update_status:
        return

    experiment = get_valid_experiment(experiment_uuid=experiment_uuid)
    if experiment:
        # Record that the experiment has been stopped.
        experiment.set_status(ExperimentLifeCycle.STOPPED,
                              message='Experiment was stopped')
        return

    _logger.info(
        'Something went wrong, the Experiment `%s` does not exist anymore.',
        experiment_uuid)
Пример #5
0
    def test_serialize_with_environment_section(self):
        """Serializing an experiment whose spec has an environment exposes `resources`."""
        yaml_content = """---
            version: 1

            kind: experiment

            environment:
              resources:
                cpu:
                  requests: 2
                  limits: 4
                memory:
                  requests: 4096
                  limits: 10240
              pytorch:
                n_workers: 2
                default_worker:
                  resources:
                    cpu:
                      requests: 2
                      limits: 4
                    memory:
                      requests: 4096
                      limits: 10240

            run:
              image: my_image
              cmd: video_prediction_train --model=DNA --num_masks=1
        """
        parsed_spec = ExperimentSpecification.read(yaml_content)

        experiment = self.factory_class(config=parsed_spec.parsed_data)
        serialized = self.serializer_class(experiment).data
        assert 'resources' in serialized
Пример #6
0
    def test_create_experiment_with_resources_spec(self, spawner_mock):
        """Creating an experiment from the resources spec starts it with three jobs.

        `spawner_mock` is configured with a `start_experiment` return value, fixed
        job uuids for the master, worker and ps roles, and the parsed specification.
        NOTE(review): `spawner_mock` is presumably injected by a patch decorator
        outside this block -- confirm.
        """
        spec = ExperimentSpecification.read(exec_experiment_resources_content)
        spawner = spawner_mock.return_value
        spawner.start_experiment.return_value = start_experiment_value
        spawner.job_uuids = {
            'master': ['fa6203c189a855dd977019854a7ffcc3'],
            'worker': ['3a9c9b0bd56b5e9fbdbd1a3d43d57960'],
            'ps': ['59e3601232b85a3d8be2511f23a62945'],
        }
        spawner.spec = spec

        experiment = ExperimentFactory(config=spec.parsed_data)
        assert experiment.is_independent is True

        expected_statuses = [
            ExperimentLifeCycle.CREATED,
            ExperimentLifeCycle.SCHEDULED,
            ExperimentLifeCycle.STARTING,
        ]
        statuses = ExperimentStatus.objects.filter(experiment=experiment)
        assert statuses.count() == 3
        assert list(statuses.values_list('status', flat=True)) == expected_statuses

        experiment.refresh_from_db()
        assert experiment.last_status == ExperimentLifeCycle.STARTING

        # Three jobs, each with its own resources, must have been created.
        assert ExperimentJob.objects.filter(experiment=experiment).count() == 3
        assert JobResources.objects.count() == 3
        recorded_job_statuses = set(
            ExperimentJob.objects.values_list('statuses__status', flat=True))
        assert recorded_job_statuses == {JobLifeCycle.CREATED}
        experiment_jobs = ExperimentJob.objects.filter(experiment=experiment)
        assert experiment.calculated_status == ExperimentLifeCycle.STARTING

        # Every job must still be in its initial status.
        for experiment_job in experiment_jobs:
            assert experiment_job.last_status == JobLifeCycle.CREATED
Пример #7
0
    def test_independent_experiment_creation_with_run_triggers_experiment_scheduling(self):
        """An experiment whose build already succeeded moves on to scheduling.

        Creation must trigger exactly one `experiments_build.apply_async` call and
        record only the CREATED status. Running `experiments_build` with
        `create_build_job` patched to return a SUCCEEDED build must then leave
        the statuses CREATED, SCHEDULED, FAILED, in that order.
        """
        spec = ExperimentSpecification.read(exec_experiment_spec_content)
        # The experiment's project needs a repo for this scenario.
        project_repo = RepoFactory()

        with patch('scheduler.tasks.experiments.experiments_build.apply_async') as build_task:
            experiment = ExperimentFactory(config=spec.parsed_data,
                                           project=project_repo.project)

        assert build_task.call_count == 1
        assert experiment.project.repo is not None
        assert experiment.is_independent is True

        # Before the build runs, only the initial status is recorded.
        statuses = ExperimentStatus.objects.filter(experiment=experiment)
        assert statuses.count() == 1
        assert list(statuses.values_list('status', flat=True)) == [
            ExperimentLifeCycle.CREATED,
        ]

        with patch('scheduler.dockerizer_scheduler.create_build_job') as create_build_mock:
            build_job = BuildJobFactory()
            BuildJobStatus.objects.create(status=JobLifeCycle.SUCCEEDED, job=build_job)
            create_build_mock.return_value = (build_job, True, True)
            experiments_build(experiment_id=experiment.id)

        assert create_build_mock.call_count == 1

        expected_statuses = [
            ExperimentLifeCycle.CREATED,
            ExperimentLifeCycle.SCHEDULED,
            ExperimentLifeCycle.FAILED,
        ]
        statuses = ExperimentStatus.objects.filter(experiment=experiment)
        assert statuses.count() == 3
        assert list(statuses.values_list('status', flat=True)) == expected_statuses

        experiment.refresh_from_db()
        assert experiment.last_status == ExperimentLifeCycle.FAILED
Пример #8
0
    def test_independent_experiment_creation_triggers_experiment_scheduling(self):
        """Creating an independent experiment records CREATED, SCHEDULED, FAILED."""
        spec = ExperimentSpecification.read(experiment_spec_content)
        experiment = ExperimentFactory(config=spec.parsed_data)
        assert experiment.is_independent is True

        expected_statuses = [
            ExperimentLifeCycle.CREATED,
            ExperimentLifeCycle.SCHEDULED,
            ExperimentLifeCycle.FAILED,
        ]
        statuses = ExperimentStatus.objects.filter(experiment=experiment)
        assert statuses.count() == 3
        assert list(statuses.values_list('status', flat=True)) == expected_statuses

        experiment.refresh_from_db()
        assert experiment.last_status == ExperimentLifeCycle.FAILED
Пример #9
0
def experiments_stop(self,
                     project_name,
                     project_uuid,
                     experiment_name,
                     experiment_group_name,
                     experiment_group_uuid,
                     experiment_uuid,
                     specification,
                     update_status=True,
                     collect_logs=True,
                     message=None):
    """Collect logs, stop an experiment through the scheduler and mark it STOPPED.

    `self` is the bound task instance; it must expose `request.retries` and
    `retry`. When `collect_logs` is true the experiment jobs' logs are collected
    first, and an `OSError` there is only logged. A falsy `specification` skips
    the scheduler call and the experiment is treated as deleted. If the
    scheduler reports the experiment as not deleted and fewer than two retries
    have happened, the task is retried. When `update_status` is true the
    experiment (deleted ones included) gets the STOPPED status, with `message`
    or a default text.
    """
    if collect_logs:
        try:
            collectors.logs_collect_experiment_jobs(experiment_uuid=experiment_uuid)
        except OSError:
            _logger.warning(
                'Scheduler could not collect the logs for experiment `%s`.',
                experiment_name)

    # With no specification the scheduler is not called.
    deleted = True
    if specification:
        specification = ExperimentSpecification.read(specification)
        deleted = experiment_scheduler.stop_experiment(
            project_name=project_name,
            project_uuid=project_uuid,
            experiment_name=experiment_name,
            experiment_group_name=experiment_group_name,
            experiment_group_uuid=experiment_group_uuid,
            experiment_uuid=experiment_uuid,
            specification=specification,
        )

    if not deleted and self.request.retries < 2:
        _logger.info('Trying again to delete job `%s` in experiment.', experiment_name)
        self.retry(countdown=Intervals.EXPERIMENTS_SCHEDULER)
        return

    if not update_status:
        return

    experiment = get_valid_experiment(experiment_uuid=experiment_uuid,
                                      include_deleted=True)
    if experiment:
        # Record that the experiment has been stopped.
        experiment.set_status(ExperimentLifeCycle.STOPPED,
                              message=message or 'Experiment was stopped')
        return

    _logger.info(
        'Something went wrong, the Experiment `%s` does not exist anymore.',
        experiment_uuid)
Пример #10
0
def validate_experiment_spec_config(config, raise_for_rest: bool = False):
    """Parse `config` into an experiment specification, translating parse errors.

    Args:
        config: raw specification content passed to `ExperimentSpecification.read`.
        raise_for_rest: selects the exception type raised on invalid content:
            `ValidationError` when true, `DjangoValidationError` otherwise.

    Returns:
        The object returned by `ExperimentSpecification.read`.

    Raises:
        ValidationError or DjangoValidationError: when reading the specification
            raises `MarshmallowValidationError`, `PolyaxonfileError` or
            `PolyaxonConfigurationError`. The original error is attached as the
            explicit cause.
    """
    try:
        return ExperimentSpecification.read(config)
    except (MarshmallowValidationError, PolyaxonfileError,
            PolyaxonConfigurationError) as e:
        message_error = 'Received non valid specification config. %s' % e
        error_class = ValidationError if raise_for_rest else DjangoValidationError
        # Chain explicitly so the traceback shows the parse error as the direct
        # cause rather than as an unrelated error raised while handling it.
        raise error_class(message_error) from e
Пример #11
0
    def test_set_metrics(self):
        """`experiments_set_metrics` stores one metric record for the experiment."""
        spec = ExperimentSpecification.read(experiment_spec_content)
        experiment = ExperimentFactory(config=spec.parsed_data)
        assert experiment.metrics.count() == 0

        timestamp = timezone.now()
        metric_values = {
            'accuracy': 0.9,
            'precision': 0.9
        }
        experiments_set_metrics(experiment_uuid=experiment.uuid.hex,
                                created_at=timestamp,
                                metrics=metric_values)

        assert experiment.metrics.count() == 1
Пример #12
0
    def test_serialize_with_environment_section(self):
        """Serializing an experiment fetched through the details query exposes `resources`."""
        yaml_content = """---
            version: 1

            kind: experiment

            framework: pytorch

            environment:
              resources:
                cpu:
                  requests: 2
                  limits: 4
                memory:
                  requests: 4096
                  limits: 10240

              replicas:
                n_workers: 2
                default_worker:
                  resources:
                    cpu:
                      requests: 2
                      limits: 4
                    memory:
                      requests: 4096
                      limits: 10240

            build:
                image: foo

            run:
              cmd: video_prediction_train --model=DNA --num_masks=1
        """
        parsed_spec = ExperimentSpecification.read(yaml_content)

        created = self.factory_class(config=parsed_spec.parsed_data)
        # Serialize the instance as returned by the details query.
        detailed = queries.experiments_details.get(id=created.id)
        serialized = self.serializer_class(detailed).data
        assert 'resources' in serialized
Пример #13
0
def experiments_stop(self,
                     project_name,
                     project_uuid,
                     experiment_name,
                     experiment_group_name,
                     experiment_group_uuid,
                     experiment_uuid,
                     specification,
                     update_status=True):
    """Stop an experiment through the scheduler, retrying when it is not deleted.

    `self` is the bound task instance; it must expose `request.retries` and
    `retry`. `specification` is parsed with `ExperimentSpecification.read`
    before the scheduler is called. If the scheduler reports the experiment as
    not deleted and fewer than two retries have happened, the task is retried.
    When `update_status` is true and the experiment can still be fetched, its
    status is set to STOPPED.
    """
    deleted = experiment_scheduler.stop_experiment(
        project_name=project_name,
        project_uuid=project_uuid,
        experiment_name=experiment_name,
        experiment_group_name=experiment_group_name,
        experiment_group_uuid=experiment_group_uuid,
        experiment_uuid=experiment_uuid,
        specification=ExperimentSpecification.read(specification),
    )

    if not deleted and self.request.retries < 2:
        _logger.info('Trying again to delete job `%s` in experiment.', experiment_name)
        self.retry(countdown=Intervals.EXPERIMENTS_SCHEDULER)
        return

    if not update_status:
        return

    experiment = get_valid_experiment(experiment_uuid=experiment_uuid)
    if experiment:
        # Record that the experiment has been stopped.
        experiment.set_status(ExperimentLifeCycle.STOPPED,
                              message='Experiment was stopped')
        return

    _logger.info(
        'Something went wrong, the Experiment `%s` does not exist anymore.',
        experiment_uuid)
Пример #14
0
          - Flatten:
          - Dense:
              units: 10
              activation: softmax

    train:
      data_pipeline:
        TFRecordImagePipeline:
          batch_size: 64
          num_epochs: 1
          shuffle: true
          dynamic_pad: false
          data_files: ["../data/mnist/mnist_train.tfrecord"]
          meta_data_file: "../data/mnist/meta_data.json"
"""
# Specification object obtained by reading `experiment_spec_content`; it is
# built once, when this module is imported.
experiment_spec_parsed_content = ExperimentSpecification.read(
    experiment_spec_content)

# Raw YAML for an experiment specification that declares a `fixtures` tag,
# a build image and a run command.
exec_experiment_spec_content = """---
    version: 1
    
    kind: experiment
    
    tags: [fixtures]

    build:
      image: my_image
    
    run:
      cmd: video_prediction_train --model=DNA --num_masks=1
"""
Пример #15
0
 def specification(self):
     """Return an `ExperimentSpecification` built from `self.config`, or None.

     None is returned whenever `self.config` is falsy.
     """
     if not self.config:
         return None
     return ExperimentSpecification(values=self.config)
Пример #16
0
 def create_experiment(self, config):
     """Read `config` as an experiment specification and create an experiment.

     The experiment is built with `ExperimentFactory` from the specification's
     parsed data and attached to `self.project`.
     """
     parsed_spec = ExperimentSpecification.read(config)
     return ExperimentFactory(config=parsed_spec.parsed_data, project=self.project)