def validate_experiment_spec_config(config):
    """Parse and validate a raw experiment specification config.

    Args:
        config: raw specification content (dict or YAML/JSON string) handed
            to ``ExperimentSpecification.read``.

    Returns:
        The parsed ``ExperimentSpecification`` instance.

    Raises:
        ValidationError: if the config cannot be parsed into a valid spec.
    """
    try:
        spec = ExperimentSpecification.read(config)
    except (PolyaxonfileError, PolyaxonConfigurationError) as e:
        # Surface the underlying parser error in the message (consistent with
        # the rest-aware variant of this validator) and chain the cause so
        # the original traceback is preserved.
        raise ValidationError(
            'Received non valid specification config. %s' % e) from e
    return spec
def test_create_experiment_with_valid_spec(self, spawner_mock):
    """A valid spec creates an independent experiment, drives it through
    CREATED -> SCHEDULED -> STARTING, and spawns exactly one job.

    ``spawner_mock`` patches the experiment spawner; its instance is wired to
    return ``start_experiment_value`` so no real scheduling happens.
    """
    config = ExperimentSpecification.read(experiment_spec_content)
    # Wire the mocked spawner instance before the factory triggers creation.
    mock_instance = spawner_mock.return_value
    mock_instance.start_experiment.return_value = start_experiment_value
    mock_instance.spec = config

    experiment = ExperimentFactory(config=config.parsed_data)
    assert experiment.is_independent is True

    # Exactly three status rows, in creation order.
    assert ExperimentStatus.objects.filter(experiment=experiment).count() == 3
    assert list(ExperimentStatus.objects.filter(experiment=experiment).values_list(
        'status', flat=True)) == [ExperimentLifeCycle.CREATED,
                                  ExperimentLifeCycle.SCHEDULED,
                                  ExperimentLifeCycle.STARTING]
    experiment.refresh_from_db()
    assert experiment.last_status == ExperimentLifeCycle.STARTING

    # Assert 1 job was created
    assert ExperimentJob.objects.filter(experiment=experiment).count() == 1
    # No resources were declared in the spec, so none are persisted.
    assert JobResources.objects.count() == 0
    jobs_statuses = ExperimentJob.objects.values_list('statuses__status', flat=True)
    assert set(jobs_statuses) == {JobLifeCycle.CREATED, }
    jobs = ExperimentJob.objects.filter(experiment=experiment)
    assert experiment.calculated_status == ExperimentLifeCycle.STARTING

    for job in jobs:
        # Assert the jobs status is created
        assert job.last_status == JobLifeCycle.CREATED
def test_get_with_resource_reg_90(self):
    """Regression test for issue#90.

    Fetching an experiment must succeed when the environment section
    declares resources but no framework.
    """
    spec_content = """---
    version: 1

    kind: experiment

    environment:
      resources:
        gpu:
          requests: 1
          limits: 1

    build:
      image: my_image

    run:
      cmd: video_prediction_train --model=DNA --num_masks=1
"""
    parsed_spec = ExperimentSpecification.read(spec_content)

    project = ProjectFactory(user=self.auth_client.user)
    experiment = self.factory_class(project=project,
                                    config=parsed_spec.parsed_data)

    url = '/{}/{}/{}/experiments/{}/'.format(API_V1,
                                             project.user.username,
                                             project.name,
                                             experiment.sequence)
    response = self.auth_client.get(url)
    assert response.status_code == status.HTTP_200_OK

    # The payload must match a fresh serialization of the stored experiment.
    experiment.refresh_from_db()
    assert response.data == self.serializer_class(experiment).data
def test_independent_experiment_creation_with_run_triggers_experiment_building_scheduling( self): config = ExperimentSpecification.read(exec_experiment_spec_content) # Create a repo for the project repo = RepoFactory() with patch('runner.dockerizer.builders.experiments.build_experiment' ) as mock_docker_build: experiment = ExperimentFactory(config=config.parsed_data, project=repo.project) assert mock_docker_build.call_count == 1 assert experiment.project.repo is not None assert experiment.is_independent is True assert ExperimentStatus.objects.filter( experiment=experiment).count() == 3 assert list( ExperimentStatus.objects.filter(experiment=experiment).values_list( 'status', flat=True)) == [ ExperimentLifeCycle.CREATED, ExperimentLifeCycle.BUILDING, ExperimentLifeCycle.SCHEDULED ] experiment.refresh_from_db() assert experiment.last_status == ExperimentLifeCycle.SCHEDULED
def test_serialize_with_environment_section(self):
    """An experiment whose spec carries an environment section serializes
    with a 'resources' key in the payload."""
    spec_content = """---
    version: 1

    kind: experiment

    environment:
      resources:
        cpu:
          requests: 2
          limits: 4
        memory:
          requests: 4096
          limits: 10240

      pytorch:
        n_workers: 2
        default_worker:
          resources:
            cpu:
              requests: 2
              limits: 4
            memory:
              requests: 4096
              limits: 10240

    run:
      image: my_image
      cmd: video_prediction_train --model=DNA --num_masks=1
"""
    specification = ExperimentSpecification.read(spec_content)
    instance = self.factory_class(config=specification.parsed_data)
    payload = self.serializer_class(instance).data
    assert 'resources' in payload
def test_serialize_cluster_resources(self):
    """Smoke test: serializing an experiment that declares per-worker
    cluster resources must not raise."""
    spec_content = """---
    version: 1

    kind: experiment

    environment:
      resources:
        cpu:
          requests: 2
          limits: 4
        memory:
          requests: 4096
          limits: 10240

      pytorch:
        n_workers: 2
        default_worker_resources:
          cpu:
            requests: 2
            limits: 4
          memory:
            requests: 4096
            limits: 10240

    run:
      image: my_image
      cmd: video_prediction_train --model=DNA --num_masks=1
"""
    specification = ExperimentSpecification.read(spec_content)
    instance = self.factory_class(config=specification.parsed_data)
    # Accessing .data forces serialization; the test only guards against errors.
    self.serializer_class(instance).data  # pylint:disable=expression-not-assigned
def experiments_stop(project_name,
                     project_uuid,
                     experiment_name,
                     experiment_group_name,
                     experiment_group_uuid,
                     experiment_uuid,
                     specification,
                     update_status=True):
    """Stop a running experiment via the scheduler and optionally record it.

    Args:
        project_name / project_uuid: owning project identifiers.
        experiment_name / experiment_uuid: experiment identifiers.
        experiment_group_name / experiment_group_uuid: owning group
            identifiers (may be None for independent experiments —
            TODO confirm against callers).
        specification: raw spec content; parsed here before being forwarded
            to the scheduler.
        update_status: when True, also set the experiment's status to STOPPED
            after asking the scheduler to stop it.
    """
    specification = ExperimentSpecification.read(specification)
    experiment_scheduler.stop_experiment(
        project_name=project_name,
        project_uuid=project_uuid,
        experiment_name=experiment_name,
        experiment_group_name=experiment_group_name,
        experiment_group_uuid=experiment_group_uuid,
        experiment_uuid=experiment_uuid,
        specification=specification,
    )

    if not update_status:
        return

    experiment = get_valid_experiment(experiment_uuid=experiment_uuid)
    if not experiment:
        # The experiment was deleted between scheduling the stop and now;
        # nothing left to update.
        _logger.info('Something went wrong, '
                     'the Experiment `%s` does not exist anymore.',
                     experiment_uuid)
        return

    # Update experiment status to show that its stopped
    experiment.set_status(ExperimentLifeCycle.STOPPED)
def validate_experiment_spec_config(config, raise_for_rest=False):
    """Parse and validate a raw experiment specification config.

    Args:
        config: raw specification content (dict or YAML/JSON string) handed
            to ``ExperimentSpecification.read``.
        raise_for_rest: when True raise the REST-framework ``ValidationError``
            (suitable for API responses); otherwise raise the Django
            ``DjangoValidationError`` (suitable for model/form validation).

    Returns:
        The parsed ``ExperimentSpecification`` instance.

    Raises:
        ValidationError: if parsing fails and ``raise_for_rest`` is True.
        DjangoValidationError: if parsing fails and ``raise_for_rest`` is False.
    """
    try:
        spec = ExperimentSpecification.read(config)
    except (PolyaxonfileError, PolyaxonConfigurationError) as e:
        message_error = 'Received non valid specification config. %s' % e
        # Chain the original parsing error explicitly so the root cause is
        # preserved in the traceback; the `else` after `raise` was redundant.
        if raise_for_rest:
            raise ValidationError(message_error) from e
        raise DjangoValidationError(message_error) from e
    return spec
def test_set_metrics(self):
    """experiments_set_metrics attaches a single metric record holding the
    given values to the experiment."""
    config = ExperimentSpecification.read(experiment_spec_content)
    experiment = ExperimentFactory(config=config.parsed_data)
    assert experiment.metrics.count() == 0

    created_at = timezone.now()
    experiments_set_metrics(experiment_uuid=experiment.uuid.hex,
                            created_at=created_at,
                            metrics={'accuracy': 0.9, 'precision': 0.9})
    assert experiment.metrics.count() == 1
def test_independent_experiment_creation_triggers_experiment_scheduling(self):
    """Creating an independent experiment records CREATED then SCHEDULED."""
    content = ExperimentSpecification.read(experiment_spec_content)
    experiment = ExperimentFactory(config=content.parsed_data)
    assert experiment.is_independent is True

    # Two status rows, in creation order.
    assert ExperimentStatus.objects.filter(experiment=experiment).count() == 2
    assert list(ExperimentStatus.objects.filter(experiment=experiment).values_list(
        'status', flat=True)) == [ExperimentLifeCycle.CREATED,
                                  ExperimentLifeCycle.SCHEDULED]
    experiment.refresh_from_db()
    assert experiment.last_status == ExperimentLifeCycle.SCHEDULED
    # NOTE(review): the original test repeated the assertion above verbatim
    # under a comment claiming to check that the experiment "is monitored";
    # it asserted nothing new, so the duplicate was removed. If monitoring
    # needs verification, assert on the monitor itself.
def reduce_configs(self):
    """Reduce the experiments to restart.

    Selects the surviving experiments for the next Hyperband bracket
    iteration, bumps their resource declaration (e.g. epochs/steps) to the
    value computed for this iteration, and either resumes or restarts each
    one depending on ``params_config.hyperband.resume``.
    """
    experiment_ids = self.get_reduced_configs()
    experiments = self.experiment_group.experiments.filter(
        id__in=experiment_ids)
    # Record the new iteration before mutating any experiment.
    self.create_iteration(experiment_ids=experiment_ids)

    iteration_config = self.experiment_group.iteration_config
    params_config = self.experiment_group.params_config
    # Resource budget for this (iteration, bracket_iteration) pair,
    # cast to the declared resource's type.
    n_resources = self.experiment_group.search_manager.get_resources_for_iteration(
        iteration=iteration_config.iteration)
    resource_value = self.experiment_group.search_manager.get_n_resources(
        n_resources=n_resources,
        bracket_iteration=iteration_config.bracket_iteration)
    resource_name = params_config.hyperband.resource.name
    resource_value = params_config.hyperband.resource.cast_value(
        resource_value)

    # Check if we need to resume or restart the experiments
    for experiment in experiments:
        declarations = experiment.declarations
        declarations[resource_name] = resource_value
        declarations_spec = {'declarations': declarations}
        # Overlay the updated declarations on the experiment's original spec;
        # later values take precedence in ExperimentSpecification.
        specification = ExperimentSpecification(values=[
            experiment.specification.parsed_data, declarations_spec
        ])
        status_message = 'Hyperband iteration: {}, bracket iteration: {}'.format(
            iteration_config.iteration, iteration_config.bracket_iteration)

        if params_config.hyperband.resume:
            experiment.resume(declarations=declarations,
                              config=specification.parsed_data,
                              message=status_message)
        else:
            experiment.restart(experiment_group=self.experiment_group,
                               declarations=declarations,
                               config=specification.parsed_data)
def create_experiment(self, config):
    """Build an experiment in ``self.project`` from the given raw spec config."""
    parsed = ExperimentSpecification.read(config)
    return ExperimentFactory(config=parsed.parsed_data,
                             project=self.project)
- Flatten: - Dense: units: 10 activation: softmax train: data_pipeline: TFRecordImagePipeline: batch_size: 64 num_epochs: 1 shuffle: true dynamic_pad: false data_files: ["../data/mnist/mnist_train.tfrecord"] meta_data_file: "../data/mnist/meta_data.json" """ experiment_spec_parsed_content = ExperimentSpecification.read(experiment_spec_content) exec_experiment_spec_content = """--- version: 1 kind: experiment project: name: project1 run: image: my_image cmd: video_prediction_train --model=DNA --num_masks=1 """ exec_experiment_spec_parsed_content = ExperimentSpecification.read(exec_experiment_spec_content)
def specification(self):
    """Return the parsed ``ExperimentSpecification`` built from ``self.config``,
    or None when no config is set."""
    if not self.config:
        return None
    return ExperimentSpecification(values=self.config)
def test_cluster_def_without_framwork(self):
    """A spec without a framework section yields a master-only cluster def.

    NOTE: the 'framwork' typo in the test name is kept so any explicit test
    selection referencing it keeps working.
    """
    fixture_path = os.path.abspath('tests/fixtures/env_without_framework.yml')
    spec = ExperimentSpecification.read(fixture_path)
    expected = ({TaskType.MASTER: 1}, False)
    self.assertEqual(spec.cluster_def, expected)