def get(self, request, *args, **kwargs):
    """Serve the experiment's log file as a streamed attachment.

    Finished experiments read from the persisted log location; running
    ones first collect the current logs into a temporary file.
    """
    auditor.record(event_type=EXPERIMENT_LOGS_VIEWED,
                   instance=self.experiment,
                   actor_id=request.user.id,
                   actor_name=request.user.username)
    experiment_name = self.experiment.unique_name
    if self.experiment.is_done:
        log_path = get_experiment_logs_path(experiment_name, temp=False)
    else:
        # Still running: materialize the live logs into a temp location first.
        process_logs(experiment=self.experiment, temp=True)
        log_path = get_experiment_logs_path(experiment_name=experiment_name, temp=True)

    attachment_name = os.path.basename(log_path)
    buffer_size = 8192  # stream in 8 KiB chunks
    try:
        stream = FileWrapper(open(log_path, 'rb'), buffer_size)
        response = StreamingHttpResponse(
            stream, content_type=mimetypes.guess_type(log_path)[0])
        response['Content-Length'] = os.path.getsize(log_path)
        response['Content-Disposition'] = "attachment; filename={}".format(
            attachment_name)
        return response
    except FileNotFoundError:
        _logger.warning('Log file not found: log_path=%s', log_path)
        return Response(
            status=status.HTTP_404_NOT_FOUND,
            data='Log file not found: log_path={}'.format(log_path))
def test_experiment_logs_path_creation_deletion(self):
    """Creating then deleting experiment logs removes the log file."""
    create_experiment_logs_path(self.experiment.unique_name)
    experiment_logs_path = get_experiment_logs_path(self.experiment.unique_name)
    filepath = get_experiment_logs_path(self.experiment.unique_name)
    # Bug fix: close the handle instead of leaking an open file descriptor
    # (an open handle can also block the deletion below on some platforms).
    open(filepath, '+w').close()
    assert os.path.exists(experiment_logs_path) is True
    assert os.path.exists(filepath) is True
    delete_experiment_logs(self.experiment.unique_name)
    assert os.path.exists(filepath) is False
def test_experiment_logs_path_creation_deletion(self):
    """The signal creates the logs path; deleting logs removes the file."""
    experiment_logs_path = get_experiment_logs_path(
        self.experiment.unique_name)
    filepath = get_experiment_logs_path(self.experiment.unique_name)
    # Bug fix: close the handle instead of leaking an open file descriptor.
    open(filepath, '+w').close()
    # Should be true, created by the signal
    assert os.path.exists(experiment_logs_path) is True
    assert os.path.exists(filepath) is True
    delete_experiment_logs(self.experiment.unique_name)
    assert os.path.exists(filepath) is False
def test_experiment_logs_path_creation_deletion(self):
    """Creating then deleting experiment logs removes the log file."""
    create_experiment_logs_path(self.experiment.unique_name)
    experiment_logs_path = get_experiment_logs_path(
        self.experiment.unique_name)
    filepath = get_experiment_logs_path(self.experiment.unique_name)
    # Bug fix: close the handle instead of leaking an open file descriptor.
    open(filepath, '+w').close()
    assert os.path.exists(experiment_logs_path) is True
    assert os.path.exists(filepath) is True
    delete_experiment_logs(self.experiment.unique_name)
    assert os.path.exists(filepath) is False
def handle_events_job_logs(experiment_name, experiment_uuid, job_uuid, log_line,
                           task_type=None, task_idx=None):
    """Append one log line to the experiment's log file.

    A throwaway FileHandler is attached to the experiment's named logger,
    the line is written, and the handler is detached again so each event
    leaves the logger clean.
    """
    # Must persist resources if logs according to the config
    if not Experiment.objects.filter(uuid=experiment_uuid).exists():
        return
    _logger.debug('handling log event for %s %s', experiment_uuid, job_uuid)
    # NOTE(review): `task_idx` 0 is falsy, so the master task (idx 0) gets no
    # prefix — confirm whether `task_idx is not None` was intended.
    if task_type and task_idx:
        log_line = '{}.{} -- {}'.format(task_type, int(task_idx) + 1, log_line)
    xp_logger = logging.getLogger(experiment_name)
    log_path = get_experiment_logs_path(experiment_name)
    try:
        log_handler = logging.FileHandler(log_path)
    except OSError:
        # TODO: retry instead?
        return
    try:
        log_formatter = logging.Formatter(
            '%(asctime)s %(levelname)s %(message)s')
        log_handler.setFormatter(log_formatter)
        xp_logger.addHandler(log_handler)
        xp_logger.setLevel(logging.INFO)
        xp_logger.info(log_line)
    except OSError:
        # TODO: retry instead?
        pass
    finally:
        # Always detach the handler and — bug fix — close it so the file
        # descriptor is released instead of leaking one per event.
        xp_logger.handlers = []
        log_handler.close()
def test_handle_events_job_logs_create_one_handler(self):
    """Each call attaches a temporary handler, writes one line, then detaches it."""
    with patch('scheduler.tasks.experiments.experiments_build.apply_async'
               ) as _:  # noqa
        experiment = ExperimentFactory()
        event_kwargs = dict(experiment_name=experiment.unique_name,
                            experiment_uuid=experiment.uuid.hex,
                            job_uuid=uuid.uuid4().hex,
                            log_line='First test',
                            task_type=TaskType.MASTER,
                            task_idx=0)
        handle_events_job_logs(**event_kwargs)
        # A new log path must have been created.
        log_path = get_experiment_logs_path(experiment.unique_name)
        assert os.path.exists(log_path) is True
        # The logger must be left without file handlers, with one line written.
        xp_logger = logging.getLogger(experiment.unique_name)
        assert len(xp_logger.handlers) == 0  # pylint:disable=len-as-condition
        assert self.file_line_count(log_path) == 1  # pylint:disable=len-as-condition
        # A second call must not reuse a handler and must append a new line.
        handle_events_job_logs(**event_kwargs)
        xp_logger = logging.getLogger(experiment.unique_name)
        assert len(xp_logger.handlers) == 0  # pylint:disable=len-as-condition
        assert self.file_line_count(log_path) == 2
def get_config_map(namespace, project_name, experiment_group_name, experiment_name,
                   project_uuid, experiment_group_uuid, experiment_uuid, original_name,
                   cloning_strategy, cluster_def, declarations, log_level):
    """Build the k8s ConfigMap carrying the experiment's runtime settings."""
    labels = get_map_labels(project_name, experiment_group_name, experiment_name,
                            project_uuid, experiment_group_uuid, experiment_uuid)
    metadata = client.V1ObjectMeta(
        name=constants.CONFIG_MAP_NAME.format(experiment_uuid=experiment_uuid),
        labels=labels,
        namespace=namespace)
    outputs_path = get_experiment_outputs_path(experiment_name=experiment_name,
                                               original_name=original_name,
                                               cloning_strategy=cloning_strategy)
    logs_path = get_experiment_logs_path(experiment_name)
    data_path = get_project_data_path(project_name)
    config_data = {
        constants.CONFIG_MAP_CLUSTER_KEY_NAME: json.dumps(cluster_def),
        constants.CONFIG_MAP_DECLARATIONS_KEY_NAME: json.dumps(declarations) or '{}',
        constants.CONFIG_MAP_EXPERIMENT_INFO_KEY_NAME: json.dumps(labels),
        constants.CONFIG_MAP_LOG_LEVEL_KEY_NAME: log_level,
        API_KEY_NAME: get_settings_api_url(),
        constants.CONFIG_MAP_EXPERIMENT_OUTPUTS_PATH_KEY_NAME: outputs_path,
        constants.CONFIG_MAP_EXPERIMENT_LOGS_PATH_KEY_NAME: logs_path,
        constants.CONFIG_MAP_EXPERIMENT_DATA_PATH_KEY_NAME: data_path,
    }
    return client.V1ConfigMap(api_version=k8s_constants.K8S_API_VERSION_V1,
                              kind=k8s_constants.K8S_CONFIG_MAP_KIND,
                              metadata=metadata,
                              data=config_data)
def get_pod_container(self,
                      volume_mounts,
                      env_vars=None,
                      command=None,
                      args=None,
                      persistence_outputs=None,
                      persistence_data=None,
                      outputs_refs_jobs=None,
                      outputs_refs_experiments=None,
                      secret_refs=None,
                      configmap_refs=None,
                      resources=None,
                      ephemeral_token=None):
    """Pod job container for task."""
    assert self.cluster_def is not None

    # Assemble the container environment: caller vars, job vars, cluster
    # info, then resource-derived vars — order preserved.
    container_env = to_list(env_vars, check_none=True)
    outputs_path = get_experiment_outputs_path(
        persistence_outputs=persistence_outputs,
        experiment_name=self.experiment_name,
        original_name=self.original_name,
        cloning_strategy=self.cloning_strategy)
    container_env += get_job_env_vars(
        persistence_outputs=persistence_outputs,
        outputs_path=outputs_path,
        persistence_data=persistence_data,
        log_level=self.log_level,
        logs_path=get_experiment_logs_path(self.experiment_name, temp=False),
        outputs_refs_jobs=outputs_refs_jobs,
        outputs_refs_experiments=outputs_refs_experiments,
        ephemeral_token=ephemeral_token,
    )
    container_env.append(get_env_var(name=constants.CONFIG_MAP_CLUSTER_KEY_NAME,
                                     value=json.dumps(self.cluster_def)))
    container_env.append(get_env_var(name=constants.CONFIG_MAP_DECLARATIONS_KEY_NAME,
                                     value=self.declarations))
    container_env.append(get_env_var(name=constants.CONFIG_MAP_EXPERIMENT_INFO_KEY_NAME,
                                     value=json.dumps(self.experiment_labels)))
    container_env += get_resources_env_vars(resources=resources)

    # Env from configmap and secret refs
    env_from = get_pod_env_from(secret_refs=secret_refs,
                                configmap_refs=configmap_refs)
    ports = [client.V1ContainerPort(container_port=port) for port in self.ports]
    return client.V1Container(name=self.job_container_name,
                              image=self.job_docker_image,
                              command=command,
                              args=args,
                              ports=ports,
                              env=container_env,
                              env_from=env_from,
                              resources=get_resources(resources),
                              volume_mounts=volume_mounts)
def safe_log_experiment_job(experiment_name, log_lines):
    """Write log lines under the log lock, creating the logs path on demand.

    The first failure is assumed to mean the logs path does not exist yet;
    it is created and the write retried once. A second failure propagates.
    """
    log_path = get_experiment_logs_path(experiment_name)
    for attempt in range(2):
        try:
            _lock_log(log_path, log_lines)
            return
        except (FileNotFoundError, OSError):
            if attempt:
                raise
            create_experiment_logs_path(experiment_name=experiment_name)
def test_experiment_group_logs_path_creation_deletion(self):
    """Deleting group logs removes both experiment and group log paths."""
    experiment = ExperimentFactory(user=self.project.user,
                                   project=self.project,
                                   experiment_group=self.experiment_group)
    experiment_logs_path = get_experiment_logs_path(experiment.unique_name)
    # Bug fix: close the handle instead of leaking an open file descriptor.
    open(experiment_logs_path, '+w').close()
    experiment_group_logs_path = get_experiment_group_logs_path(
        self.experiment_group.unique_name)
    # Should be true, created by the signal
    assert os.path.exists(experiment_logs_path) is True
    assert os.path.exists(experiment_group_logs_path) is True
    delete_experiment_group_logs(self.experiment_group.unique_name)
    assert os.path.exists(experiment_logs_path) is False
    assert os.path.exists(experiment_group_logs_path) is False
def test_experiment_group_logs_path_creation_deletion(self):
    """Deleting group logs removes both experiment and group log paths."""
    experiment = ExperimentFactory(user=self.project.user,
                                   project=self.project,
                                   experiment_group=self.experiment_group)
    experiment_logs_path = get_experiment_logs_path(experiment.unique_name)
    create_experiment_logs_path(experiment.unique_name)
    # Bug fix: close the handle instead of leaking an open file descriptor.
    open(experiment_logs_path, '+w').close()
    experiment_group_logs_path = get_experiment_group_logs_path(
        self.experiment_group.unique_name)
    # Should be true, created by the signal
    assert os.path.exists(experiment_logs_path) is True
    assert os.path.exists(experiment_group_logs_path) is True
    delete_experiment_group_logs(self.experiment_group.unique_name)
    assert os.path.exists(experiment_logs_path) is False
    assert os.path.exists(experiment_group_logs_path) is False
def get_pod_container(self, volume_mounts, env_vars=None, command=None, args=None,
                      persistence_outputs=None, persistence_data=None,
                      outputs_refs_jobs=None, outputs_refs_experiments=None,
                      resources=None):
    """Pod job container for task."""
    assert self.cluster_def is not None

    # Assemble the container environment: caller vars, job vars, cluster
    # info, then resource-derived vars — order preserved.
    container_env = get_list(env_vars)
    outputs_path = get_experiment_outputs_path(
        persistence_outputs=persistence_outputs,
        experiment_name=self.experiment_name,
        original_name=self.original_name,
        cloning_strategy=self.cloning_strategy)
    container_env += get_job_env_vars(
        log_level=self.log_level,
        outputs_path=outputs_path,
        data_paths=get_data_paths(persistence_data),
        logs_path=get_experiment_logs_path(self.experiment_name),
        outputs_refs_jobs=outputs_refs_jobs,
        outputs_refs_experiments=outputs_refs_experiments)
    container_env.append(get_env_var(name=constants.CONFIG_MAP_CLUSTER_KEY_NAME,
                                     value=json.dumps(self.cluster_def)))
    container_env.append(get_env_var(name=constants.CONFIG_MAP_DECLARATIONS_KEY_NAME,
                                     value=self.declarations))
    container_env.append(get_env_var(name=constants.CONFIG_MAP_EXPERIMENT_INFO_KEY_NAME,
                                     value=json.dumps(self.experiment_labels)))
    container_env += get_resources_env_vars(resources=resources)

    ports = [client.V1ContainerPort(container_port=port) for port in self.ports]
    return client.V1Container(name=self.job_container_name,
                              image=self.job_docker_image,
                              command=command,
                              args=args,
                              ports=ports,
                              env=container_env,
                              resources=get_resources(resources),
                              volume_mounts=volume_mounts)
def setUp(self):
    """Create a project/experiment pair and seed its log file with fake lines."""
    super().setUp()
    project = ProjectFactory(user=self.auth_client.user)
    experiment = ExperimentFactory(project=project)
    self.url = '/{}/{}/{}/experiments/{}/logs'.format(
        API_V1, project.user.username, project.name, experiment.sequence)
    log_path = get_experiment_logs_path(experiment.unique_name)
    fake = Faker()
    self.logs = [fake.sentence() for _ in range(self.num_log_lines)]
    with open(log_path, 'w') as log_file:
        log_file.writelines('{}\n'.format(line) for line in self.logs)
def test_project_logs_path_creation_deletion(self):
    """Deleting project logs removes project and experiment log paths."""
    with patch('scheduler.tasks.experiments.experiments_build.apply_async') as _:  # noqa
        experiment = ExperimentFactory(user=self.project.user, project=self.project)
        experiment_logs_path = get_experiment_logs_path(experiment.unique_name)
        create_experiment_logs_path(experiment.unique_name)
        # Bug fix: close the handle instead of leaking an open file descriptor.
        open(experiment_logs_path, '+w').close()
        project_logs_path = get_project_logs_path(self.project.unique_name)
        # NOTE(review): both paths come from get_project_logs_path; the name
        # suggests this should use a repos-path helper — confirm intent.
        project_repos_path = get_project_logs_path(self.project.unique_name)
        # Should be true, created by the signal
        assert os.path.exists(experiment_logs_path) is True
        assert os.path.exists(project_logs_path) is True
        assert os.path.exists(project_repos_path) is True
        delete_project_logs(self.project.unique_name)
        assert os.path.exists(experiment_logs_path) is False
        assert os.path.exists(project_logs_path) is False
        assert os.path.exists(project_repos_path) is False
def get_pod_container(self, volume_mounts, env_vars=None, command=None, args=None,
                      resources=None):
    """Pod job container for task."""
    assert self.cluster_def is not None

    # Assemble the container environment: caller vars, job vars, cluster
    # info, then (optionally) resource-derived vars — order preserved.
    container_env = get_list(env_vars)
    outputs_path = get_experiment_outputs_path(
        experiment_name=self.experiment_name,
        original_name=self.original_name,
        cloning_strategy=self.cloning_strategy)
    container_env += get_job_env_vars(
        log_level=self.log_level,
        outputs_path=outputs_path,
        logs_path=get_experiment_logs_path(self.experiment_name),
        data_path=get_experiment_data_path(self.experiment_name),
        project_data_path=get_project_data_path(project_name=self.project_name)
    )
    container_env.append(get_env_var(name=constants.CONFIG_MAP_CLUSTER_KEY_NAME,
                                     value=json.dumps(self.cluster_def)))
    container_env.append(get_env_var(name=constants.CONFIG_MAP_DECLARATIONS_KEY_NAME,
                                     value=self.declarations))
    container_env.append(get_env_var(name=constants.CONFIG_MAP_EXPERIMENT_INFO_KEY_NAME,
                                     value=json.dumps(self.experiment_labels)))
    if resources:
        container_env += get_resources_env_vars(resources=resources)

    ports = [client.V1ContainerPort(container_port=port) for port in self.ports]
    return client.V1Container(name=self.job_container_name,
                              image=self.job_docker_image,
                              command=command,
                              args=args,
                              ports=ports,
                              env=container_env,
                              resources=get_resources(resources),
                              volume_mounts=volume_mounts)
def get(self, request, *args, **kwargs):
    # Stream the experiment's log file to the client as a downloadable attachment.
    experiment = self.get_experiment()
    # NOTE(review): the audit record uses self.experiment while the log path
    # below uses the locally fetched `experiment` — confirm both refer to the
    # same object; otherwise the audit targets the wrong instance.
    auditor.record(event_type=EXPERIMENT_LOGS_VIEWED,
                   instance=self.experiment,
                   actor_id=request.user.id)
    log_path = get_experiment_logs_path(experiment.unique_name)
    filename = os.path.basename(log_path)
    chunk_size = 8192  # stream the file in 8 KiB chunks
    try:
        wrapped_file = FileWrapper(open(log_path, 'rb'), chunk_size)
        # Content type is guessed from the path; may be None if unknown.
        response = StreamingHttpResponse(wrapped_file,
                                         content_type=mimetypes.guess_type(log_path)[0])
        response['Content-Length'] = os.path.getsize(log_path)
        response['Content-Disposition'] = "attachment; filename={}".format(filename)
        return response
    except FileNotFoundError:
        # No logs were produced (or were cleaned up): answer 404 with details.
        logger.warning('Log file not found: log_path=%s', log_path)
        return Response(status=status.HTTP_404_NOT_FOUND,
                        data='Log file not found: log_path={}'.format(log_path))
def setUp(self):
    """Create a project/experiment pair, its logs path, and seed fake log lines."""
    super().setUp()
    project = ProjectFactory(user=self.auth_client.user)
    experiment = ExperimentFactory(project=project)
    self.url = '/{}/{}/{}/experiments/{}/logs'.format(
        API_V1, project.user.username, project.name, experiment.sequence)
    log_path = get_experiment_logs_path(experiment.unique_name)
    create_experiment_logs_path(experiment_name=experiment.unique_name)
    fake = Faker()
    self.logs = [fake.sentence() for _ in range(self.num_log_lines)]
    with open(log_path, 'w') as log_file:
        log_file.writelines('{}\n'.format(line) for line in self.logs)
def get_config_map(namespace, project_name, experiment_group_name, experiment_name,
                   project_uuid, experiment_group_uuid, experiment_uuid, original_name,
                   cloning_strategy, cluster_def, declarations, log_level):
    """Build the k8s ConfigMap with the run's cluster info, paths, and settings."""
    labels = get_map_labels(project_name, experiment_group_name, experiment_name,
                            project_uuid, experiment_group_uuid, experiment_uuid)
    metadata = client.V1ObjectMeta(
        name=constants.CONFIG_MAP_NAME.format(uuid=experiment_uuid),
        labels=labels,
        namespace=namespace)
    outputs_path = get_experiment_outputs_path(experiment_name=experiment_name,
                                               original_name=original_name,
                                               cloning_strategy=cloning_strategy)
    logs_path = get_experiment_logs_path(experiment_name)
    data_path = get_project_data_path(project_name)
    config_data = {
        constants.CONFIG_MAP_CLUSTER_KEY_NAME: json.dumps(cluster_def),
        constants.CONFIG_MAP_DECLARATIONS_KEY_NAME: json.dumps(declarations) or '{}',
        constants.CONFIG_MAP_EXPERIMENT_INFO_KEY_NAME: json.dumps(labels),
        constants.CONFIG_MAP_LOG_LEVEL_KEY_NAME: log_level,
        API_KEY_NAME: get_settings_api_url(),
        constants.CONFIG_MAP_RUN_OUTPUTS_PATH_KEY_NAME: outputs_path,
        constants.CONFIG_MAP_RUN_LOGS_PATH_KEY_NAME: logs_path,
        constants.CONFIG_MAP_RUN_DATA_PATH_KEY_NAME: data_path,
    }
    return client.V1ConfigMap(api_version=k8s_constants.K8S_API_VERSION_V1,
                              kind=k8s_constants.K8S_CONFIG_MAP_KIND,
                              metadata=metadata,
                              data=config_data)
def get_log_path(instance):
    """Return the logs path derived from the instance's unique name."""
    unique_name = instance.unique_name
    return get_experiment_logs_path(unique_name)