# Third-party imports used by this job; project-internal names (MLBackend, Prediction,
# MLBackendPredictionJob, TaskSerializer, safe_float, _validate_ml_api_result) are
# assumed to be imported from the surrounding project modules (not shown here).
import logging

from django.db import transaction
from django.db.models import Count, F, Q
from rq import get_current_job

logger = logging.getLogger(__name__)


def run_task_predictions(ml_backend_id, batch_size=100):
    """Run predictions and update the DB, stats counts and project prerequisites.

    :param ml_backend_id: ID of the ML backend to run predictions with
    :param batch_size: maximum number of tasks to process per run
    :return: dict with job status and counters
    """
    ml_backend = MLBackend.objects.get(id=ml_backend_id)
    response = ml_backend.setup()
    if response.is_error:
        raise ValueError(response.error_message)
    if response.response['model_version'] != ml_backend.model_version:
        ml_backend.model_version = response.response['model_version']
        ml_backend.save()

    # collect tasks without predictions for the current model version
    tasks_without_predictions = ml_backend.project.tasks.annotate(
        model_version=F('predictions__model_version'),
        num_predictions=Count('predictions')
    ).filter(~Q(model_version=ml_backend.model_version) | Q(num_predictions=0))

    if not tasks_without_predictions.exists():
        logger.info(f'Predictions for project {ml_backend.project} with version {ml_backend.model_version} '
                    f'already exist, update is not needed')
        return {'status': 'ok'}

    logger.info(f'Found {tasks_without_predictions.count()} tasks without predictions '
                f'from model version {ml_backend.model_version} in project {ml_backend.project}')

    # TODO: randomize task selection so that taken tasks don't clash with each other with high probability
    tasks = TaskSerializer(tasks_without_predictions[:batch_size], many=True).data

    failed_tasks = []
    for task in tasks:
        task_id = task['id']
        ml_api_result = ml_backend.api.make_predictions([task], ml_backend.model_version, ml_backend.project)
        if not _validate_ml_api_result(ml_api_result, [task], logger):
            logger.warning(f'Project {ml_backend.project}: task {task_id} failed')
            failed_tasks.append(task)
            continue
        prediction_result = ml_api_result.response['results'][0]
        # replace any stale prediction for this task/model version inside one transaction
        with transaction.atomic():
            Prediction.objects.filter(task_id=task_id, model_version=ml_backend.model_version).delete()
            Prediction.objects.create(
                task_id=task_id,
                model_version=ml_backend.model_version,
                result=prediction_result['result'],
                score=safe_float(prediction_result.get('score', 0)),
                cluster=prediction_result.get('cluster'),
                neighbors=prediction_result.get('neighbors'),
                mislabeling=safe_float(prediction_result.get('mislabeling', 0))
            )
        logger.info(f'Project {ml_backend.project}: task {task_id} processed '
                    f'with model version {ml_backend.model_version}')

    # the job record is no longer needed once the batch has been processed
    MLBackendPredictionJob.objects.filter(job_id=get_current_job().id).delete()
    logger.info(f'Total tasks processed: {len(tasks)}, failed: {len(failed_tasks)}')
    return {'status': 'ok', 'processed_num': len(tasks), 'failed': failed_tasks}
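
# Usage sketch (assumption, not from the original source): run_task_predictions calls
# get_current_job() and deletes an MLBackendPredictionJob record, so it is meant to run
# as a background RQ job. Assuming the project uses django_rq, enqueueing could look
# like this; the queue name, the helper name and the MLBackendPredictionJob field names
# are illustrative only.
def start_prediction_job(ml_backend, batch_size=100):
    import django_rq

    queue = django_rq.get_queue('default')
    job = queue.enqueue(run_task_predictions, ml_backend_id=ml_backend.id, batch_size=batch_size)
    # hypothetical bookkeeping so run_task_predictions can clean up after itself
    MLBackendPredictionJob.objects.create(job_id=job.id, ml_backend=ml_backend)
    return job
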
def predict_one_task(self, task):
    """Request a prediction for a single task from the ML backend and store it."""
    if self.not_ready:
        logger.debug(f'ML backend {self} is not ready to predict {task}')
        return

    if task.predictions.filter(model_version=self.model_version).exists():
        # prediction already exists for the current model version
        logger.info(
            f'Skip creating prediction with ML backend {self} for task {task}: '
            f'model version is up-to-date')
        return

    ml_api = self.api
    task_ser = TaskSerializer(task).data
    ml_api_result = ml_api.make_predictions([task_ser], self.model_version, self.project)
    if ml_api_result.is_error:
        logger.warning(
            f'Prediction not created by ML backend {self}: {ml_api_result.error_message}')
        return

    results = ml_api_result.response['results']
    if len(results) == 0:
        logger.error(f'ML backend {self} returned an empty prediction')
        return

    prediction_response = results[0]
    task_id = task_ser['id']
    r = prediction_response['result']
    score = prediction_response.get('score')
    prediction = Prediction.objects.create(
        result=r,
        score=safe_float(score),
        model_version=self.model_version,
        task_id=task_id,
        cluster=prediction_response.get('cluster'),
        neighbors=prediction_response.get('neighbors'),
        mislabeling=safe_float(prediction_response.get('mislabeling', 0)))
    logger.info(f'Prediction created: result={r}, score={score}, id={prediction.id}')

    # keep the stored model version in sync with the one reported by the ML backend
    model_version = ml_api_result.response.get('model_version')
    if model_version != self.model_version:
        self.model_version = model_version
        self.save()
        logger.info(f'ML backend {self} updated model version to {model_version}')
    return prediction
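
# Usage sketch (assumption): predict_one_task is a method on the ML backend model, so a
# caller that wants predictions for every task in the linked project could loop over
# ml_backend.project.tasks. The helper name below is hypothetical; only
# MLBackend.predict_one_task comes from the code above.
def predict_project_tasks(ml_backend):
    created = 0
    for task in ml_backend.project.tasks.all():
        # predict_one_task returns None when the backend is not ready, the prediction
        # already exists, or the ML API call fails
        if ml_backend.predict_one_task(task) is not None:
            created += 1
    logger.info(f'Created {created} predictions with ML backend {ml_backend}')
    return created
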
def __predict_one_task(self, task):
    """Private variant of predict_one_task: refreshes the backend state first, serializes
    the task with TaskSimpleSerializer and wraps the DB write in conditional_atomic()."""
    self.update_state()
    if self.not_ready:
        logger.debug(f'ML backend {self} is not ready to predict {task}')
        return

    if task.predictions.filter(model_version=self.model_version).exists():
        # prediction already exists for the current model version
        logger.info(
            f'Skip creating prediction with ML backend {self} for task {task}: model version '
            f'{self.model_version} is up-to-date')
        return

    ml_api = self.api
    task_ser = TaskSimpleSerializer(task).data
    ml_api_result = ml_api.make_predictions([task_ser], self.model_version, self.project)
    if ml_api_result.is_error:
        logger.warning(
            f'Prediction not created by ML backend {self}: {ml_api_result.error_message}')
        return

    results = ml_api_result.response['results']
    if len(results) == 0:
        logger.error(
            f'ML backend {self} returned an empty prediction',
            extra={'sentry_skip': True})
        return

    prediction_response = results[0]
    task_id = task_ser['id']
    r = prediction_response['result']
    score = prediction_response.get('score')
    with conditional_atomic():
        prediction = Prediction.objects.create(
            result=r,
            score=safe_float(score),
            model_version=self.model_version,
            task_id=task_id,
            cluster=prediction_response.get('cluster'),
            neighbors=prediction_response.get('neighbors'),
            mislabeling=safe_float(prediction_response.get('mislabeling', 0)),
        )
    logger.debug(f'Prediction {prediction} created')
    return prediction
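
# Note on conditional_atomic(): the helper itself is not shown in this section. Below is
# a minimal sketch of what such a context manager could look like, assuming it only opens
# a real database transaction when some condition holds (e.g. the active database handles
# transactions well for this workload); the project's actual implementation may differ,
# and the name conditional_atomic_sketch is used here to avoid implying it is the original.
from contextlib import contextmanager


@contextmanager
def conditional_atomic_sketch(use_transaction=True):
    # wrap the enclosed block in transaction.atomic() only when requested
    if use_transaction:
        with transaction.atomic():
            yield
    else:
        yield
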