def AddLabeledExampleIds(self, request: AddLabeledExampleIdsRequest, context: grpc.ServicerContext) -> Empty:
    """Fetch the referenced objects and feed them to the search as labeled examples.

    `request.examples` maps object id -> label. Each object's content is
    retrieved from the search's retriever on a background thread and pushed
    into a queue that `search.add_labeled_examples` consumes via `to_iter`,
    so retrieval and ingestion overlap.

    Returns an Empty message on success; re-raises (after logging) any
    failure, including errors raised on the retrieval thread.
    """
    try:
        search = self._manager.get_search(request.searchId)
        examples = queue.Queue()
        exceptions = []

        def get_examples():
            try:
                # .items() avoids a second map lookup per object id.
                for object_id, label in request.examples.items():
                    example = search.retriever.get_object(object_id, [ATTR_DATA])
                    examples.put(LabeledExample(label=label, content=example.content))
            except Exception as e:
                # Capture for re-raise on the caller's thread.
                exceptions.append(e)
            finally:
                # Sentinel so to_iter() terminates even on failure.
                examples.put(None)

        threading.Thread(target=get_examples, name='get-examples').start()
        search.add_labeled_examples(to_iter(examples))
        if exceptions:
            raise exceptions[0]
        return Empty()
    except Exception as e:
        logger.exception(e)
        # Bare raise preserves the original traceback for the gRPC layer.
        raise
def _get_example_features( self, example_dir: Path) -> Dict[str, List[List[float]]]: semaphore = threading.Semaphore( 256 ) # Make sure that the load function doesn't overload the consumer with mp.get_context('spawn').Pool( min(4, mp.cpu_count()), initializer=set_worker_feature_provider, initargs=(self.feature_provider.feature_extractor, self.feature_provider.cache)) as pool: images = pool.imap_unordered( load_from_path, bounded_iter(example_dir.glob('*/*'), semaphore)) feature_queue = queue.Queue() @log_exceptions def process_uncached(): cached = 0 uncached = 0 batch = [] for label, should_process, payload in images: semaphore.release() if should_process: image, key = payload batch.append( (label, torch.from_numpy(image).to( self.feature_provider.device, non_blocking=True), key)) if len(batch) == BATCH_SIZE: self._process_batch(batch, feature_queue) batch = [] uncached += 1 else: feature_queue.put((label, payload)) cached += 1 if len(batch) > 0: self._process_batch(batch, feature_queue) logger.info( '{} cached examples, {} new examples preprocessed'.format( cached, uncached)) feature_queue.put(None) threading.Thread(target=process_uncached, name='process-uncached-trainer').start() i = 0 features = defaultdict(list) for feature in to_iter(feature_queue): i += 1 features[feature[0]].append(feature[1]) logger.info('Retrieved {} feature vectors'.format(i)) return features
def add_labeled_examples(self, examples: Iterable[LabeledExample]) -> None:
    """Store labeled examples locally and/or forward them to the master node.

    On the master (node_index 0) examples are simply stored. On other nodes
    a streaming AddLabeledExamples RPC to node 0 is opened, fed through a
    queue, and what is stored locally vs. forwarded depends on the search's
    data requirement.
    """
    if self._context.node_index == 0:
        # Master keeps everything itself; nothing to forward.
        self._store_labeled_examples(examples, None)
        return

    example_queue = queue.Queue()
    data_requirement = self._get_data_requirement()
    if data_requirement is DataRequirement.MASTER_ONLY:
        # Stream every example to the master; nothing is stored locally.
        future = self._context.nodes[0].api.AddLabeledExamples.future(
            to_iter(example_queue))
        # First message on the stream identifies the search.
        example_queue.put(
            LabeledExampleRequest(searchId=self._context.search_id))
        for example in examples:
            example_queue.put(LabeledExampleRequest(example=example))
    else:
        # NOTE(review): `assert False` makes everything below unreachable
        # in normal runs (it only executes under `python -O`, which strips
        # asserts). Confirm whether the distributed paths are deliberately
        # disabled or this is leftover debug code.
        assert False
        future = self._context.nodes[0].api.AddLabeledExamples.future(
            to_iter(example_queue))
        example_queue.put(
            LabeledExampleRequest(searchId=self._context.search_id))

        if data_requirement is DataRequirement.DISTRIBUTED_FULL:
            # Store locally AND forward every example to the master.
            self._store_labeled_examples(
                examples, lambda x: example_queue.put(
                    LabeledExampleRequest(example=x)))
        else:
            # We're using distributed_positives - only share positives and test set
            def add_example(example: LabeledExample) -> None:
                # NOTE(review): `is` comparison between `.value` and an
                # enum constant relies on identity of (presumably) small
                # ints — verify `==`/enum-member comparison isn't intended.
                if example.exampleSet.value is ExampleSet.TEST or example.label == '1':
                    example_queue.put(
                        LabeledExampleRequest(example=example))

            self._store_labeled_examples(examples, add_example)

    # Sentinel closes the request stream; wait for the RPC to finish.
    example_queue.put(None)
    future.result()
def _validate_test_results_thread(self, model_version: int) -> None:
    """Evaluate a staged model on the TEST set and stream results to node 0.

    Opens a streaming SubmitTestResults RPC fed through a queue, runs the
    model over the test examples, pushing one TestResult per example, then
    closes the stream and waits for the RPC to complete.
    """
    logger.info('Executing validation request')
    model = self._get_staging_model(model_version)

    submission_queue = queue.Queue()
    # Start the RPC first; to_iter() lazily drains the queue as the
    # request stream.
    future = self.nodes[0].internal.SubmitTestResults.future(
        to_iter(submission_queue))
    # The opening message identifies the search and the model version.
    submission_queue.put(
        SubmitTestRequest(version=SubmitTestVersion(
            searchId=self._id, version=model_version)))

    def enqueue_result(target: int, pred: float):
        # One result message per inferred example.
        submission_queue.put(
            SubmitTestRequest(result=TestResult(label=str(target),
                                                score=pred)))

    started_at = time.time()
    with self._data_manager.get_examples(ExampleSet.TEST) as test_dir:
        model.infer_dir(test_dir, enqueue_result)
    submission_queue.put(None)  # sentinel: end of the request stream
    future.result()
    logger.info('Evaluated model in {:.3f} seconds'.format(
        time.time() - started_at))
    logger.info('Submitted test results for model version {}'.format(
        model.version))