Example #1
    def AddLabeledExampleIds(self, request: AddLabeledExampleIdsRequest, context: grpc.ServicerContext) -> Empty:
        try:
            search = self._manager.get_search(request.searchId)

            examples = queue.Queue()
            exceptions = []

            # Producer: fetch each labeled object's content on a background
            # thread and stream it through the queue; a None sentinel marks
            # the end of the stream.
            def get_examples():
                try:
                    for object_id in request.examples:
                        example = search.retriever.get_object(object_id, [ATTR_DATA])
                        examples.put(LabeledExample(label=request.examples[object_id], content=example.content))
                except Exception as e:
                    exceptions.append(e)
                finally:
                    examples.put(None)

            threading.Thread(target=get_examples, name='get-examples').start()

            # Consumer: to_iter drains the queue until it sees the sentinel.
            search.add_labeled_examples(to_iter(examples))

            # Surface any error raised on the producer thread.
            if exceptions:
                raise exceptions[0]

            return Empty()
        except Exception as e:
            logger.exception(e)
            raise
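Note: every example on this page relies on a to_iter helper that is not shown. Its contract is clear from the call sites: a producer enqueues items followed by a None sentinel, and to_iter yields items until it sees that sentinel. A minimal sketch of such a helper (only the name and usage come from the examples; the body is an assumption):

import queue
from typing import Iterator, Optional, TypeVar

T = TypeVar('T')


def to_iter(q: 'queue.Queue[Optional[T]]') -> Iterator[T]:
    # Yield queued items until the producer enqueues the None sentinel.
    while True:
        item = q.get()
        if item is None:
            return
        yield item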
Example #2
    def _get_example_features(
            self, example_dir: Path) -> Dict[str, List[List[float]]]:
        # Bound the number of in-flight images so the loader pool cannot
        # overload the consumer; the consumer releases one permit per item.
        semaphore = threading.Semaphore(256)

        with mp.get_context('spawn').Pool(
                min(4, mp.cpu_count()),
                initializer=set_worker_feature_provider,
                initargs=(self.feature_provider.feature_extractor,
                          self.feature_provider.cache)) as pool:
            images = pool.imap_unordered(
                load_from_path,
                bounded_iter(example_dir.glob('*/*'), semaphore))
            feature_queue = queue.Queue()

            @log_exceptions
            def process_uncached():
                cached = 0
                uncached = 0
                batch = []
                for label, should_process, payload in images:
                    semaphore.release()  # free a permit for the loader
                    if should_process:
                        # Cache miss: move the image tensor to the device
                        # and batch it for feature extraction.
                        image, key = payload
                        batch.append(
                            (label,
                             torch.from_numpy(image).to(
                                 self.feature_provider.device,
                                 non_blocking=True),
                             key))
                        if len(batch) == BATCH_SIZE:
                            self._process_batch(batch, feature_queue)
                            batch = []
                        uncached += 1
                    else:
                        # Cache hit: the payload is already a feature vector.
                        feature_queue.put((label, payload))
                        cached += 1

                # Flush the final partial batch.
                if batch:
                    self._process_batch(batch, feature_queue)

                logger.info(
                    '{} cached examples, {} new examples preprocessed'.format(
                        cached, uncached))
                feature_queue.put(None)  # sentinel: no more features

            threading.Thread(target=process_uncached,
                             name='process-uncached-trainer').start()

            # Drain the queue until the sentinel, grouping vectors by label.
            count = 0
            features = defaultdict(list)
            for label, vector in to_iter(feature_queue):
                count += 1
                features[label].append(vector)

            logger.info('Retrieved {} feature vectors'.format(count))

            return features
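Example #2 also leans on two helpers that are not defined on this page: bounded_iter, which throttles the directory glob against the semaphore, and log_exceptions, which guards the worker thread. Sketches consistent with how they are used above (the bodies are assumptions):

import functools
import logging
import threading
from typing import Iterable, Iterator, TypeVar

T = TypeVar('T')
logger = logging.getLogger(__name__)


def bounded_iter(it: Iterable[T], semaphore: threading.Semaphore) -> Iterator[T]:
    # Acquire one permit per item before handing it to the pool, capping the
    # number of in-flight items at the semaphore's initial value (256 above);
    # the consumer releases a permit as it handles each result.
    for item in it:
        semaphore.acquire()
        yield item


def log_exceptions(func):
    # Exceptions raised in a threading.Thread target disappear silently by
    # default; log and re-raise them instead.
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Exception:
            logger.exception('Unhandled exception in %s', func.__name__)
            raise
    return wrapper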
Example #3
    def add_labeled_examples(self, examples: Iterable[LabeledExample]) -> None:
        # Node 0 (the master) stores examples locally; every other node
        # streams them to the master over a client-streaming RPC.
        if self._context.node_index == 0:
            self._store_labeled_examples(examples, None)
            return

        example_queue = queue.Queue()

        data_requirement = self._get_data_requirement()
        if data_requirement is DataRequirement.MASTER_ONLY:
            # Open the streaming call first; the initial message identifies
            # the search, and each later message carries one example.
            future = self._context.nodes[0].api.AddLabeledExamples.future(
                to_iter(example_queue))
            example_queue.put(
                LabeledExampleRequest(searchId=self._context.search_id))
            for example in examples:
                example_queue.put(LabeledExampleRequest(example=example))
        else:
            assert False  # NOTE: this branch is disabled in the source
            future = self._context.nodes[0].api.AddLabeledExamples.future(
                to_iter(example_queue))
            example_queue.put(
                LabeledExampleRequest(searchId=self._context.search_id))

            if data_requirement is DataRequirement.DISTRIBUTED_FULL:
                # Store locally and forward every example to the master.
                self._store_labeled_examples(
                    examples,
                    lambda x: example_queue.put(
                        LabeledExampleRequest(example=x)))
            else:
                # DISTRIBUTED_POSITIVES: only share positives and the test set.
                def add_example(example: LabeledExample) -> None:
                    if (example.exampleSet.value == ExampleSet.TEST
                            or example.label == '1'):
                        example_queue.put(
                            LabeledExampleRequest(example=example))

                self._store_labeled_examples(examples, add_example)

        example_queue.put(None)  # sentinel ends the request stream
        future.result()  # wait for the RPC to finish and surface any error
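The ordering in this example matters: the .future() call is issued before anything is enqueued, because the streaming RPC consumes to_iter(example_queue) lazily on another thread while the caller keeps feeding the queue; the None sentinel then ends the stream, and future.result() waits for the server. A self-contained toy version of that handshake, with a plain function standing in for the gRPC stub:

import queue
from concurrent.futures import ThreadPoolExecutor


def consume(items):
    # Stand-in for a client-streaming RPC: lazily drains the iterator.
    return sum(1 for _ in items)


request_queue = queue.Queue()
with ThreadPoolExecutor(max_workers=1) as executor:
    # Start the consumer first so it drains while we enqueue.
    future = executor.submit(consume, to_iter(request_queue))
    request_queue.put('header')  # e.g. the searchId message above
    for payload in ('a', 'b'):
        request_queue.put(payload)
    request_queue.put(None)  # sentinel ends the stream
    assert future.result() == 3  # header + two payloads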
Example #4
    def _validate_test_results_thread(self, model_version: int) -> None:
        logger.info('Executing validation request')

        model = self._get_staging_model(model_version)
        result_queue = queue.Queue()

        # Open the streaming call first; the initial message names the model
        # version under test, and each later message carries one test result.
        future = self.nodes[0].internal.SubmitTestResults.future(to_iter(result_queue))
        result_queue.put(SubmitTestRequest(version=SubmitTestVersion(searchId=self._id, version=model_version)))

        def store_result(target: int, pred: float):
            result_queue.put(SubmitTestRequest(result=TestResult(label=str(target), score=pred)))

        eval_start = time.time()
        with self._data_manager.get_examples(ExampleSet.TEST) as test_dir:
            # infer_dir invokes store_result once per test example.
            model.infer_dir(test_dir, store_result)

        result_queue.put(None)  # sentinel closes the stream
        future.result()  # wait for the server to acknowledge

        logger.info('Evaluated model in {:.3f} seconds'.format(time.time() - eval_start))
        logger.info('Submitted test results for model version {}'.format(model.version))
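Example #4 hands store_result to model.infer_dir as a callback. The call site implies a contract: walk the test directory and report one (true label, predicted score) pair per example. A hypothetical implementation of that shape, for orientation only (the directory layout, numeric labels, and the placeholder scorer are all assumptions):

import random
from pathlib import Path
from typing import Callable


def infer_dir(test_dir: Path, callback: Callable[[int, float], None]) -> None:
    # Assumed layout: test_dir/<label>/<example files>, with numeric labels.
    for label_dir in sorted(p for p in test_dir.iterdir() if p.is_dir()):
        target = int(label_dir.name)
        for _path in label_dir.glob('*'):
            score = random.random()  # placeholder; a real model scores here
            callback(target, score)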