Пример #1
0
 def _suggest(self, text, project, params):
     """Suggest subjects for ``text`` using this backend's similarity index.

     The text is transformed with ``project.vectorizer``, the resulting
     vector is looked up in ``self._index``, and the similarities are
     wrapped in a ``VectorSuggestionResult`` that is filtered down to
     the configured ``limit``.

     ``params`` holds the parameters for this call. Its ``limit`` entry
     takes precedence; if ``params`` is None or has no ``limit``, the
     backend-level ``self.params['limit']`` is used instead.
     """
     self.debug('Suggesting subjects for text "{}..." (len={})'.format(
         text[:20], len(text)))
     vectors = project.vectorizer.transform([text])
     docsim = self._index[vectors[0]]
     fullresult = VectorSuggestionResult(docsim, project.subjects)
     # Honour the per-call parameters instead of silently ignoring them;
     # fall back to the backend configuration to stay backward-compatible.
     if params is not None and 'limit' in params:
         limit = params['limit']
     else:
         limit = self.params['limit']
     return fullresult.filter(limit=int(limit))
Пример #2
0
 def _suggest(self, text, params):
     """Suggest subjects for ``text`` and return the filtered result.

     The text is tokenized with the project's analyzer, the tokens are
     re-joined with single spaces and vectorized, and the vector is
     looked up in ``self._index``. The similarities are returned as a
     ``VectorSuggestionResult`` filtered to ``params['limit']`` hits.
     """
     preview = text[:20]
     self.debug(
         'Suggesting subjects for text "{}..." (len={})'.format(
             preview, len(text)))
     words = self.project.analyzer.tokenize_words(text)
     normalized = " ".join(words)
     doc_vector = self.vectorizer.transform([normalized])[0]
     similarities = self._index[doc_vector]
     ranked = VectorSuggestionResult(similarities, self.project.subjects)
     max_hits = int(params['limit'])
     return ranked.filter(limit=max_hits)
Пример #3
0
def test_vector_suggestion_result_as_vector_destination(subject_index):
    """Check that ``as_vector`` fills and returns the given destination."""
    n_subjects = len(subject_index)
    ones = np.ones(n_subjects, dtype=np.float32)
    result = VectorSuggestionResult(ones)
    target = np.zeros(n_subjects, dtype=np.float32)

    # Sanity check: the target buffer starts out different (all zeros).
    assert not (target == ones).all()

    returned = result.as_vector(subject_index, destination=target)

    # The very same array object must come back, now holding the scores.
    assert returned is target
    assert (target == ones).all()
Пример #4
0
 def _merge_hits_from_sources(self, hits_from_sources, params):
     """Merge weighted source hits by running them through ``self._model``.

     Each item of ``hits_from_sources`` is a ``(hits, weight)`` pair;
     ``hits.vector`` is multiplied by its weight and the products are
     stacked into a float32 array. The transposed array, with a leading
     axis added, is passed to ``self._model.predict`` and the first
     element of the prediction is wrapped in a
     ``VectorSuggestionResult``.
     """
     weighted = [hits.vector * weight for hits, weight in hits_from_sources]
     scores = np.array(weighted, dtype=np.float32)
     # predict() receives a single-item batch: transpose, then prepend
     # an axis of length one.
     batch = np.expand_dims(scores.transpose(), 0)
     predictions = self._model.predict(batch)
     return VectorSuggestionResult(predictions[0], self.project.subjects)
Пример #5
0
def test_hitfilter_vector_suggestion_results_with_deprecated_subjects(
        subject_index):
    """Check that ``SuggestionFilter`` drops a deprecated subject."""
    deprecated_uri = 'http://example.org/deprecated'
    # A subject appended with neither label nor notation.
    subject_index.append(deprecated_uri, None, None)

    unfiltered = VectorSuggestionResult(np.ones(len(subject_index)))
    filtered = SuggestionFilter(subject_index)(unfiltered)

    # Filtering must remove exactly as many hits as there are
    # deprecated subject ids.
    n_deprecated = len(subject_index.deprecated_ids())
    assert len(unfiltered) == len(filtered) + n_deprecated

    expected = SubjectSuggestion(
        uri=deprecated_uri, label=None, notation=None, score=1.0)
    assert expected in unfiltered.as_list(subject_index)
    assert expected not in filtered.as_list(subject_index)
Пример #6
0
def merge_hits(weighted_hits, subject_index):
    """Merge hits from multiple sources into a single result.

    ``weighted_hits`` is a sequence of WeightedSuggestion objects, each
    providing a ``weight`` and a ``hits`` result. ``subject_index`` is
    passed to every ``hits.as_vector`` call to obtain the score vectors.

    Returns a VectorSuggestionResult holding the weighted average of the
    score vectors.
    """
    source_weights = [source.weight for source in weighted_hits]
    score_vectors = [
        source.hits.as_vector(subject_index) for source in weighted_hits
    ]
    # Average across sources (axis 0), one value per vector position.
    merged = np.average(score_vectors, axis=0, weights=source_weights)
    return VectorSuggestionResult(merged)
Пример #7
0
 def _merge_hits_from_sources(self, hits_from_sources, params):
     """Merge source hits by running scaled scores through ``self._model``.

     Each item of ``hits_from_sources`` is a ``(hits, weight, subjects)``
     triple. The square root of ``hits.as_vector(subjects)`` is scaled
     by the weight and by the number of sources, and the rows are
     stacked into a float32 array. The transposed array, with a leading
     axis added, goes to ``self._model.predict``; the first element of
     the prediction is returned as a ``VectorSuggestionResult``.
     """
     n_sources = len(hits_from_sources)
     scaled = [
         np.sqrt(hits.as_vector(subjects)) * weight * n_sources
         for hits, weight, subjects in hits_from_sources
     ]
     features = np.array(scaled, dtype=np.float32)
     # predict() receives a single-item batch.
     batch = np.expand_dims(features.transpose(), 0)
     predicted = self._model.predict(batch)
     return VectorSuggestionResult(predicted[0])
Пример #8
0
 def _suggest_chunks(self, chunktexts, project):
     """Predict on each chunk and return the mean of the chunk results.

     Chunks for which ``self._inputs_to_exampletext`` yields nothing are
     skipped. If no chunk produces a result, an empty
     ``ListSuggestionResult`` is returned; otherwise the converted
     per-chunk results are averaged along axis 0 and wrapped in a
     ``VectorSuggestionResult``.
     """
     chunk_vectors = []
     for chunk in chunktexts:
         prepared = self._inputs_to_exampletext(project, chunk)
         if prepared:
             # The example passed to the model is prefixed with a space.
             raw = self._model.predict(' {}'.format(prepared))
             chunk_vectors.append(self._convert_result(raw, project))
     if chunk_vectors:
         averaged = np.array(chunk_vectors).mean(axis=0)
         return VectorSuggestionResult(averaged, project.subjects)
     # No chunk produced any usable example text.
     return ListSuggestionResult(hits=[], subject_index=project.subjects)
Пример #9
0
    def _suggest_chunks(self, chunktexts, params):
        """Predict on each chunk and return the mean of the chunk results.

        Chunks for which ``self._inputs_to_exampletext`` yields nothing
        are skipped. If no chunk produces a result, an empty
        ``ListSuggestionResult`` is returned; otherwise the converted
        per-chunk results are stacked as float32, averaged along axis 0
        and wrapped in a ``VectorSuggestionResult``.
        """
        converted = []
        for chunk in chunktexts:
            prepared = self._inputs_to_exampletext(chunk)
            if prepared:
                # The example passed to the model is prefixed with a space.
                prediction = self._model.predict(' {}'.format(prepared))
                converted.append(self._convert_result(prediction))
        if converted:
            stacked = np.array(converted, dtype=np.float32)
            return VectorSuggestionResult(stacked.mean(axis=0))
        # No chunk produced any usable example text.
        return ListSuggestionResult([])
Пример #10
0
 def _merge_hits_from_sources(self, hits_from_sources, project, params):
     """Merge source hits by blending raw and predicted scores per subject.

     The ``hits.vector`` of every source is stacked into one array. For
     each subject column whose summed score is positive,
     ``self._calculate_scores`` supplies a raw and a predicted score;
     they are mixed using a weight of
     ``1 / (discount_rate * self._subject_freq[subj_id] + 1)`` for the
     raw score and the remainder for the predicted score. All other
     subjects keep a score of zero.

     ``discount_rate`` is read from ``self.params`` (defaulting to
     ``self.DEFAULT_DISCOUNT_RATE``); the ``params`` argument is not
     consulted here.
     """
     per_source = np.array([hits.vector for hits, _ in hits_from_sources])
     discount = float(
         self.params.get('discount_rate', self.DEFAULT_DISCOUNT_RATE))
     n_subjects = per_source.shape[1]
     merged = np.zeros(n_subjects)
     for subj_id in range(n_subjects):
         column = per_source[:, subj_id]
         # Skip subjects that no source scored positively overall.
         if not (column.sum() > 0.0):
             continue
         raw_score, pred_score = self._calculate_scores(subj_id, column)
         raw_weight = 1.0 / ((discount * self._subject_freq[subj_id]) + 1)
         merged[subj_id] = (
             (raw_weight * raw_score) + (1.0 - raw_weight) * pred_score)
     return VectorSuggestionResult(merged, project.subjects)
Пример #11
0
def test_vector_suggestions_enforce_score_range(subject_index):
    """Expect ``as_vector`` to return scores limited to 0.0 - 1.0."""
    raw_scores = [-0.1, 0.0, 0.5, 1.0, 1.5]
    limited_scores = [0.0, 0.0, 0.5, 1.0, 1.0]

    result = VectorSuggestionResult(np.array(raw_scores, dtype=np.float32))
    actual = result.as_vector(subject_index)

    wanted = np.array(limited_scores, dtype=np.float32)
    assert (actual == wanted).all()
Пример #12
0
def test_vector_suggestion_result_as_vector(subject_index):
    """Check that ``as_vector`` gives back the scores it was built from."""
    ones = np.ones(len(subject_index), dtype=np.float32)
    result = VectorSuggestionResult(ones)
    roundtrip = result.as_vector(subject_index)
    assert (roundtrip == ones).all()
Пример #13
0
 def _prediction_to_result(self, prediction, params):
     """Turn ``(score, subject_id)`` pairs into a filtered result.

     A float32 vector with one slot per project subject is filled from
     ``prediction`` (unlisted subjects stay at zero), wrapped in a
     ``VectorSuggestionResult`` and filtered against the project's
     subjects with ``params['limit']`` as the limit.
     """
     subjects = self.project.subjects
     scores = np.zeros(len(subjects), dtype=np.float32)
     for value, idx in prediction:
         scores[idx] = value
     wrapped = VectorSuggestionResult(scores)
     return wrapped.filter(subjects, limit=int(params['limit']))