Пример #1
0
    def testCosineSimilarityProviderMatching(self):
        """Check correctness for querying the library with a library element.

        A random float32 library is preprocessed with
        `CosineSimilarityProvider` and then queried with rows taken from that
        same preprocessed library. If the returned index differs from the
        queried row, the returned similarity must still be 1.0, i.e. the only
        acceptable reason for a different index is a tie.
        """

        num_examples = 20
        num_trials = 10
        data_dim = 5
        similarity = similarity_lib.CosineSimilarityProvider()
        library = np.float32(np.random.normal(size=(num_examples, data_dim)))
        library = tf.constant(library)
        library = similarity.preprocess_library(library)
        # The row to query with is fed at run time, so one graph serves every
        # trial.
        query_idx = tf.placeholder(shape=(), dtype=tf.int32)
        # np.newaxis restores the leading axis dropped by selecting one row.
        query = library[query_idx][np.newaxis, ...]
        (match_idx_op, match_similarity_op, _, _,
         _) = library_matching._max_similarity_match(library, query,
                                                     similarity)

        # Use queries that are rows of the library. This means that the maximum
        # cosine similarity is 1.0 and is achieved by the row index of the query
        # in the library.
        with tf.Session() as sess:
            for _ in range(num_trials):
                idx = np.random.randint(0, high=num_examples)
                match_idx, match_similarity = sess.run(
                    [match_idx_op, match_similarity_op],
                    feed_dict={query_idx: idx})
                # Fail only if match_idx != idx and the similarity of
                # match_idx is not tied with the argmax (which is 1.0 by
                # construction).
                if match_idx != idx:
                    # assertAllClose belongs to the same assertion family as
                    # the assertAllEqual checks used by the sibling tests.
                    self.assertAllClose(match_similarity, 1.0)
Пример #2
0
    def testLibraryMatchingNoPredictions(self):
        """Run library_matching on hardcoded spectra with no predicted library.

        Three query spectra are matched against three observed spectra using
        cosine similarity while the predicted library is left as None. The
        values evaluated under `fmap_constants.INCHIKEY` of the returned
        predicted data must equal the hardcoded expected ids.
        """

        observed_spectra = np.array([[1, 2], [2, 1], [0, 0]],
                                    dtype=np.float32)
        query_spectra = np.array([[2, 5], [-3, 1], [0, 0]],
                                 dtype=np.float32)

        observed_ids = self.make_ids(3)
        query_ids = self.make_ids(3)

        wanted = ['0', '2', '0']

        # Every entry gets mass 1.0 on both the query and the library side.
        query_masses = np.ones([3, 1], dtype=np.float32)
        query_side = self._package_data(
            ids=query_ids, spectrum=query_spectra, masses=query_masses)
        observed_masses = np.ones([3, 1], dtype=np.float32)
        observed_side = self._package_data(
            ids=observed_ids, spectrum=observed_spectra,
            masses=observed_masses)

        # No predicted library and no predictor function in this scenario.
        matching_input = library_matching.LibraryMatchingData(
            query=query_side, observed=observed_side, predicted=None)

        _, matched, _, _ = library_matching.library_matching(
            matching_input,
            predictor_fn=None,
            similarity_provider=similarity_lib.CosineSimilarityProvider(),
            mass_tolerance=3.0)

        with tf.Session() as sess:
            sess.run(tf.local_variables_initializer())
            got = sess.run(matched[fmap_constants.INCHIKEY])

        self.assertAllEqual(wanted, got)
Пример #3
0
    def perform_matching(self, ids_observed, ids_predicted, ids_query,
                         masses_observed, masses_predicted, masses_query,
                         y_observed, y_query, x_predicted, tf_transform,
                         mass_tolerance):
        """Build matching inputs, run library_matching, and evaluate the result.

        Args:
          ids_observed: ids passed to `_package_data` for the observed library.
          ids_predicted: ids passed to `_package_data` for the predicted
            library.
          ids_query: ids passed to `_package_data` for the queries.
          masses_observed: masses for the observed library.
          masses_predicted: masses for the predicted library.
          masses_query: masses for the queries.
          y_observed: spectra for the observed library.
          y_query: spectra for the queries.
          x_predicted: predictor input, wrapped in `tf.constant` and stored
            under PREDICTOR_INPUT_KEY of the predicted data.
          tf_transform: callable applied to the predictor input to produce the
            predictor output.
          mass_tolerance: forwarded to `library_matching.library_matching`.

        Returns:
          A two-element list: the evaluated second output of
          `library_matching.library_matching` (predicted data) followed by the
          evaluated first output (true data).
        """

        query_side = self._package_data(
            ids=ids_query, spectrum=y_query, masses=masses_query)

        # The predicted library carries no spectrum of its own; it only holds
        # the input that the predictor function reads.
        predicted_side = self._package_data(
            ids=ids_predicted, spectrum=None, masses=masses_predicted)
        predicted_side[PREDICTOR_INPUT_KEY] = tf.constant(x_predicted)

        observed_side = self._package_data(
            ids=ids_observed, spectrum=y_observed, masses=masses_observed)

        matching_input = library_matching.LibraryMatchingData(
            query=query_side, observed=observed_side, predicted=predicted_side)

        def predictor_fn(data):
            return tf_transform(data[PREDICTOR_INPUT_KEY])

        provider = similarity_lib.CosineSimilarityProvider()
        # NOTE(review): the trailing positional 10 is forwarded as-is;
        # presumably a batch size -- confirm against library_matching's
        # signature.
        true_out, predicted_out, _, _ = library_matching.library_matching(
            matching_input, predictor_fn, provider, mass_tolerance, 10)

        with tf.Session() as sess:
            sess.run(tf.local_variables_initializer())
            return sess.run([predicted_out, true_out])
Пример #4
0
    def testLibraryMatchingNoObserved(self):
        """Run library_matching on hardcoded values with no observed library.

        Three query spectra are matched against a two-entry predicted library
        using cosine similarity while the observed library is left as None.
        The predictor adds 2 to the stored predictor input. The values
        evaluated under `fmap_constants.INCHIKEY` of the returned predicted
        data must equal the hardcoded expected ids.
        """

        predictor_input = np.array([[1, 1], [-3, -2]], dtype=np.float32)
        query_spectra = np.array([[2, 5], [-3, 1], [0, 0]],
                                 dtype=np.float32)

        predicted_ids = self.make_ids(2)
        query_ids = self.make_ids(3)

        wanted = ['0', '1', '0']

        query_masses = np.ones([3, 1], dtype=np.float32)
        query_side = self._package_data(
            ids=query_ids, spectrum=query_spectra, masses=query_masses)

        # The predicted library carries no spectrum of its own; it only holds
        # the input that the predictor function reads.
        predicted_masses = np.ones([2, 1], dtype=np.float32)
        predicted_side = self._package_data(
            ids=predicted_ids, spectrum=None, masses=predicted_masses)
        predicted_side[PREDICTOR_INPUT_KEY] = tf.constant(predictor_input)

        matching_input = library_matching.LibraryMatchingData(
            query=query_side, observed=None, predicted=predicted_side)

        def predictor_fn(data):
            # Shift the stored predictor input by a constant 2.
            return data[PREDICTOR_INPUT_KEY] + 2

        _, matched, _, _ = library_matching.library_matching(
            matching_input,
            predictor_fn=predictor_fn,
            similarity_provider=similarity_lib.CosineSimilarityProvider(),
            mass_tolerance=3.0)

        with tf.Session() as sess:
            sess.run(tf.local_variables_initializer())
            got = sess.run(matched[fmap_constants.INCHIKEY])

        self.assertAllEqual(wanted, got)