Пример #1
0
    def _fit_top_2_special(self, predictions):
        """Blend the colours of the two most probable outputs into an RGB string.

        Fitting across the top-2 points is handled differently from the
        general top-k fit (k=2).

        Every output in ``predictions`` (output -> probability) is passed to
        ``self.mapped_output`` and ``self.output_colour``. The two entries with
        the highest probability are then stored keyed by colour, so two outputs
        that share a colour collapse into a single point.

        Returns:
            ``"rgb(r, g, b)"``: the colour itself when only one distinct
            colour remains, otherwise the per-channel rounded point produced
            by ``geometry.fit_proportion`` between the two colours.
        """
        ranked = []

        for output, probability in predictions.items():
            # The return value is not used here; only the call is made.
            self.mapped_output(output)
            ranked.append((output, self.output_colour(output), probability))

        # Stable sort: equally probable outputs keep their order in predictions.
        ranked.sort(key=lambda entry: entry[2], reverse=True)
        by_colour = {}

        for output, colour, probability in ranked[:2]:
            by_colour[colour] = (output, probability)

        if len(by_colour) == 1:
            return "rgb(%d, %d, %d)" % next(iter(by_colour))

        ((colour_a, (output_a, probability_a)),
         (colour_b, (output_b, probability_b))) = by_colour.items()
        # Result is not used by the fit below; the call itself is retained.
        geometry.distance(colour_a, colour_b)
        # Not a typo: the probabilities are deliberately swapped so that the
        # most likely prediction gets the smallest distance, and vice versa.
        swapped = mlbase.regmax({
            output_a: probability_b,
            output_b: probability_a,
        })
        blended = geometry.fit_proportion(
            (colour_a, colour_b),
            (swapped[output_a], swapped[output_b]))
        return "rgb(%d, %d, %d)" % tuple(round(channel) for channel in blended)
Пример #2
0
 def test_fit_point2_vline(self):
     """Two references on a vertical line should be fitted at the origin.

     Also checks that the second value returned by fit_point stays below
     SMALL_MAX_T.
     """
     anchors = [[0, -2], [0, 4]]
     radii = [2, 4]
     origin = [0, 0]
     fitted, t_value = fit_point(anchors, radii)
     error = distance(fitted, origin)
     self.assertTrue(math.isclose(error, 0, abs_tol=0.0001), fitted)
     self.assertLess(t_value, SMALL_MAX_T)
Пример #3
0
 def test_fit_point1_d0(self):
     """A single reference with target distance 0 is fitted onto itself.

     Also checks that the second value returned by fit_point stays below
     SMALL_MAX_T.
     """
     anchor = [5, 3]
     radius = 0
     fitted, t_value = fit_point([anchor], [radius])
     error = distance(fitted, anchor)
     self.assertTrue(math.isclose(error, 0, abs_tol=.0001), fitted)
     self.assertLess(t_value, SMALL_MAX_T)
Пример #4
0
 def test_fit_point3(self):
     """Three references, each 4 away from the origin, are fitted at the origin.

     fit_point is called with visualize=True. Also checks that the second
     value it returns stays below SMALL_MAX_T.
     """
     diagonal = math.sqrt(8)
     anchors = [[diagonal, diagonal], [0, -4], [-4, 0]]
     radii = [4, 4, 4]
     origin = [0, 0]
     fitted, t_value = fit_point(anchors, radii, visualize=True)
     error = distance(fitted, origin)
     self.assertTrue(math.isclose(error, 0, abs_tol=0.0001), fitted)
     self.assertLess(t_value, SMALL_MAX_T)
Пример #5
0
 def test_fit_point2_hline(self):
     """Two references on a horizontal line are fitted between them at [1, 0].

     fit_point is called with visualize=True. Also checks that the second
     value it returns stays below SMALL_MAX_T.
     """
     anchors = [[-1, 0], [5, 0]]
     radii = [2, 4]
     wanted = [1, 0]
     fitted, t_value = fit_point(anchors, radii, visualize=True)
     error = distance(fitted, wanted)
     self.assertTrue(math.isclose(error, 0, abs_tol=0.0001), fitted)
     self.assertLess(t_value, SMALL_MAX_T)
Пример #6
0
    def test_fit_proportion(self):
        """Check fit_proportion against known points between two endpoints.

        The even split is compared exactly; all other cases must land within
        0.0001 of the expected point.
        """
        origin_2d = [0, 0]
        unit_2d = [1, 1]
        midpoint = fit_proportion([origin_2d, unit_2d], [.5, .5])
        self.assertEqual(midpoint, [.5, .5])

        cases = [
            ([origin_2d, unit_2d], [.2, .8], [.2, .2]),
            ([origin_2d, unit_2d], [.9, .1], [.9, .9]),
            # Test 3d as well
            ([(0, 0, 0), (1, -1, 2)], [.9, .1], [.9, -.9, 1.8]),
            ([(1, 2, 3), (3, 0, -3)], [.5, .5], [2, 1, 0]),
        ]

        for endpoints, weights, wanted in cases:
            fitted = fit_proportion(endpoints, weights)
            error = distance(fitted, wanted)
            self.assertTrue(math.isclose(error, 0, abs_tol=.0001), fitted)
0
def find_closest(query_part, query_layer, query):
    """Rank stored activations by their distance to a partial query point.

    Args:
        query_part: value a candidate's ``part`` must equal.
        query_layer: value a candidate's ``layer`` must equal.
        query: non-empty sequence of ``(axis, target)`` pairs. Only the
            listed axes of each candidate's ``point`` take part in the
            distance computation.

    Returns:
        An empty list when no entry of the module-level ``activation_data``
        matches the part and layer. Otherwise the tuple
        ``(sorted_result, cut10, cut25, cut50)``, where ``sorted_result`` is
        the list of ``(distance, candidate)`` pairs in ascending distance and
        the cuts are the slice ends for the closest 10%, 25% and 50%.
        NOTE(review): the two return shapes differ; kept as-is so existing
        callers are unaffected.

    Distance statistics and the histograms built by ``build_histogram`` are
    printed as a side effect.
    """
    assert len(query) > 0, "empty query - would simply return everything!"
    # The query is identical for every candidate, so split it only once.
    axes = [axis for axis, _ in query]
    target_point = [target for _, target in query]
    result = []

    for candidate in activation_data:
        if not (query_part == candidate.part
                and query_layer == candidate.layer):
            continue

        sub_point = [candidate.point[axis] for axis in axes]
        result.append((geometry.distance(sub_point, target_point), candidate))

    if not result:
        return result

    distances = [distance for distance, _ in result]
    minimum_distance = min(distances)
    maximum_distance = max(distances)
    q50 = maximum_distance * .5
    q25 = maximum_distance * .25
    q10 = maximum_distance * .1
    print("distance stats: [%.4f, %.4f] q10: %.4f q25: %.4f q50: %.4f" %
          (minimum_distance, maximum_distance, q10, q25, q50))
    # Sort on the distance alone: comparing whole tuples would fall back to
    # comparing the candidate objects whenever two distances are equal, which
    # raises TypeError for candidates that define no ordering.
    sorted_result = sorted(result, key=lambda pair: pair[0])
    cut10 = int(len(result) * 0.1)
    cut25 = int(len(result) * 0.25)
    cut50 = int(len(result) * 0.5)

    # From here on q10/q25/q50 hold the second value from build_histogram.
    histogram_q10, q10 = build_histogram(sorted_result[:cut10])
    histogram_q25, q25 = build_histogram(sorted_result[:cut25])
    histogram_q50, q50 = build_histogram(sorted_result[:cut50])
    print("q10: %.4f q25: %.4f q50: %.4f" % (q10, q25, q50))
    print("histograms\n  q10: %s\n  q25: %s\n  q50: %s" %
          (adjutant.dict_as_str(histogram_q10),
           adjutant.dict_as_str(histogram_q25),
           adjutant.dict_as_str(histogram_q50)))
    return sorted_result, cut10, cut25, cut50
Пример #8
0
    def _fit_top_k(self, predictions):
        """Shade the dominant colour by how much of the probability it holds.

        Probabilities in ``predictions`` (output -> probability) are summed
        per colour, with the colour of each output taken from
        ``self.output_colour``. The colour with the largest summed
        probability ``p`` is then combined with white ``[255, 255, 255]`` via
        ``geometry.fit_proportion`` using the weights ``(total - p, p)``.
        NOTE(review): presumably the weights act as relative distances, so a
        dominant colour stays close to itself; confirm against
        ``geometry.fit_proportion``.

        The pairwise-distance / ``geometry.fit_point`` code that used to
        follow the return statement could never run and has been removed.

        Returns:
            The fitted point rounded per channel, formatted as
            ``"rgb(r, g, b)"``.

        Raises:
            ValueError: if ``predictions`` is empty (raised by ``max``).
        """
        colour_probabilities = {}

        for output, probability in predictions.items():
            # The return value is not used here; only the call is made.
            self.mapped_output(output)
            colour = self.output_colour(output)
            colour_probabilities[colour] = (
                colour_probabilities.get(colour, 0) + probability)

        colour, probability = max(colour_probabilities.items(),
                                  key=lambda item: item[1])
        total = sum(colour_probabilities.values())
        fit = geometry.fit_proportion((colour, [255, 255, 255]),
                                      (total - probability, probability))
        return "rgb(%d, %d, %d)" % tuple(round(channel) for channel in fit)
Пример #9
0
    def test_fit_point(self):
        """Fit every combination of two reference sets and two distance sets.

        Each fit must be free of NaN coordinates and its second return value
        must stay below BIG_MAX_T; the four fitted points must also be
        pairwise separated by more than the listed minimum distance.
        """
        references = {
            "1": [[0, -2, 3], [1, 0, -6], [2, 4, 9], [3, 6, -12]],
            # Alternative fixture (disabled):
            #   "1": [[0, -2, 3], [1, 0, -6], [0, 4, 9], [3, 6, -12]]
            "2": [[3, -2, 3], [2, 0, -6], [1, 4, 9], [0, 6, -12]],
        }
        targets = {
            "1": [5, 4, 3, 2],
            "2": [1, 2, 3, 4],
            # Alternative fixture (disabled):
            #   "2": [1, 1, 4, 3]
        }
        fitted = {}

        for reference_id, target_id in (("1", "1"), ("1", "2"),
                                        ("2", "1"), ("2", "2")):
            point, t_value = fit_point(references[reference_id],
                                       targets[target_id],
                                       visualize=True)
            self.assertTrue(all([not math.isnan(c) for c in point]), point)
            self.assertLess(t_value, BIG_MAX_T)
            fitted[reference_id + target_id] = point

        minimum_separations = [
            ("11", "12", 1),
            ("11", "21", 1),
            ("11", "22", 1),
            ("12", "21", 1),
            ("12", "22", .75),
            ("21", "22", 1),
        ]

        for first, second, minimum in minimum_separations:
            self.assertGreater(distance(fitted[first], fitted[second]),
                               minimum)
0
 def test_distance(self):
     """distance is 0.0 for identical points and Euclidean otherwise."""
     same_point = distance([0, 1, 2], [0, 1, 2])
     self.assertEqual(same_point, 0.0)

     apart = distance([0, 1, 2], [-1, 1, 4])
     wanted = math.sqrt(1**2 + 2**2)
     self.assertEqual(apart, wanted)
Пример #11
0
}

#for word, probability in predictions[key].items():
#    colour = self.colour_embeddings[word]
#
#    if colour not in interpolation_points:
#        interpolation_points[colour] = (word, probability)
#    else:
#        if interpolation_points[colour][1] < probability:
#            interpolation_points[colour] = (word, probability)
#
# Turn the collected interpolation points into a single colour.
# The keys of interpolation_points are used as colours and its values unpack
# as (w, p) pairs below.
# NOTE(review): with zero entries the else branch runs and min([]) raises
# ValueError - confirm the mapping can never be empty here.
if len(interpolation_points) == 1:
    # Only one colour: use it directly.
    colours[key] = "rgb(%d, %d, %d)" % next(iter(interpolation_points.keys()))
else:
    # Largest distance between any two of the colours.
    maximum_distance = None

    for pair in itertools.combinations([colour for colour in interpolation_points.keys()], 2):
        distance = geometry.distance(pair[0], pair[1])

        if maximum_distance is None or distance > maximum_distance:
            maximum_distance = distance

    lowest_probability = min([p for w, p in interpolation_points.values()])
    highest_probability = max([p for w, p in interpolation_points.values()])
    # NOTE(review): if both probabilities are 0 the division below raises
    # ZeroDivisionError.
    maximum_domain = highest_probability + lowest_probability
    # Target distance per entry: maximum_distance * (1 - p / maximum_domain),
    # so a higher probability yields a smaller target distance.
    prediction_distances = [(w, maximum_distance + (-p * maximum_distance / maximum_domain)) for w, p in interpolation_points.values()]
    # Reference points are looked up in colour_embeddings by the first element
    # of each pair; the second element is the matching target distance.
    fit, _ = geometry.fit_point([colour_embeddings[item[0]] for item in prediction_distances], [item[1] for item in prediction_distances], epsilon=0.1, visualize=False)
    # NOTE(review): unlike the single-colour branch, the result is only
    # printed and not stored in colours[key] - confirm this is intended.
    print("rgb(%d, %d, %d)" % tuple([round(i) for i in fit]))


Пример #12
0
def measure(lstm, data_dir, kind, keys):
    """Record how far the monitored LSTM states move from step to step.

    Every item from ``data.stream_data(data_dir, kind)`` is fed word by word
    through a fresh ``lstm.stepwise(handle_unknown=True)`` runner. For each
    part/layer whose encoded key is in ``keys``, the change at a step is
    ``geometry.distance`` to the previous state of that key, or
    ``geometry.hypotenuse`` of the state when there is no previous one.

    Args:
        lstm: object providing ``stepwise``, ``part_layers`` and
            ``encode_key``.
        data_dir: forwarded to ``data.stream_data``.
        kind: forwarded to ``data.stream_data``.
        keys: encoded part/layer keys to monitor.

    Returns:
        ``(global_minimum, global_maximum, sequence_changes)``. The first two
        map each key to ``(distance, step_index, sequence)`` for the smallest
        and largest change seen; ``sequence_changes`` maps every sequence
        that set such a record to its per-key list of changes.
    """
    no_local_record = (None, None)
    no_global_record = (None, None, None)
    sequence_changes = {}
    global_minimum = {key: no_global_record for key in keys}
    global_maximum = {key: no_global_record for key in keys}

    for instance_index, xy in enumerate(data.stream_data(data_dir, kind)):
        if instance_index % 100 == 0:
            logging.debug("At the %d instance." % (instance_index))

        # The inputs' first elements followed by that of the final output.
        sequence = tuple(item[0] for item in xy.x) + (xy.y[-1][0], )
        stepwise_rnn = lstm.stepwise(handle_unknown=True)
        change_distances = {key: [] for key in keys}
        previous_states = {}
        minimum = {key: no_local_record for key in keys}
        maximum = {key: no_local_record for key in keys}

        for step, word_pos in enumerate(xy.x):
            _, instruments = stepwise_rnn.step(word_pos[0],
                                               rnn.LSTM_INSTRUMENTS)

            for part, layer in lstm.part_layers():
                key = lstm.encode_key(part, layer)

                if key not in keys:
                    continue

                current_state = instruments[part][layer]

                if key in previous_states:
                    change = geometry.distance(previous_states[key],
                                               current_state)
                else:
                    # No earlier state for this key in the current sequence.
                    change = geometry.hypotenuse(current_state)

                change_distances[key].append(change)
                previous_states[key] = current_state

                if (minimum[key] == no_local_record
                        or change < minimum[key][0]):
                    minimum[key] = (change, step)

                if (maximum[key] == no_local_record
                        or change > maximum[key][0]):
                    maximum[key] = (change, step)

        for key in keys:
            smallest = minimum[key]

            if (global_minimum[key] == no_global_record
                    or smallest[0] < global_minimum[key][0]):
                global_minimum[key] = smallest + (sequence, )
                # Only keeping track of the more notable sequence changes
                sequence_changes[sequence] = change_distances
                sequence_str, changes_str = stringify(sequence,
                                                      change_distances[key])
                logging.debug(
                    "Noting minimum for %s of %.4f @%d:\n  %s\n  %s" %
                    (key, smallest[0], smallest[1], sequence_str,
                     changes_str))

            largest = maximum[key]

            if (global_maximum[key] == no_global_record
                    or largest[0] > global_maximum[key][0]):
                global_maximum[key] = largest + (sequence, )
                # Only keeping track of the more notable sequence changes
                sequence_changes[sequence] = change_distances
                sequence_str, changes_str = stringify(sequence,
                                                      change_distances[key])
                logging.debug(
                    "Noting maximum for %s of %.4f @%d:\n  %s\n  %s" %
                    (key, largest[0], largest[1], sequence_str,
                     changes_str))

    return global_minimum, global_maximum, sequence_changes