示例#1
0
 def test_longer(self):
     """Coalesce a second batch that is longer than the first one.

     Checks both the labels returned for the second batch and the
     points held by the assembly afterwards.
     """
     first_batch = np.array([[0., 0.], [2., 2.]])
     second_batch = np.array([[1.9, 1.9], [3., 3.], [0.1, 0.1]])
     expected_points = [[0.1, 0.1], [1.9, 1.9], [3., 3.]]

     assembly = Assembly()
     assembly.coalesce(first_batch)
     labels = assembly.coalesce(second_batch)

     labels_match = np.array_equal(labels, [1, 2, 0])
     self.assertTrue(labels_match)
     points_match = np.array_equal(assembly.points, expected_points)
     self.assertTrue(points_match)
示例#2
0
 def test_coalesce(self):
     """Coalesce three successive batches and check the final state.

     Only the labels of the last batch and the resulting points of the
     assembly are verified.
     """
     batches = (
         np.array([[0., 0.], [2., 2.]]),
         np.array([[1.9, 1.9], [3, 3], [0.1, 0.1]]),
         np.array([[2.9, 2.9], [2.1, 2.1]]),
     )
     expected_points = [[0.1, 0.1], [2.1, 2.1], [2.9, 2.9]]

     assembly = Assembly()
     labels = None
     for batch in batches:
         # Only the labels of the final batch are kept for the assertion.
         labels = assembly.coalesce(batch)

     labels_match = np.array_equal(labels, [2, 1])
     self.assertTrue(labels_match)
     points_match = np.array_equal(assembly.points, expected_points)
     self.assertTrue(points_match)
示例#3
0
class OC:
    """Wraps a mini batch MCMC algorithm.

    Incoming chunks are stored in a bounded frame (a ``deque`` with a
    maximum length), the frame content is pushed to the wrapped algorithm,
    and the predicted centroids are merged into an ``Assembly`` so that a
    result covering every known center can be built.
    """

    def __init__(self, **kwargs):
        """
        Build a new OC instance
        :param kwargs: keyword options. The optional ``frame_size`` entry
            (default 100) sets the maximum length of the frame and is
            consumed here; every other entry is forwarded unchanged to
            ``Batch(MCMC, ...)``.
        """
        # pop() with a default instead of get() followed by del: deleting a
        # missing key raised KeyError whenever frame_size was omitted, which
        # made the documented default unusable.
        frame_size = kwargs.pop('frame_size', 100)
        self.frame = deque([], frame_size)
        self.algo = Batch(MCMC, **kwargs)
        self.ass = Assembly()

    def push_predict(self, points, columns):
        """
        push and predict data from the dataflow
        :param points: a chunk of data
        :param columns: the column names
        :return: the result that can be sent to the dataviz server
        """
        arr = self._extend(points)
        self.algo.push(arr)
        centroids, labels = self.algo.predict(arr)
        return self._make_result(centroids, labels, columns)

    def _extend(self, points):
        """
        keep the given chunk in the fixed size frame. Old data can be reused
        if the chunk is smaller than the frame.
        :param points: a chunk of data
        :return: the content of the frame as a ndarray
        """
        # The deque is bounded, so the oldest entries are dropped
        # automatically once the frame is full.
        self.frame.extend(points)
        return np.array(self.frame)

    def _make_result(self, centroids, labels, columns):
        """
        Build a result ready for the dataviz server. The result contains
        all known centers, even if empty for the current centroids
        :param centroids: the current centroids
        :param labels: the labels for the data in the frame (non-negative
            integers, as required by ``np.bincount``)
        :param columns: the column names
        :return: a dict with the keys ``centers``, ``counts`` and
            ``columns``, ready for the dataviz server
        """
        indices = self.ass.coalesce(centroids)
        counts = np.zeros(len(self.ass.points))
        # bincount stops at the highest label present; minlength pads with
        # zeros so centroids without any point still get one entry. Without
        # it the assignment fails on a shape mismatch, or silently
        # broadcasts a single count to every center.
        counts[indices] = np.bincount(labels, minlength=len(indices))
        return {
            'centers': self.ass.points.tolist(),
            'counts': counts.tolist(),
            'columns': columns
        }

    def run(self):
        """Run the underlying algorithm"""
        return self.algo.run()
示例#4
0
 def test_init(self):
     """Coalesce a first batch into a fresh assembly and check its labels."""
     initial_batch = np.array([[0., 0.], [2., 2.]])
     assembly = Assembly()
     labels = assembly.coalesce(initial_batch)
     labels_match = np.array_equal(labels, [0, 1])
     self.assertTrue(labels_match)
示例#5
0
 def test_shorter(self):
     """Coalesce a second batch that is shorter than the first one.

     Checks the label returned for the single new point and the points
     held by the assembly afterwards.
     """
     first_batch = np.array([[0., 0.], [2., 2.]])
     second_batch = np.array([[1.9, 1.9]])
     expected_points = [[0., 0.], [1.9, 1.9]]

     assembly = Assembly()
     assembly.coalesce(first_batch)
     labels = assembly.coalesce(second_batch)

     labels_match = np.array_equal(labels, [1])
     self.assertTrue(labels_match)
     points_match = np.array_equal(assembly.points, expected_points)
     self.assertTrue(points_match)