Exemplo n.º 1
0
    def test_get_idxs_to_balance_class_count_other_highest(self):
        """Balance class counts when the 'other' class has the highest count.

        Relabels the fixture so that, per the count assertions below, class 0
        has 10 samples, class 1 has 20 and the 'other' class has 70. The
        balancing indices are then expected to reference 70 samples from
        each of the three index ranges.
        """
        # Clear class 1 on rows 10-59, then re-assign it to rows 10-29 only.
        self.l[10:60, 1] = 0
        self.l[10:30, 1] = 1
        bal = Balancer(np.copy(self.l))
        counts = bal.get_class_count(other_clname=CLNAME_OTHER)
        assert_in(CLNAME_OTHER, counts.keys())

        assert_equals(counts[0], 10)
        assert_equals(counts[1], 20)
        assert_equals(counts[CLNAME_OTHER], 70)
        # list(...) so np.max receives a real sequence even where
        # dict.values() returns a view; the message is now actually
        # formatted with the class name.
        assert_equals(counts[CLNAME_OTHER], np.max(list(counts.values())),
                      "this test requires class count for %s to be highest!" %
                      (CLNAME_OTHER,))
        # NOTE(review): tolerance_order is the third positional argument of
        # assert_almost_equal; for integer counts this presumably still
        # amounts to an exact comparison -- confirm against the imported
        # assert_almost_equal.
        tolerance_order = 1
        idxs = bal.get_idxs_to_balance_class_count(counts.values())
        assert_almost_equal(
            np.count_nonzero(np.logical_and(idxs >= 0, idxs < 10)),
            10 + (70 - 10), tolerance_order)
        assert_almost_equal(
            np.count_nonzero(np.logical_and(idxs >= 10, idxs < 30)),
            20 + (70 - 20), tolerance_order)
        # The third argument of assert_equals is a failure message, not a
        # tolerance, so none is passed here.
        assert_equals(np.count_nonzero(idxs >= 30), 70)
Exemplo n.º 2
0
def balance_class_count_hdf5(fpath, keys,
                             key_label='label',
                             other_clname=CLNAME_OTHER):
    """ Resample keys in an HDF5 to generate a near balanced dataset.
    Returns a tuple of (dictionary with resampled features and ground truth,
    indices from the original label that were sampled).
    Not suitable for very large datasets: the labels and every requested
    key are read into memory in full.

    fpath -- path to HDF5 file
    keys -- keys to resample (e.g. features)
    Keyword arguments:
    key_label -- key for ground truth data in HDF5
    other_clname -- name for negative class (None if non-existent)
    """
    # Use a context manager so the file handle is released even when a key
    # lookup or the resampling raises.
    with h5py.File(fpath, 'r') as h_src:
        labls = h_src[key_label][:]
        bal = Balancer(np.squeeze(labls))
        class_count = bal.get_class_count(other_clname=other_clname)
        idxs = bal.get_idxs_to_balance_class_count(class_count.values())
        # Shuffle the sampled indices in place along the first axis.
        np.random.shuffle(idxs)

        dict_balanced = {key_label: labls[idxs]}
        for k in keys:
            # Read the dataset fully, then pick the sampled rows.
            dict_balanced[k] = h_src[k][:][idxs]
    return dict_balanced, idxs
Exemplo n.º 3
0
def balance_class_count_hdf5(fpath,
                             keys,
                             key_label='label',
                             other_clname=CLNAME_OTHER):
    """ Resample keys in an HDF5 to generate a near balanced dataset.
    Returns a tuple of (dictionary with resampled features and ground truth,
    indices from the original label that were sampled).
    Not suitable for very large datasets: the labels and every requested
    key are read into memory in full.

    fpath -- path to HDF5 file
    keys -- keys to resample (e.g. features)
    Keyword arguments:
    key_label -- key for ground truth data in HDF5
    other_clname -- name for negative class (None if non-existent)
    """
    # Use a context manager so the file handle is released even when a key
    # lookup or the resampling raises.
    with h5py.File(fpath, 'r') as h_src:
        labls = h_src[key_label][:]
        bal = Balancer(np.squeeze(labls))
        class_count = bal.get_class_count(other_clname=other_clname)
        idxs = bal.get_idxs_to_balance_class_count(class_count.values())
        # Shuffle the sampled indices in place along the first axis.
        np.random.shuffle(idxs)

        dict_balanced = {key_label: labls[idxs]}
        for k in keys:
            # Read the dataset fully, then pick the sampled rows.
            dict_balanced[k] = h_src[k][:][idxs]
    return dict_balanced, idxs
Exemplo n.º 4
0
    def test_get_class_count_no_other(self):
        """Check per-class counts when no 'other' class is requested.

        Expects a dict with exactly one key per label column, where each
        count equals the column sum of the fixture labels, and a positive
        total count.
        """
        counts = Balancer(np.copy(self.l)).get_class_count(other_clname=None)
        assert_is_instance(counts, dict, "Unexpected return instance type.")
        # Compare sorted key lists: dict ordering is not part of the contract
        # and keys()/range() are not lists on every Python version.
        assert_list_equal(sorted(counts.keys()),
                          list(range(self.l.shape[-1])),
                          "Expecting a key for each class.")
        for key in counts.keys():
            assert_equals(counts[key], np.sum(self.l[:, int(key)]),
                          "Unexpected count for class '%s'" % (key,))

        # list(...) so np.sum receives a real sequence rather than a view.
        assert_greater(np.sum(list(counts.values())), 0)
Exemplo n.º 5
0
    def test_get_class_count_no_other(self):
        """Check per-class counts when no 'other' class is requested.

        Expects a dict with exactly one key per label column, where each
        count equals the column sum of the fixture labels, and a positive
        total count.
        """
        counts = Balancer(np.copy(self.l)).get_class_count(other_clname=None)
        assert_is_instance(counts, dict, "Unexpected return instance type.")
        # Compare sorted key lists: dict ordering is not part of the contract
        # and keys()/range() are not lists on every Python version.
        assert_list_equal(sorted(counts.keys()),
                          list(range(self.l.shape[-1])),
                          "Expecting a key for each class.")
        for key in counts.keys():
            assert_equals(counts[key], np.sum(self.l[:, int(key)]),
                          "Unexpected count for class '%s'" % (key, ))

        # list(...) so np.sum receives a real sequence rather than a view.
        assert_greater(np.sum(list(counts.values())), 0)
Exemplo n.º 6
0
def get_class_count_hdf5(fpath, key_label='label', other_clname=CLNAME_OTHER):
    """ Count per-class instances in HDF5 and return a dictionary of class ids
    and per-class count

    fpath -- path to HDF5 file
    Keyword arguments:
    key_label -- key for ground truth data in HDF5
    other_clname -- name for negative class (None if non-existent)
    """
    # The context manager guarantees the file is closed once the counts have
    # been computed, including when the key is missing or counting raises.
    with h5py.File(fpath, 'r') as h:
        b = Balancer(np.squeeze(h[key_label]))
        return b.get_class_count(other_clname=other_clname)
Exemplo n.º 7
0
def save_balanced_sampled_class_count_hdf5(fpath,
                                           keys,
                                           fpath_dst,
                                           key_label='label',
                                           other_clname=CLNAME_OTHER,
                                           chunks=None,
                                           target_count=None):
    """ Resample keys in an HDF5 to generate a near balanced dataset
    and save the result into a new HDF5.
    Returns the indices into the original labels that were sampled.
    Not suitable for very large datasets.

    Classes with count > target_count get sub-sampled without replacement.
    Classes with count < target_count get over-sampled.
    Classes with count equal to target_count are copied.

    fpath -- path to source HDF5 file
    keys -- keys to resample (e.g. features)
    fpath_dst -- path to destination HDF5 file
    Keyword arguments:
    key_label -- key for ground truth data in HDF5
    other_clname -- name for negative class (None if non-existent)
    chunks -- forward chunks parameter to use during hdf5 writing
    target_count -- per-class count to target when sampling (forwarded to
                    Balancer.get_idxs_to_balance_class_count)

    Raises IOError when source and destination resolve to the same path.
    """
    src_abs = os.path.abspath(fpath)
    dst_abs = os.path.abspath(fpath_dst)
    if src_abs == dst_abs:
        raise IOError("Cannot read and write to the same file (%s) (%s)" %
                      (fpath, fpath_dst))

    with h5py.File(fpath, 'r') as h_src:
        labls = h_src[key_label][:]
        balancer = Balancer(np.squeeze(labls))
        per_class = balancer.get_class_count(other_clname=other_clname)
        idxs = balancer.get_idxs_to_balance_class_count(per_class.values(),
                                                        target_count)
        # In-place shuffle of the sampled indices along the first axis.
        np.random.shuffle(idxs)
        with h5py.File(fpath_dst, 'w') as h_dst:
            h_dst[key_label] = labls[idxs]
            for k in keys:
                src = h_src[k]
                # Same shape as the source except for the first dimension,
                # which becomes the number of sampled rows.
                dst_shape = list(src.shape)
                dst_shape[0] = len(idxs)
                dst = h_dst.create_dataset(k,
                                           tuple(dst_shape),
                                           src.dtype,
                                           chunks=chunks)
                # Copy one row at a time from source to destination.
                for row, idx_src in enumerate(idxs):
                    dst[row] = src[idx_src]
    return idxs
Exemplo n.º 8
0
def get_class_count_hdf5(fpath,
                         key_label='label',
                         other_clname=CLNAME_OTHER):
    """ Count per-class instances in HDF5 and return a dictionary of class ids
    and per-class count

    fpath -- path to HDF5 file
    Keyword arguments:
    key_label -- key for ground truth data in HDF5
    other_clname -- name for negative class (None if non-existent)
    """
    # The context manager guarantees the file is closed once the counts have
    # been computed, including when the key is missing or counting raises.
    with h5py.File(fpath, 'r') as h:
        b = Balancer(np.squeeze(h[key_label]))
        return b.get_class_count(other_clname=other_clname)
Exemplo n.º 9
0
def save_balanced_sampled_class_count_hdf5(fpath,
                                           keys,
                                           fpath_dst,
                                           key_label='label',
                                           other_clname=CLNAME_OTHER,
                                           chunks=None,
                                           target_count=None
                                           ):
    """ Resample keys in an HDF5 to generate a near balanced dataset
    and save the result into a new HDF5.
    Returns the indices into the original labels that were sampled.
    Not suitable for very large datasets.

    Classes with count > target_count get sub-sampled without replacement.
    Classes with count < target_count get over-sampled.
    Classes with count equal to target_count are copied.

    fpath -- path to source HDF5 file
    keys -- keys to resample (e.g. features)
    fpath_dst -- path to destination HDF5 file
    Keyword arguments:
    key_label -- key for ground truth data in HDF5
    other_clname -- name for negative class (None if non-existent)
    chunks -- forward chunks parameter to use during hdf5 writing
    target_count -- per-class count to target when sampling (forwarded to
                    Balancer.get_idxs_to_balance_class_count)

    Raises IOError when source and destination resolve to the same path.
    """
    src_abs = os.path.abspath(fpath)
    dst_abs = os.path.abspath(fpath_dst)
    if src_abs == dst_abs:
        raise IOError("Cannot read and write to the same file (%s) (%s)" %
                      (fpath, fpath_dst))

    with h5py.File(fpath, 'r') as h_src:
        labls = h_src[key_label][:]
        balancer = Balancer(np.squeeze(labls))
        per_class = balancer.get_class_count(other_clname=other_clname)
        idxs = balancer.get_idxs_to_balance_class_count(per_class.values(),
                                                        target_count)
        # In-place shuffle of the sampled indices along the first axis.
        np.random.shuffle(idxs)
        with h5py.File(fpath_dst, 'w') as h_dst:
            h_dst[key_label] = labls[idxs]
            for k in keys:
                src = h_src[k]
                # Same shape as the source except for the first dimension,
                # which becomes the number of sampled rows.
                dst_shape = list(src.shape)
                dst_shape[0] = len(idxs)
                dst = h_dst.create_dataset(k, tuple(dst_shape),
                                           src.dtype,
                                           chunks=chunks)
                # Copy one row at a time from source to destination.
                for row, idx_src in enumerate(idxs):
                    dst[row] = src[idx_src]
    return idxs
Exemplo n.º 10
0
    def test_get_class_count_other_empty(self):
        """Check counts when a named 'other' bin is requested.

        Asserts that the result is a dict with one key per label column plus
        the 'other' key, that the 'other' count is 0, and that every remaining
        count equals the corresponding column sum of the fixture labels.
        """
        other_clname = 'other_class_bin'
        balancer = Balancer(np.copy(self.l))
        counts = balancer.get_class_count(other_clname=other_clname)
        assert_is_instance(counts, dict, "Unexpected return instance type.")
        num_expected_keys = self.l.shape[-1] + 1
        assert_equals(len(counts.keys()), num_expected_keys,
                      "Expecting a key for each class + 1 for 'other'.")

        assert_in(other_clname, counts.keys())

        for key in counts.keys():
            # Pick the expected value and failure message for this key.
            if key == other_clname:
                expected = 0
                msg = "Unexpected count for 'other' class"
            else:
                expected = np.sum(self.l[:, int(key)])
                msg = "Unexpected count for class '%s'" % (key,)
            assert_equals(counts[key], expected, msg)
Exemplo n.º 11
0
    def test_get_idxs_to_balance_class_count_other_not_highest(self):
        """Balance class counts when the 'other' class is not the largest.

        With fixture counts of 10 (class 0), 50 (class 1) and 40 ('other'),
        the balancing indices are expected to reference 50 samples from each
        of the three index ranges.
        """
        bal = Balancer(np.copy(self.l))
        counts = bal.get_class_count(other_clname=CLNAME_OTHER)
        assert_in(CLNAME_OTHER, counts.keys())

        assert_equals(counts[0], 10)
        assert_equals(counts[1], 50)
        assert_equals(counts[CLNAME_OTHER], 40)
        # NOTE(review): tolerance_order is the third positional argument of
        # assert_almost_equal; for integer counts this presumably still
        # amounts to an exact comparison -- confirm against the imported
        # assert_almost_equal.
        tolerance_order = 1
        idxs = bal.get_idxs_to_balance_class_count(counts.values())
        assert_almost_equal(
            np.count_nonzero(np.logical_and(idxs >= 0, idxs < 10)),
            10 + (50 - 10), tolerance_order)
        # The third argument of assert_equals is a failure message, not a
        # tolerance, so the stray `1` is no longer passed.
        assert_equals(np.count_nonzero(np.logical_and(idxs >= 10, idxs < 60)),
                      50)
        assert_almost_equal(np.count_nonzero(idxs >= 60), 40 + (50 - 40),
                            tolerance_order)
Exemplo n.º 12
0
    def test_get_class_count_other_empty(self):
        """Check counts when a named 'other' bin is requested.

        Asserts that the result is a dict with one key per label column plus
        the 'other' key, that the 'other' count is 0, and that every remaining
        count equals the corresponding column sum of the fixture labels.
        """
        other_clname = 'other_class_bin'
        balancer = Balancer(np.copy(self.l))
        counts = balancer.get_class_count(other_clname=other_clname)
        assert_is_instance(counts, dict, "Unexpected return instance type.")
        num_expected_keys = self.l.shape[-1] + 1
        assert_equals(len(counts.keys()), num_expected_keys,
                      "Expecting a key for each class + 1 for 'other'.")

        assert_in(other_clname, counts.keys())

        for key in counts.keys():
            # Pick the expected value and failure message for this key.
            if key == other_clname:
                expected = 0
                msg = "Unexpected count for 'other' class"
            else:
                expected = np.sum(self.l[:, int(key)])
                msg = "Unexpected count for class '%s'" % (key, )
            assert_equals(counts[key], expected, msg)
Exemplo n.º 13
0
    def test_get_idxs_to_balance_class_count_other_not_highest(self):
        """Sample every class to a fixed target count.

        With fixture counts of 10 (class 0), 50 (class 1) and 40 ('other'),
        sample_idxs_to_target_count is expected to return target_count
        indices from each of the three index ranges.
        """
        bal = Balancer(np.copy(self.l))
        counts = bal.get_class_count(other_clname=CLNAME_OTHER)
        assert_in(CLNAME_OTHER, counts.keys())

        for class_key, expected in ((0, 10), (1, 50), (CLNAME_OTHER, 40)):
            assert_equals(counts[class_key], expected)

        # NOTE(review): only a target of 500 is exercised; the targets 10 and
        # 20 were listed in a commented-out alternative -- reason not visible
        # here.
        for target_count in [500]:
            idxs = bal.sample_idxs_to_target_count(counts.values(),
                                                   target_count)

            assert_equals(idxs.size, (self.num_classes + 1) * target_count)
            in_first_range = idxs < 10
            assert_equals(np.count_nonzero(in_first_range), target_count)
            in_second_range = np.logical_and(idxs >= 10, idxs < 60)
            assert_equals(np.count_nonzero(in_second_range), target_count)
            in_last_range = idxs >= 60
            assert_equals(np.count_nonzero(in_last_range), target_count)
Exemplo n.º 14
0
    def test_get_idxs_to_balance_class_count_other_not_highest(self):
        """Sample every class to a fixed target count.

        With fixture counts of 10 (class 0), 50 (class 1) and 40 ('other'),
        sample_idxs_to_target_count is expected to return target_count
        indices from each of the three index ranges.
        """
        bal = Balancer(np.copy(self.l))
        counts = bal.get_class_count(other_clname=CLNAME_OTHER)
        assert_in(CLNAME_OTHER, counts.keys())

        for class_key, expected in ((0, 10), (1, 50), (CLNAME_OTHER, 40)):
            assert_equals(counts[class_key], expected)

        # NOTE(review): only a target of 500 is exercised; the targets 10 and
        # 20 were listed in a commented-out alternative -- reason not visible
        # here.
        for target_count in [500]:
            idxs = bal.sample_idxs_to_target_count(counts.values(),
                                                   target_count)

            assert_equals(idxs.size, (self.num_classes + 1) * target_count)
            in_first_range = idxs < 10
            assert_equals(np.count_nonzero(in_first_range), target_count)
            in_second_range = np.logical_and(idxs >= 10, idxs < 60)
            assert_equals(np.count_nonzero(in_second_range), target_count)
            in_last_range = idxs >= 60
            assert_equals(np.count_nonzero(in_last_range), target_count)
Exemplo n.º 15
0
    def test_get_idxs_to_balance_class_count_other_not_highest(self):
        """Balance class counts when the 'other' class is not the largest.

        With fixture counts of 10 (class 0), 50 (class 1) and 40 ('other'),
        the balancing indices are expected to reference 50 samples from each
        of the three index ranges.
        """
        bal = Balancer(np.copy(self.l))
        counts = bal.get_class_count(other_clname=CLNAME_OTHER)
        assert_in(CLNAME_OTHER, counts.keys())

        assert_equals(counts[0], 10)
        assert_equals(counts[1], 50)
        assert_equals(counts[CLNAME_OTHER], 40)
        # NOTE(review): tolerance_order is the third positional argument of
        # assert_almost_equal; for integer counts this presumably still
        # amounts to an exact comparison -- confirm against the imported
        # assert_almost_equal.
        tolerance_order = 1
        idxs = bal.get_idxs_to_balance_class_count(counts.values())
        assert_almost_equal(
            np.count_nonzero(np.logical_and(idxs >= 0, idxs < 10)),
            10 + (50 - 10), tolerance_order)
        # The third argument of assert_equals is a failure message, not a
        # tolerance, so the stray `1` is no longer passed.
        assert_equals(np.count_nonzero(np.logical_and(idxs >= 10, idxs < 60)),
                      50)
        assert_almost_equal(np.count_nonzero(idxs >= 60),
                            40 + (50 - 40), tolerance_order)
Exemplo n.º 16
0
    def test_get_class_count_other_non_empty(self):
        """Check counts when all-zero label rows are appended.

        Appends 2 * n all-zero rows to the fixture labels and asserts that
        they are counted under the 'other' key, while every remaining count
        still equals the corresponding column sum of the fixture labels.
        """
        other_clname = 'foo'
        n, num_classes = self.l.shape
        # All-zero label rows stand in for samples of the 'other' class.
        unlabeled = np.zeros((n * 2, num_classes), dtype=self.l.dtype)
        labls = np.vstack((self.l, unlabeled))
        counts = Balancer(labls).get_class_count(other_clname=other_clname)
        assert_is_instance(counts, dict, "Unexpected return instance type.")
        num_expected_keys = self.l.shape[-1] + 1
        assert_equals(len(counts.keys()), num_expected_keys,
                      "Expecting a key for each class + 1 for 'other'.")

        assert_in(other_clname, counts.keys())

        for key in counts.keys():
            # Pick the expected value and failure message for this key.
            if key == other_clname:
                expected = n * 2
                msg = "Unexpected count for '%s' class" % (other_clname,)
            else:
                expected = np.sum(self.l[:, int(key)])
                msg = "Unexpected count for class '%s'" % (key,)
            assert_equals(counts[key], expected, msg)
Exemplo n.º 17
0
    def test_get_idxs_to_balance_class_count_no_other(self):
        """Balance class counts when no 'other' class is requested.

        Adds a third label column that is set for rows 60 and up, so the
        counts are 10, 50 and 40. The balancing indices are then expected to
        reference 50 samples from each of the three index ranges.
        """
        new_col = np.zeros((len(self.l), 1))
        labls = np.hstack((self.l, new_col))
        labls[60:, -1] = 1
        bal = Balancer(labls)
        counts = bal.get_class_count(other_clname=None)
        assert_not_in(CLNAME_OTHER, counts.keys())

        assert_equals(counts[0], 10)
        assert_equals(counts[1], 50)
        assert_equals(counts[2], 40)
        # NOTE(review): tolerance_order is the third positional argument of
        # assert_almost_equal; for integer counts this presumably still
        # amounts to an exact comparison -- confirm against the imported
        # assert_almost_equal.
        tolerance_order = 1
        idxs = bal.get_idxs_to_balance_class_count(counts.values())
        assert_almost_equal(
            np.count_nonzero(np.logical_and(idxs >= 0, idxs < 10)),
            10 + (50 - 10), tolerance_order)
        # The third argument of assert_equals is a failure message, not a
        # tolerance, so the stray `1` is no longer passed.
        assert_equals(np.count_nonzero(np.logical_and(idxs >= 10, idxs < 60)),
                      50)
        assert_almost_equal(np.count_nonzero(idxs >= 60), 40 + (50 - 40),
                            tolerance_order)
Exemplo n.º 18
0
    def test_get_idxs_to_balance_class_count_other_highest(self):
        """Balance class counts when the 'other' class has the highest count.

        Relabels the fixture so that, per the count assertions below, class 0
        has 10 samples, class 1 has 20 and the 'other' class has 70. The
        balancing indices are then expected to reference 70 samples from
        each of the three index ranges.
        """
        # Clear class 1 on rows 10-59, then re-assign it to rows 10-29 only.
        self.l[10:60, 1] = 0
        self.l[10:30, 1] = 1
        bal = Balancer(np.copy(self.l))
        counts = bal.get_class_count(other_clname=CLNAME_OTHER)
        assert_in(CLNAME_OTHER, counts.keys())

        assert_equals(counts[0], 10)
        assert_equals(counts[1], 20)
        assert_equals(counts[CLNAME_OTHER], 70)
        # list(...) so np.max receives a real sequence even where
        # dict.values() returns a view; the message is now actually
        # formatted with the class name.
        assert_equals(counts[CLNAME_OTHER], np.max(list(counts.values())),
                      "this test requires class count for %s to be highest!" %
                      (CLNAME_OTHER, ))
        # NOTE(review): tolerance_order is the third positional argument of
        # assert_almost_equal; for integer counts this presumably still
        # amounts to an exact comparison -- confirm against the imported
        # assert_almost_equal.
        tolerance_order = 1
        idxs = bal.get_idxs_to_balance_class_count(counts.values())
        assert_almost_equal(
            np.count_nonzero(np.logical_and(idxs >= 0, idxs < 10)),
            10 + (70 - 10), tolerance_order)
        assert_almost_equal(
            np.count_nonzero(np.logical_and(idxs >= 10, idxs < 30)),
            20 + (70 - 20), tolerance_order)
        # The third argument of assert_equals is a failure message, not a
        # tolerance, so none is passed here.
        assert_equals(np.count_nonzero(idxs >= 30), 70)
Exemplo n.º 19
0
    def test_get_class_count_other_non_empty(self):
        """Check counts when all-zero label rows are appended.

        Appends 2 * n all-zero rows to the fixture labels and asserts that
        they are counted under the 'other' key, while every remaining count
        still equals the corresponding column sum of the fixture labels.
        """
        other_clname = 'foo'
        n, num_classes = self.l.shape
        # All-zero label rows stand in for samples of the 'other' class.
        unlabeled = np.zeros((n * 2, num_classes), dtype=self.l.dtype)
        labls = np.vstack((self.l, unlabeled))
        counts = Balancer(labls).get_class_count(other_clname=other_clname)
        assert_is_instance(counts, dict, "Unexpected return instance type.")
        num_expected_keys = self.l.shape[-1] + 1
        assert_equals(len(counts.keys()), num_expected_keys,
                      "Expecting a key for each class + 1 for 'other'.")

        assert_in(other_clname, counts.keys())

        for key in counts.keys():
            # Pick the expected value and failure message for this key.
            if key == other_clname:
                expected = n * 2
                msg = "Unexpected count for '%s' class" % (other_clname, )
            else:
                expected = np.sum(self.l[:, int(key)])
                msg = "Unexpected count for class '%s'" % (key, )
            assert_equals(counts[key], expected, msg)
Exemplo n.º 20
0
    def test_get_idxs_to_balance_class_count_no_other(self):
        """Balance class counts when no 'other' class is requested.

        Adds a third label column that is set for rows 60 and up, so the
        counts are 10, 50 and 40. The balancing indices are then expected to
        reference 50 samples from each of the three index ranges.
        """
        new_col = np.zeros((len(self.l), 1))
        labls = np.hstack((self.l, new_col))
        labls[60:, -1] = 1
        bal = Balancer(labls)
        counts = bal.get_class_count(other_clname=None)
        assert_not_in(CLNAME_OTHER, counts.keys())

        assert_equals(counts[0], 10)
        assert_equals(counts[1], 50)
        assert_equals(counts[2], 40)
        # NOTE(review): tolerance_order is the third positional argument of
        # assert_almost_equal; for integer counts this presumably still
        # amounts to an exact comparison -- confirm against the imported
        # assert_almost_equal.
        tolerance_order = 1
        idxs = bal.get_idxs_to_balance_class_count(counts.values())
        assert_almost_equal(
            np.count_nonzero(np.logical_and(idxs >= 0, idxs < 10)),
            10 + (50 - 10), tolerance_order)
        # The third argument of assert_equals is a failure message, not a
        # tolerance, so the stray `1` is no longer passed.
        assert_equals(np.count_nonzero(np.logical_and(idxs >= 10, idxs < 60)),
                      50)
        assert_almost_equal(np.count_nonzero(idxs >= 60),
                            40 + (50 - 40), tolerance_order)
Exemplo n.º 21
0
# Copyright Pololu Corporation.  For more information, see https://www.pololu.com/
from flask import Flask
from flask import render_template
from flask import redirect
from subprocess import call
# Flask application object; static files and templates are looked up under
# the server_balboa_resources/ directory.
app = Flask(__name__,
            static_folder='server_balboa_resources/static',
            template_folder='server_balboa_resources/templates')
# NOTE(review): debug mode is switched on unconditionally -- confirm this
# server is never reachable from an untrusted network.
app.debug = True

# Module-level AStar instance (a_star is presumably a project-local module;
# what it talks to is not visible here -- verify in a_star.py).
from a_star import AStar
a_star = AStar()

# Module-level Balancer instance (balance is presumably a project-local
# module -- verify in balance.py).
from balance import Balancer
balancer = Balancer()

import json

# Module-level LED state flags, all initialised to False.
led0_state = False
led1_state = False
led2_state = False


@app.route("/")
def hello():
    """Handle requests for the site root by rendering ``index.html``."""
    page = render_template("index.html")
    return page


@app.route("/status.json")
def status():
Exemplo n.º 22
0
 def test_get_class_count_other_default(self):
     """Counts requested with CLNAME_OTHER must contain a CLNAME_OTHER key."""
     balancer = Balancer(np.copy(self.l))
     counts = balancer.get_class_count(other_clname=CLNAME_OTHER)
     assert_in(CLNAME_OTHER, counts.keys())
Exemplo n.º 23
0
 def test_get_class_count_other_default(self):
     """Counts requested with CLNAME_OTHER must contain a CLNAME_OTHER key."""
     balancer = Balancer(np.copy(self.l))
     counts = balancer.get_class_count(other_clname=CLNAME_OTHER)
     assert_in(CLNAME_OTHER, counts.keys())