def get_slice_data_from_file(dataset, split, class_idx, gmm, nr_pos, nr_neg):
    """Load per-slice sufficient statistics for one class from disk.

    Reads the merged statistics stored under ``dataset.SSTATS_DIR``,
    reshapes them to one row per slice, binarizes the labels against
    `class_idx`, and wraps everything in a SliceData.
    """
    # Names of the samples belonging to this split / class.
    sample_names = _get_samples(
        dataset, class_idx, data_type=split, nr_pos=nr_pos, nr_neg=nr_neg)[0]

    # Length of one statistics vector: K weights plus 2 * D * K moments.
    desc_length = gmm.k + 2 * gmm.d * gmm.k

    stats_path = os.path.join(dataset.SSTATS_DIR, 'stats.tmp')
    stats, raw_labels, slice_info = SstatsMap(stats_path).get_merged(
        sample_names, desc_length)

    # One row per slice.
    stats = stats.reshape((-1, desc_length))
    class_labels = tuple_labels_to_list_labels(raw_labels, class_idx)

    return SliceData(stats, class_labels, slice_info)
def master(src_cfg, suffix_in, suffix_out, K, N, nr_processes, double_norm):
    """Dispatch statistics post-processing over samples, optionally in parallel.

    Parameters
    ----------
    src_cfg: str
        Dataset configuration name.
    suffix_in, suffix_out: str
        Suffixes appended to the dataset's feature directory for the
        input and output statistics.
    K: int
        Number of GMM clusters (vocabulary size).
    N: int
        Forwarded unchanged to the worker.
    nr_processes: int
        Number of worker processes; values <= 1 run serially in-process.
    double_norm: bool
        If True, apply double normalization (requires loading the GMM);
        otherwise just merge the statistics.
    """
    D = 64  # assumes 64-dimensional local descriptors -- TODO confirm
    dataset = Dataset(src_cfg, nr_clusters=K)
    samples = [
        str(sample)
        for sample in dataset.get_data('train')[0] + dataset.get_data('test')[0]]

    if double_norm:
        worker = double_normalization
        suffix = '.double_norm'
        gmm = load_gmm(
            os.path.join(dataset.FEAT_DIR + suffix_in, 'gmm', 'gmm_%d' % K))
    else:
        worker = merge
        suffix = ''
        gmm = None  # merging does not need the GMM

    path_in = os.path.join(
        dataset.FEAT_DIR + suffix_in,
        'statistics_k_%d' % dataset.VOC_SIZE, 'stats.tmp')
    path_out = os.path.join(
        dataset.FEAT_DIR + suffix_out,
        'statistics_k_%d' % dataset.VOC_SIZE, 'stats.tmp' + suffix)

    sstats_in = SstatsMap(path_in)
    sstats_out = SstatsMap(path_out)
    # Statistics length: K weights + 2 * D * K first/second moments.
    len_sstats = dataset.VOC_SIZE + 2 * D * dataset.VOC_SIZE

    kwargs = {
        'N': N,
        'sstats_in': sstats_in,
        'sstats_out': sstats_out,
        'len_sstats': len_sstats,
        'gmm': gmm}

    if nr_processes > 1:
        # FIX: floor division keeps the chunk size an int under Python 3
        # (plain `/` would produce a float and break the slice indices).
        nr_samples_per_process = len(samples) // nr_processes + 1
        processes = []
        for ii in xrange(nr_processes):
            chunk = samples[
                ii * nr_samples_per_process:(ii + 1) * nr_samples_per_process]
            process = mp.Process(target=worker, args=(chunk, ), kwargs=kwargs)
            process.start()
            processes.append(process)
        # FIX: wait for the workers so the parallel branch, like the serial
        # one below, returns only after all output has been written.
        for process in processes:
            process.join()
    else:
        worker(samples, **kwargs)
def setup(self):
    """Create a fixed random data set and its metadata before each test."""
    self.nr_dims = 10
    # Fixed seed so the generated arrays are reproducible across runs.
    np.random.seed(1)
    self.data = SstatsMap('expected_results/test_data')
    # Three random statistics vectors plus the metadata describing them.
    self.xx = np.random.randn(3, self.nr_dims)
    self.info = dict(
        label=4,
        nr_slices=3,
        begin_frames=[1, 3, 10],
        end_frames=[2, 9, 15],
        nr_descs_per_slice=[10, 9, 13])
class TestData(object):
    """Tests for SstatsMap write/read/check round-trips.

    NOTE(review): these tests write real files under
    'expected_results/test_data' and never clean them up -- consider
    adding a teardown that removes the generated entries.
    """

    def setup(self):
        """Build deterministic test data before each test (nose-style)."""
        self.nr_dims = 10
        np.random.seed(1)  # fixed seed -> reproducible arrays
        self.data = SstatsMap('expected_results/test_data')
        # Generate some data.
        self.xx = np.random.randn(3, self.nr_dims)
        self.info = {
            'label': 4,
            'nr_slices': 3,
            'begin_frames': [1, 3, 10],
            'end_frames': [2, 9, 15],
            'nr_descs_per_slice': [10, 9, 13]}

    def test_write_data(self):
        """Writing with no info, full info, and partial info succeeds."""
        self.data.write('test_1', self.xx[0])
        self.data.write('test_2', self.xx[1], info=self.info)
        self.data.write('test_3', self.xx[2], info=dict(label=10))

    def test_read_data(self):
        """Data and info are read back exactly as written."""
        # Nose tests in random order, so write here instead of relying
        # on test_write_data having run first.
        self.data.write('test_1', self.xx[0])
        self.data.write('test_2', self.xx[1], info=self.info)

        test_1_data = self.data.read('test_1')
        assert_array_almost_equal(self.xx[0], test_1_data)

        test_2_data = self.data.read('test_2')
        assert_array_almost_equal(self.xx[1], test_2_data)

        test_2_info = self.data.read_info('test_2')
        assert test_2_info == self.info

    @raises(Exception)
    def test_read_data_exception(self):
        """Reading a non-existent entry raises."""
        self.data.read('file_that_does_not_exist')

    def test_check_data(self):
        """check() validates descriptor length and rejects empty entries."""
        self.data.write('test_1', self.xx)
        self.data.write('test_2', [])
        # FIX (PEP 8 E712): assert truth values directly rather than
        # comparing with `== True` / `== False`.
        assert self.data.check(['test_1'], self.nr_dims, verbose=False)
        assert not self.data.check(
            ['test_1'], self.nr_dims + 1, verbose=False)
        assert not self.data.check(
            ['test_1', 'test_2'], self.nr_dims, verbose=False)