Пример #1
0
    def test_pca(self):
        """Time a 50-component PCA over the full tensor of 'Bellbird_TMI'.

        The tensor row is looked up by database together with two hashes:
        the ids of all Feature rows and of all Aggregation rows (each
        ordered by id) joined with '-'.

        Raises:
            Exception: if no matching FullTensorData row exists.
        """
        django.setup()
        # NOTE(review): these imports sit below django.setup(), presumably
        # because the models need Django to be configured first - confirm.
        from koe.models import Feature, Aggregation, FullTensorData, Database
        from koe.ts_utils import bytes_to_ndarray, get_rawdata_from_binary

        def joined_ids(queryset):
            # Build the hash string: every id of the queryset, '-'-separated.
            return '-'.join(
                str(pk) for pk in queryset.values_list('id', flat=True))

        database = Database.objects.get(name='Bellbird_TMI')
        features = Feature.objects.all().order_by('id')
        aggregations = Aggregation.objects.all().order_by('id')

        full_tensor = FullTensorData.objects.filter(
            database=database,
            features_hash=joined_ids(features),
            aggregations_hash=joined_ids(aggregations),
        ).first()
        if full_tensor is None:
            raise Exception('Tensor not found')

        sids_path = full_tensor.get_sids_path()
        bytes_path = full_tensor.get_bytes_path()

        # The number of stored ids is passed on to the raw-data reader.
        sids = bytes_to_ndarray(sids_path, np.int32)
        full_data = get_rawdata_from_binary(bytes_path, len(sids))

        with tictoc('PCA'):
            reducer = pca(n_components=50)
            reducer.fit_transform(full_data)
Пример #2
0
    def _test_update(self, nupdate):
        """Store a mix of overwritten and brand-new ids, then verify retrieval.

        ``nupdate // 2`` of the updated ids are taken from the front of
        ``self.ids``; the remainder are fresh ids directly above the current
        maximum. ``self.ids`` and ``self.arrs`` are replaced by the merged,
        shuffled expectation before the store is exercised.

        Args:
            nupdate: number of random arrays requested for the update.
        """
        _, update_arrs = create_random_id_based_dataset(nupdate)

        expected = dict(zip(self.ids, self.arrs))

        # Split the update into ids that already exist (to be overwritten)
        # and ids that do not exist yet (to be appended).
        n_existing = nupdate // 2
        n_fresh = nupdate - n_existing

        highest = np.max(self.ids)
        fresh_ids = np.arange(highest + 1, highest + n_fresh + 1)
        existing_ids = self.ids[:n_existing]
        update_ids = np.concatenate((existing_ids, fresh_ids))

        expected.update(zip(update_ids, update_arrs))

        self.ids = np.array(list(expected.keys()))
        np.random.shuffle(self.ids)
        self.arrs = [expected[key] for key in self.ids]

        with tictoc('Test update {} items'.format(nupdate)):
            bs.store(update_ids, update_arrs, self.loc)

        retrieved = bs.retrieve(self.ids, self.loc)
        for key, retrieved_arr in zip(self.ids, retrieved):
            self.assertTrue(np.allclose(expected[key], retrieved_arr))
Пример #3
0
    def _test_update(self, nupdate):
        """Update the index/value files with random ids and verify retrieval.

        Ids are drawn without replacement from ``range(NUM_POINTS * 10)``
        and redrawn until the number of ids not already in ``self.ids`` is
        strictly between 0 and ``NUM_POINTS``. ``self.ids`` and ``self.arrs``
        are replaced by the merged, shuffled expectation.

        Args:
            nupdate: how many of the drawn ids are used for the update.
        """
        _, update_arrs = create_random_id_based_dataset()
        npoints = NUM_POINTS

        expected = dict(zip(self.ids, self.arrs))

        # Keep drawing until the batch contains ids that are not stored yet
        # (the original intent is a mix of appended and overwritten ids).
        while True:
            candidates = np.arange(npoints * 10)
            np.random.shuffle(candidates)
            update_ids = candidates[:nupdate]
            n_unseen = sum(1 for x in update_ids if x not in self.ids)
            if 0 < n_unseen < npoints:
                break

        expected.update(zip(update_ids, update_arrs))

        self.ids = np.array(list(expected.keys()))
        np.random.shuffle(self.ids)
        self.arrs = [expected[key] for key in self.ids]

        with tictoc('Test update {} items'.format(nupdate)):
            bs.store(update_ids, update_arrs, self.index_filename,
                     self.value_filename)

        retrieved = bs.retrieve(self.ids, self.index_filename,
                                self.value_filename)
        for key, retrieved_arr in zip(self.ids, retrieved):
            self.assertTrue(np.allclose(expected[key], retrieved_arr))
Пример #4
0
    def _test_retrieve_ids(self, limit=None):
        """Time ``bs.retrieve_ids`` and, when a limit is given, bound the result.

        Args:
            limit: optional ``(low, high)`` pair. When truthy, the smallest
                and largest retrieved ids must lie within ``bs.BATCH_SIZE``
                of the respective bound.
        """
        with tictoc('Test retrieving IDs limit={}'.format(limit)):
            ids = bs.retrieve_ids(self.loc, limit)

        if not limit:
            return

        lower, upper = limit
        self.assertGreaterEqual(ids.min(), lower - bs.BATCH_SIZE)
        self.assertLessEqual(ids.max(), upper + bs.BATCH_SIZE)
    def test_spectral_derivatives(self):
        """Recompute spectral derivatives from two tapers and check fixtures.

        Two STFTs of ``cls.sig`` are taken, one per column of ``cls.tapers``.
        Their real and imaginary parts are combined into time and frequency
        derivatives, which are mixed into ``derivs`` and compared with the
        class-level ``time_deriv``, ``freq_deriv`` and ``derivs``. The
        derivatives are then thresholded and the sorted zero-crossing
        coordinates compared with ``cls.peaks_x`` / ``cls.peaks_y``.
        """
        cls = self.__class__
        with tictoc('test_spectral_derivatives'):
            # Everything except the window is identical for both transforms.
            shared_stft_args = dict(
                y=cls.sig,
                n_fft=cls.nfft,
                win_length=cls.window_length,
                hop_length=cls.window_length - cls.noverlap,
                center=False,
                dtype=np.complex128,
            )
            spec_a = stft(window=cls.tapers[:, 0], **shared_stft_args)
            spec_b = stft(window=cls.tapers[:, 1], **shared_stft_args)

            re_a, im_a = np.real(spec_a), np.imag(spec_a)
            re_b, im_b = np.real(spec_b), np.imag(spec_b)

            time_deriv = (-re_a * re_b) - (im_a * im_b)
            freq_deriv = (im_a * re_b) - (re_a * im_b)

            # One mixing angle per column; the 0.1 offset is added to the
            # denominator before dividing.
            ratio = np.max(time_deriv, axis=0) / (
                np.max(freq_deriv, axis=0) + 0.1)
            angle = np.arctan(ratio)
            derivs = time_deriv * np.sin(angle) + freq_deriv * np.cos(angle)
            derivs[:3, :] = 0

            # These comparisons must run before derivs is thresholded below.
            self.assertTrue(np.allclose(time_deriv, cls.time_deriv))
            self.assertTrue(np.allclose(freq_deriv, cls.freq_deriv))
            self.assertTrue(np.allclose(derivs, cls.derivs))

            magnitude = np.abs(derivs)
            mean_thresh = 0.3 * np.mean(magnitude, axis=0)
            median_thresh = 100 * np.median(magnitude, axis=1)

            # A cell is suppressed when it is at or below either threshold.
            below_mean = magnitude <= mean_thresh[None, :]
            below_median = magnitude <= median_thresh[:, None]
            derivs[below_mean | below_median] = -0.1

            zcy, zcx = find_zc(derivs)
            zcx.sort()
            zcy.sort()

            self.assertTrue(np.allclose(zcx, cls.peaks_x))
            self.assertTrue(np.allclose(zcy, cls.peaks_y))
Пример #6
0
    def test_my_stft(self):
        """Check that ``my_stft`` output is close to the stored ``self.s``."""
        with tictoc('test_my_stft'):
            stft_args = dict(
                sig=self.sig,
                fs=self.fs,
                nfft=self.nfft,
                window=self.window,
                noverlap=self.noverlap,
            )
            computed = my_stft(**stft_args)

            self.assertTrue(np.allclose(computed, self.s))
Пример #7
0
    def test_librosa_stft(self):
        """Check that ``librosa.stft`` output is close to the stored ``self.s``.

        The hop length is the window size minus the overlap, and centring
        is disabled.
        """
        with tictoc('test_librosa_stft'):
            stft_args = dict(
                y=self.sig,
                n_fft=self.nfft,
                win_length=self.window_size,
                hop_length=self.window_size - self.noverlap,
                window=self.window,
                center=False,
            )
            computed = librosa.stft(**stft_args)

            self.assertTrue(np.allclose(computed, self.s))
Пример #8
0
    def _test_store(self):
        """Store ``self.ids``/``self.arrs`` and verify the batch files on disk.

        After ``bs.store`` runs, every file in ``self.loc`` whose name starts
        with ``bs.INDEX_PREFIX`` is read together with its
        ``bs.VALUE_PREFIX`` counterpart, in ascending order of the batch
        start encoded in the file name. The concatenated index rows and
        values are compared against ``self.sorted_ids`` and
        ``self.sorted_arrs``.
        """
        with tictoc('Test storing'):
            bs.store(self.ids, self.arrs, self.loc)

        prefix_len = len(bs.INDEX_PREFIX)

        # Map each batch start to its index file; the part of the name after
        # the prefix is expected to look like "<begin>-<end>".
        index_file_by_begin = {}
        for filename in os.listdir(self.loc):
            if not filename.startswith(bs.INDEX_PREFIX):
                continue
            batch_begin, _ = map(int, filename[prefix_len:].split('-'))
            index_file_by_begin[batch_begin] = filename

        index_chunks = []
        value_chunks = []
        for batch_begin in sorted(index_file_by_begin):
            index_file = index_file_by_begin[batch_begin]
            batch_part = index_file[prefix_len:]
            index_file_path = os.path.join(self.loc, index_file)
            value_file_path = os.path.join(self.loc,
                                           bs.VALUE_PREFIX + batch_part)

            index_chunk = np.fromfile(index_file_path, dtype=np.int32).reshape(
                (-1, bs.INDEX_FILE_NCOLS))
            # A unittest assertion (rather than a bare ``assert``) so that
            # the check still runs when Python is started with -O.
            self.assertLessEqual(len(index_chunk), bs.BATCH_SIZE)
            value_chunk = np.fromfile(value_file_path, dtype=np.float32)

            index_chunks.append(index_chunk)
            value_chunks.append(value_chunk)

        index_arr = np.concatenate(index_chunks).reshape(
            (-1, bs.INDEX_FILE_NCOLS))
        value_arr = np.concatenate(value_chunks)

        self.assertEqual(len(index_arr), len(self.ids))
        self.assertTrue(np.allclose(self.sorted_ids, index_arr[:, 0]))
        arrs_ravel = np.concatenate([x.ravel() for x in self.sorted_arrs])
        self.assertTrue(np.allclose(value_arr, arrs_ravel))

        for expected_id, arr, stored_index in zip(
                self.sorted_ids, self.sorted_arrs, index_arr):
            # Each index row unpacks to five columns; the two middle ones
            # are not checked here.
            stored_id, _, _, stored_dim0, stored_dim1 = stored_index

            self.assertEqual(expected_id, stored_id)
            self.assertEqual(stored_dim0, arr.shape[0] if arr.ndim >= 1 else 0)
            self.assertEqual(stored_dim1, arr.shape[1] if arr.ndim == 2 else 0)
            # A stored dimension of 0 counts as 1 when recomputing the size.
            self.assertEqual(
                max(1, stored_dim0) * max(stored_dim1, 1), np.size(arr))
Пример #9
0
def extract_rawdata(ids, features):
    """Collect, for every id, the stored value of each feature.

    Args:
        ids: ids to retrieve; they are also used as dictionary keys.
        features: iterable of objects with a ``name`` attribute, which is
            formatted into the storage location template.

    Returns:
        A list parallel to ``ids``. Each entry is the list of values
        retrieved for that id, one per feature, in feature order.
    """
    loc_template = get_storage_loc_template()
    values_by_id = {item_id: [] for item_id in ids}

    for feature in features:
        feature_loc = loc_template.format(feature.name)
        # The timer label is the feature name.
        with tictoc('{}'.format(feature.name)):
            retrieved = bs.retrieve(ids, feature_loc)
            for item_id, value in zip(ids, retrieved):
                values_by_id[item_id].append(value)

    return [values_by_id[item_id] for item_id in ids]
Пример #10
0
def extract_rawdata(f2bs, ids, features):
    """Collect, for every id, the stored value of each feature.

    Args:
        f2bs: mapping from a feature to its ``(index_filename,
            value_filename)`` pair.
        ids: ids to retrieve; they are also used as dictionary keys.
        features: iterable of features; each must be a key of ``f2bs`` and
            have a ``name`` attribute (used as the timer label).

    Returns:
        A list parallel to ``ids``. Each entry is the list of values
        retrieved for that id, one per feature, in feature order.
    """
    values_by_id = {item_id: [] for item_id in ids}

    for feature in features:
        index_filename, value_filename = f2bs[feature]
        with tictoc('{}'.format(feature.name)):
            retrieved = binstorage.retrieve(
                ids, index_filename, value_filename)
            for item_id, value in zip(ids, retrieved):
                values_by_id[item_id].append(value)

    return [values_by_id[item_id] for item_id in ids]
Пример #11
0
    def test_scipy_stft(self):
        """Check ``signal.stft`` against the stored ``self.f``/``self.t``/``self.s``.

        The transform is rescaled in place by the window sum before the
        comparison.
        """
        with tictoc('test_scipy_stft'):
            stft_args = dict(
                fs=self.fs,
                window=self.window,
                nperseg=self.window_size,
                noverlap=self.noverlap,
                nfft=self.nfft,
                return_onesided=True,
                boundary=None,
                padded=False,
            )
            freqs, times, spectrum = signal.stft(self.sig, **stft_args)

            # Per the original author, SciPy's scaling differs from Matlab's
            # and librosa's; multiplying by the window sum aligns them.
            spectrum *= self.window.sum()

            expected_and_actual = (
                (freqs, self.f),
                (times, self.t),
                (spectrum, self.s),
            )
            for actual, expected in expected_and_actual:
                self.assertTrue(np.allclose(actual, expected))
Пример #12
0
    def _test_retrieve(self, nselected):
        """Retrieve a random subset of ids and compare with the known arrays.

        Args:
            nselected: number of ids taken from a shuffled copy of
                ``self.ids``.
        """
        shuffled_ids = copy.deepcopy(self.ids)
        np.random.shuffle(shuffled_ids)
        selected_ids = shuffled_ids[:nselected]

        # For each selected id, look up the array at the position where the
        # id first occurs in self.ids.
        expected_arrs = [
            self.arrs[np.where(self.ids == wanted)[0][0]]
            for wanted in selected_ids
        ]

        with tictoc('Test retrieving {} items'.format(nselected)):
            retrieved_arrs = bs.retrieve(selected_ids, self.index_filename,
                                         self.value_filename)

        self.assertEqual(len(selected_ids), len(retrieved_arrs))
        for expected_arr, retrieved_arr in zip(expected_arrs, retrieved_arrs):
            self.assertTrue(np.allclose(expected_arr, retrieved_arr))
Пример #13
0
    def _test_store(self):
        """Store ``self.ids``/``self.arrs`` and verify the two files on disk.

        Checks the byte size of the index and value files, then every index
        row (id, begin/end span, two dimensions) against the corresponding
        entry of ``self.ids``/``self.arrs``, and finally the flattened
        values.
        """
        with tictoc('Test storing'):
            bs.store(self.ids, self.arrs, self.index_filename,
                     self.value_filename)

        # Both files are read back as 4-byte items (int32 / float32).
        item_bytes = 4
        total_values = sum(np.size(x) for x in self.arrs)

        index_filesize = os.path.getsize(self.index_filename)
        expected_index_bytes = len(self.ids) * bs.INDEX_FILE_NCOLS * item_bytes

        value_filesize = os.path.getsize(self.value_filename)
        expected_value_bytes = total_values * item_bytes

        self.assertEqual(index_filesize, expected_index_bytes)
        self.assertEqual(value_filesize, expected_value_bytes)

        with open(self.index_filename, 'rb') as index_file:
            flat_index = np.fromfile(index_file, dtype=np.int32)

        nids = len(flat_index) // bs.INDEX_FILE_NCOLS
        self.assertEqual(nids, len(self.ids))

        index_rows = flat_index.reshape((nids, bs.INDEX_FILE_NCOLS))
        for pos, row in enumerate(index_rows):
            expected_id = self.ids[pos]
            arr = self.arrs[pos]
            arr_size = np.size(arr)

            stored_id, beg, end, dim0, dim1 = row

            self.assertEqual(expected_id, stored_id)
            self.assertEqual(end - beg, arr_size)
            self.assertEqual(dim0, arr.shape[0] if arr.ndim >= 1 else 0)
            self.assertEqual(dim1, arr.shape[1] if arr.ndim == 2 else 0)
            # A stored dimension of 0 counts as 1 when recomputing the size.
            self.assertEqual(max(1, dim0) * max(dim1, 1), arr_size)

        with open(self.value_filename, 'rb') as value_file:
            value_arr = np.fromfile(value_file, dtype=np.float32)

        self.assertEqual(len(value_arr),
                         sum(np.size(arr) for arr in self.arrs))

        arrs_ravel = np.concatenate([x.ravel() for x in self.arrs])
        self.assertTrue(np.allclose(value_arr, arrs_ravel))
Пример #14
0
    def _test_retrieve(self, nselected, shuffle=True):
        """Retrieve a subset of ids and compare with the known arrays.

        Args:
            nselected: number of ids taken from the front of a copy of
                ``self.ids``.
            shuffle: when true, the copy is shuffled before the ids are
                taken.
        """
        candidate_ids = copy.deepcopy(self.ids)
        if shuffle:
            np.random.shuffle(candidate_ids)
        selected_ids = candidate_ids[:nselected]

        # For each selected id, look up the array at the position where the
        # id first occurs in self.ids.
        expected_arrs = [
            self.arrs[np.where(self.ids == wanted)[0][0]]
            for wanted in selected_ids
        ]

        label = 'Test retrieving {} items shuffle={}'.format(
            nselected, shuffle)
        with tictoc(label):
            retrieved_arrs = bs.retrieve(selected_ids, self.loc)

        self.assertEqual(len(selected_ids), len(retrieved_arrs))
        for expected_arr, retrieved_arr in zip(expected_arrs, retrieved_arrs):
            try:
                is_close = np.allclose(expected_arr, retrieved_arr)
            except TypeError:
                # NOTE(review): pairs that cannot be compared are silently
                # skipped, as in the original - confirm this is intended.
                continue
            self.assertTrue(is_close)
Пример #15
0
    def _testxcorr2(self, template, image, result):
        """Time ``normxcorr2(template, image)`` and compare it with ``result``.

        Args:
            template: first argument to ``normxcorr2``; needs ``.shape``.
            image: second argument to ``normxcorr2``; needs ``.shape``.
            result: expected output, compared with ``np.allclose``.
        """
        label = 'Template size = {}, image size = {}'.format(
            template.shape, image.shape)
        with tictoc(label):
            computed = normxcorr2(template, image)

        self.assertTrue(np.allclose(computed, result))