def test_load_labels_from_txt(self):
    """Check that labels read from a TXT file end up on the data file.

    Loads the labelled AIF sample, attaches its labels under the name
    "whale" and verifies that both label values (0 and 1) occur in the
    labelled data and that the label name is registered.
    """
    filename_data, filename_labels = get_labeled_txt()
    df = AudioDataFile().load(filename_data, formatter=AIFFormatter())
    df.load_labels(filename_labels, labels_formatter=TXTLabelsFormatter(), label="whale")

    # NOTE: assuming ``unique()`` returns a NumPy array (as pandas does),
    # ``[0, 1] in array`` is an element-wise comparison reduced with
    # ``any()``: it passes as soon as ONE position matches. A subset check
    # on the set of observed values actually requires both labels.
    observed_labels = set(df.get_labeled_data().labels.unique())
    assert {0, 1} <= observed_labels
    assert "whale" in df.name_label
def test_concatenate_data_files(self):
    """Concatenating five files must yield at least their summed duration."""
    loaded = [
        AudioDataFile().load(path, formatter=AIFFormatter())
        for path in get_5_file_names()
    ]
    merged = AudioDataFile().concatenate(loaded)

    # The merged file may be longer than the parts, never shorter.
    assert merged.duration.seconds >= sum(
        part.duration.seconds for part in loaded
    )
def test_copy_dataframe(self):
    """A data file built from another one must carry the same labels."""
    data_path, labels_path = get_labeled_txt()
    original = AudioDataFile().load(data_path, formatter=AIFFormatter())
    original.load_labels(
        labels_path,
        labels_formatter=TXTLabelsFormatter(),
        label="whale",
    )

    # Passing an existing data file to the constructor makes the copy.
    clone = AudioDataFile(original)

    np.testing.assert_equal(
        clone.metadata["labels"],
        original.metadata["labels"],
    )
def test_add_remove_datafile(self):
    """Adding then removing a data file must leave the dataset empty."""
    path = get_file_name()
    audio = AudioDataFile()
    audio.load(path, formatter=AIFFormatter())
    dataset = OneDataFileOut()

    dataset.add_data_file(audio)
    assert len(dataset.datafiles) == 1

    dataset.remove_data_file(audio)
    assert len(dataset.datafiles) == 0
def test_add_windows(self):
    """Exercise window registration on a labelled data file.

    Covers the state before any window exists, the rejection of an
    incorrect window, and the bookkeeping after two valid windows.
    """
    data_path, labels_path = get_labeled_txt()
    audio = AudioDataFile().load(data_path, formatter=AIFFormatter())
    audio.load_labels(
        labels_path,
        labels_formatter=TXTLabelsFormatter(),
        label="whale",
    )

    first_start = audio.data.index[0]
    first_end = audio.data.index[5]
    second_start = audio.metadata["labels"][0][0]
    second_end = second_start + 4

    # Nothing registered yet: the counter is zero and window 0 is compared
    # against the complete data.
    assert audio.parameters["number_of_windows"] == 0
    whole = audio.get_window(0)
    assert all(whole == audio.data)

    # An incorrect window must be refused with AttributeError.
    with pytest.raises(AttributeError):
        audio.add_window(first_start + 100, first_end - 1)

    valid_windows = (
        (first_start, first_end),  # length 6
        (second_start, second_end),  # length 4
    )
    for begin, finish in valid_windows:
        audio.add_window(begin, finish)

    assert audio.parameters["number_of_windows"] == 2
    assert str(audio) == "AudioDataFile (2 windows)"
def test_sliding_windows():
    """Run SlidingWindows on a loaded file and check it describes itself."""
    path = get_file_name()

    slider = SlidingWindows()
    slider.parameters["sliding_window_width"] = "13s"
    slider.parameters["overlap"] = 0.12

    audio = AudioDataFile()
    audio.load(path, formatter=AIFFormatter())
    slider.parameters["data"] = audio

    windowed = slider.transform()
    # Only checks that the windows frame can be built; its content is not
    # inspected here.
    windowed.get_windows_data_frame()

    assert slider.description != ""
def test_load(self):
    """Check a data file before and after loading an AIF file.

    An empty data file prints as "AudioDataFile" and raises RuntimeError
    for its time-related properties; once loaded it reports a positive
    duration and includes that duration in its string form.
    """
    path = get_file_name()
    audio = AudioDataFile()

    # No data attached yet.
    assert str(audio) == "AudioDataFile"
    for time_property in ("duration", "start_time", "end_time"):
        with pytest.raises(RuntimeError):
            getattr(audio, time_property)

    audio.load(path, formatter=AIFFormatter())

    assert audio.duration.seconds > 0
    assert str(audio) == "AudioDataFile (0 days 00:14:59.999500)"
def get_validation(self):
    """Yield the validation data file of every iteration.

    Nothing is yielded when ``self.parameters["validation"]`` is falsy or
    when no data files are registered. Otherwise the registered files are
    visited in the order 1, 2, ..., n - 1, 0 and each one is yielded as
    the result of ``AudioDataFile().concatenate([file])``.

    Yields:
        The value returned by ``concatenate`` for each single file.
    """
    if not self.parameters["validation"]:
        return

    # Rotate the list by one position. This reproduces the original index
    # sequence (i + 1 for i in 0..n-2, then 0) without copying and popping
    # the whole list on every iteration, and it yields nothing for an empty
    # list instead of raising IndexError.
    rotated = self.datafiles[1:] + self.datafiles[:1]
    for validation in rotated:
        # A single AudioDataFile instance is enough to call concatenate;
        # the former ``AudioDataFile().__class__()`` built two.
        yield AudioDataFile().concatenate([validation])
def get_testing(self):
    """Yield the testing data file of every iteration.

    Nothing is yielded when ``self.parameters["testing"]`` is falsy.
    Otherwise every registered file is visited in order and yielded as the
    result of ``AudioDataFile().concatenate([file])``.

    Yields:
        The value returned by ``concatenate`` for each single file.
    """
    if not self.parameters["testing"]:
        return

    # Iterate over a snapshot so the set of files is fixed when iteration
    # starts; this replaces the per-iteration copy-and-pop of the list.
    for testing in list(self.datafiles):
        # A single AudioDataFile instance is enough to call concatenate;
        # the former ``AudioDataFile().__class__()`` built two.
        yield AudioDataFile().concatenate([testing])
def test_scale():
    """Scale must describe itself and return the same kind of data file."""
    path = get_file_name()
    audio = AudioDataFile().load(path, formatter=AIFFormatter())

    scaler = Scale()
    scaler.parameters["data"] = audio
    scaled = scaler.transform()

    assert scaler.description != ""
    # Exact class identity, not just an instance check.
    assert type(scaled) is audio.__class__
def test_save(self):
    """Round-trip a data file through HDF5 and check it still has data.

    The file is written into a temporary directory so the test does not
    leave a ``tmp.h5`` behind in the current working directory.
    """
    import os
    import tempfile

    filename = get_file_name()
    df = AudioDataFile()
    df.load(filename, formatter=AIFFormatter())

    with tempfile.TemporaryDirectory() as tmp_dir:
        target = os.path.join(tmp_dir, "tmp.h5")
        df.save(target, formatter=HDF5Formatter())
        df2 = AudioDataFile().load(target, formatter=HDF5Formatter())
        # Checked inside the context in case the loaded object still
        # depends on the file being present.
        assert df2.duration.seconds > 0
def test_identity():
    """Identity must return the input values unchanged, as 2-D data."""
    path = get_file_name()
    audio = AudioDataFile().load(path, formatter=AIFFormatter())

    identity = Identity()
    identity.parameters["data"] = audio
    output = identity.transform()

    source_values = audio.data.values.ravel()
    output_values = output.data.values.ravel()
    np.testing.assert_allclose(source_values, output_values)

    assert identity.description != ""
    assert output.data.values.ndim == 2
def get_training(self):
    """Yield the training data file of every iteration.

    Nothing is yielded when ``self.parameters["training"]`` is falsy.
    For each iteration a copy of the registered files is taken, the
    validation and testing files of that iteration are dropped (when the
    respective parameter is enabled) and the remainder is concatenated.
    """
    if not self.parameters["training"]:
        return None

    file_count = len(self.datafiles)
    # Testing index per iteration: 0 .. n-2, then -1 for the last file.
    testing_positions = [*range(file_count - 1), -1]

    for testing_at in testing_positions:
        remaining = self.datafiles.copy()
        # Order matters: the validation file (testing_at + 1) is removed
        # first so that the testing index still points at the right item.
        if self.parameters["validation"]:
            remaining.pop(testing_at + 1)
        if self.parameters["testing"]:
            remaining.pop(testing_at)
        yield AudioDataFile().concatenate(remaining)
def test_skewness():
    """Skewness must produce a single-column, 2-D result."""
    path = get_file_name()
    audio = AudioDataFile().load(path, formatter=AIFFormatter())
    # Remove the mean in place before extracting the feature.
    audio.data -= audio.data.mean()

    feature = Skewness()
    feature.parameters["data"] = audio
    output = feature.transform()

    assert output.data.values.shape[1] == 1
    assert feature.description != ""
    assert output.data.values.ndim == 2
def test_spectral_frames():
    """SpectralFrames must produce a single-row, 2-D result."""
    path = get_file_name()
    audio = AudioDataFile().load(path, formatter=AIFFormatter())
    # Remove the mean in place before extracting the feature.
    audio.data -= audio.data.mean()

    feature = SpectralFrames()
    feature.parameters["sampling_rate"] = audio.sampling_rate
    feature.parameters["data"] = audio
    output = feature.transform()

    assert output.data.values.shape[0] == 1
    assert feature.description != ""
    assert output.data.values.ndim == 2
def test_mfcc():
    """MFCC must produce as many columns as requested components."""
    path = get_file_name()
    audio = AudioDataFile().load(path, formatter=AIFFormatter())
    # Remove the mean in place before extracting the feature.
    audio.data -= audio.data.mean()

    feature = MFCC()
    feature.parameters["sampling_rate"] = audio.sampling_rate
    feature.parameters["n_components"] = 25
    feature.parameters["data"] = audio
    output = feature.transform()

    assert output.data.values.shape[1] == 25
    assert feature.description != ""
    assert output.data.values.ndim == 2
def test_get_training_testing_validation_set(self):
    """The three set generators must run for ``ds.iterations`` rounds."""
    dataset = OneDataFileOut()
    for path in get_5_file_names():
        dataset.add_data_file(
            AudioDataFile().load(path, formatter=AIFFormatter())
        )
    assert len(dataset.datafiles) == 5

    training = dataset.get_training()
    testing = dataset.get_testing()
    validation = dataset.get_validation()
    expected_rounds = dataset.iterations

    # Consume the generators in lockstep and count the rounds.
    rounds = sum(1 for _ in zip(training, testing, validation))
    assert expected_rounds == rounds
def test_get_training_testing_set(self):
    """Without validation, training/testing run ``ds.iterations`` rounds."""
    dataset = OneDataFileOut()
    # Turn off generation of the validation set.
    dataset.parameters["validation"] = False
    for path in get_5_file_names():
        dataset.add_data_file(
            AudioDataFile().load(path, formatter=AIFFormatter())
        )
    assert len(dataset.datafiles) == 5

    training = dataset.get_training()
    testing = dataset.get_testing()
    expected_rounds = dataset.iterations

    # Consume both generators in lockstep and count the rounds.
    rounds = sum(1 for _ in zip(training, testing))
    assert expected_rounds == rounds
def test_segments_repr(self):
    """AudioSegments must report its segment count in its string form."""
    data_path, labels_path = get_labeled_txt()
    audio = AudioDataFile().load(data_path, formatter=AIFFormatter())
    audio.load_labels(
        labels_path,
        labels_formatter=TXTLabelsFormatter(),
        label="whale",
    )

    origin = audio.start_time
    width = audio.duration / 5

    segments = AudioSegments()
    assert str(segments) == "AudioSegments"

    # Split the file into 5 consecutive windows and store each as a segment.
    window_no = 0
    while window_no < 5:
        begin = origin + window_no * width
        finish = origin + (window_no + 1) * width
        audio.add_window(begin, finish)
        segments.add_segment(*audio.get_window(window_no))
        window_no += 1

    assert str(segments) == "AudioSegments (5 segments)"
def build_data_file(self, params: dict):
    """Load every input file listed in ``params`` and merge them into one.

    Each entry of ``params["input_data"]`` is read for the keys
    ``"file_name"``, ``"data_file"`` and ``"formatter"``; the last two are
    looked up in the registries returned by ``getters`` and instantiated.

    Returns:
        A dict with the single key ``"input_data"`` holding the result of
        concatenating all loaded data files.
    """
    data_file_registry = getters.get_available_data_files()
    formatter_registry = getters.get_available_formatters()

    loaded = []
    for entry in params["input_data"]:
        file_name = entry["file_name"]
        self.logger.info(f"Loading and appending file {file_name}")
        data_file_key = entry["data_file"]
        formatter_key = entry["formatter"]

        data_file = data_file_registry[data_file_key]()
        formatter = formatter_registry[formatter_key]()
        data_file.load(file_name=file_name, formatter=formatter)
        loaded.append(data_file)

    return {"input_data": AudioDataFile().concatenate(loaded)}
def load_feature_datafile(self):
    """Build a feature data file from five windows of the labelled sample.

    The labelled file is cut into 5 consecutive windows that are collected
    into an AudioSegments object; Energy and Kurtosis are computed on it
    and their outputs are concatenated into a FeatureDataFile.
    """
    data_path, labels_path = get_labeled_txt()
    audio = AudioDataFile().load(data_path, formatter=AIFFormatter())
    audio.load_labels(
        labels_path,
        labels_formatter=TXTLabelsFormatter(),
        label="whale",
    )

    origin = audio.start_time
    width = audio.duration / 5
    segments = AudioSegments()
    for window_no in range(5):
        begin = origin + window_no * width
        finish = origin + (window_no + 1) * width
        audio.add_window(begin, finish)
        segments.add_segment(*audio.get_window(window_no))

    # Configure both extractors on the segments before running either one.
    energy = Energy()
    energy.parameters["data"] = segments
    kurtosis = Kurtosis()
    kurtosis.parameters["data"] = segments

    energy_features = energy.transform()
    kurtosis_features = kurtosis.transform()
    return FeatureDataFile().concatenate([energy_features, kurtosis_features])
def test_sampling_rate(self):
    """Sampling rate is None before loading and positive afterwards."""
    path = get_file_name()
    audio = AudioDataFile()
    assert audio.sampling_rate is None

    audio.load(path, formatter=AIFFormatter())
    assert audio.sampling_rate > 0
def test_get_window(self):
    """Check window retrieval, out-of-range access and label treatments."""
    data_path, labels_path = get_labeled_txt()
    audio = AudioDataFile().load(data_path, formatter=AIFFormatter())
    audio.load_labels(
        labels_path,
        labels_formatter=TXTLabelsFormatter(),
        label="whale",
    )

    first_start = audio.data.index[0]
    first_end = audio.data.index[5]
    second_start = audio.metadata["labels"][0][0]
    second_end = second_start + 4
    audio.add_window(first_start, first_end)  # length 6
    audio.add_window(second_start, second_end)  # length 4

    windows_frame = audio.get_windows_data_frame()
    assert len(windows_frame) == 2

    first_window, _ = audio.get_window(0)
    second_window, _ = audio.get_window(1)
    assert len(first_window) == 6
    assert len(second_window) == 4

    # An index beyond the registered windows gives None.
    assert audio.get_window(100) is None

    # Alternative label treatments.
    audio.parameters["labels_treatment"] = "mode"
    audio.get_window(0)
    audio.parameters["labels_treatment"] = "mean"
    _, mean_label = audio.get_window(0)
    assert type(mean_label) is float

    # "fourier" is not a known labels treatment; setting it and fetching a
    # window must end in ValueError.
    with pytest.raises(ValueError):
        audio.parameters["labels_treatment"] = "fourier"
        audio.get_window(0)