def analyse(self, workable):
    nmf = fluid.nmf(
        workable,
        iterations=self.iterations,
        components=self.components,
        fftsettings=self.fftsettings,
    )
    bases = get_buffer(nmf.bases, "numpy")
    bases_smoothed = np.zeros_like(bases)
    for i, x in enumerate(bases):
        bases_smoothed[i] = savgol_filter(x, self.smoothing, self.polynomial)
    clusterer = hdbscan.HDBSCAN(
        min_cluster_size=self.min_cluster_size,
        min_samples=self.min_samples,
        cluster_selection_method=self.cluster_selection_method,
    )
    cluster_labels = clusterer.fit_predict(bases_smoothed)
    unique_clusters = list(dict.fromkeys(cluster_labels))
    sound = get_buffer(nmf.resynth, "numpy")
    for x in unique_clusters:
        summed = np.zeros_like(sound[0])  # make an empty numpy array of same size
        base = Path(workable).name
        output = self.output / f"{base}_{x}.wav"
        for idx, cluster in enumerate(cluster_labels):
            if cluster == x:
                summed += sound[idx]
        wavfile.write(output, 44100, summed)
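
# A minimal, self-contained sketch (not part of the analyser above) of the
# smooth-then-cluster idea used in analyse(): each NMF basis is a spectral
# envelope, so Savitzky-Golay smoothing before HDBSCAN reduces the chance that
# bin-to-bin noise separates bases that are broadly alike. The synthetic
# "bases" array below is only a stand-in for get_buffer(nmf.bases, "numpy").
import numpy as np
import hdbscan
from scipy.signal import savgol_filter

rng = np.random.default_rng(0)
bases = np.abs(rng.normal(size=(10, 513)))                      # 10 fake bases, 513 bins each
smoothed = np.array([savgol_filter(b, 11, 2) for b in bases])   # window 11, polynomial order 2

labels = hdbscan.HDBSCAN(min_cluster_size=2, min_samples=1).fit_predict(smoothed)
print(labels)  # one label per basis; -1 marks noise points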
def analyse(self, workable):
    noveltyslice = fluid.noveltyslice(
        workable,
        feature=self.feature,
        fftsettings=self.fftsettings,
        filtersize=self.filtersize,
        minslicelength=self.minslicelength,
        threshold=self.threshold,
    )
    self.buffer[workable] = [int(x) for x in get_buffer(noveltyslice)]
def analyse(self, workable):
    noveltyslice = fluid.noveltyslice(
        workable,
        feature=self.feature,
        fftsettings=self.fftsettings,
        filtersize=self.filtersize,
        kernelsize=self.kernelsize,
        minslicelength=self.minslicelength,
        threshold=self.threshold,
    )
    self.buffer[str(workable)] = get_buffer(noveltyslice)
def analyse(self, workable): hsh = create_hash(workable, self.identity) cache = self.process.cache / f"{hsh}.wav" if not cache.exists(): slice_output = get_buffer( fluid.onsetslice( workable, indices=cache, fftsettings=self.fftsettings, filtersize=self.filtersize, framedelta=self.framedelta, metric=self.metric, minslicelength=self.minslicelength, threshold=self.threshold, ), "numpy", ) else: slice_output = get_buffer(cache) self.buffer[str(workable)] = slice_output.tolist()
def analyse(self, workable): hsh = create_hash(workable, self.identity) cache = self.process.cache / f"{hsh}.npy" if cache.exists(): mfcc = np.load(cache, allow_pickle=True) else: mfcc = get_buffer( fluid.mfcc( str(workable), fftsettings=self.fftsettings, numbands=self.numbands, numcoeffs=self.numcoeffs, minfreq=self.minfreq, maxfreq=self.maxfreq, ), "numpy", ) np.save(cache, mfcc) self.buffer[str(workable)] = mfcc.tolist()
def analyse(self, workable):
    slices = self.input[workable]
    if len(slices) == 1:
        self.buffer[workable] = slices
        return
    count = 0
    standardise = StandardScaler()
    model = AgglomerativeClustering(n_clusters=self.numclusters)
    while (count + self.windowsize) <= len(slices):
        indices = slices[count:count + self.windowsize]  # create a section of the indices in question
        data = []
        for start, end in zip(indices, indices[1:]):
            mfccs = fluid.mfcc(
                workable,
                fftsettings=self.fftsettings,
                startframe=int(start),
                numframes=int(end - start),
            )
            stats = get_buffer(fluid.stats(mfccs, numderivs=self.numderivs), "numpy")
            data.append(stats.flatten())
        data = standardise.fit_transform(data)
        cluster = model.fit(data)
        cur = -2
        for j, c in enumerate(cluster.labels_):
            prev = cur
            cur = c
            if cur == prev:
                try:
                    slices.pop(j + count)
                except IndexError:
                    pass  # FIXME why are some indices erroring?
        count += 1
    self.buffer[workable] = slices
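
# The pruning rule driving the windowed clustering above: when two neighbouring
# segments receive the same cluster label, the slice point between them is
# treated as spurious and dropped. This is not the exact in-place loop used in
# analyse(), just a small standalone illustration of the rule on made-up data:
def prune_consecutive(slices, labels):
    # slices has one more entry than labels (labels describe the segments between slices)
    keep = [slices[0]]
    for i, (prev, cur) in enumerate(zip(labels, labels[1:]), start=1):
        if cur != prev:  # only keep a boundary where the cluster label changes
            keep.append(slices[i])
    keep.append(slices[-1])
    return keep

print(prune_consecutive([0, 100, 200, 300, 400], [0, 0, 1, 1]))  # -> [0, 200, 400]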
def analyse(self, workable): hsh = create_hash(workable, self.identity) cache = self.process.cache / f"{hsh}.npy" if cache.exists(): loudness = np.load(cache, allow_pickle=True) else: loudness = get_buffer( fluid.loudness(workable["file"], windowsize=self.windowsize, hopsize=self.hopsize, kweighting=self.kweighting, truepeak=self.truepeak, numframes=workable["numframes"], startframe=workable["startframe"]), "numpy", ) np.save(cache, loudness) workable["feature"] = loudness.tolist() self.buffer[workable["id"]] = workable
def make_image(audio_file):
    data = get_buffer(audio_file, 'numpy')
    p = image_dir / audio_file.name
    length = data.shape[0]
    if length < w * h:
        diff = (w * h) - length
        data = np.pad(data, (diff - 1, 1), 'wrap')
    container = np.zeros(shape=(w, h, 3))
    for i, point in enumerate(data):
        x = i % w - 1
        y = round((i + 1) / h)
        try:
            scale = ((point + 1) * 0.5) * 256  # map [-1, 1] samples to [0, 256) grey levels
            container[x][y] = [scale, scale, scale]
        except IndexError:
            pass  # points that fall outside the image grid are skipped
    img = Image.fromarray(container.astype('uint8'), 'RGB')
    img.save(p.with_suffix('.png'))
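
# make_image() walks the samples point by point; the same idea (pad a signal to
# w*h samples, map [-1, 1] to grey levels, write an image) can be sketched more
# compactly with a reshape. This is an illustrative variant, not the function
# used elsewhere in the project; w and h are assumed image dimensions.
import numpy as np
from PIL import Image

def signal_to_image(data: np.ndarray, w: int, h: int, path: str):
    flat = np.resize(data, w * h)                   # pad or trim by cycling the signal
    grey = np.clip((flat + 1.0) * 0.5 * 255, 0, 255).astype("uint8")
    img = Image.fromarray(grey.reshape(h, w), "L")  # single-channel greyscale image
    img.save(path)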
def analyse(self, workable): hsh = create_hash(workable, self.identity) cache = self.process.cache / f"{hsh}.npy" if cache.exists(): pitch = np.load(cache, allow_pickle=True) else: pitch = get_buffer( fluid.pitch(workable["file"], algorithm=self.algorithm, minfreq=self.minfreq, maxfreq=self.maxfreq, unit=self.unit, fftsettings=self.fftsettings, numframes=workable["numframes"], startframe=workable["startframe"]), "numpy", ) np.save(cache, pitch) workable["features"] = pitch.tolist() self.buffer[workable["id"]] = workable
def analyse_items(self):
    # TODO import this function from a global place that can be used in CORPUS too
    median_loudness = {}
    for x in self.input:
        hsh = create_hash(x, self.min_loudness, self.max_loudness)
        cache = self.process.cache / f"{hsh}.npy"
        if not cache.exists():
            med_loudness = get_buffer(
                stats(loudness(x, hopsize=4410, windowsize=17640)), "numpy")
            np.save(cache, med_loudness)
        else:
            med_loudness = np.load(cache, allow_pickle=True)
        median_loudness[str(x)] = med_loudness[0][5]
    vals = np.array([x for x in median_loudness.values()])
    min_perc = np.percentile(vals, self.min_loudness)
    max_perc = np.percentile(vals, self.max_loudness)
    self.output = [
        k for k, v in median_loudness.items()
        if v <= max_perc and v >= min_perc
    ]
def loudness(self, min_loudness: int = 0, max_loudness: int = 100):
    hopsize = 4410
    windowsize = 17640
    with Progress() as progress:
        task = progress.add_task("[cyan]Corpus Filtering: Loudness",
                                 total=len(self.items))
        median_loudness = {}
        for x in self.items:
            hsh = create_hash(x, hopsize, windowsize)
            # Make sure a sane temporary path exists
            tmp = Path("/tmp") / "ftis_cache"
            tmp.mkdir(exist_ok=True)
            cache = tmp / f"{hsh}.npy"
            if not cache.exists():
                med_loudness = get_buffer(
                    stats(loudness(x, hopsize=hopsize, windowsize=windowsize)),
                    "numpy")
                np.save(cache, med_loudness)
            else:
                med_loudness = np.load(cache, allow_pickle=True)
            median_loudness[str(x)] = med_loudness[0][5]
            progress.update(task, advance=1)
    # Get percentiles and filter
    vals = np.array([x for x in median_loudness.values()])
    min_perc = np.percentile(vals, min_loudness)
    max_perc = np.percentile(vals, max_loudness)
    self.items = [
        k for k, v in median_loudness.items()
        if v <= max_perc and v >= min_perc
    ]
    return self
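
# The filtering step shared by both loudness routines above: compute a median
# loudness per file, then keep only the files that fall between two percentiles
# of the whole corpus. A tiny standalone example with made-up values:
import numpy as np

median_loudness = {"a.wav": -31.0, "b.wav": -18.5, "c.wav": -24.0, "d.wav": -45.0}
vals = np.array(list(median_loudness.values()))
lo, hi = np.percentile(vals, 25), np.percentile(vals, 75)
kept = [k for k, v in median_loudness.items() if lo <= v <= hi]
print(kept)  # the middle half of the corpus by median loudness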
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import AgglomerativeClustering
from datetime import datetime

THRESHOLD = 0.47
WINDOWSIZE = 25
HOPSIZE = 1

media = Path("../reaper/source/media/")
source = media / "02-200420_0928.wav"
source = source.resolve()
output = Path("slices").resolve()

print('Slicing')
slices = get_buffer(
    fluid.noveltyslice(source, threshold=THRESHOLD, fftsettings=[2048, -1, -1]))
slices = [int(x) for x in slices]

# clustering
standardise = StandardScaler()
original_slices = list(slices)  # make a templated copy

tracks = {}
pos = 0
for i, (start, end) in enumerate(zip(original_slices, original_slices[1:])):
    start = (start / 44100)
    end = (end / 44100)
    item = {
def deriv(arr):
    return [y - x for x, y in zip(arr, arr[1:])]


# files = [x for x in Path("outputs/micro_segmentation/2_ExplodeAudio").iterdir()]
# clusters = read_json("outputs/micro_clustering/5_HDBSCluster.json")
# files = [x for x in clusters["37"]]
files = [x for x in Path("outputs/concat").iterdir() if x.suffix == ".wav"]
print(files)

d = {}
for i, f in enumerate(files):
    print(f)
    print(i / len(files))
    ts = get_buffer(fluid.transientslice(f))
    if ts[0] != 0:
        ts.insert(0, 0)
    if len(ts) <= 2 and ts[0] == 0.0:
        d[str(f)] = -1
    else:
        # Let's grab the orderedness of the onsets
        norm = normalise(ts)
        average = mean(norm)
        robustified = [x / average for x in norm]
        first_deriv = deriv(robustified)
        d[str(f)] = stdev(first_deriv)

mi = 99999
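
# The "orderedness" measure above relies on normalise(), mean() and stdev();
# the excerpt does not show where they come from, so this sketch assumes a
# min-max normalise() and the statistics module. Evenly spaced onsets give a
# stdev of (near) zero for the first differences; irregular onsets give more.
from statistics import mean, stdev

def normalise_minmax(arr):
    lo, hi = min(arr), max(arr)
    return [(x - lo) / (hi - lo) for x in arr]

def orderedness(onsets):
    norm = normalise_minmax(onsets)
    robust = [x / mean(norm) for x in norm]
    return stdev([y - x for x, y in zip(robust, robust[1:])])

print(orderedness([0, 100, 200, 300, 400]))  # regular onsets -> 0.0
print(orderedness([0, 30, 200, 210, 400]))   # irregular onsets -> larger value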
MINDIST = 0.05  # UMAP minimum distance
PLOT = True

# HDBSCAN
HDBCLUSTSIZE = 3
HDBSAMPS = 1

media = Path("../reaper/source/media/")
source = media / "twovoice.wav"  # "06-xbox controller-200518_1319.wav"  # "02-200420_0928.wav"

data, labels = [], []

if not resynth.exists():  # let's not redo long NMF's each time
    nmf = fluid.nmf(source, resynth=resynth, iterations=50, components=NMFCOMPONENTS)

if not features.exists():
    mfcc = fluid.mfcc(resynth,
                      features=features,
                      minfreq=500,
                      maxfreq=15000,
                      numcoeffs=13)

stats = get_buffer(fluid.stats(features, numderivs=1))

# flatten statistics because its channels...channels...channels
flatstats = []
for i in range(NMFCOMPONENTS):
    offset = i * NUMCOEFFS
    temp = []
    for j in range(NUMCOEFFS):
        for x in stats[j + offset]:
            temp.append(x)
    flatstats.append(temp)

# standardise data
print('Standardising Data')
standardise = StandardScaler()
data = np.array(flatstats)
# media = Path("../reaper/source/media/")
media = Path("../reaper/highgain_source/bounces/")
source = media / "highgain_source-002.wav"  # "twovoice.wav"  # "06-xbox controller-200518_1319.wav"  # "02-200420_0928.wav"
bases = Path("bases.wav")
resynth = Path("resynth.wav")

data = []

if not bases.exists() or not resynth.exists():
    nmf = fluid.nmf(source,
                    resynth=resynth,
                    bases=bases,
                    iterations=100,
                    components=NMFCOMPONENTS)

bases = get_buffer(bases, "numpy")
bases_smoothed = np.zeros_like(bases)

# lets smooth the bases a bit
for i, x in enumerate(bases):
    bases_smoothed[i] = savgol_filter(x, 11, 2)

# clustering (on the smoothed bases, which is why we computed them above)
print('Clustering data')
clusterer = hdbscan.HDBSCAN(min_cluster_size=HDBCLUSTSIZE, min_samples=HDBSAMPS)
cluster_labels = clusterer.fit_predict(bases_smoothed)
unique_clusters = list(dict.fromkeys(cluster_labels))
sound = get_buffer(resynth, "numpy")
HDBCLUSTSIZE = 2
HDBSAMPS = 1

media = Path("../reaper/source/media/")
source = media / "twovoice.wav"  # "06-xbox controller-200518_1319.wav"  # "02-200420_0928.wav"
output = Path("slices").resolve()
activation_pickle = Path("activations.wav")

data, labels = [], []

nmf = fluid.nmf(source,
                activations=activation_pickle.resolve(),
                iterations=50,
                components=NMFCOMPONENTS)
activations = get_buffer(nmf.activations, "numpy")
stats = get_buffer(fluid.stats(nmf.activations, numderivs=1), "numpy")

# clustering
print('Clustering data')
clusterer = hdbscan.HDBSCAN(min_cluster_size=HDBCLUSTSIZE, min_samples=HDBSAMPS)
cluster_labels = clusterer.fit_predict(stats)
unique_clusters = list(dict.fromkeys(cluster_labels))
sound = get_buffer(nmf.resynth, "numpy")

# Format everything into a lovely little reaper project
# initiate jinja2 business
env = jinja2.Environment(loader=jinja2.FileSystemLoader(['../RPRTemplates']))
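
# Continuing from the jinja2 Environment above: the script renders a Reaper
# project from the templates in ../RPRTemplates. The template name, context
# variable and output path below are hypothetical (the real ones are not shown
# in this excerpt); this only sketches the render step:
template = env.get_template("project.rpp")            # hypothetical template name
rendered = template.render(tracks={})                 # hypothetical context
Path("clustered_project.rpp").write_text(rendered)    # hypothetical output path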