def analyse(self, workable):
    hsh = create_hash(workable, self.identity)
    cache = self.process.cache / f"{hsh}.npy"
    if cache.exists():
        mfcc = np.load(cache, allow_pickle=True)
    else:
        mfcc = get_buffer(
            fluid.mfcc(
                str(workable),
                fftsettings=self.fftsettings,
                numbands=self.numbands,
                numcoeffs=self.numcoeffs,
                minfreq=self.minfreq,
                maxfreq=self.maxfreq,
            ),
            "numpy",
        )
        np.save(cache, mfcc)
    self.buffer[str(workable)] = mfcc.tolist()
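create_hash is not shown in these listings. A minimal sketch of the idea, assuming it simply digests the workable's path together with the analyser's parameter identity, so that changing either the input file or the analysis settings produces a new cache key:

import hashlib

def create_hash(workable, identity):
    # Hypothetical sketch, not the original implementation: combine the
    # source path and the analyser's identity into one stable digest used
    # as the cache filename.
    m = hashlib.md5()
    m.update(str(workable).encode())
    m.update(str(identity).encode())
    return m.hexdigest()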
def analyse(self, workable):
    slices = self.input[workable]
    if len(slices) == 1:
        self.buffer[workable] = slices
        return
    count = 0
    standardise = StandardScaler()
    model = AgglomerativeClustering(n_clusters=self.numclusters)
    while (count + self.windowsize) <= len(slices):
        indices = slices[count:count + self.windowsize]  # create a section of the indices in question
        data = []
        for start, end in zip(indices, indices[1:]):
            # describe each segment between consecutive slice points with MFCC statistics
            mfccs = fluid.mfcc(
                workable,
                fftsettings=self.fftsettings,
                startframe=int(start),
                numframes=int(end - start),
            )
            stats = get_buffer(fluid.stats(mfccs, numderivs=self.numderivs), "numpy")
            data.append(stats.flatten())
        data = standardise.fit_transform(data)
        cluster = model.fit(data)
        # collapse runs of segments assigned to the same cluster by removing
        # the slice point between them
        cur = -2
        for j, c in enumerate(cluster.labels_):
            prev = cur
            cur = c
            if cur == prev:
                try:
                    slices.pop(j + count)
                except IndexError:
                    pass  # FIXME why are some indices erroring?
        count += 1
    self.buffer[workable] = slices
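The IndexError caught above comes from mutating slices while iterating over the window's labels: each pop shifts the remaining points left, so j + count can end up past the end of the shortened list. A sketch of the same consecutive-duplicate collapse done without in-place popping; the function name and shape are illustrative, not the original method:

def collapse_consecutive(points, labels):
    # points: slice positions for one window (length n)
    # labels: one cluster label per segment between consecutive points (length n - 1)
    # A point survives only if the segments on either side of it were assigned
    # different clusters, i.e. it marks a genuine boundary.
    kept = [points[0]]
    for j in range(1, len(labels)):
        if labels[j] != labels[j - 1]:
            kept.append(points[j])
    kept.append(points[-1])
    return kept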
print(f"At window size: {window}") for nclusters in range(2, window): print(f"At cluster size: {nclusters}") model = AgglomerativeClustering(n_clusters=nclusters) count = 0 slices = list(original_slices) # recopy the original so we start fresh while (count + window) <= len(slices): indices = slices[ count:count + window] #create a section of the indices in question data = [] for i, (start, end) in enumerate(zip(indices, indices[1:])): mfcc = fluid.mfcc(source, fftsettings=[2048, -1, -1], startframe=start, numframes=end - start) stats = get_buffer(fluid.stats(mfcc, numderivs=1), "numpy") data.append(stats.flatten()) data = standardise.fit_transform(data) # might not be necessary to reduce as the dimensions are already quite low # redux = UMAP(n_components=COMPONENTS, n_neighbors=NEIGHBOURS, min_dist=MINDIST, random_state=42).fit_transform(data) cluster = model.fit(data) cur = -2 for j, c in enumerate(cluster.labels_):
NEIGHBOURS = 4  # UMAP neighbours
MINDIST = 0.05  # UMAP minimum distance
PLOT = True

# HDBSCAN
HDBCLUSTSIZE = 3
HDBSAMPS = 1

media = Path("../reaper/source/media/")
source = media / "twovoice.wav"  # "06-xbox controller-200518_1319.wav"  # "02-200420_0928.wav"

data, labels = [], []

if not resynth.exists():  # let's not redo long NMFs each time
    nmf = fluid.nmf(source, resynth=resynth, iterations=50, components=NMFCOMPONENTS)

if not features.exists():
    mfcc = fluid.mfcc(resynth, features=features, minfreq=500, maxfreq=15000, numcoeffs=13)

stats = get_buffer(fluid.stats(features, numderivs=1))

# flatten statistics: the stats buffer is organised channel by channel,
# with NUMCOEFFS channels per NMF component
flatstats = []
for i in range(NMFCOMPONENTS):
    offset = i * NUMCOEFFS
    temp = []
    for j in range(NUMCOEFFS):
        for x in stats[j + offset]:
            temp.append(x)
    flatstats.append(temp)

# standardise data
print('Standardising Data')
standardise = StandardScaler()
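The UMAP and HDBSCAN constants at the top are not used within this excerpt, which implies a reduction and clustering stage further down the script. A hedged sketch of how they would plausibly be applied to the standardised, flattened statistics; this is an assumed continuation, not the original code:

from umap import UMAP
from hdbscan import HDBSCAN

# Assumed continuation: standardise the per-component feature vectors, embed
# them with UMAP, then cluster the embedding with HDBSCAN using the constants
# defined at the top of the script.
standardised = standardise.fit_transform(flatstats)
embedding = UMAP(
    n_neighbors=NEIGHBOURS,
    min_dist=MINDIST,
    random_state=42,
).fit_transform(standardised)
clusterer = HDBSCAN(min_cluster_size=HDBCLUSTSIZE, min_samples=HDBSAMPS)
labels = clusterer.fit_predict(embedding)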