Example #1
0
    def load(self, saveDataAsH5=True):
        """Load training data into self.x / self.y.

        Prefers the cached HDF5 file at <inPath>/data.h5. Otherwise builds
        fixed-size spectrogram slices from paired files under
        <inPath>/noisy and <inPath>/clean (matched by file name).

        saveDataAsH5: when True, cache the freshly built arrays to data.h5
        so subsequent loads take the fast path.
        """
        h5Path = os.path.join(self.inPath, "data.h5")
        print(h5Path)
        if os.path.isfile(h5Path):
            # Fast path: cached arrays. Context manager closes the file
            # handle (the original leaked it).
            with h5py.File(h5Path, "r") as h5f:
                self.x = h5f["x"][:]
                self.y = h5f["y"][:]
        else:
            # NOTE(review): this materializes every slice in memory; a
            # generator with yields would eat less memory.
            count = 0

            for dirPath, dirNames, fileNames in os.walk(os.path.join(self.inPath, 'noisy')):
                for fileName in filter(lambda f: (f.endswith(".mp3") or f.endswith(".wav")) and not f.startswith("."),
                                       fileNames):
                    audio, sampleRate = conversion.loadAudioFile(os.path.join(self.inPath, 'noisy', fileName))
                    noisy_spectrogram, _ = conversion.audioFileToSpectrogram(audio, self.fftWindowSize)

                    audio, sampleRate = conversion.loadAudioFile(os.path.join(self.inPath, 'clean', fileName))
                    clean_spectrogram, _ = conversion.audioFileToSpectrogram(audio, self.fftWindowSize)

                    # Zero-pad the shorter spectrogram so the pair has
                    # identical shapes before slicing.
                    if noisy_spectrogram.shape[1] < clean_spectrogram.shape[1]:
                        padded = np.zeros(clean_spectrogram.shape)
                        padded[:noisy_spectrogram.shape[0], :noisy_spectrogram.shape[1]] = noisy_spectrogram
                        noisy_spectrogram = padded
                    elif clean_spectrogram.shape[1] < noisy_spectrogram.shape[1]:
                        padded = np.zeros(noisy_spectrogram.shape)
                        padded[:clean_spectrogram.shape[0], :clean_spectrogram.shape[1]] = clean_spectrogram
                        clean_spectrogram = padded

                    # Chop into SLICE_SIZE-wide slices so every example in
                    # a batch has the same shape.
                    mashupSlices = chop(noisy_spectrogram, SLICE_SIZE)
                    acapellaSlices = chop(clean_spectrogram, SLICE_SIZE)
                    count += 1
                    if len(acapellaSlices) > 0:
                        self.x.extend(mashupSlices)
                        self.y.extend(acapellaSlices)
                        console.info(count, "Created spectrogram for", fileName,  "with length", len(acapellaSlices))

            # Add a trailing "channels" axis to please the network
            self.x = np.array(self.x)[:, :, :, np.newaxis]
            self.y = np.array(self.y)[:, :, :, np.newaxis]

            console.info('Train data shape: x: ', self.x.shape, '   y: ', self.y.shape)
            # Save to file if asked
            if saveDataAsH5:
                with h5py.File(h5Path, "w") as h5f:
                    h5f.create_dataset("x", data=self.x)
                    h5f.create_dataset("y", data=self.y)
Example #2
0
    def isolateVocals(self, path, fftWindowSize, phaseIterations=10):
        """Run the model on one audio file and write the isolated result.

        Writes next to the input file: <name>_unet.wav (reconstructed
        audio), <name>_unet.png (predicted spectrogram) and <name>.png
        (input spectrogram).
        """
        console.log("Attempting to isolate vocals from", path)
        audio, sampleRate = conversion.loadAudioFile(path)
        spectrogram, phase = conversion.audioFileToSpectrogram(audio, fftWindowSize=fftWindowSize)
        console.log("Retrieved spectrogram; processing...")

        # Pad up to a multiple of the network's downscale factor, then add
        # batch and channel axes to match the model's 4-D input.
        padded = conversion.expandToGrid(spectrogram, self.peakDownscaleFactor)
        modelInput = padded[np.newaxis, :, :, np.newaxis]
        print(modelInput.shape)

        # Predict, strip the batch/channel axes, and crop the padding back
        # off so the output matches the original spectrogram size.
        prediction = self.model.predict(modelInput)
        newSpectrogram = prediction[0, :, :, 0][:spectrogram.shape[0], :spectrogram.shape[1]]
        console.log("Processed spectrogram; reconverting to audio")

        newAudio = conversion.spectrogramToAudioFile(newSpectrogram, sampleRate, fftWindowSize=fftWindowSize, phaseIterations=phaseIterations)
        directory, baseName = os.path.split(path)
        stem = os.path.splitext(baseName)[0]
        outputFileNameBase = os.path.join(directory, stem + "_unet")
        console.log("Converted to audio; writing to", outputFileNameBase)

        conversion.saveAudioFile(newAudio, outputFileNameBase + ".wav", sampleRate)
        conversion.saveSpectrogram(newSpectrogram, outputFileNameBase + ".png")
        conversion.saveSpectrogram(spectrogram, os.path.join(directory, stem) + ".png")
        console.log("Vocal isolation complete")