    def save_separated_audiofiles(self):
        # Create the output directory (and any missing parents) if needed
        if self.directory != "":
            os.makedirs(self.directory, exist_ok=True)

        # Warn if defaults are being assumed for the writer parameters
        if self.fsIsSpecified == "False":  # notify the user if no sampling rate was specified
            print("Sample rate not specified for writing the audio files. "
                  "Assumed Fs (Hz) is " + str(self.fs))

        if self.formatIsSpecified == "False":  # notify the user if no format was specified
            print("File format not specified for writing the audio files. "
                  "Assumed format is " + str(self.format))

        # Write the percussive and harmonic components to separate files;
        # pass all parameters in one call so a later configure() does not
        # reset the unspecified ones to their defaults
        MonoWriter = es.MonoWriter(sampleRate=self.fs, format=self.format,
                                   filename=self.directory + self.filename +
                                   "_percussive." + self.format)
        MonoWriter(array(self.x_p))

        MonoWriter = es.MonoWriter(sampleRate=self.fs, format=self.format,
                                   filename=self.directory + self.filename +
                                   "_harmonic." + self.format)
        MonoWriter(array(self.x_h))

    def bassline_harmonic_anal(self):
        params = dict()
        bassline_audio, params["sampleRate"] = self.harmonic_canvas.get_audio()

        params["frameSize"] = int(
            self.harmonic_canvas.frame_size_comboBox.currentText())
        params["fftSize"] = int(
            self.harmonic_canvas.fft_size_comboBox.currentText())
        params["hopSize"] = int(
            eval(self.harmonic_canvas.hop_size_comboBox.currentText()) *
            params["frameSize"])

        params["fftSize"] = params["hopSize"]

        params["maxFrequency"] = 20000
        params["minFrequency"] = 20

        params["maxnSines"] = 100
        params["magnitudeThreshold"] = -85
        params['minSineDur'] = .05
        params["freqDevOffset"] = 50
        params["freqDevSlope"] = .1

        _, sine_audio, res_audio = self.analysis_synthesis_spr_model_standard(
            params, bassline_audio)

        # Derive the output paths from the currently selected file names
        harmonic_base = self.bassline_files_comboBox.currentText()[:-4]  # strip ".wav"
        har_sav_location = harmonic_base.replace("_harmonic", "_spr_sine.wav")
        res_sav_location = harmonic_base.replace("_harmonic", "_spr_res.wav")
        per_sav_location = self.drum_files_comboBox.currentText()[:-4].replace(
            "_percussive", "_with_spr_res.wav")

        drums_audio, sample_rate = self.percussive_canvas.get_audio()

        self.StatusBarSignal.emit("Separating Sinusoidal And Residual Parts")
        length = min(len(sine_audio), len(res_audio), len(drums_audio))
        drums_audio_plus_residual = drums_audio[-length:] + res_audio[
            -length:]  # add the residual to the percussive part

        self.StatusBarSignal.emit(
            "Writing Separated Files and the Enhanced Percussion Audio")
        es.MonoWriter(filename=har_sav_location, format="wav")(sine_audio)
        es.MonoWriter(filename=res_sav_location, format="wav")(res_audio)
        es.MonoWriter(filename=per_sav_location,
                      format="wav")(drums_audio_plus_residual)

        self.find_separated_files()  # update drop down file lists
Example #3
def energyThresholdAudio(soundfilesList):
    """Trim each file to the region whose frame-wise RMS exceeds a threshold."""
    for sound in soundfilesList:
        RMS = esst.RMS()
        audio = esst.MonoLoader(filename=sound)()

        start = 0
        end = 0
        thresh = 0.05
        rms_vals = []
        for frame in esst.FrameGenerator(audio, frameSize=2048, hopSize=1024,
                                         startFromZero=True):
            rms_vals.append(float(RMS(frame)))
        rms_vals = np.array(rms_vals)

        higher = np.where(rms_vals >= thresh)[0]
        if len(higher) > 1:
            start = higher[0]
            end = higher[-1]
        else:
            continue  # nothing above the threshold; leave the file untouched

        # convert frame indices back to sample positions (hop size = 1024)
        newAudio = audio[start * 1024:end * 1024]
        writer = esst.MonoWriter(filename=sound, format="mp3")  # overwrites the input file
        writer(newAudio)
        print(sound)
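
# A minimal usage sketch (hypothetical file paths; assumes numpy as np and
# essentia.standard as esst are imported, as in the function above):
energyThresholdAudio(["samples/kick.wav", "samples/snare.wav"])  # trims each file in place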
Example #4
    def clear_end(self):
        print('enter clear_end')
        if len(self.record) != 0:
            oname = "onlinetemp.wav"
            es.MonoWriter(filename=oname, sampleRate=16000)(self.record)

            # Fixed-size output buffer for the recognizer's transcription
            c_pBuf = create_string_buffer(10000)

            lib.process(oname, c_pBuf)
            data = string_at(c_pBuf)

            if len(data) == 0:
                return

            print(data)
            s = self.parent.parent.nameDict[self.last][:-4] + ": " + data
            self.writeToOutput(s)

            self.num = self.num + 1
            self.record = []
Example #5
def compute_all_features(file_struct, audio_beats=False, overwrite=False):
    """Computes all the features for a specific audio file and its respective
        human annotations. It creates an audio file with the estimated
        beats if needed."""

    # Output file
    out_file = file_struct.features_file

    if os.path.isfile(out_file) and not overwrite:
        return  # do nothing; the file already exists and we are not overwriting it

    # Compute the features for the given audio file
    audio, features = compute_features_for_audio_file(file_struct.audio_file)

    # Save output as audio file
    if audio_beats:
        logging.info("Saving Beats as an audio file")
        marker = ES.AudioOnsetsMarker(onsets=features["beats"], type='beep',
                                      sampleRate=msaf.Anal.sample_rate)
        marked_audio = marker(audio)
        ES.MonoWriter(filename='beats.wav',
                      sampleRate=msaf.Anal.sample_rate)(marked_audio)

    # Read annotations if they exist in path/references_dir/file.jams
    if os.path.isfile(file_struct.ref_file):
        jam = jams2.load(file_struct.ref_file)

        # If beat annotations exist, compute also annotated beatsyn features
        if jam.beats != []:
            logging.info("Reading beat annotations from JAMS")
            annot = jam.beats[0]
            annot_beats = []
            for data in annot.data:
                annot_beats.append(data.time.value)
            annot_beats = essentia.array(np.unique(annot_beats).tolist())
            annot_mfcc, annot_hpcp, annot_tonnetz = compute_features(
                audio, annot_beats)

    # Save output as json file
    logging.info("Saving the JSON file in %s" % out_file)
    yaml = YamlOutput(filename=out_file, format='json')
    pool = essentia.Pool()
    pool.add("beats.times", features["beats"])
    pool.add("beats.confidence", features["beats_conf"])
    pool.set("analysis.sample_rate", msaf.Anal.sample_rate)
    pool.set("analysis.frame_rate", msaf.Anal.frame_size)
    pool.set("analysis.hop_size", msaf.Anal.hop_size)
    pool.set("analysis.window_type", msaf.Anal.window_type)
    pool.set("analysis.mfcc_coeff", msaf.Anal.mfcc_coeff)
    pool.set("timestamp",
             datetime.datetime.today().strftime("%Y/%m/%d %H:%M:%S"))
    save_features("framesync", pool, features["mfcc"], features["hpcp"],
                  features["tonnetz"])
    save_features("est_beatsync", pool, features["bs_mfcc"],
                  features["bs_hpcp"], features["bs_tonnetz"])
    if os.path.isfile(file_struct.ref_file) and jam.beats != []:
        save_features("ann_beatsync", pool, annot_mfcc, annot_hpcp,
                      annot_tonnetz)
    yaml(pool)
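
# Hedged usage sketch (hypothetical paths; assumes msaf is configured and that
# file_struct exposes audio_file, features_file and ref_file, as used above):
class _FileStruct:  # minimal stand-in for illustration only
    audio_file = "audio/song.mp3"
    features_file = "features/song.json"
    ref_file = "references/song.jams"

compute_all_features(_FileStruct(), audio_beats=False, overwrite=True)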
Example #6
    def recognize(self, audio_list):
        print("len")
        print(len(audio_list))
        for audio in audio_list:
            #newaudio = es.Resample(outputSampleRate=16000)(audio)
            #es.MonoWriter(filename=self._TEMP_FILE_NAME, sampleRate=16000)(newaudio)
            es.MonoWriter(filename=self._TEMP_FILE_NAME,
                          sampleRate=16000)(audio)
            #name = str(self.count) + "test.wav"
            #self.count = self.count + 1
            #es.MonoWriter(filename=name, sampleRate=16000)(audio)
            voice = open(self._TEMP_FILE_NAME, "rb")
            result = self.recognizer.identify_speaker(voice)

            data = ""

            if result != self.last and len(self.record) > 0:
                #oname = str(self.num) + "rec.wav"
                #es.MonoWriter(filename = oname, sampleRate=16000)(self.record)
                #self.num = self.num + 1
                oname = "onlinetemp.wav"
                es.MonoWriter(filename=oname, sampleRate=16000)(self.record)

                # Fixed-size output buffer for the recognizer's transcription
                c_pBuf = create_string_buffer(10000)

                lib.process(oname, c_pBuf)
                data = string_at(c_pBuf)

                if len(data) > 0:
                    print(data)
                    s = self.parent.parent.nameDict[result][:-4] + ": " + data
                    self.writeToOutput(s)

                    self.record = []
                    self.record = self.record + audio
                    self.last = result
            else:
                self.record = self.record + audio
                self.last = result

            print(result)
Example #7
def compute_all_features(audio_file, audio_beats=False):
    """Computes all the features for a specific audio file and its respective
        human annotations.

    Returns
    -------
    features : dict
        Dictionary with the following features:
            mfcc : np.array
                Mel Frequency Cepstral Coefficients representation
            hpcp : np.array
                Harmonic Pitch Class Profiles
            tonnetz : np.array
                Tonal Centroids (or Tonnetz)
    """

    # Makes sure the output features folder exists
    utils.ensure_dir(OUTPUT_FEATURES)
    features_file = os.path.join(OUTPUT_FEATURES,
                                 os.path.basename(audio_file) + ".json")

    # If already precomputed, read and return
    if os.path.exists(features_file):
        with open(features_file, "r") as f:
            features = json.load(f)
        return list_to_array(features)

    # Load Audio
    logging.info("Loading audio file %s" % os.path.basename(audio_file))
    audio = ES.MonoLoader(filename=audio_file, sampleRate=SAMPLE_RATE)()
    duration = len(audio) / float(SAMPLE_RATE)

    # Estimate Beats
    features = {}
    ticks, conf = compute_beats(audio)
    ticks = np.concatenate(([0], ticks, [duration]))  # Add first and last time
    ticks = essentia.array(np.unique(ticks))
    features["beats"] = ticks.tolist()

    # Compute Beat-sync features
    features["mfcc"], features["hpcp"], features["tonnetz"] = \
        compute_beatsync_features(ticks, audio)

    # Save output as audio file
    if audio_beats:
        logging.info("Saving Beats as an audio file")
        marker = ES.AudioOnsetsMarker(onsets=ticks, type='beep',
                                      sampleRate=SAMPLE_RATE)
        marked_audio = marker(audio)
        ES.MonoWriter(filename='beats.wav',
                      sampleRate=SAMPLE_RATE)(marked_audio)

    # Save features
    with open(features_file, "w") as f:
        json.dump(features, f)

    return list_to_array(features)
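
# Hedged usage sketch (hypothetical path; assumes SAMPLE_RATE and
# OUTPUT_FEATURES are configured as above and that list_to_array keeps the
# dictionary keys):
feats = compute_all_features("audio/song.mp3", audio_beats=False)
print(feats["beats"][:5])  # first few estimated beat times, in seconds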
Example #8
def add_click(infile, beats, outfile):
    """
    Adds a click track to a song according to the specified beats.

    This is used for debugging beat alignment.
    """
    audio = standard.MonoLoader(filename=infile)()
    marker = standard.AudioOnsetsMarker(onsets=beats, type='beep')
    marked_audio = marker(audio)
    standard.MonoWriter(filename=outfile)(marked_audio)
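
# Hedged usage sketch (hypothetical paths; beats is a vector of onset times in
# seconds, e.g. from a beat tracker such as RhythmExtractor2013):
from essentia import array
beats = array([0.5, 1.0, 1.5, 2.0])  # illustrative beat times
add_click("song.wav", beats, "song_with_clicks.wav")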
Example #9
def HPFilter(audio, cutoff):
    """Apply a high-pass filter and write the result to disk for auditioning."""
    HPF = es.HighPass(cutoffFrequency=cutoff)
    filtered_audio = HPF(audio)

    # Note: the output filename is hardcoded
    writer = es.MonoWriter(filename='holst_test.wav')
    writer(filtered_audio)

    return filtered_audio
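
# Hedged usage sketch (hypothetical input file; assumes essentia.standard is
# imported as es, as in the function above):
audio = es.MonoLoader(filename='holst.wav')()  # hypothetical file
filtered = HPFilter(audio, cutoff=200)  # attenuate content below 200 Hz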
Example #10
def mix(audio1, audio2, sr):
    """
    Function to mix audios with a normalised loudness
    :param audio1: Audio vector to normalize
    :param audio2: Audio vector to normalize
    :param sr: Sample rate of the final mix
    :return: Audio vector of the normalised mix
    """
    if audio1.ndim > 1:
        audio1 = std.MonoMixer()(audio1, audio1.shape[1])
    if audio2.ndim > 1:
        audio2 = std.MonoMixer()(audio2, audio2.shape[1])
    std.MonoWriter(filename='temporal1.wav', sampleRate=sr)(audio1)
    std.MonoWriter(filename='temporal2.wav', sampleRate=sr)(audio2)

    stream1 = (ffmpeg.input('temporal1.wav').filter('loudnorm'))

    stream2 = (ffmpeg.input('temporal2.wav').filter('loudnorm'))
    merged_audio = ffmpeg.filter([stream1, stream2], 'amix')
    ffmpeg.output(merged_audio, 'temporal_o.wav').overwrite_output().run()

    audio_numpy = std.MonoLoader(filename='temporal_o.wav')()
    return audio_numpy
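
# Hedged usage sketch (hypothetical stems; assumes essentia.standard as std
# and ffmpeg-python are available, as in the function above):
a = std.MonoLoader(filename='vocals.wav')()  # hypothetical stem
b = std.MonoLoader(filename='drums.wav')()   # hypothetical stem
mixed = mix(a, b, sr=44100)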
Example #11
    def save(self,
             file_path: Path = Path.cwd() / ".temp" / "audio.wav") -> None:
        """
        Export the AudioFile at the given path.
        """
        file_path.parent.mkdir(parents=True, exist_ok=True)
        if len(self.audio.shape) != 2:
            # mono signal
            estd.MonoWriter(
                filename=file_path.as_posix(),
                format=file_path.suffix[1:],
                sampleRate=self.sample_rate,
            )(self.audio)
        else:
            # multichannel signal
            estd.AudioWriter(
                filename=file_path.as_posix(),
                format=file_path.suffix[1:],
                sampleRate=self.sample_rate,
            )(self.audio)
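
# Hedged usage sketch (the surrounding AudioFile class is not shown above, so
# this only illustrates the call; the constructor and attributes are hypothetical):
# afile = AudioFile("input.wav")
# afile.save(Path("out") / "audio.wav")  # picks MonoWriter or AudioWriter by shape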
Example #12
    for f in find_files(in_folder, '.wav'):
        audio = es.MonoLoader(filename=f, sampleRate=fs)()
        original_len = len(audio)

        start_jump = original_len // 4
        if audio[start_jump] > 0:
            # Want at least a gap of .5
            end = next(idx for idx, i in enumerate(audio[start_jump:])
                       if i < -.3)
        else:
            end = next(idx for idx, i in enumerate(audio[start_jump:])
                       if i > .3)

        end_jump = start_jump + end

        audio = np.hstack([audio[:start_jump], audio[end_jump:]])

        text = ['{}\t0.0\tevent\n'.format(start_jump / float(fs))]

        if not os.path.exists(out_folder):
            os.mkdir(out_folder)

        f_name = ''.join(os.path.basename(f).split('.')[:-1])
        with open('{}/{}_prominent_jump.lab'.format(out_folder, f_name),
                  'w') as o_file:
            o_file.write(''.join(text))

        es.MonoWriter(
            filename='{}/{}_prominent_jump.wav'.format(out_folder, f_name))(
                esarr(audio))
Example #13
    in_folder = '/home/pablo/data/sns-small/samples'
    out_folder = '/home/pablo/reps/essentia/test/QA-audio/Hum/Songs50HzHum'
    fs = 44100.
    files = [x for x in find_files(in_folder, 'flac')]
    if not files:
        print('no files found!')

    for f in files:
        try:
            audio = es.MonoLoader(filename=f, sampleRate=fs)()
        except Exception:
            print('{} was not loaded'.format(f))
            continue

        t = np.linspace(0, len(audio) / fs, len(audio))

        freq = 50  # hum frequency in Hz

        sinusoid = np.sin(2 * np.pi * freq * t)

        signal = np.array(.95 * audio + .005 * sinusoid, dtype=np.float32)

        if not os.path.exists(out_folder):
            os.mkdir(out_folder)

        f_name = ''.join(os.path.basename(f).split('.')[:-1])

        es.MonoWriter(filename='{}/{}_hum.wav'.format(out_folder, f_name))(
            esarr(signal))
Example #14
    fs = 44100.
    files = [x for x in find_files(in_folder, 'wav')]
    if not files:
        print('no files found!')

    for f in files:
        try:
            audio = es.MonoLoader(filename=f, sampleRate=fs)()
        except Exception:
            print('{} was not loaded'.format(f))
            continue

        original_len = len(audio)

        start_jump = original_len // 4

        end_jump = start_jump + int(np.abs(np.random.randn()) * fs)

        audio[start_jump:end_jump] = np.zeros(end_jump - start_jump)

        text = ['{}\t{}\tevent\n'.format(start_jump / float(fs), end_jump / float(fs))]

        if not os.path.exists(out_folder):
            os.mkdir(out_folder)

        f_name = ''.join(os.path.basename(f).split('.')[:-1])
        with open('{}/{}_gap.lab'.format(out_folder, f_name), 'w') as o_file:
            o_file.write(''.join(text))

        es.MonoWriter(filename='{}/{}_gap.wav'.format(out_folder, f_name))(esarr(audio))
Example #15
import matplotlib.pyplot as plt
import numpy as np

if mode == 'standard':

  # create an audio loader and import audio file
  loader = std.MonoLoader(filename = inputFilename, sampleRate = 44100)
  audio = loader()

  print("Duration of the audio sample [sec]:")
  print(len(audio)/44100.0)

  w = std.Windowing(type = "hann");
  fft = std.FFT(size = framesize);
  ifft = std.IFFT(size = framesize);
  overl = std.OverlapAdd (frameSize = framesize, hopSize = hopsize);
  awrite = std.MonoWriter (filename = outputFilename, sampleRate = 44100);


  for frame in std.FrameGenerator(audio, frameSize = framesize, hopSize = hopsize):
    # STFT analysis
    infft = fft(w(frame))
    
    # here we could apply spectral transformations
    outfft = infft

    # STFT synthesis
    ifftframe = ifft(outfft)
    out = overl(ifftframe)    

    if counter >= (framesize/(2*hopsize)):
      audioout = np.append(audioout, out)
Example #16
    data = np.genfromtxt(os.path.join(folder, mbid, mbid + '-bangu.csv'),
                         delimiter=',')

    loader = es.MonoLoader(filename=os.path.join(folder, 'ban.mp3'))
    ban = loader()

    loader = es.MonoLoader(filename=os.path.join(folder, 'gu.mp3'))
    gu = loader()

    loader = es.MonoLoader(filename=os.path.join(folder, mbid, mbid +
                                                 '-acc.mp3'))
    acc = loader()

    print(acc.size / 44100)

    new = np.zeros(acc.size)

    for i in range(len(data)):
        start = int(round(data[i, 0] * 44100))
        if str(data[i, 2])[-1] == '0' or str(data[i, 2])[-1] == '1':
            end = start + ban.size
            new[start:end] = ban
        else:
            end = start + gu.size
            new[start:end] = gu

    newFile = os.path.join(folder, mbid, mbid + '-bangu.wav')
    es.MonoWriter(filename=newFile)(essentia.array(new))

#    newFile = os.path.join(mbid, mbid+'-bangu.mp3')
#    es.MonoWriter(filename=newFile, format='mp3')(essentia.array(new))
Example #17
    # Snare
    for i in range(4 * num_4_4):
        snareAudio[int(beat_pos[i] * fs):int(beat_pos[i] * fs) +
                   snareArray.size] = amplitude_snare[i] * snareGroove_4_4[
                       i % 4] * snareArray
    for i in range(5 * num_5_4):
        snareAudio[int(beat_pos[i + 4 * num_4_4] *
                       fs):int(beat_pos[i + 4 * num_4_4] * fs) +
                   snareArray.size] = amplitude_snare[
                       i + 4 * num_4_4] * snareGroove_5_4[i % 5] * snareArray

    # Hat
    for i in range(num_beats):
        hatAudio[int(beat_pos[i] * fs):int(beat_pos[i] * fs) +
                 hatArray.size] = amplitude_hat[i] * hatArray

    # Adding together into one
    x = float(randint(80, 100)) / 100
    file_time = fs * (beat_pos[num_beats - 1] + x)
    print(x, file_time / fs)

    for i in range(int(file_time)):
        kickAudio[i] = kickAudio[i] + snareAudio[i] + hatAudio[i]

    # Writing to wav file
    writer = ess.MonoWriter(filename=str(OUTPUT_DIRECTORY_AUDIO) +
                            str(indexx) + ".wav",
                            format="wav")
    writer(kickAudio[0:int(file_time)].astype('single'))
Example #18
            # Compute beat positions and BPM
            rhythm_extractor = esst.RhythmExtractor2013(method="multifeature")
            bpm, beats, beats_confidence, _, beats_intervals = rhythm_extractor(
                audio)

            # print("BPM:", bpm)
            # print("Beat positions (sec.):", beats)
            print("Beat estimation confidence:", beats_confidence)

            for i in range(len(beats)):
                # take a short slice around each beat
                trim = esst.Trimmer(startTime=beats[i] - 0.01,
                                    endTime=beats[i] + 0.15)(audio)

                if len(mixed_sample):
                    trim = np.resize(trim, mixed_sample.size)
                    stereo_mix = esst.StereoMuxer()(mixed_sample, trim)
                    mixed_sample = esst.MonoMixer()(stereo_mix, 2)  # keep the running mono mix
                else:
                    mixed_sample = trim
            output_array = np.concatenate([output_array, mixed_sample])

esst.MonoWriter(filename=f"../samples/mix_beat{i}.mp3")(output_array)

# Mark beat positions on the audio and write it to a file
# Let's use beeps instead of white noise to mark them, as it's more distinctive
# marker = AudioOnsetsMarker(onsets=beats, type='beep')
# marked_audio = marker(audio)
# MonoWriter(filename='../samples/dubstep_beats.flac')(marked_audio)
Example #19
def getWaveSection(audio):
    # Set sizes
    n_samples = len(audio)
    n_of_blocks = 20
    samples_per_block = int(n_samples / n_of_blocks)

    # Final dictionary
    wave_data = {}

    # For each block
    for i in range(0, n_of_blocks):
        # x0, x1, avg_mood, avg_bpm, strong_peak, energy bands
        block = {
            "x0": 0,
            "x1": 0,
            "avg_mood": 0,
            "avg_bpm": 0,
            "strong_peak": 0,
            "energy_band": ""
        }

        ## Add x0 and x1
        x0 = i * samples_per_block
        x1 = samples_per_block * (i + 1)
        block["x0"] = x0
        block["x1"] = x1

        # Take a sample of the audio file...
        samples = audio[x0:x1]

        # Create temporary file for audio sample
        # monowriter to .tmp.wav
        es.MonoWriter(filename='.tmp.wav')(samples)

        # Extract lowlevel features
        subprocess.run([
            "lib/essentia/streaming_extractor_music", '.tmp.wav', '.l_tmp',
            profile_low
        ])

        # Read the low-level descriptors file
        with open('.l_tmp', 'r', encoding='utf-8') as f:
            lowlevel_content = json.load(f)

        # Run the high-level (SVM) extractor on the low-level output
        subprocess.run([
            "essentia_streaming_extractor_music_svm", '.l_tmp', '.h_tmp',
            profile_wavesection
        ])

        # Read the high-level descriptors file
        with open('.h_tmp', 'r', encoding='utf-8') as f:
            highlevel_content = json.load(f)

        ## Add spectral strong peak
        block["strong_peak"] = lowlevel_content["lowlevel"][
            "spectral_strongpeak"]["mean"]

        ## Add avg bpm
        block["avg_bpm"] = lowlevel_content["rhythm"]["bpm"]

        ## Add avg mood
        block["avg_mood"] = highlevel_content["highlevel"]["mood_happy"][
            "all"]["happy"]

        ## Spectral energy band
        energy_band = {}

        energy_band["low"] = lowlevel_content["lowlevel"][
            "spectral_energyband_low"]["mean"]

        energy_band["middle_low"] = lowlevel_content["lowlevel"][
            "spectral_energyband_middle_low"]["mean"]

        energy_band["middle_high"] = lowlevel_content["lowlevel"][
            "spectral_energyband_middle_high"]["mean"]

        energy_band["high"] = lowlevel_content["lowlevel"][
            "spectral_energyband_high"]["mean"]

        block["energy_band"] = energy_band

        # Insert in dictionary with key = i
        wave_data[i] = block

    # Delete temporary files
    os.remove('.tmp.wav')
    os.remove('.l_tmp')
    os.remove('.h_tmp')

    return wave_data
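
# Hedged usage sketch (hypothetical file; assumes the extractor binaries and
# the profile_low / profile_wavesection configs referenced above are available):
audio = es.MonoLoader(filename='track.mp3')()  # hypothetical track
wave_data = getWaveSection(audio)
print(wave_data[0]["avg_bpm"])  # features of the first of the 20 blocks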
Example #20
import os
import essentia.standard as ess

for root, dirs, files in os.walk(
        "/media/kushagra/529EC5229EC50009/Users/kushagra/Master_Datasets/SMC_MIREX/SMC_MIREX/SMC_MIREX_Audio"
):
    for f in files:
        # Use Essentia to load the audio
        path = root + '/' + f
        audio = ess.MonoLoader(filename=str(path))()
        filt = ess.BandReject(bandwidth=1700, cutoffFrequency=300)
        filtof = filt(audio)
        writer = ess.MonoWriter(
            filename=
            "/media/kushagra/529EC5229EC50009/Users/kushagra/Master_Datasets/SMC_MIREX/SMC_MIREX/Augmentations/"
            + str(f),
            format="wav")
        writer(filtof)
Example #21
def create_excerpt(audio_path, time, name):
    """
    Given an audio path and times, transcribe it and create new MIDI and wav
    files for the given excerpts. `name` is the file name without extension
    and transcription number.
    """

    full_audio = esst.EasyLoader(filename=audio_path, sampleRate=SR)()
    start_audio, _ = find_start_stop(full_audio, sample_rate=SR, seconds=True)
    original = midipath2mat(audio_path[:-4] + '.mid')

    # compute score path
    score_path = './my_scores/' + os.path.basename(audio_path)[:-8] + '.mid'
    score = midipath2mat(score_path)

    # transcribe
    data = pickle.load(open(TEMPLATE_PATH, 'rb'))
    transcription_0, _, _, _ = proposed.transcribe(full_audio,
                                                   data,
                                                   score=score)

    transcription_1 = magenta_transcription.transcribe(full_audio, SR)

    # transcription_2, _, _, _ = proposed.transcribe(full_audio,
    #                                                data,
    #                                                score=None)

    # choose another interpretation
    performance = '01'
    if audio_path[-6:-4] == '01':
        performance = '02'
    other = midipath2mat(audio_path[:-6] + performance + '.mid')

    # segment all the scores and audios
    full_audio = esst.EasyLoader(filename=audio_path, sampleRate=OUT_SR)()
    original_audio = full_audio[round(time[0][0] * OUT_SR):round(time[0][1] *
                                                                 OUT_SR)]
    other_time = remap_original_in_other(original, other, time[0])
    original = segment_mat(original, time[0][0], time[0][1], start_audio)
    other = segment_mat(other, other_time[0], other_time[1], start_audio)
    transcription_0 = segment_mat(transcription_0, time[0][0], time[0][1],
                                  start_audio)
    transcription_1 = segment_mat(transcription_1, time[0][0], time[0][1],
                                  start_audio)
    # transcription_2 = segment_mat(transcription_2, time[0][0], time[0][1],
    #                               start_audio)

    # write scores to `to_be_synthesized` and audios to `excerpts`
    if not os.path.exists('to_be_synthesized'):
        os.mkdir('to_be_synthesized')
    midi_path = os.path.join('to_be_synthesized', name)
    mat2midipath(original, midi_path + 'orig.mid')
    mat2midipath(other, midi_path + 'other.mid')
    mat2midipath(transcription_0, midi_path + 'proposed.mid')
    mat2midipath(transcription_1, midi_path + 'magenta.mid')
    # mat2midipath(transcription_2, midi_path + 'vienna.mid')

    if not os.path.exists('audio'):
        os.mkdir('audio')
    audio_path = os.path.join('audio', name) + 'target.' + FORMAT

    # write audio
    if os.path.exists(audio_path):
        os.remove(audio_path)
    esst.MonoWriter(filename=audio_path,
                    sampleRate=OUT_SR,
                    format=FORMAT,
                    bitrate=256)(original_audio)
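
# Hedged usage sketch (hypothetical paths; assumes the module-level constants
# SR, OUT_SR, FORMAT and TEMPLATE_PATH are configured as above):
# create_excerpt('maestro/piece_01.wav', time=[(12.0, 20.0)], name='piece_01_')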
Example #22
def sliceDrums_from_annotations_SDtrainset(instrument_name, segments_dir,
                                           song_dict, fs):
    """
        Input:  instrument_name: str woth a key in the song_dict
                segments_dir : str with path where slices are saved
                song_dict : dict containing audio stream and annotations
                fs :  sampling rate to properly save the files

        This function slices audio stream based on annotations and save each slice in a individual wav file, 
        each on the corresponent folder = segmens_dir/song_name/instrument/file.wav
        
        Adapted to routines recorded in the studio

        This function could be combined with the feature extraction in the next cells, but having the slices
        saved allows us to do data augmentation combining individual samples to get more instances of all the combinations
    """
    song = song_dict[instrument_name]
    x_seg_dir = os.path.join(segments_dir, instrument_name)

    od_complex = OnsetDetection(method='complex')
    w = Windowing(type='hann')
    fft = FFT()  # this gives us a complex FFT
    c2p = CartesianToPolar()  # and this turns it into a pair (magnitude, phase)
    onsets = Onsets()

    file_count = 0

    for audio in song['audios']:
        x = audio
        duration = float(len(x)) / fs

        x = x / np.max(np.abs(x))

        t = np.arange(len(x)) / float(fs)

        # Essentia onset detection
        pool = Pool()
        for frame in FrameGenerator(x, frameSize=1024, hopSize=512):
            mag, phase = c2p(fft(w(frame)))
            pool.add('features.complex', od_complex(mag, phase))

        onsets_list = onsets(array([pool['features.complex']]), [1])
        first_onset = int(onsets_list[0] * fs)

        print(first_onset)
        if not os.path.exists(segments_dir):  #creating the directory
            os.mkdir(segments_dir)
        segments_dir__ = os.path.join(segments_dir, instrument_name)
        if not os.path.exists(segments_dir__):  #creating the directory
            os.mkdir(segments_dir__)

        n_notes = len(song['annotations'])
        annotations = song['annotations']
        for i in range(1, n_notes - 1):  # the last note has no following onset to end the slice
            x_seg = audio[(annotations[i][2] - 3000 +
                           first_onset):(annotations[i + 1][2] - 3000 +
                                         first_onset)]

            if len(x_seg) < 5000 or np.max(np.abs(x_seg)) < 0.05:
                continue

            x_seg = x_seg / np.max(np.abs(x_seg))

            if not os.path.exists(x_seg_dir):  #creating the directory
                os.mkdir(x_seg_dir)
            path, dirs, files = next(os.walk(x_seg_dir))
            dir_n_files = len(files)
            if annotations[i][1] == 'N':
                continue
            filename = os.path.join(
                x_seg_dir, instrument_name + '_' + str(dir_n_files) + '.wav')
            ess.MonoWriter(filename=filename, format='wav',
                           sampleRate=fs)(x_seg)
            file_count = file_count + 1

    print(instrument_name + ": " + str(file_count))
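
# Hedged usage sketch (hypothetical data; song_dict must map each instrument
# name to {'audios': [...], 'annotations': [...]}, as the function expects):
# sliceDrums_from_annotations_SDtrainset('snare', 'segments/', song_dict, fs=44100)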
Example #23
        monoLoader = es.MonoLoader(filename=mixFile, sampleRate=44100)
        x = monoLoader()[:nSeconds * 44100]

        _stft = stft(x,
                     n_fft=fftSize,
                     hop_length=hopSize,
                     win_length=frameSize,
                     window=winType)

        X_H, X_P = hpss(_stft,
                        kernel_size=150)  # Get harmonic and percussive stfts

        x_h = istft(
            X_H, hop_length=hopSize,
            win_length=frameSize)  # Convert stfts to time domain signals
        x_p = istft(X_P, hop_length=hopSize, win_length=frameSize)

        # Write the separated components to file; pass all parameters in one
        # call so a later configure() does not reset them to defaults
        MonoWriter = es.MonoWriter(sampleRate=44100, format="mp3",
                                   filename=saveFolderLoc + filename +
                                   "_median_percussive.mp3")
        MonoWriter(array(x_p))

        MonoWriter = es.MonoWriter(sampleRate=44100, format="mp3",
                                   filename=saveFolderLoc + filename +
                                   "_median_harmonic.mp3")
        MonoWriter(array(x_h))

print("DONE")
Example #24
        raga = metadata['raaga'][0]['common_name']
    except IndexError:
        continue

    raga_dir = os.path.join(output_dir, raga)
    os.makedirs(raga_dir, exist_ok=True)

    try:
        length = len(phrases)
    except TypeError:
        length = 1
        phrases = np.array([phrases])

    print(phrases)

    if len(phrases) > 0:
        collated_phrases = commons.collate_phrases(phrases)
        for phrase_annotation, phrases in collated_phrases.items():
            for i, indices in enumerate(phrases):
                melodic_segment_dir = os.path.join(raga_dir,
                                                   phrase_annotation.decode())
                os.makedirs(melodic_segment_dir, exist_ok=True)
                melodic_segment_path = os.path.join(
                    melodic_segment_dir, '{}-{:02}.mp3'.format(filename, i))
                ess.MonoWriter(filename=melodic_segment_path,
                               format='mp3')(audio[indices[0]:indices[1]])
        total_phrases += len(phrases)
        print(filename, len(phrases))

print(total_phrases)
Example #25
    def separate_and_analyze_function(self):
        # Separation using Smoothness/Sparseness
        frameSize = int(self.mixed_canvas.frame_size_comboBox.currentText())
        # the hop size combo box stores a fraction of the frame size (e.g. "1/2")
        hopSize = int(eval(self.mixed_canvas.hop_size_comboBox.currentText()) * frameSize)
        fftSize = int(self.mixed_canvas.fft_size_comboBox.currentText())

        x, sampleRate = self.mixed_canvas.get_audio()
        file_directory = os.path.dirname(self.mixed_canvas.get_filename())

        # File name format Algo_FFTSize_frameSize_hopSize
        SMSP_filename_prefix = "SMSP_" + str(fftSize) + "_" + str(
            frameSize) + "_" + str(hopSize)
        median_filename_prefix = "median_" + str(fftSize) + "_" + str(
            frameSize) + "_" + str(hopSize)

        if len(x) > 0:  # only proceed if audio was actually loaded
            if self.SMSP_checkbox.checkState():
                self.StatusBarSignal.emit(
                    "Separating Using Smoothness/Sparseness NMF Algorithm")

                #   separate using Smoothness/Sparseness
                hpss = SMSP_HPSS(
                    np.array(x),
                    directory=file_directory,
                    filename=SMSP_filename_prefix,
                    format="wav",
                    beta=1.5,
                    frameSize=frameSize,
                    hopSize=hopSize,
                    fftSize=fftSize,
                    Rp=150,
                    Rh=150,
                    K_SSM=.2,  # Percussive Spectral Smoothness
                    K_TSP=.1,  # Percussive Temporal Smoothness
                    K_SSP=.1,  # Harmonic Spectral Smoothness
                    K_TSM=.2,  # Harmonic Temporal Smoothness
                )

                maxIter = 100

                for i in range(int(maxIter)):
                    self.StatusBarSignal.emit("Iteration %i out of %i" %
                                              (i + 1, maxIter))
                    hpss.next_iteration()

                hpss.create_masks()
                hpss.spectral_to_temporal_using_masks()

                hpss.save_separated_audiofiles()

                shutil.move(
                    os.path.join(file_directory,
                                 SMSP_filename_prefix + "_harmonic.wav"),
                    os.path.join(file_directory, "harmonic",
                                 SMSP_filename_prefix + "_harmonic.wav"))

                shutil.move(
                    os.path.join(file_directory,
                                 SMSP_filename_prefix + "_percussive.wav"),
                    os.path.join(file_directory, "percussive",
                                 SMSP_filename_prefix + "_percussive.wav"))

                # self.StatusBarSignal.emit("Finished Separating (SMSP)")

            if self.median_checkbox.checkState():
                self.StatusBarSignal.emit(
                    "Separating Using Median Filtering Algorithm")
                # Separation using median filtering
                _stft = stft(x,
                             n_fft=fftSize,
                             hop_length=hopSize,
                             win_length=frameSize,
                             window="hann")

                X_H, X_P = librosa.decompose.hpss(
                    _stft,
                    kernel_size=150)  # Get harmonic and percussive stfts

                x_h = istft(X_H, hop_length=hopSize, win_length=frameSize
                            )  # Convert stfts to time domain signals
                x_p = istft(X_P, hop_length=hopSize, win_length=frameSize)

                # Write to file, using the loaded sample rate and configuring
                # everything in one call
                MonoWriter = es.MonoWriter(
                    sampleRate=sampleRate, format="wav",
                    filename=os.path.join(file_directory, "percussive",
                                          median_filename_prefix + "_percussive.wav"))
                MonoWriter(array(x_p))

                MonoWriter = es.MonoWriter(
                    sampleRate=sampleRate, format="wav",
                    filename=os.path.join(file_directory, "harmonic",
                                          median_filename_prefix + "_harmonic.wav"))
                MonoWriter(array(x_h))

                # self.StatusBarSignal.emit("Finished Separating (Median Filtering)")

            self.FindSeparatedFilesSignal.emit()

        return