示例#1
0
def compute_essentia_descriptors(audio_segment, actual_bar_beg,
                                 actual_bar_end):
    """Compute the selected descriptors for the given audio segment.

    Relies on module-level settings and analysis callables defined elsewhere
    in this file: ``frameSize``, ``hopSize``, ``window``, ``spectrum``,
    ``spectralPeaks``, ``fft``, ``c2p``, ``od``, ``dissonance``, ``mfcc``,
    ``barkbands``, ``onsets`` and ``mean``.

    Args:
        audio_segment: audio samples of the bar to analyse.
        actual_bar_beg: start position of the bar.
        actual_bar_end: end position of the bar. Only the difference
            ``actual_bar_end - actual_bar_beg`` is used, as the denominator
            of the onset rate (presumably seconds -- confirm against the
            caller). The bar must therefore have a non-zero length.

    Returns:
        A tuple ``(mfccs_bar, bark_vector, onset_rate, bar_dissonance)``:

        * ``mfccs_bar``: list with the MFCC coefficients of every frame.
        * ``bark_vector``: list of 27 values, each the per-band output of
          ``barkbands`` averaged over all frames of the segment.
        * ``onset_rate``: number of detected onsets divided by the bar
          length.
        * ``bar_dissonance``: mean of the per-frame dissonance values.
    """
    n_bark_bands = 27

    frames = FrameGenerator(audio_segment,
                            frameSize=frameSize,
                            hopSize=hopSize)
    mfccs_bar = []
    bark_vector = [0] * n_bark_bands
    pool = essentia.Pool()
    total_frames = frames.num_frames()

    for frame in frames:
        # Window and transform each frame once; the spectrum is shared by
        # the spectral peaks, the MFCCs and the bark bands.
        frame_windowed = window(frame)
        frame_spectrum = spectrum(frame_windowed)

        # Onset detection function, fed with the polar form of the FFT.
        mag, phase = c2p(fft(frame_windowed))
        pool.add('onsets.hfc', od(mag, phase))

        frame_frequencies, frame_magnitudes = spectralPeaks(frame_spectrum)
        pool.add('dissonance', dissonance(frame_frequencies, frame_magnitudes))
        # pool.add('zerocrossingrate', zerocrossingrate(frame))

        # Only the coefficients are kept; the band energies are discarded.
        _, mfcc_coeffs = mfcc(frame_spectrum)
        mfccs_bar.append(mfcc_coeffs)

        # Accumulate the running average of every bark band.
        frame_barkbands = barkbands(frame_spectrum)
        for i in range(n_bark_bands):
            bark_vector[i] += frame_barkbands[i] / total_frames

    onsets_hfc = onsets(essentia.array([pool['onsets.hfc']]), [1])
    onset_rate = float(len(onsets_hfc)) / (actual_bar_end - actual_bar_beg)
    bar_dissonance = mean(pool["dissonance"])

    return mfccs_bar, bark_vector, onset_rate, bar_dissonance
示例#2
0
def essFalsestereoDetector(x: list,
                           frameSize=1024,
                           hopSize=512,
                           correlationThreshold=0.98,
                           percentageThreshold=90,
                           channels=2,
                           **kwargs):
    """Flag a stereo signal whose two channels carry the same information.

    The signal is split into its two channels, cut into frames, and every
    frame pair is passed to ``FalseStereoDetector``. The file is flagged when
    the fraction of frames reported as false stereo exceeds
    ``percentageThreshold`` percent.

    Args:
        x: (list) input stereo signal.
        frameSize: (int) frame size for the analysis.
        hopSize: (int) hop size for the analysis.
        correlationThreshold: (float) forwarded to ``FalseStereoDetector`` as
            its ``correlationThreshold`` parameter.
        percentageThreshold: (number) percentage (0-100) of false-stereo
            frames above which the whole signal is flagged.
        channels: (int) number of channels of the input; with fewer than two
            the analysis is skipped.
        **kwargs: extra parameters forwarded to ``FalseStereoDetector``.

    Returns:
        A tuple ``(conf, flagged, skipped)``:

        * ``conf``: (float) fraction of frames detected as false stereo.
        * ``flagged``: (bool) True if ``conf`` exceeds
          ``percentageThreshold / 100``.
        * ``skipped``: (bool) True when the input has fewer than two
          channels, in which case ``(1, False, True)`` is returned.
    """
    if channels < 2:
        return 1, False, True

    # StereoDemuxer outputs the left channel first, then the right one.
    lx, rx = StereoDemuxer()(x)
    mux = StereoMuxer()
    falseStereoDetector = FalseStereoDetector(
        correlationThreshold=correlationThreshold, **kwargs)

    lfg = FrameGenerator(lx,
                         frameSize=frameSize,
                         hopSize=hopSize,
                         startFromZero=True)
    rfg = FrameGenerator(rx,
                         frameSize=frameSize,
                         hopSize=hopSize,
                         startFromZero=True)

    # The first output of the detector is the per-frame false-stereo flag;
    # summing the flags gives the number of problematic frames.
    problematicFrames = sum(
        int(falseStereoDetector(mux(frameL, frameR))[0])
        for frameL, frameR in zip(lfg, rfg))

    falseStereoDetector.reset()

    total_frames = lfg.num_frames()
    if total_frames == 0:
        # Nothing to analyse: report no false-stereo frames instead of
        # dividing by zero.
        return 0.0, False, False

    conf = float(problematicFrames) / float(total_frames)

    return conf, conf > percentageThreshold / 100, False
示例#3
0
def outofPhaseDetector(x: list,
                       frameSize=1024,
                       hopSize=512,
                       correlationThreshold=-0.8,
                       percentageThreshold=90,
                       channels=2,
                       **kwargs):
    """Flag a stereo signal whose channels are out of phase in most frames.

    The signal is split into its two channels, cut into frames, and the
    inter-channel correlation of every frame pair is obtained from
    ``FalseStereoDetector``. A frame counts as out of phase when its
    correlation is below ``correlationThreshold``; the file is flagged when
    the fraction of such frames exceeds ``percentageThreshold`` percent.

    Args:
        x: (list) input stereo signal.
        frameSize: (int) frame size for the analysis.
        hopSize: (int) hop size for the analysis.
        correlationThreshold: (float) a frame is out of phase when its
            correlation is strictly lower than this value.
        percentageThreshold: (number) percentage (0-100) of out-of-phase
            frames above which the whole signal is flagged.
        channels: (int) number of channels of the input; with fewer than two
            the analysis is skipped.
        **kwargs: extra parameters forwarded to ``FalseStereoDetector``.

    Returns:
        A tuple ``(conf, flagged, skipped)``:

        * ``conf``: (float) fraction of frames detected as out of phase.
        * ``flagged``: (bool) True if ``conf`` exceeds
          ``percentageThreshold / 100``.
        * ``skipped``: (bool) True when the input has fewer than two
          channels, in which case ``(1, False, True)`` is returned.
    """
    if channels < 2:
        return 1, False, True

    # StereoDemuxer outputs the left channel first, then the right one.
    lx, rx = StereoDemuxer()(x)
    mux = StereoMuxer()
    falseStereoDetector = FalseStereoDetector(**kwargs)

    lfg = FrameGenerator(lx,
                         frameSize=frameSize,
                         hopSize=hopSize,
                         startFromZero=True)
    rfg = FrameGenerator(rx,
                         frameSize=frameSize,
                         hopSize=hopSize,
                         startFromZero=True)

    problematicFrames = 0
    for frameL, frameR in zip(lfg, rfg):
        # Only the correlation is needed here; the detector's own
        # false-stereo flag is ignored.
        _, corr = falseStereoDetector(mux(frameL, frameR))
        # int() keeps the counter (and the returned values) plain Python
        # types instead of NumPy scalars.
        problematicFrames += int(corr < correlationThreshold)
    falseStereoDetector.reset()

    total_frames = lfg.num_frames()
    if total_frames == 0:
        # Nothing to analyse: report no out-of-phase frames instead of
        # dividing by zero.
        return 0.0, False, False

    conf = problematicFrames / total_frames

    return conf, conf > percentageThreshold / 100, False