Example No. 1
 def predictOne(self, path):
     """
     Returns the structure and labels estimated by the specified algorithm.
     Drops the first and last boundaries, which are the start and end of the track.
     """
     import msaf
     if self.parameters["feature"].value is None:
         boundaries, labels = msaf.process(
             path, boundaries_id=self.parameters["algorithm"].value)
     else:
         boundaries, labels = msaf.process(
             path,
             boundaries_id=self.parameters["algorithm"].value,
             feature=self.parameters["feature"].value)
     return (SparseSignal(labels[1:], boundaries[1:-1]), )
Example No. 2
def parts(audio_file, videos_folder):
    os.system(
        f'DBNDownBeatTracker --downbeats single "{audio_file}" >> beats.txt')
    with open('beats.txt', 'r') as f:
        downbeat_times = list(map(float, f.readlines()))

    boundaries, labels = msaf.process(audio_file,
                                      boundaries_id="foote",
                                      labels_id="fmc2d")
    print("BOUNDARIES", boundaries)
    print("LABELS", labels)

    parts_names = [
        folder for folder in os.listdir(videos_folder)
        if len(folder) == 1 and not_empty(os.path.join(videos_folder, folder))
    ]
    labels2ids = {
        list(set(labels))[i]: parts_names[i % len(parts_names)]
        for i in range(len(set(labels)))
    }

    boundaries_info = {k: [] for k in labels2ids.values()}
    for label, v_min, v_max in zip(labels, boundaries[:-1], boundaries[1:]):
        boundaries_info[labels2ids[label]].append((v_min, v_max))

    start = 0.0
    end = boundaries[-1]
    downbeat_times = [start] + list(downbeat_times) + [end]

    clean()
    return boundaries_info, downbeat_times
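
A minimal usage sketch for the function above, assuming a hypothetical "song.wav", a videos folder whose single-letter subfolders name the parts, and that the DBNDownBeatTracker command and the clean() helper referenced above are available:

info, downbeats = parts("song.wav", "videos/")
for part_name, spans in info.items():
    for start, end in spans:
        print(part_name, round(start, 2), round(end, 2))
print("downbeat grid (first five):", downbeats[:5])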
Example No. 3
def main(wavlist, outdir):
    for f in wavlist:
        if f.endswith('.wav'):
            print("processing {}".format(f))
            boundaries, labels = msaf.process(f, boundaries_id="sf",
                                              labels_id="vmo",feature="mfcc")
            labels = [int(l) for l in labels]

            # conglomerate boundaries by label
            new_boundaries = [boundaries[0]]
            new_labels = [labels[0]]
            for i in range(1, len(labels)):
                if labels[i] == labels[i-1]:
                    continue
                new_boundaries.append(boundaries[i])
                new_labels.append(labels[i])
            boundaries = new_boundaries
            labels = new_labels

            # read wavfile, parse out segments
            rate, data = wavfile.read(f)
            segments = []
            for b_ind in range(1, len(boundaries)):
                seg_start = int(np.round(rate*(boundaries[b_ind - 1])))
                seg_end = int(np.round(rate*(boundaries[b_ind])))
                segments.append(data[seg_start:seg_end])

            assert len(segments) + 1 == len(boundaries) == len(labels)

            # merge short segments
            new_segments = [segments[0]]
            new_boundaries = [boundaries[0]]
            new_labels = [labels[0]]
            for i in range(1, len(segments)):
                seg = segments[i]
                if len(seg) < rate*MIN_SEG_LEN:
                    # fold the short segment into the last kept segment
                    new_segments[-1] = np.concatenate((new_segments[-1], seg), axis=0)
                else:
                    new_segments.append(seg)
                    new_labels.append(labels[i])
                    new_boundaries.append(boundaries[i])

            if len(new_segments[0]) < rate*MIN_SEG_LEN:
                new_segments[1] = np.concatenate((new_segments[0], new_segments[1]), axis=0)
                new_boundaries[1] = new_boundaries[0]
                new_segments = new_segments[1:]
                new_boundaries = new_boundaries[1:]
                new_labels = new_labels[1:]

            segments = new_segments
            boundaries = new_boundaries
            labels = new_labels

            for i in range(len(segments)):
                outfilename = "{}-clip-{}-label-{}.wav".format(op.splitext(op.basename(f))[0],
                                                               i, labels[i])
                outpath = op.join(outdir, outfilename)
                wavfile.write(outpath, rate, segments[i])
                print("{} created.".format(outfilename))
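
A minimal driver sketch for the function above; the folder names are hypothetical, and the helpers used inside main (numpy as np, scipy.io.wavfile, os.path as op, MIN_SEG_LEN) are assumed to be imported as in the original module:

import glob
import os.path as op

wavlist = glob.glob(op.join("input_wavs", "*.wav"))  # hypothetical input folder
main(wavlist, "segmented_clips")                      # writes one clip per merged segment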
Example No. 4
def segment_song(msd_id):
    boundaries, labels = msaf.process(get_synthesized_path(msd_id),
        boundaries_id="olda", labels_id="scluster", feature="mfcc")

    # merge short segments
    new_boundaries = [boundaries[0]]
    new_labels = [labels[0]]
    for i in range(1, len(boundaries)):
        if (boundaries[i] - boundaries[i-1]) > MIN_SEGMENT_LEN:
            new_boundaries.append(boundaries[i])
            new_labels.append(labels[i])

    boundaries = new_boundaries
    labels = new_labels

    # calculate tick values for segments
    mt_midi = Multitrack(get_npz_path(msd_id))
    tempo = mt_midi.tempo[0]
    beat_resolution = mt_midi.beat_resolution
    # beats/min * ticks/beat * min/sec = ticks/sec
    ticks_per_second = (tempo * beat_resolution) // 60
    
    def get_nearest(num, nearest="downbeat"):
        if nearest == "beat":
            multiple = beat_resolution
        elif nearest == "downbeat":
            multiple = beat_resolution * 4
        else:
            raise ValueError("Argument to get_nearest should be either 'beat' or 'downbeat'")
    
        factor = num // multiple
        remainder = num % multiple
        if remainder < (multiple // 2):
            ndb = multiple*factor
        else:
            ndb = multiple*(factor + 1)
        return ndb

    prev_l = -1
    tick_boundaries = []
    for b, l in zip(boundaries, labels):
        if l != prev_l:
            tick_boundaries.append(get_nearest(b*ticks_per_second, "downbeat"))
        else:
            tick_boundaries.append(get_nearest(b*ticks_per_second, "beat"))
        prev_l = l

    second_boundaries = [tb/ticks_per_second for tb in tick_boundaries]
            
    # change labels to letters and order
    letter_labels = []
    label_map = dict()
    for label in labels:
        if label not in label_map:
            label_map[label] = string.ascii_uppercase[len(label_map)]
        letter_labels.append(label_map[label])
    
    return second_boundaries, tick_boundaries, letter_labels
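
A small worked illustration of the rounding done by get_nearest above, assuming a beat_resolution of 24 ticks per beat (so 96 ticks per downbeat):

beat_resolution = 24
multiple = beat_resolution * 4              # 96 ticks per downbeat
num = 130                                   # tick position to snap
factor, remainder = divmod(num, multiple)   # 1, 34
# remainder (34) is below multiple // 2 (48), so the position snaps down to 96;
# a remainder of 48 or more would snap up to 192 instead
snapped = multiple * factor if remainder < multiple // 2 else multiple * (factor + 1)
print(snapped)  # 96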
Example No. 5
def process_boundaries(path_to_read):
	try:
		boundaries, labels = msaf.process(path_to_read, n_jobs=1,
										boundaries_id="scluster", 
										labels_id="scluster")
	except ValueError:
		boundaries = []
		labels = []
		print('Perhaps path error: %s' % path_to_read)
	print('boundary and label estimation done: %s' % path_to_read)
	return (boundaries, labels)
Example No. 6
def parts(audio_file):
	boundaries, labels = msaf.process(audio_file, boundaries_id="foote", labels_id="fmc2d")
	parts_names = ["A","B","C","D","E","F","G","H","I","J","K","L"]
	labels2ids = {list(set(labels))[i]:parts_names[i%len(parts_names)] for i in range(len(set(labels)))}

	boundaries_info = {k:[] for k in labels2ids.values()}
	for label, v_min, v_max in zip(labels, boundaries[:-1], boundaries[1:]):
		boundaries_info[labels2ids[label]].append((v_min, v_max))

	clean()
	return boundaries_info
Example No. 7
def getPerformanceData(audioFile):
    # Load the audio file with a sampling rate of 44100 Hz
    x, fs = librosa.load(audioFile, sr=44100)
    print("File \'" + audioFile + "\' loaded.")
    # Calculate the duration of the audio file in deciseconds
    duration = int(10 * librosa.get_duration(y=x, sr=fs))
    # Calculate the emotion of the audio file and get the associated colors
    print("Calculating emotion data...")
    colors = groupColor(getVApair(audioFile))
    # Get the percussive elements of the audio file
    print("Extracting percussive elements...")
    xPercussive = librosa.effects.percussive(x, margin=3.0)
    #xPercussive=x
    # Get the beats of the audio file
    print("Detecting beats...")
    tempo, beats = librosa.beat.beat_track(y=xPercussive, sr=44100)
    beatSampleTimes = librosa.frames_to_time(beats, sr=fs)
    beatDsTimes = [int(10 * round(b, 1)) for b in beatSampleTimes]
    # Get the onsets of the audio file
    print("Detecting onsets...")
    onsets = librosa.onset.onset_detect(y=xPercussive, sr=44100)
    onsetSampleTimes = librosa.frames_to_time(onsets, sr=fs)
    onsetDsTimes = [int(10 * round(o, 1)) for o in onsetSampleTimes]
    # Get the segment boundaries
    print("Segmenting audio file...")
    boundaries, labels = msaf.process(audioFile, boundaries_id="sf")
    # Clean the boundaries to remove redundant segments
    boundariesDs = cleanSegments([int(10 * round(f, 1)) for f in boundaries],
                                 duration)
    # Store the results of the segmentation in segments.txt for faster retrieval
    # during later segmentations
    outFile = 'segments.txt'
    print('Saving output to %s' % outFile)
    msaf.io.write_mirex(boundaries, labels, outFile)
    # Return a dictionary of the relevant performance data
    performanceData = {
        "colors": colors,
        "waveValues": x,
        "duration": duration,
        "beats": beatDsTimes,
        "onsets": onsetDsTimes,
        "boundaries": boundariesDs
    }
    return performanceData
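
A minimal consumption sketch for the dictionary returned above; the file name is hypothetical, and the time values are in deciseconds, as produced by the rounding in the function:

data = getPerformanceData("performance.wav")
print("duration (ds):", data["duration"])
print("first beats (ds):", data["beats"][:5])
print("segment boundaries (ds):", data["boundaries"])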
Example No. 8
def get_boundaries(audio_file: str,
                   sample_rate: int,
                   boundary_detection_id: str = "olda",
                   label_detection_id: str = "scluster"):
    '''
    Runs MSAF boundary and label detection on an audio file, returning the section
    spans converted to sample indices together with their label assignments.
    
    Parameters
    ----------
    audio_file: filename of desired audio file
    sample_rate: sample_rate of audio file
    boundary_detection_id: 
        Algorithm for boundary detection. 
        Other algorithms are documented here: https://github.com/urinieto/msaf/blob/master/examples/Run%20MSAF.ipynb
    label_detection_id: 
        Algorithm for label_detection. 
        Other algorithms are documented here: https://github.com/urinieto/msaf/blob/master/examples/Run%20MSAF.ipynb
        
    Returns
    -------
    sections:
        A list of tuples designating the start/end of each section in samples
    dirty_labels:
        A list of integers corresponding to the label each section has been assigned 
        (e.g. a song structure of ABCAB could have a list of [0, 1, 2, 0, 1])
    '''

    bounds, labels = msaf.process(audio_file,
                                  boundaries_id=boundary_detection_id,
                                  labels_id=label_detection_id,
                                  out_sr=sample_rate)
    sections = np.array([(int(bounds[i] * sample_rate),
                          int(bounds[i + 1] * sample_rate))
                         for i in range(len(bounds) - 1)])
    # keep only sections longer than one second
    boolarr = [end - start > sample_rate for start, end in sections]
    sections = sections[boolarr]
    labels = np.array(labels)[boolarr]
    dirty_labels = [int(i) for i in labels]

    return sections, dirty_labels
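
A minimal usage sketch, assuming a hypothetical "track.wav" loaded at the same sample rate that is passed to get_boundaries:

import librosa

sr = 44100
audio, _ = librosa.load("track.wav", sr=sr)
sections, section_labels = get_boundaries("track.wav", sr)
for (start, end), label in zip(sections, section_labels):
    print(label, (end - start) / sr, "seconds")  # sections are sample index pairs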
Example No. 9
def main(argv):
    inputfile = ''
    outputfile = ''
    try:
        opts, args = getopt.getopt(argv, "hi:o:", ["ifile=", "ofile="])
    except getopt.GetoptError:
        print('run_msaf.py -i <inputfile> -o <outputfile>')
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print('run_msaf.py -i <inputfile> -o <outputfile>')
            sys.exit()
        elif opt in ("-i", "--ifile"):
            inputfile = arg
        elif opt in ("-o", "--ofile"):
            outputfile = arg
    print('Input file is:', inputfile)
    print('Output file is:', outputfile)
    boundaries, labels = msaf.process(inputfile, labels_id='scluster')
    print('Estimated boundaries:', boundaries)
    print('Estimated labels:', labels)
    msaf.io.write_mirex(boundaries, labels, outputfile)
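
The help string above implies a command-line invocation; a minimal equivalent sketch, assuming this listing is saved as run_msaf.py and called with hypothetical file names:

import sys

if __name__ == "__main__":
    main(sys.argv[1:])   # e.g. python run_msaf.py -i song.wav -o song.lab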
Example No. 10
input_audio = audio_path + "testb.wav"

config = {
    "dirichlet": True,
    "xmeans": True,
    "k": 6,
    "M_gaussian": 16,
    "m_embedded": 3,
    "k_nearest": 0.06,
    "Mp_adaptive": 24,
    "offset_thres": 0.04
}

est_times, est_labels = msaf.process(input_audio,
                                     feature="hpcp",
                                     boundaries_id="sf",
                                     labels_id="fmc2d",
                                     config=config)

print('writing data')

names = [random.choice(words) for _ in range(len(est_labels))]

with open(labels_path, "w") as labels_file:
    for est_time, est_label in zip(est_times, est_labels):
        labels_file.write("%s %s\n" % (est_time, names[int(est_label)]))
Example No. 11
def main(args, callback=log_progress()):
    if isinstance(callback, types.GeneratorType):
        next(callback)

    start = time.time()
    print('Analyzing music %s...'%args.input)

    if not os.path.exists(args.input):
        raise FileNotFoundError
    elif not os.path.isdir(args.data) and not args.data[-4:] == '.csv':
        if not os.path.exists(args.data):
            raise FileNotFoundError
        else:
            raise Exception('The data path must either be a .csv file or a folder')

    # 1. Get major changes in music
    callback.send('(1/3) Identifying significant rhythm changes in music...\n This will take about a minute.')

    filterwarnings('ignore')
    boundaries, labels = msaf.process(args.input, boundaries_id='olda')

    if boundaries[-1] < 60 or boundaries[-1] > 400:
        callback.send('Error : Please choose a piece of music lasting between 60 and 400 seconds to get a quality MV.')
        return -1
        
    callback.send('Key changes found at \n(%s) seconds\n'%' , '.join(map('{:.2f}'.format, boundaries)))

    if args.data[-4:] == '.csv':
        # 2. Find music genre and style (music video style = larger category of genre)
        musicGenre = args.genre
        musicStyle = ''
        if musicGenre == '': # No genre given, must find it

            title, artist, musicGenre, musicStyle = get_music_infos(args.input)

            if musicStyle == '':
                callback.send('GenreError : The algorithm could not recognize the music genre.\n'
                                    'Please try another track, or set the genre manually with the argument --genre <name of genre> \n'
                                    'with genre in ('+','.join(AUTHORIZED_GENRES)+').')
                return -1

            callback.send('Music genre identified : %s.'%musicGenre)

        else:
            musicStyle = convert_genre_to_style(musicGenre)
            if musicStyle == '':
                callback.send('GenreError : This genre is not authorized. Please input one of the following ('+\
                ','.join(AUTHORIZED_GENRES)+') or let the algorithm find the genre.')
                return -1


        # 3. With the music genre, find appropriate videos in database
        callback.send('(2/3) Fetching matching videos in database...\n')
        
        # use the k-means clustering result on scenes extracted from Music Videos with the same genre and choose one resolution
        resolution = random.random()
        if resolution < RESOLUTION_PROBABILITY:
            resolution = '40'
        else:
            resolution = '16'
        clusterResult = pd.read_csv('/home/sarah/YoutubeMVGenerator/statistics/kmeans_'+resolution+'_'+musicStyle+'.csv')

    else:
        # use k-means clustering result on scenes extracted from Music Videos with same genre
        listFiles = list_scenes(args.data,'json')
        callback.send('(2/3) Generating K-Means for the database...')
        clusterResult = compute_kmeans(listFiles)

    # 4. Join music scenes while respecting the clustering and the input music rhythm
    callback.send('(3/3) Building the music video around these boundaries...\n This won\'t take long.\n')

    # Select and order videos for music clip
    tempDir = tempfile.mkdtemp('_music_video_build')+'/'
    print("Building the video file in folder %s"%tempDir)
    assemble_videos(clusterResult, boundaries, tempDir)

    # Concatenate videos
    subprocess.call(['ffmpeg', '-y', '-loglevel', 'error', '-f', 'concat', '-safe', '0', '-i', 'video_structure.txt',
    '-c', 'copy', '-an', tempDir+'temp_video.MTS'])

    # Put input music on top of resulting video
    extension = os.path.splitext(args.output)[1]
    if extension != '.avi' and extension != '.mkv':
        args.output = os.path.splitext(args.output)[0]+'.mp4'
        if extension != '.mp4' :
            print('No format within (avi,mkv,mp4) given. Using default mp4 ...')

    # copies video stream and replace audio of arg 0 by arg 1
    subprocess.call(['ffmpeg', '-y', '-loglevel', 'error', '-i', tempDir+'temp_video.MTS', '-i', args.input,
    '-c:v' ,'copy', '-map', '0:v:0', '-map', '1:a:0', args.output])

    print('Video file %s written.\n'%args.output)
    callback.send('--- Finished building the music video in %f seconds. ---'%(time.time()-start))

    # Delete temp files
    shutil.rmtree(tempDir)

    # Copy video to folder generated
    if os.path.exists('generatedmvs'):
        shutil.copyfile(args.output, 'generatedmvs/'+time.strftime('%Y-%m-%d_%H-%M-%S', time.gmtime())+'.mp4')

    if callback is not None and isinstance(callback, types.GeneratorType):  # Close the generator
        callback.close()
Example No. 12
def line_align(songs,
               dump_dir,
               boundary_algorithm='olda',
               label_algorithm='fmc2d',
               do_twinnet=False):
    """
    Aligns given audio with lyrics by line. If dump_dir is None, no timestamp
    yml is created.

    :param songs: Song metadata in dict with keys 'song', 'artist', 'path' and \
                  'genre'. Key 'path' is audio file path. Key 'genre' optional.
    :type songs: list[dict{}] | dict{}
    :param dump_dir: Directory to store timestamp ymls.
    :type dump_dir: file-like | None
    :param boundary_algorithm: Segmentation algorithm for MSAF.
    :type boundary_algorithm: str
    :param label_algorithm: Labelling algorithm for MSAF.
    :type label_algorithm: str
    :param do_twinnet: Flag for performing vocal isolation.
    :type do_twinnet: bool
    :return align_data: List of alignment data. See below for formatting.
    :rtype: list[dict{}]
    """

    logging.info('Beginning alignment...')

    if isinstance(songs, dict):
        songs = [songs]

    # Module initializations
    snd = SND(silencedb=-15)
    sc = SyllableCounter()

    # Perform MaD TwinNet in one batch
    if do_twinnet:
        paths = [song['path'] for song in songs]
        twinnet.twinnet_process(paths)
    else:
        #logging.info('Skipping MaD TwinNet')
        print('Performing source separation using spleeter..')
        audio_path = songs[0]['path']
        destination = os.path.splitext(audio_path)[0]
        if not os.path.exists(destination):
            separator = Separator('spleeter:2stems')
            separator.separate_to_file(audio_descriptor=audio_path,
                                       destination=destination)

    total_align_data = []

    for song in songs:

        logging.info('Processing {} by {}'.format(song['song'],
                                                  song['artist']))

        start_time = time.time()

        # Get file names
        mixed_path = song['path']
        voice_path = os.path.splitext(song['path'])[0] + '_voice.wav'
        if not do_twinnet:
            voice_path = os.path.join(destination, 'vocals.wav')

        # Get lyrics from Genius
        lyrics = get_lyrics(song['song'], song['artist'])

        # Get syllable count from lyrics
        formatted_lyrics = sc.build_lyrics(lyrics)
        syl_lyrics = sc.get_syllable_count_lyrics(formatted_lyrics)
        sc_syllables = sc.get_syllable_count_per_section(syl_lyrics)

        # Get syllable count from SND
        snd_syllables = snd.run(voice_path)

        # Structural segmentation analysis on original audio
        sections, labels = msaf.process(mixed_path,
                                        boundaries_id=boundary_algorithm,
                                        labels_id=label_algorithm)

        # Save instrumental section indices
        instrumentals = []

        # Get SND counts, densities per label
        max_count = 0

        labels_density = {}
        i_s = 0
        for i, section in enumerate(zip(labels, sections[:-1], sections[1:])):
            count = 0
            while i_s < len(snd_syllables) and snd_syllables[i_s] < section[2]:
                count += 1
                i_s += 1
            max_count = max(max_count, count)

            duration = section[2] - section[1]
            density = count / duration

            # TODO: Improve instrumental categorization
            if density < 0.4:
                instrumentals.append(i)
            else:
                if section[0] not in labels_density:
                    labels_density[section[0]] = [[], []]
                labels_density[section[0]][0].append(count)
                labels_density[section[0]][1].append(density)
            # if section[0] not in labels_density:
            #     labels_density[section[0]] = [[], []]
            # labels_density[section[0]][0].append(count)
            # labels_density[section[0]][1].append(density)

        # Normalize SND syllable counts
        for label in labels_density:
            labels_density[label][0] = [
                count / max_count for count in labels_density[label][0]
            ]

        # Normalize SSA syllable counts
        gt_max_syl = max(section[1] for section in sc_syllables)
        gt_chorus_syl = mean(section[1] / gt_max_syl
                             for section in sc_syllables
                             if section[0] == 'chorus')

        # Find label most similar to chorus
        min_label = labels[0]
        min_distance = float('inf')
        for label in labels_density:
            if len(labels_density[label][0]) < 2:
                continue

            # TODO: Fix distance scales
            mean_syl = mean(labels_density[label][0])
            std_den = stdev(labels_density[label][1])
            distance = sqrt(((mean_syl - gt_chorus_syl) / gt_chorus_syl)**2 +
                            std_den**2)

            if distance < min_distance:
                min_distance = distance
                min_label = label

        # Relabel
        relabels = [''] * len(labels)

        temp = defaultdict(list)
        for i, label in enumerate(labels):
            temp[label].append(i)
        for label in temp:
            for i in temp[label]:
                if i in instrumentals:
                    continue
                elif label == min_label:
                    relabels[i] = 'chorus'
                elif len(temp[label]) > 1:
                    relabels[i] = 'verse'
                else:
                    relabels[i] = 'other'
        del temp

        relabels = [label for label in relabels if label]

        if not relabels:
            logging.error('Whole song tagged as instrumental! Skipping...')
            continue

        # Calculate accumulated error matrix
        dp = [[-1 for j in range(len(relabels))]
              for i in range(len(sc_syllables))]
        for i in range(len(sc_syllables)):
            for j in range(len(relabels)):
                dp[i][j] = dp_err_matrix[sc_syllables[i][0]][relabels[j]]
                if i == 0 and j == 0:
                    pass
                elif i == 0:
                    dp[i][j] += dp[i][j - 1]
                elif j == 0:
                    dp[i][j] += dp[i - 1][j]
                else:
                    dp[i][j] += min(dp[i - 1][j], dp[i][j - 1],
                                    dp[i - 1][j - 1])

        # Backtrack
        i, j = len(sc_syllables) - 1, len(relabels) - 1
        path = []
        while True:
            path.append((i, j))
            if (i, j) == (0, 0):
                break
            elif i == 0:
                j -= 1
            elif j == 0:
                i -= 1
            else:
                min_dir = min(dp[i - 1][j], dp[i][j - 1], dp[i - 1][j - 1])
                if dp[i - 1][j] == min_dir:
                    i -= 1
                elif dp[i][j - 1] == min_dir:
                    j -= 1
                else:
                    i -= 1
                    j -= 1
        path.reverse()

        # Process alignment and write to file
        alignment = [[] for i in range(len(labels))]
        for i in instrumentals:
            alignment[i].append('instrumental')

        section_id = 0
        j_prev = 0
        for (i, j) in path:
            if j != j_prev:
                section_id += 1
                j_prev = j
            while 'instrumental' in alignment[section_id]:
                section_id += 1
            alignment[section_id].append(i)

        end_time = time.time()

        align_data = {
            'song': song['song'],
            'artist': song['artist'],
            'process time': end_time - start_time,
            'duration': round((sections[-1] - sections[0]).item(), 2),
            'align': []
        }

        if 'genre' in song:
            align_data['genre'] = song['genre']

        cur_lyric_section = -1
        for i, section in enumerate(alignment):
            for n, lyric_section in enumerate(section):
                if lyric_section != cur_lyric_section:
                    break_point = round((
                        sections[i] + n *
                        (sections[i + 1] - sections[i]) / len(section)).item(),
                                        2)
                    if cur_lyric_section != 'instrumental' and align_data[
                            'align']:
                        align_data['align'][-1]['end'] = break_point
                    if lyric_section != 'instrumental':
                        align_data['align'].append({
                            'label':
                            sc_syllables[lyric_section][0],
                            'syllables':
                            sc_syllables[lyric_section][1],
                            'start':
                            break_point,
                            'lines': []
                        })
                    cur_lyric_section = lyric_section

        if 'end' not in align_data['align'][-1]:
            align_data['align'][-1]['end'] = break_point

        for i, section in enumerate(align_data['align']):
            duration = section['end'] - section['start']
            line_start = section['start']
            for j, line in enumerate(formatted_lyrics[i][1]):
                line_text = ' '.join(line)
                line_syls = sum(syl_lyrics[i][1][j])
                line_duration = line_syls / align_data['align'][i][
                    'syllables'] * duration

                align_data['align'][i]['lines'].append({
                    'end': line_start + line_duration,
                    'text': line_text
                })

                line_start += line_duration

        if dump_dir is not None:
            file_name = '{}_{}.yml'.format(song['artist'],
                                           song['song']).replace(' ', '')
            file_path = os.path.join(dump_dir, file_name)

            with open(file_path, 'w') as f:
                yaml.dump(align_data, f, default_flow_style=False)

        total_align_data.append(align_data)

    return total_align_data
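
For reference, each element of the returned list follows the structure assembled in the loop above; a sketch of the shape with placeholder values:

align_data_example = {
    'song': 'Song Title',
    'artist': 'Artist Name',
    'process time': 42.0,          # seconds spent on this song
    'duration': 180.0,             # sections[-1] - sections[0], rounded to 2 decimals
    'genre': 'pop',                # only present if supplied in the input metadata
    'align': [
        {
            'label': 'chorus',     # section label from the lyric alignment
            'syllables': 64,
            'start': 30.12,
            'end': 55.87,
            'lines': [{'end': 33.5, 'text': 'first line of the section'}],
        },
        # one dict per aligned lyric section
    ],
}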
Example No. 13
#chorus_start_sec = find_and_output_chorus("audio/foo.wav", "chorus.wav", 15)

num_samples = chroma.shape[1]
time_time_similarity = TimeTimeSimilarityMatrix(chroma, sr)
time_lag_similarity = TimeLagSimilarityMatrix(chroma, sr)

#time_time_similarity.display()
print(msaf.get_all_label_algorithms())
print(msaf.get_all_boundary_algorithms())

#novelty based segmentation
#uses the foote or checkerboard kernel method of segmenting songs
#plot = True
boundaries, labels = msaf.process("audio/" + file_name,
                                  feature="mfcc",
                                  boundaries_id="foote",
                                  labels_id="fmc2d",
                                  out_sr=sr)

#audio = librosa.load(sonified_file, sr=sr)[0]

new_boundaries = []
new_labels = []
segment_nums = []
mfccs = []
idx = 0
for x in range(len(boundaries) - 1):
    if boundaries[x + 1] - boundaries[x] >= 3:
        print("segment found at {0:g} min {1:.2f} sec".format(
            boundaries[x] // 60, boundaries[x] % 60))
        segment_wav_data = song_wav_data[int(boundaries[x] *
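
The listing is cut off mid-statement; a minimal sketch of how the loop body might continue, assuming song_wav_data holds the raw samples at sample rate sr (the slicing and per-segment MFCC computation are assumptions, not the original code):

        segment_wav_data = song_wav_data[int(boundaries[x] * sr):int(boundaries[x + 1] * sr)]
        new_boundaries.append(boundaries[x])
        new_labels.append(labels[x])
        segment_nums.append(idx)
        mfccs.append(librosa.feature.mfcc(y=segment_wav_data, sr=sr))
        idx += 1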
Example No. 14
labels_path = root+"labels.txt"
input_audio = audio_path + "testb.wav"

config = {
    "dirichlet" :   True,
    "xmeans"    :   True,
    "k"         :   6,

    "M_gaussian"    : 16,
    "m_embedded"    : 3,
    "k_nearest"     : 0.06,
    "Mp_adaptive"   : 24,
    "offset_thres"  : 0.04

}

est_times, est_labels = msaf.process(input_audio,
                                     feature="hpcp",
                                     boundaries_id="sf",
                                     labels_id="fmc2d",
                                     config=config)

print('writing data')

names = [random.choice(words) for _ in range(len(est_labels))]

with open(labels_path, "w") as labels_file:
    for est_time, est_label in zip(est_times, est_labels):
        labels_file.write("%s %s\n" % (est_time, names[int(est_label)]))

Example No. 15
#
#
# Use MSAF to analyze structure of a song.
#
#
import msaf
msaf.config.dataset.audio_dir = "."
import librosa
beat = tracker.Beat()
beat.load_metadata('track_info')
trackids = beat.track_info.trackid
collection = data_dir + "/audio/tmp_wav_set/"
results = msaf.process(collection, n_jobs=1, boundaries_id="foote", feature='pcp')



trackid=6
track_audiopath = beat.audio_orig_dir + beat.track_info.loc[beat.track_info.trackid==trackid]["filename_track"].iloc[0]+".wav"
boundaries, labels = msaf.process(track_audiopath)


sonified_file = "my_boundaries.wav"
sr = 44100
boundaries, labels = msaf.process(track_audiopath, sonify_bounds=True, 
                                  out_bounds=sonified_file, out_sr=sr)

melids = np.unique(beat.sections.melid.loc[beat.sections.trackid==trackid])
Example No. 16
def extract_segments(song_name):
    file_format = ".wav"
    audio_file = song_name + file_format
    # Segment the file with MSAF (this might take a few seconds)
    boundaries, labels = msaf.process(audio_file,
                                      boundaries_id="olda",
                                      labels_id="scluster")
    labels = [int(i) for i in labels]
    segCount = 0
    currIndex = 0
    firstSegLen = 0
    usedLabels = [0] * (max(labels) + 1)  # one slot per distinct label

    while (firstSegLen < 5):
        lbound = boundaries[currIndex]
        rbound = boundaries[currIndex + 1]
        segLen = rbound - lbound
        firstSegLen += segLen
        firstRt = rbound
        currIndex += 1
        usedLabels[labels[currIndex]] = 1

    segmentList = []
    segmentList.append((0, firstRt))
    while (currIndex < len(labels)):
        if (usedLabels[labels[currIndex]] == 0):
            lbound = boundaries[currIndex]
            rbound = boundaries[currIndex + 1]
            segLen = rbound - lbound
            if (segLen > 15):
                segmentList.append((lbound, rbound))
                currIndex += 1
            else:
                if (currIndex != len(labels) - 1):
                    if (labels[currIndex] == labels[currIndex + 1]):
                        rbound = boundaries[currIndex + 2]
                        segLen = rbound - lbound
                        if (segLen > 15):
                            segmentList.append((lbound, rbound))
                            currIndex += 2
        currIndex += 1

    y, sr = librosa.load(audio_file)
    tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
    beatsList = librosa.frames_to_time(beats, sr=sr)

    fourBeatsList = beatsList[3::4]
    beatDist = fourBeatsList[1] - fourBeatsList[0]
    for i in range(100):
        fourBeatsList = np.append(fourBeatsList, fourBeatsList[-1] + beatDist)

    sound = AudioSegment.from_file(audio_file)
    totalLen = 0
    for i in range(len(segmentList)):
        totalLen += segmentList[i][1] - segmentList[i][0]
    songLen = len(sound) / 1000.0
    if (totalLen <= 35):
        segmentList.append((songLen - 23, songLen - 3))

    def closest(lst, K):
        return lst[min(range(len(lst)), key=lambda i: abs(lst[i] - K))]

    newSegmentList = []
    for i in range(len(segmentList)):
        start = segmentList[i][0]
        end = segmentList[i][1]
        newStart = closest(fourBeatsList, start)
        newEnd = closest(fourBeatsList, end)
        if (i == 0):
            newStart = 0
        newSegmentList.append((newStart, newEnd))

    mixedFile = sound[newSegmentList[0][0] * 1000:newSegmentList[0][1] * 1000]
    finalSegmentList = []
    totalTime = newSegmentList[0][1] - newSegmentList[0][0]
    finalSegmentList.append(newSegmentList[0])
    currIter = 1
    while (totalTime <= 55):
        if (currIter == len(newSegmentList)):
            break
        totalTime += newSegmentList[currIter][1] - newSegmentList[currIter][0]
        finalSegmentList.append(newSegmentList[currIter])
        currSound = sound[newSegmentList[currIter][0] *
                          1000:newSegmentList[currIter][1] * 1000]
        mixedFile = mixedFile.append(currSound, crossfade=beatDist * 1000)
        currIter += 1

    lastStart = len(sound) / 1000.0 - 5
    lastEnd = len(sound) / 1000.0
    lastStart = closest(fourBeatsList, lastStart)
    lastEnd = closest(fourBeatsList, lastEnd)
    finalSegmentList.append((lastStart, lastEnd))
    lastSound = sound[lastStart * 1000:]
    mixedFile = mixedFile.append(lastSound, crossfade=beatDist * 1000)
    mixedFile.export(song_name + '_segmented.mp3', format="mp3")
    print("Final Audio Segments Obtained: ")
    print(finalSegmentList)
    return (mixedFile, finalSegmentList, beatDist)
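
A minimal usage sketch, assuming a hypothetical "mysong.wav" next to the script (the function writes mysong_segmented.mp3 as a side effect):

mixed, kept_segments, beat_len = extract_segments("mysong")
print("kept segments:", kept_segments)
print("crossfade length (s):", beat_len)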
Example No. 17
def segment(fileLocation, toLocation,songLocation, songToLocation):
    print("Processing instrumental list...")
    for path, dir, files in os.walk(fileLocation):
        for filename in files:
            if filename.endswith(".mp3"):
                sound = AudioSegment.from_mp3(os.path.join(path, filename))
                filename = os.path.splitext(filename)[0]
                filename = filename+".wav"
                sound.export(os.path.join(toLocation, filename), format="wav")

            if not filename.endswith(".wav"):
                print ("Please check your audio file type: " + filename)
                continue

            audio_file = os.path.join(path, filename)
            song = AudioSegment.from_wav(audio_file)
            print ('Segment ' + audio_file)

            # Segment the file using default MSAF parameters
            boundaries, labels = msaf.process(audio_file)
            print(boundaries)
            songBoundary[filename[:filename.rfind('(inst')]] = boundaries
            '''
                Using unit in milliseconds(ten_seconds = 10 * 1000)
                first_10_seconds = song[:ten_seconds]
                last_5_seconds = song[-5000:]
            '''
            segments = list()
            boundaries *= 1000
            buff = 2500
            for index in range(1, len(boundaries)):
                if index == 1 or index == len(boundaries)-2 : continue
                elif index == 2 or index == len(boundaries)-1 :
                    segments.append(song[max(0, boundaries[index-2]-buff)
                                         : min(boundaries[len(boundaries)-1], boundaries[index]+buff)])
                else:
                    segments.append(song[boundaries[index-1]-buff:boundaries[index]+buff])

            for index in range(len(segments)):
                output = filename[:filename.rfind('.')] + '_' + str(index+1) + filename[filename.rfind('.'):]
                out_format = filename.split('.')[-1]
                segments[index].export(os.path.join(toLocation, output), format = out_format)

    print("Processing vocal list...")
    for path, dir, files in os.walk(songLocation):
        for filename in files:
            if filename.endswith(".mp3"):
                sound = AudioSegment.from_mp3(os.path.join(path, filename))
                filename = os.path.splitext(filename)[0]
                filename = filename+".wav"
                sound.export(os.path.join(songToLocation, filename), format="wav")

            if not filename.endswith(".wav"):
                print ("Please check your audio file type: " + filename)
                continue

            audio_file = os.path.join(path, filename)
            song = AudioSegment.from_wav(audio_file)
            print ('Segment ' + audio_file)

            segments = list()
            if filename.rfind('(vocal') != -1:
                boundaries = songBoundary[filename[:filename.rfind('(vocal')]]
            else:
                boundaries = songBoundary[filename[:filename.rfind('.')]]
            print (boundaries)

            buff = 2500  # same padding (ms) as in the instrumental pass
            for index in range(1, len(boundaries)):
                if index == 1 or index == len(boundaries)-2 : continue
                elif index == 2 or index == len(boundaries)-1 :
                    segments.append(song[max(0, boundaries[index-2]-buff)
                                         : min(boundaries[len(boundaries)-1], boundaries[index]+buff)])
                else:
                    segments.append(song[boundaries[index-1]-buff:boundaries[index]+buff])

            for index in range(len(segments)):
                output = filename[:filename.rfind('.')] + '_' + str(index+1) + filename[filename.rfind('.'):]
                out_format = filename.split('.')[-1]
                segments[index].export(os.path.join(songToLocation, output), format = out_format)
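
The function above relies on a songBoundary mapping that is not defined in this listing; a minimal driver sketch, assuming it is a module-level dict shared between the two passes and that the four folders exist (their names are hypothetical):

songBoundary = {}
segment("instrumentals/", "instrumentals_wav/", "vocals/", "vocals_wav/")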