Exemplo n.º 1
0
def update_instru_mapping(folder_path, instru_mapping, quantization):
    """Update ``instru_mapping`` with the instruments found in one score folder.

    The folder is read with ``build_data_aux.process_folder`` when it holds
    two music files (piano score + orchestration) and with
    ``build_data_aux_no_piano.process_folder_NP`` when it holds only one.
    The resulting piano and orchestra pianorolls are then passed, one after
    the other, to ``build_data_aux.instru_pitch_range``.

    Besides its arguments, this function reads ``binary_piano``,
    ``binary_orch`` and ``temporal_granularity`` from the enclosing module
    scope.

    Args:
        folder_path: Path of the folder to analyse.
        instru_mapping: Current mapping, forwarded to ``instru_pitch_range``.
        quantization: Quantization forwarded to the folder readers.

    Returns:
        The mapping returned by ``instru_pitch_range``, or ``instru_mapping``
        unchanged when ``folder_path`` is not a directory or when no duration
        could be obtained for the folder.

    Raises:
        Exception: If the folder contains neither one nor two music files
            (counted as the larger of the ``*.mid`` and ``*.xml`` counts).
    """
    if not os.path.isdir(folder_path):
        return instru_mapping

    # Is there an original piano score or do we have to create it ?
    num_music_file = max(len(glob.glob(folder_path + '/*.mid')),
                         len(glob.glob(folder_path + '/*.xml')))
    if num_music_file == 2:
        is_piano = True
    elif num_music_file == 1:
        is_piano = False
    else:
        raise Exception("Expected 1 or 2 music files (.mid or .xml) in " +
                        folder_path + ", found " + str(num_music_file))

    # Read the pianorolls
    if is_piano:
        pr_piano, _, _, instru_piano, _, pr_orch, _, _, instru_orch, _, duration =\
            build_data_aux.process_folder(folder_path, quantization, binary_piano, binary_orch, temporal_granularity, gapopen=3, gapextend=1)
    else:
        try:
            pr_piano, _, _, instru_piano, _, pr_orch, _, _, instru_orch, _, duration =\
                build_data_aux_no_piano.process_folder_NP(folder_path, quantization, binary_piano, binary_orch, temporal_granularity)
        except Exception:
            # Best effort: an unreadable folder is skipped, but Ctrl-C and
            # interpreter shutdown are no longer swallowed.
            duration = None
            logging.warning("Could not read file in " + folder_path)

    if duration is None:
        # Files that could not be read or aligned leave the mapping untouched
        return instru_mapping

    # Modify the mapping from instrument to indices in pianorolls and pitch
    # bounds. The result is re-assigned explicitly (a = f(a)) rather than
    # relying on instru_pitch_range mutating its argument in place.
    instru_mapping = build_data_aux.instru_pitch_range(
        instrumentation=instru_piano,
        pr=pr_piano,
        instru_mapping=instru_mapping,
    )
    instru_mapping = build_data_aux.instru_pitch_range(
        instrumentation=instru_orch,
        pr=pr_orch,
        instru_mapping=instru_mapping,
    )

    return instru_mapping
Exemplo n.º 2
0
def check_orchestration_alignment(path_db, subfolder_names, temporal_granularity, quantization, unit_type, gapopen, gapextend):
    """Dump aligned piano/orchestra pianorolls for visual and audio inspection.

    For every folder found under ``path_db/<sub_db>``, the piano score and
    its orchestration are read with ``build_data_aux.process_folder``. An
    image of the two pianorolls side by side and three MIDI files
    (``0.mid``, ``1.mid``, ``both_aligned.mid``) are then written to
    ``DEBUG/Grid_search_database_alignment/<params>/<sub_db>_<folder>``,
    relative to the current working directory. Folders whose output
    directory already exists are skipped.

    Besides its arguments, this function reads ``binary_piano`` and
    ``binary_orch`` from the enclosing module scope.

    Args:
        path_db: Root directory of the database.
        subfolder_names: Names of the sub-databases to visit under ``path_db``.
        temporal_granularity: Forwarded to ``process_folder``; when equal to
            ``"event_level"`` the MIDI files are written with quantization 1.
        quantization: Forwarded to ``process_folder`` and, except at event
            level, to ``write_midi``.
        unit_type: Only used to build the output directory name.
        gapopen: Forwarded to ``process_folder``.
        gapextend: Forwarded to ``process_folder``.

    Returns:
        None.
    """
    output_dir = 'DEBUG/' +\
                 'Grid_search_database_alignment/' + str(quantization) +\
                 '_' + temporal_granularity +\
                 '_' + unit_type +\
                 '_' + str(gapopen) +\
                 '_' + str(gapextend)

    if temporal_granularity == "event_level":
        quantization_write = 1
    else:
        quantization_write = quantization

    for sub_db in subfolder_names:
        # Single-argument print(...) behaves the same on Python 2 and 3
        print('#' * 30)
        print(sub_db)

        sub_db_path = path_db + '/' + sub_db
        if not os.path.isdir(sub_db_path):
            continue

        for folder_name in os.listdir(sub_db_path):
            print('# ' + sub_db + ' : ' + folder_name)
            folder_path = sub_db_path + '/' + folder_name
            if not os.path.isdir(folder_path):
                continue

            # Skip already computed folders
            save_folder_name = output_dir +\
                '/' + sub_db + '_' + folder_name
            if os.path.isdir(save_folder_name):
                continue

            pr_piano_no_map, _, _, _, _, pr_orchestra_no_map, _, _, instru_orch, _, duration =\
                build_data_aux.process_folder(folder_path, quantization, binary_piano, binary_orch, temporal_granularity, gapopen, gapextend)

            # NOTE(review): process_folder presumably yields duration=None for
            # pairs it cannot align -- confirm. Without this guard the
            # np.zeros((duration, 30)) call below would abort the whole run.
            if duration is None:
                print('# Could not align : ' + folder_path)
                continue

            # Merge every piano track into a single 'Piano' pianoroll
            pr_piano = {}
            for v in pr_piano_no_map.values():
                if 'Piano' in pr_piano:
                    pr_piano['Piano'] = np.maximum(pr_piano['Piano'], v)
                else:
                    pr_piano['Piano'] = v

            # Map each orchestra track onto its (possibly several) instruments
            pr_orchestra = {}
            for k, v in pr_orchestra_no_map.items():
                # Track names may carry trailing NUL characters; strip them
                # before the lookup, then unmix the instruments
                new_k = instru_orch[k.rstrip('\x00')]
                instru_names = build_data_aux.unmixed_instru(new_k)
                for instru_name in instru_names:
                    if instru_name in pr_orchestra:
                        pr_orchestra[instru_name] = np.maximum(pr_orchestra[instru_name], v)
                    else:
                        pr_orchestra[instru_name] = v

            # Sum all instruments and lay piano and orchestra side by side,
            # separated by a 30-column blank band
            piano_aligned = sum_along_instru_dim(pr_piano)
            orchestra_aligned = sum_along_instru_dim(pr_orchestra)
            separator = np.zeros((duration, 30), dtype=np.int16)
            both_aligned = np.concatenate((piano_aligned, separator, orchestra_aligned), axis=1)

            if not os.path.isdir(save_folder_name):
                os.makedirs(save_folder_name)

            visualize_mat(both_aligned, save_folder_name, 'aligned')
            write_midi(pr=pr_piano, quantization=quantization_write, write_path=save_folder_name + '/0.mid', tempo=80)
            write_midi(pr=pr_orchestra, quantization=quantization_write, write_path=save_folder_name + '/1.mid', tempo=80)
            write_midi(pr={'Piano': piano_aligned, 'Violin': orchestra_aligned}, quantization=quantization_write, write_path=save_folder_name + '/both_aligned.mid', tempo=80)
Exemplo n.º 3
0
def build_split_matrices(folder_paths, destination_folder, chunk_size,
                         instru_mapping, N_piano, N_orchestra, embedding_model,
                         binary_piano, binary_orch):
    """Read score folders and save them on disk as fixed-size chunks.

    Each readable folder is turned into a piano matrix, an embedded piano
    matrix and an orchestra matrix. These are cut into slices of at most
    ``chunk_size`` rows. Every slice is saved, together with the matching
    piano and orchestra durations, as ``.npy`` files in a new directory
    ``<destination_folder>/<file_counter>_<split_counter>``.

    Besides its arguments, this function reads ``quantization``,
    ``temporal_granularity`` and ``cuda_gpu`` from the enclosing module scope.

    Args:
        folder_paths: Iterable of folder paths (trailing whitespace is
            stripped). Paths that are not directories are ignored.
        destination_folder: Existing directory in which the split folders
            are created.
        chunk_size: Maximum number of rows per split.
        instru_mapping: Mapping forwarded to ``cast_small_pr_into_big_pr``;
            ``instru_mapping['Piano']['pitch_min'/'pitch_max']`` also bounds
            the piano columns inside the 128-wide embedding input.
        N_piano: Number of columns of the piano matrix.
        N_orchestra: Number of columns of the orchestra matrix.
        embedding_model: Callable invoked as ``embedding_model(batch, 0)``
            on float tensors of shape ``(batch, 1, 128)``.
        binary_piano: Forwarded to the folder readers.
        binary_orch: Forwarded to the folder readers.

    Returns:
        A tuple ``(train_and_valid_files, train_only_files)``. Both are
        dicts mapping a folder path to the list of split folders created for
        it. Folders listed by ``avoid_tracks.no_valid_tracks()`` go in
        ``train_only_files``.

    Raises:
        Exception: If a folder contains neither one nor two music files
            (counted as the larger of the ``*.mid`` and ``*.xml`` counts).
    """
    file_counter = 0
    train_only_files = {}
    train_and_valid_files = {}

    for folder_path in folder_paths:
        ###############################
        # Read file
        folder_path = folder_path.rstrip()
        logging.info(" : " + folder_path)
        if not os.path.isdir(folder_path):
            continue

        # Decide once per folder which registry its splits belong to,
        # instead of querying avoid_tracks again for every split
        if folder_path in avoid_tracks.no_valid_tracks():
            split_registry = train_only_files
        else:
            split_registry = train_and_valid_files
        split_registry[folder_path] = []

        # Is there an original piano score or do we have to create it ?
        num_music_file = max(len(glob.glob(folder_path + '/*.mid')),
                             len(glob.glob(folder_path + '/*.xml')))
        if num_music_file == 2:
            is_piano = True
        elif num_music_file == 1:
            is_piano = False
        else:
            raise Exception("Expected 1 or 2 music files (.mid or .xml) in " +
                            folder_path + ", found " + str(num_music_file))

        # Get pr, warped and duration
        if is_piano:
            new_pr_piano, _, new_duration_piano, _, new_name_piano, new_pr_orchestra, _, new_duration_orch, new_instru_orchestra, _, duration\
                = build_data_aux.process_folder(folder_path, quantization, binary_piano, binary_orch, temporal_granularity, gapopen=3, gapextend=1)
        else:
            try:
                new_pr_piano, _, new_duration_piano, _, new_name_piano, new_pr_orchestra, _, new_duration_orch, new_instru_orchestra, _, duration\
                    = build_data_aux_no_piano.process_folder_NP(folder_path, quantization, binary_piano, binary_orch, temporal_granularity)
            except Exception:
                # Best effort: skip unreadable folders, but let Ctrl-C and
                # interpreter shutdown propagate
                logging.warning("Could not read file in " + folder_path)
                continue

        # Skip folders for which no piano pianoroll could be produced
        if new_pr_piano is None:
            # It's definitely not a match...
            # Check for the files : are they really a piano score and its orchestration ??
            with open('log_build_db.txt', 'a') as f:
                f.write(folder_path + '\n')
            continue

        pr_orch = build_data_aux.cast_small_pr_into_big_pr(
            new_pr_orchestra, new_instru_orchestra, 0, duration,
            instru_mapping, np.zeros((duration, N_orchestra)))
        pr_piano = build_data_aux.cast_small_pr_into_big_pr(
            new_pr_piano, {}, 0, duration, instru_mapping,
            np.zeros((duration, N_piano)))
        ###############################

        ###############################
        # Embed piano
        n_frames = len(pr_piano)
        batch_size = 500  # forced to batch for memory issues
        pitch_min = instru_mapping['Piano']['pitch_min']
        pitch_max = instru_mapping['Piano']['pitch_max']
        piano_embedded = []
        # Inference only: without no_grad the autograd graph is kept alive
        # and .numpy() raises on outputs that require grad
        with torch.no_grad():
            for start_batch_index in range(0, n_frames, batch_size):
                end_batch_index = min(start_batch_index + batch_size, n_frames)
                # Embeddings accept inputs that are 128 samples wide
                piano_resize_emb = np.zeros(
                    (end_batch_index - start_batch_index, 1, 128))
                piano_resize_emb[:, 0, pitch_min:pitch_max] = \
                    pr_piano[start_batch_index:end_batch_index]
                piano_resize_emb_TT = torch.tensor(piano_resize_emb)
                if cuda_gpu:
                    piano_resize_emb_TT = piano_resize_emb_TT.cuda()
                piano_embedded_TT = embedding_model(
                    piano_resize_emb_TT.float(), 0)
                # .cpu() is a no-op for tensors already on the CPU
                piano_embedded.append(piano_embedded_TT.cpu().numpy())
        piano_embedded = np.concatenate(piano_embedded)
        ###############################

        ###############################
        # Split
        last_index = pr_piano.shape[0]
        for split_counter, start_index in enumerate(
                range(0, last_index, chunk_size)):
            this_split_folder = destination_folder + '/' + str(
                file_counter) + '_' + str(split_counter)
            os.mkdir(this_split_folder)
            end_index = min(start_index + chunk_size, last_index)

            np.save(this_split_folder + '/pr_piano.npy',
                    pr_piano[start_index:end_index].astype(np.float32))
            np.save(this_split_folder + '/pr_piano_embedded.npy',
                    piano_embedded[start_index:end_index].astype(np.float32))
            np.save(this_split_folder + '/pr_orch.npy',
                    pr_orch[start_index:end_index].astype(np.float32))
            np.save(this_split_folder + '/duration_piano.npy',
                    np.asarray(new_duration_piano[start_index:end_index],
                               dtype=np.int8))
            np.save(this_split_folder + '/duration_orch.npy',
                    np.asarray(new_duration_orch[start_index:end_index],
                               dtype=np.int8))

            # Keep track of splits
            split_registry[folder_path].append(this_split_folder)

        file_counter += 1
        ###############################

    return train_and_valid_files, train_only_files
def _duration_section(durations, start_index, end_index):
    """Return the int8 durations of rows ``[start_index, end_index)``.

    When no durations are available (``None`` or empty), every row gets a
    duration of 1 so the result always has ``end_index - start_index``
    entries.
    """
    # An explicit None/length test is used because plain truthiness raises
    # on numpy arrays holding more than one element
    if durations is not None and len(durations) > 0:
        section = durations[start_index:end_index]
    else:
        section = np.ones((end_index - start_index, ))
    return np.asarray(section, dtype=np.int8)


def build_split_matrices(folder_paths, destination_folder, chunk_size,
                         instru_mapping, N_piano, N_orchestra, embedding_model,
                         binary_piano, binary_orch, build_embedding,
                         max_number_note_played):
    """Read score folders and save them on disk as fixed-size chunks.

    Each readable folder is turned into a piano matrix, an orchestra matrix
    and, when ``build_embedding`` is true, an embedded piano matrix. These
    are cut into slices of at most ``chunk_size`` rows. Every slice is
    saved, together with the matching piano and orchestra durations, as
    ``.npy`` files in a new directory
    ``<destination_folder>/<file_counter>_<split_counter>``.

    Besides its arguments, this function reads ``quantization``,
    ``temporal_granularity`` and ``cuda_gpu`` from the enclosing module scope.

    Args:
        folder_paths: Iterable of folder paths (trailing whitespace is
            stripped). Paths that are not directories are ignored.
        destination_folder: Existing directory in which the split folders
            are created.
        chunk_size: Maximum number of rows per split.
        instru_mapping: Mapping forwarded to ``cast_small_pr_into_big_pr``;
            ``instru_mapping['Piano']['pitch_min'/'pitch_max']`` also bounds
            the piano columns inside the 128-wide embedding input.
        N_piano: Number of columns of the piano matrix.
        N_orchestra: Number of columns of the orchestra matrix.
        embedding_model: Callable invoked as ``embedding_model(batch, 0)``
            on float tensors of shape ``(batch, 1, 128)``; only used when
            ``build_embedding`` is true.
        binary_piano: Forwarded to the folder readers.
        binary_orch: Forwarded to the folder readers.
        build_embedding: Whether to compute and save the piano embedding.
        max_number_note_played: Running maximum of the per-row sum of the
            orchestra matrix, updated with every folder processed.

    Returns:
        A tuple ``(train_and_valid_files, train_only_files,
        max_number_note_played)``. Both dicts map ``<parent>/<folder>`` to
        the list of ``<parent>/<split folder>`` entries created for it.
        Folders listed by ``avoid_tracks.no_valid_tracks()`` go in
        ``train_only_files``.

    Raises:
        Exception: If a folder contains neither one nor two music files
            (counted as the larger of the ``*.mid`` and ``*.xml`` counts).
    """
    file_counter = 0
    train_only_files = {}
    train_and_valid_files = {}

    for folder_path in folder_paths:
        ###############################
        # Read file
        folder_path = folder_path.rstrip()
        logging.info(" : " + folder_path)
        if not os.path.isdir(folder_path):
            continue

        # Tracks are identified by their last two path components
        folder_path_split = folder_path.split("/")
        folder_path_relative = (folder_path_split[-2] + "/" +
                                folder_path_split[-1])

        # Decide once per folder which registry its splits belong to,
        # instead of querying avoid_tracks again for every split
        if folder_path_relative in avoid_tracks.no_valid_tracks():
            split_registry = train_only_files
        else:
            split_registry = train_and_valid_files
        split_registry[folder_path_relative] = []

        # Is there an original piano score or do we have to create it ?
        num_music_file = max(len(glob.glob(folder_path + '/*.mid')),
                             len(glob.glob(folder_path + '/*.xml')))
        if num_music_file == 2:
            is_piano = True
        elif num_music_file == 1:
            is_piano = False
        else:
            raise Exception("Expected 1 or 2 music files (.mid or .xml) in " +
                            folder_path + ", found " + str(num_music_file))

        # Get pr, warped and duration
        # NOTE(review): the two readers are unpacked into 9 and 10 values
        # respectively -- confirm this matches what each one returns.
        if is_piano:
            new_pr_piano, _, new_duration_piano, new_name_piano, new_pr_orchestra, _, new_duration_orch, new_name_orchestra, duration\
                = build_data_aux.process_folder(folder_path, quantization, binary_piano, binary_orch, temporal_granularity, gapopen=3, gapextend=1, align_bool=True)
        else:
            try:
                new_pr_piano, _, new_duration_piano, _, new_name_piano, new_pr_orchestra, _, new_duration_orch, new_name_orchestra, duration\
                    = build_data_aux_no_piano.process_folder_NP(folder_path, quantization, binary_piano, binary_orch, temporal_granularity)
            except Exception:
                # Best effort: skip unreadable folders, but let Ctrl-C and
                # interpreter shutdown propagate
                logging.warning("Could not read file in " + folder_path)
                continue

        # Skip folders for which no piano pianoroll could be produced
        if new_pr_piano is None:
            # It's definitely not a match...
            # Check for the files : are they really a piano score and its orchestration ??
            with open('log_build_db.txt', 'a') as f:
                f.write(folder_path + '\n')
            continue

        pr_orch = build_data_aux.cast_small_pr_into_big_pr(
            new_pr_orchestra, 0, duration, instru_mapping,
            np.zeros((duration, N_orchestra)))
        pr_piano = build_data_aux.cast_small_pr_into_big_pr(
            new_pr_piano, 0, duration, instru_mapping,
            np.zeros((duration, N_piano)))

        ###############################
        # Embed piano
        if build_embedding:
            n_frames = len(pr_piano)
            batch_size = 500  # forced to batch for memory issues
            pitch_min = instru_mapping['Piano']['pitch_min']
            pitch_max = instru_mapping['Piano']['pitch_max']
            piano_embedded = []
            # Inference only: without no_grad the autograd graph is kept
            # alive and .numpy() raises on outputs that require grad
            with torch.no_grad():
                for start_batch_index in range(0, n_frames, batch_size):
                    end_batch_index = min(start_batch_index + batch_size,
                                          n_frames)
                    # Embeddings accept inputs that are 128 samples wide
                    piano_resize_emb = np.zeros(
                        (end_batch_index - start_batch_index, 1, 128))
                    piano_resize_emb[:, 0, pitch_min:pitch_max] = \
                        pr_piano[start_batch_index:end_batch_index]
                    piano_resize_emb_TT = torch.tensor(piano_resize_emb)
                    if cuda_gpu:
                        piano_resize_emb_TT = piano_resize_emb_TT.cuda()
                    piano_embedded_TT = embedding_model(
                        piano_resize_emb_TT.float(), 0)
                    # .cpu() is a no-op for tensors already on the CPU
                    piano_embedded.append(piano_embedded_TT.cpu().numpy())
            piano_embedded = np.concatenate(piano_embedded)
        ###############################

        ##############################
        # Update the max number of notes played in the orchestral score
        this_max_num_notes = int(np.max(np.sum(pr_orch, axis=1)))
        max_number_note_played = max(max_number_note_played,
                                     this_max_num_notes)
        ##############################

        ###############################
        # Split
        last_index = pr_piano.shape[0]
        for split_counter, start_index in enumerate(
                range(0, last_index, chunk_size)):
            this_split_folder = destination_folder + '/' + str(
                file_counter) + '_' + str(split_counter)
            os.mkdir(this_split_folder)
            end_index = min(start_index + chunk_size, last_index)

            np.save(this_split_folder + '/pr_piano.npy',
                    pr_piano[start_index:end_index].astype(np.float32))

            if build_embedding:
                np.save(
                    this_split_folder + '/pr_piano_embedded.npy',
                    piano_embedded[start_index:end_index].astype(np.float32))

            np.save(this_split_folder + '/pr_orch.npy',
                    pr_orch[start_index:end_index].astype(np.float32))

            # Durations always have one entry per saved row, including for
            # the last (possibly shorter) chunk
            np.save(this_split_folder + '/duration_piano.npy',
                    _duration_section(new_duration_piano, start_index,
                                      end_index))
            np.save(this_split_folder + '/duration_orch.npy',
                    _duration_section(new_duration_orch, start_index,
                                      end_index))

            # Keep track of splits, again by their last two path components
            split_path = this_split_folder.split("/")
            this_split_folder_relative = split_path[-2] + "/" + split_path[-1]
            split_registry[folder_path_relative].append(
                this_split_folder_relative)

        file_counter += 1
        ###############################

    return train_and_valid_files, train_only_files, max_number_note_played