Example No. 1
def get_harmony_mapping(all_harmonies, chord_cutoff=64):
    """
    all_harmonies: all harmonies of the songs; dim: [num_songs * T], element are chord strings;
    chord_cutoff: if chords are seen fewer times than this cutoff, they are ignored and
                  marked as rests in the resulting dataset;
    return: harmony mapping;
    """
    chords = {}
    for harmony in all_harmonies:
        for h in harmony:
            if h not in chords:
                chords[h] = 1
            else:
                chords[h] += 1
    # keep only chords seen at least chord_cutoff times (values are occurrence counts)
    chords = {c: count for c, count in chords.items() if count >= chord_cutoff}
    if NO_CHORD not in chords:
        chords[NO_CHORD] = 1  # make sure NO_CHORD is in the map
    chord_to_idx = {c: i for i, c in enumerate(chords.keys())}
    idx_to_chord = {i: c for i, c in enumerate(chords.keys())}
    return chord_to_idx, idx_to_chord
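A minimal usage sketch with hypothetical toy data (it assumes the module-level NO_CHORD constant is the string 'NONE'; the chord labels below are illustrative, not from the original dataset):

# Hypothetical usage of get_harmony_mapping with toy data
# (assumes the module-level NO_CHORD constant equals 'NONE').
all_harmonies = [
    ['CM', 'CM', 'GM', 'NONE'],   # harmony sequence of song 1
    ['CM', 'GM', 'GM', 'Am'],     # harmony sequence of song 2
]
chord_to_idx, idx_to_chord = get_harmony_mapping(all_harmonies, chord_cutoff=2)
# 'Am' and 'NONE' fall below the cutoff, so only 'CM' and 'GM' survive;
# NO_CHORD is then re-added, giving a three-chord vocabulary.
print(chord_to_idx)   # e.g. {'CM': 0, 'GM': 1, 'NONE': 2} (order follows dict insertion)
print(idx_to_chord)   # e.g. {0: 'CM', 1: 'GM', 2: 'NONE'}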
Example No. 2
def prepare_nottingham_pickle(time_step,
                              chord_cutoff=64,
                              filename=PICKLE_LOC,
                              verbose=False):
    """
    time_step: the time step to discretize all notes into
    chord_cutoff: if chords are seen fewer times than this cutoff, they are ignored and
                  marked as rests in the resulting dataset
    filename: the location where the pickle will be saved to
    """

    data = {}
    store = {}
    chords = {}
    max_seq = 0
    seq_lens = []

    for d in ["train", "test", "valid"]:
        print("Parsing {}...".format(d))
        parsed = parse_nottingham_directory("data/Nottingham/{}".format(d),
                                            time_step,
                                            verbose=False)
        metadata = [s[0] for s in parsed]
        seqs = [s[1] for s in parsed]
        data[d] = seqs
        data[d + '_metadata'] = metadata
        lens = [len(s[1]) for s in seqs]
        seq_lens += lens
        max_seq = max(max_seq, max(lens))

        for _, harmony in seqs:
            for h in harmony:
                if h not in chords:
                    chords[h] = 1
                else:
                    chords[h] += 1

    avg_seq = float(sum(seq_lens)) / len(seq_lens)

    # keep only chords seen at least chord_cutoff times (values are occurrence counts)
    chords = {c: count for c, count in chords.items() if count >= chord_cutoff}
    chord_mapping = {c: i for i, c in enumerate(chords.keys())}
    num_chords = len(chord_mapping)
    store['chord_to_idx'] = chord_mapping
    if verbose:
        pprint(chords)
        print("Number of chords: {}".format(num_chords))
        print("Max Sequence length: {}".format(max_seq))
        print("Avg Sequence length: {}".format(avg_seq))
        print("Num Sequences: {}".format(len(seq_lens)))

    def combine(melody, harmony):
        full = np.zeros(
            (melody.shape[0], NOTTINGHAM_MELODY_RANGE + num_chords))

        assert melody.shape[0] == len(harmony)

        # for every time step with no melody note, set the empty-melody marker (the last melody bin)
        for i in range(melody.shape[0]):
            if np.count_nonzero(melody[i, :]) == 0:
                melody[i, NOTTINGHAM_MELODY_RANGE - 1] = 1

        # all melody encodings should now have exactly one 1
        for i in range(melody.shape[0]):
            assert np.count_nonzero(melody[i, :]) == 1

        # add all the melodies
        full[:, :melody.shape[1]] += melody

        harmony_idxs = [ chord_mapping[h] if h in chord_mapping else chord_mapping[NO_CHORD] \
                         for h in harmony ]
        harmony_idxs = [NOTTINGHAM_MELODY_RANGE + h for h in harmony_idxs]
        full[np.arange(len(harmony)), harmony_idxs] = 1

        # all full encodings should have exactly two 1's
        for i in range(full.shape[0]):
            assert np.count_nonzero(full[i, :]) == 2

        return full

    for d in ["train", "test", "valid"]:
        print("Combining {}".format(d))
        store[d] = [combine(m, h) for m, h in data[d]]
        store[d + '_metadata'] = data[d + '_metadata']

    # pickle data must be written in binary mode; protocol=-1 selects the highest protocol
    with open(filename, 'wb') as f:
        pickle.dump(store, f, protocol=-1)

    return True
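A short, hedged sketch of reading the resulting pickle back and decoding one time step. PICKLE_LOC, NOTTINGHAM_MELODY_RANGE, and the 'train' / 'chord_to_idx' keys are taken from the snippet above; everything else is an illustrative assumption, not a confirmed API:

import pickle

import numpy as np

# Sketch: load the stored dataset and decode the first time step of the first training song.
# Assumes PICKLE_LOC and NOTTINGHAM_MELODY_RANGE are the module-level constants used above.
with open(PICKLE_LOC, 'rb') as f:
    store = pickle.load(f)

chord_to_idx = store['chord_to_idx']
idx_to_chord = {i: c for c, i in chord_to_idx.items()}

seq = store['train'][0]          # shape: (T, NOTTINGHAM_MELODY_RANGE + num_chords)
step = seq[0]
melody_idx = int(np.argmax(step[:NOTTINGHAM_MELODY_RANGE]))   # one-hot melody part
chord_idx = int(np.argmax(step[NOTTINGHAM_MELODY_RANGE:]))    # one-hot harmony part
print("melody bin:", melody_idx, "chord:", idx_to_chord[chord_idx])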
Example No. 3
                if c not in chords:
                    chords[c] = 1
                else:
                    chords[c] += 1

    #Calculate average length, which may be used for identifying batch timestep length
    avg_seq = float(sum(sequenceLength)) / len(sequenceLength)

    # Prepare the chord index for the harmony one-hot vector
    chordLimit = 64
    # keep only chords that occur at least chordLimit times (values are occurrence counts)
    chords = {
        chord: count
        for chord, count in chords.items() if count >= chordLimit
    }
    chordMap = {chord: ind for ind, chord in enumerate(chords.keys())}
    numChords = len(chordMap)

    # append chord index to final
    final['chordIx'] = chordMap

    # Plot the chord distribution chart
    pprint(chords)
    plt.figure(figsize=(10, 4))
    plt.bar(range(len(chords)), list(chords.values()))
    plt.xticks(range(len(chords)), list(chords.keys()))
    plt.show()

    # Print sequence information
    print("Total sequences parsed:      {}".format(len(sequenceLength)))
    print("Maximum length of sequences: {}".format(sequenceMax))
Example No. 4
def prepare_nottingham_pickle(time_step, chord_cutoff=64, filename=PICKLE_LOC, verbose=False):
    """
    time_step: the time step to discretize all notes into
    chord_cutoff: if chords are seen fewer times than this cutoff, they are ignored and
                  marked as rests in the resulting dataset
    filename: the location where the pickle will be saved to
    """

    data = {}
    store = {}
    chords = {}
    max_seq = 0
    seq_lens = []
    
    for d in ["train", "test", "valid"]:
        print "Parsing {}...".format(d)
        parsed = parse_nottingham_directory("data/Nottingham/{}".format(d), time_step, verbose=False)
        metadata = [s[0] for s in parsed]
        seqs = [s[1] for s in parsed]
        data[d] = seqs
        data[d + '_metadata'] = metadata
        lens = [len(s[1]) for s in seqs]
        seq_lens += lens
        max_seq = max(max_seq, max(lens))
        
        for _, harmony in seqs:
            for h in harmony:
                if h not in chords:
                    chords[h] = 1
                else:
                    chords[h] += 1

    avg_seq = float(sum(seq_lens)) / len(seq_lens)

    # keep only chords seen at least chord_cutoff times (values are occurrence counts)
    chords = { c: count for c, count in chords.items() if count >= chord_cutoff }
    chord_mapping = { c: i for i, c in enumerate(chords.keys()) }
    num_chords = len(chord_mapping)
    store['chord_to_idx'] = chord_mapping
    if verbose:
        pprint(chords)
        print "Number of chords: {}".format(num_chords)
        print "Max Sequence length: {}".format(max_seq)
        print "Avg Sequence length: {}".format(avg_seq)
        print "Num Sequences: {}".format(len(seq_lens))

    def combine(melody, harmony):
        full = np.zeros((melody.shape[0], NOTTINGHAM_MELODY_RANGE + num_chords))

        assert melody.shape[0] == len(harmony)

        # for every time step with no melody note, set the empty-melody marker (the last melody bin)
        for i in range(melody.shape[0]):
            if np.count_nonzero(melody[i, :]) == 0:
                melody[i, NOTTINGHAM_MELODY_RANGE-1] = 1

        # all melody encodings should now have exactly one 1
        for i in range(melody.shape[0]):
            assert np.count_nonzero(melody[i, :]) == 1

        # add all the melodies
        full[:, :melody.shape[1]] += melody

        harmony_idxs = [ chord_mapping[h] if h in chord_mapping else chord_mapping[NO_CHORD] \
                         for h in harmony ]
        harmony_idxs = [ NOTTINGHAM_MELODY_RANGE + h for h in harmony_idxs ]
        full[np.arange(len(harmony)), harmony_idxs] = 1

        # all full encodings should have exactly two 1's
        for i in range(full.shape[0]):
            assert np.count_nonzero(full[i, :]) == 2

        return full

    for d in ["train", "test", "valid"]:
        print "Combining {}".format(d)
        store[d] = [ combine(m, h) for m, h in data[d] ]
        store[d + '_metadata'] = data[d + '_metadata']

    # pickle data must be written in binary mode; protocol=-1 selects the highest protocol
    with open(filename, 'wb') as f:
        pickle.dump(store, f, protocol=-1)

    return True
Example No. 5
#                if rem > 0:
#                    padStep = []
#                    for _ in range(rem):
#                        padStep += [[0,0,0,0]]
#                    tStepFinal += [padStep+ tStepVec]
#                else:
#                    tStepFinal += ([tStepVec])
#            data[d + '_countFB'] = tStepFinal

                    
    #Calculate average length, which may be used for identifying batch timestep length 
    avg_seq = float(sum(seq_lens)) / len(seq_lens)
        
    # Prepare the chord index for the harmony one-hot vector
    chords = { c: count for c, count in chords.items() if count >= chordLimit }
    chord_mapping = { c: i for i, c in enumerate(chords.keys()) }
    num_chords = len(chord_mapping)
    store['chord_to_idx'] = chord_mapping
    
    # Plot the chord distribution chart
    pprint(chords)
    plt.figure(figsize=(10, 4))
    plt.bar(range(len(chords)), list(chords.values()))
    plt.xticks(range(len(chords)), list(chords.keys()))
    plt.show()
    
    # Print sequence information
    print("Number of chords: {}".format(num_chords))
    print("Max Sequence length: {}".format(max_seq))
    print("Avg Sequence length: {}".format(avg_seq))
    print("Num Sequences: {}".format(len(seq_lens)))