Example #1
import os

import numpy as np

# module-level cache, populated on first call
_glove_embeddings = None


def load_glove_embeddings(dim, glove_path):
    global _glove_embeddings

    # skip if already loaded
    if _glove_embeddings:
        return

    _glove_embeddings = []

    # parse the GloVe vectors file, mapping the TRACK_START token to
    # index 129 and <unk> to index 130 (index 128 is reserved for the
    # rest vector added below)
    with open(os.path.join(glove_path, 'vectors_d{}.txt'.format(dim)),
              'r') as f:
        for line in f:
            split = line.split()
            if split[0] == '<unk>':
                split[0] = 130
            elif split[0] == 'TRACK_START':
                split[0] = 129
            _glove_embeddings.append(np.asarray([float(x) for x in split]))

    # add the random rest vector at index 128
    with open(os.path.join(glove_path, 'rest.txt')) as f:
        vec = [128] + [float(x) for x in f.read().split(' ')][0:dim]
        _glove_embeddings.append(np.asarray(vec))

    # sort the list by the numeric index in column 0
    _glove_embeddings.sort(key=lambda x: x[0])

    # strip the index column, leaving only the embedding values
    for i, _ in enumerate(_glove_embeddings):
        _glove_embeddings[i] = np.delete(_glove_embeddings[i], 0)

    log('loaded GloVe vector embeddings with dimension: {}'.format(dim),
        'VERBOSE')
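
A minimal usage sketch; the path and dimension below are assumptions, and the loader expects vectors_d{dim}.txt and rest.txt inside glove_path:

# hypothetical usage: populate the module-level cache once, then read it
load_glove_embeddings(50, '/path/to/glove')
print(len(_glove_embeddings), _glove_embeddings[0].shape)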
Example #2
        def _train_model(model, callbacks):

            start_time = time.time()

            kwargs = {
                "epochs": num_epochs,
                "callbacks": callbacks,
                "verbose": 1,
            }

            if train_gen and val_gen:
                # 827 is a somewhat magic number: the average number of
                # length-20 windows per file, measured over ~5K MIDI files
                # from the Lakh MIDI Dataset.
                magic_number = 827
                kwargs['generator'] = train_gen
                kwargs['validation_data'] = val_gen
                # Keras expects integer step counts
                kwargs['steps_per_epoch'] = \
                    num_midi_files * magic_number // batch_size
                kwargs['validation_steps'] = int(
                    num_midi_files * 0.2 * magic_number / batch_size)
                history = model.fit_generator(**kwargs)
            else:
                kwargs['x'] = train_data[0]
                kwargs['y'] = train_data[1]
                kwargs['validation_data'] = val_data
                kwargs['batch_size'] = batch_size
                history = model.fit(**kwargs)

            log('Finished training model in {:.2f} seconds'.format(
                time.time() - start_time), 'NOTICE')
            return history
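
A worked example of the step arithmetic, with made-up values:

# hypothetical corpus: 5000 MIDI files, batch size 32
num_midi_files, magic_number, batch_size = 5000, 827, 32
steps_per_epoch = num_midi_files * magic_number // batch_size             # 129218
validation_steps = int(num_midi_files * 0.2 * magic_number / batch_size)  # 25843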
Example #3
    def save(self, experiment_dir):
        for i, model in enumerate(self.models):
            path = os.path.join(experiment_dir, 'model_{}.json'.format(i))
            with open(path, 'w') as f:
                f.write(model.to_json())
                log('saved model {} to {}'.format(i, path), 'VERBOSE')
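
The written files are plain Keras architecture JSON, so they can be restored with model_from_json; a minimal round-trip sketch (the path is an assumption):

from keras.models import model_from_json

with open('experiments/001/model_0.json') as f:  # hypothetical path
    model = model_from_json(f.read())
model.summary()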
Example #4
import pretty_midi


def parse_midi(path):
    midi = None
    with open(path, 'rb') as f:
        try:
            midi = pretty_midi.PrettyMIDI(f)
            midi.remove_invalid_notes()
        except Exception as e:
            log(e, 'WARNING')
    return midi
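
A usage sketch (the path is hypothetical); parse_midi returns None on failure, so callers should check before use:

midi = parse_midi('data/song.mid')  # hypothetical path
if midi is not None:
    print('duration: {:.1f}s, instruments: {}'.format(
        midi.get_end_time(), len(midi.instruments)))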
Example #5
    def generate(self,
                 seeds,
                 window_size,
                 length,
                 num_to_gen,
                 encoding='one-hot'):
        def gen(model, seed, window_size, length):

            generated = []
            # ring buffer holding the current input window
            buf = np.copy(seed).tolist()

            if encoding == 'glove-embedding':
                buf = data.input.one_hot_2_glove_embedding(buf)

            while len(generated) < length:
                # the model expects a 3-D batch: (1, window_size, num_features)
                arr = np.expand_dims(np.asarray(buf), 0)
                pred = model.predict(arr)

                # argmax sampling (NOT RECOMMENDED), or...
                # index = np.argmax(pred)

                # sample from the predicted probability distribution
                index = np.random.choice(len(pred[0]), p=pred[0])
                pred = np.zeros(len(pred[0]))

                pred[index] = 1
                generated.append(pred)

                if encoding == 'glove-embedding':
                    pred = data.input.one_hot_2_glove_embedding([pred])[0]

                # slide the window forward by one step
                buf.pop(0)
                buf.append(pred)

            return generated

        per_model = []

        for model in self.models:
            generated = []
            for _ in range(num_to_gen):
                seed = random.choice(seeds)
                generated.append(gen(model, seed, window_size, length))
                log('generated data of length {}'.format(length), 'VERBOSE')
            per_model.append(copy.deepcopy(generated))
        return per_model
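
The sampling step is plain categorical sampling from the model's softmax output; a standalone sketch with a made-up distribution:

import numpy as np

p = np.array([0.1, 0.6, 0.2, 0.1])     # hypothetical softmax output
index = np.random.choice(len(p), p=p)  # draws 1 most often
one_hot = np.zeros(len(p))
one_hot[index] = 1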
Example #6
import glob
import os


def save_midi(pm_midis, folder):
    existing_files = glob.glob(os.path.join(folder, '*.mid'))

    # find the highest existing numeric file name
    _max = 0
    for file in existing_files:
        try:
            _max = max(int(os.path.splitext(os.path.basename(file))[0]), _max)
        except ValueError:
            # ignore non-numeric file names
            pass

    if _max != 0:
        log('existing midi files found in {}, starting names after {} '
            'to avoid collision'.format(folder, _max), 'VERBOSE')

    for i, midi in enumerate(pm_midis):
        file = os.path.join(folder,
                            '{}.mid'.format(str(_max + i + 1).rjust(4, '0')))
        midi.write(file)
        log('saved {} to disk'.format(file), 'VERBOSE')
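
A quick check of the zero-padded naming scheme, with made-up values:

_max = 12  # highest existing file name
names = ['{}.mid'.format(str(_max + i + 1).rjust(4, '0')) for i in range(3)]
# ['0013.mid', '0014.mid', '0015.mid']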
Example #7
    def create_experiment_dir(self,
                              note_representation,
                              encoding,
                              experiment_dir=None,
                              midai_root=None,
                              mode='develop'):
        log('creating experiment directory', 'VERBOSE')
        if not experiment_dir:

            if not midai_root:
                raise Exception('midai_root not set and experiment_dir'
                                ' not provided as an argument')

            path = [
                midai_root, 'trained_models', mode, self.name,
                '{}_{}'.format(note_representation, encoding)
            ]
            parent_dir = os.path.join(*path)

            if not os.path.exists(parent_dir):
                log('{} does not already exist, creating.'.format(parent_dir),
                    'NOTICE')
                os.makedirs(parent_dir)

            experiments = os.listdir(parent_dir)
            experiments = [dir_ for dir_ in experiments
                           if os.path.isdir(os.path.join(parent_dir, dir_))]

            log('{} existing directories found in {}'.format(
                len(experiments), parent_dir), 'VERBOSE')

            # experiment dirs are numbered; find the most recent one
            most_recent_exp = 0
            for dir_ in experiments:
                try:
                    most_recent_exp = max(int(dir_), most_recent_exp)
                except ValueError:
                    # ignore non-numeric folders in experiments/
                    pass

            experiment_dir = os.path.join(
                parent_dir,
                str(most_recent_exp + 1).rjust(3, '0'))

        os.makedirs(experiment_dir)
        log('created {}'.format(experiment_dir), 'VERBOSE')

        os.makedirs(os.path.join(experiment_dir, 'checkpoints'))
        log('created {}'.format(
            os.path.join(experiment_dir, 'checkpoints')), 'VERBOSE')

        os.makedirs(os.path.join(experiment_dir, 'generated'))
        log('created {}'.format(
            os.path.join(experiment_dir, 'generated')), 'VERBOSE')

        self.experiment_dir = experiment_dir
        return experiment_dir
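
A hypothetical call and the tree it produces (the root path and the note-representation value are placeholders, not project values):

exp_dir = model.create_experiment_dir('<note-rep>', 'one-hot',
                                      midai_root='/path/to/midai')
# -> /path/to/midai/trained_models/develop/<model.name>/<note-rep>_one-hot/001
#    containing empty checkpoints/ and generated/ subdirectories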
Example #8
    def load(self, path, best=True, recent=False):
        def _find_experiment_dir(path, best, recent):
            models = []
            checkpoints = []
            for dirpath, dirnames, filenames in os.walk(path):
                if recent:
                    if 'model_0.json' in filenames:
                        models.append(os.path.join(dirpath, 'model_0.json'))
                else:  # best
                    checkpoints.extend(
                        os.path.join(dirpath, c) for c in filenames
                        if '.hdf5' in c and 'checkpoint' in c)

            if recent:
                return os.path.dirname(max(models, key=os.path.getctime))
            else:
                checkpoint = _get_best_checkpoint(checkpoints)
                return os.path.dirname(os.path.dirname(checkpoint))

        def _get_best_checkpoint(checkpoints):
            best = []
            for check in checkpoints:
                try:
                    # filenames are assumed to embed the validation accuracy
                    # just before the '.hdf5' extension, so [-10:-5] recovers it
                    val_acc = float(check[-10:-5])
                    best.append((val_acc, check))
                except ValueError:
                    pass
            if not best:
                return None
            best.sort(key=lambda x: -x[0])
            return best[0][1]

        if self.ready:
            log('model ready is True, do you really mean to load?', 'WARNING')

        if len(self.models) > 0:
            log('models list is not empty. '
                'Have you already loaded this model?', 'WARNING')

        if best and recent:
            message = '"best" and "recent" arguments are mutually exclusive'
            log(message, 'ERROR')
            raise Exception(message)

        experiment_dir = _find_experiment_dir(path, best, recent)

        self.models = []
        num_models = len(
            glob.glob(os.path.join(experiment_dir, 'model_*.json')))
        if num_models < 1:
            message = 'No models found in {}'.format(experiment_dir)
            log(message, 'ERROR')
            raise Exception(message)

        for i in range(num_models):
            with open(os.path.join(experiment_dir, 'model_{}.json'.format(i)),
                      'r') as f:
                model = model_from_json(f.read())
                log('loaded model {} from JSON'.format(i), 'VERBOSE')

            path = [experiment_dir, 'checkpoints', 'model_{}*.hdf5'.format(i)]
            best_checkpoint = _get_best_checkpoint(
                glob.glob(os.path.join(*path)))

            if best_checkpoint:
                model.load_weights(best_checkpoint)
                log(
                    'loaded model {} weights from checkpoint {}'.format(
                        i, best_checkpoint), 'VERBOSE')

            self.models.append(model)

        self.experiment_dir = experiment_dir
        self.ready = True
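
The [-10:-5] slice works because '.hdf5' is five characters long, so the slice grabs the five characters just before the extension; assuming a checkpoint name ends in a four-decimal validation accuracy, it parses back out:

name = 'checkpoints/model_0-checkpoint-10-0.8123.hdf5'  # hypothetical name
print(name[-10:-5])         # '.8123'
print(float(name[-10:-5]))  # 0.8123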