Пример #1
0
    def __init__(self, dataset_dir='../../datasets'):
        """
        Load the Nottingham midi dataset and wrap each split with ``dataset_shared``.

        After the parent initializer has run, the ``*.mid`` files under
        ``<self.dataset_location>/Nottingham/{train,valid,test}`` are read with
        ``midiread``; the per-file piano rolls of a split are concatenated
        (``numpy.concatenate`` default axis) and stored in ``self.train``,
        ``self.valid`` and ``self.test``. The shape of every individual piano
        roll is kept in ``self.train_shapes``, ``self.valid_shapes`` and
        ``self.test_shapes``, in the same order used for the concatenation.

        Parameters
        ----------
        dataset_dir : str
            Passed through to the parent initializer as ``dataset_dir``.

        Raises
        ------
        ValueError
            If no ``*.mid`` file is found for the train, valid or test split.
        """
        filename = 'Nottingham.zip'
        source = 'http://www-etud.iro.umontreal.ca/~boulanni/Nottingham.zip'

        super(Nottingham, self).__init__(filename=filename, source=source, dataset_dir=dataset_dir)

        def load_split(split):
            # Return one piano-roll array per midi file of the given split.
            # self.dataset_location is read only after the parent initializer has run.
            pattern = os.path.join(self.dataset_location, 'Nottingham', split, '*.mid')
            # glob yields files in arbitrary order; sorting keeps the concatenated
            # data and the recorded shapes reproducible from run to run.
            files = sorted(glob.glob(pattern))
            if not files:
                # numpy.concatenate would raise an opaque ValueError on an empty
                # list; fail with the same exception type but name the pattern.
                raise ValueError('No midi files found matching %s' % pattern)
            # NOTE(review): r=(21, 109) and dt=0.3 are presumably the pitch range and
            # the time step handed to midiread -- confirm against its definition.
            return [midiread(f, r=(21, 109), dt=0.3).piano_roll.astype(theano.config.floatX) for f in files]

        train_datasets = load_split('train')
        valid_datasets = load_split('valid')
        test_datasets = load_split('test')
        # remember the per-file shapes, since concatenation erases file boundaries
        self.train_shapes = [train.shape for train in train_datasets]
        self.valid_shapes = [valid.shape for valid in valid_datasets]
        self.test_shapes = [test.shape for test in test_datasets]
        # put them into shared variables
        log.debug('Putting Nottingham into theano shared variables')
        self.train = dataset_shared(numpy.concatenate(train_datasets), name='nottingham_train', borrow=True)
        self.valid = dataset_shared(numpy.concatenate(valid_datasets), name='nottingham_valid', borrow=True)
        self.test = dataset_shared(numpy.concatenate(test_datasets), name='nottingham_test', borrow=True)
Пример #2
0
    def __init__(self, path='datasets/Nottingham',
                 source='http://www-etud.iro.umontreal.ca/~boulanni/Nottingham.zip',
                 train_filter='.*train.*',
                 valid_filter='.*valid.*',
                 test_filter='.*test.*'):
        """
        Load the Nottingham midi files into concatenated input arrays.

        All arguments are forwarded unchanged to the parent initializer. The
        three filters are then used again with ``find_files`` on ``self.path``
        to pick the midi files of each split.

        Parameters
        ----------
        path : str
            Forwarded to the parent initializer as ``path``.
        source : str
            Forwarded to the parent initializer as ``source``.
        train_filter : str
            Filter given to ``find_files`` to select the training files.
        valid_filter : str
            Filter given to ``find_files`` to select the validation files.
        test_filter : str
            Filter given to ``find_files`` to select the test files.
        """
        super(Nottingham, self).__init__(
            path=path,
            source=source,
            train_filter=train_filter,
            valid_filter=valid_filter,
            test_filter=test_filter,
        )

        def read_piano_rolls(file_filter):
            # One array per file returned by find_files for this filter.
            # NOTE(review): r=(21, 109) and dt=0.3 are presumably the pitch range and
            # the time step handed to midiread -- confirm against its definition.
            rolls = []
            for midi_file in find_files(self.path, file_filter):
                roll = midiread(midi_file, r=(21, 109), dt=0.3).piano_roll
                rolls.append(roll.astype(config.floatX))
            return rolls

        # read every split before concatenating anything
        train_rolls = read_piano_rolls(train_filter)
        valid_rolls = read_piano_rolls(valid_filter)
        test_rolls = read_piano_rolls(test_filter)

        # the targets of every split are explicitly set to None
        self.train_inputs = numpy.concatenate(train_rolls)
        self.train_targets = None

        self.valid_inputs = numpy.concatenate(valid_rolls)
        self.valid_targets = None

        self.test_inputs = numpy.concatenate(test_rolls)
        self.test_targets = None
Пример #3
0
    def __init__(self, path='datasets/MuseData',
                 source='http://www-etud.iro.umontreal.ca/~boulanni/MuseData.zip',
                 train_filter='.*train.*',
                 valid_filter='.*valid.*',
                 test_filter='.*test.*'):
        """
        Load the MuseData midi files into concatenated input arrays.

        All arguments are forwarded unchanged to the parent initializer. The
        three filters are then used again with ``find_files`` on ``self.path``
        to pick the midi files of each split.

        Parameters
        ----------
        path : str
            Forwarded to the parent initializer as ``path``.
        source : str
            Forwarded to the parent initializer as ``source``.
        train_filter : str
            Filter given to ``find_files`` to select the training files.
        valid_filter : str
            Filter given to ``find_files`` to select the validation files.
        test_filter : str
            Filter given to ``find_files`` to select the test files.
        """
        super(MuseData, self).__init__(
            path=path,
            source=source,
            train_filter=train_filter,
            valid_filter=valid_filter,
            test_filter=test_filter,
        )

        def read_piano_rolls(file_filter):
            # One array per file returned by find_files for this filter.
            # NOTE(review): r=(21, 109) and dt=0.3 are presumably the pitch range and
            # the time step handed to midiread -- confirm against its definition.
            rolls = []
            for midi_file in find_files(self.path, file_filter):
                roll = midiread(midi_file, r=(21, 109), dt=0.3).piano_roll
                rolls.append(roll.astype(theano.config.floatX))
            return rolls

        # read every split before concatenating anything
        train_rolls = read_piano_rolls(train_filter)
        valid_rolls = read_piano_rolls(valid_filter)
        test_rolls = read_piano_rolls(test_filter)

        # the targets of every split are explicitly set to None
        self.train_inputs = numpy.concatenate(train_rolls)
        self.train_targets = None

        self.valid_inputs = numpy.concatenate(valid_rolls)
        self.valid_targets = None

        self.test_inputs = numpy.concatenate(test_rolls)
        self.test_targets = None
Пример #4
0
    def __init__(self, dataset_dir='../../datasets'):
        """
        Load the MuseData midi dataset and wrap each split with ``dataset_shared``.

        After the parent initializer has run, the ``*.mid`` files under
        ``<self.dataset_location>/MuseData/{train,valid,test}`` are read with
        ``midiread``; the per-file piano rolls of a split are concatenated
        (``numpy.concatenate`` default axis) and stored in ``self.train``,
        ``self.valid`` and ``self.test``. The shape of every individual piano
        roll is kept in ``self.train_shapes``, ``self.valid_shapes`` and
        ``self.test_shapes``, in the same order used for the concatenation.

        Parameters
        ----------
        dataset_dir : str
            The `dataset_dir` parameter to a ``FileDataset``.

        Raises
        ------
        ValueError
            If no ``*.mid`` file is found for the train, valid or test split.
        """
        log.debug("Loading MuseData midi dataset...")

        filename = 'MuseData.zip'
        source = 'http://www-etud.iro.umontreal.ca/~boulanni/MuseData.zip'

        super(MuseData, self).__init__(filename=filename,
                                       source=source,
                                       dataset_dir=dataset_dir)

        def load_split(split):
            # Return one piano-roll array per midi file of the given split.
            # self.dataset_location is read only after the parent initializer
            # has run.
            pattern = os.path.join(self.dataset_location, 'MuseData', split,
                                   '*.mid')
            # glob yields files in arbitrary order; sorting keeps the
            # concatenated data and the recorded shapes reproducible.
            files = sorted(glob.glob(pattern))
            if not files:
                # numpy.concatenate would raise an opaque ValueError on an
                # empty list; fail with the same type but name the pattern.
                raise ValueError('No midi files found matching %s' % pattern)
            # NOTE(review): r=(21, 109) and dt=0.3 are presumably the pitch
            # range and the time step handed to midiread -- confirm against
            # its definition.
            return [
                midiread(f, r=(21, 109),
                         dt=0.3).piano_roll.astype(theano.config.floatX)
                for f in files
            ]

        train_datasets = load_split('train')
        valid_datasets = load_split('valid')
        test_datasets = load_split('test')
        # remember the per-file shapes, since concatenation erases the file
        # boundaries
        self.train_shapes = [train.shape for train in train_datasets]
        self.valid_shapes = [valid.shape for valid in valid_datasets]
        self.test_shapes = [test.shape for test in test_datasets]
        # put them into shared variables
        log.debug('Putting MuseData into theano shared variables')
        self.train = dataset_shared(numpy.concatenate(train_datasets),
                                    name='muse_train',
                                    borrow=True)
        self.valid = dataset_shared(numpy.concatenate(valid_datasets),
                                    name='muse_valid',
                                    borrow=True)
        self.test = dataset_shared(numpy.concatenate(test_datasets),
                                   name='muse_test',
                                   borrow=True)