def convert_to_dataset(X, y):
    # note: defined inside a method, so self below refers to the enclosing instance
    X = np.vstack(X)
    y = np.vstack(y)

    # convert labels
    y = self.label_converter.get_labels(y, self.label_mode)
    y = np.hstack(y)

    one_hot_y = one_hot(y)

    dataset = DenseDesignMatrix(X=X, y=one_hot_y)
    dataset.labels = y  # for confusion matrix

    return dataset
Example #2
            def convert_to_dataset(X, y):
                X = np.vstack(X)
                y = np.vstack(y)

                # convert labels
                y = self.label_converter.get_labels(y, self.label_mode)
                y = np.hstack(y)

                one_hot_y = one_hot(y)

                dataset = DenseDesignMatrix(X=X, y=one_hot_y)
                dataset.labels = y  # for confusion matrix

                return dataset
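
Both snippets above follow the same recipe: stack the per-sequence design matrices, flatten the converted labels, one-hot encode them, and wrap everything in a DenseDesignMatrix, keeping the integer labels around for a later confusion matrix. A minimal NumPy-only shape check of the stacking and encoding steps (toy data, no pylearn2 and no LabelConverter):

import numpy as np

# two toy sequences with 3 features each and their integer labels
X_parts = [np.ones((4, 3)), np.zeros((2, 3))]
y_parts = [np.array([0, 0, 1, 1]), np.array([2, 2])]

X = np.vstack(X_parts)                      # (6, 3) design matrix
y = np.hstack(y_parts)                      # (6,) integer labels
one_hot_y = np.eye(3, dtype='float32')[y]   # (6, 3) one-hot targets, one row per example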
Example #3
    def on_monitor(self, model, dataset, algorithm):
        d = model.get_param_vector() - self.origin

        data = list(dataset.get_batch_design(self.batch_size,
            include_labels=True))
        from pylearn2.utils.one_hot import one_hot
        data[1] = one_hot(data[1])

        cost_values = []
        for scale in self.scales:
            print "Evaluating cost at scale ", scale

            model.set_param_vector(self.origin + scale * d)
            model.enforce_constraints()

            cost_values.append(self.cost_fn(*data))

        print 'Scales searched: ', self.scales
        print 'Cost values: ', cost_values

        best_scale = self.scales[cost_values.index(min(cost_values))]
        print "best_scale: ", best_scale
        model.set_param_vector(self.origin + best_scale * d)
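
The on_monitor hook above is a simple line search: it evaluates the cost at origin + scale * d for every scale in self.scales and keeps the parameter vector with the lowest cost. The same pattern in a self-contained toy form (hypothetical quadratic cost, plain NumPy, no model or compiled Theano function):

import numpy as np

def line_search(origin, direction, cost_fn, scales):
    # evaluate the cost along origin + scale * direction and return the best point
    values = [cost_fn(origin + s * direction) for s in scales]
    best = scales[int(np.argmin(values))]
    return origin + best * direction, best

# toy cost: a quadratic bowl centred at 1, so scale 1.0 should win
point, best_scale = line_search(np.zeros(3), np.ones(3),
                                lambda p: float(np.sum((p - 1.0) ** 2)),
                                scales=[0.0, 0.5, 1.0, 2.0])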
Example #4
def test_one_hot_basic():
    assert_equal(one_hot([1, 2]), [[0, 1, 0], [0, 0, 1]])
    assert_equal(one_hot([[1], [2], [1]], max_label=3), [[0, 1, 0, 0], [0, 0, 1, 0], [0, 1, 0, 0]])
Example #5
def test_one_hot_out():
    out = np.empty((2, 3), dtype="uint8")
    assert_equal(one_hot([1, 2], out=out), [[0, 1, 0], [0, 0, 1]])
    assert_equal(out, [[0, 1, 0], [0, 0, 1]])
Example #6
def test_one_hot_dtypes():
    int_dt = ["int8", "int16", "int32", "int64"]
    int_dt += ["u" + dt for dt in int_dt]
    float_dt = ["float64", "float32", "complex64", "complex128"]
    all_dt = int_dt + float_dt
    assert_(all(one_hot([5], dtype=dt).dtype == np.dtype(dt) for dt in all_dt))
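
Taken together, these tests pin down the expected behaviour of one_hot: 0-based integer labels (flat or nested one-element lists), an output width of max label + 1 (or max_label + 1 when given), an optional preallocated out buffer, and a configurable dtype. As a rough illustration of that behaviour only — not pylearn2's actual implementation — a minimal NumPy sketch:

import numpy as np

def one_hot_sketch(labels, max_label=None, dtype='uint8', out=None):
    # hypothetical re-implementation matching the tests above
    labels = np.asarray(labels).ravel()                # accepts [1, 2] or [[1], [2], [1]]
    width = (labels.max() if max_label is None else max_label) + 1
    if out is None:
        out = np.zeros((len(labels), width), dtype=dtype)
    else:
        out[...] = 0
    out[np.arange(len(labels)), labels] = 1            # one column set per row
    return out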
Example #7
    def setup_impl(self, model, dataset, algorithm):
        cost = algorithm.cost

        root = model.get_param_vector()

        dim = root.size

        rng = self.rng


        points = rng.randn(self.num_points, self.num_basis_vectors)
        points = points.astype(root.dtype)
        points *= self.scale

        if self.include_root:
            points[0, :] = 0.

        if not hasattr(self, 'cost_fn'):
            # Cargo-cult the data_specs boilerplate needed to evaluate the cost function
            # =======================================
            data_specs = cost.get_data_specs(model)
            mapping = DataSpecsMapping(data_specs)
            space_tuple = mapping.flatten(data_specs[0], return_tuple=True)
            source_tuple = mapping.flatten(data_specs[1], return_tuple=True)

            # Build a flat tuple of Theano Variables, one for each space.
            # We want that so that if the same space/source is specified
            # more than once in data_specs, only one Theano Variable
            # is generated for it, and the corresponding value is passed
            # only once to the compiled Theano function.
            theano_args = []
            for space, source in safe_zip(space_tuple, source_tuple):
                name = '%s[%s]' % (self.__class__.__name__, source)
                arg = space.make_theano_batch(name=name,
                                              batch_size=self.batch_size)
                theano_args.append(arg)
            theano_args = tuple(theano_args)

            # Methods of `cost` need args to be passed in a format compatible
            # with data_specs
            nested_args = mapping.nest(theano_args)
            fixed_var_descr = cost.get_fixed_var_descr(model, nested_args)
            self.on_load_batch = fixed_var_descr.on_load_batch

            cost_value = cost.expr(model, nested_args,
                                        ** fixed_var_descr.fixed_vars)
            # End cargo culting
            # ======================

            print "Compiling cost function..."
            cost_fn = function(theano_args, cost_value)
            self.cost_fn = cost_fn
        else:
            cost_fn = self.cost_fn

        cost_values = np.zeros(self.num_points)


        data = list(dataset.get_batch_design(self.batch_size,
            include_labels=True))
        from pylearn2.utils.one_hot import one_hot
        data[1] = one_hot(data[1])


        if self.method == 'gaussian':
            # rng.randn gives a (dim, num_basis_vectors) array; rng.normal(dim, k) would be read as (mean, std)
            basis = rng.randn(dim, self.num_basis_vectors).astype(root.dtype)
        elif self.method == 'element':
            basis = np.zeros((dim, self.num_basis_vectors)).astype(root.dtype)
            for i in xrange(self.num_basis_vectors):
                basis[rng.randint(dim), i] = 1.
        elif self.method == 'gradient':
            if not hasattr(self, 'grad_fn'):
                self.grad_fn = function(theano_args, grad(cost_value, model.get_params()))
            grad_fn = self.grad_fn

            basis = np.zeros((dim, self.num_basis_vectors)).astype(root.dtype)
            for i in xrange(self.num_basis_vectors):
                ipt = list(dataset.get_batch_design(1, include_labels=True))
                label = ipt[1]
                assert label.size == 1
                label = label[0]
                # renamed to avoid shadowing the imported one_hot; assumes 10 classes
                one_hot_label = np.zeros((1, 10), dtype='float32')
                one_hot_label[0, label] = 1
                ipt[1] = one_hot_label
                g = grad_fn(*ipt)
                basis[:,i] = np.concatenate([e.reshape(e.size) for e in g], axis=0)
        else:
            assert False

        basis /= np.sqrt(np.square(basis).sum(axis=0))

        # Orthogonalize basis
        for i in xrange(self.num_basis_vectors):
            v = basis[:, i].copy()
            for j in xrange(i):  # subtract projections onto all previously orthogonalized vectors
                u = basis[:, j].copy()
                v -= np.dot(u, v) * u
            norm = np.sqrt(np.square(v).sum())
            assert norm > 1e-4
            v /= norm
            basis[:, i] = v


        for i in xrange(self.num_points):
            print "Evaluating cost at point ", i

            point = points[i, :]
            full_point = root + np.dot(basis, point)
            model.set_param_vector(full_point)

            cost_values[i] = cost_fn(*data)
            print cost_values[i]


        from pylearn2.utils import sharedX
        import theano.tensor as T

        print "!!!!!!!! FITTING THE QUADRATIC FUNCTION !!!!!!!!!!!!!!!!!!!"

        if not hasattr(self, 'fit_quad'):
            points = sharedX(points)
            #from theano import config
            #config.compute_test_value = 'raise'
            cost_values = sharedX(cost_values)
            A = sharedX(np.zeros((self.num_basis_vectors, self.num_basis_vectors)))
            if self.psd:
                mat = T.dot(A.T, A)
            else:
                mat = A
            b = sharedX(np.zeros(self.num_basis_vectors))
            c = sharedX(0.)
            half_quad = T.dot(points, mat)
            quad = (points * half_quad).sum(axis=1)
            lin = T.dot(points, b)
            pred = quad + lin + c

            from pylearn2.optimization.batch_gradient_descent import BatchGradientDescent

            mse = T.square(pred - cost_values).mean()
            mae = abs(pred - cost_values).mean()

            obj = locals()[self.fitting_cost]

            fit_quad = BatchGradientDescent(obj, params = [A, b, c],
                    max_iter = self.num_basis_vectors ** 2,
                    verbose = 3, tol = None,
                    init_alpha = None, min_init_alpha = 1e-7,
                    reset_alpha = False, conjugate = True,
                    reset_conjugate = False,
                    line_search_mode = 'exhaustive')
            self.fit_quad = fit_quad
            self.A = A
            self.b = b
            self.c = c
            self.points = points
            self.cost_values = cost_values
        else:
            self.A.set_value(.001 * np.identity(self.A.get_value().shape[0], dtype=self.A.dtype))
            self.b.set_value(self.b.get_value() * 0.)
            self.c.set_value(self.c.get_value() * 0.)
            self.points.set_value(points)
            self.cost_values.set_value(cost_values.astype(self.cost_values.dtype))

        self.fit_quad.minimize()

        print "!!!!!!!!!!!!! FINDING ITS MINIMUM !!!!!!!!!!!!!!!!!!!!!!!!!!!"

        if self.use_solver:
            if self.psd:
                Av = self.A.get_value()
                mat_v = np.dot(Av.T, Av)
            else:
                mat_v = self.A.get_value()
            bv = self.b.get_value()

            # minimize for x^T A x + b^T x + c
            # -> solve 2 A x + b = 0
            # Ax = - b / 2

            print "********** mat_v", mat_v.min(), mat_v.max()
            x, ignored_residuals, ignored_rank, ignored_singular_values = np.linalg.lstsq(mat_v, - 0.5 * bv)
            print "********** soln: ", x.min(), x.mean(), x.max()
            print "********** SVs: ", ignored_singular_values.min(), ignored_singular_values.max()
            assert x.ndim == 1, x.shape
            prod = np.dot(basis, x)
            norm = np.sqrt(np.square(prod).sum())
            print "*************** Moving params by ",norm
            vector = root + prod
            model.set_param_vector(vector)

        else: # use minimizer
            if not hasattr(self, 'fit_params'):
                self.vector = sharedX(points.get_value().mean(axis=0))
                vector = self.vector
                obj = T.dot(T.dot(mat, vector), vector) + T.dot(b, vector)

                def constrain(d):
                    assert vector in d
                    n = d[vector]
                    norm = T.sqrt(T.square(n).sum())
                    desired_norm = T.clip(norm, 0., self.max_jump_norm)
                    d[vector] = n * desired_norm / norm

                self.fit_params = BatchGradientDescent(obj, params=[vector],
                    max_iter = self.num_basis_vectors,
                    verbose = 3, tol=None,
                    param_constrainers = [constrain],
                    init_alpha = None, min_init_alpha = 1e-3,
                    reset_alpha=False, conjugate=True, reset_conjugate=False,
                    line_search_mode='exhaustive')
            else:
                self.vector.set_value(points.mean(axis=0).astype(self.vector.dtype))

            self.fit_params.minimize()

            model.set_param_vector(root + np.dot(basis , self.vector.get_value()))
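
In outline, setup_impl samples points in a low-dimensional basis around the current parameters, evaluates the training cost at each point, fits a quadratic surrogate q(x) = x^T M x + b^T x + c to those values, and then jumps to the surrogate's minimizer, which satisfies 2 M x + b = 0, i.e. M x = -b / 2 (the system the lstsq call solves). A stripped-down NumPy illustration of that final solve, with a toy symmetric M and b rather than the fitted Theano shared variables:

import numpy as np

# toy positive-definite quadratic q(x) = x^T M x + b^T x + c
M = np.array([[2.0, 0.5],
              [0.5, 1.0]])
b = np.array([-1.0, 0.5])

# grad q(x) = 2 M x + b = 0  ->  M x = -b / 2
x_star = np.linalg.lstsq(M, -0.5 * b)[0]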
Example #8
    def __init__(
        self,
        path,
        suffix='',  # required data file parameters
        subjects='all',  # optional selector (list) or 'all'
        start_sample=0,
        stop_sample=None,  # optional for selection of sub-sequences
        frame_size=-1,
        hop_size=-1,  # values > 0 will lead to windowing
        label_mode='tempo',
        name='',  # optional name
        n_fft=0,
        n_freq_bins=None,
        save_matrix_path=None,
        channels=None,
        resample=None,
        stimulus_id_filter=None,
        keep_metadata=False,
        spectrum_log_amplitude=False,
        spectrum_normalization_mode=None,
    ):
        '''
        Constructor
        '''

        self.name = name

        self.spectrum_normalization_mode = spectrum_normalization_mode
        self.spectrum_log_amplitude = spectrum_log_amplitude

        self.datafiles = []
        subject_paths = glob.glob(os.path.join(path, 'Sub*'))
        for path in subject_paths:
            dataset_filename = os.path.join(path, 'dataset' + suffix + '.pklz')
            if os.path.isfile(dataset_filename):
                log.debug('adding {}'.format(dataset_filename))
                self.datafiles.append(dataset_filename)
            else:
                log.warn('file does not exist {}'.format(dataset_filename))
        self.datafiles.sort()

        if subjects == 'all':
            subjects = np.arange(0, len(self.datafiles))
        assert subjects is not None and len(subjects) > 0

        self.label_mode = label_mode
        self.label_converter = LabelConverter()

        if stimulus_id_filter is None:
            stimulus_id_filter = []
        self.stimulus_id_filter = stimulus_id_filter

        self.subject_partitions = []   # used to keep track of original subjects
        self.sequence_partitions = []  # used to keep track of original sequences
        self.trial_partitions = []     # keeps track of original trials

        # metadata: [subject, trial_no, stimulus, channel, start, ]
        self.metadata = []

        sequences = []
        labels = []
        n_sequences = 0
        last_raw_label = -1
        for i in xrange(len(self.datafiles)):
            if i in subjects:
                with log_timing(
                        log, 'loading data from {}'.format(self.datafiles[i])):
                    self.subject_partitions.append(n_sequences)
                    # save start of next subject

                    subject_sequences, subject_labels, channel_meta = load(
                        self.datafiles[i])

                    subject_trial_no = -1

                    for j in xrange(len(subject_sequences)):
                        l = subject_labels[j]
                        # get raw label

                        if l in stimulus_id_filter:
                            #                             log.debug('skipping stimulus {}'.format(l));
                            continue

                        c = channel_meta[j][0]

                        if channels is not None and c not in channels:  # apply optional channel filter
                            log.debug('skipping channel {}'.format(c))
                            continue

                        self.sequence_partitions.append(n_sequences)
                        # save start of next sequence

                        if l != last_raw_label:  # if raw label changed...
                            self.trial_partitions.append(n_sequences)
                            # ...save start of next trial
                            subject_trial_no += 1
                            # increment subject_trial_no counter
                            last_raw_label = l

                        l = self.label_converter.get_label(
                            l[0], self.label_mode)
                        # convert to label_mode view

                        s = subject_sequences[j]
                        s = s[start_sample:stop_sample]
                        # get sub-sequence in original space

                        # down-sample if requested
                        if resample is not None and resample[0] != resample[1]:
                            s = librosa.resample(s, resample[0], resample[1])

                        if n_fft is not None and n_fft > 0:
                            # optionally transform to spectrogram
                            hop_length = n_fft / 4
                            '''
                            from http://theremin.ucsd.edu/~bmcfee/librosadoc/librosa.html
                            >>> # Get a power spectrogram from a waveform y
                            >>> S       = np.abs(librosa.stft(y)) ** 2
                            >>> log_S   = librosa.logamplitude(S)
                            '''
                            s = np.abs(
                                librosa.core.stft(s,
                                                  n_fft=n_fft,
                                                  hop_length=hop_length))**2

                            if n_freq_bins is not None:  # Optionally:
                                s = s[0:n_freq_bins, :]
                                #    cut off high bands

                            if self.spectrum_log_amplitude:
                                s = librosa.logamplitude(s)
                            '''
                            NOTE on normalization:
                            It depends on the structure of the neural network and (even more)
                            on the properties of the data. There is no single best normalization
                            algorithm; if there were one, it would be used everywhere by default.

                            In theory, there is no requirement for the data to be normalized at
                            all. It is a purely practical matter: convergence can take forever
                            if the input is spread out too much. The simplest approach is to
                            scale the data to (-1, 1) (or (0, 1), depending on the activation
                            function), and in most cases that works. If the algorithm converges
                            well, that is your answer; if not, there are too many possible
                            problems and methods to outline here without knowing the actual data.
                            '''

                            ## normalize to mean 0, std 1
                            if self.spectrum_normalization_mode == 'mean0_std1':
                                #                                 s = preprocessing.scale(s, axis=0);
                                mean = np.mean(s)
                                std = np.std(s)
                                s = (s - mean) / std

                            ## normalize by linear transform to [0,1]
                            elif self.spectrum_normalization_mode == 'linear_0_1':
                                s = s / np.max(s)

                            ## normalize by linear transform to [-1,1]
                            elif self.spectrum_normalization_mode == 'linear_-1_1':
                                s = -1 + 2 * (s - np.min(s)) / (np.max(s) -
                                                                np.min(s))

                            elif self.spectrum_normalization_mode is not None:
                                raise ValueError(
                                    'unsupported spectrum normalization mode {}'
                                    .format(self.spectrum_normalization_mode))

                            #print s.mean(axis=0)
                            #print s.std(axis=0)

                            # transpose to fit pylearn2 layout
                            s = np.transpose(s)
                        else:
                            # normalize to max amplitude 1
                            s = librosa.util.normalize(s)

                        s = np.asfarray(s, dtype='float32')

                        if frame_size > 0 and hop_size > 0:
                            s, l = self._split_sequence(
                                s, l, frame_size, hop_size)

#                         print s.shape
                        n_sequences += len(s)

                        sequences.append(s)
                        labels.extend(l)

                        if keep_metadata:
                            self.metadata.append({
                                'subject': i,
                                'trial_no': subject_trial_no,
                                'stimulus': last_raw_label[0],
                                'channel': c,
                                'start': self.sequence_partitions[-1],
                                'stop': n_sequences
                            })

        # turn into numpy arrays
        sequences = np.vstack(sequences)
        #         print sequences.shape;

        labels = np.hstack(labels)

        one_hot_y = one_hot(labels)

        self.labels = labels  # save for later

        if n_fft > 0:
            sequences = np.array([sequences])

            # re-arrange dimensions
            sequences = sequences.swapaxes(0, 1).swapaxes(1, 2).swapaxes(2, 3)

            log.debug('final dataset shape: {} (b,0,1,c)'.format(
                sequences.shape))

            super(EEGDataset, self).__init__(topo_view=sequences,
                                             y=one_hot_y,
                                             axes=['b', 0, 1, 'c'])
        else:
            super(EEGDataset, self).__init__(X=sequences,
                                             y=one_hot_y,
                                             axes=['b', 0, 1, 'c'])

        log.debug(
            'generated dataset "{}" with shape X={} y={} labels={} '.format(
                self.name, self.X.shape, self.y.shape, self.labels.shape))

        if save_matrix_path is not None:
            matrix = DenseDesignMatrix(X=sequences, y=one_hot_y)
            with log_timing(
                    log,
                    'saving DenseDesignMatrix to {}'.format(save_matrix_path)):
                serial.save(save_matrix_path, matrix)
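
The three spectrum_normalization_mode branches in the constructor above are ordinary rescalings of the spectrogram. A quick standalone check of the same formulas on a toy array (NumPy only):

import numpy as np

s = np.array([[1.0, 2.0],
              [3.0, 5.0]])

mean0_std1 = (s - np.mean(s)) / np.std(s)                         # zero mean, unit std
linear_0_1 = s / np.max(s)                                        # scale so the maximum is 1
linear_m1_1 = -1 + 2 * (s - np.min(s)) / (np.max(s) - np.min(s))  # map [min, max] onto [-1, 1]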
Example #10
def test_one_hot_basic():
    assert_equal(one_hot([1, 2]), [[0, 1, 0], [0, 0, 1]])
    assert_equal(one_hot([[1], [2], [1]], max_label=3),
                 [[0, 1, 0, 0], [0, 0, 1, 0], [0, 1, 0, 0]])
Example #11
def test_one_hot_out():
    out = np.empty((2, 3), dtype='uint8')
    assert_equal(one_hot([1, 2], out=out), [[0, 1, 0], [0, 0, 1]])
    assert_equal(out, [[0, 1, 0], [0, 0, 1]])
Example #12
def test_one_hot_dtypes():
    int_dt = ['int8', 'int16', 'int32', 'int64']
    int_dt += ['u' + dt for dt in int_dt]
    float_dt = ['float64', 'float32', 'complex64', 'complex128']
    all_dt = int_dt + float_dt
    assert_(all(one_hot([5], dtype=dt).dtype == np.dtype(dt) for dt in all_dt))