Example #1
def _make_dataset(self, x, y):
    # Class labels must be integer indices before one-hot encoding;
    # np.int is deprecated, the builtin int is equivalent here.
    y = np.asarray(y, dtype=int)
    #if not hasattr(self, "classmap"):
    #    self.classmap = ClassMap(y)

    ds = DenseDesignMatrix(X=x, y=y)
    ds.convert_to_one_hot()
    return ds
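A minimal usage sketch of the same pattern (the shapes and random data are illustrative assumptions, not from the source):

import numpy as np
from pylearn2.datasets.dense_design_matrix import DenseDesignMatrix

x = np.random.rand(100, 20).astype('float32')  # 100 examples, 20 features
y = np.random.randint(0, 3, 100)               # integer labels for 3 classes
ds = DenseDesignMatrix(X=x, y=np.asarray(y, dtype=int))
ds.convert_to_one_hot()                        # y becomes a (100, 3) one-hot matrix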
Example #2
def _create_matrix_input(self, X, y):
    if self.is_convolution:
        # pylearn2's default `b01c` layout: (batch, rows, columns, channels).
        # Theano's own convolutions use `bc01`:
        #   input:   (batch size, channels, rows, columns)
        #   filters: (number of filters, channels, rows, columns)
        # See: http://benanne.github.io/2014/04/03/faster-convolutions-in-theano.html
        input_space = Conv2DSpace(shape=X.shape[1:3],
                                  num_channels=X.shape[-1])
        view = input_space.get_origin_batch(X.shape[0])
        return DenseDesignMatrix(topo_view=view, y=y), input_space
    else:
        return DenseDesignMatrix(X=X, y=y), None
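A hedged construction sketch for the convolutional branch (the image batch shape is an illustrative assumption):

import numpy as np
from pylearn2.space import Conv2DSpace
from pylearn2.datasets.dense_design_matrix import DenseDesignMatrix

X_img = np.zeros((32, 28, 28, 3), dtype='float32')  # (batch, rows, cols, channels)
space = Conv2DSpace(shape=X_img.shape[1:3], num_channels=X_img.shape[-1])
view = space.get_origin_batch(X_img.shape[0])       # zero-filled batch in b01c layout
ds = DenseDesignMatrix(topo_view=view, y=None)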
Example #3
    def _get_dataset(self, X, y=None):
        """
        Construct a pylearn2 dataset.

        Parameters
        ----------
        X : array_like
            Training examples.
        y : array_like, optional
            Labels.
        """
        from pylearn2.datasets import DenseDesignMatrix

        X = np.asarray(X)
        assert X.ndim > 1
        if y is not None:
            y = self._get_labels(y)
        if X.ndim == 2:
            return DenseDesignMatrix(X=X, y=y)
        return DenseDesignMatrix(topo_view=X, y=y)
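Note the dispatch: 2-D input is stored as a flat design matrix, while anything higher-dimensional is passed as `topo_view` so the topological (e.g. image) layout is preserved.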
Example #4
    def __linit(self, X, y):
        if self.verbose > 0:
            print("Lazy initialisation")

        layers = self.layers
        pylearn2mlp_layers = []
        self.units_per_layer = []
        # Input layer units
        self.units_per_layer += [X.shape[1]]

        for layer in layers[:-1]:
            self.units_per_layer += [layer[1]]

        # Output layer units
        self.units_per_layer += [y.shape[1]]

        if self.verbose > 0:
            print("Units per layer " + str(self.units_per_layer))

        for i, layer in enumerate(layers[:-1]):

            fan_in = self.units_per_layer[i] + 1
            fan_out = self.units_per_layer[i + 1]
            lim = np.sqrt(6) / (np.sqrt(fan_in + fan_out))
            layer_name = "Hidden_%i_%s" % (i, layer[0])
            activate_type = layer[0]
            if activate_type == "RectifiedLinear":
                hidden_layer = mlp.RectifiedLinear(dim=layer[1],
                                                   layer_name=layer_name,
                                                   irange=lim)
            elif activate_type == "Sigmoid":
                hidden_layer = mlp.Sigmoid(dim=layer[1],
                                           layer_name=layer_name,
                                           irange=lim)
            elif activate_type == "Tanh":
                hidden_layer = mlp.Tanh(dim=layer[1],
                                        layer_name=layer_name,
                                        irange=lim)
            elif activate_type == "Maxout":
                hidden_layer = maxout.Maxout(num_units=layer[1],
                                             num_pieces=layer[2],
                                             layer_name=layer_name,
                                             irange=lim)
            else:
                raise NotImplementedError(
                    "Layer of type %s are not implemented yet" % layer[0])
            pylearn2mlp_layers += [hidden_layer]

        output_layer_info = layers[-1]
        output_layer_name = "Output_%s" % output_layer_info[0]

        fan_in = self.units_per_layer[-2] + 1
        fan_out = self.units_per_layer[-1]
        lim = np.sqrt(6) / (np.sqrt(fan_in + fan_out))

        if (output_layer_info[0] == "Linear"):
            output_layer = mlp.Linear(dim=self.units_per_layer[-1],
                                      layer_name=output_layer_name,
                                      irange=lim)
            pylearn2mlp_layers += [output_layer]

        self.mlp = mlp.MLP(pylearn2mlp_layers, nvis=self.units_per_layer[0])
        self.ds = DenseDesignMatrix(X=X, y=y)
        self.trainer.setup(self.mlp, self.ds)
        inputs = self.mlp.get_input_space().make_theano_batch()
        self.f = theano.function([inputs], self.mlp.fprop(inputs))
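Once `__linit` has run, `self.f` is the compiled forward pass; a hedged usage sketch (the floatX cast is an assumption about the caller, not shown in the source):

preds = self.f(X.astype(theano.config.floatX))  # network outputs, one row per sample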
Example #5
for params in ParameterGrid(param_grid):
    k_min, k_max = params['k_min'], params['k_max']
    predAll = [np.zeros(y_valid.shape) for s in range(n_add)]
    for i in range(nIter):
        seed = i + 9198
        R = col_k_ones_matrix(X.shape[1],
                              m,
                              k_min=k_min,
                              k_max=k_max,
                              seed=seed)
        np.random.seed(seed + 33)
        R.data = np.random.choice([1, -1], R.data.size)
        X3 = X * R                          # random sparse projection
        X1 = np.sign(X3) * np.abs(X3)**po   # signed power transform
        X2 = scaler.fit_transform(X1)
        training = DenseDesignMatrix(X=X2[train_idx], y=yMat[train_idx])
        l1 = RectifiedLinear(layer_name='l1',
                             irange=ir,
                             dim=dim,
                             max_col_norm=1.)
        l2 = RectifiedLinear(layer_name='l2',
                             irange=ir,
                             dim=dim,
                             max_col_norm=1.)
        l3 = RectifiedLinear(layer_name='l3',
                             irange=ir,
                             dim=dim,
                             max_col_norm=1.)
        output = Softmax(layer_name='y',
                         n_classes=9,
                         irange=ir,
Example #6
def process(mdl, ds, batch_size=100):
    # This batch size must evenly divide the total number of samples!
    mdl.set_batch_size(batch_size)
    X = mdl.get_input_space().make_batch_theano()
    Y = mdl.fprop(X)
    y = T.argmax(Y, axis=1)
    f = function([X], y)
    yhat = []
    for i in range(ds.X.shape[0] // batch_size):  # integer division; Python 3 safe
        x_arg = ds.X[i * batch_size:(i + 1) * batch_size, :]
        yhat.append(f(x_arg.astype(X.dtype)))
    return np.array(yhat).ravel()


tst = pickle.load(open('saved_tst.pkl', 'rb'))
ds = DenseDesignMatrix(X=tst)
clfs = glob('ensemble_clf/*.pkl')
if len(clfs) % 2 == 0:
    raise ValueError('Use an odd number of voters to avoid ties!')
mdls = (serial.load(f) for f in clfs)

fname = 'results.csv'
test_size = ds.X.shape[0]
res = np.zeros((len(clfs), test_size), dtype='float32')
for n, mdl in enumerate(mdls):
    res[n, :] = process(mdl, ds, batch_size=500)
    print "Processed model ", n
    #Fix for CUDA memory issues - wut?
    del mdl
    gc.collect()
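The odd-voter check above exists so a majority vote cannot tie; a hedged sketch of the vote that would follow (scipy.stats.mode is an assumed choice, not shown in the source):

from scipy.stats import mode

# Per-column majority over the model axis; res is (n_models, n_samples).
y_vote = mode(res, axis=0)[0].ravel().astype('int32')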
Example #7
print(X.shape)
#X = X - X.mean(axis=0)
onehot = prep.OneHotEncoder()
y = np.array(labels)
print(y.shape)

# sklearn.cross_validation was removed in newer scikit-learn;
# model_selection provides the same train_test_split.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.05,
                                                    random_state=42)

y_train_oh = onehot.fit_transform(np.reshape(y_train, (-1, 1))).toarray()
# Reuse the encoder fitted on the training labels so both splits share the
# same columns; .toarray() yields the plain ndarray DenseDesignMatrix expects.
y_test_oh = onehot.transform(np.reshape(y_test, (-1, 1))).toarray()
print(y_train_oh)

ds = DenseDesignMatrix(X=X_train, y=y_train_oh)
ds_test = DenseDesignMatrix(X=X_test, y=y_test_oh)

#preprocessor = preprocessing.ZCA()
#ds.apply_preprocessor(preprocessor = preprocessor, can_fit = True)
#ds_test.apply_preprocessor(preprocessor = preprocessor, can_fit = True)

print(X_train.shape, X_test.shape)

l1 = mlp.RectifiedLinear(
    layer_name='l1',
    #sparse_init=12,
    irange=0.1,
Example #8
    def __init__(self,
                 patient_id,
                 which_set,
                 leave_out_seizure_idx_valid,
                 leave_out_seizure_idx_test,
                 data_dir,
                 preprocessor_dir,
                 batch_size=None,
                 balance_class=True,
                 decompose_subbands=False,
                 axes=('b', 0, 1, 'c'),
                 default_seed=0):

        self.balance_class = balance_class
        self.batch_size = batch_size

        EpilepsiaeEEGLoader.__init__(
            self,
            patient_id=patient_id,
            which_set=which_set,
            leave_out_seizure_idx_valid=leave_out_seizure_idx_valid,
            leave_out_seizure_idx_test=leave_out_seizure_idx_test,
            data_dir=data_dir)

        print('Load signal ...')
        t = time.time()
        # (# of segments, # of samples, # of channels)
        raw_X, y = self.load_data()
        elapsed = time.time() - t
        print(' Elapsed time: ' + str(elapsed) + ' seconds')

        # Preprocessing
        print('Scaling signal ...')
        t = time.time()
        if which_set == 'train':
            # Reshape the data back to (number of samples x number of channels) for pre-processing
            unrolled_X = np.reshape(raw_X,
                                    (-1, self.scalp_channel_labels.size))

            scaler = preprocessing.StandardScaler()
            # scaler = preprocessing.MinMaxScaler(feature_range=(-1, 1))
            scaler = scaler.fit(unrolled_X)

            with open(
                    os.path.join(
                        preprocessor_dir, self.patient_id + '_scaler_eeg_' +
                        str(self.leave_out_seizure_idx_valid) + '_' +
                        str(self.leave_out_seizure_idx_test) + '.pkl'),
                    'wb') as f:  # pickle requires a binary-mode file
                pickle.dump(scaler, f)

            scaled_X = raw_X.copy()
            for seg_idx in range(scaled_X.shape[0]):
                scaled_X[seg_idx, :, :] = scaler.transform(
                    scaled_X[seg_idx, :, :])
        else:
            with open(
                    os.path.join(
                        preprocessor_dir, self.patient_id + '_scaler_eeg_' +
                        str(self.leave_out_seizure_idx_valid) + '_' +
                        str(self.leave_out_seizure_idx_test) + '.pkl'), 'rb') as f:
                scaler = pickle.load(f)

            scaled_X = raw_X.copy()
            for seg_idx in range(scaled_X.shape[0]):
                scaled_X[seg_idx, :, :] = scaler.transform(
                    scaled_X[seg_idx, :, :])
        elapsed = time.time() - t
        print(' Elapsed time: ' + str(elapsed) + ' seconds')

        raw_X = None

        if decompose_subbands:

            def bandpass_fir(data,
                             lowcut_f,
                             highcut_f,
                             sampling_rate,
                             window='hamming'):
                '''
                Bandpass-filter the data with a zero-phase FIR filter.

                Parameters
                ----------
                data : numpy array
                    Input data with shape [n_samples, n_channels].
                lowcut_f : float
                    Low cutoff frequency in Hz.
                highcut_f : float
                    High cutoff frequency in Hz.
                sampling_rate : float
                    Sampling rate of the data in Hz.
                window : str
                    Window passed to scipy.signal.firwin.

                Returns
                -------
                Filtered data with the same shape as the input.
                '''

                nyq_f = sampling_rate * 0.5
                # Filter length: at least three cycles of the lowest frequency.
                n_taps = int(max(3 * (sampling_rate / float(lowcut_f)),
                                 3 * nyq_f))

                # Even-length (Type II) FIR filters have zero response at the
                # Nyquist frequency; that is acceptable here since every
                # passband stops below Nyquist.
                if n_taps % 2 == 1:
                    n_taps = n_taps + 1

                # `nyq` is deprecated in newer scipy; `fs` takes the raw rate.
                taps = firwin(n_taps, [lowcut_f, highcut_f],
                              fs=sampling_rate,
                              pass_zero=False,
                              window=window,
                              scale=False)

                # If the data is too short, zero-padding
                extra = (3 * taps.size) - data.shape[0]
                half_extra = int(np.ceil(extra / 2.0)) + 1
                if half_extra > 0:
                    padded_data = np.lib.pad(data, ((half_extra, half_extra),
                                                    (0, 0)),
                                             'constant',
                                             constant_values=0)
                else:
                    padded_data = data

                filtered_data = filtfilt(taps, 1.0, padded_data, axis=0)

                if half_extra > 0:
                    return filtered_data[half_extra:-half_extra, :]
                else:
                    return filtered_data

            print('Decompose EEG signals into 5 sub-bands ...')

            # Decompose the EEG data in each segment into 5 sub-bands
            preprocessed_X = np.zeros((
                scaled_X.shape[0],  # Number of segments
                scaled_X.shape[1],  # Segment samples
                5,  # Number of sub-bands
                scaled_X.shape[2]))  # Number of channels

            t = time.time()
            for seg_idx in range(preprocessed_X.shape[0]):
                delta_X = bandpass_fir(scaled_X[seg_idx], 0.5, 4,
                                       self.sampling_rate)  # Delta 0.5-4 Hz
                theta_X = bandpass_fir(scaled_X[seg_idx], 4, 8,
                                       self.sampling_rate)  # Theta 4-8 Hz
                alpha_X = bandpass_fir(scaled_X[seg_idx], 8, 15,
                                       self.sampling_rate)  # Alpha 8-15 Hz
                beta_X = bandpass_fir(scaled_X[seg_idx], 15, 30,
                                      self.sampling_rate)  # Beta 15-30 Hz
                gamma_X = bandpass_fir(
                    scaled_X[seg_idx], 30, (self.sampling_rate * 0.5) - 0.1,
                    self.sampling_rate)  # Gamma 30-Nyquist Hz

                for ch_idx in range(preprocessed_X.shape[3]):
                    preprocessed_X[seg_idx][:, 0, ch_idx] = delta_X[:, ch_idx]
                    preprocessed_X[seg_idx][:, 1, ch_idx] = theta_X[:, ch_idx]
                    preprocessed_X[seg_idx][:, 2, ch_idx] = alpha_X[:, ch_idx]
                    preprocessed_X[seg_idx][:, 3, ch_idx] = beta_X[:, ch_idx]
                    preprocessed_X[seg_idx][:, 4, ch_idx] = gamma_X[:, ch_idx]

                if seg_idx % 20 == 0 or seg_idx == preprocessed_X.shape[0] - 1:
                    print(' {0} segments {1} seconds ...'.format(
                        seg_idx + 1,
                        time.time() - t))

            elapsed = time.time() - t
            print(' Elapsed time: ' + str(elapsed) + ' seconds')

        else:
            # Reshape the preprocessed EEG data into a compatible format for CNN in pylearn2
            preprocessed_X = np.reshape(
                scaled_X,
                (
                    scaled_X.shape[0],  # Number of segments
                    scaled_X.shape[1],  # Segment samples
                    1,  # EEG data are time-series data (i.e., 1 dimension)
                    scaled_X.shape[2]))  # Number of channels

        scaled_X = None

        # Print shape of input data
        print('------------------------------')
        print('Dataset: {0}'.format(self.which_set))
        print('Number of samples: {0}'.format(preprocessed_X.shape[0]))
        print(' Preictal samples: {0}'.format(self.preictal_samples))
        print(' Nonictal samples: {0}'.format(self.nonictal_samples))
        print('Shape of each sample: ({0}, {1})'.format(
            preprocessed_X.shape[1], preprocessed_X.shape[2]))
        print('Number of channels: {0}'.format(preprocessed_X.shape[3]))
        print('------------------------------')

        # Create a view converter
        view_converter = DefaultViewConverter(
            shape=[
                preprocessed_X.shape[1],  # Segment samples
                preprocessed_X.shape[2],  # Number of sub-bands
                preprocessed_X.shape[3]
            ],  # Number of channels
            axes=('b', 0, 1, 'c'))

        # Sanity check
        view_converted_X = view_converter.topo_view_to_design_mat(
            preprocessed_X)
        assert np.all(preprocessed_X == view_converter.design_mat_to_topo_view(
            view_converted_X))

        preprocessed_X = None

        if self.balance_class and (self.which_set == 'train'
                                   or self.which_set == 'valid_train'):
            self.X_full = view_converted_X
            self.y_full = y

            (X, y) = self.get_data()
        else:
            # Zero-padding (if necessary)
            if self.batch_size is not None:
                view_converted_X, y = self.zero_pad(view_converted_X, y,
                                                    self.batch_size)

            X = view_converted_X

        # Initialize DenseDesignMatrix
        DenseDesignMatrix.__init__(self,
                                   X=X,
                                   y=y,
                                   view_converter=view_converter,
                                   axes=axes)
Example #9
print(k_max, epochs)
predAll_train = np.zeros((num_train, 9))
predAll_test = np.zeros((test.shape[0], 9))
for i in range(nIter):
    seed = i + 987654
    R = col_k_ones_matrix(X.shape[1],
                          m,
                          k_min=k_min,
                          k_max=k_max,
                          seed=seed)
    np.random.seed(seed + 34)
    R.data = np.random.choice([1, -1], R.data.size)
    X3 = X * R                          # random sparse projection
    X1 = np.sign(X3) * np.abs(X3)**po   # signed power transform
    X2 = scaler.fit_transform(X1)
    training = DenseDesignMatrix(X=X2[:num_train], y=yMat)
    l1 = RectifiedLinear(layer_name='l1',
                         irange=ir,
                         dim=dim,
                         max_col_norm=1.)
    l2 = RectifiedLinear(layer_name='l2',
                         irange=ir,
                         dim=dim,
                         max_col_norm=1.)
    l3 = RectifiedLinear(layer_name='l3',
                         irange=ir,
                         dim=dim,
                         max_col_norm=1.)
    output = Softmax(layer_name='y',
                     n_classes=9,
                     irange=ir,
Example #10
training = pd.read_csv(file_train, index_col=0)
num_train = training.shape[0]
y = training['target'].values
yMat = pd.get_dummies(training['target']).values
X = training.iloc[:, :93].values
scaler = pp.StandardScaler()
X2 = scaler.fit_transform(X ** .6)


# sklearn.cross_validation was removed; this assumes `from sklearn import
# model_selection` and takes only the first fold's split, as the original did.
kf = model_selection.StratifiedKFold(n_splits=5, shuffle=True, random_state=345)
train_idx, valid_idx = next(kf.split(X2, y))
y_train = yMat[train_idx]
y_valid = yMat[valid_idx]

training = DenseDesignMatrix(X=X2[train_idx], y=y_train)
valid = DenseDesignMatrix(X=X2[valid_idx], y=y_valid)

# [l1, l2, l3, l4, output]
nIter = 20

# Params for RI
m = 200
k = 3

# Params for NN
epochs = 20
epochs_add = 2
n_add = 60

bs = 64
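These constants parameterize random-projection and network-training loops of the kind shown in Examples #5 and #9.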
Example #11
def main( x ):

	l1_dim = x[0]
	l2_dim = x[1]
	learning_rate = x[2]
	momentum = x[3]
	l1_dropout = x[4]
	decay_factor = x[5]
	
	min_lr = 1e-7

	#

	train = np.loadtxt( train_file, delimiter = ',' )
	x_train = train[:,0:-1]
	y_train = train[:,-1]
	y_train.shape = ( y_train.shape[0], 1 )

	# 

	validation = np.loadtxt( validation_file, delimiter = ',' )
	x_valid = validation[:,0:-1]
	y_valid = validation[:,-1]
	y_valid.shape = ( y_valid.shape[0], 1 )

	#

	#input_space = VectorSpace( dim = x.shape[1] )
	full = DenseDesignMatrix( X = x_train, y = y_train )
	valid = DenseDesignMatrix( X = x_valid, y = y_valid )

	l1 = mlp.RectifiedLinear( 
		layer_name='l1',
		irange=.001,
		dim = l1_dim,
		# "Rather than using weight decay, we constrain the norms of the weight vectors"
		max_col_norm=1.
	)

	l2 = mlp.RectifiedLinear(
		layer_name='l2',
		irange=.001,
		dim = l2_dim,
		max_col_norm=1.
	)

	output = mlp.Linear( dim = 1, layer_name='y', irange=.0001 )

	layers = [l1, l2, output]
	nvis = x_train.shape[1]

	mdl = mlp.MLP( layers, nvis = nvis )	# input_space = input_space

	#lr = .001
	#epochs = 100
	
	decay = sgd.ExponentialDecay( decay_factor = decay_factor, min_lr = min_lr )

	trainer = sgd.SGD(
		learning_rate = learning_rate,
		batch_size=128,
		learning_rule=learning_rule.Momentum( momentum ),
		
		update_callbacks = [ decay ],

		# Remember, default dropout is .5
		cost = Dropout( input_include_probs = {'l1': l1_dropout},
				   input_scales={'l1': 1.}),

		#termination_criterion = EpochCounter(epochs),
		termination_criterion = MonitorBased(
			channel_name = "valid_objective",
			prop_decrease = 0.001,				# 0.1% of objective
			N = 10	
		),

		# valid_objective is MSE

		monitoring_dataset = { 'train': full, 'valid': valid }
	)

	watcher = best_params.MonitorBasedSaveBest( channel_name = 'valid_objective', save_path = output_model_file )
	
	experiment = Train( dataset = full, model = mdl, algorithm = trainer, extensions = [ watcher ] )
	experiment.main_loop()

	###

	error = get_error_from_model( output_model_file )
	print "*** error: {} ***".format( error )
	return error
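A hedged example call (the hyperparameter values are illustrative only, not from the source):

# x = [l1_dim, l2_dim, learning_rate, momentum, l1_dropout, decay_factor]
error = main([512, 256, 0.01, 0.9, 0.5, 1.000002])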
Example #12
from pylearn2.train_extensions import best_params
from pylearn2.space import VectorSpace
import pickle
import numpy as np

def to_one_hot(l):
    # Assumes labels are the integers 0 .. n_classes-1.
    out = np.zeros((len(l), len(set(l))))
    for n, i in enumerate(l):
        out[n, i] = 1.
    return out

x = pickle.load(open('saved_x.pkl', 'rb'))
y = pickle.load(open('saved_y.pkl', 'rb'))
y = to_one_hot(y)
in_space = VectorSpace(dim=x.shape[1])
full = DenseDesignMatrix(X=x, y=y)

l1 = mlp.RectifiedLinear(layer_name='l1',
                         sparse_init=12,
                         dim=5000,
                         max_col_norm=1.)

l2 = mlp.RectifiedLinear(layer_name='l2',
                         sparse_init=12,
                         dim=5000,
                         max_col_norm=1.)

l3 = mlp.RectifiedLinear(layer_name='l3',
                         sparse_init=12,
                         dim=5000,
                         max_col_norm=1.)
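The snippet ends after the third hidden layer; a hedged continuation in the pattern of Examples #5 and #9 (the softmax settings and the use of `in_space` are assumptions, not from the source):

output = mlp.Softmax(layer_name='y',
                     n_classes=y.shape[1],
                     irange=0.)
net = mlp.MLP([l1, l2, l3, output], input_space=in_space)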