import os

import numpy as np
import keras
import keras.callbacks as cb
import matplotlib.pyplot as plt
import matplotlib.pyplot as plt1
from keras.layers import Input, Dense
from keras.models import Model
from keras.regularizers import l2
from keras.callbacks import LearningRateScheduler
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 -- registers the '3d' projection

# Project-local helpers are expected to be importable here (exact module paths
# are assumptions based on usage below): dh (data loading), io (DeepLearningRoot),
# mn (training monitor callback), and step_decay (learning-rate schedule).


def trainDAE(target, dataPath, refSampleInd, trainIndex, relevantMarkers, mode,
             keepProb, denoise, loadModel, path):
    # Pool all source samples (every training sample except the reference one).
    sourceX = []
    for i in np.arange(trainIndex.size - 1):
        sourceIndex = np.delete(trainIndex, refSampleInd)[i]
        source = dh.loadDeepCyTOFData(dataPath, sourceIndex,
                                      relevantMarkers, mode)
        # Keep only cells with at most one zero-valued marker.
        numZerosOK = 1
        toKeepS = np.sum(source.X == 0, axis=1) <= numZerosOK
        if i == 0:
            sourceX = source.X[toKeepS]
        else:
            sourceX = np.concatenate([sourceX, source.X[toKeepS]], axis=0)

    # Pre-process the source data (log transform).
    sourceX = np.log(1 + np.abs(sourceX))

    numZerosOK = 1
    toKeepT = np.sum(target.X == 0, axis=1) <= numZerosOK

    inputDim = target.X.shape[1]
    ae_encodingDim = 25
    l2_penalty_ae = 1e-2

    autoencoder = None  # returned unchanged if denoise is False
    if denoise:
        if loadModel:
            from keras.models import load_model
            autoencoder = load_model(
                os.path.join(io.DeepLearningRoot(), path + '/denoisedAE.h5'))
        else:
            # Train the de-noising autoencoder and save it.
            trainTarget_ae = np.concatenate([sourceX, target.X[toKeepT]],
                                            axis=0)
            # Corrupt the input by randomly zeroing entries (dropout noise).
            trainData_ae = trainTarget_ae * np.random.binomial(
                n=1, p=keepProb, size=trainTarget_ae.shape)

            input_cell = Input(shape=(inputDim,))
            encoded = Dense(ae_encodingDim, activation='relu',
                            kernel_regularizer=l2(l2_penalty_ae))(input_cell)
            encoded1 = Dense(ae_encodingDim, activation='relu',
                             kernel_regularizer=l2(l2_penalty_ae))(encoded)
            decoded = Dense(inputDim, activation='linear',
                            kernel_regularizer=l2(l2_penalty_ae))(encoded1)
            autoencoder = Model(inputs=input_cell, outputs=decoded)
            autoencoder.compile(optimizer='rmsprop', loss='mse')
            autoencoder.fit(trainData_ae, trainTarget_ae,
                            epochs=80, batch_size=128, shuffle=True,
                            validation_split=0.1, verbose=0,
                            callbacks=[mn.monitor(),
                                       cb.EarlyStopping(monitor='val_loss',
                                                        patience=25,
                                                        mode='auto')])
            autoencoder.save(
                os.path.join(io.DeepLearningRoot(), path + '/denoisedAE.h5'))

    del sourceX
    plt.close('all')
    return autoencoder
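
# Hypothetical usage sketch (not part of the original source): the data path,
# marker indices, sample indices, keep probability, mode string, and save path
# below are illustrative assumptions only.
def _example_trainDAE_usage():
    dataPath = 'Data/GvHD'          # assumed data directory
    relevantMarkers = np.arange(6)  # assumed marker indices
    trainIndex = np.arange(12)      # assumed training sample indices
    refSampleInd = 0                # assumed reference sample
    target = dh.loadDeepCyTOFData(dataPath, trainIndex[refSampleInd],
                                  relevantMarkers, 'GvHD')
    # Train a de-noising autoencoder that keeps ~80% of input entries intact.
    autoencoder = trainDAE(target, dataPath, refSampleInd, trainIndex,
                           relevantMarkers, 'GvHD', keepProb=0.8,
                           denoise=True, loadModel=False, path='GvHD')
    # De-noise the (log-transformed) target data with the trained model.
    return autoencoder.predict(np.log(1 + np.abs(target.X)))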
def trainClassifier(trainSample, mode='None', i=0,
                    hiddenLayersSizes=[12, 6, 3], activation='softplus',
                    l2_penalty=1e-4, path='None'):
    # Remove unlabeled cells (label 0) for training.
    x_train = trainSample.X[trainSample.y != 0]
    y_train = trainSample.y[trainSample.y != 0]

    # Shift labels so they start from 0.
    y_train = np.int_(y_train) - 1

    # Special case in GvHD: labels in those files are 0,1,3,4 with no 2.
    if mode == 'GvHD' and (i == 5 or i == 9 or i == 10 or i == 11):
        y_train[y_train != 0] = y_train[y_train != 0] - 1

    # Expand labels to work with sparse categorical cross-entropy.
    y_train = np.expand_dims(y_train, -1)

    # Construct a feed-forward neural network.
    inputLayer = Input(shape=(x_train.shape[1],))
    hidden1 = Dense(hiddenLayersSizes[0], activation=activation,
                    kernel_regularizer=l2(l2_penalty))(inputLayer)
    hidden2 = Dense(hiddenLayersSizes[1], activation=activation,
                    kernel_regularizer=l2(l2_penalty))(hidden1)
    hidden3 = Dense(hiddenLayersSizes[2], activation=activation,
                    kernel_regularizer=l2(l2_penalty))(hidden2)

    # numClasses = len(np.unique(trainSample.y)) - 1  # with 0 class
    numClasses = len(np.unique(trainSample.y))  # without 0 class
    # numClasses = 57  # for HMIS-2
    outputLayer = Dense(numClasses, activation='softmax')(hidden3)

    net = Model(inputs=inputLayer, outputs=outputLayer)

    lrate = LearningRateScheduler(step_decay)
    optimizer = keras.optimizers.RMSprop(lr=0.0)
    net.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy')
    net.fit(x_train, y_train, epochs=80, batch_size=128, shuffle=True,
            validation_split=0.1, verbose=0,
            callbacks=[lrate, mn.monitor(),
                       cb.EarlyStopping(monitor='val_loss', patience=25,
                                        mode='auto')])
    try:
        net.save(os.path.join(io.DeepLearningRoot(),
                              'savemodels/' + path + '/cellClassifier.h5'))
    except OSError:
        pass
    # plt.close('all')
    return net
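
# trainClassifier (and plotHidden below) pass a `step_decay` function, defined
# elsewhere in the project, to Keras' LearningRateScheduler. A minimal sketch
# of such a schedule is given here for reference; the initial rate, drop
# factor, and drop interval are assumptions, not the project's actual values.
def _example_step_decay(epoch):
    initial_lrate = 1e-3  # assumed starting learning rate
    drop = 0.5            # assumed multiplicative drop factor
    epochs_drop = 50.0    # assumed number of epochs between drops
    return float(initial_lrate * drop ** np.floor((1 + epoch) / epochs_drop))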
def plotHidden(trainSample, testSample, mode='None', i=0,
               hiddenLayersSizes=[12, 6, 3], activation='softplus',
               l2_penalty=1e-4, path='None'):
    # Remove unlabeled cells (label 0) for training and testing.
    x_train = trainSample.X[trainSample.y != 0]
    y_train = trainSample.y[trainSample.y != 0]
    x_test = testSample.X[testSample.y != 0]
    y_test = testSample.y[testSample.y != 0]

    # Shift labels so they start from 0.
    y_train = np.int_(y_train) - 1
    y_test = np.int_(y_test) - 1

    # Special case in GvHD: labels in those files are 0,1,3,4 with no 2.
    if mode == 'GvHD' and (i == 5 or i == 9 or i == 10 or i == 11):
        y_train[y_train != 0] = y_train[y_train != 0] - 1

    # Expand labels to work with sparse categorical cross-entropy.
    y_train = np.expand_dims(y_train, -1)
    y_test = np.expand_dims(y_test, -1)

    # Construct a feed-forward neural network.
    inputLayer = Input(shape=(x_train.shape[1],))
    hidden1 = Dense(hiddenLayersSizes[0], activation=activation,
                    kernel_regularizer=l2(l2_penalty))(inputLayer)
    hidden2 = Dense(hiddenLayersSizes[1], activation=activation,
                    kernel_regularizer=l2(l2_penalty))(hidden1)
    hidden3 = Dense(hiddenLayersSizes[2], activation=activation,
                    kernel_regularizer=l2(l2_penalty))(hidden2)
    numClasses = len(np.unique(trainSample.y)) - 1
    outputLayer = Dense(numClasses, activation='softmax')(hidden3)

    # Plot the test data in the 3rd (3-dimensional) hidden layer.
    encoder = Model(inputs=inputLayer, outputs=hidden3)
    h3_data = encoder.predict(x_test, verbose=0)
    fig = plt1.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(h3_data[:, 0], h3_data[:, 1], h3_data[:, 2],
               s=20, c=np.squeeze(y_test))
    plt1.show()

    net = Model(inputs=inputLayer, outputs=outputLayer)
    lrate = LearningRateScheduler(step_decay)
    optimizer = keras.optimizers.RMSprop(lr=0.0)
    net.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy')
    net.fit(x_train, y_train, epochs=80, batch_size=128, shuffle=True,
            validation_split=0.1, verbose=0,
            callbacks=[lrate, mn.monitor(),
                       cb.EarlyStopping(monitor='val_loss', patience=25,
                                        mode='auto')])
    try:
        net.save(os.path.join(io.DeepLearningRoot(),
                              'savemodels/' + path + '/cellClassifier.h5'))
    except OSError:
        pass
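
# Hypothetical usage sketch (not part of the original source): the sample
# objects, mode string, file index, and save path below are illustrative
# assumptions only.
def _example_classifier_usage(trainSample, testSample):
    # Train the feed-forward cell classifier on the labeled cells.
    net = trainClassifier(trainSample, mode='GvHD', i=0, path='GvHD')
    # Predict class probabilities for the labeled test cells (label 0 = unlabeled).
    probs = net.predict(testSample.X[testSample.y != 0])
    # Visualize the 3-D third hidden layer of a freshly built network.
    plotHidden(trainSample, testSample, mode='GvHD', i=0, path='GvHD')
    return probs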