def trainDAE(target, dataPath, refSampleInd, trainIndex, relevantMarkers, mode,
             keepProb, denoise, loadModel, path):
    # Pool all source samples, keeping only cells with at most numZerosOK
    # zero-valued markers.
    sourceX = []
    for i in np.arange(trainIndex.size - 1):
        sourceIndex = np.delete(trainIndex, refSampleInd)[i]
        source = dh.loadDeepCyTOFData(dataPath, sourceIndex,
                                      relevantMarkers, mode)
        numZerosOK = 1
        toKeepS = np.sum((source.X == 0), axis=1) <= numZerosOK
        if i == 0:
            sourceX = source.X[toKeepS]
        else:
            sourceX = np.concatenate([sourceX, source.X[toKeepS]], axis=0)

    # Pre-process source.
    sourceX = np.log(1 + np.abs(sourceX))

    numZerosOK = 1
    toKeepT = np.sum((target.X == 0), axis=1) <= numZerosOK

    inputDim = target.X.shape[1]
    ae_encodingDim = 25
    l2_penalty_ae = 1e-2

    autoencoder = None  # returned unchanged when denoise is False
    if denoise:
        if loadModel:
            from keras.models import load_model
            autoencoder = load_model(
                os.path.join(io.DeepLearningRoot(),
                             path + '/denoisedAE.h5'))
        else:
            # Train the de-noising autoencoder and save it.
            trainTarget_ae = np.concatenate([sourceX, target.X[toKeepT]],
                                            axis=0)
            trainData_ae = trainTarget_ae * np.random.binomial(
                n=1, p=keepProb, size=trainTarget_ae.shape)

            input_cell = Input(shape=(inputDim,))
            encoded = Dense(ae_encodingDim, activation='relu',
                            W_regularizer=l2(l2_penalty_ae))(input_cell)
            encoded1 = Dense(ae_encodingDim, activation='relu',
                             W_regularizer=l2(l2_penalty_ae))(encoded)
            decoded = Dense(inputDim, activation='linear',
                            W_regularizer=l2(l2_penalty_ae))(encoded1)

            autoencoder = Model(input=input_cell, output=decoded)
            autoencoder.compile(optimizer='rmsprop', loss='mse')
            autoencoder.fit(trainData_ae, trainTarget_ae,
                            nb_epoch=80,
                            batch_size=128,
                            shuffle=True,
                            validation_split=0.1,
                            verbose=0,
                            callbacks=[mn.monitor(),
                                       cb.EarlyStopping(monitor='val_loss',
                                                        patience=25,
                                                        mode='auto')])
            autoencoder.save(os.path.join(io.DeepLearningRoot(),
                                          path + '/denoisedAE.h5'))
    del sourceX
    plt.close('all')
    return autoencoder
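
# --- Hedged usage sketch (not part of the original file) ---
# How trainDAE() might be driven; it assumes the module's own helpers (dh, io,
# np) and a data layout with one file per sample index. All concrete values
# below (indices, keepProb, path) are illustrative assumptions only.
#
# trainIndex = np.arange(1, 6)             # hypothetical sample indices
# refSampleInd = 0                         # position of the reference sample
# target = dh.loadDeepCyTOFData(dataPath, trainIndex[refSampleInd],
#                               relevantMarkers, mode)
# dae = trainDAE(target, dataPath, refSampleInd, trainIndex, relevantMarkers,
#                mode, keepProb=.8, denoise=True, loadModel=False,
#                path='savemodels/run1')
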
def trainClassifier(trainSample, mode='None', i=0,
                    hiddenLayersSizes=[12, 6, 3], activation='softplus',
                    l2_penalty=1e-4, path='None'):
    # Remove unlabeled cells for training.
    x_train = trainSample.X[trainSample.y != 0]
    y_train = trainSample.y[trainSample.y != 0]

    # Labels start from 0.
    y_train = np.int_(y_train) - 1

    # Special case in GvHD: labels in those files are 0, 1, 3, 4 with no 2.
    if mode == 'GvHD' and (i == 5 or i == 9 or i == 10 or i == 11):
        y_train[y_train != 0] = y_train[y_train != 0] - 1

    # Expand labels, to work with sparse categorical cross entropy.
    y_train = np.expand_dims(y_train, -1)

    # Construct a feed-forward neural network.
    inputLayer = Input(shape=(x_train.shape[1],))
    hidden1 = Dense(hiddenLayersSizes[0], activation=activation,
                    kernel_regularizer=l2(l2_penalty))(inputLayer)
    hidden2 = Dense(hiddenLayersSizes[1], activation=activation,
                    kernel_regularizer=l2(l2_penalty))(hidden1)
    hidden3 = Dense(hiddenLayersSizes[2], activation=activation,
                    kernel_regularizer=l2(l2_penalty))(hidden2)

    # numClasses = len(np.unique(trainSample.y)) - 1  # with 0 class
    numClasses = len(np.unique(trainSample.y))  # without 0 class
    # numClasses = 57  # for HMIS-2
    outputLayer = Dense(numClasses, activation='softmax')(hidden3)

    net = Model(inputs=inputLayer, outputs=outputLayer)

    lrate = LearningRateScheduler(step_decay)
    optimizer = keras.optimizers.rmsprop(lr=0.0)
    net.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy')
    net.fit(x_train, y_train, epochs=80, batch_size=128, shuffle=True,
            validation_split=0.1, verbose=0,
            callbacks=[lrate, mn.monitor(),
                       cb.EarlyStopping(monitor='val_loss',
                                        patience=25, mode='auto')])
    try:
        net.save(os.path.join(io.DeepLearningRoot(),
                              'savemodels/' + path + '/cellClassifier.h5'))
    except OSError:
        pass
    # plt.close('all')
    return net
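
# --- Hedged usage sketch (not part of the original file) ---
# trainSample is expected to expose .X (cells x markers) and .y (cell-type
# labels, with 0 meaning unlabeled). The variable names below are
# illustrative assumptions, not part of the original pipeline.
#
# classifier = trainClassifier(trainSample, mode=mode, i=refSampleInd,
#                              path='run1')
# predProbs = classifier.predict(testSample.X)      # class probabilities
# predLabel = np.argmax(predProbs, axis=1) + 1      # back to 1-based labels
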
def calibrate(target, source, sourceIndex, predLabel, path):
    mmdNetLayerSizes = [25, 25]
    l2_penalty = 1e-2
    # init = lambda shape: initializers.normal(shape, scale=.1e-4)
    space_dim = target.X.shape[1]

    calibInput = Input(shape=(space_dim,))
    block1_bn1 = BatchNormalization()(calibInput)
    block1_a1 = Activation('relu')(block1_bn1)
    block1_w1 = Dense(mmdNetLayerSizes[0], activation='linear',
                      W_regularizer=l2(l2_penalty),
                      init='random_uniform')(block1_a1)
    block1_bn2 = BatchNormalization()(block1_w1)
    block1_a2 = Activation('relu')(block1_bn2)
    block1_w2 = Dense(space_dim, activation='linear',
                      W_regularizer=l2(l2_penalty),
                      init='random_uniform')(block1_a2)
    block1_output = Add()([block1_w2, calibInput])

    block2_bn1 = BatchNormalization()(block1_output)
    block2_a1 = Activation('relu')(block2_bn1)
    block2_w1 = Dense(mmdNetLayerSizes[1], activation='linear',
                      W_regularizer=l2(l2_penalty),
                      init='random_uniform')(block2_a1)
    block2_bn2 = BatchNormalization()(block2_w1)
    block2_a2 = Activation('relu')(block2_bn2)
    block2_w2 = Dense(space_dim, activation='linear',
                      W_regularizer=l2(l2_penalty),
                      init='random_uniform')(block2_a2)
    block2_output = Add()([block2_w2, block1_output])

    block3_bn1 = BatchNormalization()(block2_output)
    block3_a1 = Activation('relu')(block3_bn1)
    block3_w1 = Dense(mmdNetLayerSizes[1], activation='linear',
                      W_regularizer=l2(l2_penalty),
                      init='random_uniform')(block3_a1)
    block3_bn2 = BatchNormalization()(block3_w1)
    block3_a2 = Activation('relu')(block3_bn2)
    block3_w2 = Dense(space_dim, activation='linear',
                      W_regularizer=l2(l2_penalty),
                      init='random_uniform')(block3_a2)
    block3_output = Add()([block3_w2, block2_output])

    calibMMDNet = Model(input=calibInput, output=block3_output)

    n = target.X.shape[0]
    p = np.random.permutation(n)
    toTake = p[range(int(.2 * n))]
    targetXMMD = target.X[toTake]
    targetYMMD = target.y[toTake]
    targetXMMD = targetXMMD[targetYMMD != 0]
    targetYMMD = targetYMMD[targetYMMD != 0]
    targetYMMD = np.reshape(targetYMMD, (-1, 1))

    n = source.X.shape[0]
    p = np.random.permutation(n)
    toTake = p[range(int(.2 * n))]
    sourceXMMD = source.X[toTake]
    sourceYMMD = predLabel[toTake]
    sourceXMMD = sourceXMMD[sourceYMMD != 0]
    sourceYMMD = sourceYMMD[sourceYMMD != 0]
    sourceYMMD = np.reshape(sourceYMMD, (-1, 1))

    lrate = LearningRateScheduler(step_decay)
    optimizer = opt.rmsprop(lr=0.0)
    calibMMDNet.compile(
        optimizer=optimizer,
        loss=lambda y_true, y_pred: cf.MMD(
            block3_output, targetXMMD,
            MMDTargetValidation_split=0.1).KerasCost(y_true, y_pred))
    sourceLabels = np.zeros(sourceXMMD.shape[0])
    calibMMDNet.fit(sourceXMMD, sourceLabels,
                    nb_epoch=500,
                    batch_size=1000,
                    validation_split=0.1,
                    verbose=0,
                    callbacks=[lrate,
                               mn.monitorMMD(sourceXMMD, sourceYMMD,
                                             targetXMMD, targetYMMD,
                                             calibMMDNet.predict),
                               cb.EarlyStopping(monitor='val_loss',
                                                patience=20, mode='auto')])
    plt.close('all')
    calibMMDNet.save_weights(
        os.path.join(io.DeepLearningRoot(),
                     'savemodels/' + path + '/ResNet' + str(sourceIndex) +
                     '.h5'))
    calibrateSource = Sample(calibMMDNet.predict(source.X), source.y)
    calibMMDNet = None
    return calibrateSource
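
# --- Hedged usage sketch (not part of the original file) ---
# calibrate() fits a residual MMD network that maps a source sample onto the
# reference (target) distribution; predLabel are the classifier's 1-based
# predictions on the uncalibrated source. The names below (classifier,
# source, target, sourceIndex) are illustrative assumptions.
#
# predLabel = np.argmax(classifier.predict(source.X), axis=1) + 1
# calibratedSource = calibrate(target, source, sourceIndex, predLabel,
#                              path='run1')
# # calibratedSource is a Sample whose .X lives in the target's space.
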
def plotHidden(trainSample, testSample, mode='None', i=0,
               hiddenLayersSizes=[12, 6, 3], activation='softplus',
               l2_penalty=1e-4, path='None'):
    # Remove unlabeled cells for training.
    x_train = trainSample.X[trainSample.y != 0]
    y_train = trainSample.y[trainSample.y != 0]
    x_test = testSample.X[testSample.y != 0]
    y_test = testSample.y[testSample.y != 0]

    # Labels start from 0.
    y_train = np.int_(y_train) - 1
    y_test = np.int_(y_test) - 1

    # Special case in GvHD: labels in those files are 0, 1, 3, 4 with no 2.
    if mode == 'GvHD' and (i == 5 or i == 9 or i == 10 or i == 11):
        y_train[y_train != 0] = y_train[y_train != 0] - 1

    # Expand labels, to work with sparse categorical cross entropy.
    y_train = np.expand_dims(y_train, -1)
    y_test = np.expand_dims(y_test, -1)

    # Construct a feed-forward neural network.
    inputLayer = Input(shape=(x_train.shape[1],))
    hidden1 = Dense(hiddenLayersSizes[0], activation=activation,
                    W_regularizer=l2(l2_penalty))(inputLayer)
    hidden2 = Dense(hiddenLayersSizes[1], activation=activation,
                    W_regularizer=l2(l2_penalty))(hidden1)
    hidden3 = Dense(hiddenLayersSizes[2], activation=activation,
                    W_regularizer=l2(l2_penalty))(hidden2)
    numClasses = len(np.unique(trainSample.y)) - 1
    outputLayer = Dense(numClasses, activation='softmax')(hidden3)

    encoder = Model(input=inputLayer, output=hidden3)

    # Plot the test data in the 3rd hidden layer.
    # Note: this prediction uses the network's current (pre-training) weights;
    # net.fit() only runs further below.
    h3_data = encoder.predict(x_test, verbose=0)
    # fig, (ax1) = plt1.subplots(1, 1, subplot_kw={'projection': '3d'})
    # ax1.scatter(h3_data[:, 0], h3_data[:, 1], h3_data[:, 2], s=20,
    #             c=np.squeeze(y_test))
    fig = plt1.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(h3_data[:, 0], h3_data[:, 1], h3_data[:, 2], s=20,
               c=np.squeeze(y_test))
    # ax1.set_title('data in 3rd hidden layer')
    plt1.show()

    net = Model(input=inputLayer, output=outputLayer)

    lrate = LearningRateScheduler(step_decay)
    optimizer = keras.optimizers.rmsprop(lr=0.0)
    net.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy')
    net.fit(x_train, y_train, nb_epoch=80, batch_size=128, shuffle=True,
            validation_split=0.1, verbose=0,
            callbacks=[lrate, mn.monitor(),
                       cb.EarlyStopping(monitor='val_loss',
                                        patience=25, mode='auto')])
    try:
        net.save(os.path.join(io.DeepLearningRoot(),
                              'savemodels/' + path + '/cellClassifier.h5'))
    except OSError:
        pass
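
# --- Hedged usage sketch (not part of the original file) ---
# plotHidden() builds the same classifier as trainClassifier() but first
# scatters the test cells in the 3-dimensional third hidden layer. The call
# below reuses the illustrative names from the earlier sketches.
#
# plotHidden(trainSample, testSample, mode=mode, i=refSampleInd, path='run1')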