def trainDAE(target, dataPath, refSampleInd, trainIndex, relevantMarkers, mode,
             keepProb, denoise, loadModel, path):
    """Return the de-noising autoencoder, loading or training it as requested.

    Source samples are read from disk only when a new model actually has to
    be trained; nothing is loaded when ``denoise`` is falsy or when the saved
    model is restored.

    Args:
        target: reference sample; only its ``X`` matrix is read.
        dataPath: data directory forwarded to ``dh.loadDeepCyTOFData``.
        refSampleInd: position inside ``trainIndex`` of the entry to exclude
            from the source samples (it is passed to ``np.delete``).
        trainIndex: sample indices; every entry except ``refSampleInd`` is
            loaded as a source sample.
        relevantMarkers: column selection forwarded to the data loader.
        mode: file-format mode forwarded to the data loader.
        keepProb: probability that an input entry is kept when the training
            input is corrupted with a Bernoulli mask.
        denoise: when falsy no autoencoder is built and ``None`` is returned.
        loadModel: when truthy the saved model is loaded instead of trained.
        path: sub-directory of ``savemodels/`` that holds ``denoisedAE.h5``.

    Returns:
        The Keras autoencoder, or ``None`` when ``denoise`` is falsy.
    """
    if not denoise:
        return None

    modelFile = os.path.join(io.DeepLearningRoot(),
                             'savemodels/' + path + '/denoisedAE.h5')
    if loadModel:
        from keras.models import load_model
        return load_model(modelFile)

    # Rows with more than this many exact zeros are dropped.
    numZerosOK = 1

    # Collect the filtered source samples and concatenate once at the end
    # instead of re-allocating the growing matrix on every iteration.
    sourceChunks = []
    for sourceIndex in np.delete(trainIndex, refSampleInd):
        source = dh.loadDeepCyTOFData(dataPath, sourceIndex,
                                      relevantMarkers, mode)
        toKeepS = np.sum(source.X == 0, axis=1) <= numZerosOK
        sourceChunks.append(source.X[toKeepS])

    toKeepT = np.sum(target.X == 0, axis=1) <= numZerosOK

    # Sources first, target last: validation_split takes its samples from the
    # end of the array, so the order is part of the behavior.
    trainParts = []
    if sourceChunks:
        # preProcess source
        # NOTE(review): only the sources are log-transformed here; target.X is
        # used as passed in - presumably already preprocessed by the caller.
        sourceX = np.log(1 + np.abs(np.concatenate(sourceChunks, axis=0)))
        trainParts.append(sourceX)
    trainParts.append(target.X[toKeepT])

    # train de-noising auto encoder and save it.
    trainTarget_ae = np.concatenate(trainParts, axis=0)
    trainData_ae = trainTarget_ae * np.random.binomial(
        n=1, p=keepProb, size=trainTarget_ae.shape)

    inputDim = target.X.shape[1]
    ae_encodingDim = 25
    l2_penalty_ae = 1e-2

    input_cell = Input(shape=(inputDim,))
    encoded = Dense(ae_encodingDim, activation='relu',
                    W_regularizer=l2(l2_penalty_ae))(input_cell)
    encoded1 = Dense(ae_encodingDim, activation='relu',
                     W_regularizer=l2(l2_penalty_ae))(encoded)
    decoded = Dense(inputDim, activation='linear',
                    W_regularizer=l2(l2_penalty_ae))(encoded1)

    autoencoder = Model(input=input_cell, output=decoded)
    autoencoder.compile(optimizer='rmsprop', loss='mse')
    # The corrupted matrix is the input, the clean matrix is the target.
    autoencoder.fit(trainData_ae, trainTarget_ae, nb_epoch=80,
                    batch_size=128, shuffle=True,
                    validation_split=0.1, verbose=0,
                    callbacks=[mn.monitor(),
                               cb.EarlyStopping(monitor='val_loss',
                                                patience=25, mode='auto')])
    autoencoder.save(modelFile)
    plt.close('all')

    return autoencoder
def loadModel(target, source, sourceIndex, predLabel, path):
    """Rebuild the calibration ResNet and restore its saved weights.

    The network is three residual blocks stacked on an input whose width is
    ``target.X.shape[1]``. Weights are read from
    ``savemodels/<path>/ResNet<sourceIndex>.h5`` under
    ``io.DeepLearningRoot()``.

    ``source`` and ``predLabel`` are accepted but not used by this function.

    Returns:
        The Keras model with the stored weights loaded.
    """
    mmdNetLayerSizes = [25, 25]
    l2_penalty = 1e-2
    # scale=.1e-4 is 1e-5: the weights start very close to zero.
    init = lambda shape, name: initializers.normal(
        shape, scale=.1e-4, name=name)
    space_dim = target.X.shape[1]

    def residualBlock(blockInput, hiddenSize):
        # BN -> ReLU -> Dense(hiddenSize) -> BN -> ReLU -> Dense(space_dim),
        # then add the block input back (skip connection).
        hidden = BatchNormalization()(blockInput)
        hidden = Activation('relu')(hidden)
        hidden = Dense(hiddenSize, activation='linear',
                       W_regularizer=l2(l2_penalty), init=init)(hidden)
        hidden = BatchNormalization()(hidden)
        hidden = Activation('relu')(hidden)
        hidden = Dense(space_dim, activation='linear',
                       W_regularizer=l2(l2_penalty), init=init)(hidden)
        return merge([hidden, blockInput], mode='sum')

    calibInput = Input(shape=(space_dim, ))
    # The first block uses the first layer size; blocks two and three both
    # use the second one.
    netOutput = residualBlock(calibInput, mmdNetLayerSizes[0])
    netOutput = residualBlock(netOutput, mmdNetLayerSizes[1])
    netOutput = residualBlock(netOutput, mmdNetLayerSizes[1])

    calibMMDNet = Model(input=calibInput, output=netOutput)

    weightsFile = os.path.join(
        io.DeepLearningRoot(),
        'savemodels/' + path + '/ResNet' + str(sourceIndex) + '.h5')
    calibMMDNet.load_weights(weightsFile)

    return calibMMDNet
def loadDeepCyTOFData(dataPath, dataIndex, relevantMarkers, mode, skip_header=0):
    """Load one sample (marker matrix plus labels) as a ``Sample``.

    Args:
        dataPath: directory, relative to ``io.DeepLearningRoot()``.
        dataIndex: sample identifier. In ``'CSV.GZ'`` mode it is the file
            name itself; otherwise it is the number in ``sample<N>.csv`` /
            ``sample<N>.fcs`` and ``labels<N>.csv``.
        relevantMarkers: column indices kept from the data matrix.
        mode: ``'CSV.GZ'``, ``'CSV'`` or ``'FCS'``.
        skip_header: header lines to skip; only used in ``'CSV'`` mode.

    Returns:
        ``Sample(X, labels)`` with ``X`` restricted to ``relevantMarkers``.

    Raises:
        ValueError: if ``mode`` is not one of the supported modes.
    """
    supportedModes = ('CSV.GZ', 'CSV', 'FCS')
    if mode not in supportedModes:
        # Previously an unknown mode fell through and failed later with an
        # unrelated "local variable referenced before assignment" error.
        raise ValueError('Unsupported mode ' + repr(mode) +
                         '; expected one of ' + repr(supportedModes))

    root = io.DeepLearningRoot()

    if mode == 'CSV.GZ':
        # I'm just going to give it the file name
        data_filename = dataPath + "/" + str(dataIndex)
        X = pd.read_csv(os.path.join(root, data_filename)).to_numpy()

        # The label file is the same path with "/x/" replaced by "/y/".
        actual = pd.read_csv(
            os.path.join(root, data_filename.replace("/x/", "/y/")))

        # Column k (0-based) equal to 1 means label k + 1; rows with no
        # column equal to 1 keep label 0. A later column overrides an
        # earlier one.
        labelArray = np.zeros(len(actual), dtype=int)
        for aci in range(len(actual.columns)):
            labelArray[(actual[actual.columns[aci]] == 1).to_numpy()] = aci + 1
        # NOTE(review): this mode hands a plain list to Sample while the
        # other modes hand over an integer ndarray; kept as a list so
        # existing callers are unaffected.
        labels = labelArray.tolist()
    else:
        if mode == 'CSV':
            data_filename = dataPath + '/sample' + str(dataIndex) + '.csv'
            X = genfromtxt(os.path.join(root, data_filename),
                           delimiter=',', skip_header=skip_header)
        else:
            data_filename = dataPath + '/sample' + str(dataIndex) + '.fcs'
            _, X = fcsparser.parse(os.path.join(root, data_filename),
                                   reformat_meta=True)
            # DataFrame.as_matrix() was removed in pandas 1.0; to_numpy() is
            # already what the CSV.GZ branch uses.
            X = X.to_numpy()
        label_filename = dataPath + '/labels' + str(dataIndex) + '.csv'
        labels = genfromtxt(os.path.join(root, label_filename),
                            delimiter=',')
        labels = np.int_(labels)

    X = X[:, relevantMarkers]
    sample = Sample(X, labels)
    return sample
def trainClassifier(trainSample, mode = 'None', i = 0,
                    hiddenLayersSizes = [12, 6, 3],
                    activation = 'softplus', l2_penalty = 1e-4, path = 'None'):
    """Train the feed-forward cell classifier on the labelled cells.

    Args:
        trainSample: sample with ``X`` (features) and ``y`` (labels); cells
            whose label is 0 are treated as unlabelled and dropped.
        mode: when equal to ``'GvHD'`` the label remapping below is applied
            for the listed values of ``i``.
        i: sample index, only consulted for the ``'GvHD'`` special case.
        hiddenLayersSizes: widths of the three hidden layers (the first
            three entries are used; the list is never modified).
        activation: activation of the hidden layers.
        l2_penalty: L2 weight of the hidden-layer kernel regularizers.
        path: sub-directory of ``savemodels/`` for ``cellClassifier.h5``.

    Returns:
        The trained Keras model. Saving it is best-effort: an ``OSError``
        while writing the file is reported as a warning, not raised.
    """
    import warnings

    # Remove unlabeled cells for training.
    labelled = trainSample.y != 0
    x_train = trainSample.X[labelled]
    y_train = trainSample.y[labelled]

    # Labels start from 0.
    y_train = np.int_(y_train) - 1

    # Special case in GvHD: label in those files are 0,1,3,4 with no 2.
    if mode == 'GvHD' and i in (5, 9, 10, 11):
        y_train[y_train != 0] -= 1

    # Expand labels, to work with sparse categorical cross entropy.
    y_train = np.expand_dims(y_train, -1)

    # Construct a feed-forward neural network.
    inputLayer = Input(shape = (x_train.shape[1],))
    hidden1 = Dense(hiddenLayersSizes[0], activation = activation,
                    kernel_regularizer = l2(l2_penalty))(inputLayer)
    hidden2 = Dense(hiddenLayersSizes[1], activation = activation,
                    kernel_regularizer = l2(l2_penalty))(hidden1)
    hidden3 = Dense(hiddenLayersSizes[2], activation = activation,
                    kernel_regularizer = l2(l2_penalty))(hidden2)

    # One output unit per distinct value in trainSample.y.
    # NOTE(review): the count is taken over all of y, so a 0 (unlabelled)
    # value, if present, is counted as well - confirm this is intended.
    numClasses = len(np.unique(trainSample.y))
    outputLayer = Dense(numClasses, activation = 'softmax')(hidden3)

    net = Model(inputs = inputLayer, outputs = outputLayer)
    lrate = LearningRateScheduler(step_decay)
    # lr = 0.0 is only a placeholder - presumably the scheduler callback
    # supplies the real rate each epoch; verify against step_decay.
    optimizer = keras.optimizers.rmsprop(lr = 0.0)

    net.compile(optimizer = optimizer,
                loss = 'sparse_categorical_crossentropy')
    net.fit(x_train, y_train, epochs = 80, batch_size = 128, shuffle = True,
            validation_split = 0.1, verbose = 0,
            callbacks=[lrate, mn.monitor(),
                       cb.EarlyStopping(monitor = 'val_loss',
                                        patience = 25, mode = 'auto')])

    modelFile = os.path.join(io.DeepLearningRoot(),
                             'savemodels/' + path + '/cellClassifier.h5')
    try:
        net.save(modelFile)
    except OSError as err:
        # Still best-effort, but no longer silent: a later load of this file
        # would otherwise fail with no hint as to why.
        warnings.warn('Could not save the cell classifier to ' + modelFile +
                      ': ' + str(err))
    #plt.close('all')
    return net
def loadDeepCyTOFData(dataPath, dataIndex, relevantMarkers, mode, skip_header=0):
    """Load one sample (marker matrix plus labels) as a ``Sample``.

    Args:
        dataPath: directory, relative to ``io.DeepLearningRoot()``, holding
            ``sample<N>.csv`` / ``sample<N>.fcs`` and ``labels<N>.csv``.
        dataIndex: the ``<N>`` used in the file names.
        relevantMarkers: column indices kept from the data matrix.
        mode: ``'CSV'`` or ``'FCS'``.
        skip_header: header lines to skip; only used in ``'CSV'`` mode.

    Returns:
        ``Sample(X, labels)`` with ``X`` restricted to ``relevantMarkers``
        and ``labels`` converted with ``np.int_``.

    Raises:
        ValueError: if ``mode`` is neither ``'CSV'`` nor ``'FCS'``.
    """
    if mode not in ('CSV', 'FCS'):
        # Previously an unknown mode fell through and failed later with an
        # unrelated "local variable referenced before assignment" error.
        raise ValueError('Unsupported mode ' + repr(mode) +
                         "; expected 'CSV' or 'FCS'")

    root = io.DeepLearningRoot()

    if mode == 'CSV':
        data_filename = dataPath + '/sample' + str(dataIndex) + '.csv'
        X = genfromtxt(os.path.join(root, data_filename),
                       delimiter=',', skip_header=skip_header)
    else:
        data_filename = dataPath + '/sample' + str(dataIndex) + '.fcs'
        _, X = fcsparser.parse(os.path.join(root, data_filename),
                               reformat_meta=True)
        # DataFrame.as_matrix() was removed in pandas 1.0; .values returns
        # the same array and exists in every pandas version.
        X = X.values
    X = X[:, relevantMarkers]

    label_filename = dataPath + '/labels' + str(dataIndex) + '.csv'
    labels = genfromtxt(os.path.join(root, label_filename), delimiter=',')
    labels = np.int_(labels)

    sample = Sample(X, labels)
    return sample
# Train (or load) the de-noising auto encoder; the clock covers this step
# and the classifier step below.
print('Train the de-noising auto encoder.')
start = tm.time()
DAE = dae.trainDAE(
    target, dataPath, refSampleInd, trainIndex, relevantMarkers,
    mode, keepProb, denoise, loadModel, dataSet[choice],
)
denoiseTarget = dae.predictDAE(target, DAE, denoise)

# Train the feed-forward classifier on (de-noised) target.
denoiseTarget, preprocessor = dh.standard_scale(denoiseTarget,
                                                preprocessor=None)
if not loadModel:
    print('Train the classifier on de-noised Target')
    cellClassifier = net.trainClassifier(
        denoiseTarget, mode, refSampleInd, hiddenLayersSizes,
        activation, l2_penalty, dataSet[choice],
    )
else:
    # Re-use the classifier saved by an earlier run.
    from keras.models import load_model
    cellClassifier = load_model(
        os.path.join(io.DeepLearningRoot(),
                     'savemodels/' + dataSet[choice] + '/cellClassifier.h5'))
end = tm.time()
print('Training time: ' + str(end - start))

# Test the performance with and without calibration.
def calibrate(target, source, sourceIndex, predLabel, path):
    """Train an MMD ResNet mapping ``source`` towards ``target`` and apply it.

    The network is three residual blocks. It is fitted on a random 20% of the
    source cells against a random 20% of the target cells, dropping cells
    whose label (``target.y`` / ``predLabel``) is 0. The weights are written
    to ``savemodels/<path>/ResNet<sourceIndex>.h5`` under
    ``io.DeepLearningRoot()``.

    Returns:
        ``Sample`` holding the network's prediction for all of ``source.X``
        together with the unchanged ``source.y``.
    """
    mmdNetLayerSizes = [25, 25]
    l2_penalty = 1e-2
    # scale=.1e-4 is 1e-5: the weights start very close to zero.
    init = lambda shape, name: initializations.normal(
        shape, scale=.1e-4, name=name)
    space_dim = target.X.shape[1]

    # The first block uses the first layer size; blocks two and three both
    # use the second one. Each block is
    # BN -> ReLU -> Dense -> BN -> ReLU -> Dense plus a skip connection.
    calibInput = Input(shape=(space_dim,))
    netOutput = calibInput
    for hiddenSize in (mmdNetLayerSizes[0], mmdNetLayerSizes[1],
                       mmdNetLayerSizes[1]):
        shortcut = netOutput
        hidden = BatchNormalization()(shortcut)
        hidden = Activation('relu')(hidden)
        hidden = Dense(hiddenSize, activation='linear',
                       W_regularizer=l2(l2_penalty), init=init)(hidden)
        hidden = BatchNormalization()(hidden)
        hidden = Activation('relu')(hidden)
        hidden = Dense(space_dim, activation='linear',
                       W_regularizer=l2(l2_penalty), init=init)(hidden)
        netOutput = merge([hidden, shortcut], mode='sum')

    calibMMDNet = Model(input=calibInput, output=netOutput)

    def takeLabelledFifth(X, y):
        # Random 20% of the rows, then only those with a non-zero label;
        # labels come back as a column vector.
        n = X.shape[0]
        chosen = np.random.permutation(n)[:int(.2*n)]
        subX = X[chosen]
        subY = y[chosen]
        labelled = subY != 0
        return subX[labelled], np.reshape(subY[labelled], (-1, 1))

    # Target is sampled before source, so the random draws happen in the
    # same sequence as before.
    targetXMMD, targetYMMD = takeLabelledFifth(target.X, target.y)
    sourceXMMD, sourceYMMD = takeLabelledFifth(source.X, predLabel)

    lrate = LearningRateScheduler(step_decay)
    optimizer = opt.rmsprop(lr=0.0)
    calibMMDNet.compile(
        optimizer=optimizer,
        loss=lambda y_true, y_pred: cf.MMD(
            netOutput, targetXMMD,
            MMDTargetValidation_split = 0.1).KerasCost(y_true, y_pred))

    # The fit targets are all zeros; the loss above is built from the
    # network output tensor and the sampled target cells.
    sourceLabels = np.zeros(sourceXMMD.shape[0])
    monitorCallback = mn.monitorMMD(sourceXMMD, sourceYMMD, targetXMMD,
                                    targetYMMD, calibMMDNet.predict)
    stopCallback = cb.EarlyStopping(monitor='val_loss', patience=20,
                                    mode='auto')
    calibMMDNet.fit(sourceXMMD, sourceLabels, nb_epoch=500,
                    batch_size=1000, validation_split=0.1, verbose=0,
                    callbacks=[lrate, monitorCallback, stopCallback])
    plt.close('all')

    calibMMDNet.save_weights(
        os.path.join(io.DeepLearningRoot(),
                     'savemodels/' + path + '/ResNet' + str(sourceIndex) + '.h5'))

    return Sample(calibMMDNet.predict(source.X), source.y)
def plotHidden(trainSample, testSample, mode = 'None', i = 0,
               hiddenLayersSizes = [12, 6, 3],
               activation = 'softplus', l2_penalty = 1e-4, path = 'None'):
    """Plot the test cells in the third hidden layer, then train the net.

    Args:
        trainSample: sample with ``X`` and ``y`` used for training; cells
            with label 0 are dropped.
        testSample: sample whose labelled cells are projected and plotted.
        mode: when equal to ``'GvHD'`` the label remapping below is applied
            to the training labels for the listed values of ``i``.
        i: sample index, only consulted for the ``'GvHD'`` special case.
        hiddenLayersSizes: widths of the three hidden layers (the first
            three entries are used; the list is never modified).
        activation: activation of the hidden layers.
        l2_penalty: L2 weight of the hidden-layer regularizers.
        path: sub-directory of ``savemodels/`` for ``cellClassifier.h5``.

    Returns:
        None. Saving the trained model is best-effort: an ``OSError`` while
        writing the file is reported as a warning, not raised.
    """
    import warnings

    # Remove unlabeled cells for training.
    x_train = trainSample.X[trainSample.y != 0]
    y_train = trainSample.y[trainSample.y != 0]
    x_test = testSample.X[testSample.y != 0]
    y_test = testSample.y[testSample.y != 0]

    # Labels start from 0.
    y_train = np.int_(y_train) - 1
    y_test = np.int_(y_test) - 1

    # Special case in GvHD: label in those files are 0,1,3,4 with no 2.
    # NOTE(review): only the training labels are remapped; y_test (used for
    # the plot colours) is left as is - confirm this is intended.
    if mode == 'GvHD' and i in (5, 9, 10, 11):
        y_train[y_train != 0] -= 1

    # Expand labels, to work with sparse categorical cross entropy.
    y_train = np.expand_dims(y_train, -1)
    y_test = np.expand_dims(y_test, -1)

    # Construct a feed-forward neural network.
    inputLayer = Input(shape = (x_train.shape[1],))
    hidden1 = Dense(hiddenLayersSizes[0], activation = activation,
                    W_regularizer = l2(l2_penalty))(inputLayer)
    hidden2 = Dense(hiddenLayersSizes[1], activation = activation,
                    W_regularizer = l2(l2_penalty))(hidden1)
    hidden3 = Dense(hiddenLayersSizes[2], activation = activation,
                    W_regularizer = l2(l2_penalty))(hidden2)
    numClasses = len(np.unique(trainSample.y)) - 1
    outputLayer = Dense(numClasses, activation = 'softmax')(hidden3)

    encoder = Model(input = inputLayer, output = hidden3)
    # plot data in the 3rd hidden layer
    # NOTE(review): this projection is computed before net.fit below, i.e.
    # with the layers' initial weights - confirm that is what should be shown.
    h3_data = encoder.predict(x_test, verbose = 0)
    fig = plt1.figure()
    ax = fig.add_subplot(111, projection = '3d')
    ax.scatter(h3_data[:,0], h3_data[:,1], h3_data[:,2], s = 20,
               c = np.squeeze(y_test))
    plt1.show()

    net = Model(input = inputLayer, output = outputLayer)
    lrate = LearningRateScheduler(step_decay)
    optimizer = keras.optimizers.rmsprop(lr = 0.0)

    net.compile(optimizer = optimizer,
                loss = 'sparse_categorical_crossentropy')
    net.fit(x_train, y_train, nb_epoch = 80, batch_size = 128, shuffle = True,
            validation_split = 0.1, verbose = 0,
            callbacks=[lrate, mn.monitor(),
                       cb.EarlyStopping(monitor = 'val_loss',
                                        patience = 25, mode = 'auto')])

    modelFile = os.path.join(io.DeepLearningRoot(),
                             'savemodels/' + path + '/cellClassifier.h5')
    try:
        net.save(modelFile)
    except OSError as err:
        # Still best-effort, but no longer silent.
        warnings.warn('Could not save the cell classifier to ' + modelFile +
                      ': ' + str(err))
Train the de-noising auto encoder. '''
# (The line above is the tail of a string literal opened before this
# fragment; it is kept verbatim.)
print('Train the de-noising auto encoder.')
# trainDAE receives dataSet[choice] as the model sub-directory name.
DAE = dae.trainDAE(target, dataPath, refSampleInd, trainIndex,
                   relevantMarkers, mode, keepProb, denoise, loadModel,
                   dataSet[choice])
denoiseTarget = dae.predictDAE(target, DAE, denoise)

''' Train the feed-forward classifier on (de-noised) target. '''
# The fitted preprocessor is returned alongside the scaled target.
denoiseTarget, preprocessor = dh.standard_scale(denoiseTarget,
                                                preprocessor=None)
if loadModel:
    # Re-use the classifier saved under savemodels/<data set>/.
    from keras.models import load_model
    cellClassifier = load_model(
        os.path.join(io.DeepLearningRoot(),
                     'savemodels/' + dataSet[choice] + '/cellClassifier.h5'))
else:
    print('Train the classifier on de-noised Target')
    cellClassifier = net.trainClassifier(denoiseTarget, mode, refSampleInd,
                                         hiddenLayersSizes, activation,
                                         l2_penalty, dataSet[choice])

''' Test the performance with and without calibration. '''
# Generate the output table.
# Two columns when calibration is enabled, one otherwise; one row per test
# sample. acc and F1 are stored as float16.
dim = 2 if isCalibrate else 1
acc = np.zeros((testIndex.size, dim), np.float16)
F1 = np.zeros((testIndex.size, dim), np.float16)
# One MMD value per test sample, before and after calibration.
mmd_before = np.zeros(testIndex.size)
mmd_after = np.zeros(testIndex.size)
# Classifier hyper-parameters.
activation = 'softplus'
l2_penalty = 1e-4

''' The user needs to specify the data set to run the cell classifier. Make your choice here - an integer from 0 to 4. 0: NDD 1: CFSE 2: StemCell 3: Lymph 4: GvHD '''
choice = 4

# Generate the path of the chosen data set.
dataPath = os.path.join(io.DeepLearningRoot(), 'Data/FlowCAP-I/',
                        dataSet[choice])

# Generate the output table.
# One entry per sample of the chosen data set.
acc = np.zeros(numSample[choice])
F1 = np.zeros(numSample[choice])

''' For each single sample of the chosen data set, train a feed-forward neural net classifier using 25% of cells, and test the performance using the rest 75% of cells. '''
print('Data set name: ', dataSet[choice])
for i in range(numSample[choice]):
    # Load sample.
    # Printed sample numbers are 1-based (i + 1).
    print('Load sample ', str(i + 1))
    sample = dh.loadDeepCyTOFData(dataPath,