Example #1
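The functions in this example rely on module-level imports and helpers that are not shown. Below is a minimal sketch of the assumed setup; the project-specific modules (dh, io, mn) and the step_decay schedule are inferred from how they are used here and are assumptions, not the original definitions.

import os
import numpy as np
import keras
import keras.callbacks as cb
import matplotlib.pyplot as plt
import matplotlib.pyplot as plt1               # the code below uses both aliases
from keras.layers import Input, Dense
from keras.models import Model
from keras.regularizers import l2
from keras.callbacks import LearningRateScheduler
from mpl_toolkits.mplot3d import Axes3D        # registers the '3d' projection

# Project-specific helpers, names assumed from how they are called:
#   dh.loadDeepCyTOFData(...), io.DeepLearningRoot(), mn.monitor()
# e.g. from Util import DataHandler as dh, FileIO as io, MonitorCallback as mn

def step_decay(epoch):
    # Assumed step-decay learning-rate schedule for LearningRateScheduler;
    # the original definition is not part of this example.
    initial_lr, drop, epochs_drop = 1e-3, 0.5, 20.0
    return float(initial_lr * (drop ** np.floor(epoch / epochs_drop)))
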
def trainDAE(target, dataPath, refSampleInd, trainIndex, relevantMarkers, mode,
             keepProb, denoise, loadModel, path):
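    # Pool all source samples (every index in trainIndex except the reference
    # sample) and keep only cells with at most `numZerosOK` zero-valued markers.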
    sourceX = []
    for i in np.arange(trainIndex.size-1):
        sourceIndex = np.delete(trainIndex, refSampleInd)[i]
        source = dh.loadDeepCyTOFData(dataPath, sourceIndex,
                                      relevantMarkers, mode)
        numZerosOK=1
        toKeepS = np.sum((source.X==0), axis = 1) <= numZerosOK
        if i == 0:
            sourceX = source.X[toKeepS]
        else:
            sourceX = np.concatenate([sourceX, source.X[toKeepS]], axis = 0)
        
    # preProcess source
    sourceX = np.log(1 + np.abs(sourceX))
    
    numZerosOK=1
    toKeepT = np.sum((target.X==0), axis = 1) <= numZerosOK
    
    inputDim = target.X.shape[1]
    
    ae_encodingDim = 25
    l2_penalty_ae = 1e-2
    
    if denoise:
        if loadModel:
            from keras.models import load_model
            autoencoder = load_model(os.path.join(io.DeepLearningRoot(), path + '/denoisedAE.h5'))
        else:
            # train de-noising auto encoder and save it.
            trainTarget_ae = np.concatenate([sourceX, target.X[toKeepT]],
                                            axis=0)
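            # Corrupt the input by randomly zeroing each entry with probability
            # 1 - keepProb; the clean data remains the reconstruction target.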
            trainData_ae = trainTarget_ae * np.random.binomial(n=1, p=keepProb,
                                                size = trainTarget_ae.shape)
        
            input_cell = Input(shape=(inputDim,))
            encoded = Dense(ae_encodingDim, activation='relu',
                            kernel_regularizer=l2(l2_penalty_ae))(input_cell)
            encoded1 = Dense(ae_encodingDim, activation='relu',
                             kernel_regularizer=l2(l2_penalty_ae))(encoded)
            decoded = Dense(inputDim, activation='linear',
                            kernel_regularizer=l2(l2_penalty_ae))(encoded1)
        
            autoencoder = Model(inputs=input_cell, outputs=decoded)
            autoencoder.compile(optimizer='rmsprop', loss='mse')
            autoencoder.fit(trainData_ae, trainTarget_ae, epochs=80,
                            batch_size=128, shuffle=True,
                            validation_split=0.1, verbose = 0,
                            callbacks=[mn.monitor(), cb.EarlyStopping(
                            monitor='val_loss', patience=25,  mode='auto')])
            autoencoder.save(os.path.join(io.DeepLearningRoot(), path + '/denoisedAE.h5'))
            del sourceX
            plt.close('all')
        
        return autoencoder
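
# Hypothetical usage sketch: how the returned denoising autoencoder might be
# applied to the target sample. Paths, file indices, the marker list and the
# keyword values are placeholders, not values from the original project.
relevantMarkers = np.arange(25)
trainIndex = np.arange(1, 5)
target = dh.loadDeepCyTOFData('data/', trainIndex[0], relevantMarkers, 'CSV')
autoencoder = trainDAE(target, 'data/', refSampleInd=0, trainIndex=trainIndex,
                       relevantMarkers=relevantMarkers, mode='CSV',
                       keepProb=0.8, denoise=True, loadModel=False, path='models')
denoisedTargetX = autoencoder.predict(target.X)
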
def trainClassifier(trainSample, mode = 'None', i = 0,
                    hiddenLayersSizes = [12, 6, 3],
                    activation = 'softplus', l2_penalty = 1e-4,
                    path = 'None'):
    # Remove unlabeled cells for training.
    x_train = trainSample.X[trainSample.y != 0]
    y_train = trainSample.y[trainSample.y != 0]
    
    # Labels start from 0.
    y_train = np.int_(y_train) - 1

    
    # Special case in GvHD: label in those files are 0,1,3,4 with no 2.
    if mode == 'GvHD' and (i == 5 or i == 9 or 
                           i == 10 or i == 11):
        y_train[y_train != 0] = y_train[y_train != 0] - 1

    # Expand labels, to work with sparse categorical cross entropy.
    y_train = np.expand_dims(y_train, -1)
    
    # Construct a feed-forward neural network.
    inputLayer = Input(shape = (x_train.shape[1],))
    hidden1 = Dense(hiddenLayersSizes[0], activation = activation,
                    kernel_regularizer = l2(l2_penalty))(inputLayer)
    hidden2 = Dense(hiddenLayersSizes[1], activation = activation,
                    kernel_regularizer = l2(l2_penalty))(hidden1)
    hidden3 = Dense(hiddenLayersSizes[2], activation = activation,
                    kernel_regularizer = l2(l2_penalty))(hidden2)
#    numClasses = len(np.unique(trainSample.y)) - 1   # with 0 class
    numClasses = len(np.unique(trainSample.y))       # without 0 class
#    numClasses = 57                                   # for HMIS-2
    outputLayer = Dense(numClasses, activation = 'softmax')(hidden3)
    
    net = Model(inputs = inputLayer, outputs = outputLayer)
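    # The learning rate set on the optimizer below is only a placeholder;
    # LearningRateScheduler overrides it at the start of every epoch via step_decay.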
    lrate = LearningRateScheduler(step_decay)
    optimizer = keras.optimizers.RMSprop(lr = 0.0)

    net.compile(optimizer = optimizer, 
                loss = 'sparse_categorical_crossentropy')
    net.fit(x_train, y_train, epochs = 80, batch_size = 128, shuffle = True,
            validation_split = 0.1, verbose = 0, 
            callbacks=[lrate, mn.monitor(),
            cb.EarlyStopping(monitor = 'val_loss',
                             patience = 25, mode = 'auto')])
    try:
        net.save(os.path.join(io.DeepLearningRoot(),
                              'savemodels/' + path + '/cellClassifier.h5'))
    except OSError:
        pass
    #plt.close('all')
    
    return net
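
# Hypothetical usage sketch; paths, indices and the marker list are placeholders.
# `testSample` is assumed to have the same X/y layout as `trainSample`. Because
# the labels were shifted down by one for training, predictions are shifted back.
trainSample = dh.loadDeepCyTOFData('data/', 1, np.arange(25), 'CSV')
testSample = dh.loadDeepCyTOFData('data/', 2, np.arange(25), 'CSV')
net = trainClassifier(trainSample, mode='GvHD', i=1, path='models')
probs = net.predict(testSample.X)         # per-cell class probabilities
y_pred = np.argmax(probs, axis=1) + 1     # back to the original 1-based labels
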
def plotHidden(trainSample, testSample, mode = 'None', i = 0,
                    hiddenLayersSizes = [12, 6, 3],
                    activation = 'softplus', l2_penalty = 1e-4,
                    path = 'None'):
    # Remove unlabeled cells for training.
    x_train = trainSample.X[trainSample.y != 0]
    y_train = trainSample.y[trainSample.y != 0]
    x_test = testSample.X[testSample.y != 0]
    y_test = testSample.y[testSample.y != 0]
    
    # Labels start from 0.
    y_train = np.int_(y_train) - 1
    y_test = np.int_(y_test) - 1

    
    # Special case in GvHD: label in those files are 0,1,3,4 with no 2.
    if mode == 'GvHD' and (i == 5 or i == 9 or 
                           i == 10 or i == 11):
        y_train[y_train != 0] = y_train[y_train != 0] - 1

    # Expand labels, to work with sparse categorical cross entropy.
    y_train = np.expand_dims(y_train, -1)
    y_test = np.expand_dims(y_test, -1)
    
    # Construct a feed-forward neural network.
    inputLayer = Input(shape = (x_train.shape[1],))
    hidden1 = Dense(hiddenLayersSizes[0], activation = activation,
                    kernel_regularizer = l2(l2_penalty))(inputLayer)
    hidden2 = Dense(hiddenLayersSizes[1], activation = activation,
                    kernel_regularizer = l2(l2_penalty))(hidden1)
    hidden3 = Dense(hiddenLayersSizes[2], activation = activation,
                    kernel_regularizer = l2(l2_penalty))(hidden2)
    numClasses = len(np.unique(trainSample.y)) - 1
    outputLayer = Dense(numClasses, activation = 'softmax')(hidden3)
    
    encoder = Model(inputs = inputLayer, outputs = hidden3)
    # plot data in the 3rd hidden layer
    h3_data = encoder.predict(x_test, verbose = 0)
    #fig, (ax1) = plt1.subplots(1,1, subplot_kw={'projection':'3d'})
    #ax1.scatter(h3_data[:,0], h3_data[:,1], h3_data[:,2], s = 20, c = np.squeeze(y_test))
    
    fig = plt1.figure()
    ax = fig.add_subplot(111, projection = '3d')
    ax.scatter(h3_data[:,0], h3_data[:,1], h3_data[:,2], s = 20, c = np.squeeze(y_test))
    #ax1.set_title('data in 3rd hidden layer')
    plt1.show()
    
    net = Model(inputs = inputLayer, outputs = outputLayer)
    lrate = LearningRateScheduler(step_decay)
    optimizer = keras.optimizers.RMSprop(lr = 0.0)

    net.compile(optimizer = optimizer, 
                loss = 'sparse_categorical_crossentropy')
    net.fit(x_train, y_train, epochs = 80, batch_size = 128, shuffle = True,
            validation_split = 0.1, verbose = 0, 
            callbacks=[lrate, mn.monitor(),
            cb.EarlyStopping(monitor = 'val_loss',
                             patience = 25, mode = 'auto')])
    try:
        net.save(os.path.join(io.DeepLearningRoot(),
                              'savemodels/' + path + '/cellClassifier.h5'))
    except OSError:
        pass
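
# plotHidden repeats the classifier training, but first projects the test cells
# into the three-unit third hidden layer and scatters them in 3D.
# Hypothetical call, reusing the placeholder samples from above:
plotHidden(trainSample, testSample, mode='GvHD', i=1, path='models')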