示例#1
0
def main():
    """Build the batch generator, train the configured model, optionally save history.

    Every setting is read from the module-level ``hp`` object:

    * ``hp.size_stocks == 0`` loads ``<path_dir>/<code>_from_2010.npy`` and
      feeds it to ``utils.single_stock_generator``; any other value uses
      ``utils.multi_stock_generator`` on ``hp.path_dir``.
    * ``hp.model_type == "gan"`` trains through ``gan.gan_train_step`` and
      requires ``hp.size_stocks == 1``; any other model type trains through
      ``dnn.train_step``, whose return value is kept as the model.
    * ``hp._savehistory`` triggers ``utils.saveHistory`` with a quarter of
      ``hp.num_iters`` as the number of test iterations. This only happens
      for the dnn path, because the gan path does not produce a model
      object here.

    Exits the process with status 1 when the gan model is requested with an
    unsupported ``hp.size_stocks`` value.
    """
    if hp.size_stocks == 0:
        # Single-stock mode: the whole series comes from one .npy file.
        data = np.load(hp.path_dir + '/' + hp.code + '_from_2010.npy')
        batch_gen = utils.single_stock_generator(
            data, hp.batch_size, hp.M, hp.N,
            hp.seq_len, hp.n_features, hp.num_stock_size)
    else:
        # Multi-stock mode: the generator reads from the data directory itself.
        batch_gen = utils.multi_stock_generator(
            hp.path_dir, hp.batch_size, hp.M, hp.N,
            hp.seq_len, hp.n_features, hp.num_stock_size)

    print("Stock sequence length is {}, and stock size is {}.".format(hp.seq_len, hp.num_stock_size))

    is_gan = hp.model_type == "gan"
    if is_gan:
        # Imported inside the branch so only the selected backend is loaded.
        import gan
        if not hp.size_stocks == 1:
            print("Sorry, gan model work only with multi stocks.")
            import sys
            # Non-zero status so callers can tell the run was rejected.
            sys.exit(1)
        gan.gan_train_step(batch_gen, hp.num_epochs, hp.num_stock_size,
                           hp.num_iters, hp.M, hp.N, hp.T)
    else:
        import dnn
        model = dnn.train_step(batch_gen, hp.num_epochs, hp.num_stock_size, hp.num_iters)

    if hp._savehistory:
        if is_gan:
            # The gan path never binds `model`; the original code raised
            # UnboundLocalError here instead of finishing the run.
            print("History is not saved for the gan model: no model object is available.")
        else:
            num_test_iters = hp.num_iters // 4
            utils.saveHistory(batch_gen, model,
                              M=hp.M, num_stock_size=hp.num_stock_size,
                              num_test_iters=num_test_iters)

    print("All training step is done.")
示例#2
0
def trainIntermediateModel(modelFile,name,xTrain,yTrain,yTrainInd,xTest,yTest,yTestInd,optimizer,epochs,batchSize,bestModelMode='history'):
    '''
    Resume training of a previously saved model with a caller-supplied
    optimizer (and therefore learning rate).

    The model is loaded from '../model/<modelFile>.h5', recompiled with
    `optimizer`, and trained for `epochs` epochs. A checkpoint is written
    to '../model/<name>_intermediate_<epoch>.h5' after every epoch. When
    training ends the history, the final model and a metrics plot are
    saved under a common file stem that encodes epochs, batch size and
    the last-epoch training/validation accuracy.

    Input parameters:
    modelFile : (str) name of the intermediate model that needs to be
        trained again (without directory or '.h5' extension).
    name : (str) Name that the user assigns to the model. All the
        intermediate and best models are saved with this prefix.
    xTrain : training dataset stack represented as a 4D numpy array.
        xTrain.shape yields [N, row, col, channel] where N is the number
        of images in the training dataset, row and col correspond to the
        size of the image, and channel is 1 for this model.
    yTrain : 1D array of labels. 0 is collapsed, 1 is upright.
        Accepted for signature compatibility; not used in this function.
    yTrainInd : 2D indicator array for the training dataset.
    xTest : 4D test dataset.
    yTest : 1D array of labels for the test dataset.
        Accepted for signature compatibility; not used in this function.
    yTestInd: 2D indicator array for the test dataset.
    optimizer : optimizer passed unchanged to model.compile().
    epochs : (int) number of iterations for which the training needs to
        be done.
    batchSize : (int) number of images to use in every batch.
    bestModelMode : method for selecting the best model. Available
        options are 'history' and 'all'. On choosing 'history' the
        checkpoint list and the saved history file are handed to
        utils.selectBestModelHistory. If 'all', the checkpoint list and
        the training + test data are handed to utils.selectBestModel.
        Any other value skips best-model selection.
    '''
    model = keras.models.load_model('../model/'+modelFile+'.h5')
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    checkpoint = keras.callbacks.ModelCheckpoint(
        '../model/'+name+'_intermediate_{epoch:03d}.h5',
        monitor='val_accuracy',verbose=0,save_best_only=False,mode='auto',save_freq='epoch')
    print(model.summary())

    history = model.fit(xTrain,yTrainInd,epochs=epochs,batch_size=batchSize,validation_data=(xTest,yTestInd),callbacks=[checkpoint])

    # All output files share one stem; only the extension differs.
    trainAcc = history.history['accuracy'][-1]*100
    testAcc = history.history['val_accuracy'][-1]*100
    fileStem = '../model/'+name+('_epochs_%d_batchsize_%d_trainAcc_%.2f_testAcc_%.2f' %(epochs,batchSize,trainAcc,testAcc))
    plotFileName = fileStem+'.png'
    modelFileName = fileStem+'.h5'
    historyFileName = plotFileName.replace('.png','.dat')
    utils.saveHistory(historyFileName,history)
    model.save(modelFileName)
    plot.plotMetrics(plotFileName,history)
    keras.backend.clear_session()

    # Paths of the per-epoch checkpoints written by the callback above.
    modelFileList = ['../model/'+name+'_intermediate_'+str(epoch).zfill(3)+'.h5' for epoch in range(1,epochs+1)]

    # The original compared against the misspelled 'histroy', so the default
    # mode 'history' never selected a best model. The misspelling is still
    # accepted so callers that worked around the bug keep working.
    if bestModelMode in ('history','histroy'):
        utils.selectBestModelHistory(modelFileList,historyFileName)
    elif bestModelMode=='all':
        utils.selectBestModel(modelFileList,xTrain,yTrainInd,xTest,yTestInd)
# Step-decay learning-rate schedule: the rate returned for an epoch is
# LEARNING_RATE multiplied by 0.2 once for every 10 full epochs elapsed.
lr_schedule = LearningRateScheduler(lambda epoch: LEARNING_RATE *
                                    (0.2**int(epoch / 10)))

# Checkpoint callback. The file name template embeds the epoch number and
# the 'val_acc' value, which is also the monitored quantity.
# NOTE(review): the metric key 'val_acc' must match the name the compiled
# model reports (some Keras versions use 'val_accuracy') - confirm.
check_point = ModelCheckpoint('{epoch:02d}_{val_acc:.4f}.h5',
                              monitor='val_acc')

## Train
# The model takes two inputs (x1, x2) and is validated on the matching
# validation pair. `auc`, `stop`, `csv` and `reduce_lr` are callbacks
# defined earlier in the file (not visible in this section).
# NOTE(review): both `reduce_lr` and `lr_schedule` are passed; if
# `reduce_lr` also adjusts the learning rate, the two may override each
# other - confirm this combination is intended.
model = build_model()
history = model.fit(
    [train_x1, train_x2],
    train_y,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    verbose=1,
    validation_data=([valid_x1, valid_x2], valid_y),
    callbacks=[auc, stop, csv, reduce_lr, lr_schedule, check_point],
    shuffle=True,
    class_weight=class_weights)

# Save the final model as '<MODEL_NAME>.h5'.
model.save(MODEL_NAME + '.h5')
print('model saved')
# Save the training history (together with the `auc` callback object) to
# '<MODEL_NAME>.pkl' through the project's saveHistory helper.
HISTORY_FILE = MODEL_NAME + '.pkl'

saveHistory(history, auc, HISTORY_FILE)

# `start_time` is set earlier in the file; report wall-clock duration.
print('Total time:', time.time() - start_time)
K.clear_session()
示例#4
0
def model_02(name,xTrain,yTrain,yTrainInd,xTest,yTest,yTestInd,epochs,batchSize):
    '''
    Build and train the network referred to as the CNN model in the
    manuscript.

    Layer stack, in order:

    CONV (32,5,5,SAME), RELU, CONV (32,5,5), RELU, MAXPOOL (2,2), DROPOUT (0.50)
    FLATTEN
    DENSE (256), RELU, DROPOUT (0.50)
    DENSE (128), RELU, DROPOUT (0.50)
    DENSE (2), SOFTMAX

    The network is compiled with categorical cross-entropy, an accuracy
    metric and SGD (learning rate 0.01, momentum 0.99, no Nesterov).
    A checkpoint is written to '../model/<name>_intermediate_<epoch>.h5'
    after every epoch. Once training finishes, the history, the final
    model and a metrics plot are saved, and the list of checkpoint paths
    plus the history file are handed to utils.selectBestModelHistory
    (according to the original author's notes this keeps the most
    accurate checkpoint and removes the others).

    Input parameters:
    name : (str) prefix used for every file written by this function.
    xTrain : 4D numpy array of training images; xTrain.shape is
        [N, row, col, channel]. The original notes state channel is 1
        for this model.
    yTrain : 1D array of labels. 0 is collapsed, 1 is upright. Not used
        inside this function.
    yTrainInd : 2D indicator array for the training dataset.
    xTest : 4D test dataset, used as validation data.
    yTest : 1D array of labels for the test dataset. Not used inside
        this function.
    yTestInd: 2D indicator array for the test dataset.
    epochs : (int) number of training epochs.
    batchSize : (int) number of images to use in every batch.
    '''
    sgd = optimizers.SGD(learning_rate=0.01,momentum=0.99,nesterov=False)
    _,nRow,nCol,nChannel = xTrain.shape

    # Layers are created in the listed order and appended one by one.
    cnn = keras.Sequential()
    for layer in (
        layers.Input(shape=(nRow,nCol,nChannel)),
        layers.Conv2D(32,kernel_size=(5,5),padding='same'),
        layers.Activation('relu'),
        layers.Conv2D(32,kernel_size=(5,5)),
        layers.Activation('relu'),
        layers.MaxPooling2D(pool_size=(2,2)),
        layers.Dropout(0.50),
        layers.Flatten(),
        layers.Dense(256),
        layers.Activation('relu'),
        layers.Dropout(0.50),
        layers.Dense(128),
        layers.Activation('relu'),
        layers.Dropout(0.50),
        layers.Dense(2),
        layers.Activation('softmax'),
    ):
        cnn.add(layer)

    cnn.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
    checkpointPattern = '../model/'+name+'_intermediate_{epoch:03d}.h5'
    saveEveryEpoch = keras.callbacks.ModelCheckpoint(checkpointPattern,monitor='val_accuracy',verbose=0,save_best_only=False,mode='auto',save_freq='epoch')
    print(cnn.summary())

    started = time()
    history = cnn.fit(xTrain,yTrainInd,epochs=epochs,batch_size=batchSize,validation_data=(xTest,yTestInd),callbacks=[saveEveryEpoch])
    print ('Time required %f' %(time()-started))

    # Last-epoch accuracies (in percent) become part of the output file names.
    trainAcc = history.history['accuracy'][-1]*100
    testAcc = history.history['val_accuracy'][-1]*100
    outputStem = '../model/'+name+('_epochs_%d_batchsize_%d_trainAcc_%.2f_testAcc_%.2f' %(epochs,batchSize,trainAcc,testAcc))
    plotFileName = outputStem+'.png'
    modelFileName = outputStem+'.h5'
    historyFileName = plotFileName.replace('.png','.dat')

    utils.saveHistory(historyFileName,history)
    cnn.save(modelFileName)
    plot.plotMetrics(plotFileName,history)
    keras.backend.clear_session()

    # One checkpoint path per epoch, numbered from 001.
    checkpointFiles = ['../model/'+name+'_intermediate_'+str(epoch).zfill(3)+'.h5' for epoch in range(1,epochs+1)]
    utils.selectBestModelHistory(checkpointFiles,historyFileName)
示例#5
0
def trainUsingVGG16(name,xTrain,yTrain,yTrainInd,xTest,yTest,yTestInd,epochs,batchSize):
    '''
    Train a classifier head on top of a frozen VGG16 convolutional base.

    The base is created with ImageNet weights and without its top
    layers; every base layer is marked non-trainable, so only the added
    head is trained. The head is:

    x = layers.Flatten()(vgg16.output)
    x = layers.Dense(100, activation='relu')(x)
    x = layers.Dense(2, activation='softmax')(x)
    model = keras.Model(inputs=vgg16.input, outputs=x)

    Before the model is built, both datasets are passed through
    transform.renormalizeDataset(..., VGG=True); the original notes
    describe this as subtracting the mean intensity from each image.
    The model is compiled with categorical cross-entropy, an accuracy
    metric and SGD (learning rate 0.01, momentum 0.99, no Nesterov).
    A checkpoint is written to '../model/<name>_intermediate_<epoch>.h5'
    after every epoch. Once training finishes, the history, the final
    model and a metrics plot are saved, and the list of checkpoint paths
    plus the history file are handed to utils.selectBestModelHistory
    (according to the original author's notes this keeps the most
    accurate checkpoint and removes the others).

    Input parameters:
    name : (str) prefix used for every file written by this function.
    xTrain : 4D numpy array of training images; xTrain.shape is
        [N, row, col, channel]. The original notes state the datasets
        have 3 channels (RGB) for this model.
    yTrain : 1D array of labels. 0 is collapsed, 1 is upright. Not used
        inside this function.
    yTrainInd : 2D indicator array for the training dataset.
    xTest : 4D test dataset, used as validation data.
    yTest : 1D array of labels for the test dataset. Not used inside
        this function.
    yTestInd: 2D indicator array for the test dataset.
    epochs : (int) number of training epochs.
    batchSize : (int) number of images to use in every batch.
    '''
    sgd = optimizers.SGD(learning_rate=0.01,momentum=0.99,nesterov=False)
    # The input shape is taken from the data before renormalization.
    _,nRow,nCol,nChannel = xTrain.shape
    xTrain,xTest = transform.renormalizeDataset(xTrain,xTest,VGG=True)

    base = VGG16(input_shape=(nRow,nCol,nChannel),weights='imagenet',include_top=False)
    for baseLayer in base.layers:
        baseLayer.trainable = False

    flat = layers.Flatten()(base.output)
    hidden = layers.Dense(100, activation='relu')(flat)
    probabilities = layers.Dense(2, activation='softmax')(hidden)
    net = keras.Model(inputs=base.input, outputs=probabilities)

    net.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
    checkpointPattern = '../model/'+name+'_intermediate_{epoch:03d}.h5'
    saveEveryEpoch = keras.callbacks.ModelCheckpoint(checkpointPattern,monitor='val_accuracy',verbose=0,save_best_only=False,mode='auto',save_freq='epoch')
    print(net.summary())

    started = time()
    history = net.fit(xTrain,yTrainInd,epochs=epochs,batch_size=batchSize,validation_data=(xTest,yTestInd),callbacks=[saveEveryEpoch])
    print ('Time required %f' %(time()-started))

    # Last-epoch accuracies (in percent) become part of the output file names.
    trainAcc = history.history['accuracy'][-1]*100
    testAcc = history.history['val_accuracy'][-1]*100
    outputStem = '../model/'+name+('_epochs_%d_batchsize_%d_trainAcc_%.2f_testAcc_%.2f' %(epochs,batchSize,trainAcc,testAcc))
    plotFileName = outputStem+'.png'
    modelFileName = outputStem+'.h5'
    historyFileName = plotFileName.replace('.png','.dat')

    utils.saveHistory(historyFileName,history)
    net.save(modelFileName)
    plot.plotMetrics(plotFileName,history)
    keras.backend.clear_session()

    # One checkpoint path per epoch, numbered from 001.
    checkpointFiles = ['../model/'+name+'_intermediate_'+str(epoch).zfill(3)+'.h5' for epoch in range(1,epochs+1)]
    utils.selectBestModelHistory(checkpointFiles,historyFileName)