def main():
    '''
    Build the batch generator selected by the `hp` settings and train the
    configured model.

    Data source:
        hp.size_stocks == 0 : load '<hp.path_dir>/<hp.code>_from_2010.npy' and
                              wrap it with utils.single_stock_generator.
        otherwise           : use utils.multi_stock_generator on hp.path_dir.

    Model:
        hp.model_type == "gan" : train through gan.gan_train_step. This path
                                 is accepted only when hp.size_stocks == 1;
                                 any other value prints a message and exits
                                 the process with a non-zero status.
        otherwise              : train through dnn.train_step and, when
                                 hp._savehistory is truthy, pass the trained
                                 model to utils.saveHistory.
    '''
    if hp.size_stocks == 0:
        data = np.load(hp.path_dir + '/' + hp.code + '_from_2010.npy')
        batch_gen = utils.single_stock_generator(
            data, hp.batch_size, hp.M, hp.N, hp.seq_len, hp.n_features,
            hp.num_stock_size)
    else:
        batch_gen = utils.multi_stock_generator(
            hp.path_dir, hp.batch_size, hp.M, hp.N, hp.seq_len, hp.n_features,
            hp.num_stock_size)
    print("Stock sequence length is {}, and stock size is {}.".format(hp.seq_len, hp.num_stock_size))

    if hp.model_type == "gan":
        # Imported lazily so that only the selected backend module is loaded.
        import gan
        if hp.size_stocks != 1:
            print("Sorry, gan model work only with multi stocks.")
            import sys
            # Unsupported configuration: report failure to the calling
            # process instead of exiting with a success status.
            sys.exit(1)
        gan.gan_train_step(batch_gen, hp.num_epochs, hp.num_stock_size,
                           hp.num_iters, hp.M, hp.N, hp.T)
    else:
        import dnn
        model = dnn.train_step(batch_gen, hp.num_epochs, hp.num_stock_size,
                               hp.num_iters)
        # History saving needs `model`, which exists only on this path, so it
        # stays inside the DNN branch.
        if hp._savehistory:
            # The history pass runs for a quarter of the training iterations.
            num_test_iters = hp.num_iters // 4
            utils.saveHistory(batch_gen, model, M=hp.M,
                              num_stock_size=hp.num_stock_size,
                              num_test_iters=num_test_iters)
    print("All training step is done.")
def trainIntermediateModel(modelFile,name,xTrain,yTrain,yTrainInd,xTest,yTest,yTestInd,optimizer,epochs,batchSize,bestModelMode='history'):
    '''
    Resume training of a previously saved model with a different optimizer
    (and therefore, typically, a different learning rate).

    The model is loaded from '../model/<modelFile>.h5', recompiled with the
    supplied optimizer and trained. A checkpoint is written after every epoch
    as '../model/<name>_intermediate_<epoch>.h5'. After training, the final
    model (.h5), the training history (.dat) and the metrics plot (.png) are
    saved under a common file stem that encodes the epochs, batch size and the
    final training/validation accuracy.

    Input parameters:
    modelFile : (str) name of the intermediate model that needs to be trained
                again (without the '../model/' prefix and '.h5' suffix).
    name : (str) Name that the user assigns to the model. All the intermediate
           and best models are saved with this prefix.
    xTrain : training dataset stack represented as a 4D numpy array.
             xTrain.shape yields [N, row, col, channel] where N is the number
             of images in the training dataset, row and col correspond to the
             size of the image, and channel is 1 for this model.
    yTrain : 1D array of labels. 0 is collapsed, 1 is upright. Not used by
             this function.
    yTrainInd : 2D indicator array for the training dataset.
    xTest : 4D test dataset.
    yTest : 1D array of labels for the test dataset. Not used by this function.
    yTestInd : 2D indicator array for the test dataset.
    optimizer : optimizer passed unchanged to model.compile.
    epochs : (int) number of epochs for which the training needs to be done.
    batchSize : (int) number of images to use in every batch.
    bestModelMode : method for selecting the best model. Available options are
                    'history' and 'all'. With 'history' the best model is
                    selected using the saved training history. With 'all' the
                    best model is selected by testing the model accuracy on
                    the entire dataset - training + test data. Any other value
                    skips best-model selection.
    '''
    model = keras.models.load_model('../model/'+modelFile+'.h5')
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    # One checkpoint per epoch; the epoch number is zero-padded to 3 digits.
    checkpointTemplate = '../model/'+name+'_intermediate_{epoch:03d}.h5'
    callbacks_list = [keras.callbacks.ModelCheckpoint(checkpointTemplate,monitor='val_accuracy',verbose=0,save_best_only=False,mode='auto',save_freq='epoch')]

    # summary() prints the table itself and returns None, so it is not wrapped
    # in print().
    model.summary()
    history = model.fit(xTrain,yTrainInd,epochs=epochs,batch_size=batchSize,validation_data=(xTest,yTestInd),callbacks=callbacks_list)

    # Final-epoch accuracies (in percent) are encoded in the output file names.
    trainAcc = history.history['accuracy'][-1]*100
    testAcc = history.history['val_accuracy'][-1]*100
    # Only the suffix is %-formatted, so `name` is never treated as a format
    # string.
    fileStem = '../model/'+name+('_epochs_%d_batchsize_%d_trainAcc_%.2f_testAcc_%.2f' %(epochs,batchSize,trainAcc,testAcc))
    plotFileName = fileStem+'.png'
    modelFileName = fileStem+'.h5'
    historyFileName = fileStem+'.dat'

    utils.saveHistory(historyFileName,history)
    model.save(modelFileName)
    plot.plotMetrics(plotFileName,history)
    keras.backend.clear_session()

    # Paths of the per-epoch checkpoints written by ModelCheckpoint above
    # (epochs are numbered from 1).
    modelFileList = ['../model/'+name+'_intermediate_'+str(epoch).zfill(3)+'.h5' for epoch in range(1,epochs+1)]

    # The original comparison used the misspelling 'histroy', so the default
    # 'history' mode never ran the selection. The misspelling is still
    # accepted for callers that passed it explicitly.
    if bestModelMode in ('history','histroy'):
        utils.selectBestModelHistory(modelFileList,historyFileName)
    elif bestModelMode=='all':
        utils.selectBestModel(modelFileList,xTrain,yTrainInd,xTest,yTestInd)
# Step decay: the learning rate starts at LEARNING_RATE and is multiplied by
# 0.2 once for every 10 completed epochs.
lr_schedule = LearningRateScheduler(
    lambda epoch: LEARNING_RATE * (0.2 ** (epoch // 10))
)

# Checkpoint file names carry the epoch number and that epoch's `val_acc`.
# NOTE(review): this assumes the training logs expose the key `val_acc`
# (rather than `val_accuracy`) - confirm against the metrics configured in
# build_model().
check_point = ModelCheckpoint(
    '{epoch:02d}_{val_acc:.4f}.h5',
    monitor='val_acc',
)

## Train
model = build_model()
# NOTE(review): both `reduce_lr` and `lr_schedule` are passed as callbacks;
# `reduce_lr` is defined outside this section - verify that the two do not
# override each other's learning-rate changes.
history = model.fit(
    [train_x1, train_x2],
    train_y,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    verbose=1,
    validation_data=([valid_x1, valid_x2], valid_y),
    callbacks=[auc, stop, csv, reduce_lr, lr_schedule, check_point],
    shuffle=True,
    class_weight=class_weights,
)

# save Model
model.save(MODEL_NAME + '.h5')
print('model saved')

# save history (stored next to the model, same base name with a .pkl suffix)
HISTORY_FILE = MODEL_NAME + '.pkl'
saveHistory(history, auc, HISTORY_FILE)

print('Total time:', time.time() - start_time)
K.clear_session()
def model_02(name,xTrain,yTrain,yTrainInd,xTest,yTest,yTestInd,epochs,batchSize):
    '''
    Referred to as the CNN model in the manuscript.

    The structure of the model is:
    CONV (32,5,5,SAME), RELU, CONV (32,5,5), RELU, MAXPOOL (2,2), DROPOUT (0.50)
    DENSE (256), RELU, DROPOUT (0.50)
    DENSE (128), RELU, DROPOUT (0.50)
    DENSE (2), SOFTMAX

    The model is trained with SGD (learning rate 0.01, momentum 0.99, no
    Nesterov) on a categorical cross-entropy loss. The model parameters are
    saved after every epoch as '../model/<name>_intermediate_<epoch>.h5'.
    After the last epoch the final model (.h5), the training history (.dat)
    and the metrics plot (.png) are saved, and the per-epoch checkpoints
    together with the saved history are handed to
    utils.selectBestModelHistory, which (per the original notes) retains the
    most accurate model and removes all other intermediate models.

    Input parameters:
    name : (str) Name that the user assigns to the model. All the intermediate
           and best models are saved with this prefix.
    xTrain : training dataset stack represented as a 4D numpy array.
             xTrain.shape yields [N, row, col, channel] where N is the number
             of images in the training dataset, row and col correspond to the
             size of the image, and channel is 1 for this model.
    yTrain : 1D array of labels. 0 is collapsed, 1 is upright. Not used by
             this function.
    yTrainInd : 2D indicator array for the training dataset.
    xTest : 4D test dataset.
    yTest : 1D array of labels for the test dataset. Not used by this function.
    yTestInd : 2D indicator array for the test dataset.
    epochs : (int) number of epochs for which the training needs to be done.
    batchSize : (int) number of images to use in every batch.
    '''
    optimizer = optimizers.SGD(learning_rate=0.01,momentum=0.99,nesterov=False)

    # Only the per-image dimensions are needed to size the input layer; the
    # 4-way unpacking also rejects inputs that are not 4D.
    _,row,col,channel = xTrain.shape

    model = keras.Sequential([
        layers.Input(shape=(row,col,channel)),
        layers.Conv2D(32,kernel_size=(5,5),padding='same'),
        layers.Activation('relu'),
        layers.Conv2D(32,kernel_size=(5,5)),
        layers.Activation('relu'),
        layers.MaxPooling2D(pool_size=(2,2)),
        layers.Dropout(0.50),
        layers.Flatten(),
        layers.Dense(256),
        layers.Activation('relu'),
        layers.Dropout(0.50),
        layers.Dense(128),
        layers.Activation('relu'),
        layers.Dropout(0.50),
        layers.Dense(2),
        layers.Activation('softmax'),
    ])
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    # One checkpoint per epoch; the epoch number is zero-padded to 3 digits.
    filepath = '../model/'+name+'_intermediate_{epoch:03d}.h5'
    callbacks_list = [keras.callbacks.ModelCheckpoint(filepath,monitor='val_accuracy',verbose=0,save_best_only=False,mode='auto',save_freq='epoch')]

    # summary() prints the table itself and returns None, so it is not wrapped
    # in print().
    model.summary()

    tic = time()
    history = model.fit(xTrain,yTrainInd,epochs=epochs,batch_size=batchSize,validation_data=(xTest,yTestInd),callbacks=callbacks_list)
    toc = time()
    print ('Time required %f' %(toc-tic))

    # Final-epoch accuracies (in percent) are encoded in the output file names.
    trainAcc = history.history['accuracy'][-1]*100
    testAcc = history.history['val_accuracy'][-1]*100
    # Only the suffix is %-formatted, so `name` is never treated as a format
    # string.
    fileStem = '../model/'+name+('_epochs_%d_batchsize_%d_trainAcc_%.2f_testAcc_%.2f' %(epochs,batchSize,trainAcc,testAcc))
    plotFileName = fileStem+'.png'
    modelFileName = fileStem+'.h5'
    historyFileName = fileStem+'.dat'

    utils.saveHistory(historyFileName,history)
    model.save(modelFileName)
    plot.plotMetrics(plotFileName,history)
    keras.backend.clear_session()

    # Paths of the per-epoch checkpoints written by ModelCheckpoint above
    # (epochs are numbered from 1).
    modelFileList = ['../model/'+name+'_intermediate_'+str(epoch).zfill(3)+'.h5' for epoch in range(1,epochs+1)]
    utils.selectBestModelHistory(modelFileList,historyFileName)
def trainUsingVGG16(name,xTrain,yTrain,yTrainInd,xTest,yTest,yTestInd,epochs,batchSize):
    '''
    VGG16 model with additional dense layers of size 100 and 2 at the end.
    Imagenet weights were used for the convolution section and training was
    performed only on the final 2 dense layers (every VGG16 layer is frozen).

    The structure of the model is:
    x = layers.Flatten()(vgg16.output)
    x = layers.Dense(100, activation='relu')(x)
    x = layers.Dense(2, activation='softmax')(x)
    model = keras.Model(inputs=vgg16.input, outputs=x)

    The model is trained with SGD (learning rate 0.01, momentum 0.99, no
    Nesterov) on a categorical cross-entropy loss. The model parameters are
    saved after every epoch as '../model/<name>_intermediate_<epoch>.h5'.
    After the last epoch the final model (.h5), the training history (.dat)
    and the metrics plot (.png) are saved, and the per-epoch checkpoints
    together with the saved history are handed to
    utils.selectBestModelHistory, which (per the original notes) retains the
    most accurate model and removes all other intermediate models.

    The training and test datasets have 3 channels (RGB). Both are passed
    through transform.renormalizeDataset(..., VGG=True) before training; per
    the original notes this subtracts the mean intensity value from each
    image.

    Input parameters:
    name : (str) Name that the user assigns to the model. All the intermediate
           and best models are saved with this prefix.
    xTrain : training dataset stack represented as a 4D numpy array.
             xTrain.shape yields [N, row, col, channel] where N is the number
             of images in the training dataset, row and col correspond to the
             size of the image, and channel is 3 (RGB) for this model.
    yTrain : 1D array of labels. 0 is collapsed, 1 is upright. Not used by
             this function.
    yTrainInd : 2D indicator array for the training dataset.
    xTest : 4D test dataset.
    yTest : 1D array of labels for the test dataset. Not used by this function.
    yTestInd : 2D indicator array for the test dataset.
    epochs : (int) number of epochs for which the training needs to be done.
    batchSize : (int) number of images to use in every batch.
    '''
    optimizer = optimizers.SGD(learning_rate=0.01,momentum=0.99,nesterov=False)

    # The input shape is read from the dataset as passed in, before it is
    # renormalized; the 4-way unpacking also rejects inputs that are not 4D.
    _,row,col,channel = xTrain.shape
    xTrain,xTest = transform.renormalizeDataset(xTrain,xTest,VGG=True)

    # Convolutional base with ImageNet weights and without the original
    # classifier head; frozen so that only the new dense layers are trained.
    vgg16 = VGG16(input_shape=(row,col,channel),weights='imagenet',include_top=False)
    for layer in vgg16.layers:
        layer.trainable = False

    x = layers.Flatten()(vgg16.output)
    x = layers.Dense(100, activation='relu')(x)
    x = layers.Dense(2, activation='softmax')(x)
    model = keras.Model(inputs=vgg16.input, outputs=x)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    # One checkpoint per epoch; the epoch number is zero-padded to 3 digits.
    filepath = '../model/'+name+'_intermediate_{epoch:03d}.h5'
    callbacks_list = [keras.callbacks.ModelCheckpoint(filepath,monitor='val_accuracy',verbose=0,save_best_only=False,mode='auto',save_freq='epoch')]

    # summary() prints the table itself and returns None, so it is not wrapped
    # in print().
    model.summary()

    tic = time()
    history = model.fit(xTrain,yTrainInd,epochs=epochs,batch_size=batchSize,validation_data=(xTest,yTestInd),callbacks=callbacks_list)
    toc = time()
    print ('Time required %f' %(toc-tic))

    # Final-epoch accuracies (in percent) are encoded in the output file names.
    trainAcc = history.history['accuracy'][-1]*100
    testAcc = history.history['val_accuracy'][-1]*100
    # Only the suffix is %-formatted, so `name` is never treated as a format
    # string.
    fileStem = '../model/'+name+('_epochs_%d_batchsize_%d_trainAcc_%.2f_testAcc_%.2f' %(epochs,batchSize,trainAcc,testAcc))
    plotFileName = fileStem+'.png'
    modelFileName = fileStem+'.h5'
    historyFileName = fileStem+'.dat'

    utils.saveHistory(historyFileName,history)
    model.save(modelFileName)
    plot.plotMetrics(plotFileName,history)
    keras.backend.clear_session()

    # Paths of the per-epoch checkpoints written by ModelCheckpoint above
    # (epochs are numbered from 1).
    modelFileList = ['../model/'+name+'_intermediate_'+str(epoch).zfill(3)+'.h5' for epoch in range(1,epochs+1)]
    utils.selectBestModelHistory(modelFileList,historyFileName)