def updateHyperParams(self, fenetre, idConf):
    """Open the hyper-parameter window for a configuration.

    The dataset is the one whose name is currently held by
    ``fenetre.varDataset``; the configuration is identified by *idConf*.
    The loaded configuration is kept in ``self.config``.

    Args:
        fenetre: parent window; must expose ``varDataset.get()``.
        idConf: identifier of the configuration to display.
    """
    # Resolve the dataset name picked in the window to its database id
    datasetId = db.getDatasetIdByName(self.conn, fenetre.varDataset.get())

    # Remember the configuration on the instance
    self.config = db.getConfig(self.conn, idConf)

    # Current hyper parameters, then the best ones with their DEV accuracy
    currentHps = db.getHyperParams(self.conn, datasetId, idConf)
    bestHps, bestDevAcc, _ = db.getBestHyperParams(
        self.conn, datasetId, idConf)

    # The window receives doCreateOrUpdateHyperParams as its callback
    hpWindow = view.ViewOrUpdateHyperParamsWindow(
        fenetre, self.doCreateOrUpdateHyperParams)
    hpWindow.run(currentHps, bestHps, bestDevAcc)
def optimizeModel(self, conn, idRun, structure, hyperParams, print_cost=True,
                  show_plot=True, extractImageErrors=True):
    """Train the model, persist it and store the run results in the DB.

    Runs up to ``hyperParams[const.KEY_NUM_EPOCHS]`` epochs. When the last
    scheduled epoch ends above the lowest cost seen so far (a local
    overshoot), up to 100 extra epochs are run until the cost is back at or
    below that minimum.

    Args:
        conn: database connection handed to the ``db`` helpers.
        idRun: run identifier used to persist the model and update the run.
        structure: model structure forwarded to ``self.modelInit``.
        hyperParams: mapping indexed by the ``const.KEY_*`` hyper-parameter
            keys. A negative mini-batch size selects whole-data gradient
            descent.
        print_cost: log the cost periodically and record it for the plot.
        show_plot: display the cost and DEV accuracy plots.
        extractImageErrors: compute per-tag error statistics for the
            training and DEV sets.

    Returns:
        Tuple ``(accuracyDev, accuracyTrain)``.
    """
    costs = []  # To keep track of the cost
    DEV_accuracies = []  # for DEV accuracy graph

    # Get hyper parameters from dictionary
    self.beta = hyperParams[const.KEY_BETA]
    self.keep_prob = hyperParams[const.KEY_KEEP_PROB]
    self.num_epochs = hyperParams[const.KEY_NUM_EPOCHS]
    self.minibatch_size = hyperParams[const.KEY_MINIBATCH_SIZE]
    self.start_learning_rate = hyperParams[const.KEY_START_LEARNING_RATE]
    self.learning_rate_decay_nb = hyperParams[
        const.KEY_LEARNING_RATE_DECAY_NB_EPOCH]
    self.learning_rate_decay_percent = hyperParams[
        const.KEY_LEARNING_RATE_DECAY_PERCENT]
    self.useBatchNormalization = hyperParams[
        const.KEY_USE_BATCH_NORMALIZATION]

    # Convert ( nbLines, dims... ) to ( None, dims... )
    X_shape = [None]
    X_shape.extend(self.dataInfo[const.KEY_TRN_X_SHAPE][1:])
    X_type = self.datasetTrn.X.dtype
    Y_shape = [None]
    Y_shape.extend(self.dataInfo[const.KEY_TRN_Y_SHAPE][1:])
    Y_type = self.datasetTrn.Y.dtype

    self.modelInit(structure, X_shape, X_type, Y_shape, Y_type,
                   training=True)

    seed = 3  # to keep consistent results

    # Start the session to compute the tensorflow graph
    with self.getSession() as sess:
        # initialize session variables
        self.initSessionVariables(sess)

        # current iteration (incremented before each run, so first is 0)
        iteration = -1

        ## optimisation may overshoot locally
        ## To avoid returning an overshoot, we detect it and run extra
        ## epochs if needed
        finalizationMode = False
        current_num_epochs = hyperParams[const.KEY_NUM_EPOCHS]
        iEpoch = 0
        minCost = 99999999999999
        minCostFinalization = 99999999999999
        finished = False

        # When do we display epoch stats. Clamped to 1 so that the modulo
        # below cannot divide by zero when num_epochs is 0.
        nbStatusEpoch = max(1, math.ceil(current_num_epochs / 20))

        # intercept Ctrl-C (handler registration is currently disabled)
        self.interrupted = False
        import signal
        # signal.signal( signal.SIGINT, self.signal_handler )

        self.initializeDataset(sess, self.datasetTrn)

        # Start time
        tsStart = time.time()
        # time to make sure we trace something every N seconds
        tsTraceStart = tsStart

        # Do the training loop
        while (not self.interrupted and not finished
               and iEpoch <= current_num_epochs):
            epoch_cost = 0.  # Defines a cost related to an epoch

            if self.minibatch_size < 0:
                # No mini-batch : do a gradient descent for whole data
                iteration += 1
                # NOTE(review): argument order differs from the mini-batch
                # call below -- confirm against runIteration's signature.
                epoch_cost = self.runIteration(
                    iEpoch,
                    1,
                    sess,
                    self.datasetTrn.X,
                    self.datasetTrn.Y,
                    self.keep_prob,
                )
            else:
                # Minibatch mode, not handled by data source
                # m : number of examples in the train set
                m = self.dataInfo[const.KEY_TRN_X_SIZE]
                # number of minibatches of size minibatch_size in the
                # train set
                num_minibatches = math.ceil(m / self.minibatch_size)
                seed = seed + 1
                minibatches = self.random_mini_batches(
                    self.datasetTrn.X, self.datasetTrn.Y,
                    self.minibatch_size, seed)

                for minibatch in minibatches:
                    iteration += 1

                    # Select a minibatch
                    (minibatch_X, minibatch_Y) = minibatch
                    minibatch_cost = self.runIteration(
                        sess, (minibatch_X, minibatch_Y), iteration,
                        num_minibatches, self.keep_prob)
                    epoch_cost += minibatch_cost / num_minibatches

                    if print_cost and iteration == 0:
                        # Display iteration 0 to allow verifying the cost
                        # calculation across machines
                        logger.info(
                            "Current cost epoch %i; iteration %i; %f" %
                            (iEpoch, iteration, epoch_cost))

                    # time to trace?
                    tsTraceNow = time.time()
                    tsTraceElapsed = tsTraceNow - tsTraceStart
                    # Every 60 seconds
                    if tsTraceElapsed >= 60:
                        logger.info(
                            "Current cost epoch %i; iteration %i; %f" %
                            (iEpoch, iteration, epoch_cost))
                        # reset trace start
                        tsTraceStart = tsTraceNow

            if print_cost and iEpoch % nbStatusEpoch == 0:
                logger.info("Cost after epoch %i; iteration %i; %f" %
                            (iEpoch, iteration, epoch_cost))
                if iEpoch != 0:
                    # Performance counters
                    curElapsedSeconds, curPerfIndex = self.getPerfCounters(
                        tsStart, iEpoch, self.datasetTrn.X.shape)
                    # Values go through %s placeholders: extra positional
                    # args without placeholders make logging fail to
                    # format the record.
                    logger.info(" current: elapsedTime: %s perfIndex: %s",
                                curElapsedSeconds, curPerfIndex)

                # calculate DEV accuracy
                DEV_accuracy = self.accuracyEval(
                    (self.datasetDev.X, self.datasetDev.Y), "dev")
                logger.info(" current: DEV accuracy: %f" % (DEV_accuracy))
                DEV_accuracies.append(DEV_accuracy)

            if print_cost == True and iEpoch % 5 == 0:
                costs.append(epoch_cost)

            # Record min cost
            minCost = min(minCost, epoch_cost)

            # Next epoch
            iEpoch += 1
            self.var_numEpoch.load(iEpoch)

            # Close to finish?
            if not finalizationMode and iEpoch > current_num_epochs:
                # Activate finalization mode
                finalizationMode = True
                # local overshoot?
                if epoch_cost > minCost:
                    # Yes, run some extra epochs
                    logger.warning(
                        "Local cost overshoot detected, adding maximum 100 epochs to leave local cost overshoot"
                    )
                    current_num_epochs += 100
                    minCostFinalization = minCost

            if finalizationMode:
                # Check overshoot is finished
                if epoch_cost <= minCostFinalization:
                    # finished
                    finished = True

        self.modelOptimizeEnd(sess)

        if self.interrupted:
            logger.info("Training has been interrupted by Ctrl-C")
            logger.info("Store current epoch number '" + str(iEpoch) +
                        "' in run hyper parameters")
            # Get runs and hps
            # NOTE(review): this branch reads self.idRun rather than the
            # idRun parameter, and calls db.getHyperParams with two
            # arguments -- confirm both against the db module.
            run = db.getRun(conn, self.idRun)
            idRunHps = run["idHyperParams"]
            runHps = db.getHyperParams(conn, idRunHps)["hyperParameters"]
            # Modify num epochs
            runHps[const.KEY_NUM_EPOCHS] = iEpoch
            # update run
            db.updateRun(conn, self.idRun, runHps)

        # Final cost
        print("Parameters have been trained!")
        logger.info("Final cost: %s", epoch_cost)

        ## Elapsed (seconds)
        elapsedSeconds, perfIndex = self.getPerfCounters(
            tsStart, iEpoch, self.datasetTrn.X.shape)
        perfInfo = {}

        logger.info("Elapsed (s): %s", elapsedSeconds)
        logger.info("Perf index : %s", perfIndex)

        self.persistModel(sess, idRun)

        accuracyTrain = self.accuracyEval(
            (self.datasetTrn.X, self.datasetTrn.Y), "trn")
        print("Train Accuracy:", accuracyTrain)

        accuracyDev = self.accuracyEval(
            (self.datasetDev.X, self.datasetDev.Y), "dev")
        print("Dev Accuracy:", accuracyDev)

        if show_plot:
            # plot the cost
            plt.plot(np.squeeze(costs))
            plt.ylabel('cost')
            plt.xlabel('iterations (per tens)')
            plt.title("Start learning rate =" +
                      str(self.start_learning_rate))
            plt.show()

            # plot the accuracies
            plt.plot(np.squeeze(DEV_accuracies))
            plt.ylabel('DEV accuracy')
            plt.xlabel('iterations (100)')
            plt.title("Start learning rate =" +
                      str(self.start_learning_rate))
            plt.show()

        ## Errors
        resultInfo = {}

        if extractImageErrors:
            # Lists of OK for training
            oks_train = self.correctPredictionEval(
                (self.datasetTrn.X, self.datasetTrn.Y))
            map1, map2 = self.statsExtractErrors("train",
                                                 dataset=self.datasetTrn,
                                                 oks=oks_train,
                                                 show_plot=show_plot)
            # Errors nb by data tag
            resultInfo[const.KEY_TRN_NB_ERROR_BY_TAG] = map1
            resultInfo[const.KEY_TRN_PC_ERROR_BY_TAG] = map2

            oks_dev = self.correctPredictionEval(
                (self.datasetDev.X, self.datasetDev.Y))
            map1, map2 = self.statsExtractErrors("dev",
                                                 dataset=self.datasetDev,
                                                 oks=oks_dev,
                                                 show_plot=show_plot)
            # Errors nb by data tag
            resultInfo[const.KEY_DEV_NB_ERROR_BY_TAG] = map1
            resultInfo[const.KEY_DEV_PC_ERROR_BY_TAG] = map2

        # Update DB run after execution, add extra info
        db.updateRunAfter(conn,
                          idRun,
                          perf_info=perfInfo,
                          result_info=resultInfo,
                          perf_index=perfIndex,
                          elapsed_second=elapsedSeconds,
                          train_accuracy=accuracyTrain.astype(float),
                          dev_accuracy=accuracyDev.astype(float))

        return accuracyDev, accuracyTrain
# NOTE(review): this is a fragment -- the enclosing function header and any
# further branches of the if/elif chain below are not visible here.
dataset = db.getDatasetById( conn, idDataset )
logger.info( "Using dataset {0}".format( dataset ) )

# Read config
config = db.getConfig( conn, idConfig );

# Get machine name
machineName = db.getMachineNameById( conn, config[ "idMachine" ] )

logger.info( "Structure:" )
logger.info( config[ "structure" ] )

# Get hyper parameters; the source depends on which button was clicked
if ( buttonClicked == "Train" ) :
    # Get config hyper parameters
    hyperParams = db.getHyperParams( conn, idDataset, config[ "id" ] )
elif ( buttonClicked == "Predict" ) :
    # Hyper parameters depend on choice
    choiceHp = predictParams[ "choiceHyperParams" ]
    if ( choiceHp == 1 ) :
        # Last idRun of this configuration
        idRun = db.getRunIdLast( conn, config[ "id" ] )
        # Hyper parameters referenced by that run
        run = db.getRun( conn, idRun )
        hyperParams = db.getHyperParamsById( conn, run[ "idHyperParams" ] )
def optimizeModel(self, conn, idRun, structure, hyperParams, print_cost=True,
                  show_plot=True, extractImageErrors=True,
                  isCalculateBestEpoch=False):
    """Train the model from tf.data pipelines, persist it and store results.

    The training data set is shuffled, batched by the mini-batch size and
    repeated for the configured number of epochs; the loop ends when the
    training iterator raises ``tf.errors.OutOfRangeError`` or when
    ``self.interrupted`` is set.

    Args:
        conn: database connection handed to the ``db`` helpers.
        idRun: run identifier used to persist the model and update the run.
        structure: model structure forwarded to ``self.modelInit``.
        hyperParams: mapping indexed by the ``const.KEY_*`` hyper-parameter
            keys.
        print_cost: log the cost periodically and record it for the plot.
        show_plot: display the cost and DEV accuracy plots.
        extractImageErrors: compute per-tag error statistics for the
            training and DEV sets.
        isCalculateBestEpoch: track the epoch with the best DEV accuracy
            among status epochs in the second half of training.

    Returns:
        Tuple ``(accuracyDev, accuracyTrain, maxBestNbEpoch,
        maxBestAccuracyDevEpoch)``; the last two are -1 when no best epoch
        was recorded.

    Raises:
        ValueError: if the mini-batch size is not strictly positive.
    """
    tf.reset_default_graph()  # Forget the past
    tf.set_random_seed(1)  # Repeatable operations

    costs = []  # To keep track of the cost
    DEV_accuracies = []  # for DEV accuracy graph

    # Get hyper parameters from dictionary
    self.beta = hyperParams[const.KEY_BETA]
    self.keep_prob = hyperParams[const.KEY_KEEP_PROB]
    self.num_epochs = hyperParams[const.KEY_NUM_EPOCHS]
    self.minibatch_size = hyperParams[const.KEY_MINIBATCH_SIZE]

    # Validate before the size is used as a divisor below: a zero size
    # would otherwise fail with ZeroDivisionError instead of this error.
    if self.minibatch_size <= 0:
        raise ValueError("Mini-batch size is required")

    # m : number of examples in the train set
    m = self.dataInfo[const.KEY_TRN_X_SIZE]
    # number of minibatches of size minibatch_size in the train set
    self.numMinibatches = math.ceil(m / self.minibatch_size)

    self.start_learning_rate = hyperParams[const.KEY_START_LEARNING_RATE]

    # Decay is configured in epochs; multiply by the number of mini-batches
    # per epoch to express it in iterations
    decayEpochNb = hyperParams[const.KEY_LEARNING_RATE_DECAY_NB_EPOCH]
    self.learning_rate_decay_nb = decayEpochNb * self.numMinibatches
    self.learning_rate_decay_percent = hyperParams[
        const.KEY_LEARNING_RATE_DECAY_PERCENT]

    self.useBatchNormalization = hyperParams[
        const.KEY_USE_BATCH_NORMALIZATION]

    # Placeholder shapes: ( None, dims... )
    X_shape = [None]
    X_shape.extend(self.dataInfo[const.KEY_TRN_X_SHAPE])
    X_type = tf.float32

    # Shape of one real mini-batch, used for performance counters
    X_real_shape = [self.minibatch_size]
    X_real_shape.extend(self.dataInfo[const.KEY_TRN_X_SHAPE])

    Y_shape = [None]
    Y_shape.extend(self.dataInfo[const.KEY_TRN_Y_SHAPE])
    Y_type = tf.float32

    # Init model
    self.modelInit(structure, X_shape, X_type, Y_shape, Y_type,
                   training=True)

    # Prepare TRN reader
    if self.datasetTrn.inMemory:
        # In memory reader
        self.tfDatasetTrn = tf.data.Dataset.from_tensor_slices((
            self.datasetTrn.X,
            self.datasetTrn.Y,
        ))
    else:
        # TF record file based reader
        self.tfDatasetTrn = tf.data.TFRecordDataset(self.datasetTrn.XY)

    # Shuffle data
    self.tfDatasetTrn = self.tfDatasetTrn.shuffle(
        buffer_size=100000, reshuffle_each_iteration=True, seed=1)
    # Pre-fetch for performance
    self.tfDatasetTrn = self.tfDatasetTrn.prefetch(self.minibatch_size * 16)
    # Data set, minibatch_size slices
    self.tfDatasetTrn = self.tfDatasetTrn.batch(self.minibatch_size)
    # Trn data set, repeat num_epochs (fed through the placeholder)
    self.tfDatasetTrn = self.tfDatasetTrn.repeat(self.phTrnNumEpochs)

    # Prepare DEV reader
    if self.datasetDev.inMemory:
        # In memory reader
        self.tfDatasetDev = tf.data.Dataset.from_tensor_slices(
            (self.datasetDev.X, self.datasetDev.Y))
    else:
        # TF record file based reader
        self.tfDatasetDev = tf.data.TFRecordDataset(self.datasetDev.XY)

    # Pre-fetch and minibatch_size slices
    self.tfDatasetDev = self.tfDatasetDev.prefetch(
        self.minibatch_size * 16).batch(self.minibatch_size)

    trnIterator = self.tfDatasetTrn.make_initializable_iterator(
        shared_name="trnIterator")
    devIterator = self.tfDatasetDev.make_initializable_iterator(
        shared_name="devIterator")

    # Start the session to compute the tensorflow graph
    with self.getSession() as sess:

        self.initSessionVariables(sess)

        # initialise variables and iterators
        sess.run(tf.global_variables_initializer())
        sess.run([trnIterator.initializer, devIterator.initializer],
                 {self.phTrnNumEpochs: self.num_epochs})

        # The `Iterator.string_handle()` method returns a tensor that can
        # be evaluated and used to feed the `handle` placeholder.
        trnHandle = sess.run(trnIterator.string_handle())
        devHandle = sess.run(devIterator.string_handle())

        ## optimisation may overshoot locally
        ## These variables belong to the overshoot finalization logic,
        ## which is currently disabled (see commented block in the loop)
        finalizationMode = False
        current_num_epochs = hyperParams[const.KEY_NUM_EPOCHS]
        minCost = 99999999999999
        minCostFinalization = 99999999999999
        finished = False

        # intercept Ctrl-C (handler registration is currently disabled)
        self.interrupted = False
        import signal
        # signal.signal( signal.SIGINT, self.signal_handler )

        # Do the training loop; epochs and iterations are 1-based here
        iEpoch = 1
        minibatch_cost = 0
        epoch_cost = 0.  # Defines a cost related to an epoch
        iteration = 1

        # Nb status epoch : if we reach it, calculate DEV efficiency.
        # Clamped to 1 so the modulo below can never divide by zero.
        nbStatusEpoch = max(1, math.ceil(self.num_epochs / 20))

        # Start time
        tsStart = time.time()

        # time to make sure we write epoch status every N seconds
        tsStatusEpochStart = tsStart
        secStatusEpoch = 120  # Status epoch every 120 seconds

        # time to make sure we trace something every N seconds
        tsTraceStart = tsStart
        secTrace = 60  # trace every 60 seconds

        # Best epoch values
        maxBestAccuracyDevEpoch = -1
        maxBestNbEpoch = -1

        # Start input enqueue threads.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        lastEpochCost = 0

        try:
            while not self.interrupted and not finished:

                minibatch_cost = self.runIteration(
                    sess, trnHandle, self.keep_prob, iteration,
                    self.numMinibatches)
                epoch_cost += minibatch_cost / self.numMinibatches

                if print_cost and iteration == 1:
                    # Display the first iteration to allow verifying the
                    # cost calculation across machines (iteration starts
                    # at 1 in this loop)
                    logger.info(
                        "Current cost epoch {0}; iteration {1}; {2}".format(
                            iEpoch, iteration, epoch_cost))

                # Tracing
                if print_cost and logger.isEnabledFor(logging.DEBUG):
                    logger.debug(
                        "Current cost epoch {0}; iteration {1}; {2}".format(
                            iEpoch, iteration, epoch_cost))

                # time to trace?
                tsTraceNow = time.time()
                tsTraceElapsed = tsTraceNow - tsTraceStart
                if tsTraceElapsed >= secTrace:
                    logger.info(
                        "Current cost epoch {0}; iteration {1}; {2}".format(
                            iEpoch, iteration, epoch_cost))
                    # reset trace start
                    tsTraceStart = tsTraceNow

                # Current epoch finished?
                if (iteration % self.numMinibatches) == 0:

                    # time to status epoch?
                    tsEpochStatusNow = time.time()
                    tsEpochStatusElapsed = (tsEpochStatusNow -
                                            tsStatusEpochStart)

                    # print epoch cost
                    if print_cost and (
                            (iEpoch % nbStatusEpoch) == 0
                            or tsEpochStatusElapsed >= secStatusEpoch):

                        logger.info(
                            "Cost after epoch {0}; iteration {1}; {2}".format(
                                iEpoch, iteration, epoch_cost))

                        if iEpoch != 1:
                            # Performance counters, for current batch,
                            # m data * nbStatus epochs
                            curElapsedSeconds, curPerfIndex = \
                                self.getPerfCounters(
                                    tsStart, iEpoch, X_real_shape,
                                    m * nbStatusEpoch)
                            logger.info(
                                " current: elapsedTime; {0}; perfIndex; {1:.2f}".format(
                                    curElapsedSeconds, curPerfIndex))

                        # calculate DEV accuracy
                        # Rewind DEV iterator
                        sess.run([devIterator.initializer])
                        DEV_accuracy = self.accuracyEval(devHandle, "dev")
                        logger.info(
                            " current: DEV accuracy: {:.3%}".format(
                                DEV_accuracy))
                        DEV_accuracies.append(DEV_accuracy)

                        # Update best epoch var
                        if (isCalculateBestEpoch
                                and iEpoch > (self.num_epochs / 2)):
                            # max reached?
                            if DEV_accuracy > maxBestAccuracyDevEpoch:
                                maxBestAccuracyDevEpoch = DEV_accuracy
                                maxBestNbEpoch = iEpoch

                        # Reset status epoch timer
                        tsStatusEpochStart = tsEpochStatusNow

                    # Store cost for graph
                    if print_cost and iEpoch % 5 == 0:
                        costs.append(epoch_cost)

                    # Record min cost
                    minCost = min(minCost, epoch_cost)

                    # epoch changed
                    iEpoch += 1
                    lastEpochCost = epoch_cost
                    epoch_cost = 0

                    # Close to finish?
                    # if ( not finalizationMode and ( iEpoch > current_num_epochs ) ) :
                    #     # Activate finalization mode
                    #     finalizationMode = True
                    #     # local overshoot?
                    #     if ( epoch_cost > minCost ) :
                    #         # Yes, run some extra epochs
                    #         logger.info( "WARNING: local cost overshoot detected, adding maximum 100 epochs to leave local cost overshoot" )
                    #         current_num_epochs += 100
                    #         minCostFinalization = minCost
                    #
                    # if ( finalizationMode ) :
                    #     # Check overshoot is finished
                    #     if ( epoch_cost <= minCostFinalization ) :
                    #         # finished
                    #         finished = True

                iteration += 1

        except tf.errors.OutOfRangeError:
            # walk finished
            # decrement iteration and epoch that didn't happen
            iteration -= 1
            iEpoch -= 1
            epoch_cost = lastEpochCost
        finally:
            # When done, ask the threads to stop.
            coord.request_stop()

        # Wait for threads to finish.
        coord.join(threads)

        self.modelOptimizeEnd(sess)

        if self.interrupted:
            logger.info("Training has been interrupted by Ctrl-C")
            logger.info("Store current epoch number '" + str(iEpoch) +
                        "' in run hyper parameters")
            # Get runs and hps
            # NOTE(review): this branch reads self.idRun rather than the
            # idRun parameter, and calls db.getHyperParams with two
            # arguments -- confirm both against the db module.
            run = db.getRun(conn, self.idRun)
            idRunHps = run["idHyperParams"]
            runHps = db.getHyperParams(conn, idRunHps)["hyperParameters"]
            # Modify num epochs
            runHps[const.KEY_NUM_EPOCHS] = iEpoch
            # update run
            db.updateRun(conn, self.idRun, runHps)

        # Final cost
        logger.info("Parameters have been trained!")
        logger.info("Final cost after epoch {0}; iteration {1}; {2}".format(
            iEpoch, iteration, epoch_cost))

        ## Elapsed (seconds), for whole data set * nb epochs
        elapsedSeconds, perfIndex = self.getPerfCounters(
            tsStart, iEpoch, X_real_shape, m * self.num_epochs)
        perfInfo = {}

        logger.info("Elapsed (s): {0}".format(elapsedSeconds))
        logger.info("Perf index : {0:.2f}".format(perfIndex))

        self.persistModel(sess, idRun)

        # Rewind data sets, 1 epoch for TRN data set
        sess.run([trnIterator.initializer, devIterator.initializer],
                 {self.phTrnNumEpochs: 1})

        accuracyTrain = self.accuracyEval(trnHandle, "trn")
        logger.info("TRN Accuracy: {:.3%}".format(accuracyTrain))

        accuracyDev = self.accuracyEval(devHandle, "dev")
        logger.info("DEV Accuracy: {:.3%}".format(accuracyDev))

        if isCalculateBestEpoch:
            logger.info("Best DEV nb epochs: {0}".format(maxBestNbEpoch))
            logger.info("Best DEV Accuracy : {:.3%}".format(
                maxBestAccuracyDevEpoch))

        if show_plot:
            # plot the cost
            plt.plot(np.squeeze(costs))
            plt.ylabel('cost')
            plt.xlabel('iterations (per tens)')
            plt.title("Start learning rate =" +
                      str(self.start_learning_rate))
            plt.show()

            # plot the accuracies
            plt.plot(np.squeeze(DEV_accuracies))
            plt.ylabel('DEV accuracy')
            plt.xlabel('iterations (100)')
            plt.title("Start learning rate =" +
                      str(self.start_learning_rate))
            plt.show()

        ## Errors
        resultInfo = {}

        if extractImageErrors:
            # Rewind data sets, 1 epoch for TRN data set
            sess.run([trnIterator.initializer, devIterator.initializer],
                     {self.phTrnNumEpochs: 1})

            # Lists of OK for training
            oks_train = self.correctPredictionEval(trnHandle)
            map1, map2 = self.statsExtractErrors(
                "train", dataset=self.datasetTrn, oks=oks_train,
                show_plot=show_plot)
            # Errors nb by data tag
            resultInfo[const.KEY_TRN_NB_ERROR_BY_TAG] = map1
            resultInfo[const.KEY_TRN_PC_ERROR_BY_TAG] = map2

            oks_dev = self.correctPredictionEval(devHandle)
            map1, map2 = self.statsExtractErrors(
                "dev", dataset=self.datasetDev, oks=oks_dev,
                show_plot=show_plot)
            # Errors nb by data tag
            resultInfo[const.KEY_DEV_NB_ERROR_BY_TAG] = map1
            resultInfo[const.KEY_DEV_PC_ERROR_BY_TAG] = map2

        # Update DB run after execution, add extra info
        db.updateRunAfter(
            conn, idRun,
            perf_info=perfInfo, result_info=resultInfo,
            perf_index=perfIndex,
            elapsed_second=elapsedSeconds,
            train_accuracy=accuracyTrain.astype(float),
            dev_accuracy=accuracyDev.astype(float))

        return accuracyDev, accuracyTrain, maxBestNbEpoch, maxBestAccuracyDevEpoch
def getHyperParams(self, conn, dataset):
    """Return the hyper parameters for this object and *dataset*.

    Delegates to the module-level ``getHyperParams`` with the ``"id"``
    entries of *dataset* and of ``self``.
    """
    return getHyperParams(conn, dataset["id"], self["id"])