示例#1
0
    def train(self,
              conn,
              dataset,
              config,
              comment,
              isUseBestEpoch=True,
              tune=False,
              nbTuning=20,
              showPlots=True):
        """Train the model, optionally random-searching beta and keep_prob.

        Each pass creates a run through ``db.createRun``, trains it with
        ``self.optimizeModel`` and marks it as the selected run for the
        (dataset, config) pair. When ``isUseBestEpoch`` is set, one extra
        final pass re-trains with the beta / keep_prob / epoch count of the
        pass that reported the highest best-epoch DEV accuracy.

        Args:
            conn: DB connection handed to the ``db`` helpers; committed here.
            dataset: Mapping providing at least ``"id"``; also passed to
                ``config.getHyperParams``.
            config: Configuration providing ``getHyperParams(conn, dataset)``
                and the keys ``"id"`` and ``"structure"``; in tuning mode its
                ``"idHyperParams"`` entry is overwritten with the best set.
            comment: Free text stored on every run created by this call.
            isUseBestEpoch: Add a final pass that re-runs with the best
                epoch count found by the previous passes.
            tune: Sample beta and keep_prob log-uniformly on every pass.
            nbTuning: Number of tuning passes; forced to 1 when not tuning.
            showPlots: Forwarded to ``optimizeModel`` (plots are always
                disabled while tuning).

        Returns:
            None. Results are written to the DB and to the logger.
        """

        # Hyper parameters selected for this config / dataset
        confHyperParams = config.getHyperParams(conn, dataset)

        # Work on a (shallow) copy: top-level keys are overridden per pass
        runHyperParams = {}
        runHyperParams.update(confHyperParams["hyperParameters"])

        maxBestNbEpoch, maxBestAccuracyDevEpoch = -1, -1
        initialNbEpoch = runHyperParams[const.KEY_NUM_EPOCHS]
        maxBestBeta = runHyperParams[const.KEY_BETA]
        maxBestKeep_prob = runHyperParams[const.KEY_KEEP_PROB]

        # Tuning bookkeeping. Always initialised so that later reads cannot
        # hit an unbound name when no pass ever beats the sentinel accuracy.
        tuning = {}
        maxAccuracyDev = -9999999999999
        maxIdRun = -1
        maxHyperParams = None
        idMaxHp = None

        ## Prepare hyper params
        if tune:
            # Sampling ranges for the random search
            beta_min = 0.000000000000001
            beta_max = 0.5

            keep_prob_min = 0.5
            keep_prob_max = 1
        else:
            nbTuning = 1

        # Display hyper parameters info
        logger.info("Start Learning rate : " +
                    str(runHyperParams[const.KEY_START_LEARNING_RATE]))
        logger.info("Num epoch           : " +
                    str(runHyperParams[const.KEY_NUM_EPOCHS]))
        logger.info("Minibatch size      : " +
                    str(runHyperParams[const.KEY_MINIBATCH_SIZE]))
        logger.info("Beta                : " +
                    str(runHyperParams[const.KEY_BETA]))
        logger.info("keep_prob           : " +
                    str(runHyperParams[const.KEY_KEEP_PROB]))
        logger.info("isLoadWeights       : " +
                    str(runHyperParams[const.KEY_USE_WEIGHTS]))

        # Start time
        tsGlobalStart = time.time()

        if tune:
            # Seed from the wall clock so every tuning session samples
            # different values. The function must be *called*: passing the
            # function object itself is not a valid seed.
            random.seed(time.time())

        # Nb of pass to run
        nbPass = nbTuning

        # Pass used to recalculate training with best epoch
        bestEpochPass = -1
        if isUseBestEpoch:
            # Add one for best epoch rerun
            nbPass += 1
            # last pass
            bestEpochPass = nbPass

        for j in range(1, nbPass + 1):

            if j == bestEpochPass:
                # Best epoch rerun
                if maxBestNbEpoch < 0:
                    # No previous pass reported a best epoch: re-running with
                    # a negative epoch count would be meaningless.
                    logger.warning(
                        "No best epoch found, skipping best epoch rerun")
                    break

                logger.info(
                    "***************************************************************************"
                )
                logger.info(
                    "Running interrupted gradient descent, nb epochs={0}".
                    format(maxBestNbEpoch))
                logger.info(
                    "***************************************************************************"
                )

                # Replay the hyper parameters of the best pass
                runHyperParams[const.KEY_BETA] = maxBestBeta
                runHyperParams[const.KEY_KEEP_PROB] = maxBestKeep_prob
                runHyperParams[const.KEY_NUM_EPOCHS] = maxBestNbEpoch

            elif tune:
                logger.info("*****************************")
                logger.info("Tune round " + str(j) + "/" + str(nbTuning))
                logger.info("*****************************")

                # Sample beta log-uniformly in [beta_min, beta_max]
                logBeta = random.uniform(math.log10(beta_min),
                                         math.log10(beta_max))
                beta = math.pow(10, logBeta)
                logger.info("Beta = " + str(beta))

                # Sample keep_prob log-uniformly in [keep_prob_min, keep_prob_max]
                logKeep_prob = random.uniform(math.log10(keep_prob_min),
                                              math.log10(keep_prob_max))
                keep_prob = math.pow(10, logKeep_prob)
                logger.info("keep_prob = " + str(keep_prob))

                # update hyper params
                runHyperParams[const.KEY_BETA] = beta
                runHyperParams[const.KEY_KEEP_PROB] = keep_prob

            # Create run
            self.idRun = db.createRun(conn, self.idDataset, config["id"],
                                      runHyperParams)

            # Update run before calling model
            db.updateRunBefore(conn,
                               self.idRun,
                               comment=comment,
                               system_info=self.systemInfo,
                               data_info=self.dataInfo)

            # Run model and update DB run with extra info
            accuracyDev, accuracyTrain, bestNbEpoch, bestAccuracyDevEpoch = self.optimizeModel(
                conn,
                self.idRun,
                config["structure"],
                runHyperParams,
                isCalculateBestEpoch=isUseBestEpoch and (j != bestEpochPass),
                show_plot=showPlots and not tune,
                extractImageErrors=not tune)

            # Print run
            run = db.getRun(conn, self.idRun)
            logger.info("Run stored in DB: " + str(run))

            # Update selected run
            db.updateHpRunSelectorForRun(conn, dataset["id"], config["id"],
                                         run["id"])
            conn.commit()

            # Track the pass with the highest best-epoch DEV accuracy
            # (the best epoch rerun itself never competes)
            if (isUseBestEpoch and (j != bestEpochPass)
                    and (bestAccuracyDevEpoch > maxBestAccuracyDevEpoch)):
                maxBestAccuracyDevEpoch = bestAccuracyDevEpoch
                maxBestNbEpoch = bestNbEpoch
                maxBestBeta = runHyperParams[const.KEY_BETA]
                maxBestKeep_prob = runHyperParams[const.KEY_KEEP_PROB]

            if tune:
                # Store results. Read the values from runHyperParams so the
                # best epoch rerun records what it really ran with instead of
                # the values sampled by the previous tuning round.
                tuning[j] = {
                    "beta": runHyperParams[const.KEY_BETA],
                    "keep_prob": runHyperParams[const.KEY_KEEP_PROB],
                    "accuracyDev": accuracyDev,
                    "accuracyTrain": accuracyTrain
                }

                # Max
                if accuracyDev > maxAccuracyDev:
                    maxAccuracyDev = accuracyDev
                    maxHyperParams = tuning[j]
                    maxIdRun = self.idRun

                    # get or create hyperparams
                    idMaxHp = db.getOrCreateHyperParams(conn, runHyperParams)
                    # Update config
                    config["idHyperParams"] = idMaxHp
                    # save config
                    db.updateConfig(conn, config)
                    # Update selected run
                    db.updateHpRunSelectorForHp(conn, dataset["id"],
                                                config["id"], idMaxHp)

                    # Commit result
                    conn.commit()

                # print max
                logger.info("Max DEV accuracy: " + str(maxAccuracyDev))
                logger.info("Max hyper params:")
                logger.info(maxHyperParams)
                logger.info("Max Best epoch DEV accuracy : " +
                            str(maxBestAccuracyDevEpoch))
                logger.info("Max Best epoch nb           : " +
                            str(maxBestNbEpoch))

            else:
                # Not in tuning mode: when the best epoch is the nominal
                # epoch count, the rerun would repeat the same training.
                if maxBestNbEpoch == initialNbEpoch:
                    break

        # End of loops
        if tune:
            # Print tuning (placeholder required: logging %-formats its args)
            logger.info("Tuning: %s", tuning)
            logger.info("")
            logger.info("Max DEV accuracy            : " + str(maxAccuracyDev))
            logger.info("Max Best epoch DEV accuracy : " +
                        str(maxBestAccuracyDevEpoch))
            logger.info("Max Best epoch nb           : " + str(maxBestNbEpoch))
            logger.info("Max hyper params idRun: " + str(maxIdRun))

            # Change selected hyper-parameters

            # Get max dev accuracy from all runs
            (_, absoluteMaxAccuracyDev,
             _) = db.getBestHyperParams(conn, dataset["id"], config["id"])
            # If our max dev accuracy is better, change current select hps
            if (idMaxHp is not None
                    and absoluteMaxAccuracyDev <= maxAccuracyDev):
                db.updateHpRunSelectorForHp(conn, dataset["id"], config["id"],
                                            idMaxHp)

        # Commit result
        conn.commit()

        # End time
        tsGlobalEnd = time.time()
        globalElapsedSeconds = int(round(tsGlobalEnd - tsGlobalStart))

        logger.info("Finished in " + str(globalElapsedSeconds) + " seconds")
示例#2
0
    def optimizeModel(self,
                      conn,
                      idRun,
                      structure,
                      hyperParams,
                      print_cost=True,
                      show_plot=True,
                      extractImageErrors=True):
        """Train the model for one run and record the results in the DB.

        Builds the model via ``self.modelInit``, runs the epoch loop inside
        the session returned by ``self.getSession()``, persists the model,
        evaluates TRN / DEV accuracy and stores everything with
        ``db.updateRunAfter``.

        Args:
            conn: DB connection handed to the ``db`` helpers.
            idRun: Identifier of the run to persist and update.
            structure: Model structure forwarded to ``self.modelInit``.
            hyperParams: Mapping of hyper parameters indexed by the
                ``const.KEY_*`` keys read below.
            print_cost: Log the cost periodically and collect the cost and
                DEV accuracy series used by the plots.
            show_plot: Display the cost and DEV accuracy plots.
            extractImageErrors: Collect per-tag error statistics for the
                TRN and DEV sets into the run's result info.

        Returns:
            Tuple ``(accuracyDev, accuracyTrain)``.
        """

        costs = []  # To keep track of the cost
        DEV_accuracies = []  # for DEV accuracy graph

        # Get hyper parameters from dico
        self.beta = hyperParams[const.KEY_BETA]
        self.keep_prob = hyperParams[const.KEY_KEEP_PROB]
        self.num_epochs = hyperParams[const.KEY_NUM_EPOCHS]
        self.minibatch_size = hyperParams[const.KEY_MINIBATCH_SIZE]

        self.start_learning_rate = hyperParams[const.KEY_START_LEARNING_RATE]
        self.learning_rate_decay_nb = hyperParams[
            const.KEY_LEARNING_RATE_DECAY_NB_EPOCH]
        self.learning_rate_decay_percent = hyperParams[
            const.KEY_LEARNING_RATE_DECAY_PERCENT]

        self.useBatchNormalization = hyperParams[
            const.KEY_USE_BATCH_NORMALIZATION]

        # Convert ( nbLines, dims... ) to ( None, dims... )
        X_shape = [None]
        X_shape.extend(self.dataInfo[const.KEY_TRN_X_SHAPE][1:])
        X_type = self.datasetTrn.X.dtype

        Y_shape = [None]
        Y_shape.extend(self.dataInfo[const.KEY_TRN_Y_SHAPE][1:])
        Y_type = self.datasetTrn.Y.dtype

        self.modelInit(structure,
                       X_shape,
                       X_type,
                       Y_shape,
                       Y_type,
                       training=True)

        seed = 3  # to keep consistent results

        # Start the session to compute the tensorflow graph

        with self.getSession() as sess:

            # initialize session variables
            self.initSessionVariables(sess)

            # current iteration
            iteration = -1

            ## optimisation may overshoot locally
            ## To avoid returning an overshoot, we detect it and run extra epochs if needed
            finalizationMode = False
            current_num_epochs = hyperParams[const.KEY_NUM_EPOCHS]
            iEpoch = 0
            minCost = 99999999999999
            minCostFinalization = 99999999999999
            finished = False

            # Epoch period of the status traces. Clamped to 1 so that the
            # modulo below cannot divide by zero when the epoch count is 0.
            nbStatusEpoch = max(1, math.ceil(current_num_epochs / 20))

            # Interruption flag; the SIGINT handler registration is
            # currently disabled:
            # signal.signal( signal.SIGINT, self.signal_handler )
            self.interrupted = False

            self.initializeDataset(sess, self.datasetTrn)

            # Start time
            tsStart = time.time()

            # time to make sure we trace something each N minutes
            tsTraceStart = tsStart

            # Do the training loop
            while (not self.interrupted and not finished
                   and (iEpoch <= current_num_epochs)):

                epoch_cost = 0.  # Defines a cost related to an epoch

                if (self.minibatch_size < 0):

                    # No mini-batch : do a gradient descent for whole data

                    iteration += 1

                    # NOTE(review): this call passes its arguments in a
                    # different order than the mini-batch call below; one of
                    # the two probably does not match runIteration's
                    # signature - confirm against its definition.
                    epoch_cost = self.runIteration(
                        iEpoch,
                        1,
                        sess,
                        self.datasetTrn.X,
                        self.datasetTrn.Y,
                        self.keep_prob,
                    )

                else:

                    # Minibatch mode, non handled by data source
                    # m : number of examples in the train set
                    m = self.dataInfo[const.KEY_TRN_X_SIZE]
                    # number of minibatches of size minibatch_size in the train set
                    num_minibatches = math.ceil(m / self.minibatch_size)
                    # New seed every epoch so each epoch gets its own shuffle
                    seed = seed + 1

                    minibatches = self.random_mini_batches(
                        self.datasetTrn.X, self.datasetTrn.Y,
                        self.minibatch_size, seed)

                    iterationMinibatch = 0

                    for minibatch in minibatches:

                        iteration += 1
                        iterationMinibatch += 1

                        # Select a minibatch
                        (minibatch_X, minibatch_Y) = minibatch

                        minibatch_cost = self.runIteration(
                            sess, (minibatch_X, minibatch_Y), iteration,
                            num_minibatches, self.keep_prob)

                        epoch_cost += minibatch_cost / num_minibatches

                        if (print_cost and iteration == 0):
                            # Display iteration 0 to allow verify cost calculation accross machines
                            logger.info(
                                "Current cost epoch %i; iteration %i; %f" %
                                (iEpoch, iteration, epoch_cost))

                        # time to trace?
                        tsTraceNow = time.time()
                        tsTraceElapsed = tsTraceNow - tsTraceStart

                        # Each 60 seconds
                        if (tsTraceElapsed >= 60):

                            # Keep-alive trace for long epochs
                            logger.info(
                                "Current cost epoch %i; iteration %i; %f" %
                                (iEpoch, iteration, epoch_cost))
                            # reset trace start
                            tsTraceStart = tsTraceNow

                if print_cost and iEpoch % nbStatusEpoch == 0:
                    logger.info("Cost after epoch %i; iteration %i; %f" %
                                (iEpoch, iteration, epoch_cost))
                    if (iEpoch != 0):

                        # Performance counters
                        curElapsedSeconds, curPerfIndex = self.getPerfCounters(
                            tsStart, iEpoch, self.datasetTrn.X.shape)
                        # Placeholders are required: logging %-formats the
                        # message with its extra arguments.
                        logger.info("  current: elapsedTime: %s perfIndex: %s",
                                    curElapsedSeconds, curPerfIndex)

                        #  calculate DEV accuracy
                        DEV_accuracy = self.accuracyEval(
                            (self.datasetDev.X, self.datasetDev.Y), "dev")
                        logger.info("  current: DEV accuracy: %f" %
                                    (DEV_accuracy))
                        DEV_accuracies.append(DEV_accuracy)

                if print_cost == True and iEpoch % 5 == 0:
                    costs.append(epoch_cost)

                # Record min cost
                minCost = min(minCost, epoch_cost)

                # Next epoch
                iEpoch += 1
                self.var_numEpoch.load(iEpoch)

                # Close to finish?
                if (not finalizationMode and (iEpoch > current_num_epochs)):
                    # Activate finalization mode
                    finalizationMode = True
                    # local overshoot?
                    if (epoch_cost > minCost):
                        # Yes, run some extra epochs
                        logger.warning(
                            "Local cost overshoot detected, adding maximum 100 epochs to leave local cost overshoot"
                        )
                        current_num_epochs += 100
                        minCostFinalization = minCost

                if (finalizationMode):
                    # Check overshoot is finished
                    if (epoch_cost <= minCostFinalization):
                        # finished
                        finished = True

            self.modelOptimizeEnd(sess)

            if (self.interrupted):
                logger.info("Training has been interrupted by Ctrl-C")
                logger.info("Store current epoch number '" + str(iEpoch) +
                            "' in run hyper parameters")
                # Get runs and hps
                # NOTE(review): uses self.idRun rather than the idRun
                # argument - confirm they always designate the same run.
                run = db.getRun(conn, self.idRun)
                idRunHps = run["idHyperParams"]
                runHps = db.getHyperParams(conn, idRunHps)["hyperParameters"]
                # Modify num epochs
                runHps[const.KEY_NUM_EPOCHS] = iEpoch
                # update run
                db.updateRun(conn, self.idRun, runHps)

            # Final cost
            print("Parameters have been trained!")
            logger.info("Final cost: %s", epoch_cost)

            ## Elapsed (seconds)
            elapsedSeconds, perfIndex = self.getPerfCounters(
                tsStart, iEpoch, self.datasetTrn.X.shape)
            perfInfo = {}

            logger.info("Elapsed (s): %s", elapsedSeconds)
            logger.info("Perf index : %s", perfIndex)

            self.persistModel(sess, idRun)

            accuracyTrain = self.accuracyEval(
                (self.datasetTrn.X, self.datasetTrn.Y), "trn")
            print("Train Accuracy:", accuracyTrain)

            accuracyDev = self.accuracyEval(
                (self.datasetDev.X, self.datasetDev.Y), "dev")
            print("Dev Accuracy:", accuracyDev)

            if (show_plot):
                # plot the cost
                plt.plot(np.squeeze(costs))
                plt.ylabel('cost')
                plt.xlabel('iterations (per tens)')
                plt.title("Start learning rate =" +
                          str(self.start_learning_rate))
                plt.show()

                # plot the accuracies
                plt.plot(np.squeeze(DEV_accuracies))
                plt.ylabel('DEV accuracy')
                plt.xlabel('iterations (100)')
                plt.title("Start learning rate =" +
                          str(self.start_learning_rate))
                plt.show()

            ## Errors
            resultInfo = {}

            if (extractImageErrors):

                # Lists of OK for training
                oks_train = self.correctPredictionEval(
                    (self.datasetTrn.X, self.datasetTrn.Y))
                map1, map2 = self.statsExtractErrors("train",
                                                     dataset=self.datasetTrn,
                                                     oks=oks_train,
                                                     show_plot=show_plot)
                # Errors nb by data tag
                resultInfo[const.KEY_TRN_NB_ERROR_BY_TAG] = map1
                resultInfo[const.KEY_TRN_PC_ERROR_BY_TAG] = map2

                # Same statistics for the DEV set
                oks_dev = self.correctPredictionEval(
                    (self.datasetDev.X, self.datasetDev.Y))
                map1, map2 = self.statsExtractErrors("dev",
                                                     dataset=self.datasetDev,
                                                     oks=oks_dev,
                                                     show_plot=show_plot)
                # Errors nb by data tag
                resultInfo[const.KEY_DEV_NB_ERROR_BY_TAG] = map1
                resultInfo[const.KEY_DEV_PC_ERROR_BY_TAG] = map2

            # Update DB run after execution, add extra info
            db.updateRunAfter(conn,
                              idRun,
                              perf_info=perfInfo,
                              result_info=resultInfo,
                              perf_index=perfIndex,
                              elapsed_second=elapsedSeconds,
                              train_accuracy=accuracyTrain.astype(float),
                              dev_accuracy=accuracyDev.astype(float))

            return accuracyDev, accuracyTrain
示例#3
0
    def optimizeModel(
        self, conn, idRun,
        structure, hyperParams,
        print_cost = True, show_plot = True, extractImageErrors = True, isCalculateBestEpoch = False
    ):

        tf.reset_default_graph() # Forget the past
        tf.set_random_seed( 1 )  # Repeatable operations

        costs = []               # To keep track of the cost
        DEV_accuracies = []      # for DEV accuracy graph

        # Get hyper parameters from dico
        self.beta           = hyperParams[ const.KEY_BETA ]
        self.keep_prob      = hyperParams[ const.KEY_KEEP_PROB ]
        self.num_epochs     = hyperParams[ const.KEY_NUM_EPOCHS ]
        self.minibatch_size = hyperParams[ const.KEY_MINIBATCH_SIZE ]

        # Minibatch mode, non handled by data source
        m = self.dataInfo[ const.KEY_TRN_X_SIZE ]              # m : number of examples in the train set)
        self.numMinibatches = math.ceil( m / self.minibatch_size ) # number of minibatches of size minibatch_size in the train set

        self.start_learning_rate         = hyperParams[ const.KEY_START_LEARNING_RATE ]

        # Decay per epoch NB
        decayEpochNb = hyperParams[ const.KEY_LEARNING_RATE_DECAY_NB_EPOCH ]

        # Multiply by nb mini-batches by epoch to get decay by epoch
        self.learning_rate_decay_nb      = decayEpochNb * self.numMinibatches
        self.learning_rate_decay_percent = hyperParams[ const.KEY_LEARNING_RATE_DECAY_PERCENT ]

        self.useBatchNormalization = hyperParams[ const.KEY_USE_BATCH_NORMALIZATION ]

        if ( self.minibatch_size < 0 ) :
            raise ValueError( "Mini-batch size is required" )

        # Convert ( nbLines, dims... ) to ( None, dims... )
        X_shape = [ None ]
        X_shape.extend( self.dataInfo[ const.KEY_TRN_X_SHAPE ] )
        X_type = tf.float32

        X_real_shape = [ self.minibatch_size ]
        X_real_shape.extend( self.dataInfo[ const.KEY_TRN_X_SHAPE ] )

        Y_shape = [ None ]
        Y_shape.extend( self.dataInfo[ const.KEY_TRN_Y_SHAPE ] )
        Y_type = tf.float32

        # Init model
        self.modelInit( structure, X_shape, X_type, Y_shape, Y_type, training=True )

        # Prepare reader
        if ( self.datasetTrn.inMemory ) :
            # In memory readers
            # Convert ( nbLines, dims... ) to ( None, dims... )
            self.tfDatasetTrn = tf.data.Dataset.from_tensor_slices(
                (
                    self.datasetTrn.X,
                    self.datasetTrn.Y,
                )
            )

        else :

            # TF record file based reader
            self.tfDatasetTrn = tf.data.TFRecordDataset( self.datasetTrn.XY )

        # Shuffle data
        self.tfDatasetTrn = self.tfDatasetTrn.shuffle( buffer_size=100000, reshuffle_each_iteration=True, seed=1 )

        # Pre-fetch for performance
        self.tfDatasetTrn = self.tfDatasetTrn.prefetch( self.minibatch_size * 16 )

        # Data set, minibatch_size slices
        self.tfDatasetTrn = self.tfDatasetTrn.batch( self.minibatch_size )

        # Trn Data set, repeat num_epochs
        self.tfDatasetTrn = self.tfDatasetTrn.repeat( self.phTrnNumEpochs )

        # Prepare reader
        if ( self.datasetDev.inMemory ) :
            # In memory readers
            # Convert ( nbLines, dims... ) to ( None, dims... )
            self.tfDatasetDev = tf.data.Dataset.from_tensor_slices(
                (
                    self.datasetDev.X,
                    self.datasetDev.Y
                )
            )
        else :

            # TF record file based reader
            self.tfDatasetDev = tf.data.TFRecordDataset( self.datasetDev.XY )

        # Pre-fetch and, minibatch_size slices
        self.tfDatasetDev = self.tfDatasetDev.prefetch( self.minibatch_size * 16 ).batch( self.minibatch_size )

        trnIterator = self.tfDatasetTrn.make_initializable_iterator( shared_name="trnIterator" )
        devIterator = self.tfDatasetDev.make_initializable_iterator( shared_name="devIterator" )

        # Start the session to compute the tensorflow graph
        with self.getSession() as sess:

            self.initSessionVariables( sess )

            # initialise variables iterators.
            sess.run( tf.global_variables_initializer() )
            sess.run( [ trnIterator.initializer, devIterator.initializer ], { self.phTrnNumEpochs : self.num_epochs } )

            # The `Iterator.string_handle()` method returns a tensor that can be evaluated
            # and used to feed the `handle` placeholder.
            trnHandle = sess.run( trnIterator.string_handle() )
            devHandle = sess.run( devIterator.string_handle() )

            ## optimisation may overshoot locally
            ## To avoid returning an overshoot, we detect it and run extra epochs if needed
            finalizationMode = False
            current_num_epochs = hyperParams[ const.KEY_NUM_EPOCHS ]
            minCost = 99999999999999
            minCostFinalization = 99999999999999
            finished = False

            # intercept Ctrl-C
            self.interrupted = False
            import signal
            # signal.signal( signal.SIGINT, self.signal_handler )

            # Do the training loop
            iEpoch = 1
            minibatch_cost = 0
            epoch_cost = 0.                       # Defines a cost related to an epoch
            # current iteration
            iteration = 1

            # Nb status epoch : if we reach it, calculate DEV efficiency
            nbStatusEpoch = math.ceil( self.num_epochs / 20 )

            # Start time
            tsStart = time.time()

            # time to make sure we write epoch status each N seconds
            tsStatusEpochStart = tsStart
            secStatusEpoch = 120           # Status epoch each 120 seconds

            # time to make sure we trace something each N seconds
            tsTraceStart = tsStart
            secTrace = 60           #trace each 60 seconds

            # Best epoch values
            maxBestAccuracyDevEpoch = -1
            maxBestNbEpoch = -1

            # Start input enqueue threads.
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners( sess=sess, coord=coord )

            lastEpochCost = 0
            
            try :
                while ( not self.interrupted and not finished ) :

                    minibatch_cost = self.runIteration(
                        sess, trnHandle, self.keep_prob, iteration, self.numMinibatches
                    )

                    epoch_cost += minibatch_cost / self.numMinibatches

                    if ( print_cost and iteration == 0 ) :
                        # Display iteration 0 to allow verify cost calculation accross machines
                        logger.info(  "Current cost epoch {0}; iteration {1}; {2}".format( iEpoch, iteration, epoch_cost ) )

                    # Tracing
                    if ( print_cost and logger.isEnabledFor( logging.DEBUG ) ) :
                        logger.debug(  "Current cost epoch {0}; iteration {1}; {2}".format( iEpoch, iteration, epoch_cost ) )

                    # time to trace?
                    tsTraceNow = time.time()
                    tsTraceElapsed = tsTraceNow - tsTraceStart

                    # Each 60 seconds
                    if ( tsTraceElapsed >= secTrace ) :

                        # Display iteration 0 to allow verify cost calculation accross machines
                        logger.info(  "Current cost epoch {0}; iteration {1}; {2}".format( iEpoch, iteration, epoch_cost ) )
                        # reset trace start
                        tsTraceStart = tsTraceNow

                    # Current epoch finished?
                    if ( ( iteration % self.numMinibatches ) == 0 ) :

                        # time to status epoch?
                        tsEpochStatusNow = time.time()
                        tsEpochStatusElapsed = tsEpochStatusNow - tsStatusEpochStart

                        #print epoch cost
                        if print_cost and ( iteration != 0 ) and ( ( iEpoch % nbStatusEpoch ) == 0 or ( tsEpochStatusElapsed >= secStatusEpoch ) ) :

                            logger.info( "Cost after epoch {0}; iteration {1}; {2}".format( iEpoch, iteration, epoch_cost ) )

                            if ( iEpoch != 1 ) :

                                # Performance counters, for current batch, m data * nbStatus epochs
                                curElapsedSeconds, curPerfIndex = self.getPerfCounters( tsStart, iEpoch, X_real_shape, m * nbStatusEpoch )
                                logger.info( "  current: elapsedTime; {0}; perfIndex; {1:.2f}".format( curElapsedSeconds, curPerfIndex ) )

                                #  calculate DEV accuracy
                                # Rewind DEV iterator
                                sess.run( [ devIterator.initializer ] )
                                DEV_accuracy = self.accuracyEval( devHandle, "dev" )
                                logger.info( "  current: DEV accuracy: {:.3%}".format( DEV_accuracy ) )
                                DEV_accuracies.append( DEV_accuracy )

                                # Update best epoch var
                                if ( isCalculateBestEpoch and ( iEpoch > ( self.num_epochs / 2 ) ) ) :
                                    # max reached?
                                    if ( DEV_accuracy > maxBestAccuracyDevEpoch ) :
                                        maxBestAccuracyDevEpoch = DEV_accuracy
                                        maxBestNbEpoch = iEpoch

                            # Reset status epoch timer
                            tsStatusEpochStart = tsEpochStatusNow

                        # Store cost for graph
                        if print_cost == True and ( iteration != 0 ) and iEpoch % 5 == 0:
                            costs.append( epoch_cost )

                        # Record min cost
                        minCost = min( minCost, epoch_cost )

                        # epoch changed
                        iEpoch += 1
                        lastEpochCost = epoch_cost
                        epoch_cost = 0

                    # Close to finish?
#                     if ( not finalizationMode and ( iEpoch > current_num_epochs ) ) :
#                         # Activate finalization mode
#                         finalizationMode = True
#                         # local overshoot?
#                         if ( epoch_cost > minCost ) :
#                             # Yes, run some extra epochs
#                             logger.info( "WARNING: local cost overshoot detected, adding maximum 100 epochs to leave local cost overshoot" )
#                             current_num_epochs += 100
#                             minCostFinalization = minCost
#
#                     if ( finalizationMode ) :
#                         # Check overshoot is finished
#                         if ( epoch_cost <= minCostFinalization ) :
#                             # finished
#                             finished = True

                    iteration += 1

            except tf.errors.OutOfRangeError:
                # walk finished
                # decrement iteration and epoch that didn't append
                iteration -= 1
                iEpoch -= 1
                epoch_cost = lastEpochCost

            finally :
                # When done, ask the threads to stop.
                coord.request_stop()


            # Wait for threads to finish.
            coord.join( threads )
            self.modelOptimizeEnd( sess )

            if ( self.interrupted ) :
                logger.info( "Training has been interrupted by Ctrl-C" )
                logger.info( "Store current epoch number '" + str( iEpoch ) + "' in run hyper parameters" )
                # Get runs and hps
                run = db.getRun( conn, self.idRun )
                idRunHps = run[ "idHyperParams" ]
                runHps = db.getHyperParams( conn, idRunHps )[ "hyperParameters" ]
                # Modify num epochs
                runHps[ const.KEY_NUM_EPOCHS ] = iEpoch
                # update run
                db.updateRun( conn, self.idRun, runHps )

            # Final cost
            logger.info( "Parameters have been trained!")
            logger.info( "Final cost after epoch {0}; iteration {1}; {2}".format( iEpoch, iteration, epoch_cost ) )

            ## Elapsed (seconds), for whole data set * nb epochs
            elapsedSeconds, perfIndex = self.getPerfCounters( tsStart, iEpoch, X_real_shape, m * self.num_epochs )
            perfInfo = {}

            logger.info( "Elapsed (s): {0}".format( elapsedSeconds ) )
            logger.info( "Perf index : {0:.2f}".format( perfIndex ) )

            self.persistModel( sess, idRun )

            # Rewind data sets, 1 epoch for TRN data set
            sess.run( [ trnIterator.initializer, devIterator.initializer ], { self.phTrnNumEpochs : 1 } )

            accuracyTrain = self.accuracyEval( trnHandle, "trn" )
            logger.info(  "TRN Accuracy: {:.3%}".format( accuracyTrain ) )

            accuracyDev = self.accuracyEval( devHandle, "dev" )
            logger.info(  "DEV Accuracy: {:.3%}".format( accuracyDev ) )

            if ( isCalculateBestEpoch ) :
                logger.info(  "Best DEV nb epochs: {0}".format( maxBestNbEpoch ) )
                logger.info(  "Best DEV Accuracy : {:.3%}".format( maxBestAccuracyDevEpoch ) )

            if ( show_plot ) :
                # plot the cost
                plt.plot(np.squeeze(costs))
                plt.ylabel('cost')
                plt.xlabel('iterations (per tens)')
                plt.title("Start learning rate =" + str( self.start_learning_rate ) )
                plt.show()

                # plot the accuracies
                plt.plot( np.squeeze( DEV_accuracies ) )
                plt.ylabel('DEV accuracy')
                plt.xlabel('iterations (100)')
                plt.title("Start learning rate =" + str( self.start_learning_rate ) )
                plt.show()

            ## Errors
            resultInfo = {}

            if ( extractImageErrors ) :

                # Rewind data sets, 1 epoch for TRN data set
                sess.run( [ trnIterator.initializer, devIterator.initializer ], { self.phTrnNumEpochs : 1 } )

                # Lists of OK for training
                oks_train  = self.correctPredictionEval( trnHandle )
                map1, map2 = self.statsExtractErrors( "train", dataset = self.datasetTrn, oks = oks_train, show_plot=show_plot )
                # Errors nb by data tag
                resultInfo[ const.KEY_TRN_NB_ERROR_BY_TAG ] = map1
                resultInfo[ const.KEY_TRN_PC_ERROR_BY_TAG ] = map2

                oks_dev   = self.correctPredictionEval( devHandle )
                map1, map2 = self.statsExtractErrors( "dev", dataset = self.datasetDev, oks = oks_dev, show_plot=show_plot )
                # Errors nb by data tag
                resultInfo[ const.KEY_DEV_NB_ERROR_BY_TAG ] = map1
                resultInfo[ const.KEY_DEV_PC_ERROR_BY_TAG ] = map2

            # Update DB run after execution, add extra info
            db.updateRunAfter(
                conn, idRun,
                perf_info = perfInfo, result_info=resultInfo,
                perf_index=perfIndex,
                elapsed_second = elapsedSeconds,
                train_accuracy=accuracyTrain.astype( float ),
                dev_accuracy=accuracyDev.astype( float )
            )

            return accuracyDev, accuracyTrain, maxBestNbEpoch, maxBestAccuracyDevEpoch
示例#4
0
            # Get config hyper parameters
            hyperParams = db.getHyperParams( conn, idDataset, config[ "id" ] )

        elif ( buttonClicked == "Predict" ) :

            # hyper parameters depend on choice
            choiceHp = predictParams[ "choiceHyperParams" ]

            if ( choiceHp == 1 ) :

                # Last idRun
                idRun = db.getRunIdLast( conn, config[ "id" ] )

                # Config hyper params
                run = db.getRun( conn, idRun )
                hyperParams = db.getHyperParamsById( conn, run[ "idHyperParams" ] )

            elif ( choiceHp == 2 ) :

                # Get best hyper parameters
                ( hyperParams, _, idRun ) = db.getBestHyperParams( conn, idDataset, idConfig )

                # Check run structure and pixel size match with conf
                run = db.getRun( conn, idRun )

                runStructure = None
                if ( run[ "conf_saved_info" ] != None ) :
                    runStructure = run[ "conf_saved_info" ][ "structure" ]
                    # trim spaces
                    runStructure = runStructure.strip()
示例#5
0
def train( tune = True) :
    """Train the model once, or run a random hyper-parameter search.

    Loads the data set, flattens and normalizes the images, prints a summary,
    asks the user for a run comment, then for each pass: creates a run in the
    DB, stores the run context, calls model() and prints the stored run.

    When tune is true, 20 passes are executed. For each pass, beta and
    keep_prob are drawn log-uniformly between their min/max bounds, plots and
    image error extraction are disabled, and the pass with the best DEV
    accuracy is tracked and printed. When tune is false, a single pass is run
    with the hyper parameters hard-coded below.

    Args:
        tune: True to run the random search, False for a single training run.
    """

    # hyper parameters
    hyperParams = {}

    # use tensorboard
    isUseTensorboard = False

    ## Init tensorflow multi-threading
    # When TF 1.8 available...
#     config = tf.ConfigProto()
#     config.intra_op_parallelism_threads = 16
#     config.inter_op_parallelism_threads = 16
#     tf.session( config=config )

    # system info
    systemInfo = getSystemInfo( tf.__version__ )

    ## Units of layers
#     structure                                   = [ 1 ]
#     hyperParams[ const.KEY_MINIBATCH_SIZE ]     = 64
#     hyperParams[ const.KEY_NUM_EPOCHS ]         = 100
#     hyperParams[ const.KEY_USE_WEIGHTS ]        = False
#     hyperParams[ const.KEY_START_LEARNING_RATE ]= 0.003
#     hyperParams[ const.KEY_BETA ]               = 0
#     hyperParams[ const.KEY_KEEP_PROB ]          = 1

    ## Units of layers
#     structure                                   = [ 50, 24, 1 ]
#     hyperParams[ const.KEY_MINIBATCH_SIZE ]     = 64
#     hyperParams[ const.KEY_NUM_EPOCHS ]         = 2000
#     hyperParams[ const.KEY_USE_WEIGHTS ]        = False
#     hyperParams[ const.KEY_START_LEARNING_RATE ]= 0.0001
#     # From tuning run id=42
#     hyperParams[ const.KEY_BETA ]               = 2.4233061084214308e-15
#     hyperParams[ const.KEY_KEEP_PROB ]          = 10.646631549280114

    ## Units of layers (configuration currently in use)
    structure                                    = [ 100, 48, 1 ]
    hyperParams[ const.KEY_MINIBATCH_SIZE ]      = 64
    hyperParams[ const.KEY_NUM_EPOCHS ]          = 2500
    hyperParams[ const.KEY_USE_WEIGHTS ]         = False
    hyperParams[ const.KEY_START_LEARNING_RATE ] = 0.0001
    hyperParams[ const.KEY_BETA ]                = 0
    hyperParams[ const.KEY_KEEP_PROB ]           = 1

    if tune :
        # Bounds of the random search (sampled on a log10 scale below)
        beta_min = 0.000000000000001
        beta_max = 0.5

        keep_prob_min = 0.5
        keep_prob_max = 1

        nbTuning = 20
        # Results of each tuning round, keyed by round number
        tuning= {}

        # Best round seen so far. maxHyperParams starts as None so that the
        # per-round summary cannot fail on an unbound name if no round ever
        # beats the initial accuracy (e.g. the accuracy is NaN).
        maxAccuracyDev = -9999999999999
        maxHyperParams = None
        maxIdRun = -1
    else :
        # Single pass with the hyper parameters defined above
        nbTuning = 1

    ## Units of layers
#     structure = [ 50, 24, 12, 1 ]
#     num_epochs = 1000
#     # Result from tuning
#     beta = 0
#     keep_prob = 1
#     learning_rate = 0.0001

    #structure = [ 100, 48, 1 ]
    # Result from tuning
    #beta = 1.6980624617370184e-15
    #keep_prob = 0.724123179663981

#     structure = [ 25, 12, 1 ]
#     # Result from tuning
#     beta = 6.531654400821318e-14
#     keep_prob = 0.8213956561201344
#     learning_rate = 0.0001
#     num_epochs = 1500

    # Loading the dataset
    X_train_orig, Y_train_orig, PATH_train, TAG_train, WEIGHT_train, \
    X_dev_orig  , Y_dev_orig, PATH_dev, TAG_dev= \
        load_dataset( hyperParams[ const.KEY_USE_WEIGHTS ] )

    # Flatten the training and test images: one column per example
    X_train_flatten = X_train_orig.reshape( X_train_orig.shape[0], -1 ).T
    X_dev_flatten = X_dev_orig.reshape( X_dev_orig.shape[0], -1 ).T
    # Normalize image vectors
    X_train = X_train_flatten / 255.
    X_dev   = X_dev_flatten / 255.

    Y_train = Y_train_orig
    Y_dev = Y_dev_orig

    print( "Structure:", structure )
    print()
    print ("number of training examples = " + str(X_train.shape[1]))
    print ("number of test examples = " + str(X_dev.shape[1]))
    print ("X_train shape: " + str(X_train.shape))
    print ("Y_train shape: " + str(Y_train.shape))
    print ("X_test shape: " + str(X_dev.shape))
    print ("Y_test shape: " + str(Y_dev.shape))
    print ()
    print ("Start Learning rate :", str( hyperParams[ const.KEY_START_LEARNING_RATE ] ) )
    print ("Num epoch           :", str( hyperParams[ const.KEY_NUM_EPOCHS ] ) )
    print ("Minibatch size      :", str( hyperParams[ const.KEY_MINIBATCH_SIZE ] ) )
    print ("Beta                :", str( hyperParams[ const.KEY_BETA ] ) )
    print ("keep_prob           :", str( hyperParams[ const.KEY_KEEP_PROB ] ) )
    print ("isLoadWeights       :", hyperParams[ const.KEY_USE_WEIGHTS ] )
    if ( hyperParams[ const.KEY_USE_WEIGHTS ] ) :
        print ( "  Weights_train shape :", WEIGHT_train.shape )

    # Data set description stored with each run.
    # The TRN_Y entries describe the training labels (Y_train), the DEV_Y
    # entries the dev labels (Y_dev).
    dataInfo = {
        const.KEY_TRN_SIZE      : str( X_train.shape[1] ),
        const.KEY_DEV_SIZE      : str( X_dev.shape[1] ),
        const.KEY_TRN_SHAPE     : str( X_train.shape ),
        const.KEY_DEV_SHAPE     : str( X_dev.shape ),
        const.KEY_TRN_Y_SIZE    : str( Y_train.shape[1] ),
        const.KEY_TRN_Y_SHAPE   : str( Y_train.shape ),
        const.KEY_DEV_Y_SIZE    : str( Y_dev.shape[1] ),
        const.KEY_DEV_Y_SHAPE   : str( Y_dev.shape ),
    }

    print()
    comment = input( "Run comment: " )

    # Start time
    tsGlobalStart = time.time()

    # Init DB
    with db.initDb( APP_KEY, DB_DIR ) as conn:

        for j in range( 1, nbTuning + 1 ) :

            if tune:
                print( "*****************************" )
                print( "Tune round", str( j ), "/", str( nbTuning ) )
                print( "*****************************" )

                # calculate beta: uniform draw on the log10 scale
                logBeta = random.uniform( math.log10( beta_min ), math.log10( beta_max ) )
                beta = math.pow( 10, logBeta )
                print( "Beta = " + str( beta ))

                # calculate keep_prob: uniform draw on the log10 scale
                logKeep_prob = random.uniform( math.log10( keep_prob_min ), math.log10( keep_prob_max ) )
                keep_prob = math.pow( 10, logKeep_prob )
                print( "keep_prob = " + str( keep_prob ))

                # update hyper params
                hyperParams[ const.KEY_BETA         ] = beta
                hyperParams[ const.KEY_KEEP_PROB    ] = keep_prob

            # Create run
            idRun = db.createRun( conn )

            # Update run before calling model
            db.updateRunBefore(
                conn, idRun,
                structure=structure, comment=comment,
                system_info=systemInfo, hyper_params=hyperParams, data_info=dataInfo
            )

            # Run model and update DB run with extra info
            # Plots and image error extraction are disabled while tuning
            _, accuracyDev, accuracyTrain = model(
                conn, idRun, structure,
                X_train, Y_train, PATH_train, TAG_train, WEIGHT_train,
                X_dev, Y_dev, PATH_dev, TAG_dev,
                X_train_orig, X_dev_orig,
                hyperParams,
                isTensorboard = isUseTensorboard,
                show_plot = not tune, extractImageErrors = not tune
            )

            # Print run
            run = db.getRun( conn, idRun )
            print( "Run stored in DB:", str( run ) )

            if tune :
                # Store results
                tuning[ j ] = {
                    "beta": beta, "keep_prob": keep_prob,
                    "accuracyDev": accuracyDev, "accuracyTrain": accuracyTrain
                }

                # Max
                if ( accuracyDev > maxAccuracyDev ) :
                    maxAccuracyDev = accuracyDev
                    maxHyperParams = tuning[ j ]
                    maxIdRun = idRun

                # print max
                print( "Max DEV accuracy:", maxAccuracyDev )
                print( "Max hyper params:" )
                print( maxHyperParams )


        if tune :
            # Print tuning
            print( "Tuning:" , tuning )
            print()
            print( "Max DEV accuracy      :", maxAccuracyDev )
            print( "Max hyper params idRun:", maxIdRun )

    # End time
    tsGlobalEnd = time.time()
    globalElapsedSeconds = int( round( tsGlobalEnd - tsGlobalStart ) )

    print( "Finished in", globalElapsedSeconds, "seconds" )