def fitAndEvaluateModel(model, currentModelVersion):
    global CURRENT_CONTENDER_VERSION

    # This should already be done, but just to be safe. There's no danger in overriding this,
    # since every other worker should already have the latest weights and won't update as long
    # as we don't increase currentModelVersion.
    model.save_weights(Hyperparameters.CURRENT_MODEL_WEIGHTS_PATH, overwrite=True)

    t1 = time.time()

    # Switch to the contender weights before training
    if os.path.isfile(Hyperparameters.CONTENDER_MODEL_WEIGHTS_PATH):
        print("Loading old contender model...")
        model.load_weights(Hyperparameters.CONTENDER_MODEL_WEIGHTS_PATH)

    # In the paper they sample training data from their replay buffer; here we simply train on
    # all of our current (de-duplicated) samples.
    # inStates, valueLabels, policyLabels = MemoryBuffers.getAllTrainingData()
    inStates, valueLabels, policyLabels = MemoryBuffers.getDistinctTrainingData()

    model.fit(np.array(inStates), [np.array(valueLabels), np.array(policyLabels)],
              epochs=Hyperparameters.EPOCHS_PER_TRAINING,
              batch_size=Hyperparameters.MINI_BATCH_SIZE,
              verbose=2,
              shuffle=True)

    print("Training finished: {} s".format(time.time() - t1))
    return currentModelVersion + 1
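
# MemoryBuffers.getDistinctTrainingData() is defined elsewhere in the project; the comments in
# loopingTrainer() below suggest it returns one entry per distinct state with the labels of
# duplicate states averaged. As a rough, hypothetical sketch of that idea (not the project's
# actual implementation):
def _distinctTrainingDataSketch(states, valueLabels, policyLabels):
    import numpy as np
    grouped = {}  # hashable state key -> (state array, list of (value, policy) labels)
    for s, v, p in zip(states, valueLabels, policyLabels):
        key = np.asarray(s).tobytes()
        grouped.setdefault(key, (np.asarray(s), []))[1].append((v, np.asarray(p)))

    distinctStates, distinctValues, distinctPolicies = [], [], []
    for state, labels in grouped.values():
        distinctStates.append(state)
        distinctValues.append(np.mean([v for v, _ in labels], axis=0))      # averaged value label
        distinctPolicies.append(np.mean([p for _, p in labels], axis=0))    # averaged policy label
    return distinctStates, distinctValues, distinctPolicies
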
def benchmark():
    import RootDir

    print("Loading training data...")
    MemoryBuffers.loadOldTrainingDataFromDisk()

    absPath = RootDir.getAbsolutePath(input("ModelName: "))
    gpuSettings = input("Gpu Settings: ")

    t1 = time.time()
    dStates, dEvals, dPolicies = MemoryBuffers.getDistinctTrainingData()
    print("Data pre-processing finished:", time.time() - t1)

    useMultipleGPUs = MachineSpecificSettings.AMOUNT_OF_GPUS > 1
    _fitModelProc(absPath, useMultipleGPUs, gpuSettings, 0, dStates, dEvals, dPolicies, t1)
    print("Full training finished:", time.time() - t1)
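
# _fitModelProc() is defined elsewhere in this module. Judging only from its call sites in
# benchmark() and fitModel(), it acts as the training-process entry point: it pins the GPU(s),
# loads the Keras model from disk, optionally wraps it for multi-GPU use, fits it on the prepared
# arrays and saves the result. A minimal sketch under those assumptions (the real implementation
# may differ):
def _fitModelProcSketch(modelAbsPath, multipleGPUs, gpuSettings, modelGeneration,
                        states, values, policies, startTime):
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = gpuSettings  # select the GPU(s) for this process
    import keras
    from Main.AlphaZero import NeuralNetworks

    model = keras.models.load_model(modelAbsPath)
    trainingModel = NeuralNetworks.createMultipleGPUModel(model) if multipleGPUs else model

    trainingModel.fit(states, [values, policies],
                      epochs=Hyperparameters.EPOCHS_PER_TRAINING,
                      batch_size=Hyperparameters.MINI_BATCH_SIZE,
                      verbose=2, shuffle=True)

    model.save(modelAbsPath, overwrite=True)
    print("Generation {} trained in {} s".format(modelGeneration, time.time() - startTime))
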
def fitModel(modelAbsPath, gpuSettings, modelGeneration, startTime):
    import numpy as np

    print("Stored data points:", MemoryBuffers.getAmountOfStoredDataPoints())

    t1 = time.time()
    inStates, valueLabels, policyLabels = MemoryBuffers.getDistinctTrainingData()
    s = np.array(inStates)
    v = np.array(valueLabels)
    p = np.array(policyLabels)

    dataProcessingTime = time.time() - t1
    print("Data preprocessing finished: {}".format(dataProcessingTime))

    # When running as a combined remote worker and trainer, make sure at least 5 seconds pass
    # before training starts
    if MachineSpecificSettings.REMOTE_WORKER_AND_TRAINER and dataProcessingTime < 5:
        print("Waiting for GPU")
        time.sleep(5 - dataProcessingTime)

    # Run the actual training in a separate process and wait for it to finish
    multipleGPUs = MachineSpecificSettings.AMOUNT_OF_GPUS > 1
    proc = mp.Process(target=_fitModelProc,
                      args=(modelAbsPath, multipleGPUs, gpuSettings, modelGeneration, s, v, p, startTime))
    proc.start()
    proc.join()
def loopingTrainer(port, gpuSettings):
    connection, modelAbsPath = _init(port)

    import os, StartInit
    StartInit.init()

    print("Starting Trainer GPU-Settings: {}".format(gpuSettings))
    os.environ['CUDA_VISIBLE_DEVICES'] = gpuSettings

    from Main.AlphaZero import NeuralNetworks
    import numpy as np
    import keras

    MachineSpecificSettings.setupHyperparameters()
    singleModel = keras.models.load_model(modelAbsPath)

    # In our experiments we ended up using only a single GPU for training, since a too large
    # batch size gave weird results.
    if MachineSpecificSettings.AMOUNT_OF_GPUS > 1:
        trainingModel = NeuralNetworks.createMultipleGPUModel(singleModel)
    else:
        trainingModel = singleModel

    # Training loop
    while True:
        status, data = connection.readMessage()
        print("Got msg:", status)

        if status == STATUS_TRAIN_DATA:  # TODO: Create an informative else statement
            t1 = time.time()  # Only used for displaying elapsed time to the user
            modelVersion, states, values, policies, weights = data

            # Set up the settings for this training turn
            keras.backend.set_value(trainingModel.optimizer.lr, _getLearningRate(modelVersion))
            MemoryBuffers.CURRENT_MODEL_VERSION = modelVersion
            MemoryBuffers.addLabelsToReplayBuffer(states, values, policies)

            # Get all the data contained in the replay buffers, with pre-calculated averages of similar states
            inStates, valueLabels, policyLabels = MemoryBuffers.getDistinctTrainingData()
            s = np.array(inStates)
            v = np.array(valueLabels)
            p = np.array(policyLabels)

            dataProcessingTime = time.time() - t1
            print("Data preprocessing finished: {}".format(dataProcessingTime))
            print("Using LR:", keras.backend.get_value(trainingModel.optimizer.lr))

            # Run the supervised-learning step
            trainingModel.fit(s, [v, p],
                              epochs=Hyperparameters.EPOCHS_PER_TRAINING,
                              batch_size=Hyperparameters.MINI_BATCH_SIZE,
                              verbose=2,
                              shuffle=True)

            singleModel.save(modelAbsPath, overwrite=True)
            singleModel.save(Hyperparameters.MODELS_SAVE_PATH + str(modelVersion + 1))

            trainedModelAsBytes = _readModelFromDisk()
            print("Training finished:", time.time() - t1)
            connection.sendMessage("Finished", (trainedModelAsBytes, ))

            MemoryBuffers.storeTrainingDataToDisk()
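
# _getLearningRate() is defined elsewhere in this module; the call above only shows that it maps
# a model version to a learning rate. A hypothetical step schedule in the spirit of AlphaZero-style
# training (the version thresholds and rates below are made-up placeholders, not the project's values):
def _getLearningRateSketch(modelVersion):
    if modelVersion < 100:
        return 1e-2
    elif modelVersion < 300:
        return 1e-3
    return 1e-4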