Example #1
def run_experiment(args):
    parameters = Parameters.processArguments(args, __doc__)

    #If nnFile is a directory, check it for a previous experiment run and resume from there:
    #load its parameters, append to its results file, and open its highest-numbered network file.
    #If nnFile is None, create an experiment directory; create a results file, save the parameters, and save network files there.

    experimentDirectory = parameters.rom + "_" + time.strftime(
        "%d-%m-%Y-%H-%M") + "/"
    resultsFileName = experimentDirectory + "results.csv"
    startingEpoch = 1
    if parameters.nnFile is None or parameters.nnFile.endswith(".pkl"):
        #Create your experiment directory, results file, save parameters
        if not os.path.isdir(experimentDirectory):
            os.mkdir(experimentDirectory)

        resultsFile = open(resultsFileName, "a")
        resultsFile.write("Epoch,\tAverageReward,\tMean Q Value\n")
        resultsFile.close()

        parametersFile = open(experimentDirectory + "parameters.pkl", 'wb', -1)
        cPickle.dump(parameters, parametersFile)
        parametersFile.close()

    if parameters.nnFile is not None and os.path.isdir(parameters.nnFile):
        #Found an experiment directory
        if not parameters.nnFile.endswith("/"):
            parameters.nnFile += "/"

        experimentDirectory = parameters.nnFile
        resultsFileName = experimentDirectory + "results.csv"

        if os.path.exists(experimentDirectory + "parameters.pkl"):
            parametersFile = open(experimentDirectory + "parameters.pkl", 'rb')
            parameters = cPickle.load(parametersFile)
            parametersFile.close()
        else:
            parametersFile = open(experimentDirectory + "parameters.pkl", 'wb',
                                  -1)
            cPickle.dump(parameters, parametersFile)
            parametersFile.close()

        contents = os.listdir(experimentDirectory)
        networkFiles = []
        for handle in contents:
            if handle.startswith("network") and handle.endswith(".pkl"):
                networkFiles.append(handle)

        if len(networkFiles) == 0:
            #Found a premature experiment that didn't finish a single training epoch
            parameters.nnFile = None
        else:
            #Found a previous experiment's network files; now find the highest epoch number
            highestNNFile = networkFiles[0]
            highestNetworkEpochNumber = int(
                highestNNFile[highestNNFile.index("_") +
                              1:highestNNFile.index(".")])
            for networkFile in networkFiles:
                networkEpochNumber = int(networkFile[networkFile.index("_") +
                                                     1:networkFile.index(".")])
                if networkEpochNumber > highestNetworkEpochNumber:
                    highestNNFile = networkFile
                    highestNetworkEpochNumber = networkEpochNumber

            startingEpoch = highestNetworkEpochNumber + 1
            #don't use full exploration; it's not a good way to fill the replay memory when we already have a decent policy
            if startingEpoch > 1:
                parameters.epsilonStart = parameters.epsilonEnd

            parameters.nnFile = experimentDirectory + highestNNFile
            print "Loaded experiment: " + experimentDirectory + "\nLoaded network file:" + highestNNFile

    sys.setrecursionlimit(10000)
    ale = ALEInterface()

    Environment.initializeALEParameters(ale, parameters.seed,
                                        parameters.frameSkip,
                                        parameters.repeatActionProbability,
                                        parameters.displayScreen)
    ale.loadROM(parameters.fullRomPath)
    minimalActions = ale.getMinimalActionSet()

    agent = DQNAgent.DQNAgent(
        minimalActions, parameters.croppedHeight, parameters.croppedWidth,
        parameters.batchSize, parameters.phiLength, parameters.nnFile,
        parameters.loadWeightsFlipped, parameters.updateFrequency,
        parameters.replayMemorySize, parameters.replayStartSize,
        parameters.networkType, parameters.updateRule,
        parameters.batchAccumulator, parameters.networkUpdateDelay,
        parameters.discountRate, parameters.learningRate, parameters.rmsRho,
        parameters.rmsEpsilon, parameters.momentum, parameters.epsilonStart,
        parameters.epsilonEnd, parameters.epsilonDecaySteps,
        parameters.evalEpsilon, parameters.useSARSAUpdate,
        parameters.kReturnLength)

    for epoch in xrange(startingEpoch, parameters.epochs + 1):
        agent.startTrainingEpoch(epoch)
        runTrainingEpoch(ale, agent, epoch, parameters.stepsPerEpoch)
        agent.endTrainingEpoch(epoch)

        networkFileName = experimentDirectory + "network_" + str(
            epoch) + ".pkl"
        DeepNetworks.saveNetworkParams(agent.network.qValueNetwork,
                                       networkFileName)

        if parameters.stepsPerTest > 0 and epoch % parameters.evaluationFrequency == 0:
            agent.startEvaluationEpoch(epoch)
            avgReward = runEvaluationEpoch(ale, agent, epoch,
                                           parameters.stepsPerTest)
            holdoutQVals = agent.computeHoldoutQValues(3200)

            resultsFile = open(resultsFileName, 'a')
            resultsFile.write(
                str(epoch) + ",\t" + str(round(avgReward, 4)) + ",\t\t" +
                str(round(holdoutQVals, 4)) + "\n")
            resultsFile.close()

            agent.endEvaluationEpoch(epoch)

    agent.agentCleanup()
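
The resume step above recovers the latest checkpoint by slicing each file name between "_" and "." and keeping the largest epoch number. A minimal sketch of the same step, assuming the network_<epoch>.pkl naming convention used above; find_latest_checkpoint is a hypothetical helper, not part of the example:

import os
import re

def find_latest_checkpoint(experiment_dir):
    """Return (path, epoch) for the highest-numbered network_<epoch>.pkl, or (None, 0)."""
    best_path, best_epoch = None, 0
    pattern = re.compile(r"^network_(\d+)\.pkl$")
    for name in os.listdir(experiment_dir):
        match = pattern.match(name)
        if match and int(match.group(1)) > best_epoch:
            best_epoch = int(match.group(1))
            best_path = os.path.join(experiment_dir, name)
    return best_path, best_epoch

Anchoring the pattern on the whole file name keeps unrelated pickles such as parameters.pkl out of the checkpoint search.
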
Example #2
def run_experiment(args):
    parameters = Parameters.processArguments(args, __doc__)

    #If nnFile is a directory, check it for a previous experiment run and resume from there:
    #load its parameters, append to its results file, and open its highest-numbered network file.
    #If nnFile is None, create an experiment directory; create a results file, save the parameters, and save network files there.

    experimentDirectory = parameters.rom + "_" + time.strftime("%d-%m-%Y-%H-%M") + "/"
    resultsFileName = experimentDirectory + "results.csv"
    startingEpoch = 1
    if parameters.nnFile is None or parameters.nnFile.endswith(".pkl"):
        #Create your experiment directory, results file, save parameters
        if not os.path.isdir(experimentDirectory):
            os.mkdir(experimentDirectory)

        resultsFile = open(resultsFileName, "a")
        resultsFile.write("Epoch,\tAverageReward,\tMean Q Value\n")
        resultsFile.close()

        parametersFile = open(experimentDirectory + "parameters.pkl", 'wb', -1)
        cPickle.dump(parameters, parametersFile)
        parametersFile.close()


    if parameters.nnFile is not None and os.path.isdir(parameters.nnFile):
        #Found an experiment directory
        if not parameters.nnFile.endswith("/"):
            parameters.nnFile += "/"

        experimentDirectory = parameters.nnFile
        resultsFileName = experimentDirectory + "results.csv"

        if os.path.exists(experimentDirectory + "parameters.pkl"):
            parametersFile = open(experimentDirectory + "parameters.pkl", 'rb')
            parameters = cPickle.load(parametersFile)
            parametersFile.close()
        else:
            parametersFile = open(experimentDirectory + "parameters.pkl", 'wb', -1)
            cPickle.dump(parameters, parametersFile)
            parametersFile.close()

        contents = os.listdir(experimentDirectory)
        networkFiles = []
        for handle in contents:
            if handle.startswith("network") and handle.endswith(".pkl"):
                networkFiles.append(handle)

        if len(networkFiles) == 0:
            #Found a premature experiment that didn't finish a single training epoch
            parameters.nnFile = None
        else:
            #Found a previous experiment's network files; now find the highest epoch number
            highestNNFile = networkFiles[0]
            highestNetworkEpochNumber = int(highestNNFile[highestNNFile.index("_") + 1 : highestNNFile.index(".")])
            for networkFile in networkFiles:
                networkEpochNumber = int(networkFile[networkFile.index("_") + 1 : networkFile.index(".")])
                if networkEpochNumber > highestNetworkEpochNumber:
                    highestNNFile = networkFile
                    highestNetworkEpochNumber = networkEpochNumber

            startingEpoch = highestNetworkEpochNumber + 1
            #don't use full exploration; it's not a good way to fill the replay memory when we already have a decent policy
            if startingEpoch > 1:
                parameters.epsilonStart = parameters.epsilonEnd

            parameters.nnFile = experimentDirectory + highestNNFile
            print "Loaded experiment: " + experimentDirectory + "\nLoaded network file:" + highestNNFile


    sys.setrecursionlimit(10000)
    ale = ALEInterface()

    Environment.initializeALEParameters(ale, parameters.seed, parameters.frameSkip, parameters.repeatActionProbability, parameters.displayScreen)
    ale.loadROM(parameters.fullRomPath)
    minimalActions = ale.getMinimalActionSet()


    agent = DQNAgent.DQNAgent(minimalActions, parameters.croppedHeight, parameters.croppedWidth, 
                parameters.batchSize, 
                parameters.phiLength,
                parameters.nnFile, 
                parameters.loadWeightsFlipped, 
                parameters.updateFrequency, 
                parameters.replayMemorySize, 
                parameters.replayStartSize,
                parameters.networkType, 
                parameters.updateRule, 
                parameters.batchAccumulator, 
                parameters.networkUpdateDelay,
                parameters.discountRate, 
                parameters.learningRate, 
                parameters.rmsRho, 
                parameters.rmsEpsilon, 
                parameters.momentum,
                parameters.epsilonStart, 
                parameters.epsilonEnd, 
                parameters.epsilonDecaySteps,
                parameters.evalEpsilon,
                parameters.useSARSAUpdate,
                parameters.kReturnLength)



    for epoch in xrange(startingEpoch, parameters.epochs + 1):
        agent.startTrainingEpoch(epoch)
        runTrainingEpoch(ale, agent, epoch, parameters.stepsPerEpoch)
        agent.endTrainingEpoch(epoch)

        networkFileName = experimentDirectory + "network_" + str(epoch) + ".pkl"
        DeepNetworks.saveNetworkParams(agent.network.qValueNetwork, networkFileName)

        if parameters.stepsPerTest > 0 and epoch % parameters.evaluationFrequency == 0:
            agent.startEvaluationEpoch(epoch)
            avgReward = runEvaluationEpoch(ale, agent, epoch, parameters.stepsPerTest)
            holdoutQVals = agent.computeHoldoutQValues(3200)

            resultsFile = open(resultsFileName, 'a')
            resultsFile.write(str(epoch) + ",\t" + str(round(avgReward, 4)) + ",\t\t" + str(round(holdoutQVals, 4)) + "\n")
            resultsFile.close()

            agent.endEvaluationEpoch(epoch)

    agent.agentCleanup()
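
Example #2 keeps the same parameters.pkl handling as Example #1: reload the pickle if an earlier run left one behind, otherwise persist the current settings. A minimal sketch of that branch using context-managed file handles, assuming the parameters object is picklable; load_or_save_parameters is a hypothetical helper, not part of the example:

try:
    import cPickle as pickle  # Python 2, as in the examples on this page
except ImportError:
    import pickle
import os

def load_or_save_parameters(experiment_dir, parameters):
    """Reload parameters.pkl if it exists, otherwise write the current parameters."""
    path = os.path.join(experiment_dir, "parameters.pkl")
    if os.path.exists(path):
        with open(path, "rb") as f:
            return pickle.load(f)
    with open(path, "wb") as f:
        pickle.dump(parameters, f, -1)  # -1 selects the highest pickle protocol
    return parameters
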
Example #3
def run_experiment(args):
    parameters = Parameters.processArguments(args, __doc__)

    #If nnFile is a directory, check it for a previous experiment run and resume from there:
    #load its parameters, append to its results file, and open its highest-numbered network file.
    #If nnFile is None, create an experiment directory; create a results file, save the parameters, and save network files there.

    experimentDirectory = parameters.rom + "_" + time.strftime("%d-%m-%Y-%H-%M") + "/"
    resultsFileName = experimentDirectory + "results.csv"
    startingEpoch = 0
    if parameters.nnFile is None or parameters.nnFile.endswith(".pkl"):
        #Create your experiment directory, results file, save parameters
        if not os.path.isdir(experimentDirectory):
            os.mkdir(experimentDirectory)

        resultsFile = open(resultsFileName, "a")
        resultsFile.write("Epoch,\tAverageReward,\tMean Q Value\n")
        resultsFile.close()

        parametersFile = open(experimentDirectory + "parameters.pkl", 'wb', -1)
        cPickle.dump(parameters, parametersFile)
        parametersFile.close()


    if parameters.nnFile is not None and os.path.isdir(parameters.nnFile):
        #Found an experiment directory
        if not parameters.nnFile.endswith("/"):
            parameters.nnFile += "/"

        experimentDirectory = parameters.nnFile
        resultsFileName = experimentDirectory + "results.csv"

        if os.path.exists(experimentDirectory + "parameters.pkl"):
            parametersFile = open(experimentDirectory + "parameters.pkl", 'rb')
            parameters = cPickle.load(parametersFile)
            parametersFile.close()
        else:
            parametersFile = open(experimentDirectory + "parameters.pkl", 'wb', -1)
            cPickle.dump(parameters, parametersFile)
            parametersFile.close()

        contents = os.listdir(experimentDirectory)
        networkFiles = []
        for handle in contents:
            if handle.startswith("network") and handle.endswith(".pkl"):
                networkFiles.append(handle)

        if len(networkFiles) == 0:
            #Found a premature experiment that didn't finish a single training epoch
            parameters.nnFile = None
        else:
            #Found a previous experiment's network files; now find the highest epoch number
            highestNNFile = networkFiles[0]
            highestNetworkEpochNumber = int(highestNNFile[highestNNFile.index("_") + 1 : highestNNFile.index(".")])
            for networkFile in networkFiles:
                networkEpochNumber = int(networkFile[networkFile.index("_") + 1 : networkFile.index(".")])
                if networkEpochNumber > highestNetworkEpochNumber:
                    highestNNFile = networkFile
                    highestNetworkEpochNumber = networkEpochNumber

            startingEpoch = highestNetworkEpochNumber + 1
            #don't use full exploration; it's not a good way to fill the replay memory when we already have a decent policy
            if startingEpoch > 4:
                parameters.epsilonStart = parameters.epsilonEnd

            parameters.nnFile = experimentDirectory + highestNNFile
            print "Loaded experiment: " + experimentDirectory + "\nLoaded network file:" + highestNNFile

    
    sys.setrecursionlimit(10000)
    ale = ALEInterface()

    Environment.initializeALEParameters(ale, parameters.seed, parameters.frameSkip, parameters.repeatActionProbability, parameters.displayScreen)

    # ale.loadROM(parameters.fullRomPath)

    # minimalActions = ale.getMinimalActionSet()

    # difficulties = ale.getAvailableDifficulties()
    # modes = ale.getAvailableModes()

    # maxNumFlavors = len(difficulties) * len(modes)

    # difficulties = createFlavorList(parameters.difficultyString, len(difficulties))
    # modes = createFlavorList(parameters.modeString, len(modes))

    # transferTaskModule = TransferTaskModule.TransferTaskModule(difficulties, modes)


    transferTaskModule = TransferTaskModule.TransferTaskModule(ale, parameters.roms, parameters.difficultyString, parameters.modeString, parameters.taskBatchFlag)
    numActionsToUse = transferTaskModule.getNumTotalActions()
    print "Number of total tasks:" + str(transferTaskModule.getNumTasks()) + " across " + str(transferTaskModule.getNumGames()) + " games."
    print "Actions List:" + str(transferTaskModule.getTotalActionsList())
    # print "Num difficulties: " + str(len(difficulties)) + " num modes: " + str(len(modes)) + " numtasks: " + str(transferTaskModule.getNumTasks())
    # print "Modes: " + str(modes)
    # print "Difficulties: " + str(difficulties)

    numTransferTasks = transferTaskModule.getNumTasks()

    if (parameters.reduceEpochLengthByNumFlavors):
        parameters.stepsPerEpoch = int(parameters.stepsPerEpoch / numTransferTasks)

    agent = DQTNAgent.DQTNAgent(transferTaskModule.getTotalActionsList(), parameters.croppedHeight, parameters.croppedWidth, 
                parameters.batchSize, 
                parameters.phiLength,
                parameters.nnFile, 
                parameters.loadWeightsFlipped, 
                parameters.updateFrequency, 
                parameters.replayMemorySize, 
                parameters.replayStartSize,
                parameters.networkType, 
                parameters.updateRule, 
                parameters.batchAccumulator, 
                parameters.networkUpdateDelay,
                transferTaskModule,
                parameters.transferExperimentType,
                numTransferTasks,
                parameters.discountRate, 
                parameters.learningRate, 
                parameters.rmsRho, 
                parameters.rmsEpsilon, 
                parameters.momentum,
                parameters.epsilonStart, 
                parameters.epsilonEnd, 
                parameters.epsilonDecaySteps,
                parameters.evalEpsilon,
                parameters.useSARSAUpdate,
                parameters.kReturnLength,
                parameters.deathEndsEpisode)



    for epoch in xrange(startingEpoch, parameters.epochs + 1):
        agent.startTrainingEpoch(epoch)
        runTrainingEpoch(ale, agent, epoch, parameters.stepsPerEpoch, transferTaskModule, parameters.frameSkip, parameters.maxNoActions)
        agent.endTrainingEpoch(epoch)

        networkFileName = experimentDirectory + "network_" + str(epoch) + ".pkl"
        DeepNetworks.saveNetworkParams(agent.network.qValueNetwork, networkFileName)

        print "Total number of samples seen per task: "
        print str(agent.trainingMemory.totalTaskSampleCount)

        if parameters.stepsPerTest > 0 and epoch % parameters.evaluationFrequency == 0:
            agent.startEvaluationEpoch(epoch)
            avgRewardPerTask = runEvaluationEpoch(ale, agent, epoch, parameters.stepsPerTest, transferTaskModule, parameters.frameSkip, parameters.maxNoActions)
            holdoutQVals = agent.computeHoldoutQValues(parameters.numHoldoutQValues)

            resultsFile = open(resultsFileName, 'a')
            resultsFile.write(str(epoch) + ",\t")
            resultsString = ""

            for avgReward in avgRewardPerTask:
                resultsString += str(round(avgReward, 4)) + ",\t"

            resultsFile.write(resultsString)
            resultsFile.write("\t" + str([round(x, 4) for x in holdoutQVals]) + "\n")
            resultsFile.close()

            agent.endEvaluationEpoch(epoch)

    agent.agentCleanup()
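
Each evaluation row written above holds the epoch number, one rounded average reward per transfer task, and the list of mean holdout Q values. A minimal sketch of that row format, assuming avgRewardPerTask and holdoutQVals are lists of floats; write_results_row is a hypothetical helper, not part of the example:

def write_results_row(results_path, epoch, avg_reward_per_task, holdout_q_vals):
    """Append one evaluation row: epoch, per-task average rewards, then holdout Q values."""
    with open(results_path, "a") as results_file:
        row = str(epoch) + ",\t"
        for avg_reward in avg_reward_per_task:
            row += str(round(avg_reward, 4)) + ",\t"
        row += "\t" + str([round(q, 4) for q in holdout_q_vals]) + "\n"
        results_file.write(row)
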
Example #4
def run_experiment(args):
    parameters = Parameters.processArguments(args, __doc__)

    #If nnFile is a directory, check it for a previous experiment run and resume from there:
    #load its parameters, append to its results file, and open its highest-numbered network file.
    #If nnFile is None, create an experiment directory; create a results file, save the parameters, and save network files there.

    experimentDirectory = parameters.rom + "_" + time.strftime(
        "%d-%m-%Y-%H-%M") + "/"
    resultsFileName = experimentDirectory + "results.csv"
    startingEpoch = 0
    if parameters.nnFile is None or parameters.nnFile.endswith(".pkl"):
        #Create your experiment directory, results file, save parameters
        if not os.path.isdir(experimentDirectory):
            os.mkdir(experimentDirectory)

        resultsFile = open(resultsFileName, "a")
        resultsFile.write("Epoch,\tAverageReward,\tMean Q Value\n")
        resultsFile.close()

        parametersFile = open(experimentDirectory + "parameters.pkl", 'wb', -1)
        cPickle.dump(parameters, parametersFile)
        parametersFile.close()

    if parameters.nnFile is not None and os.path.isdir(parameters.nnFile):
        #Found an experiment directory
        if not parameters.nnFile.endswith("/"):
            parameters.nnFile += "/"

        experimentDirectory = parameters.nnFile
        resultsFileName = experimentDirectory + "results.csv"

        if os.path.exists(experimentDirectory + "parameters.pkl"):
            parametersFile = open(experimentDirectory + "parameters.pkl", 'rb')
            parameters = cPickle.load(parametersFile)
            parametersFile.close()
        else:
            parametersFile = open(experimentDirectory + "parameters.pkl", 'wb',
                                  -1)
            cPickle.dump(parameters, parametersFile)
            parametersFile.close()

        contents = os.listdir(experimentDirectory)
        networkFiles = []
        for handle in contents:
            if handle.startswith("network") and handle.endswith(".pkl"):
                networkFiles.append(handle)

        if len(networkFiles) == 0:
            #Found a premature experiment that didn't finish a single training epoch
            parameters.nnFile = None
        else:
            #Found a previous experiment's network files; now find the highest epoch number
            highestNNFile = networkFiles[0]
            highestNetworkEpochNumber = int(
                highestNNFile[highestNNFile.index("_") +
                              1:highestNNFile.index(".")])
            for networkFile in networkFiles:
                networkEpochNumber = int(networkFile[networkFile.index("_") +
                                                     1:networkFile.index(".")])
                if networkEpochNumber > highestNetworkEpochNumber:
                    highestNNFile = networkFile
                    highestNetworkEpochNumber = networkEpochNumber

            startingEpoch = highestNetworkEpochNumber + 1
            #don't use full exploration; it's not a good way to fill the replay memory when we already have a decent policy
            if startingEpoch > 4:
                parameters.epsilonStart = parameters.epsilonEnd

            parameters.nnFile = experimentDirectory + highestNNFile
            print "Loaded experiment: " + experimentDirectory + "\nLoaded network file:" + highestNNFile

    sys.setrecursionlimit(10000)
    ale = ALEInterface()

    Environment.initializeALEParameters(ale, parameters.seed,
                                        parameters.frameSkip,
                                        parameters.repeatActionProbability,
                                        parameters.displayScreen)

    # ale.loadROM(parameters.fullRomPath)

    # minimalActions = ale.getMinimalActionSet()

    # difficulties = ale.getAvailableDifficulties()
    # modes = ale.getAvailableModes()

    # maxNumFlavors = len(difficulties) * len(modes)

    # difficulties = createFlavorList(parameters.difficultyString, len(difficulties))
    # modes = createFlavorList(parameters.modeString, len(modes))

    # transferTaskModule = TransferTaskModule.TransferTaskModule(difficulties, modes)

    transferTaskModule = TransferTaskModule.TransferTaskModule(
        ale, parameters.roms, parameters.difficultyString,
        parameters.modeString, parameters.taskBatchFlag)
    numActionsToUse = transferTaskModule.getNumTotalActions()
    print "Number of total tasks:" + str(
        transferTaskModule.getNumTasks()) + " across " + str(
            transferTaskModule.getNumGames()) + " games."
    print "Actions List:" + str(transferTaskModule.getTotalActionsList())
    # print "Num difficulties: " + str(len(difficulties)) + " num modes: " + str(len(modes)) + " numtasks: " + str(transferTaskModule.getNumTasks())
    # print "Modes: " + str(modes)
    # print "Difficulties: " + str(difficulties)

    numTransferTasks = transferTaskModule.getNumTasks()

    if (parameters.reduceEpochLengthByNumFlavors):
        parameters.stepsPerEpoch = int(parameters.stepsPerEpoch /
                                       numTransferTasks)

    agent = DQTNAgent.DQTNAgent(
        transferTaskModule.getTotalActionsList(), parameters.croppedHeight,
        parameters.croppedWidth, parameters.batchSize, parameters.phiLength,
        parameters.nnFile, parameters.loadWeightsFlipped,
        parameters.updateFrequency, parameters.replayMemorySize,
        parameters.replayStartSize, parameters.networkType,
        parameters.updateRule, parameters.batchAccumulator,
        parameters.networkUpdateDelay, transferTaskModule,
        parameters.transferExperimentType, numTransferTasks,
        parameters.discountRate, parameters.learningRate, parameters.rmsRho,
        parameters.rmsEpsilon, parameters.momentum, parameters.epsilonStart,
        parameters.epsilonEnd, parameters.epsilonDecaySteps,
        parameters.evalEpsilon, parameters.useSARSAUpdate,
        parameters.kReturnLength, parameters.deathEndsEpisode)

    for epoch in xrange(startingEpoch, parameters.epochs + 1):
        agent.startTrainingEpoch(epoch)
        runTrainingEpoch(ale, agent, epoch, parameters.stepsPerEpoch,
                         transferTaskModule, parameters.frameSkip,
                         parameters.maxNoActions)
        agent.endTrainingEpoch(epoch)

        networkFileName = experimentDirectory + "network_" + str(
            epoch) + ".pkl"
        DeepNetworks.saveNetworkParams(agent.network.qValueNetwork,
                                       networkFileName)

        print "Total number of samples seen per task: "
        print str(agent.trainingMemory.totalTaskSampleCount)

        if parameters.stepsPerTest > 0 and epoch % parameters.evaluationFrequency == 0:
            agent.startEvaluationEpoch(epoch)
            avgRewardPerTask = runEvaluationEpoch(ale, agent, epoch,
                                                  parameters.stepsPerTest,
                                                  transferTaskModule,
                                                  parameters.frameSkip,
                                                  parameters.maxNoActions)
            holdoutQVals = agent.computeHoldoutQValues(
                parameters.numHoldoutQValues)

            resultsFile = open(resultsFileName, 'a')
            resultsFile.write(str(epoch) + ",\t")
            resultsString = ""

            for avgReward in avgRewardPerTask:
                resultsString += str(round(avgReward, 4)) + ",\t"

            resultsFile.write(resultsString)
            resultsFile.write("\t" + str([round(x, 4)
                                          for x in holdoutQVals]) + "\n")
            resultsFile.close()

            agent.endEvaluationEpoch(epoch)

    agent.agentCleanup()
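
When reduceEpochLengthByNumFlavors is set, Examples #3 and #4 divide stepsPerEpoch by the number of transfer tasks so the total step budget stays comparable to a single-task run. A minimal sketch of that adjustment; split_epoch_budget and the figures in the comment are illustrative assumptions, not values from the examples:

def split_epoch_budget(steps_per_epoch, num_transfer_tasks):
    """Divide one epoch's step budget evenly across transfer tasks."""
    if num_transfer_tasks <= 0:
        raise ValueError("need at least one transfer task")
    return int(steps_per_epoch / num_transfer_tasks)

# e.g. a 250000-step epoch spread over 5 difficulty/mode flavors -> 50000 steps per flavor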