def loopNetworksDownload(storage):
    """
    Endlessly mirror the networks of a run into the directory `storage`.

    The values following --secret, --run and --command are read from sys.argv.
    In every cycle the network list of the run is requested from the command
    server, every listed network that has no file named after its id inside
    `storage` is downloaded, and finally the list itself is written to
    networks.json in `storage`. The cycle repeats every two seconds.

    This function never returns.
    """

    def argAfter(flag):
        # the value of a command line option is the argument right behind the flag
        return sys.argv[sys.argv.index(flag) + 1]

    secret = argAfter("--secret")
    run = argAfter("--run")
    commandHost = argAfter("--command")

    listUrl = commandHost + "/api/networks/list/" + run
    listPath = os.path.join(storage, "networks.json")

    while True:
        networks = requestJson(listUrl, secret)

        for network in networks:
            spath = os.path.join(storage, network["id"])
            if os.path.exists(spath):
                # already downloaded in an earlier cycle
                continue

            downloadUrl = commandHost + "/api/networks/download/" + network["id"]
            storeFileUnderPath(spath, requestBytes(downloadUrl, secret))
            logMsg("Downloaded a new network to %s" % spath)

        # the list is only stored after all of its networks are available locally
        writeJsonFile(listPath, networks)

        time.sleep(2)
示例#2
0
def measureFrametime(configPath, idx, run):
    """
    Play batches with the worker described by the config and measure them.

    At least BATCH_COUNT batches are played; afterwards more batches are
    played until MIN_TIME seconds have passed since the function was entered.

    @param configPath: path handed to loadMlConfig
    @param idx: index of this process, used for the process title
    @param run: passed to initSelfplay of the worker
    @return: a pair of (mean of the first values returned by playBatch,
             sum of the third values returned by playBatch)
    """
    setproctitle.setproctitle("x0_fe_worker_" + str(idx))
    startTime = time.monotonic()

    core = loadMlConfig(configPath)
    setLoggingEnabled(True)

    worker = core.worker(recursive=True)
    worker.initSelfplay(run)

    times, exs, ns = [], [], []

    def recordBatch():
        tx, ex, n = worker.playBatch()
        times.append(tx)
        exs.append(ex)
        ns.append(n)

    # fixed minimum number of batches
    for _ in range(BATCH_COUNT):
        recordBatch()

    # keep going until the minimum measurement time is reached
    while time.monotonic() - startTime < MIN_TIME:
        recordBatch()

    # the mean is only reported if every batch provided a value
    if None not in exs:
        logMsg("Avg number of mcts nodes used by playBatch(): ", np.mean(exs))

    return np.mean(times), np.sum(ns)
示例#3
0
    def main(self):
        """
        Start the polling and pushing threads, then process work packages forever.

        Packages are taken from the front of self.workQueue, evaluated via
        doEvaluation and their results appended to self.resultsQueue. The
        policy is handed to the policy updater before every package.
        """
        setLoggingEnabled(True)

        self.pullThread = threading.Thread(target=self.pollWork, daemon=True)
        self.pullThread.start()

        self.pushThread = threading.Thread(target=self.pushResults, daemon=True)
        self.pushThread.start()

        while True:
            # wait for work, but complain only once per waiting period
            announcedIdle = False
            while len(self.workQueue) == 0:
                if not announcedIdle:
                    logMsg("I have no work!")
                    announcedIdle = True
                time.sleep(0.05)

            self.policy = self.policyUpdater.update(self.policy)

            # the package stays in the queue until its result has been queued
            package = self.workQueue[0]
            packageResult = self.doEvaluation(package)

            self.resultsQueue.append(packageResult)
            del self.workQueue[0]
示例#4
0
def postJson(url, secret, data, timeout=30, retries=999999, getResponse=False):
    """
    POST `data` encoded as JSON to `url`, retrying on any failure.

    @param url: target url
    @param secret: sent in the "secret" header
    @param data: object to be encoded with json.dumps
    @param timeout: request timeout in seconds, it also scales the pause between retries
    @param retries: number of failed attempts that are tolerated before the error is raised
    @param getResponse: if True the decoded JSON body of the response is returned
    @return: the decoded response if getResponse is set, otherwise None
    """
    cnt = 0
    while True:
        cnt += 1
        try:
            dj = json.dumps(data)
            # the timeout has to be handed to requests, else a request that
            # never gets an answer blocks forever and is never retried
            response = requests.post(url=url,
                                     data=dj,
                                     headers={
                                         "secret":
                                         secret,
                                         "Content-Type":
                                         "application/json;charset=utf-8"
                                     },
                                     timeout=timeout)
            response.raise_for_status()
            if getResponse:
                return response.json()
            else:
                return
        except Exception as error:
            if cnt > retries:
                raise
            else:
                logMsg("Failed postJson %s will retry soon" % url, error)
                # randomized pause, so many workers do not retry at the same moment
                time.sleep(timeout // 4 + random.random() * 20)
 def openPackage(self, idName):
     """
     Read the file named idName from the storage directory and decode it from bson.

     @param idName: name of the package file inside self.storageDirectory
     @return: the decoded package, or an empty list if reading or decoding failed
     """
     try:
         packagePath = os.path.join(self.storageDirectory, idName)
         return decodeFromBson(readFileUnderPath(packagePath))
     except Exception as error:
         # best effort: a broken package is reported and treated as empty
         logMsg("Could not open downloaded package " + idName + "!", error)
         return []
示例#6
0
    def doEvaluation(self, nextWork):
        """
        Run the policy iterator on one work package and build the result package.

        @param nextWork: dict with the keys "work" (the games) and "id"
        @return: dict with the keys "id" and "data"; "data" holds "iterations",
                 "network" and "workerName"
        """
        startTime = time.monotonic()
        games = nextWork["work"]
        workId = nextWork["id"]

        self.lastIterationCompleted = time.monotonic()
        iteratedPolicy = self.policyIterator.iteratePolicy(self.policy, games)
        self.iterateTimes.append(time.monotonic() - self.lastIterationCompleted)

        # only the 20 most recent durations are used for the average
        if len(self.iterateTimes) > 20:
            self.iterateTimes = self.iterateTimes[-20:]

        # a network id is only reported if the policy is not the initial one
        # and this is no frametime test
        reportNetwork = self.initialPolicyID != self.policy.getUUID() and not self.isFrameTimeTest
        result = {
            "iterations": iteratedPolicy,
            "network": self.policy.getUUID() if reportNetwork else None,
            "workerName": self.workerName,
        }

        rpack = {"id": workId, "data": result}

        logMsg("Completed work package %s in %.2fs using network %s. Average completion time is now %.2f" % (workId, (time.monotonic() - startTime), result["network"], np.mean(self.iterateTimes)))
        return rpack
    def run(self):
        """
        Download new state packages from the command server while self.running is set.

        Every cycle requests the list of state packages of the run and downloads
        all packages whose id is not yet known in self.downloadedStatesObject,
        newest first. After each download the bookkeeping (downloadedStatesObject,
        downloadedStatesHistory, history, numStatesAvailable) is updated and
        self.store() is called. Errors are logged and the cycle is retried later.
        """
        logMsg("Starting states downloader, storing files in", self.storageDirectory)
        while self.running:
            try:
                # first download the current file describing the states on the server
                remoteEntries = requestJson(self.commandHost + "/api/state/list/" + self.runId, self.secret)

                newEntries = [
                    remoteEntry for remoteEntry in remoteEntries
                    if remoteEntry["id"] not in self.downloadedStatesObject
                ]

                # download newest ones first, they are the most interesting
                newEntries.sort(key = lambda x: x["creation"], reverse=True)

                for newEntry in newEntries:
                    statesData = requestBytes(self.commandHost + "/api/state/download/" + newEntry["id"], self.secret)
                    storeFileUnderPath(os.path.join(self.storageDirectory, newEntry["id"]), statesData)
                    self.downloadedStatesObject[newEntry["id"]] = newEntry
                    self.downloadedStatesHistory.append(newEntry)
                    self.downloadedStatesHistory.sort(key = lambda x: x["creation"], reverse=True)
                    # self.history is replaced by a fresh copy after every download
                    self.history = self.downloadedStatesHistory.copy()
                    self.numStatesAvailable += newEntry["packageSize"]
                    self.store()

            except Exception as error:
                logMsg("Could not download states, will try again soon", error)
                time.sleep(10)

            time.sleep(5)
def getRunConfig(runId, commandHost, secret):
    """
    Request the configuration of a run and write it into a temporary yaml file.

    The request is repeated every 15 seconds until it succeeds.

    @return: the open NamedTemporaryFile object, its path is available as .name.
             Use "with getRunConfig(...) as configFile:"
    """
    while True:
        try:
            response = requestJson(commandHost + "/api/runs/" + runId, secret)
            runConfig = response["config"]
        except Exception as error:
            logMsg(
                "Could not get run configuration for run, will try again soon",
                error)
            time.sleep(15)
        else:
            break

    ff = tempfile.NamedTemporaryFile(mode="w+", suffix=".yaml")
    ff.write(runConfig)
    # flush, so the content can be read through the file name
    ff.flush()

    logMsg("Using tempfile for configuration:", ff.name)

    return ff
示例#9
0
    def main(self):
        """
        Let the player under test predict all test states and report the accuracies.

        A move prediction counts as correct if it is contained in the solution
        of the state, a result prediction if it equals the stored game result.

        @return: a pair of (move accuracy, result accuracy), both in percent
        """
        startEval = time.monotonic()
        logMsg("Begin test")

        testInput = list(zip(self.states, self.histories))
        predictions = self.playerUnderTest.getMoves(testInput)
        movePredictions, resultPredictions = list(zip(*predictions))

        correctMoves = sum(
            1 if predicted in solution else 0
            for predicted, solution in zip(movePredictions, self.solutions))
        correctGameResults = sum(
            1 if predicted == expected else 0
            for predicted, expected in zip(resultPredictions, self.gresults))

        numExamples = len(self.states)
        moveAccuracy = 100.0 * (correctMoves / numExamples)
        resultAccuracy = 100.0 * (correctGameResults / numExamples)

        evalTime = time.monotonic() - startEval

        logMsg(
            "Test on %i examples took %.2f seconds, perfect play accuracies: for moves %.2f%% , for result: %.2f%%"
            % (len(self.states), evalTime, moveAccuracy, resultAccuracy))

        return moveAccuracy, resultAccuracy
示例#10
0
    def __init__(self, storage):
        """
        Read the worker arguments from sys.argv and start the networks downloader process.

        @param storage: directory passed as --path to core.mains.networks_downloader
        @raise Exception: if one of --secret, --run or --command is missing in sys.argv
        """
        logMsg("Using DistributedNetworkUpdater2!")
        self.lastNetworkCheck = -999
        self.checkInterval = 4
        self.storage = storage

        requiredFlags = ("--secret", "--run", "--command")
        if not all(flag in sys.argv for flag in requiredFlags):
            raise Exception(
                "You need to provide arguments for the distributed worker: --secret <server password>, --run <uuid> and --command <command server host>!"
            )

        def argAfter(flag):
            # the value of an option is the argument right behind its flag
            return sys.argv[sys.argv.index(flag) + 1]

        self.secret = argAfter("--secret")
        self.run = argAfter("--run")
        self.commandHost = argAfter("--command")

        downloaderCommand = [
            "python", "-m", "core.mains.networks_downloader",
            "--path", self.storage,
            "--secret", self.secret,
            "--command", self.commandHost,
            "--run", self.run,
        ]
        self.downloader = subprocess.Popen(
            downloaderCommand, preexec_fn=set_pdeathsig(signal.SIGTERM))
示例#11
0
    def update(self, policy):
        """
        Load a pending stored policy and, if the module level flag needsFitting
        is set, fit the policy on the module level reportedData.

        After fitting, the state is stored and the policy is tested on the
        dataset, provided that all parts needed for the test are configured.

        @param policy: the policy to update
        @return: the same policy object
        """
        global reportedData, needsFitting

        if self.loadedPolicyBytes is not None:
            policy.load(self.loadedPolicyBytes)
            self.loadedPolicyBytes = None
            logMsg("Loaded stored policy with UUID %s!" % policy.getUUID())

        if not needsFitting:
            return policy

        prepared = [policy.prepareExample(d) for d in reportedData]

        for _ in range(self.trainEpochs):
            policy.fit(policy.packageExamplesBatch(prepared))
        needsFitting = False
        self.storeState(policy)

        canEvaluate = (self.policyIterator is not None
                       and self.moveDecider is not None
                       and self.batchSize is not None
                       and self.datasetFile is not None
                       and self.initialGameState is not None)

        if not canEvaluate:
            logMsg("Single Process Policy Updater is not configured to evaluate!")
            return policy

        testPlayer = PolicyIteratorPlayer(policy, self.policyIterator, NoopPolicyUpdater(), self.moveDecider, self.batchSize)
        policyTester = DatasetPolicyTester(testPlayer, self.datasetFile, self.initialGameState, "shell", self.batchSize)
        policyTester.main()

        return policy
示例#12
0
    def update(self, policy):
        """
        Load self.policyBytes into the policy, unless the policy already has
        the UUID that was seen after the previous load.

        @param policy: the policy to update
        @return: the same policy object
        """
        if self.loadedUUID is not None and policy.getUUID() == self.loadedUUID:
            # the stored policy is already loaded
            return policy

        policy.load(self.policyBytes)
        self.loadedUUID = policy.getUUID()
        logMsg("Loaded stored policy with UUID %s!" % (policy.getUUID()))
        return policy
示例#13
0
def postBytes(url,
              secret,
              data,
              timeout=30,
              retries=999999,
              expectResponse=False):
    """
    POST the raw `data` to `url`, retrying on any failure.

    @param url: target url
    @param secret: sent in the "secret" header
    @param data: request body
    @param timeout: request timeout in seconds, it also scales the pause between retries
    @param retries: number of failed attempts that are tolerated before the error is raised
    @param expectResponse: if True the decoded JSON body of the response is returned
    @return: the decoded response if expectResponse is set, otherwise None
    """
    attempts = 0
    while True:
        attempts += 1
        try:
            response = requests.post(url,
                                     data=data,
                                     headers={"secret": secret},
                                     timeout=timeout)
            response.raise_for_status()
            return response.json() if expectResponse else None
        except Exception as error:
            if attempts > retries:
                raise
            logMsg("Failed postBytes %s will retry soon" % url, error)
            # randomized pause before the next attempt
            time.sleep(timeout // 4 + random.random() * 20)
示例#14
0
    def getMatchHistory(self, pool, runId):
        """
        Return the match history of a run, loading it from the database on the first call.

        Every entry of the history is a tuple of
        (player1, player2, result, ratingChange, creation in milliseconds).
        After loading, the history is sorted by creation and recalcPlayerStats is called.

        @param pool: connection pool offering getconn() and putconn(con)
        @param runId: the run to load the matches of
        @return: self.matchHistory
        """
        if not self.loadedMatchHistory:
            self.loadedMatchHistory = True

            # both need to exist before the try block, else the cleanup below
            # fails with an UnboundLocalError that hides the actual problem
            # whenever getconn() or cursor() raises
            con = None
            cursor = None
            try:
                con = pool.getconn()
                cursor = con.cursor()

                cursor.execute(
                    "SELECT player1, player2, result, ratingChange, creation from league_matches where run = %s",
                    (runId, ))
                rows = cursor.fetchall()

                for row in rows:
                    self.matchHistory.append((row[0], row[1], row[2], row[3],
                                              int(row[4].timestamp() * 1000)))

            finally:
                if cursor is not None:
                    cursor.close()
                # only give back a connection that was actually taken from the pool
                if con is not None:
                    pool.putconn(con)

            logMsg("Loaded a history of %i matches for run %s" %
                   (len(self.matchHistory), runId))

            self.matchHistory.sort(key=lambda x: x[4])

            self.recalcPlayerStats()

        return self.matchHistory
示例#15
0
    def test_ExampleGames(self):
        """
        Every example game must play out to the expected turn and winner.
        Passing -v or --verbose prints each visited game state.
        """
        verbose = any(flag in sys.argv for flag in ('-v', '--verbose'))
        setLoggingEnabled(verbose)

        def replay(moves):
            # plays the moves on the test subject, stops early once the game has ended
            state = self.subject
            if verbose:
                self.printGameWithTensor(state)
            for move in moves:
                if state.hasEnded():
                    break
                state = state.playMove(move)
                if verbose:
                    self.printGameWithTensor(state)
            return state

        examples = self.getExampleGameSequences()

        logMsg("\nPlaying", len(examples), "examples")

        for idx, example in enumerate(examples):
            moves, expectedTurns, expectedWinner = example
            logMsg("Playing example game", idx)
            finalState = replay(moves)
            self.assertEqual(finalState.getTurn(), expectedTurns)
            self.assertTrue(finalState.hasEnded())
            self.assertEqual(finalState.getWinnerNumber(), expectedWinner)
示例#16
0
    def __init__(self, initialState, policy, policyIterator, policyUpdater, isFrameTimeTest = False):
        """
        Set up the evaluation worker and determine the eval server from sys.argv.

        The eval server address is derived from the value of --command
        ("https" replaced by "http", ":8042" removed, ":4242" appended), unless
        --evalserver is given, which is then used as is. The worker name is the
        value of --eval, or "unknown".
        """
        self.initialState = initialState
        self.policy = policy
        self.policyIterator = policyIterator
        self.policyUpdater = policyUpdater
        self.workQueue = []
        self.resultsQueue = []

        self.initialPolicyID = self.policy.getUUID()

        self.isFrameTimeTest = isFrameTimeTest

        def argAfter(flag):
            # the value of an option is the argument right behind its flag
            return sys.argv[sys.argv.index(flag) + 1]

        command = argAfter("--command").replace("https", "http").replace(":8042", "") + ":4242"

        # can be used in case the eval server is somewhere else. That is the case in frametime evaluation.
        if "--evalserver" in sys.argv:
            command = argAfter("--evalserver")
        self.command = command

        self.workerName = argAfter("--eval") if "--eval" in sys.argv else "unknown"

        logMsg("Started evaluation worker, talking to eval server on %s" % self.command)

        self.iterateTimes = [1]

        self.lastIterationCompleted = time.monotonic()

        self.printNoWork = True
示例#17
0
def mlConfigBasedMain(configPath):
    """
    Enable logging, register the classes, log the command line and load the config.

    @param configPath: path handed to mlconfig.load
    @return: the loaded configuration
    """
    setLoggingEnabled(True)
    registerClasses()
    logMsg("Running", *sys.argv)

    return mlconfig.load(configPath)
示例#18
0
 def __init__(self, explorationPlyCount, minProp=-1):
     """
     @param explorationPlyCount: Until which ply (turn) to explore randomly
     @param minProp: stored as self.minProp, defaults to -1
     """
     creationMessage = "Creating TemperatureMoveDecider(explorationPlyCount=%i)" % explorationPlyCount
     logMsg(creationMessage)

     self.explorationPlyCount = explorationPlyCount
     self.minProp = minProp
示例#19
0
 def on_get(self, req, resp, runId):
     """
     Respond with the player list of the run.

     The list is queried via queryPlayerList only if self.cached has no
     entry for the run yet. The time of the request is remembered in
     self.lastDataRequest.
     """
     self.lastDataRequest = time.monotonic()

     if runId not in self.cached:
         self.cached[runId] = self.queryPlayerList(runId)

     numPlayers = len(self.cached[runId])
     logMsg("players_proxy responding with %i players" % numPlayers)

     resp.media = self.cached[runId]
     resp.status = falcon.HTTP_200
 def reportEvaluationNumber(self):
     """
     Post the number of requested position evaluations to the command server
     and reset the counter.

     The report contains "evals" (self.numPositionEvalsRequested) and
     "iteration" (the number of seen networks minus one).
     """
     drep = dict()
     drep["evals"] = self.numPositionEvalsRequested
     drep["iteration"] = len(self.seenNetworks) - 1
     postJson(self.commandHost + "/api/evalscnt/" + self.run, self.secret,
              drep)
     # the message names the iteration first and the number of evaluations second
     logMsg("Iteration %i used %i evaluation requests!" %
            (drep["iteration"], drep["evals"]))
     self.numPositionEvalsRequested = 0
示例#21
0
    def playBatch(self):
        """
        playBatch should play one move on every open game.
        Since the number of expansions is dynamic here, this means playBatch should
        play moves until len(self.iterators) number of moves have been tracked, even if they
        happen to be played on the same game.

        @return: a tuple of (average time per move in milliseconds,
                 average number of iterator steps per move,
                 number of moves counted for this batch)
        """

        # accumulated time in nanoseconds: getMustMove, the values returned
        # by playMove and stepIteratorsOnce all add to it
        moveTimeNs = 0

        # moves played beyond the previous batch are carried over into this one
        movesPlayed = self.prevMoves

        iterationsDone = 0

        # on the very first batch the iterators are stepped once before any move is considered
        if not self.playedBatch:
            self.playedBatch = True
            iterationsDone += self.stepIteratorsOnce()

        while movesPlayed < len(self.iterators):

            self.initSelfplay(self.runId)

            moveThinkStart = time.monotonic_ns()
            mustMoves = self.getMustMove()
            moveTimeNs += time.monotonic_ns() - moveThinkStart

            for idx, mustMove in enumerate(mustMoves):
                if mustMove:
                    # the player is out of time, or wants to make a move
                    # -> play a move here for that index
                    moveTimeNs += self.playMove(idx)
                    movesPlayed += 1

            iterStartTime = time.monotonic_ns()

            iterationsDone += self.stepIteratorsOnce()

            moveTimeNs += time.monotonic_ns() - iterStartTime

            #self.debugPrintState(moveTimeNs)

        # NOTE(review): this is 0 if the loop above never played a move (e.g. no
        # iterators), the divisions below would then fail - confirm this cannot happen
        numMovesInBatch = (movesPlayed - self.prevMoves)

        avgIterationsPerMove = iterationsDone / numMovesInBatch

        moveTimeNs /= numMovesInBatch

        # remember how many moves were played beyond the size of this batch
        self.prevMoves = movesPlayed - len(self.iterators)

        moveAvgMs = moveTimeNs / 1000000.0

        logMsg(
            "played a batch of %i moves with %.2f avg ms per move and %i avg nodes per move"
            % (numMovesInBatch, moveAvgMs, avgIterationsPerMove))

        return moveAvgMs, avgIterationsPerMove, numMovesInBatch
示例#22
0
    def parseMove(self, gameState, moveStr):
        """
        Parse a 1-based move number into the 0-based move.

        @param gameState: the state that provides the legal moves
        @param moveStr: the text to parse
        @return: the move if it is legal in gameState, otherwise -1
        @raise ValueError: if moveStr is not an integer
        """
        result = int(moveStr) - 1
        if result in gameState.getLegalMoves():
            return result

        logMsg("That move is illegal")
        return -1
    def playOpenGames(self):
        """
        Advance every active game as far as possible using cached evaluations.

        A game is advanced until its current state is either pending for an
        evaluation or not in the cache; in the latter case the state is added
        to self.requestEvals. Ended games are finalized and replaced, either
        by retry points or by a new game from the initial state.
        requestEvaluations is called at the end.
        """
        # play a batch until all positions in it are not known in the cache. If a game ends, report the game and replace it with a new one.
        # once all positions in a batch need an evaluation, request it

        addedGames = False

        # the range is fixed before the loop: games appended below are not visited in this call
        for gidx in range(len(self.activeGames)):
            agame, ahistory = self.activeGames[gidx]

            while True:
                # keep the stored entry in sync with the progress made in the previous pass
                self.activeGames[gidx] = [agame, ahistory]
                if agame in self.pendingGames:
                    # cannot continue this game at this time, it is already pending for an evaluation.
                    break
                elif agame.hasEnded():
                    self.finalizeGame(agame, ahistory)

                    if self.initialState in self.cache or self.initialState in self.pendingGames:
                        retryPoints = self.pickRetryPoints(ahistory, agame)
                        self.retryTurns += list(
                            map(lambda x: x[0].getTurn(), retryPoints))
                        if len(retryPoints) > 0:
                            self.foundRetry += 1
                            # the first retry point replaces the ended game, all others become additional games
                            agame, ahistory = retryPoints[0]
                            for anotherPoint in retryPoints[1:]:
                                self.activeGames.append(anotherPoint)
                                addedGames = True

                        else:
                            self.foundNoRetry += 1
                            #logMsg("No retry points were generated!")
                            agame = self.initialState
                            ahistory = []
                    else:
                        logMsg(
                            "Initial state not evaluated in current iteration, starting a new game from the start!"
                        )
                        agame = self.initialState
                        ahistory = []

                elif agame in self.cache:
                    # the evaluation is known: record it in the history and play the decided move
                    iteratedPolicy = self.cache[agame]
                    ahistory.append([agame, iteratedPolicy])
                    moveToPlay = self.moveDecider.decideMove(
                        agame, iteratedPolicy[0], iteratedPolicy[1])
                    self.playedMoves.add((agame, moveToPlay))
                    agame = agame.playMove(moveToPlay)
                else:
                    # unknown position: an evaluation is needed before the game can continue
                    self.requestEvals.add(agame)
                    break

        if addedGames:
            logMsg("There are now %i active games!" % len(self.activeGames))

        self.requestEvaluations()
    def checkForNewIteration(self, evalResults):
        """
        Detect networks in the evaluation results that were not seen before.

        For every unseen network the network is remembered as the current one,
        the cache is replaced by an empty one and the evaluation number is reported.

        @param evalResults: maps a uuid to a triple of (results, network seen, worker name)
        """
        for uuid in evalResults:
            _, networkSeen, _ = evalResults[uuid]

            if networkSeen is None or networkSeen in self.seenNetworks:
                continue

            self.seenNetworks.add(networkSeen)
            self.currentNetwork = networkSeen
            self.cache = dict()
            logMsg("A new iteration has begun, cleared the cache!")

            self.reportEvaluationNumber()
示例#25
0
    def loadNewestNetwork(self, policy):
        """
        Load the network with the newest creation value into the policy.
        Does nothing if the network list is empty.

        @param policy: the policy to load the network into
        """
        networkList = self.getNetworkList()
        if len(networkList) == 0:
            return

        # newest network first
        networkList.sort(key=lambda x: x["creation"], reverse=True)
        newest = networkList[0]
        logMsg("Continue training of an existing network", newest)

        networkData = self.downloadNetwork(newest["id"])
        policy.load(networkData)

        logMsg("Network %s loaded" % policy.getUUID())
示例#26
0
 def __init__(self, trainEpochs, state, policyIterator = None, moveDecider = None, batchSize = None, datasetFile = None, initialGameState = None):
     """
     @param trainEpochs: how often the policy is fitted per update
     @param state: directory of the state, the policy is stored in it as policy.npy
     @param policyIterator: optional, stored for the evaluation
     @param moveDecider: optional, stored for the evaluation
     @param batchSize: optional, stored for the evaluation
     @param datasetFile: optional, stored for the evaluation
     @param initialGameState: optional, stored for the evaluation
     """
     logMsg("Initialized SingleProcessUpdater trainEpochs=%i, state=%s" % (trainEpochs, state))

     # training setup
     self.trainEpochs = trainEpochs
     self.state = state
     self.statePath = os.path.join(self.state, "policy.npy")
     self.loadedPolicyBytes = None

     # optional evaluation setup
     self.policyIterator = policyIterator
     self.moveDecider = moveDecider
     self.batchSize = batchSize
     self.initialGameState = initialGameState
     self.datasetFile = datasetFile

     self.loadState()
示例#27
0
    def test_hashProperties(self):
        """
        When playing random games there should be less than 20% hash collisions and
        at most 16 states that share a single hash in the generated states.
        Equal states must also have equal hash values.
        """
        verbose = ('-v' in sys.argv) or ('--verbose' in sys.argv)
        setLoggingEnabled(verbose)

        numTestGames = 250
        statesByHash = dict()
        allStates = []
        uniqueStates = 0
        worstLen = 0
        worstHashValue = 0

        for gameIdx in range(numTestGames):
            for s in self.playRandomGame(gameIdx):
                allStates.append(s)
                h = hash(s)

                if h not in statesByHash:
                    statesByHash[h] = [s]
                    uniqueStates += 1
                    continue

                bucket = statesByHash[h]
                if any(known == s for known in bucket):
                    # this state was seen before
                    continue

                # a different state with the same hash: a collision
                bucket.append(s)
                if len(bucket) > worstLen:
                    worstLen = len(bucket)
                    worstHashValue = h
                uniqueStates += 1

        oCnt = len(allStates)

        # compare every pair of generated states
        for aIdx, a in enumerate(allStates):
            for bIdx in range(aIdx + 1, len(allStates)):
                b = allStates[bIdx]
                if a == b:
                    self.assertEqual(hash(a), hash(b),
                                     "Equality must imply equal hash values")

        uniqueHashes = len(statesByHash)
        dupes = uniqueStates - uniqueHashes
        result = dupes / float(uniqueStates)

        logMsg("\nFound ", uniqueHashes, "unique hashs for", uniqueStates,
               "unique states. Overall ", oCnt, "moves played! Worst hash has",
               worstLen, "collisions, it is the hash number", worstHashValue)
        self.assertTrue(uniqueHashes <= uniqueStates)
        self.assertTrue(result < 0.2)
        self.assertTrue(worstLen < 17)
示例#28
0
    def pushResults(self):
        """
        Endlessly take results from the front of self.resultsQueue, encode their
        data as bson and post them to the checkin endpoint of the eval server.
        """
        logMsg("Started poll results thread")

        while True:
            if len(self.resultsQueue) == 0:
                # nothing to push yet
                time.sleep(0.2)
                continue

            nextResult = self.resultsQueue.pop(0)

            resultId = nextResult["id"]
            resultData = encodeToBson(nextResult["data"])
            # the secret is sent empty here
            postBytes(self.command + "/checkin/" + resultId, "", resultData)
    def receiveGameEvals(self):
        """
        Poll until evaluation results arrive and add them to the cache.

        Every polled set of results is checked for a new iteration and for
        rejected evaluations. Results with a uuid that is not pending are
        only logged.
        """
        evalResults = dict()
        while len(evalResults) == 0:
            evalResults = self.evalAccess.pollEvaluationResults()
            self.checkForNewIteration(evalResults)
            self.checkForRejectEvals(evalResults)

        for uuid in evalResults:
            if uuid not in self.pendingEvals:
                logMsg("Received evaluation of unknown UUID!", uuid)
                continue

            # the request is not pending anymore once its results are processed
            evalGames = self.pendingEvals.pop(uuid)
            self.addResultsToCache(evalGames, evalResults[uuid])
示例#30
0
    def prepareWindow(self, downloader, currentIteration):
        """
        Wait until enough states for the iteration are available and build the training window.

        The window consists of the next states for the iteration followed by
        states from before the iteration.

        @param downloader: handed through to the methods that count and fetch states
        @param currentIteration: the iteration to prepare the window for
        @return: list of the new policy states followed by the older policy states
        """
        logMsg("Need to prepare a window for iteration", currentIteration)

        # NOTE(review): windowSize is computed but not used below, the older states
        # are limited by self.maxSize instead - confirm which limit is intended
        windowSize = min(currentIteration * self.nextIterationStatesCount,
                         self.maxSize)

        announcedWait = False
        while self.countAvailableForIteration(
                downloader, currentIteration) < self.nextIterationStatesCount:
            if not announcedWait:
                logMsg("Waiting for more data to train next network!")
                announcedWait = True
            time.sleep(0.5)

        logMsg("Have enough data to train next network now!")

        newPolicyStates = self.getNextStatesForIteration(
            downloader, currentIteration)

        olderStatesLimit = self.maxSize - self.nextIterationStatesCount
        olderPolicyStates = self.getStatesBeforeIteration(
            downloader, currentIteration, olderStatesLimit)

        trainingWindow = newPolicyStates + olderPolicyStates
        logMsg(
            "Using a window of %i states with %i states representing the new policy"
            % (len(trainingWindow), len(newPolicyStates)))
        return trainingWindow