def loopNetworksDownload(storage):
    """
    Keep a local directory in sync with the networks of a run. Never returns.

    The connection parameters are read from the command line
    (--secret, --run, --command). Every two seconds:
    1. the list of existing networks is requested from the command server,
    2. every listed network without a local file in storage is downloaded,
    3. the list itself is written to networks.json inside storage.
    """
    def argAfter(flag):
        # the value of a flag is the argument that directly follows it
        return sys.argv[sys.argv.index(flag) + 1]

    secret = argAfter("--secret")
    run = argAfter("--run")
    commandHost = argAfter("--command")

    while True:
        networks = requestJson(commandHost + "/api/networks/list/" + run, secret)

        for network in networks:
            targetPath = os.path.join(storage, network["id"])
            if os.path.exists(targetPath):
                # already downloaded in an earlier pass
                continue
            payload = requestBytes(
                commandHost + "/api/networks/download/" + network["id"], secret)
            storeFileUnderPath(targetPath, payload)
            logMsg("Downloaded a new network to %s" % targetPath)

        writeJsonFile(os.path.join(storage, "networks.json"), networks)
        time.sleep(2)
def measureFrametime(configPath, idx, run):
    """
    Measure how long the selfplay worker described by configPath needs per batch.

    Plays at least BATCH_COUNT batches and keeps playing until MIN_TIME seconds
    have passed since the function was entered (config loading included).

    Returns a tuple: (mean of the first playBatch() return value,
    sum of the third playBatch() return value).
    """
    setproctitle.setproctitle("x0_fe_worker_" + str(idx))
    startTime = time.monotonic()

    core = loadMlConfig(configPath)
    setLoggingEnabled(True)

    worker = core.worker(recursive=True)
    worker.initSelfplay(run)

    times, exs, ns = [], [], []

    def recordBatch():
        # play one batch and remember its three reported values
        batchTime, expansions, moveCount = worker.playBatch()
        times.append(batchTime)
        exs.append(expansions)
        ns.append(moveCount)

    for _ in range(BATCH_COUNT):
        recordBatch()

    while time.monotonic() - startTime < MIN_TIME:
        recordBatch()

    # the node statistic is only reported if every batch provided one
    if None not in exs:
        logMsg("Avg number of mcts nodes used by playBatch(): ", np.mean(exs))

    return np.mean(times), np.sum(ns)
def main(self):
    """
    Main loop of the worker. Never returns.

    Starts two daemon threads (self.pollWork and self.pushResults), then
    processes self.workQueue front to back: update the policy, evaluate the
    first work item, append the result to self.resultsQueue and only then
    remove the item from the work queue.
    """
    setLoggingEnabled(True)

    self.pullThread = threading.Thread(target=self.pollWork, daemon=True)
    self.pullThread.start()

    self.pushThread = threading.Thread(target=self.pushResults, daemon=True)
    self.pushThread.start()

    announceIdle = True
    while True:
        # wait for work, complaining only once per idle period
        while not self.workQueue:
            if announceIdle:
                logMsg("I have no work!")
                announceIdle = False
            time.sleep(0.05)
        announceIdle = True

        self.policy = self.policyUpdater.update(self.policy)

        # the item stays in the queue while it is being evaluated
        self.resultsQueue.append(self.doEvaluation(self.workQueue[0]))
        del self.workQueue[0]
def postJson(url, secret, data, timeout=30, retries=999999, getResponse=False):
    """
    POST data as JSON to url, retrying on any failure.

    @param url: target url
    @param secret: sent in the "secret" header
    @param data: object to serialize with json.dumps
    @param timeout: timeout in seconds handed to requests.post; also scales the pause between retries
    @param retries: number of retries before the last error is raised to the caller
    @param getResponse: if True, the decoded JSON body of the response is returned, otherwise None

    Raises whatever json.dumps raises for non-serializable data (immediately, this is not retried),
    and the last request error once the retries are used up.
    """
    # Serialization does not depend on the network, so it is done once and outside of
    # the retry loop: retrying can never fix data that is not serializable.
    payload = json.dumps(data)
    headers = {
        "secret": secret,
        "Content-Type": "application/json;charset=utf-8"
    }

    cnt = 0
    while True:
        cnt += 1
        try:
            # Without an explicit timeout requests waits forever on a stalled connection,
            # which would defeat the retry logic below.
            response = requests.post(url=url, data=payload, headers=headers, timeout=timeout)
            response.raise_for_status()
            if getResponse:
                return response.json()
            return
        except Exception as error:
            if cnt > retries:
                raise
            logMsg("Failed postJson %s will retry soon" % url, error)
            # randomized pause, so many workers do not retry in lockstep
            time.sleep(timeout // 4 + random.random() * 20)
def openPackage(self, idName):
    """
    Read the package stored as idName inside self.storageDirectory and decode it from bson.
    Any failure is logged and answered with an empty list instead of an exception.
    """
    try:
        packagePath = os.path.join(self.storageDirectory, idName)
        rawPackage = readFileUnderPath(packagePath)
        return decodeFromBson(rawPackage)
    except Exception as error:
        logMsg("Could not open downloaded package" + idName + "!", error)
    return []
def doEvaluation(self, nextWork):
    """
    Run the policy iterator on one work package.

    @param nextWork: dict with the keys "work" (handed to the policy iterator) and "id"
    @return: dict with "id" (the work id) and "data", where data holds "iterations",
    "network" and "workerName". "network" is the UUID of the current policy, or None if
    the policy is still the initial one or this is a frametime test.

    Also maintains self.iterateTimes, the durations of the last (at most) 20 evaluations.
    """
    startTime = time.monotonic()
    workId = nextWork["id"]

    self.lastIterationCompleted = time.monotonic()
    iteratedPolicy = self.policyIterator.iteratePolicy(self.policy, nextWork["work"])
    self.iterateTimes.append(time.monotonic() - self.lastIterationCompleted)
    if len(self.iterateTimes) > 20:
        # only keep the most recent measurements for the average
        self.iterateTimes = self.iterateTimes[-20:]

    policyChanged = self.initialPolicyID != self.policy.getUUID()
    reportNetwork = policyChanged and not self.isFrameTimeTest

    result = {
        "iterations": iteratedPolicy,
        "network": self.policy.getUUID() if reportNetwork else None,
        "workerName": self.workerName,
    }
    rpack = {"id": workId, "data": result}

    elapsed = time.monotonic() - startTime
    logMsg("Completed work package %s in %.2fs using network %s. Average completion time is now %.2f" %
           (workId, elapsed, result["network"], np.mean(self.iterateTimes)))

    return rpack
def run(self):
    """
    Download loop of the states downloader, runs while self.running is set.

    Every 5 seconds the list of state packages is requested from the server. Packages
    that are not known in self.downloadedStatesObject are downloaded, newest first,
    stored in self.storageDirectory and registered in the bookkeeping attributes;
    self.store() is called after every single package.
    Any error is logged and followed by an additional 10 second pause.
    """
    logMsg("Starting states downloader, storing files in", self.storageDirectory)

    def creationOf(entry):
        return entry["creation"]

    while self.running:
        try:
            # first download the current file describing the states on the server
            remoteEntries = requestJson(self.commandHost + "/api/state/list/" + self.runId, self.secret)

            newEntries = [entry for entry in remoteEntries
                          if entry["id"] not in self.downloadedStatesObject]
            # only used by the disabled log message below
            sumNewStates = sum(entry["packageSize"] for entry in newEntries)

            # download newest ones first, they are the most interesting
            newEntries.sort(key=creationOf, reverse=True)

            #logMsg("Found %i new state packages with %i states on the server!" % (len(newEntries), sumNewStates))
            for newEntry in newEntries:
                entryId = newEntry["id"]
                statesData = requestBytes(self.commandHost + "/api/state/download/" + entryId, self.secret)
                storeFileUnderPath(os.path.join(self.storageDirectory, entryId), statesData)

                self.downloadedStatesObject[entryId] = newEntry
                self.downloadedStatesHistory.append(newEntry)
                self.downloadedStatesHistory.sort(key=creationOf, reverse=True)
                # self.history is replaced by a fresh copy, it is never modified in place here
                self.history = self.downloadedStatesHistory.copy()
                self.numStatesAvailable += newEntry["packageSize"]

                self.store()
        except Exception as error:
            logMsg("Could not download states, will try again soon", error)
            time.sleep(10)

        time.sleep(5)
def getRunConfig(runId, commandHost, secret):
    """
    Request the configuration of the run from the command server and write it into a
    temporary .yaml file. The request is repeated every 15 seconds until it works.

    Returns the open temporary file object, its path is available as .name.
    Use it like this: "with getRunConfig(...) as configFile:"
    """
    while True:
        try:
            runConfig = requestJson(commandHost + "/api/runs/" + runId, secret)["config"]
        except Exception as error:
            logMsg(
                "Could not get run configuration for run, will try again soon",
                error)
            time.sleep(15)
        else:
            break

    configFile = tempfile.NamedTemporaryFile(suffix=".yaml", mode="w+")
    configFile.write(runConfig)
    # make sure the content is on disk for whoever opens the file by name
    configFile.flush()

    logMsg("Using tempfile for configuration:", configFile.name)

    return configFile
def main(self):
    """
    Let self.playerUnderTest play all test positions and compare against the known solutions.

    A move counts as correct if it is contained in the solution entry of its position,
    a game result counts as correct if it is equal to the expected one.

    @return: tuple (moveAccuracy, resultAccuracy), both in percent of len(self.states)
    """
    startEval = time.monotonic()
    logMsg("Begin test")

    testInput = list(zip(self.states, self.histories))
    predictions = self.playerUnderTest.getMoves(testInput)
    movePredictions, resultPredictions = list(zip(*predictions))

    correctMoves = sum(
        1 for predicted, accepted in zip(movePredictions, self.solutions)
        if predicted in accepted)
    correctGameResults = sum(
        1 for predicted, expected in zip(resultPredictions, self.gresults)
        if predicted == expected)

    exampleCount = len(self.states)
    moveAccuracy = 100.0 * (correctMoves / exampleCount)
    resultAccuracy = 100.0 * (correctGameResults / exampleCount)

    evalTime = time.monotonic() - startEval

    logMsg(
        "Test on %i examples took %.2f seconds, perfect play accuracies: for moves %.2f%% , for result: %.2f%%"
        % (exampleCount, evalTime, moveAccuracy, resultAccuracy))

    return moveAccuracy, resultAccuracy
def __init__(self, storage):
    """
    Read --secret, --run and --command from the command line and start the
    networks downloader (core.mains.networks_downloader) as a child process that
    stores into the given storage path.

    Raises an Exception if one of the three command line arguments is missing.
    """
    logMsg("Using DistributedNetworkUpdater2!")

    self.lastNetworkCheck = -999
    self.checkInterval = 4
    self.storage = storage

    requiredFlags = ("--secret", "--run", "--command")
    if not all(flag in sys.argv for flag in requiredFlags):
        raise Exception(
            "You need to provide arguments for the distributed worker: --secret <server password>, --run <uuid> and --command <command server host>!"
        )

    def argAfter(flag):
        # the value of a flag is the argument that directly follows it
        return sys.argv[sys.argv.index(flag) + 1]

    self.secret = argAfter("--secret")
    self.run = argAfter("--run")
    self.commandHost = argAfter("--command")

    downloaderCommand = [
        "python", "-m", "core.mains.networks_downloader",
        "--path", self.storage,
        "--secret", self.secret,
        "--command", self.commandHost,
        "--run", self.run
    ]
    # set_pdeathsig is defined elsewhere; presumably it makes the child receive
    # SIGTERM when this process dies - TODO confirm
    self.downloader = subprocess.Popen(downloaderCommand,
                                       preexec_fn=set_pdeathsig(signal.SIGTERM))
def update(self, policy):
    """
    Bring the policy up to date and return it.

    1. If policy bytes were loaded from disk, they are loaded into the policy once.
    2. If the module level flag needsFitting is set, the policy is fit for
       self.trainEpochs epochs on the module level reportedData, the flag is
       cleared and the state is stored.
    3. After fitting, the policy is tested on a dataset, if the updater was
       configured with everything needed for that.
    """
    global reportedData
    global needsFitting

    if self.loadedPolicyBytes is not None:
        policy.load(self.loadedPolicyBytes)
        self.loadedPolicyBytes = None
        logMsg("Loaded stored policy with UUID %s!" % policy.getUUID())

    if not needsFitting:
        return policy

    prepared = [policy.prepareExample(example) for example in reportedData]
    for _ in range(self.trainEpochs):
        # the batch is packaged again in every epoch
        policy.fit(policy.packageExamplesBatch(prepared))
    needsFitting = False
    self.storeState(policy)

    evaluationSetup = (self.policyIterator, self.moveDecider, self.batchSize,
                       self.datasetFile, self.initialGameState)
    if any(part is None for part in evaluationSetup):
        logMsg("Single Process Policy Updater is not configured to evaluate!")
        return policy

    testPlayer = PolicyIteratorPlayer(policy, self.policyIterator, NoopPolicyUpdater(),
                                      self.moveDecider, self.batchSize)
    policyTester = DatasetPolicyTester(testPlayer, self.datasetFile, self.initialGameState,
                                       "shell", self.batchSize)
    policyTester.main()

    return policy
def update(self, policy):
    """
    Make sure the policy carries the stored network and return it.

    self.policyBytes are loaded into the policy unless the policy already reports the
    UUID that was seen after the previous load (self.loadedUUID).
    """
    upToDate = self.loadedUUID is not None and policy.getUUID() == self.loadedUUID
    if upToDate:
        return policy

    policy.load(self.policyBytes)
    self.loadedUUID = policy.getUUID()
    logMsg("Loaded stored policy with UUID %s!" % (policy.getUUID()))

    return policy
def postBytes(url, secret, data, timeout=30, retries=999999, expectResponse=False):
    """
    POST raw data to url, retrying on any failure.

    @param url: target url
    @param secret: sent in the "secret" header
    @param data: request body, handed to requests.post as it is
    @param timeout: request timeout in seconds; also scales the pause between retries
    @param retries: number of retries before the last error is raised to the caller
    @param expectResponse: if True, the decoded JSON body of the response is returned, otherwise None
    """
    attempts = 0
    while True:
        attempts += 1
        try:
            reply = requests.post(url,
                                  data=data,
                                  headers={"secret": secret},
                                  timeout=timeout)
            reply.raise_for_status()
            # decoding stays inside the try block: a broken response body is retried as well
            return reply.json() if expectResponse else None
        except Exception as error:
            outOfRetries = attempts > retries
            if outOfRetries:
                raise error
            logMsg("Failed postBytes %s will retry soon" % url, error)
            # randomized pause before the next attempt
            time.sleep(timeout // 4 + random.random() * 20)
def getMatchHistory(self, pool, runId):
    """
    Return the match history of the run, loading it from the database on the first call.

    @param pool: connection pool offering getconn() and putconn(con)
    @param runId: id of the run, used as query parameter
    @return: self.matchHistory, a list of tuples
    (player1, player2, result, ratingChange, creation in unix milliseconds), sorted by creation

    Errors of the pool or the query are raised to the caller unchanged.
    """
    if not self.loadedMatchHistory:
        # set before loading: later calls (also ones made while loading) use the cached list
        self.loadedMatchHistory = True

        # Both names must exist before the try block. Otherwise a failing getconn() or
        # cursor() makes the cleanup below raise an UnboundLocalError that hides the real error.
        con = None
        cursor = None
        try:
            con = pool.getconn()
            cursor = con.cursor()
            cursor.execute(
                "SELECT player1, player2, result, ratingChange, creation from league_matches where run = %s",
                (runId, ))
            for row in cursor.fetchall():
                self.matchHistory.append((row[0], row[1], row[2], row[3], int(row[4].timestamp() * 1000)))
        finally:
            try:
                if cursor is not None:
                    cursor.close()
            finally:
                # hand the connection back even if closing the cursor failed
                if con is not None:
                    pool.putconn(con)

        logMsg("Loaded a history of %i matches for run %s" % (len(self.matchHistory), runId))

        self.matchHistory.sort(key=lambda x: x[4])
        self.recalcPlayerStats()

    return self.matchHistory
def test_ExampleGames(self):
    """
    provided examples should play out as expected: every example game has to end
    in the expected turn with the expected winner.
    """
    verbose = any(flag in sys.argv for flag in ('-v', '--verbose'))
    setLoggingEnabled(verbose)

    def show(state):
        # states are only printed in verbose runs
        if verbose:
            self.printGameWithTensor(state)

    def playGameByMoves(moves):
        # play the moves on the subject, stop early if the game ends
        state = self.subject
        show(state)
        for move in moves:
            if state.hasEnded():
                break
            state = state.playMove(move)
            show(state)
        return state

    examples = self.getExampleGameSequences()
    logMsg("\nPlaying", len(examples), "examples")

    for exampleIndex, example in enumerate(examples):
        moves, expectedTurns, expectedWinner = example
        logMsg("Playing example game", exampleIndex)

        finalState = playGameByMoves(moves)

        self.assertEqual(finalState.getTurn(), expectedTurns)
        self.assertTrue(finalState.hasEnded())
        self.assertEqual(finalState.getWinnerNumber(), expectedWinner)
def __init__(self, initialState, policy, policyIterator, policyUpdater, isFrameTimeTest = False):
    """
    Set up the evaluation worker.

    The address of the eval server is derived from --command (https is replaced by http,
    port 8042 is replaced by 4242), unless --evalserver names it directly.
    The worker name is taken from --eval and is "unknown" without it.
    """
    def argAfter(flag):
        # the value of a flag is the argument that directly follows it
        return sys.argv[sys.argv.index(flag) + 1]

    self.initialState = initialState
    self.policy = policy
    self.policyIterator = policyIterator
    self.policyUpdater = policyUpdater
    self.workQueue = []
    self.resultsQueue = []
    self.initialPolicyID = self.policy.getUUID()
    self.isFrameTimeTest = isFrameTimeTest

    evalServer = argAfter("--command").replace("https", "http").replace(":8042", "") + ":4242"
    if "--evalserver" in sys.argv:
        # can be used in case the eval server is somewhere else. That is the case in frametime evaluation.
        evalServer = argAfter("--evalserver")
    self.command = evalServer

    self.workerName = argAfter("--eval") if "--eval" in sys.argv else "unknown"

    logMsg("Started evaluation worker, talking to eval server on %s" % self.command)

    self.iterateTimes = [1]
    self.lastIterationCompleted = time.monotonic()
    self.printNoWork = True
def mlConfigBasedMain(configPath):
    """
    Common start of mains that are driven by an mlconfig file: enable logging, register
    the classes, log the command line and return the configuration loaded from configPath.
    """
    setLoggingEnabled(True)
    registerClasses()

    logMsg("Running", *sys.argv)

    return mlconfig.load(configPath)
def __init__(self, explorationPlyCount, minProp=-1):
    """
    @param explorationPlyCount: Until which ply (turn) to explore randomly
    @param minProp: stored as self.minProp, not used in this constructor
    """
    logMsg("Creating TemperatureMoveDecider(explorationPlyCount=%i)" % explorationPlyCount)

    self.minProp = minProp
    self.explorationPlyCount = explorationPlyCount
def on_get(self, req, resp, runId):
    """
    Answer with the player list of the run. The list is queried once per run
    and served from self.cached afterwards. Remembers the time of the request.
    """
    self.lastDataRequest = time.monotonic()

    if runId not in self.cached:
        self.cached[runId] = self.queryPlayerList(runId)
    players = self.cached[runId]

    logMsg("players_proxy responding with %i players" % len(players))

    resp.media = players
    resp.status = falcon.HTTP_200
def reportEvaluationNumber(self):
    """
    Report the number of position evaluations requested in the iteration that just ended
    to the command server and reset the counter.

    The iteration number is len(self.seenNetworks) - 1.
    """
    drep = dict()
    drep["evals"] = self.numPositionEvalsRequested
    drep["iteration"] = len(self.seenNetworks) - 1

    postJson(self.commandHost + "/api/evalscnt/" + self.run, self.secret, drep)

    # the argument order has to follow the message: iteration first, then the evaluation count
    logMsg("Iteration %i used %i evaluation requests!" % (drep["iteration"], drep["evals"]))

    self.numPositionEvalsRequested = 0
def playBatch(self):
    """
    playBatch should play one move on every open game. Since the number of expansions is dynamic here,
    this means playBatch should play moves until len(self.iterators) number of moves have been tracked,
    even if they happen to be played on the same game.

    Moves played beyond len(self.iterators) are carried over to the next call via self.prevMoves.

    @return: tuple (average milliseconds per move, average iterations per move, number of moves in this batch)
    """
    # accumulated time in nanoseconds; averaged per move further down
    moveTimeNs = 0
    # start with the surplus of moves left over from the previous call
    movesPlayed = self.prevMoves
    iterationsDone = 0
    # only on the very first call: step the iterators once before the loop
    if not self.playedBatch:
        self.playedBatch = True
        iterationsDone += self.stepIteratorsOnce()
    while movesPlayed < len(self.iterators):
        self.initSelfplay(self.runId)
        moveThinkStart = time.monotonic_ns()
        mustMoves = self.getMustMove()
        moveTimeNs += time.monotonic_ns() - moveThinkStart
        for idx, mustMove in enumerate(mustMoves):
            if mustMove:
                # the player is out of time, or wants to make a move
                # -> play a move here for that index
                # the return value of playMove is added to the time spent
                moveTimeNs += self.playMove(idx)
                movesPlayed += 1
        iterStartTime = time.monotonic_ns()
        iterationsDone += self.stepIteratorsOnce()
        moveTimeNs += time.monotonic_ns() - iterStartTime
        #self.debugPrintState(moveTimeNs)
    # NOTE(review): this is 0 if the loop above never ran (e.g. no iterators), the divisions
    # below would then raise a ZeroDivisionError - confirm that case cannot happen
    numMovesInBatch = (movesPlayed - self.prevMoves)
    avgIterationsPerMove = iterationsDone / numMovesInBatch
    moveTimeNs /= numMovesInBatch
    # remember how many moves were played beyond the size of the batch
    self.prevMoves = movesPlayed - len(self.iterators)
    # nanoseconds -> milliseconds
    moveAvgMs = moveTimeNs / 1000000.0
    logMsg(
        "played a batch of %i moves with %.2f avg ms per move and %i avg nodes per move"
        % (numMovesInBatch, moveAvgMs, avgIterationsPerMove))
    return moveAvgMs, avgIterationsPerMove, numMovesInBatch
def parseMove(self, gameState, moveStr):
    """
    Translate a move given as 1-based number into a move of the game.

    @param gameState: state offering getLegalMoves()
    @param moveStr: the move as text, "1" is the move 0
    @return: the 0-based move, or -1 if the input is not a number or not a legal move
    """
    try:
        result = int(moveStr) - 1
    except (TypeError, ValueError):
        # input that is not a number is handled like any other illegal move, instead of crashing
        logMsg("That move is illegal")
        return -1

    if result in gameState.getLegalMoves():
        return result

    logMsg("That move is illegal")
    return -1
def playOpenGames(self):
    """
    Advance every active game as far as the cache allows, then request the missing evaluations.

    Only the games that are active when the method is entered are advanced in this call,
    games appended while it runs are handled by a later call.
    """
    # play a batch until all positions in it are not known in the cache. If a game ends, report the game and replace it with a new one.
    # once all positions in a batch need an evaluation, request it
    addedGames = False
    for gidx in range(len(self.activeGames)):
        agame, ahistory = self.activeGames[gidx]
        while True:
            # write the current position back first, so the slot is up to date whenever the loop is left
            self.activeGames[gidx] = [agame, ahistory]
            if agame in self.pendingGames:
                # cannot continue this game at this time, it is already pending for an evaluation.
                break
            elif agame.hasEnded():
                self.finalizeGame(agame, ahistory)
                if self.initialState in self.cache or self.initialState in self.pendingGames:
                    retryPoints = self.pickRetryPoints(ahistory, agame)
                    self.retryTurns += list(
                        map(lambda x: x[0].getTurn(), retryPoints))
                    if len(retryPoints) > 0:
                        self.foundRetry += 1
                        # the first retry point takes the slot of the finished game,
                        # all further ones become additional active games
                        agame, ahistory = retryPoints[0]
                        for anotherPoint in retryPoints[1:]:
                            self.activeGames.append(anotherPoint)
                            addedGames = True
                    else:
                        self.foundNoRetry += 1
                        #logMsg("No retry points were generated!")
                        agame = self.initialState
                        ahistory = []
                else:
                    logMsg(
                        "Initial state not evaluated in current iteration, starting a new game from the start!"
                    )
                    agame = self.initialState
                    ahistory = []
            elif agame in self.cache:
                # the evaluation is known: record it in the history and play the decided move
                iteratedPolicy = self.cache[agame]
                ahistory.append([agame, iteratedPolicy])
                moveToPlay = self.moveDecider.decideMove(
                    agame, iteratedPolicy[0], iteratedPolicy[1])
                self.playedMoves.add((agame, moveToPlay))
                agame = agame.playMove(moveToPlay)
            else:
                # unknown position: it needs an evaluation before the game can continue
                self.requestEvals.add(agame)
                break
    if addedGames:
        logMsg("There are now %i active games!" % len(self.activeGames))
    self.requestEvaluations()
def checkForNewIteration(self, evalResults):
    """
    Detect the start of a new iteration from received evaluation results.

    @param evalResults: dict, the values are (results, networkSeen, workerName)

    For every network id that is not None and was not seen before, the id is remembered as
    current network, the cache is cleared and the evaluation count is reported.
    """
    for _results, networkSeen, _workerName in evalResults.values():
        if networkSeen is None or networkSeen in self.seenNetworks:
            # nothing new was reported by this result
            continue

        self.seenNetworks.add(networkSeen)
        self.currentNetwork = networkSeen
        self.cache = dict()
        logMsg("A new iteration has begun, cleared the cache!")
        self.reportEvaluationNumber()
def loadNewestNetwork(self, policy):
    """
    Load the network with the newest "creation" value into the policy.
    Nothing happens if there is no network yet. The network list is sorted in place.
    """
    networkList = self.getNetworkList()
    if len(networkList) == 0:
        return

    networkList.sort(key=lambda x: x["creation"], reverse=True)
    newest = networkList[0]
    logMsg("Continue training of an existing network", newest)

    policy.load(self.downloadNetwork(newest["id"]))
    logMsg("Network %s loaded" % policy.getUUID())
def __init__(self, trainEpochs, state, policyIterator = None, moveDecider = None, batchSize = None, datasetFile = None, initialGameState = None):
    """
    @param trainEpochs: number of epochs, stored as self.trainEpochs
    @param state: directory of the updater state, the policy file is policy.npy in there
    @param policyIterator, moveDecider, batchSize, datasetFile, initialGameState: optional, stored as given

    Ends by calling self.loadState().
    """
    logMsg("Initialized SingleProcessUpdater trainEpochs=%i, state=%s" % (trainEpochs, state))

    self.trainEpochs = trainEpochs

    self.state = state
    self.statePath = os.path.join(state, "policy.npy")
    self.loadedPolicyBytes = None

    self.policyIterator = policyIterator
    self.moveDecider = moveDecider
    self.batchSize = batchSize
    self.datasetFile = datasetFile
    self.initialGameState = initialGameState

    self.loadState()
def test_hashProperties(self):
    """
    When playing random games there should be less than 20% hash collisions and at most 16 states that share
    a single hash in the generated states.

    Additionally every pair of equal states has to have equal hash values.
    """
    prntVerbose = ('-v' in sys.argv) or ('--verbose' in sys.argv)
    setLoggingEnabled(prntVerbose)
    numTestGames = 250
    # lazy: the games are played while the loop below consumes the map
    states = map(lambda x: self.playRandomGame(x), range(numTestGames))
    # hash value -> list of distinct states with that hash
    statesByHash = dict()
    uniqueStates = 0
    # number of all states seen, duplicates included
    oCnt = 0
    allStates = []
    # size and hash of the largest bucket; only updated when a bucket grows beyond one state
    worstLen = 0
    worstHashValue = 0
    for ss in states:
        for s in ss:
            allStates.append(s)
            oCnt += 1
            h = hash(s)
            if not h in statesByHash:
                statesByHash[h] = [s]
                uniqueStates += 1
            else:
                # same hash: either a state that was seen before, or a real collision
                isKnownState = len(
                    list(filter(lambda x: x == s, statesByHash[h]))) > 0
                if not isKnownState:
                    statesByHash[h].append(s)
                    if len(statesByHash[h]) > worstLen:
                        worstLen = len(statesByHash[h])
                        worstHashValue = h
                    uniqueStates += 1
    # compare every pair of states: equal states must never differ in their hash
    for aIdx in range(len(allStates)):
        for bIdx in range(aIdx + 1, len(allStates)):
            a = allStates[aIdx]
            b = allStates[bIdx]
            if a == b:
                self.assertEqual(hash(a), hash(b),
                                 "Equality must imply equal hash values")
    uniqueHashes = len(statesByHash)
    # distinct states that had to share their hash with an earlier distinct state
    dupes = uniqueStates - uniqueHashes
    result = dupes / float(uniqueStates)
    logMsg("\nFound ", uniqueHashes, "unique hashs for", uniqueStates,
           "unique states. Overall ", oCnt, "moves played! Worst hash has",
           worstLen, "collisions, it is the hash number", worstHashValue)
    self.assertTrue(uniqueHashes <= uniqueStates)
    self.assertTrue(result < 0.2)
    self.assertTrue(worstLen < 17)
def pushResults(self):
    """
    Thread body: send the results in self.resultsQueue to the eval server, oldest first.
    Sleeps 0.2 seconds whenever the queue is empty. Never returns.
    """
    logMsg("Started poll results thread")

    while True:
        if len(self.resultsQueue) == 0:
            time.sleep(0.2)
            continue

        nextResult = self.resultsQueue.pop(0)
        checkinUrl = self.command + "/checkin/" + nextResult["id"]
        # results are sent bson encoded, with an empty secret
        postBytes(checkinUrl, "", encodeToBson(nextResult["data"]))
def receiveGameEvals(self):
    """
    Poll self.evalAccess until evaluation results arrive and process them.

    The results are first checked for a new iteration and for rejected evaluations.
    Results of a UUID known in self.pendingEvals are added to the cache and the UUID is
    removed from the pending ones, results of unknown UUIDs are only logged.
    """
    while True:
        evalResults = self.evalAccess.pollEvaluationResults()
        if len(evalResults) != 0:
            break

    self.checkForNewIteration(evalResults)
    self.checkForRejectEvals(evalResults)

    for uuid in evalResults:
        if uuid not in self.pendingEvals:
            logMsg("Received evaluation of unknown UUID!", uuid)
            continue

        # the games are not pending anymore once their results are cached
        evalGames = self.pendingEvals.pop(uuid)
        self.addResultsToCache(evalGames, evalResults[uuid])
def prepareWindow(self, downloader, currentIteration):
    """
    Build the list of states to train the network of the given iteration on.

    Blocks, checking every 0.5 seconds, until at least self.nextIterationStatesCount states
    are available for the iteration.

    @return: the states of the new policy, followed by up to
    self.maxSize - self.nextIterationStatesCount states requested from before the iteration
    """
    logMsg("Need to prepare a window for iteration", currentIteration)

    # NOTE(review): windowSize is computed but not read anywhere below, the size of the
    # older part is derived from self.maxSize directly - confirm if that is intended
    windowSize = min(currentIteration * self.nextIterationStatesCount, self.maxSize)

    announcedWait = False
    while self.countAvailableForIteration(downloader, currentIteration) < self.nextIterationStatesCount:
        if not announcedWait:
            announcedWait = True
            logMsg("Waiting for more data to train next network!")
        time.sleep(0.5)

    logMsg("Have enough data to train next network now!")

    newPolicyStates = self.getNextStatesForIteration(downloader, currentIteration)
    olderStatesLimit = self.maxSize - self.nextIterationStatesCount
    olderPolicyStates = self.getStatesBeforeIteration(downloader, currentIteration, olderStatesLimit)

    trainingWindow = newPolicyStates + olderPolicyStates

    logMsg(
        "Using a window of %i states with %i states representing the new policy"
        % (len(trainingWindow), len(newPolicyStates)))

    return trainingWindow