def openPackage(self, idName):
    """
    Read the file called idName below self.storageDirectory and decode it from bson.
    Any exception on the way is logged and swallowed.
    @returns: the decoded package, or an empty list if reading or decoding failed
    """
    try:
        packagePath = os.path.join(self.storageDirectory, idName)
        rawPackage = readFileUnderPath(packagePath)
        return decodeFromBson(rawPackage)
    except Exception as error:
        logMsg("Could not open downloaded package" + idName + "!", error)
    # only reached when the attempt above failed
    return []
def update(self, policy):
    """
    Once per check interval: poll the downloader, then look at networks.json in the
    storage directory and load the most recently created network into the policy,
    unless the policy already carries that network id.
    Failures while reading or loading are logged and otherwise ignored.
    @returns: the policy object that was passed in
    """
    isDue = time.monotonic() - self.lastNetworkCheck > self.checkInterval
    if not isDue:
        return policy

    self.downloader.poll()
    self.lastNetworkCheck = time.monotonic()

    listPath = os.path.join(self.storage, "networks.json")
    if not os.path.exists(listPath):
        return policy

    try:
        known = openJsonFile(listPath)
        if len(known) > 0:
            # ascending by creation time, so the newest entry ends up last
            known.sort(key=lambda entry: entry["creation"])
            newest = known[-1]
            if newest["id"] != policy.getUUID():
                # "creation" is divided by 1000 before being handed to utcfromtimestamp
                createdAt = datetime.utcfromtimestamp(newest["creation"] / 1000)
                logMsg("New network found created at UTC",
                       createdAt.strftime('%Y-%m-%d %H:%M:%S'))
                networkFile = os.path.join(self.storage, newest["id"])
                policy.load(decodeFromBson(readFileUnderPath(networkFile)))
                logMsg("Policy replaced, now working with policy ",
                       policy.getUUID())
    except Exception as error:
        logMsg("Failed to check for a new network", error)

    return policy
def reportsApiTest3():
    """
    Manual round trip against a server on 127.0.0.1:8042: upload a freshly constructed
    policy under a fixed run id, download it again by its UUID, reset the local policy,
    load the downloaded data and print the UUID after loading next to the one before.
    """
    game = Connect4GameState(7, 6, 4)
    optimizerArgs = {"lr": 0.001, "weight_decay": 0.0001}
    policy = PytorchPolicy(32, 1, 32, 3, 64, game, "cuda:0",
                           "torch.optim.adamw.AdamW", optimizerArgs)

    encoded = encodeToBson(policy.store())
    print("Encoded network into " + str(len(encoded)))

    uploadUrl = ("http://127.0.0.1:8042/api/networks/c48b01d7-18e8-4df0-9c8a-d9886473bb49/"
                 + policy.getUUID())
    response = requests.post(url=uploadUrl, data=encoded)
    response.raise_for_status()

    downloadUrl = "http://127.0.0.1:8042/api/networks/download/" + policy.getUUID()
    response = requests.get(url=downloadUrl, stream=True)
    response.raise_for_status()
    redownloaded = decodeFromBson(response.raw.data)

    # remember the id before the policy gets reset and overwritten
    prevId = policy.getUUID()
    policy.reset()
    policy.load(redownloaded)
    print(policy.getUUID(), prevId)
def test_dicts(self):
    """A flat dict holding an int and a list of ints equals itself after encode + decode."""
    original = {}
    original["A"] = 1
    original["B"] = [1, 2, 3]
    encoded = encodeToBson(original)
    self.assertEqual(decodeFromBson(encoded), original)
def test_network_posting(self):
    """
    Post a network for a fresh run, check that it is the only one listed for that run,
    download it again and verify that loading the download into a reset policy restores
    both the UUID and the forward outputs, while the reset policy itself produced
    different outputs.
    """
    run = self.postARun()

    optimizerArgs = {"lr": 0.001, "weight_decay": 0.0001}
    policy = PytorchPolicy(32, 1, 32, 3, 64, Connect4GameState(7, 6, 4), "cuda:0",
                           "torch.optim.adamw.AdamW", optimizerArgs)

    uploadUrl = urlBase + "api/networks/" + run["id"] + "/" + policy.getUUID()
    postBytes(uploadUrl, config["secret"], encodeToBson(policy.store()), retries=1)

    listUrl = urlBase + "api/networks/list/" + run["id"]
    networkList = requestJson(listUrl, config["secret"], retries=1)
    self.assertEqual(len(networkList), 1)
    self.assertEqual(networkList[0]["id"], policy.getUUID())

    downloadUrl = urlBase + "api/networks/download/" + policy.getUUID()
    redownloaded = decodeFromBson(
        requestBytes(downloadUrl, config["secret"], retries=1))

    # a position two random moves into a game serves as the probe input
    game = Connect4GameState(7, 6, 4)
    for _ in range(2):
        game = game.playMove(np.random.randint(7))

    def forwardProbe():
        # isRandom is switched off before every forward call
        policy.isRandom = False
        return policy.forward([game])[0]

    forwardResultPre = forwardProbe()
    preUUID = policy.getUUID()

    policy.reset()
    forwardResultReset = forwardProbe()

    policy.load(redownloaded)
    forwardResultPost = forwardProbe()

    self.assertEqual(preUUID, policy.getUUID())
    for part in (0, 1):
        self.assertTrue(np.all(forwardResultPre[part] == forwardResultPost[part]))
    for part in (0, 1):
        self.assertFalse(np.all(forwardResultPre[part] == forwardResultReset[part]))
def loadStates(run, iteration):
    """
    Load the states of the given run that are associated with the given iteration,
    cut away as much of the state package as possible however.
    For every row found in the states table the package file is read from below
    config["dataPath"], decoded, and each record is turned into a game state object
    by constructing record["gameCtor"] with record["gameParams"] and calling load()
    with record["state"].
    @param run: id of the run, used as query parameter
    @param iteration: iteration number, used as query parameter
    @returns: (stateObject, results, final, numIterations)[], where final is False and
    numIterations is 0 for records that do not carry those keys
    """
    result = []
    startTime = time.monotonic()

    # Acquire the connection before the try block and pre-set cursor: this way the
    # cleanup below never touches an unbound name and a failure to get a connection
    # or a cursor surfaces as the original exception.
    con = pool.getconn()
    cursor = None
    try:
        cursor = con.cursor()
        cursor.execute(
            "select id from states where run = %s and iteration = %s",
            (run, iteration))
        rows = cursor.fetchall()

        for row in rows:
            fpath = os.path.join(config["dataPath"], getUUIDPath(row[0]))
            statesPackage = decodeFromBson(readFileUnderPath(fpath))

            for record in statesPackage:
                protoState = constructor_for_class_name(
                    record["gameCtor"])(**record["gameParams"])
                gameState = protoState.load(record["state"])

                # both keys are optional in a record
                final = "final" in record and bool(record["final"])
                niters = record["numIterations"] if "numIterations" in record else 0

                result.append(
                    (gameState, record["knownResults"], final, niters))
    finally:
        if cursor:
            cursor.close()
        pool.putconn(con)

    procTime = time.monotonic() - startTime
    logMsg("Loaded %i states for run %s iteration %i in %.2fs" %
           (len(result), run, iteration, procTime))

    return result
def update(self, policy):
    """
    Once per check interval: ask the command server for the list of networks of this
    run and, if the most recently created one is not the network the policy currently
    holds, download it and load it into the policy.
    Any failure (request errors, bad status codes, decoding problems) is logged and
    the check is simply repeated after the next interval.
    @param policy: object offering getUUID() and load()
    @returns: the policy object that was passed in
    """
    if not (time.monotonic() - self.lastNetworkCheck > self.checkInterval):
        return policy

    # stamp the attempt up front, so a failing server is not queried on every call
    self.lastNetworkCheck = time.monotonic()

    try:
        response = requests.get(url=self.commandHost + "/api/networks/list/" + self.run,
                                headers={"secret": self.secret})
        response.raise_for_status()

        # named `networks` instead of `list` to not shadow the builtin
        networks = response.json()
        # ascending by creation time, so the newest entry ends up last
        networks.sort(key=lambda n: n["creation"])

        if len(networks) > 0:
            bestNetwork = networks[-1]
            if bestNetwork["id"] != policy.getUUID():
                # "creation" is divided by 1000 before being handed to utcfromtimestamp
                createdAt = datetime.utcfromtimestamp(bestNetwork["creation"] / 1000)
                logMsg("New network found created at UTC",
                       createdAt.strftime('%Y-%m-%d %H:%M:%S'),
                       "\nDownloading...")

                response = requests.get(
                    url=self.commandHost + "/api/networks/download/" + bestNetwork["id"],
                    headers={"secret": self.secret},
                    stream=True)
                response.raise_for_status()
                logMsg("Download completed!")

                policy.load(decodeFromBson(response.raw.data))
                logMsg("Policy replaced, now working with policy ",
                       policy.getUUID())
    except Exception as error:
        logMsg(
            "Could not query for new network this time, will try again soon!",
            error)

    return policy
def reportsApiTest():
    """
    Manual round trip against a server on 127.0.0.1:8042: encode 1000 generated
    reports, post them, download them again by the returned id and decode them,
    printing sizes and timings along the way.
    """
    reps = [makeReport() for _ in range(1000)]
    print(reps[0])

    encStart = time.time()
    repEnc = encodeToBson(reps)
    print("Encoding time taken:", time.time() - encStart)

    print("Encoded %i reports into %i kbyte" %
          (len(reps), 1 + (len(repEnc) / 1000)))

    reportId = requests.post(url="http://127.0.0.1:8042/api/reports/",
                             data=repEnc).json()
    print("Posted report of %i bytes and got response: %s" %
          (len(repEnc), reportId))

    response = requests.get(url="http://127.0.0.1:8042/api/reports/" + reportId,
                            stream=True)
    # was misspelled as `reponse`, which raised a NameError at this point
    response.raise_for_status()

    redownloaded = response.raw.data
    print("Get report gave us %i bytes" % len(redownloaded))

    decStart = time.time()
    repDec = decodeFromBson(redownloaded)
    print("Decode time taken:", time.time() - decStart)

    print("Decoded %i" % len(repDec))
    print(repDec[0])
def test_deepNumpyDicts(self):
    """
    A nested structure of dicts and lists that contains a float32 numpy array, ints
    and a string is encoded and decoded again; every branch is compared to the input.
    """
    root = dict()
    root["A"] = 1
    root["B"] = dict()
    root["B"]["FOO"] = [1, 2, 3]
    root["C"] = dict()
    root["C"]["data"] = [np.array([3.1415, -3.1415], dtype=np.float32), 42]
    root["D"] = [dict(), dict()]
    root["D"][0]["A"] = 42
    root["D"][1]["B"] = "hello world"

    enc = encodeToBson(root)
    transfered = decodeFromBson(enc)

    self.assertEqual(transfered["A"], root["A"])
    self.assertEqual(transfered["B"], root["B"])

    # "C" holds a numpy array, so it is checked piece by piece instead of with ==
    self.assertTrue("C" in transfered and "data" in transfered["C"]
                    and isinstance(transfered["C"]["data"], list))
    self.assertTrue(
        np.sum(transfered["C"]["data"][0] == root["C"]["data"][0]) == 2)
    self.assertEqual(transfered["C"]["data"][1], root["C"]["data"][1])

    # This used to be assertTrue(a, b), which takes b as the failure message and
    # only checks that a is truthy - the two values were never compared.
    self.assertEqual(transfered["D"], root["D"])
def pollWork(self):
    """
    Endless loop that fetches work from the command server and appends it to
    self.workQueue as dicts of the form {"work": [game states], "id": task id}.

    Each round first blocks while the queue holds more than one entry, or exactly one
    entry while the time since the last successful fetch / last completed iteration
    exceeds 0.8 times the mean of self.iterateTimes. Then the server queue is requested,
    one of its first five entries is picked at random and checked out. A failed
    checkout is logged and retried after a short randomized pause.
    Never returns.
    """
    logMsg("Started work poll thread")
    lastSuccess = time.monotonic()

    while True:
        # NOTE(review): the condition is kept exactly as it was; whether ">" is the
        # intended direction of the timing comparison should be confirmed.
        while (len(self.workQueue) == 1 and
               (time.monotonic() - max(lastSuccess, self.lastIterationCompleted))
               > np.mean(self.iterateTimes) * 0.8) or len(self.workQueue) > 1:
            time.sleep(0.05)

        workList = requestJson(self.command + "/queue", "")

        if len(workList) > 0:
            pickWork = random.choice(workList[:5])

            try:
                myWork = requestBytes(self.command + "/checkout/" + pickWork,
                                      "",
                                      retries=0)
            except Exception:
                # somebody else took the work before us
                # (Exception instead of a bare except, so KeyboardInterrupt and
                # SystemExit are no longer swallowed here)
                logMsg("Failed to checkout a task %s" % pickWork)
                time.sleep(0.3 + random.random() * 0.2)
                continue

            # decodedWork should be a list of game.store(), so load them via game.load()
            decodedWork = decodeFromBson(myWork)
            games = [self.initialState.load(w) for w in decodedWork]

            # re-arm the "no work" message for the next time the server runs dry
            self.printNoWork = True

            logMsg("Got work: %i game states" % len(games))
            self.workQueue.append({"work": games, "id": pickWork})
            lastSuccess = time.monotonic()
        else:
            # print the notice only once per dry spell
            if self.printNoWork:
                logMsg("No work found on the server, will keep trying...")
                self.printNoWork = False
            time.sleep(0.5)
def on_post(self, req, resp, key, entity_id):
    """
    Accept a bson encoded package of states for a run.

    The package itself is stored as a file below the configured dataPath, at the
    location getUUIDPath() yields for the newly generated id. The database gets one
    row in the states table describing the package (id, size, worker, iteration,
    network, run).

    @param req: request, the body is read from req.bounded_stream
    @param resp: response, media is set to the new package id and status to 200
    @param key: name of the worker that sends the package
    @param entity_id: id of the run the package belongs to
    @raises falcon.HTTPError: with status 400 if entity_id does not match exactly
    one row of runs_info
    """
    worker_name = key
    newId = str(uuid.uuid4())
    binary = req.bounded_stream.read()
    decoded = decodeFromBson(binary)

    # Acquire the connection before the try block and pre-set cursor, so the cleanup
    # never runs into unbound names and a pool failure is reported as such.
    con = self.pool.getconn()
    cursor = None
    try:
        packageSize = len(decoded)
        cursor = con.cursor()

        cursor.execute("select iterations, id from runs_info where id = %s",
                       (entity_id, ))
        rows = cursor.fetchall()
        if rows is None or len(rows) != 1:
            raise falcon.HTTPError(falcon.HTTP_400, "Unknown run id")
        iteration = int(rows[0][0])

        # the policy UUID is taken from the last record; an empty package raises here
        networkUUID = decoded[-1]["policyUUID"]

        cursor.execute("select id from networks where id = %s",
                       (networkUUID, ))
        rows = cursor.fetchall()
        if len(rows) == 0:
            # unknown network: store the package without a network reference
            networkUUID = None
            if iteration > 0:
                print("Warning: Got a package with unknown network UUID and iteration above 0. If this happens a lot something might be wrong!")

        cursor.execute("insert into states (id, package_size, worker, iteration, network, run) VALUES (%s, %s, %s, %s, %s, %s)",
                       (newId, packageSize, worker_name, iteration, networkUUID, entity_id))

        # write the file before committing: if storing fails the insert is rolled back
        storeFileUnderPath(os.path.join(self.config["dataPath"], getUUIDPath(newId)), binary)

        con.commit()
    finally:
        if cursor:
            cursor.close()
        # discards whatever was not committed before the connection goes back to the pool
        con.rollback()
        self.pool.putconn(con)

    resp.media = newId
    resp.status = falcon.HTTP_200
def test_state_posting(self):
    """
    Post one package of generated reports to each of two fresh runs (123 and 456
    reports, workers "test" and "test2"), then check the per-run listings (id, worker,
    packageSize) and that the downloaded packages match what was posted.
    """
    run1 = self.postARun()
    run2 = self.postARun()

    states1 = [makeReport() for _ in range(123)]
    states2 = [makeReport() for _ in range(456)]

    report1Id = requests.post(url=urlBase + "api/state/test/" + run1["id"],
                              data=encodeToBson(states1),
                              headers={"secret": config["secret"]}).json()
    report2Id = requests.post(url=urlBase + "api/state/test2/" + run2["id"],
                              data=encodeToBson(states2),
                              headers={"secret": config["secret"]}).json()

    listRun1 = requestJson(urlBase + "api/state/list/" + run1["id"],
                           config["secret"],
                           retries=1)
    listRun2 = requestJson(urlBase + "api/state/list/" + run2["id"],
                           config["secret"],
                           retries=1)

    listings = (listRun1, listRun2)
    for listing in listings:
        self.assertEqual(len(listing), 1)
    for listing, reportId in zip(listings, (report1Id, report2Id)):
        self.assertEqual(listing[0]["id"], reportId)
    for listing, workerName in zip(listings, ("test", "test2")):
        self.assertEqual(listing[0]["worker"], workerName)
    for listing, posted in zip(listings, (states1, states2)):
        self.assertEqual(listing[0]["packageSize"], len(posted))

    states1Downloaded = decodeFromBson(
        requestBytes(urlBase + "api/state/download/" + report1Id,
                     config["secret"],
                     retries=1))
    states2Downloaded = decodeFromBson(
        requestBytes(urlBase + "api/state/download/" + report2Id,
                     config["secret"],
                     retries=1))

    self.assertEqual(len(states1), len(states1Downloaded))
    self.assertEqual(len(states2), len(states2Downloaded))

    def checkStatesEqual(statesPre, statesPost):
        # "policyIterated" and "state" are compared with np.all, so they are taken
        # out of both dicts before the remaining entries are compared as dicts
        for pre, post in zip(statesPre, statesPost):
            piPre = pre.pop("policyIterated")
            piPost = post.pop("policyIterated")
            stPre = pre.pop("state")
            stPost = post.pop("state")

            self.assertDictEqual(pre, post)
            self.assertTrue(np.all(piPre == piPost))
            self.assertTrue(np.all(stPre == stPost))

    checkStatesEqual(states1, states1Downloaded)
    checkStatesEqual(states2, states2Downloaded)
def __init__(self,
             datasetFile,
             initialGame,
             policy,
             windowSizeSplits,
             trainingRuns,
             workingDirectory,
             testSamples,
             validationSamples,
             batchSize,
             lrStart,
             lrPatience,
             featureProvider=None,
             featureNetwork=None):
    """
    Prepare supervised training: load the examples of a dataset file, shuffle them
    with a fixed seed and convert every example into a record dict with the keys
    "state", "policyIterated" and "knownResults". If a feature provider is given and
    the examples carry future positions, extra feature entries are added as well.

    @param datasetFile: passed to loadExamples2 together with initialGame
    @param initialGame: passed to loadExamples2, also kept as self.initialGame
    @param policy: kept as self.policy
    @param windowSizeSplits: kept as self.windowSizeSplits
    @param trainingRuns: kept as self.trainingRuns
    @param workingDirectory: kept as self.workingDirectory
    @param testSamples: number of records set aside for testing
    @param validationSamples: number of records set aside for validation
    @param batchSize: kept as self.batchSize
    @param lrStart: kept as self.lrStart
    @param lrPatience: kept as self.lrPatience
    @param featureProvider: optional torch module, moved to a device and set to
    evaluation mode here
    @param featureNetwork: optional path of a bson encoded network file whose model
    dict is loaded into featureProvider; only used if featureProvider is given
    """
    logMsg("Starting to initialize supervised training")

    self.featureProvider = featureProvider
    self.featureNetwork = featureNetwork

    if self.featureProvider is not None:
        logMsg("Using feature provider network!")
        if self.featureNetwork is not None:
            with open(self.featureNetwork, "rb") as f:
                networkData = decodeFromBson(f.read())
                uuid, modelDict, netConfig = unpackTorchNetwork(
                    networkData)
                self.featureProvider.load_state_dict(modelDict)
                logMsg("Loaded feature network %s" % uuid)

        if torch.cuda.is_available():
            gpuCount = torch.cuda.device_count()

            device = "cuda"

            # with several gpus, the number following "--windex" on the command line
            # selects the gpu (modulo the gpu count)
            if "--windex" in sys.argv and gpuCount > 1:
                windex = int(sys.argv[sys.argv.index("--windex") + 1])
                gpuIndex = windex % gpuCount
                device = "cuda:" + str(gpuIndex)
                logMsg(
                    "Found multiple gpus with set windex, extended cuda device to %s"
                    % device)

            self.device = torch.device(device)

            logMsg("Feature network will use the gpu!", self.device)
        else:
            logMsg("No GPU is available, falling back to cpu!")
            self.device = torch.device("cpu")

        self.featureProvider = self.featureProvider.to(self.device)
        self.featureProvider.train(False)

    self.examples = loadExamples2(initialGame, datasetFile)
    # fixed seed: the shuffled order is the same on every start
    random.seed(42)
    random.shuffle(self.examples)

    sawFutures = False

    self.records = []
    # As used below, an example lex provides: lex[0] a game state object, lex[2] an
    # iterable of move indices, lex[3] a result in [-1, 0, 1] and, if it has more than
    # 4 entries, lex[-1] with at least three further positions.
    for lex in self.examples:
        record = dict()
        record["state"] = lex[0].store()

        # move target: every listed move counts once, normalized to sum up to 1
        moveOutput = np.zeros(lex[0].getMoveCount(), dtype=np.float32)
        for m in lex[2]:
            moveOutput[m] += 1
        moveOutput /= np.sum(moveOutput)
        record["policyIterated"] = moveOutput

        assert lex[0].getPlayerCount(
        ) == 2, "The whole solved dataset thing is not really meant for more than 2 players right now. Would need to rethink the file format a bit"

        # NOTE(review): presumably lex[3] is the result seen from the player on turn
        # (1 maps to that player, -1 to the other one, 0 to the value 0) - confirm
        # against the player numbering that getPlayerOnTurnNumber() uses.
        gresult = []
        if lex[3] == 0:
            gresult = [0]
        elif lex[3] == -1:
            gresult = [(lex[0].getPlayerOnTurnNumber() + 1) %
                       lex[0].getPlayerCount()]
        elif lex[3] == 1:
            gresult = [lex[0].getPlayerOnTurnNumber()]
        else:
            assert False, "Expected game result from database file to be in [-1, 0, 1]"

        record["knownResults"] = gresult

        if len(lex) > 4 and self.featureProvider is not None:
            # log only once, for the first example that carries future positions
            if not sawFutures:
                sawFutures = True
                logMsg(
                    "Dataset contains future positions to create features from!"
                )

            # the current position and the first three entries of lex[-1] are stored
            # under both the win and the move feature keys
            record["winFeatures"] = lex[0]
            record["winFeatures+1"] = lex[-1][0]
            record["winFeatures+2"] = lex[-1][1]
            record["winFeatures+3"] = lex[-1][2]

            record["moveFeatures"] = lex[0]
            record["moveFeatures+1"] = lex[-1][0]
            record["moveFeatures+2"] = lex[-1][1]
            record["moveFeatures+3"] = lex[-1][2]

        self.records.append(record)

    if sawFutures:
        logMsg("Creating extra features!")
        # records are handed to fillRecordForFeatures in slices of 1024
        bsize = 1024
        for ix in range(0, len(self.records), bsize):
            batch = self.records[ix:(ix + bsize)]
            fillRecordForFeatures(self.featureProvider, batch, self.device)
        logMsg("Extra features created!")

    self.initialGame = initialGame
    self.policy = policy
    self.windowSizeSplits = windowSizeSplits
    self.trainingRuns = trainingRuns
    self.workingDirectory = workingDirectory
    self.testSamples = testSamples
    self.validationSamples = validationSamples
    # whatever is not reserved for test or validation is available for training
    self.trainingSamples = len(
        self.records) - self.testSamples - self.validationSamples
    self.batchSize = batchSize
    self.lrStart = lrStart
    self.lrPatience = lrPatience
    self.trainingResults = []

    logMsg("Initialization completed")
def downloadNetwork(self, networkId):
    """
    Request the network with the given id from the command host and decode it.
    @returns: the bson decoded network data
    """
    downloadUrl = self.commandHost + "/api/networks/download/" + networkId
    rawNetwork = requestBytes(downloadUrl, self.secret)
    return decodeFromBson(rawNetwork)
def __init__(self, path):
    """
    Read the file at path, decode it from bson and keep the result in
    self.policyBytes. self.loadedUUID starts out as None.
    """
    logMsg("Using FilePolicyUpdater!")
    self.loadedUUID = None
    with open(path, "rb") as policyFile:
        rawPolicy = policyFile.read()
        self.policyBytes = decodeFromBson(rawPolicy)
def test_SimpleList(self):
    """A list of ints equals itself after encode + decode."""
    original = [1, 2, 3]
    roundTripped = decodeFromBson(encodeToBson(original))
    self.assertEqual(original, roundTripped)
def test_numpy(self):
    """A numpy array of two floats keeps its length and every element after encode + decode."""
    source = np.array([1.23456789, 9.87654321])
    encoded = encodeToBson(source)
    roundTripped = decodeFromBson(encoded)

    self.assertEqual(len(source), len(roundTripped))
    # number of positions at which both arrays agree
    equalCount = np.sum(source == roundTripped)
    self.assertEqual(equalCount, len(source))
def test_number(self):
    """A plain int equals itself after encode + decode."""
    original = 42
    roundTripped = decodeFromBson(encodeToBson(original))
    self.assertEqual(original, roundTripped)
def test_numpy_tuples(self):
    """A tuple of two random numpy arrays keeps both arrays element by element after encode + decode."""
    first = np.random.dirichlet([0.9] * 2)
    second = np.random.dirichlet([0.9] * 2)
    data1 = (first, second)

    data2 = decodeFromBson(encodeToBson(data1))

    for ix in (0, 1):
        self.assertTrue(np.all(data1[ix] == data2[ix]))
def test_numpyList(self):
    """A list of two int numpy arrays comes back as two entries with the same values after encode + decode."""
    expected = [[1, 2, 3], [4, 5, 6]]
    arrays = [np.array(values) for values in expected]

    transfered = decodeFromBson(encodeToBson(arrays))

    self.assertEqual(len(transfered), 2)
    for ix, values in enumerate(expected):
        self.assertEqual(transfered[ix].tolist(), values)