def StochasticGradientDescent(trainingSet, featureExtractor, lossFunction=None):
    """Learn a linear classifier by SGD on the hinge loss.

    Args:
        trainingSet: iterable of (input, label) pairs; label is +1 or -1.
        featureExtractor: maps one input to a {feature: value} dict.
        lossFunction: unused; kept for interface compatibility.

    Returns:
        collections.defaultdict(float) mapping feature -> learned weight.
    """
    # HYPERPARAMETERS: step size (eta), number of iterations (T)
    weights = collections.defaultdict(float)
    T = 20
    for t in range(T):
        eta = 1.0 / math.sqrt(t + 1)  # decaying step size, eta_t = 1/sqrt(t+1)
        for x, y in trainingSet:
            features = featureExtractor(x)
            # Hinge loss: max(0, 1 - y * w.phi(x)).
            loss = 1 - (y * dotProduct(weights, features))
            # BUGFIX: the subgradient is -y*phi(x) whenever loss > 0 (margin < 1),
            # not only when loss >= 1 (margin <= 0) as before. Hoisted the check
            # out of the per-feature loop; it does not depend on the feature.
            if loss > 0:
                for feature, value in features.items():
                    weights[feature] += eta * y * value
    return weights
def loadGameData(self, filePath):
    """Parse recorded games from *filePath* into SingleGame objects.

    The file is a flat text dump: per position, a move line followed by a
    space-separated feature-value line, with an 'END' sentinel closing each
    game (format inferred from the parsing below — confirm against the
    writer of these files). For each position the feature vector and a
    sigmoid state value are appended to the game.

    Returns:
        list of SingleGame, one per self.numberOfGames.
    """
    numWeights = len(self.weights)
    ret = []
    with open(filePath, 'r') as f:
        data = f.readlines()
    # cleanup: strip trailing spaces and newlines
    data = [i.rstrip(' \n') for i in data]
    data = [i.rstrip('\n') for i in data]
    # NOTE(review): counter restarts at 0 for every game but `data` is never
    # advanced past the previous game's 'END' — confirm each file holds one
    # game, or that re-parsing from the top is intended.
    for i in range(self.numberOfGames):
        newGame = SingleGame()
        counter = 0
        while data[counter] != 'END':
            newGame.movesList.append(data[counter])
            counter += 1
            try:
                featureValues = [float(i) for i in data[counter].split(' ')]
            except ValueError:
                # Was a bare `except:`; narrowed so SystemExit/KeyboardInterrupt
                # are not swallowed. Dump the offending line for debugging.
                print([i for i in data[counter].split(' ')])
                sys.exit(0)
            if (counter % 4 == 3):
                # player 2's move, so features need to be interchanged
                t = len(featureValues)
                # BUGFIX: `t / 2` is a float in Python 3 and slicing with a
                # float raises TypeError; use integer (floor) division.
                featureValues = featureValues[t // 2:] + featureValues[:t // 2]
            newGame.featureValuesList.append(featureValues)
            newGame.stateValues.append(
                sigmoid(dotProduct(featureValues, self.weights)))
            if (data[counter + 1] == 'END'):
                # Victory & loss score: pin terminal state values to 1/0.
                newGame.stateValues[-1] = 1.0
                newGame.stateValues[-2] = 0.0
            try:
                assert (len(newGame.featureValuesList[-1]) == numWeights)
            except AssertionError:
                print(len(newGame.featureValuesList[-1]))
                print(numWeights)
                sys.exit(0)
            counter += 1
        ret.append(newGame)
    return ret
def MinibatchGradientDescent(trainingSet, featureExtractor, lossFunction=None):
    """Learn a linear classifier by minibatch SGD on the hinge loss.

    Args:
        trainingSet: sequence of (input, label) pairs; label is +1 or -1.
            Must contain at least `batchSize` (50) examples for random.sample.
        featureExtractor: maps one input to a {feature: value} dict.
        lossFunction: unused; kept for interface compatibility.

    Returns:
        collections.defaultdict(float) mapping feature -> learned weight.
    """
    # HYPERPARAMETERS: step size (eta), number of iterations (T),
    # minibatch size (batchSize)
    weights = collections.defaultdict(float)
    T = 20
    batchSize = 50
    for t in range(T):
        eta = 1.0 / math.sqrt(t + 1)  # decaying step size
        batch = random.sample(trainingSet, batchSize)
        # BUGFIX: the old body called featureExtractor on the whole batch
        # (its own TODO admitted that was unsupported) and then referenced
        # `example` outside the generator expression — a NameError in
        # Python 3. Instead, accumulate the hinge-loss subgradient per
        # example and apply one averaged update per minibatch.
        gradient = collections.defaultdict(float)
        for x, y in batch:
            features = featureExtractor(x)
            # Subgradient is -y*phi(x) only when the margin is violated.
            if 1 - y * dotProduct(weights, features) > 0:
                for feature, value in features.items():
                    gradient[feature] -= y * value
        for feature, g in gradient.items():
            weights[feature] -= eta * g / len(batch)
    return weights
def train(gamePlayData, lamda=0.5, numEpochs=10, nstep=1, log_step=10, selfPlayer=0, learning_rate=0.01, rms_error_step=1):
    """TD(lambda)-style weight training over recorded games.

    Sweeps each game's state values in reverse, blending the observed state
    value with the backed-up value (`lamda` mixing weight) and calling
    tdUpdate to move `weights` toward the target. Progress and weights are
    written to a TensorBoard SummaryWriter.

    Args:
        gamePlayData: object exposing `weights` and `gamesList`; each game
            has parallel `stateValues` and `featureValuesList`.
        lamda: TD(lambda) mixing factor between the current state value and
            the previously backed-up value.
        numEpochs: number of full passes over gamesList.
        nstep, log_step, rms_error_step: unused in this body (reserved;
            logging cadence is hard-coded below).
        selfPlayer: parity (0 or 1) of the indices treated as this player's
            moves; only those positions are updated.
        learning_rate: step size passed to tdUpdate.
    """
    writer = SummaryWriter(log_dir=get_log_directory(params.comment))
    # Work on a copy so gamePlayData.weights is not mutated in place.
    weights = [i for i in gamePlayData.weights]
    print(len(weights))
    for epoch in range(numEpochs):
        print("EPOCH " + str(epoch) + "/" + str(numEpochs) + "\n\n")
        # Log the error metric once per epoch, before this epoch's updates.
        log_rms_error(epoch, gamePlayData, weights, selfPlayer, writer)
        for j in range(len(gamePlayData.gamesList)):
            game = gamePlayData.gamesList[j]
            prevVal = 0
            for i in range(len(game.stateValues) - 1, -1, -1):  # In reverse order
                stateVal = game.stateValues[i]
                # Only update on this player's positions (alternating parity).
                if i % 2 == selfPlayer:
                    if (i == len(game.stateValues) - 1
                            or i == len(game.stateValues) - 2):
                        # terminal state (win/loss): the stored value is the
                        # outcome target; recompute the model's prediction
                        # so the update moves prediction -> outcome.
                        nextVal = stateVal
                        stateVal = sigmoid(
                            dotProduct(game.featureValuesList[i], weights))
                    else:
                        # Blend observed value with the backed-up target.
                        nextVal = (1 - lamda) * stateVal + lamda * (prevVal)
                    prevVal = nextVal
                    # update feature weights toward nextVal
                    tdUpdate(nextVal, stateVal, weights,
                             game.featureValuesList[i], learning_rate)
            # print("Press")
            # sys.stdin.read(1)
            if (j % 10 == 0 and j != 0):
                # log feature weights every 10 episodes
                print("Episode " + str(j) + "/" + str(len(gamePlayData.gamesList)))
                # print(weights)
                log_weights(epoch * len(gamePlayData.gamesList) + j,
                            weights, writer)
    print("Finished ")
def log_rms_error(epoch, gamePlayData, weights, selfPlayer, writer):
    """Write summed squared prediction error to TensorBoard.

    For every game, the target is the recorded terminal value (last state if
    selfPlayer won, second-to-last otherwise); the squared difference between
    that target and the linear prediction is accumulated separately for won
    and lost games. (Original author note: not sure this is a useful metric.)
    """
    win_error = 0
    loss_error = 0
    for game in gamePlayData.gamesList:
        won = isWinner(game, selfPlayer)
        target = game.stateValues[-1] if won else game.stateValues[-2]
        sq_err = sum((target - dotProduct(fv, weights)) ** 2
                     for fv in game.featureValuesList)
        if won:
            win_error += sq_err
        else:
            loss_error += sq_err
    writer.add_scalar("rms_error/win", win_error, epoch)
    writer.add_scalar("rms_error/loss", loss_error, epoch)
    writer.add_scalar("rms_error/total", loss_error + win_error, epoch)
def StochasticGradientDescent(trainingSet, featureExtractor, lossFunction=None):
    """Learn a linear classifier by SGD on the hinge loss.

    Args:
        trainingSet: iterable of (input, label) pairs; label is +1 or -1.
        featureExtractor: maps one input to a {feature: value} dict.
        lossFunction: unused; kept for interface compatibility.

    Returns:
        collections.defaultdict(float) mapping feature -> learned weight.
    """
    # HYPERPARAMETERS: step size (eta), number of iterations (T)
    weights = collections.defaultdict(float)
    T = 20
    for t in range(T):
        eta = 1.0 / math.sqrt(t + 1)  # decaying step size, eta_t = 1/sqrt(t+1)
        for x, y in trainingSet:
            features = featureExtractor(x)
            # Hinge loss: max(0, 1 - y * w.phi(x)).
            loss = 1 - (y * dotProduct(weights, features))
            # BUGFIX: the subgradient is -y*phi(x) whenever loss > 0 (margin < 1),
            # not only when loss >= 1 (margin <= 0) as before. Hoisted the check
            # out of the per-feature loop; it does not depend on the feature.
            if loss > 0:
                for feature, value in features.items():
                    weights[feature] += eta * y * value
    return weights
def MinibatchGradientDescent(trainingSet, featureExtractor, lossFunction=None):
    """Learn a linear classifier by minibatch SGD on the hinge loss.

    Args:
        trainingSet: sequence of (input, label) pairs; label is +1 or -1.
            Must contain at least `batchSize` (50) examples for random.sample.
        featureExtractor: maps one input to a {feature: value} dict.
        lossFunction: unused; kept for interface compatibility.

    Returns:
        collections.defaultdict(float) mapping feature -> learned weight.
    """
    # HYPERPARAMETERS: step size (eta), number of iterations (T),
    # minibatch size (batchSize)
    weights = collections.defaultdict(float)
    T = 20
    batchSize = 50
    for t in range(T):
        eta = 1.0 / math.sqrt(t + 1)  # decaying step size
        batch = random.sample(trainingSet, batchSize)
        # BUGFIX: the old body called featureExtractor on the whole batch
        # (its own TODO admitted that was unsupported) and then referenced
        # `example` outside the generator expression — a NameError in
        # Python 3. Instead, accumulate the hinge-loss subgradient per
        # example and apply one averaged update per minibatch.
        gradient = collections.defaultdict(float)
        for x, y in batch:
            features = featureExtractor(x)
            # Subgradient is -y*phi(x) only when the margin is violated.
            if 1 - y * dotProduct(weights, features) > 0:
                for feature, value in features.items():
                    gradient[feature] -= y * value
        for feature, g in gradient.items():
            weights[feature] -= eta * g / len(batch)
    return weights