def main(_):
    # total_x, total_y (the dataset arrays) and x_dim, y_dim (feature sizes)
    # are assumed to be loaded above; os, tensorflow (tf) and the project's
    # model module are assumed imported
    ckpt_path = os.path.join(FLAGS.ckpt_dir, FLAGS.name)

    batch = model.Batch(total_x, total_y, 128)

    with tf.Session() as sess:
        basic_model = model.BasicModel(x_dim, y_dim, FLAGS.learning_rate, FLAGS.beta1)
        writer = tf.summary.FileWriter(os.path.join(FLAGS.summary_dir, FLAGS.name), sess.graph)

        sess.run(tf.global_variables_initializer())
        for i in range(FLAGS.epoch):
            for n in range(batch.iter_per_epoch):
                batch_x, batch_y = batch()
                basic_model.train(sess, batch_x, batch_y)

                summary = basic_model.inference(sess, basic_model.summary, batch_x, batch_y)
                # pass a global step so TensorBoard can order the points
                writer.add_summary(summary, i * batch.iter_per_epoch + n)

            if (i + 1) % FLAGS.ckpt_interval == 0:
                basic_model.dump(sess, ckpt_path)
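
The loop above assumes a Batch helper that is callable and exposes iter_per_epoch; the repository's implementation isn't shown. A minimal sketch consistent with that usage, assuming total_x/total_y are NumPy arrays (the names and internals here are guesses, not the project's code):

import numpy as np

class Batch:
    # Hypothetical sketch: callable, yields one (batch_x, batch_y) slice per
    # call, and reshuffles the data at each epoch boundary.
    def __init__(self, total_x, total_y, batch_size):
        self.total_x = total_x
        self.total_y = total_y
        self.batch_size = batch_size
        self.iter_per_epoch = len(total_x) // batch_size
        self._order = np.random.permutation(len(total_x))
        self._cursor = 0

    def __call__(self):
        if self._cursor + self.batch_size > len(self.total_x):
            # epoch boundary: reshuffle and start over
            self._order = np.random.permutation(len(self.total_x))
            self._cursor = 0
        idx = self._order[self._cursor:self._cursor + self.batch_size]
        self._cursor += self.batch_size
        return self.total_x[idx], self.total_y[idx]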
Example #2
    def __init__(self, teams, limit=100, numProcesses=1, format='1v1'):
        self.teams = teams
        self.limit = limit
        self.numProcesses = numProcesses
        self.format = format

        def gamma(step):
            #constant schedule: the same value at every iteration
            return 0.3

        self.valueModel = model.BasicModel()

        self.mcDataset = [{
            'gamma': gamma,
            'getExpValue': self.valueModel.getExpValue,
            'addReward': self.valueModel.addReward,
        }, {
            'gamma': gamma,
            'getExpValue': self.valueModel.getExpValue,
            'addReward': self.valueModel.addReward,
        }]

        self.probCutoff = 0.03
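
Here model.BasicModel serves as a value model exposing getExpValue and addReward; its real implementation isn't shown in this snippet. A hypothetical stand-in consistent with how mcDataset wires it up, keeping a running average of rewards per state:

from collections import defaultdict

class BasicModel:
    #hypothetical stand-in, not the repository's actual model:
    #average observed reward per state hash
    def __init__(self):
        self._totals = defaultdict(float)
        self._counts = defaultdict(int)

    def getExpValue(self, state):
        if self._counts[state] == 0:
            return 0
        return self._totals[state] / self._counts[state]

    def addReward(self, state, reward):
        self._totals[state] += reward
        self._counts[state] += 1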
Example #3
            loss = criterion(y_pred.transpose(1, 2), targets)

            hidden = hidden.detach()
            state_c = state_c.detach()

            optimizer.zero_grad()  #clear accumulated gradients (safe even if also done earlier in the loop)
            loss.backward()
            optimizer.step()

            print({'epoch': epoch, 'batch': batch, 'loss': loss.item()})

            batch += 1
            i += bptt


if args.model == 'BASIC':
    model = model.BasicModel(ntokens, args.emsize, args.nhid, args.nlayers,
                             args.dropout)
    basic_train()

else:
    model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid,
                           args.nlayers, args.dropout, args.dropouth,
                           args.dropouti, args.dropoute, args.wdrop, args.tied)

# -------------------------------------------------------------------------------------------------------------------- #

if args.resume:
    print('Resuming model ...')
    model_load(args.resume)
    optimizer.param_groups[0]['lr'] = args.lr
    model.dropouti, model.dropouth, model.dropout, model.dropoute = args.dropouti, args.dropouth, args.dropout, args.dropoute
    if args.wdrop:
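
Earlier in this example, hidden.detach() and state_c.detach() implement truncated backpropagation through time: the recurrent state is carried from one window to the next, but the autograd graph is cut so gradients only flow within a single window. A self-contained sketch of the same pattern (the sizes and hyperparameters are placeholders, not this repository's configuration):

import torch
import torch.nn as nn

lstm = nn.LSTM(input_size=8, hidden_size=16, batch_first=True)
head = nn.Linear(16, 8)
optimizer = torch.optim.SGD(list(lstm.parameters()) + list(head.parameters()), lr=0.1)
criterion = nn.MSELoss()

h = torch.zeros(1, 4, 16)  # (num_layers, batch, hidden)
c = torch.zeros(1, 4, 16)
for window in range(10):
    x = torch.randn(4, 5, 8)       # one BPTT window of inputs
    target = torch.randn(4, 5, 8)
    h, c = h.detach(), c.detach()  # same role as hidden/state_c above
    out, (h, c) = lstm(x, (h, c))
    loss = criterion(head(out), target)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()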
Example #4
async def humanGame(teams,
                    limit=100,
                    numProcesses=1,
                    format='1v1',
                    valueModel=None,
                    file=sys.stdout,
                    initMoves=([], [])):
    #define these before the try so the finally block can clean up safely
    #even if process creation fails partway through
    mainPs = None
    searchPs = []
    try:
        mainPs = await getPSProcess()

        searchPs = [await getPSProcess() for _ in range(numProcesses)]

        seed = [
            random.random() * 0x10000,
            random.random() * 0x10000,
            random.random() * 0x10000,
            random.random() * 0x10000,
        ]

        game = Game(mainPs,
                    format=format,
                    teams=teams,
                    seed=seed,
                    names=['meat sack', 'Your Robot Overlords'],
                    verbose=True,
                    file=file)

        #holds the Monte Carlo data
        #each entry goes to one process
        #this will get really big with n=3 after 20ish turns if you
        #don't purge old data
        mcData = []  #NOTE: unused below; the searches actually consume mcDataset

        def gamma(step):
            #constant schedule: the same value at every iteration
            return 0.3

        if not valueModel:
            valueModel = model.BasicModel()

        mcDataset = [{
            'gamma': gamma,
            'getExpValue': valueModel.getExpValue,
            'addReward': valueModel.addReward,
        }, {
            'gamma': gamma,
            'getExpValue': valueModel.getExpValue,
            'addReward': valueModel.addReward,
        }]

        #moves with probabilities below this are not considered
        probCutoff = 0.03

        await game.startGame()

        #this needs to be a coroutine so we can cancel it when the game ends,
        #which, due to concurrency issues, might not happen until we're already in the MCTS loop
        async def play():
            i = 0
            #actions taken so far in the actual game
            p1Actions = []
            p2Actions = []
            #we reassign this later, so we have to declare it nonlocal
            nonlocal mcDataset
            while True:
                i += 1
                print('starting turn', i, file=sys.stderr)

                #don't search if we aren't going to use the results
                if len(initMoves[0]) == 0 or len(initMoves[1]) == 0:
                    #I suspect that averaging two runs together will
                    #give us more improvement than running for twice as long
                    #and it should run faster than a single long search due to parallelism

                    searches = []
                    for j in range(numProcesses):
                        search = mc.mcSearchRM(searchPs[j],
                                               format,
                                               teams,
                                               limit=limit,
                                               seed=seed,
                                               p1InitActions=p1Actions,
                                               p2InitActions=p2Actions,
                                               mcData=mcDataset,
                                               pid=j,
                                               initExpVal=0,
                                               probScaling=2,
                                               regScaling=1.5)
                        searches.append(search)

                    await asyncio.gather(*searches)

                    #combine the processes' results and purge unused information
                    #this assumes that any state not seen in two consecutive iterations isn't worth keeping
                    #it also takes a little bit of processing, but that should be okay
                    print('combining', file=sys.stderr)
                    mcDataset = mc.combineRMData([mcDataset], valueModel)[0]

                #this assumes that both player1 and player2 get requests each turn
                #which I think is accurate, but most formats will give one player a waiting request
                #this will lock up if a player causes an error, so don't do that

                async def playTurn(queue, myMcData, actionList, cmdHeader,
                                   initMoves):

                    request = await queue.get()

                    if len(initMoves) > 0:
                        action = initMoves[0]
                        del initMoves[0]
                        print('|c|' + cmdHeader + '|Turn ' + str(i) +
                              ' pre-set action:',
                              action,
                              file=file)
                    else:
                        #figure out what kind of action we need
                        state = request[1]['stateHash']
                        actions = moves.getMoves(format, request[1])

                        #the mcdatasets are all combined, so we can just look at the first
                        data = myMcData[0]
                        #probs = mc.getProbsExp3(data, state, actions)
                        probs = mc.getProbsRM(data, state, actions)
                        #remove low-probability moves, which are likely just noise
                        #this can zero out every action, so guard against
                        #dividing by zero below
                        normProbs = np.array(
                            [p if p > probCutoff else 0 for p in probs])
                        if np.sum(normProbs) == 0:
                            #every probability fell below the cutoff; fall back
                            #to the raw distribution
                            normProbs = np.array(probs)
                        normProbs = normProbs / np.sum(normProbs)
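                        #e.g. probs = [0.50, 0.46, 0.02, 0.02] with
                        #probCutoff = 0.03 zeroes the last two entries and
                        #renormalizes to roughly [0.521, 0.479, 0, 0], so
                        #np.random.choice never picks the noisy tail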

                        action = np.random.choice(actions, p=normProbs)

                    actionList.append(action)
                    await game.cmdQueue.put(cmdHeader + action)

                async def userTurn(queue, actionList, cmdHeader, initMoves):

                    request = await queue.get()

                    if len(initMoves) > 0:
                        action = initMoves[0]
                        del initMoves[0]
                        print('|c|' + cmdHeader + '|Turn ' + str(i) +
                              ' pre-set action:',
                              action,
                              file=file)
                    else:
                        #figure out what kind of action we need
                        state = request[1]['stateHash']
                        actions = moves.getMoves(format, request[1])

                        actionTexts = []
                        for j in range(len(actions)):
                            action = actions[j].split(',')
                            actionText = []
                            for k in range(len(action)):
                                a = action[k]
                                a = a.strip()
                                if 'pass' in a:
                                    actionText.append('pass')
                                elif 'move' in a:
                                    parts = a.split(' ')
                                    moveNum = int(parts[1])
                                    if len(parts) < 3:
                                        targetNum = 0
                                    else:
                                        targetNum = int(parts[2])
                                    move = request[1]['active'][k]['moves'][
                                        moveNum - 1]['move']
                                    if targetNum != 0:
                                        actionText.append(move +
                                                          ' into slot ' +
                                                          str(targetNum))
                                    else:
                                        actionText.append(move)
                                elif 'team' in a:
                                    actionText.append(a)
                                elif 'switch' in a:
                                    actionText.append(a)
                                else:
                                    actionText.append('unknown action: ' + a)
                            actionString = ','.join(actionText)
                            actionTexts.append(actionString)

                        #ask the user which action to take
                        print('Legal actions:')
                        for j in range(len(actions)):
                            print(j, actionTexts[j], '(' + actions[j] + ')')
                        #humans are dumb and make mistakes
                        while True:
                            try:
                                actionIndex = int(input('Your action:'))
                                if 0 <= actionIndex < len(actions):
                                    action = actions[actionIndex]
                                    break
                            except ValueError:
                                #non-numeric input; fall through to the retry prompt
                                pass
                            print('try again')

                    #send the action whether it was pre-set or user-chosen,
                    #mirroring playTurn above
                    actionList.append(action)
                    await game.cmdQueue.put(cmdHeader + action)

                await userTurn(game.p1Queue, p1Actions, '>p1', initMoves[0])
                await playTurn(game.p2Queue, [mcDataset[1]], p2Actions, '>p2',
                               initMoves[1])

        gameTask = asyncio.ensure_future(play())
        winner = await game.winner
        gameTask.cancel()
        print('winner:', winner, file=sys.stderr)

    except Exception as e:
        print(e, file=sys.stderr)

    finally:
        if mainPs:
            mainPs.terminate()
        for ps in searchPs:
            ps.terminate()
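
Since humanGame is a coroutine, it has to be driven by an event loop. A minimal, hypothetical driver (the empty team strings are placeholders for real packed teams):

import asyncio

if __name__ == '__main__':
    teams = ('', '')  #placeholder packed-team strings
    asyncio.get_event_loop().run_until_complete(
        humanGame(teams, limit=100, numProcesses=1, format='1v1'))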