Пример #1
0
    def __init__(self,seed,nS,nA,nB,params,hidden_dims=(64,64),activation=F.leaky_relu):
        """
        Network capable of processing any number of prior actions.

        Num Categories: nA (check,fold,call,bet,raise)
        Num Betsizes: nB (various betsizes)

        Args:
            seed: RNG seed (currently unused; the manual_seed call is commented out).
            nS: state size.
            nA: number of discrete action categories.
            nB: number of discrete bet sizes.
            params: dict; must contain 'mapping' (state-index mapping) plus
                whatever PreProcessHistory consumes.
            hidden_dims: widths of the two hidden linear layers.
            activation: activation function applied between linear layers.
        """
        super().__init__()
        self.activation = activation
        # self.seed = torch.manual_seed(seed)
        self.nS = nS
        self.nA = nA
        self.nB = nB

        self.hand_emb = Embedder(5,64)
        # NOTE(review): width 63 here vs 64 in sibling networks -- possible typo, confirm.
        self.action_emb = Embedder(6,63)
        # Combined head: nA - 2 discrete actions plus nB betsizes
        # (presumably bet/raise are expressed through the betsize slots -- confirm).
        self.combined_output = nA - 2 + nB
        self.helper_functions = NetworkFunctions(self.nA,self.nB)
        self.preprocess = PreProcessHistory(params)
        self.max_length = 10  # fixed number of history steps after padding
        self.emb = 512  # LSTM input width
        n_heads = 8  # unused: leftover from the commented-out transformer
        depth = 2  # unused: leftover from the commented-out transformer
        self.positional_emb = Embedder(self.max_length,128)
        self.lstm = nn.LSTM(self.emb, 256)
        # self.transformer = CTransformer(self.emb,n_heads,depth,self.max_length,self.combined_output,max_pool=False)
        self.mapping = params['mapping']
        self.noise = GaussianNoise(is_relative_detach=True)
        self.fc1 = nn.Linear(128,hidden_dims[0])
        self.fc2 = nn.Linear(hidden_dims[0],hidden_dims[1])
        # NOTE(review): 2560 presumably = max_length (10) * lstm hidden (256) -- confirm against forward().
        self.fc3 = nn.Linear(2560,self.combined_output)
Пример #2
0
 def __init__(self,
              seed,
              nO,
              nA,
              nB,
              params,
              hidden_dims=(64, 64),
              activation=F.leaky_relu):
     """Build the actor (LSTM/policy head) and critic (transformer Q head).

     Args:
         seed: RNG seed (unused in this constructor).
         nO: observation size.
         nA: number of discrete action categories.
         nB: number of discrete bet sizes.
         params: dict with 'maxlen', 'state_mapping', 'device',
             'transformer_in' and 'transformer_out'.
         hidden_dims: unused in this constructor.
         activation: activation function stored for later use.
     """
     super().__init__()
     self.activation = activation
     self.nO = nO
     self.nA = nA
     self.nB = nB
     # Combined head: nA - 2 discrete actions plus nB betsizes.
     self.combined_output = nA - 2 + nB
     self.maxlen = params['maxlen']
     self.mapping = params['state_mapping']
     self.device = params['device']
     # self.emb = params['embedding_size']
     self.helper_functions = NetworkFunctions(self.nA, self.nB)
     self.process_input = PreProcessLayer(params)
     self.lstm = nn.LSTM(1280, 128)
     # NOTE(review): head consumes width 1280 -- presumably the flattened
     # single-example LSTM output; confirm against the matching forward().
     self.policy_out = nn.Linear(1280, self.combined_output)
     self.noise = GaussianNoise(self.device)
     emb = params['transformer_in']
     n_heads = 8
     depth = 2
     self.transformer = CTransformer(emb, n_heads, depth, self.maxlen,
                                     params['transformer_out'])
     self.dropout = nn.Dropout(0.5)
     # Value and advantage heads over the transformer features
     # (combined downstream as Q = V + A - mean(A) in sibling code).
     self.value_output = nn.Linear(params['transformer_out'], 1)
     self.advantage_output = nn.Linear(params['transformer_out'],
                                       self.combined_output)
Пример #3
0
    def __init__(self,
                 seed,
                 nS,
                 nA,
                 nB,
                 params,
                 hidden_dims=(64, 64),
                 activation=F.leaky_relu):
        """Build embeddings, LSTM and linear heads.

        Args:
            seed: RNG seed (the manual_seed call is commented out).
            nS: state size.
            nA: number of discrete action categories.
            nB: number of discrete bet sizes.
            params: dict with 'maxlen' and 'mapping' plus whatever
                PreProcessLayer consumes.
            hidden_dims: widths of fc1/fc2.
            activation: activation function stored for later use.
        """
        super().__init__()
        self.activation = activation
        self.nS = nS
        self.nA = nA
        self.nB = nB
        # Combined head: nA - 2 discrete actions plus nB betsizes.
        self.combined_output = nA - 2 + nB
        self.helper_functions = NetworkFunctions(self.nA, self.nB)
        self.maxlen = params['maxlen']
        self.process_input = PreProcessLayer(params)

        # self.seed = torch.manual_seed(seed)
        self.mapping = params['mapping']
        self.hand_emb = Embedder(5, 64)
        self.action_emb = Embedder(6, 64)
        self.betsize_emb = Embedder(self.nB, 64)
        self.noise = GaussianNoise()
        self.emb = 1248  # LSTM input width
        n_heads = 8  # unused: leftover from the commented-out transformer
        depth = 2  # unused: leftover from the commented-out transformer
        self.lstm = nn.LSTM(self.emb, 128)
        # self.transformer = CTransformer(emb,n_heads,depth,self.max_length,self.nA)

        self.fc1 = nn.Linear(528, hidden_dims[0])
        self.fc2 = nn.Linear(hidden_dims[0], hidden_dims[1])
        # NOTE(review): 1280 presumably = maxlen * lstm hidden (128) -- confirm against forward().
        self.fc3 = nn.Linear(1280, self.combined_output)
        self.dropout = nn.Dropout(0.5)
Пример #4
0
class FlatBetsizeActor(nn.Module):
    """Flat (single-state) actor over the combined action/betsize space.

    Embeds the hand rank and the previous action, concatenates the previous
    betsize scalar, and maps the result through a small MLP to a masked
    categorical distribution.

    Num Categories: nA (check,fold,call,bet,raise)
    Num Betsizes: nB (various betsizes)
    """
    def __init__(self,seed,nS,nA,nB,params,hidden_dims=(64,64),activation=F.leaky_relu):
        """
        Args:
            seed: RNG seed (unused here).
            nS: state size.
            nA: number of discrete action categories.
            nB: number of discrete bet sizes.
            params: dict; must contain 'mapping' with a 'state' sub-mapping
                holding 'rank', 'previous_action' and 'previous_betsize' indices.
            hidden_dims: widths of fc1/fc2.
            activation: activation applied between linear layers.
        """
        super().__init__()
        self.activation = activation
        self.nS = nS
        self.nA = nA
        self.nB = nB
        # Combined head: nA - 2 discrete actions plus nB betsizes.
        self.combined_output = nA - 2 + nB
        self.helper_functions = NetworkFunctions(self.nA,self.nB)
        self.mapping = params['mapping']
        self.hand_emb = Embedder(5,64)
        self.action_emb = Embedder(6,64)
        self.betsize_emb = Embedder(self.nB,64)
        self.noise = GaussianNoise()
        # 129 = 64 (hand emb) + 64 (action emb) + 1 (previous-betsize scalar).
        self.fc1 = nn.Linear(129,hidden_dims[0])
        self.fc2 = nn.Linear(hidden_dims[0],hidden_dims[1])
        self.fc3 = nn.Linear(hidden_dims[1],self.combined_output)

    def forward(self,state,action_mask,betsize_mask):
        """Sample an action for the given state.

        Returns a dict with 'action', 'action_category', 'action_prob'
        (log-prob of the sample), 'action_probs' and 'betsize'.
        """
        mask = combined_masks(action_mask,betsize_mask)
        x = state
        hand = x[:,self.mapping['state']['rank']].long()
        last_action = x[:,self.mapping['state']['previous_action']].long()
        previous_betsize = x[:,self.mapping['state']['previous_betsize']].float()
        if previous_betsize.dim() == 1:
            previous_betsize = previous_betsize.unsqueeze(1)
        hand = self.hand_emb(hand)
        last_action_emb = self.action_emb(last_action)
        x = torch.cat([hand,last_action_emb,previous_betsize],dim=-1)
        x = self.activation(self.fc1(x))
        x = self.activation(self.fc2(x))
        # fixed typo: local was previously spelled 'cateogry_logits'
        category_logits = self.fc3(x)
        category_logits = self.noise(category_logits)
        action_soft = F.softmax(category_logits,dim=-1)
        # Renormalize the softmax over the legal actions only.
        action_probs = norm_frequencies(action_soft,mask)
        m = Categorical(action_probs)
        action = m.sample()

        action_category,betsize_category = self.helper_functions.unwrap_action(action,last_action)

        outputs = {
            'action':action,
            'action_category':action_category,
            'action_prob':m.log_prob(action),
            'action_probs':action_probs,
            'betsize':betsize_category
            }
        return outputs
Пример #5
0
 def __init__(self,seed,nS,nA,nB,params,hidden_dims=(64,64),activation=F.leaky_relu):
     """
     Flat actor constructor: embeddings plus a small MLP head.

     Num Categories: nA (check,fold,call,bet,raise)
     Num Betsizes: nB (various betsizes)

     Args:
         seed: RNG seed (unused here).
         nS: state size.
         nA: number of discrete action categories.
         nB: number of discrete bet sizes.
         params: dict; must contain 'mapping'.
         hidden_dims: widths of fc1/fc2.
         activation: activation applied between linear layers.
     """
     super().__init__()
     self.activation = activation
     self.nS = nS
     self.nA = nA
     self.nB = nB
     # Combined head: nA - 2 discrete actions plus nB betsizes.
     self.combined_output = nA - 2 + nB
     self.helper_functions = NetworkFunctions(self.nA,self.nB)
     self.mapping = params['mapping']
     self.hand_emb = Embedder(5,64)
     self.action_emb = Embedder(6,64)
     self.betsize_emb = Embedder(self.nB,64)
     self.noise = GaussianNoise()
     # 129 = 64 (hand emb) + 64 (action emb) + 1 (previous-betsize scalar).
     self.fc1 = nn.Linear(129,hidden_dims[0])
     self.fc2 = nn.Linear(hidden_dims[0],hidden_dims[1])
     self.fc3 = nn.Linear(hidden_dims[1],self.combined_output)
Пример #6
0
    def __init__(self,
                 seed,
                 nS,
                 nA,
                 nB,
                 params,
                 hidden_dims=(64, 64),
                 activation=F.leaky_relu):
        """Build the preprocess layer, bidirectional LSTM and output head.

        Args:
            seed: RNG seed (the manual_seed call is commented out).
            nS: state size.
            nA: number of discrete action categories.
            nB: number of discrete bet sizes.
            params: dict with 'maxlen', 'device', 'epsilon',
                'epsilon_weights' (tensor), 'state_mapping', 'lstm_in'
                and 'lstm_out'.
            hidden_dims: unused in this constructor.
            activation: activation function stored for later use.
        """
        super().__init__()
        self.activation = activation
        self.nS = nS
        self.nA = nA
        self.nB = nB
        # Combined head: nA - 2 discrete actions plus nB betsizes.
        self.combined_output = nA - 2 + nB
        self.helper_functions = NetworkFunctions(self.nA, self.nB)
        self.maxlen = params['maxlen']
        self.device = params['device']
        self.epsilon = params['epsilon']  # exploration rate used in target mode
        self.epsilon_weights = params['epsilon_weights'].to(self.device)  # weights for random legal moves
        self.process_input = PreProcessLayer(params)

        # self.seed = torch.manual_seed(seed)
        self.state_mapping = params['state_mapping']
        self.action_emb = Embedder(Action.UNOPENED, 64)
        self.betsize_emb = Embedder(self.nB, 64)
        self.noise = GaussianNoise(self.device)
        self.emb = 1248
        n_heads = 8  # unused: leftover from a transformer experiment
        depth = 2  # unused: leftover from a transformer experiment
        # self.attention = EncoderAttention(params['lstm_in'],params['lstm_out'])
        self.lstm = nn.LSTM(params['lstm_in'],
                            params['lstm_out'],
                            bidirectional=True)
        self.batchnorm = nn.BatchNorm1d(self.maxlen)
        # self.blocks = nn.Sequential(
        #     IdentityBlock(hidden_dims=(2560,2560,512),activation=F.leaky_relu),
        #     IdentityBlock(hidden_dims=(512,512,256),activation=F.leaky_relu),
        # )
        # NOTE(review): 5120 presumably = maxlen * 2 * lstm_out (bidirectional) -- confirm.
        self.fc_final = nn.Linear(5120, self.combined_output)
Пример #7
0
class CombinedNet(Network):
    """Actor-critic network: an LSTM policy head samples an action, and a
    transformer critic produces dueling Q-values over the same combined
    action/betsize space.
    """
    def __init__(self,
                 seed,
                 nO,
                 nA,
                 nB,
                 params,
                 hidden_dims=(64, 64),
                 activation=F.leaky_relu):
        """
        Args:
            seed: RNG seed (unused in this constructor).
            nO: observation size.
            nA: number of discrete action categories.
            nB: number of discrete bet sizes.
            params: dict with 'maxlen', 'state_mapping', 'device',
                'transformer_in' and 'transformer_out'.
            hidden_dims: unused in this constructor.
            activation: activation function stored for later use.
        """
        super().__init__()
        self.activation = activation
        self.nO = nO
        self.nA = nA
        self.nB = nB
        # Combined head: nA - 2 discrete actions plus nB betsizes.
        self.combined_output = nA - 2 + nB
        self.maxlen = params['maxlen']
        self.mapping = params['state_mapping']
        self.device = params['device']
        self.helper_functions = NetworkFunctions(self.nA, self.nB)
        self.process_input = PreProcessLayer(params)
        self.lstm = nn.LSTM(1280, 128)
        # NOTE(review): head width 1280 presumably matches the flattened
        # single-example LSTM output -- confirm against forward().
        self.policy_out = nn.Linear(1280, self.combined_output)
        self.noise = GaussianNoise(self.device)
        emb = params['transformer_in']
        n_heads = 8
        depth = 2
        self.transformer = CTransformer(emb, n_heads, depth, self.maxlen,
                                        params['transformer_out'])
        self.dropout = nn.Dropout(0.5)
        # Dueling critic heads: Q = V + A - mean(A).
        self.value_output = nn.Linear(params['transformer_out'], 1)
        self.advantage_output = nn.Linear(params['transformer_out'],
                                          self.combined_output)

    def forward(self, state, action_mask, betsize_mask):
        """Sample an action and compute dueling Q-values for the state.

        Accepts numpy arrays or tensors. Conversion is guarded with
        isinstance (as in OmahaActor) because calling torch.tensor on an
        existing tensor copies it and emits a UserWarning.
        """
        x = state
        if not isinstance(x, torch.Tensor):
            x = torch.tensor(x, dtype=torch.float32).to(self.device)
            action_mask = torch.tensor(action_mask,
                                       dtype=torch.float).to(self.device)
            betsize_mask = torch.tensor(betsize_mask,
                                        dtype=torch.float).to(self.device)
        mask = combined_masks(action_mask, betsize_mask)
        out = self.process_input(x)
        # Actor
        B, M, c = out.size()
        n_padding = self.maxlen - M
        if n_padding < 0:
            # History longer than maxlen: keep only the most recent steps.
            h = out[:, -self.maxlen:, :]
        else:
            padding = torch.zeros(B, n_padding, out.size(-1)).to(self.device)
            h = torch.cat((out, padding), dim=1)
        lstm_out, _ = self.lstm(h)
        # NOTE(review): the flatten and the .item() outputs below assume B == 1.
        t_logits = self.policy_out(lstm_out.view(-1))
        category_logits = self.noise(t_logits)

        action_soft = F.softmax(category_logits, dim=-1)
        # Renormalize the softmax over the legal actions only.
        action_probs = norm_frequencies(action_soft, mask)
        m = Categorical(action_probs)
        action = m.sample()

        action_category, betsize_category = self.helper_functions.unwrap_action(
            action, state[:, -1, self.mapping['last_action']])
        outputs = {
            'action': action.item(),
            'action_category': action_category.item(),
            'action_prob': m.log_prob(action),
            'action_probs': action_probs,
            'betsize': betsize_category.item()
        }
        # Critic: dueling combination of value and advantage.
        q_input = self.transformer(out)
        a = self.advantage_output(q_input)
        v = self.value_output(q_input)
        v = v.expand_as(a)
        q = v + a - a.mean(-1, keepdim=True).expand_as(a)
        outputs['value'] = q.squeeze(0)
        return outputs
Пример #8
0
class HoldemBaseline(Network):
    """Holdem baseline actor: preprocess the state history, run an LSTM over
    a fixed-length (padded or truncated) sequence, and map the flattened
    output to a masked distribution over the combined action/betsize space.
    """
    def __init__(self,
                 seed,
                 nS,
                 nA,
                 nB,
                 params,
                 hidden_dims=(64, 64),
                 activation=F.leaky_relu):
        """
        Args:
            seed: RNG seed (unused here).
            nS: state size.
            nA: number of discrete action categories.
            nB: number of discrete bet sizes.
            params: dict with 'maxlen' and 'mapping' plus whatever
                PreProcessLayer consumes.
            hidden_dims: widths of fc1/fc2.
            activation: activation function stored for later use.
        """
        super().__init__()
        self.activation = activation
        self.nS = nS
        self.nA = nA
        self.nB = nB
        # Combined head: nA - 2 discrete actions plus nB betsizes.
        self.combined_output = nA - 2 + nB
        self.helper_functions = NetworkFunctions(self.nA, self.nB)
        self.maxlen = params['maxlen']
        self.process_input = PreProcessLayer(params)

        self.mapping = params['mapping']
        self.hand_emb = Embedder(5, 64)
        self.action_emb = Embedder(6, 64)
        self.betsize_emb = Embedder(self.nB, 64)
        self.noise = GaussianNoise()
        self.emb = 1248  # LSTM input width
        self.lstm = nn.LSTM(self.emb, 128)
        self.fc1 = nn.Linear(528, hidden_dims[0])
        self.fc2 = nn.Linear(hidden_dims[0], hidden_dims[1])
        # NOTE(review): 1280 presumably = maxlen * lstm hidden (128) -- confirm.
        self.fc3 = nn.Linear(1280, self.combined_output)
        self.dropout = nn.Dropout(0.5)

    def forward(self, state, action_mask, betsize_mask):
        """Sample an action; returns dict with action, category, log-prob,
        probs and betsize.
        """
        mask = combined_masks(action_mask, betsize_mask)
        x = state
        if x.dim() == 2:
            x = x.unsqueeze(0)
        # NOTE(review): the extra unsqueeze assumes process_input returns a
        # 2D tensor here; sibling networks use its output directly -- confirm.
        out = self.process_input(x).unsqueeze(0)
        B, M, c = out.size()
        n_padding = self.maxlen - M
        if n_padding < 0:
            # History longer than maxlen: keep only the most recent maxlen
            # steps (mirrors the sibling networks). Previously this case fed
            # an over-long sequence into fc3 and failed on the shape.
            h = out[:, -self.maxlen:, :]
        else:
            padding = torch.zeros(B, n_padding, out.size(-1))
            h = torch.cat((out, padding), dim=1)
        lstm_out, _ = self.lstm(h)
        t_logits = self.fc3(lstm_out.view(-1))
        category_logits = self.noise(t_logits)

        action_soft = F.softmax(category_logits, dim=-1)
        # Renormalize the softmax over the legal actions only.
        action_probs = norm_frequencies(action_soft, mask)
        m = Categorical(action_probs)
        action = m.sample()

        action_category, betsize_category = self.helper_functions.unwrap_action(
            action, state[:, -1, self.mapping['state']['previous_action']])
        outputs = {
            'action': action,
            'action_category': action_category,
            'action_prob': m.log_prob(action),
            'action_probs': action_probs,
            'betsize': betsize_category
        }
        return outputs
Пример #9
0
class OmahaActor(Network):
    """Omaha actor: preprocess the state history, run a bidirectional LSTM
    over a fixed-length sequence, and sample from a masked distribution over
    the combined action/betsize space. Supports epsilon-random exploration
    when called with target=True.
    """
    def __init__(self,
                 seed,
                 nS,
                 nA,
                 nB,
                 params,
                 hidden_dims=(64, 64),
                 activation=F.leaky_relu):
        """See class docstring. params must contain 'maxlen', 'device',
        'epsilon', 'epsilon_weights', 'state_mapping', 'lstm_in', 'lstm_out'.
        """
        super().__init__()
        self.activation = activation
        self.nS = nS
        self.nA = nA
        self.nB = nB
        # Combined head: nA - 2 discrete actions plus nB betsizes.
        self.combined_output = nA - 2 + nB
        self.helper_functions = NetworkFunctions(self.nA, self.nB)
        self.maxlen = params['maxlen']
        self.device = params['device']
        self.epsilon = params['epsilon']  # exploration rate for target-mode forward
        self.epsilon_weights = params['epsilon_weights'].to(self.device)  # weights for random legal moves
        self.process_input = PreProcessLayer(params)

        # self.seed = torch.manual_seed(seed)
        self.state_mapping = params['state_mapping']
        self.action_emb = Embedder(Action.UNOPENED, 64)
        self.betsize_emb = Embedder(self.nB, 64)
        self.noise = GaussianNoise(self.device)
        self.emb = 1248
        n_heads = 8  # unused: leftover from a transformer experiment
        depth = 2  # unused: leftover from a transformer experiment
        # self.attention = EncoderAttention(params['lstm_in'],params['lstm_out'])
        self.lstm = nn.LSTM(params['lstm_in'],
                            params['lstm_out'],
                            bidirectional=True)
        self.batchnorm = nn.BatchNorm1d(self.maxlen)
        # self.blocks = nn.Sequential(
        #     IdentityBlock(hidden_dims=(2560,2560,512),activation=F.leaky_relu),
        #     IdentityBlock(hidden_dims=(512,512,256),activation=F.leaky_relu),
        # )
        # NOTE(review): 5120 presumably = maxlen * 2 * lstm_out (bidirectional) -- confirm.
        self.fc_final = nn.Linear(5120, self.combined_output)

    def set_device(self, device):
        """Update the device used for tensor construction, here and in the
        preprocess layer."""
        self.device = device
        self.process_input.set_device(device)

    def forward(self, state, action_mask, betsize_mask, target=False):
        """
        Sample an action for the given state history.

        state: B,M,39 (numpy or tensor); masks mark legal actions/betsizes.
        target: when True, with probability epsilon pick a random legal
            action weighted by epsilon_weights instead of using the policy.
        Returns a dict keyed 'action', 'action_category', 'action_prob',
        'action_probs', 'betsize'; values are Python scalars when B == 1.
        """
        if not isinstance(state, torch.Tensor):
            state = torch.tensor(state, dtype=torch.float32).to(self.device)
            action_mask = torch.tensor(action_mask,
                                       dtype=torch.float32).to(self.device)
            betsize_mask = torch.tensor(betsize_mask,
                                        dtype=torch.float32).to(self.device)
        mask = combined_masks(action_mask, betsize_mask)
        if target and np.random.random() < self.epsilon:
            B = state.size(0)
            # pick random legal move, weighted by epsilon_weights
            action_masked = self.epsilon_weights * mask
            action_probs = action_masked / action_masked.sum(-1).unsqueeze(-1)
            action = action_probs.multinomial(num_samples=1, replacement=False)
            # placeholder log-prob for the random branch
            action_prob = torch.zeros(B, 1)
        else:
            out = self.process_input(state)
            B, M, c = state.size()
            # Left-pad (or truncate) the sequence to exactly maxlen steps.
            n_padding = self.maxlen - M
            if n_padding < 0:
                h = out[:, -self.maxlen:, :]
            else:
                padding = torch.zeros(B, n_padding,
                                      out.size(-1)).to(self.device)
                h = torch.cat((padding, out), dim=1)
            lstm_out, hidden_states = self.lstm(h)
            norm = self.batchnorm(lstm_out)
            # self.attention(out)
            # blocks_out = self.blocks(lstm_out.view(-1))
            t_logits = self.fc_final(norm.view(B, -1))
            category_logits = self.noise(t_logits)
            # skip connection
            # category_logits += h
            action_soft = F.softmax(category_logits, dim=-1)
            # Renormalize the softmax over the legal actions only.
            action_probs = norm_frequencies(action_soft, mask)
            m = Categorical(action_probs)
            action = m.sample()
            action_prob = m.log_prob(action)
        previous_action = torch.as_tensor(
            state[:, -1, self.state_mapping['last_action']]).to(self.device)
        action_category, betsize_category = self.helper_functions.batch_unwrap_action(
            action, previous_action)
        if B > 1:
            # batch training
            outputs = {
                'action': action,
                'action_category': action_category,
                'action_prob': action_prob,
                'action_probs': action_probs,
                'betsize': betsize_category
            }
        else:
            # playing hand
            outputs = {
                'action': action.item(),
                'action_category': action_category.item(),
                'action_prob': action_prob,
                'action_probs': action_probs,
                'betsize': betsize_category.item()
            }
        return outputs
Пример #10
0
class OmahaBatchActor(Network):
    """Omaha actor variant: preprocess the state history, run a bidirectional
    LSTM over a padded sequence, then a single linear head over the combined
    action/betsize space.
    """
    def __init__(self,
                 seed,
                 nS,
                 nA,
                 nB,
                 params,
                 hidden_dims=(64, 64),
                 activation=F.leaky_relu):
        """See class docstring. params must contain 'maxlen', 'device' and
        'state_mapping' plus whatever PreProcessLayer consumes.
        """
        super().__init__()
        self.activation = activation
        self.nS = nS
        self.nA = nA
        self.nB = nB
        # Combined head: nA - 2 discrete actions plus nB betsizes.
        self.combined_output = nA - 2 + nB
        self.helper_functions = NetworkFunctions(self.nA, self.nB)
        self.maxlen = params['maxlen']
        self.device = params['device']
        self.process_input = PreProcessLayer(params)

        # self.seed = torch.manual_seed(seed)
        self.state_mapping = params['state_mapping']
        self.hand_emb = Embedder(5, 64)
        self.action_emb = Embedder(Action.UNOPENED, 64)
        self.betsize_emb = Embedder(self.nB, 64)
        self.noise = GaussianNoise(self.device)
        self.emb = 1248
        n_heads = 8  # unused: leftover from a transformer experiment
        depth = 2  # unused: leftover from a transformer experiment
        self.lstm = nn.LSTM(1280, 128, bidirectional=True)
        self.batchnorm = nn.BatchNorm1d(self.maxlen)
        # self.blocks = nn.Sequential(
        #     IdentityBlock(hidden_dims=(2560,2560,512),activation=F.leaky_relu),
        #     IdentityBlock(hidden_dims=(512,512,256),activation=F.leaky_relu),
        # )
        # NOTE(review): 2560 presumably = maxlen * 2 * lstm hidden (bidirectional) -- confirm.
        self.fc_final = nn.Linear(2560, self.combined_output)
        self.dropout = nn.Dropout(0.5)

    def forward(self, state, action_mask, betsize_mask):
        """Sample an action; returns dict with scalar action, category,
        log-prob, probs and betsize.

        NOTE(review): despite the class name, the flatten (norm.view(-1))
        and the .item() outputs assume a batch size of 1 -- confirm callers.
        """
        x = torch.tensor(state, dtype=torch.float32).to(self.device)
        action_mask = torch.tensor(action_mask,
                                   dtype=torch.float).to(self.device)
        betsize_mask = torch.tensor(betsize_mask,
                                    dtype=torch.float).to(self.device)
        mask = combined_masks(action_mask, betsize_mask)

        out = self.process_input(x)
        B, M, c = out.size()
        # Right-pad (or truncate) the sequence to exactly maxlen steps.
        n_padding = self.maxlen - M
        if n_padding < 0:
            h = out[:, -self.maxlen:, :]
        else:
            padding = torch.zeros(B, n_padding, out.size(-1)).to(self.device)
            h = torch.cat((out, padding), dim=1)
        lstm_out, _ = self.lstm(h)
        norm = self.batchnorm(lstm_out)
        # blocks_out = self.blocks(lstm_out.view(-1))
        t_logits = self.fc_final(norm.view(-1))
        category_logits = self.noise(t_logits)

        action_soft = F.softmax(category_logits, dim=-1)
        # Renormalize the softmax over the legal actions only.
        action_probs = norm_frequencies(action_soft, mask)
        m = Categorical(action_probs)
        action = m.sample()

        action_category, betsize_category = self.helper_functions.unwrap_action(
            action, state[:, -1, self.state_mapping['last_action']])
        outputs = {
            'action': action.item(),
            'action_category': action_category.item(),
            'action_prob': m.log_prob(action),
            'action_probs': action_probs,
            'betsize': betsize_category.item()
        }
        return outputs
Пример #11
0
class FlatHistoricalActor(nn.Module):
    """
    Network capable of processing any number of prior actions.

    Pads the preprocessed history to a fixed length, runs it through an
    LSTM, and samples from a masked distribution over the combined
    action/betsize space.

    Num Categories: nA (check,fold,call,bet,raise)
    Num Betsizes: nB (various betsizes)
    """
    def __init__(self,seed,nS,nA,nB,params,hidden_dims=(64,64),activation=F.leaky_relu):
        """
        Args:
            seed: RNG seed (unused here).
            nS: state size.
            nA: number of discrete action categories.
            nB: number of discrete bet sizes.
            params: dict; must contain 'mapping' plus whatever
                PreProcessHistory consumes.
            hidden_dims: widths of fc1/fc2.
            activation: activation function stored for later use.
        """
        super().__init__()
        self.activation = activation
        self.nS = nS
        self.nA = nA
        self.nB = nB

        self.hand_emb = Embedder(5,64)
        # NOTE(review): width 63 vs 64 in sibling networks -- possible typo;
        # kept as-is to preserve checkpoint compatibility. Confirm intent.
        self.action_emb = Embedder(6,63)
        # Combined head: nA - 2 discrete actions plus nB betsizes.
        self.combined_output = nA - 2 + nB
        self.helper_functions = NetworkFunctions(self.nA,self.nB)
        self.preprocess = PreProcessHistory(params)
        self.max_length = 10  # fixed number of history steps after padding
        self.emb = 512  # LSTM input width
        self.positional_emb = Embedder(self.max_length,128)
        self.lstm = nn.LSTM(self.emb, 256)
        self.mapping = params['mapping']
        self.noise = GaussianNoise(is_relative_detach=True)
        self.fc1 = nn.Linear(128,hidden_dims[0])
        self.fc2 = nn.Linear(hidden_dims[0],hidden_dims[1])
        # 2560 presumably = max_length (10) * lstm hidden (256); fc3 consumes
        # the flattened LSTM output.
        self.fc3 = nn.Linear(2560,self.combined_output)

    def forward(self,state,action_mask,betsize_mask):
        """Sample an action from the padded action history.

        Returns a dict with 'action', 'action_category', 'action_prob'
        (log-prob of the sample), 'action_probs' and 'betsize'.
        """
        mask = combined_masks(action_mask,betsize_mask)
        if mask.dim() > 1:
            mask = mask[-1]  # only the latest step's mask applies
        x = state
        if x.dim() == 2:
            x = x.unsqueeze(0)  # add batch dimension
        out = self.preprocess(x)
        M,C = out.size()
        # Right-pad the history to the fixed length expected by fc3.
        n_padding = self.max_length - M
        padding = torch.zeros(n_padding,out.size(-1))
        h = torch.cat((out,padding),dim=0).unsqueeze(0)
        x,_ = self.lstm(h)
        t_logits = self.fc3(x.view(-1))
        # fixed typo: local was previously spelled 'cateogry_logits'
        category_logits = self.noise(t_logits)
        action_soft = F.softmax(category_logits,dim=-1)
        # Renormalize the softmax over the legal actions only.
        action_probs = norm_frequencies(action_soft,mask)
        last_action = state[M-1,self.mapping['state']['previous_action']].long().unsqueeze(-1)
        m = Categorical(action_probs)
        action = m.sample()
        action_category,betsize_category = self.helper_functions.unwrap_action(action,last_action)

        outputs = {
            'action':action,
            'action_category':action_category,
            'action_prob':m.log_prob(action),
            'action_probs':m.probs,
            'betsize':betsize_category
            }
        return outputs
Пример #12
0
class FlatAC(nn.Module):
    """
    Actor-critic over a flat action history.

    Network capable of processing any number of prior actions: pads the
    preprocessed history, adds positional embeddings, and produces both a
    masked action distribution and dueling Q-values.

    Num Categories: nA (check,fold,call,bet,raise)
    Num Betsizes: nB (various betsizes)
    """
    def __init__(self,seed,nS,nA,nB,params,hidden_dims=(256,128),activation=F.leaky_relu):
        """
        Args:
            seed: RNG seed; torch.manual_seed is applied here.
            nS: state size.
            nA: number of discrete action categories.
            nB: number of discrete bet sizes.
            params: dict; must contain 'mapping' plus whatever
                PreProcessHistory consumes.
            hidden_dims: widths of fc1/fc2.
            activation: activation applied between linear layers.
        """
        super().__init__()
        self.activation = activation
        self.nS = nS
        self.nA = nA
        self.nB = nB
        # Combined head: nA - 2 discrete actions plus nB betsizes.
        self.combined_output = nA - 2 + nB
        self.helper_functions = NetworkFunctions(self.nA,self.nB)
        self.preprocess = PreProcessHistory(params)
        self.max_length = 10  # fixed number of history steps after padding
        emb = 128
        n_heads = 8
        depth = 2
        self.positional_emb = Embedder(self.max_length,128)
        self.transformer = CTransformer(emb,n_heads,depth,self.max_length,self.combined_output,max_pool=False)
        self.seed = torch.manual_seed(seed)
        self.mapping = params['mapping']
        self.noise = GaussianNoise(is_relative_detach=True)
        self.fc1 = nn.Linear(513,hidden_dims[0])
        self.fc2 = nn.Linear(hidden_dims[0],hidden_dims[1])
        self.fc3 = nn.Linear(1280,self.combined_output)
        # NOTE(review): forward() feeds these heads hidden_dims[1]-wide
        # features (128 by default) but they expect 64 -- likely stale sizes;
        # kept as-is to preserve any existing checkpoints. Confirm before use.
        self.value_output = nn.Linear(64,1)
        self.advantage_output = nn.Linear(64,self.combined_output)

    def forward(self,state,action_mask,betsize_mask):
        """Sample an action and compute dueling Q-values from the history.

        Returns a dict with 'action', 'action_category', 'action_prob',
        'action_probs', 'betsize' and 'value' (Q-values).
        """
        mask = combined_masks(action_mask,betsize_mask)
        if mask.dim() > 1:
            mask = mask[-1]  # only the latest step's mask applies
        x = state
        M,C = x.size()
        out = self.preprocess(x)
        x = self.activation(self.fc1(out))
        x = self.activation(self.fc2(x))
        # Pad history to a fixed length and add positional embeddings.
        n_padding = self.max_length - M
        padding = torch.zeros(n_padding,out.size(-1))
        h = torch.cat((out,padding),dim=0)
        pos_emd = self.positional_emb(torch.arange(self.max_length))
        h = h + pos_emd
        t_logits = self.fc3(h.view(-1)).unsqueeze(0)
        # fixed typo: local was previously spelled 'cateogry_logits'
        category_logits = self.noise(t_logits)
        action_soft = F.softmax(category_logits,dim=-1)
        # Renormalize the softmax over the legal actions only.
        action_probs = norm_frequencies(action_soft,mask)
        last_action = state[-1,self.mapping['state']['previous_action']].long().unsqueeze(-1)
        m = Categorical(action_probs)
        action = m.sample()
        action_category,betsize_category = self.helper_functions.unwrap_action(action,last_action)

        # Dueling critic: Q = V + A - mean(A).
        q_input = x.view(M,-1)
        a = self.advantage_output(q_input)
        v = self.value_output(q_input)
        v = v.expand_as(a)
        q = v + a - a.mean(1,keepdim=True).expand_as(a)

        outputs = {
            'action':action,
            'action_category':action_category,
            'action_prob':m.log_prob(action),
            'action_probs':m.probs,
            'betsize':betsize_category,
            'value':q
            }
        return outputs
Пример #13
0
class OmahaActor(Network):
    """Omaha actor: preprocess the state history, run a bidirectional LSTM
    over a fixed-length sequence, and sample from a masked distribution over
    the combined action/betsize space.
    """
    def __init__(self,
                 seed,
                 nS,
                 nA,
                 nB,
                 params,
                 hidden_dims=(64, 64),
                 activation=F.leaky_relu):
        """See class docstring. params must contain 'maxlen', 'device',
        'state_mapping', 'lstm_in' and 'lstm_out'.
        """
        super().__init__()
        self.activation = activation
        self.nS = nS
        self.nA = nA
        self.nB = nB
        # Combined head: nA - 2 discrete actions plus nB betsizes.
        self.combined_output = nA - 2 + nB
        self.helper_functions = NetworkFunctions(self.nA, self.nB)
        self.maxlen = params['maxlen']
        self.device = params['device']
        self.process_input = PreProcessLayer(params)

        # self.seed = torch.manual_seed(seed)
        self.state_mapping = params['state_mapping']
        self.action_emb = Embedder(Action.UNOPENED, 64)
        self.betsize_emb = Embedder(self.nB, 64)
        self.noise = GaussianNoise(self.device)
        self.emb = 1248
        n_heads = 8  # unused: leftover from a transformer experiment
        depth = 2  # unused: leftover from a transformer experiment
        # self.attention = EncoderAttention(params['lstm_in'],params['lstm_out'])
        self.lstm = nn.LSTM(params['lstm_in'],
                            params['lstm_out'],
                            bidirectional=True)
        self.batchnorm = nn.BatchNorm1d(self.maxlen)
        # self.blocks = nn.Sequential(
        #     IdentityBlock(hidden_dims=(2560,2560,512),activation=F.leaky_relu),
        #     IdentityBlock(hidden_dims=(512,512,256),activation=F.leaky_relu),
        # )
        # NOTE(review): 2560 presumably = maxlen * 2 * lstm_out (bidirectional) -- confirm.
        self.fc_final = nn.Linear(2560, self.combined_output)
        self.dropout = nn.Dropout(0.5)

    def forward(self, state, action_mask, betsize_mask):
        """
        Sample an action for the given state history.

        state: B,M,39 (numpy or tensor); masks mark legal actions/betsizes.
        Returns a dict keyed 'action', 'action_category', 'action_prob',
        'action_probs', 'betsize'; values are Python scalars when B == 1.
        """
        x = state
        if not isinstance(x, torch.Tensor):
            x = torch.tensor(x, dtype=torch.float32).to(self.device)
            action_mask = torch.tensor(action_mask,
                                       dtype=torch.float).to(self.device)
            betsize_mask = torch.tensor(betsize_mask,
                                        dtype=torch.float).to(self.device)
        mask = combined_masks(action_mask, betsize_mask)
        out = self.process_input(x)
        B, M, c = out.size()
        # Left-pad (or truncate) the sequence to exactly maxlen steps.
        n_padding = self.maxlen - M
        if n_padding < 0:
            h = out[:, -self.maxlen:, :]
        else:
            padding = torch.zeros(B, n_padding, out.size(-1)).to(self.device)
            h = torch.cat((padding, out), dim=1)
        lstm_out, hidden_states = self.lstm(h)
        norm = self.batchnorm(lstm_out)
        # self.attention(out)
        # blocks_out = self.blocks(lstm_out.view(-1))
        t_logits = self.fc_final(norm.view(B, -1))
        category_logits = self.noise(t_logits)
        # skip connection
        # category_logits += h
        action_soft = F.softmax(category_logits, dim=-1)
        # if torch.cuda.is_available():
        #     action_probs = norm_frequencies(action_soft,mask.cuda())
        #     previous_action = torch.as_tensor(state[:,-1,self.state_mapping['last_action']]).cuda()#.to(self.device)
        # else:
        # Renormalize the softmax over the legal actions only.
        action_probs = norm_frequencies(action_soft, mask)
        previous_action = torch.as_tensor(
            state[:, -1, self.state_mapping['last_action']]).to(self.device)
        m = Categorical(action_probs)
        action = m.sample()
        action_category, betsize_category = self.helper_functions.batch_unwrap_action(
            action, previous_action)
        if B > 1:
            # batch training
            outputs = {
                'action': action,
                'action_category': action_category,
                'action_prob': m.log_prob(action),
                'action_probs': action_probs,
                'betsize': betsize_category
            }
        else:
            # playing hand
            outputs = {
                'action': action.item(),
                'action_category': action_category.item(),
                'action_prob': m.log_prob(action),
                'action_probs': action_probs,
                'betsize': betsize_category.item()
            }
        return outputs