Example #1
def lay_conv2D(
        input,
        name='conv2d',
        kernels=(3, 5, 7),  # layer kernels
        filters=(36, 12, 6),  # int divisible by len(kernels) or tuple of len(kernels)
        dilation=1,
        activation=None,
        useBias=True,
        gatedLU=False,  # Gated Linear Unit architecture
        initializer=None,
        seed=12321,
        verbLev=0):

    if initializer is None: initializer = my_initializer(seed)
    with tf.variable_scope(name):
        variables = []
        subOutList = []
        if type(kernels) is not tuple: kernels = (kernels, )
        if verbLev > 0:
            print(' > %s: kernels %s, filters %s, dilation %s' %
                  (name, kernels, filters, dilation))
        for k in range(len(kernels)):
            with tf.variable_scope('kernel_%d' % k):
                subKernel = kernels[k]
                if type(filters) is not tuple:
                    subFilters = filters // len(kernels)
                else:
                    subFilters = filters[k]
                if gatedLU: subFilters *= 2

                convLay = tf.layers.Conv2D(filters=subFilters,
                                           kernel_size=subKernel,
                                           dilation_rate=dilation,
                                           activation=None,
                                           use_bias=useBias,
                                           kernel_initializer=initializer,
                                           padding='valid',
                                           data_format='channels_last')
                subOutput = convLay(input)
                for var in convLay.variables:
                    variables.append(var)

                if verbLev > 1:
                    print(' >> subConv: filters %s, kernel %s' %
                          (subFilters, subKernel))
                subOutList.append(subOutput)

        output = tf.concat(subOutList, axis=-1)
        if gatedLU:
            s1, s2 = tf.split(output, num_or_size_splits=2, axis=-1)
            output = s1 * tf.sigmoid(s2)
        else:
            if activation: output = activation(output)

        variables = flatten_LOTens(variables)

    return output, variables
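
# Minimal usage sketch for lay_conv2D (an illustrative addition, not part of the original
# module). Assumptions: TensorFlow 1.x graph mode; the module's flatten_LOTens helper is
# importable; an explicit initializer is passed so my_initializer is not required. A single
# kernel size is used because padding is hard-coded to 'valid', so mixing kernel sizes would
# yield sub-outputs with different spatial dims and break the final tf.concat.
import tensorflow as tf

images = tf.placeholder(tf.float32, shape=[None, 28, 28, 3])  # [bsz,H,W,C]
conv_out, conv_vars = lay_conv2D(input=images,
                                 kernels=3,
                                 filters=12,
                                 activation=tf.nn.relu,
                                 initializer=tf.glorot_uniform_initializer(seed=111),
                                 verbLev=1)
# conv_out: [None, 26, 26, 12]; conv_vars: flat list of the layer kernel/bias variables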
Example #2
def lay_conv1D(
        input,
        name='conv1D',
        kernels=(3, 5, 7),  # layer kernels
        filters=(36, 12, 6),  # int divisible by len(kernels) or tuple of len(kernels)
        dilation=1,
        activation=None,
        use_bias=True,
        gated_LU=False,  # Gated Linear Unit architecture
        initializer=None,
        padding='valid',  # 'same' adds padding, 'valid' does not
        seed=12321,
        verb=0):

    if initializer is None: initializer = my_initializer(seed)
    with tf.variable_scope(name):
        sub_out_list = []
        if type(kernels) is not tuple: kernels = (kernels, )
        if verb > 1:
            print(' > %s: kernels %s, filters %s, dilation %s' %
                  (name, kernels, filters, dilation))
        for k in range(len(kernels)):
            with tf.variable_scope('kernel_%d' % k):
                sub_kernel = kernels[k]
                if type(filters) is not tuple:
                    sub_filters = filters // len(kernels)
                else:
                    sub_filters = filters[k]
                if gated_LU: sub_filters *= 2

                conv_lay = tf.layers.Conv1D(filters=sub_filters,
                                            kernel_size=sub_kernel,
                                            dilation_rate=dilation,
                                            activation=None,
                                            use_bias=use_bias,
                                            kernel_initializer=initializer,
                                            padding=padding,
                                            data_format='channels_last')
                sub_output = conv_lay(input)

                if verb > 1:
                    print(' >> sub_conv: filters %s, kernel %s' %
                          (sub_filters, sub_kernel))
                sub_out_list.append(sub_output)

        output = tf.concat(sub_out_list, axis=-1)
        if gated_LU:
            s1, s2 = tf.split(output, num_or_size_splits=2, axis=-1)
            output = s1 * tf.sigmoid(s2)
        elif activation:
            output = activation(output)

    return output
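
# Minimal usage sketch for lay_conv1D (an illustrative addition). Assumptions: TensorFlow 1.x
# graph mode; an explicit initializer is passed so my_initializer is not required; shapes are
# illustrative. padding='same' keeps the time dimension equal across the three kernel sizes,
# which the final tf.concat over the feature axis requires.
import tensorflow as tf

seq = tf.placeholder(tf.float32, shape=[None, 50, 64])  # [bsz,time,feats]
conv_out = lay_conv1D(input=seq,
                      kernels=(3, 5, 7),
                      filters=(16, 16, 16),
                      activation=tf.nn.relu,
                      initializer=tf.glorot_uniform_initializer(seed=111),
                      padding='same')
# conv_out: [None, 50, 48] - three 16-filter branches concatenated on the feature axis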
Example #3
def decN(
        input,
        dictW,
        predN=1,  # N samples for every feature
        name='decN',
        hLays=None,  # tuple or list of ints
        hActiv=tf.nn.relu,
        initializer=None,
        seed=12321,
        verbLev=0):

    if verbLev > 0: print('\nBuilding decoderN ...')
    if verbLev > 1: print('decoder input:', input)

    if initializer is None: initializer = my_initializer(seed)

    with tf.variable_scope(name):

        # hidden layers
        if hLays:
            for nLay in range(len(hLays)):
                laySize = hLays[nLay]
                input = lay_dense(input=input,
                                  units=laySize,
                                  activation=hActiv,
                                  use_bias=True,
                                  initializer=initializer,
                                  seed=seed,
                                  name='decoderN_Hlay_%s' % nLay)

        # projection to predN x dictW
        logits = lay_dense(input=input,
                           units=predN * dictW,
                           activation=None,
                           use_bias=True,
                           initializer=initializer,
                           seed=seed,
                           name='decoderNProjection')
        if verbLev > 1:
            print(' > projection to logits (%dx dictW):' % predN, logits)

        if predN > 1:
            logits = tf.reshape(logits, [tf.shape(logits)[0], -1, dictW])
            if verbLev > 1:
                print(' > reshaped logits (B,%dxS,dictW):' % predN, logits)

        predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
        if verbLev > 1: print(' > predictions:', predictions)

    return logits, predictions
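
# Minimal usage sketch for decN (an illustrative addition). Assumptions: TensorFlow 1.x graph
# mode; an explicit initializer is passed so my_initializer is not required; dictW and shapes
# are illustrative.
import tensorflow as tf

features = tf.placeholder(tf.float32, shape=[None, 20, 256])  # [bsz,seq,width]
logits, predictions = decN(input=features,
                           dictW=1000,        # vocabulary width
                           predN=2,           # two samples per feature
                           hLays=(512, 512),  # two hidden dense layers
                           initializer=tf.glorot_uniform_initializer(seed=111),
                           verbLev=1)
# logits: [bsz, 2*20, 1000] after the predN>1 reshape; predictions: [bsz, 2*20]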
Example #4
def lay_dense(
        input,
        units: int,  # layer width
        name='dense',
        reuse=False,
        activation=None,
        use_bias=True,
        initializer=None,
        seed=12321):

    if initializer is None: initializer = my_initializer(seed)
    dense_lay = tf.layers.Dense(units=units,
                                activation=activation,
                                use_bias=use_bias,
                                kernel_initializer=initializer,
                                name=name,
                                _reuse=reuse)
    output = dense_lay(input)
    return output
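
# Minimal usage sketch for lay_dense (an illustrative addition). Assumptions: TensorFlow 1.x
# graph mode; an explicit initializer is passed so my_initializer is not required.
import tensorflow as tf

x = tf.placeholder(tf.float32, shape=[None, 128])
h = lay_dense(input=x,
              units=64,
              name='hidden',
              activation=tf.nn.relu,
              initializer=tf.glorot_uniform_initializer(seed=111))
# h: [None, 64]; reuse=tf.AUTO_REUSE (as used for 'c_proj' in cards_enc) lets the layer's
# variables be reused when the builder runs again under the same name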
Example #5
def cards_enc(
        train_flag,  # train flag (bool tensor)
        c_ids,  # seven cards (ids tensor)
        tat_case: bool = False,  # task attention transformer architecture
        emb_width: int = 24,  # cards embedding width
        t_drop: float = 0,
        f_drop: float = 0,
        in_proj: int = None,
        n_layers: int = 8,
        dense_mul: int = 4,  # transformer dense multiplication
        activation=tf.nn.relu,
        dropout: float = 0,  # transformer dropout
        seed=12321,
        verb=0):

    if verb > 0: print('\nBuilding card encoder...')

    with tf.variable_scope('cards_enc'):

        zsL = []
        hist_summ = []

        c_emb = tf.get_variable(  # cards embeddings
            name='c_emb',
            shape=[53, emb_width],  # 52 cards + one extra row for 'no_card'
            dtype=tf.float32,
            initializer=my_initializer(seed=seed))
        hist_summ += [tf.summary.histogram('1.c_emb', c_emb, family='c_emb')]

        c_emb_look = tf.nn.embedding_lookup(params=c_emb, ids=c_ids)
        if verb > 1: print(' > 1.c_emb_look:', c_emb_look)

        myc_emb = tf.get_variable(  # my cards embeddings
            name='myc_emb',
            shape=[2, c_emb.shape[-1]],
            dtype=tf.float32,
            initializer=my_initializer(seed=seed))

        myc_emb_look = tf.nn.embedding_lookup(params=myc_emb,
                                              ids=[0, 0, 1, 1, 1, 1, 1])
        if verb > 1: print(' > myc_emb_look:', myc_emb_look)

        input = c_emb_look + myc_emb_look

        if t_drop or f_drop:
            input = tf_drop(input=input,
                            time_drop=t_drop,
                            feat_drop=f_drop,
                            train_flag=train_flag,
                            seed=seed)

        # input projection (without activation)
        if in_proj:
            input = lay_dense(input=input,
                              units=in_proj,
                              name='c_proj',
                              reuse=tf.AUTO_REUSE,
                              use_bias=False,
                              seed=seed)
            if verb > 1: print(' > input projected:', input)
        elif verb > 1: print(' > input:', input)

        enc_out = enc_TNS(in_seq=input,
                          name='TAT' if tat_case else 'TNS',
                          seq_out=not tat_case,
                          add_PE=False,
                          n_blocks=n_layers,
                          n_heads=1,
                          dense_mul=dense_mul,
                          activation=activation,
                          max_seq_len=7,
                          dropout=dropout,
                          dropout_att=0,
                          drop_flag=train_flag,
                          seed=seed,
                          n_hist=3,
                          verb=verb)
        output = enc_out['output']
        zsL += enc_out['zeroes']
        hist_summ += enc_out['hist_summ']
        if not tat_case:
            output = tf.unstack(output, axis=-2)
            output = tf.concat(output, axis=-1)
            if verb > 1: print(' > encT reshaped output:', output)
        elif verb > 1: print(' > encT output:', output)

        enc_vars = tf.global_variables(scope=tf.get_variable_scope().name)

    return {
        'output': output,
        'enc_vars': enc_vars,
        'hist_summ': hist_summ,
        'zeroes': zsL
    }
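
# Minimal usage sketch for cards_enc (an illustrative addition). Assumptions: TensorFlow 1.x
# graph mode; the module's my_initializer, lay_dense and enc_TNS helpers are importable;
# shapes are illustrative.
import tensorflow as tf

train_flag = tf.placeholder(tf.bool, shape=[], name='train_flag')
c_ids = tf.placeholder(tf.int32, shape=[None, 7], name='c_ids')  # ids in [0,52], extra row of c_emb is 'no_card'
enc = cards_enc(train_flag=train_flag,
                c_ids=c_ids,
                emb_width=24,
                n_layers=4,
                verb=1)
cards_encoded = enc['output']  # non-TAT case: per-card vectors concatenated along the last axis
enc_vars = enc['enc_vars']     # all variables created under the 'cards_enc' scope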
Example #6
def enc_DRT(
        input,
        name='enc_DRT',
        shared_lays: bool = False,  # shared variables in enc_layers
        n_layers=12,
        lay_width: int = None,  # for None matches input width
        dns_scale=6,  # scale(*) of first dense
        activation=tf.nn.relu,  # gelu is really worth a try
        dropout=0.0,  # dropout after two denses
        training_flag=None,  # training flag tensor (for dropout)
        initializer=None,
        seed=12321,
        n_hist=4,  # number of histogram layers (for TB)
        verb=0):

    lay_width_matched = ''
    if lay_width is None:
        lay_width = input.shape.as_list()[-1]
        lay_width_matched = '(lay_width taken from input width)'
    if verb > 0:
        drp = 0.0 if not dropout else dropout
        print(
            f'\nBuilding DRTencoder ({n_layers}x{lay_width} drop:{drp:.2f}) {lay_width_matched}...'
        )

    if initializer is None: initializer = my_initializer(seed)

    hist_summ = []
    hist_layers = list_of_layers(n_layers, n_select=n_hist)
    if verb > 1: print(' > histogram layers of DRTencoder:', hist_layers)

    zsL = []  # zeroes list
    with tf.variable_scope(name):

        # input projection
        iW = input.shape[-1]
        if iW != lay_width:
            input = lay_dense(input=input,
                              units=lay_width,
                              use_bias=False,
                              initializer=initializer,
                              seed=seed)
            if verb > 0:
                print('projected input to layWidth(%d) since it differs(%d)' %
                      (lay_width, iW))

        input = tf.keras.layers.LayerNormalization(axis=-1)(input)  # input layer_norm

        output = input  # for 0 layers case
        for nL in range(n_layers):

            lay_name = f'DRLay_{nL}' if not shared_lays else 'DRLay_shared'
            lay_out = lay_DRT(input=output,
                              name=lay_name,
                              hist_name=name,
                              dns_scale=dns_scale,
                              activation=activation,
                              dropout=dropout,
                              training_flag=training_flag,
                              initializer=initializer,
                              seed=seed)

            output = lay_out['output']
            if nL in hist_layers: hist_summ.append(lay_out['hist_summ'])
            zsL += lay_out['zeroes']

    return {'output': output, 'hist_summ': hist_summ, 'zeroes': zsL}
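
# Minimal usage sketch for enc_DRT (an illustrative addition). Assumptions: TensorFlow 1.x
# graph mode; the module's list_of_layers, lay_DRT, lay_res and zeroes helpers are importable;
# an explicit initializer is passed so my_initializer is not required.
import tensorflow as tf

x = tf.placeholder(tf.float32, shape=[None, 96])
train_flag = tf.placeholder(tf.bool, shape=[])
drt = enc_DRT(input=x,
              n_layers=6,
              dns_scale=4,
              dropout=0.1,
              training_flag=train_flag,
              initializer=tf.glorot_uniform_initializer(seed=111),
              verb=1)
drt_out = drt['output']  # [None, 96]: lay_width defaults to the input width
drt_zsL = drt['zeroes']  # per-layer activation-zero stats collected from lay_DRT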
Example #7
def lay_DRT(
        input,
        name='lay_DRT',  # scope name, be careful when stacked since auto_reuse
        hist_name=None,  # family name of histogram
        dns_scale=4,
        activation=tf.nn.relu,  # gelu is really worth a try
        dropout=None,  # dropout (after two denses)
        training_flag=None,  # training flag tensor (for dropout)
        initializer=None,
        seed=12321):

    if not hist_name: hist_name = name
    lay_width = input.shape[-1]
    if initializer is None: initializer = my_initializer(seed)
    hist_summ = []

    with tf.variable_scope(name_or_scope=name, reuse=tf.AUTO_REUSE):

        hist_summ.append(
            tf.summary.histogram('a_denseSin', input, family=hist_name))

        # dense (scale up)
        output = lay_dense(input=input,
                           units=int(lay_width * dns_scale),
                           activation=None,
                           use_bias=True,
                           initializer=initializer,
                           seed=seed,
                           name='denseS')
        hist_summ.append(
            tf.summary.histogram('b_denseSout', output, family=hist_name))

        # activation
        output = activation(output)
        zsL = [zeroes(output)]  # zeroes list
        hist_summ.append(
            tf.summary.histogram('c_activation', output, family=hist_name))

        # dense (scale down) no activ
        output = lay_dense(input=output,
                           units=lay_width,
                           name='DRTdenseNA',
                           use_bias=True,
                           initializer=initializer,
                           seed=seed)
        hist_summ.append(
            tf.summary.histogram('d_denseNAout', output, family=hist_name))

        # layer dropout
        if dropout:
            output = tf.layers.dropout(inputs=output,
                                       rate=dropout,
                                       training=training_flag,
                                       seed=seed)

        # residual
        output = lay_res(input, output)
        hist_summ.append(
            tf.summary.histogram('e_residual', output, family=hist_name))

        # layer_norm
        output = tf.keras.layers.LayerNormalization(axis=-1)(output)
        hist_summ.append(
            tf.summary.histogram('f_LAYout', output, family=hist_name))

    return {'output': output, 'hist_summ': hist_summ, 'zeroes': zsL}
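
# Minimal usage sketch for a single lay_DRT block (an illustrative addition). Assumptions:
# TensorFlow 1.x graph mode; the module's lay_res and zeroes helpers are importable; an
# explicit initializer is passed so my_initializer is not required.
import tensorflow as tf

x = tf.placeholder(tf.float32, shape=[None, 10, 64])
train_flag = tf.placeholder(tf.bool, shape=[])
drt_lay = lay_DRT(input=x,
                  name='my_DRT',
                  dns_scale=4,  # inner dense widens to 4*64
                  dropout=0.1,
                  training_flag=train_flag,
                  initializer=tf.glorot_uniform_initializer(seed=111))
y = drt_lay['output']  # [None, 10, 64]: dense up, activation, dense down, dropout,
                       # residual add, then LayerNormalization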
Example #8
def enc_CNN(
        input: tf.Tensor,
        history: tf.Tensor = None,  # optional history (state) tensor with shape [bsz, n_layers, kernel-1, n_filters] >> masked cnn
        name='enc_CNN',
        # layer params
        shared_lays: bool = False,  # shared variables in enc_layers
        n_layers: int = 12,  # num of layers
        kernel: int = 3,  # layer kernel
        n_filters: int = 128,  # num of filters
        activation=tf.nn.relu,  # global enc activation func, gelu is really worth a try
        lay_drop: float or None = 0.0,
        ldrt_scale: int or None = 0,  # DRT @enc_lay - scale(*) of first dense, for None or 0 the DRT @lay won't be built
        ldrt_drop: float or None = 0.0,  # DRT @enc_lay - dropout
        # other
        training_flag: tf.Tensor or bool = None,  # dropout training flag tensor
        initializer=None,
        seed: int = 12321,
        n_hist: int = 4,  # number of histogram layers
        verb=0):

    if verb > 0:
        print(
            f'\n *** enc_CNN *** Building {name} ({n_layers}x{n_filters})...')

    if initializer is None: initializer = my_initializer(seed)

    # manage history
    history_lays = None
    if history is not None:
        history_lays = tf.unstack(history, axis=-3)
        if verb > 1:
            print(
                f' > state_lays len {len(history_lays)} of: {history_lays[0]}')

    hist_summ = []
    hist_layers = list_of_layers(n_layers, n_select=n_hist)
    if verb > 1: print(f' > histogram layers of cnn encoder: {hist_layers}')

    with tf.variable_scope(name, reuse=tf.AUTO_REUSE):

        input_lays = []  # here we will store inputs of the following layers to extract the state (history)
        zsL = []  # zeroes

        # input projection - to match n_filters and input width
        if verb > 1: print(f' > encoder input: {input}')
        if input.shape[-1] != n_filters:
            input = lay_dense(input=input,
                              units=n_filters,
                              name='enc_input_projection',
                              initializer=initializer)
            if verb > 1: print(f' > encoder projected input: {input}')

        output = input  # for 0 layers case
        sub_output = input  # first input
        for depth in range(n_layers):

            lay_name = f'enc_CNN_lay_{depth}' if not shared_lays else 'enc_CNN_lay_shared'
            if verb > 1: print(f'<< layer {lay_name}:')

            lay_input = tf.concat([history_lays[depth], sub_output],
                                  axis=-2) if history_lays else sub_output
            if verb > 1:
                print(f' > sub_output (previous): {sub_output}')
                print(f' > lay_input (eventually padded): {lay_input}')
            input_lays.append(lay_input)

            hist_lay = depth in hist_layers

            with tf.variable_scope(lay_name):

                if hist_lay:
                    hist_summ.append(
                        tf.summary.histogram('a_lay_in',
                                             lay_input,
                                             family=name))

                # LN
                lay_input = tf.keras.layers.LayerNormalization(
                    axis=-1)(lay_input)
                if hist_lay:
                    hist_summ.append(
                        tf.summary.histogram('b_LN', lay_input, family=name))

                # conv no activation
                output = lay_conv1D(
                    input=lay_input,
                    name='conv1D',
                    kernels=kernel,
                    filters=n_filters,
                    activation=None,
                    initializer=initializer,
                    padding='same' if history is None else 'valid',
                    seed=seed,
                    verb=0)
                if hist_lay:
                    hist_summ.append(
                        tf.summary.histogram('c_cnn', output, family=name))

                # activation
                if activation:
                    output = activation(output)
                    zsL += [zeroes(output)]  # catch zeroes
                    if hist_lay:
                        hist_summ.append(
                            tf.summary.histogram('d_activation',
                                                 output,
                                                 family=name))

                # dropout
                if lay_drop:
                    output = tf.layers.dropout(inputs=output,
                                               rate=lay_drop,
                                               training=training_flag,
                                               seed=seed)
                    if hist_lay:
                        hist_summ.append(
                            tf.summary.histogram('e_drop', output,
                                                 family=name))

                # RES, here we take sub_output, since lay_input may be padded by history
                output += sub_output
                if hist_lay:
                    hist_summ.append(
                        tf.summary.histogram('f_residual', output,
                                             family=name))

                if verb > 1: print(f' > output (layer): {output}')

                if ldrt_scale:
                    lay_out = lay_DRT(input=output,
                                      name=lay_name + '_lay_DRT',
                                      hist_name=name,
                                      dns_scale=ldrt_scale,
                                      activation=activation,
                                      dropout=ldrt_drop,
                                      training_flag=training_flag,
                                      initializer=initializer,
                                      seed=seed)
                    output = lay_out['output']
                    zsL += lay_out['zeroes']
                    if hist_lay: hist_summ.append(lay_out['hist_summ'])

                sub_output = output

    output = tf.keras.layers.LayerNormalization(axis=-1)(output)  # final LN

    # prepare fin_state
    fin_state = None
    if history is not None:
        state = tf.stack(input_lays, axis=-3)
        if verb > 1: print(f' > state (stacked): {state}')
        fin_state = tf.split(state,
                             num_or_size_splits=[-1, kernel - 1],
                             axis=-2)[1]
        if verb > 1: print(f' > fin_state (split): {fin_state}')

    if verb > 1: print(f' > {name} output: {output}')
    return {
        'output': output,
        'state': fin_state,  # history for next
        'hist_summ': hist_summ,
        'zeroes': zsL
    }
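
# Minimal usage sketch for enc_CNN with a rolling history (masked cnn) state (an illustrative
# addition). Assumptions: TensorFlow 1.x graph mode; the module's list_of_layers, zeroes,
# lay_dense and lay_conv1D helpers are importable; an explicit initializer is passed so
# my_initializer is not required; shapes are illustrative.
import tensorflow as tf

n_layers, kernel, n_filters = 4, 3, 32
seq = tf.placeholder(tf.float32, shape=[None, None, n_filters])  # [bsz,seq,feats]
history = tf.placeholder(tf.float32, shape=[None, n_layers, kernel - 1, n_filters])
enc = enc_CNN(input=seq,
              history=history,
              n_layers=n_layers,
              kernel=kernel,
              n_filters=n_filters,
              initializer=tf.glorot_uniform_initializer(seed=111),
              ldrt_scale=0)  # no DRT sub-layer
enc_out = enc['output']    # [bsz,seq,n_filters]
next_state = enc['state']  # [bsz,n_layers,kernel-1,n_filters], fed back as history next step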
Example #9
def cnn_DMG(
        name :str,
        train_ce :bool= True,           # train cards encoder
        c_embW :int=    12,             # card emb width >> makes network width (x7)
        n_lay=          12,             # number of CNNR layers >> makes network deep ( >> context length)
        width=          None,           # representation width (number of filters), for None uses cards_encoded_width
        activation=     tf.nn.relu,
        opt_class=      partial(tf.compat.v1.train.AdamOptimizer, beta1=0.7, beta2=0.7),
        iLR=            3e-5,
        warm_up=        100,            # num of steps has to be small (since we do rare updates)
        avt_SVal=       0.04,
        avt_window=     20,
        do_clip=        True,
        verb=           0,
        **kwargs):

    if verb>0: print(f'\nBuilding {name} cnn_DMG (graph)...')

    with tf.variable_scope(name):

        n_hands = tf.get_variable( # number of hands while learning
            name=           'n_hands',
            shape=          [],
            trainable=      False,
            initializer=    tf.constant_initializer(0),
            dtype=          tf.int32)

        cards_PH = tf.placeholder(  # 7 cards placeholder
            name=           'cards_PH',
            dtype=          tf.int32,
            shape=          [None, None, 7])  # [bsz,seq,7cards]

        train_PH = tf.placeholder(  # train placeholder
            name=           'train_PH',
            dtype=          tf.bool,
            shape=          [])

        ce_out = cards_enc(
            train_flag= train_PH,
            c_ids=      cards_PH,
            emb_width=  c_embW)
        cards_encoded = ce_out['output']
        enc_vars =      ce_out['enc_vars']
        enc_zsL =       ce_out['zeroes']
        if verb>1: print(' ### num of enc_vars (%d) %s'%(len(enc_vars),short_scin(num_var_floats(enc_vars))))
        if verb>1: print(' > cards encoded:', cards_encoded)

        switch_PH = tf.placeholder( # switch placeholder
            name=           'switch_PH',
            dtype=          tf.int32, # 0 for move, 1 for cards
            shape=          [None, None, 1])  # [bsz,seq,1]

        event_PH = tf.placeholder(  # event id placeholder
            name=           'event_PH',
            dtype=          tf.int32,
            shape=          [None, None])  # [bsz,seq]

        n_events = 1 + N_TABLE_PLAYERS + len(TBL_MOV)*(N_TABLE_PLAYERS-1)
        event_emb = tf.get_variable(  # event type embeddings
            name=           'event_emb',
            shape=          [n_events, cards_encoded.shape[-1]],
            dtype=          tf.float32,
            initializer=    my_initializer())

        event_in = tf.nn.embedding_lookup(params=event_emb, ids=event_PH)
        if verb>1: print(' > event_in:', event_in)

        # tried with tf.where and switching inputs, but speed was the same...
        switch = tf.cast(switch_PH, dtype=tf.float32)
        input = switch*cards_encoded + (1-switch)*event_in
        if verb>1: print(' > input (merged):', input)

        # projection without activation and bias
        if width:
            input = lay_dense(
                input=          input,
                units=          width,
                use_bias=       False)
            if verb>1: print(' > projected input:', input)
        else: width = cards_encoded.shape[-1]

        # layer_norm
        sub_output = tf.contrib.layers.layer_norm(
            inputs=             input,
            begin_norm_axis=    -1,
            begin_params_axis=  -1)

        state_shape = [n_lay, 2, width]
        single_zero_state = tf.zeros(shape=state_shape)  # [n_lay,2,width]

        state_PH = tf.placeholder(
            name=           'state_PH',
            dtype=          tf.float32,
            shape=          [None] + state_shape) # [bsz,n_lay,2,width]

        cnn_enc_out = enc_CNN(
            input=          sub_output,
            history=        state_PH,
            n_layers=       n_lay,
            n_filters=      width,
            activation=     activation,
            n_hist=         0)
        out =       cnn_enc_out['output']
        fin_state = cnn_enc_out['state']
        cnn_zsL =   cnn_enc_out['zeroes']

        if verb > 1:
            print(' > out:', out)
            print(' > fin_state (split):', fin_state)

        # projection to logits
        logits = lay_dense(
            input=          out,
            units=          len(TBL_MOV),
            use_bias=       False)
        if verb>1: print(' > logits:', logits)

        probs = tf.nn.softmax(logits)

        cnn_vars = tf.trainable_variables(scope=tf.get_variable_scope().name) + [n_hands]
        cnn_vars = [var for var in cnn_vars if var not in enc_vars]
        if verb>1: print(' ### num of cnn_vars (%d) %s'%(len(cnn_vars),short_scin(num_var_floats(cnn_vars))))

        move_PH = tf.placeholder(  # move made (label)
            name=           'move_PH',
            dtype=          tf.int32,
            shape=          [None, None])  # [bsz,seq]

        rew_PH = tf.placeholder(  # reward for move made
            name=           'rew_PH',
            dtype=          tf.float32,
            shape=          [None, None])  # [bsz,seq]

        # this loss is auto averaged with reduction parameter
        # loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True)
        # loss = loss(y_true=move, y_pred=logits, sample_weight=rew)
        loss = tf.losses.sparse_softmax_cross_entropy(
            labels=     move_PH,
            logits=     logits,
            weights=    rew_PH)

    train_vars = [] + cnn_vars
    if train_ce: train_vars += enc_vars

    return {
        'name':                 name,
        'cards_PH':             cards_PH,
        'train_PH':             train_PH,
        'switch_PH':            switch_PH,
        'event_PH':             event_PH,
        'move_PH':              move_PH,
        'rew_PH':               rew_PH,
        'state_PH':             state_PH,
        'single_zero_state':    single_zero_state,
        'probs':                probs,
        'fin_state':            fin_state,
        'enc_zeroes':           tf.concat(enc_zsL, axis=-1),
        'cnn_zeroes':           tf.concat(cnn_zsL, axis=-1),
        'loss':                 loss,
        'n_hands':              n_hands,
        'enc_vars':             enc_vars,
        'cnn_vars':             cnn_vars,
        'train_vars':           train_vars}
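
# Minimal usage sketch for cnn_DMG (an illustrative addition). Assumptions: TensorFlow 1.x
# graph mode; the module's helpers and the N_TABLE_PLAYERS / TBL_MOV constants are importable;
# batch/sequence sizes are illustrative; the optimizer-related arguments (opt_class, iLR,
# warm_up, ...) are not consumed here and are presumably handled by the surrounding framework.
import numpy as np
import tensorflow as tf

g = cnn_DMG(name='dmg0', c_embW=12, n_lay=6, verb=1)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    zero_state = sess.run(g['single_zero_state'])  # [n_lay, 2, width]
    probs, fin_state = sess.run(
        [g['probs'], g['fin_state']],
        feed_dict={
            g['cards_PH']:  np.zeros((1, 1, 7), dtype=np.int32),
            g['event_PH']:  np.zeros((1, 1), dtype=np.int32),
            g['switch_PH']: np.ones((1, 1, 1), dtype=np.int32),  # 1: take the cards branch
            g['train_PH']:  False,
            g['state_PH']:  zero_state[None, ...],  # batch of one zero state
        })
    # probs: [1, 1, len(TBL_MOV)]; fin_state is fed back as state_PH on the next step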