def LSTM_critic(useMask=True,
                hiddenDim=100,
                LSTMactiv='tanh',
                depth=1,
                useLSTMfc=True,
                LSTMfcDim=16,
                LSTMfcActiv='relu',
                oneHotWordDim=wordsVocabSize,
                useOneHotWordFc=False,
                oneHotWordFcDim=16,
                oneHotWordFcActiv='relu',
                outputHDim=64,
                outputActiv='relu',
                lr=5e-4):

    # Manual seeds
    os.environ['PYTHONHASHSEED'] = '0'  # Necessary for python3
    np.random.seed(29)
    rn.seed(29)
    tf.set_random_seed(29)

    # Input
    vidInput = Input(shape=(
        framesPerWord,
        nOfMouthPixels,
    ))

    # Mask
    if useMask:
        LSTMinput = Masking(mask_value=0.)(vidInput)
    else:
        LSTMinput = vidInput

    # (Deep) LSTM
    # If depth > 1
    if depth > 1:
        # First layer
        encoded = LSTM(hiddenDim, activation=LSTMactiv,
                       return_sequences=True)(LSTMinput)
        for d in range(depth - 2):
            encoded = LSTM(hiddenDim,
                           activation=LSTMactiv,
                           return_sequences=True)(encoded)
        # Last layer
        encoded = LSTM(hiddenDim, activation=LSTMactiv)(encoded)
    # If depth = 1
    else:
        encoded = LSTM(hiddenDim, activation=LSTMactiv)(LSTMinput)

    # LSTM Fc
    if useLSTMfc:
        vidFeatures = Dense(LSTMfcDim, activation=LSTMfcActiv)(encoded)
    else:
        vidFeatures = encoded

    # Predicted Word input
    oneHotWordInput = Input(shape=(oneHotWordDim, ))

    # OHWfc
    if useOneHotWordFc:
        oneHotWordFeatures = Dense(
            oneHotWordFcDim, activation=oneHotWordFcActiv)(oneHotWordInput)
    else:
        oneHotWordFeatures = oneHotWordInput

    # Full feature
    fullFeature = concatenate([vidFeatures, oneHotWordFeatures])

    # Output
    y = Dense(outputHDim, activation=outputActiv)(fullFeature)
    myOutput = Dense(1, activation='sigmoid')(y)

    # Model
    criticModel = Model(inputs=[vidInput, oneHotWordInput], outputs=myOutput)

    # lr = 5e-4
    adam = Adam(lr=lr)
    criticModel.compile(optimizer=adam,
                        loss='binary_crossentropy',
                        metrics=['accuracy'])

    criticModel.summary()

    fileNamePre = 'LSTMCritic-revSeq-Mask-LSTMh' + str(hiddenDim) \
        + '-LSTMactiv' + str(LSTMactiv) + '-depth' + str(depth)
    if useLSTMfc:
        fileNamePre += '-LSTMfc' + str(LSTMfcDim)
    fileNamePre += '-OHWord' + str(oneHotWordDim)
    if useOneHotWordFc:
        fileNamePre += '-OHWordFc' + str(oneHotWordFcDim)
    fileNamePre += '-out' + str(outputHDim) \
        + '-Adam-%1.e' % lr
    print(fileNamePre)

    return criticModel, fileNamePre
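
A minimal usage sketch for the critic above; framesPerWord, nOfMouthPixels and wordsVocabSize are module-level globals that must already exist (hypothetical values shown):

# Hypothetical shapes; the real values come from the surrounding module,
# and must be set before LSTM_critic is defined (a default arg reads them).
framesPerWord, nOfMouthPixels, wordsVocabSize = 8, 1600, 52

criticModel, fileNamePre = LSTM_critic(hiddenDim=100, depth=2)
# Train on (video, one-hot word) pairs with binary correct/incorrect labels:
# criticModel.fit([vids, oneHotWords], labels, batch_size=32, epochs=10)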
Example #2
project = 'manual-ant-1.8.2'
x_train, y_train = preprocess.get_xy_train(TRAIN_SET_DIR + '/train',
                                           tokenizer=tokenizer,
                                           mn_maxlen=MAX_SEQUENCE_LENGTH,
                                           embedding_matrix=embedding_matrix)
#x_test, y_test = preprocess.get_xy_test(test_path,project,tokenizer=tokenizer, maxlen=MAX_SEQUENCE_LENGTH,embedding_matrix=embedding_matrix)

print('Training model.')
# load pre-trained word embeddings into an Embedding layer
# note that we set trainable = False so as to keep the embeddings fixed
method_a = Input(shape=(MAX_SEQUENCE_LENGTH, EMBEDDING_DIM), name='method_a')
metric_a = Input(shape=(12, ), name='metric_a')

#embedding_layer = Embedding(len(all_word_index) + 1,EMBEDDING_DIM,input_length=MAX_SEQUENCE_LENGTH,weights=[embedding_matrix],trainable=False)
masking_layer = Masking(mask_value=0,
                        input_shape=(MAX_SEQUENCE_LENGTH, EMBEDDING_DIM))
lstm_share = LSTM(output_dim=2, activation='sigmoid', init='uniform')
#dropout_share = GaussianNoise(0.2)
#dense_share = Dense(8,activation='tanh',init='uniform')
#dense_share3 = Dense(64,activation='tanh',init='uniform')
#bn_share = BatchNormalization(epsilon=0.001, mode=0, axis=-1, momentum=0.9, weights=None, beta_init='zero', gamma_init='one')

embedding_a = masking_layer(method_a)
lstm_a = lstm_share(embedding_a)
#dropout_a = dropout_share(lstm_a)
#bn_a = bn_share(lstm_a)
#encoded_a = dense_share(dropout_a)
#decoded_a = dense_share3(encoded_a)

#merged_vector = keras.layers.dot([encoded_a,encoded_b],normalize=True,axes=-1)
#dense_vector = Dense(16,activation='tanh')(merged_vector)
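
For reference, the commented-out embedding line above expands to a frozen pre-trained Embedding layer like this (a sketch; all_word_index and embedding_matrix come from the preprocessing step):

embedding_layer = Embedding(len(all_word_index) + 1,
                            EMBEDDING_DIM,
                            input_length=MAX_SEQUENCE_LENGTH,
                            weights=[embedding_matrix],
                            trainable=False)  # keep the embeddings fixed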
Example #3
    def __init__(self,
                 dim,
                 batch_norm,
                 dropout,
                 rec_dropout,
                 header,
                 task,
                 mode,
                 target_repl=False,
                 deep_supervision=False,
                 num_classes=1,
                 depth=1,
                 input_dim=76,
                 size_coef=4,
                 **kwargs):

        self.dim = dim
        self.batch_norm = batch_norm
        self.dropout = dropout
        self.rec_dropout = rec_dropout
        self.depth = depth
        self.size_coef = size_coef

        if task in ['decomp', 'ihm', 'ph']:
            final_activation = 'sigmoid'
        elif task in ['los']:
            if num_classes == 1:
                final_activation = 'relu'
            else:
                final_activation = 'softmax'
        else:
            raise ValueError("Wrong value for task")

        print "==> not used params in network class:", kwargs.keys()

        # Parse channels
        channel_names = set()
        for ch in header:
            if ch.find("mask->") != -1:
                continue
            pos = ch.find("->")
            if pos != -1:
                channel_names.add(ch[:pos])
            else:
                channel_names.add(ch)
        channel_names = sorted(list(channel_names))
        print "==> found {} channels: {}".format(len(channel_names),
                                                 channel_names)

        channels = []  # each channel is a list of columns
        for ch in channel_names:
            indices = [i for i in range(len(header))
                       if header[i].find(ch) != -1]
            channels.append(indices)

        # Input layers and masking
        X = Input(shape=(None, input_dim), name='X')
        inputs = [X]
        mX = Masking()(X)

        if deep_supervision and mode == 'train':
            M = Input(shape=(None, ), name='M')
            inputs.append(M)

        # Configurations
        is_bidirectional = True
        if deep_supervision:
            is_bidirectional = False

        # Preprocess each channel
        cX = []
        for ch in channels:
            cX.append(Slice(ch)(mX))
        pX = []  # LSTM processed version of cX
        for x in cX:
            p = x
            for i in range(depth):
                num_units = dim
                if is_bidirectional:
                    num_units = num_units // 2

                lstm = LSTM(units=num_units,
                            activation='tanh',
                            return_sequences=True,
                            dropout=dropout,
                            recurrent_dropout=rec_dropout)

                if is_bidirectional:
                    p = Bidirectional(lstm)(p)
                else:
                    p = lstm(p)
            pX.append(p)

        # Concatenate processed channels
        Z = Concatenate(axis=2)(pX)

        # Main part of the network
        for i in range(depth - 1):
            num_units = int(size_coef * dim)
            if is_bidirectional:
                num_units = num_units // 2

            lstm = LSTM(units=num_units,
                        activation='tanh',
                        return_sequences=True,
                        dropout=dropout,
                        recurrent_dropout=rec_dropout)

            if is_bidirectional:
                Z = Bidirectional(lstm)(Z)
            else:
                Z = lstm(Z)

        # Output module of the network
        return_sequences = (target_repl or deep_supervision)
        return_sequences = return_sequences and (mode == 'train')
        L = LSTM(units=int(size_coef * dim),
                 activation='tanh',
                 return_sequences=return_sequences,
                 dropout=dropout,
                 recurrent_dropout=rec_dropout)(Z)

        if dropout > 0:
            L = Dropout(dropout)(L)

        if target_repl:
            y = TimeDistributed(Dense(num_classes,
                                      activation=final_activation),
                                name='seq')(L)
            y_last = LastTimestep(name='single')(y)
            outputs = [y_last, y]
        elif deep_supervision and mode == 'train':
            y = TimeDistributed(Dense(num_classes,
                                      activation=final_activation))(L)
            y = ExtendMask()([y, M])  # this way we extend mask of y to M
            outputs = [y]
        else:
            y = Dense(num_classes, activation=final_activation)(L)
            outputs = [y]

        return super(Network, self).__init__(inputs=inputs, outputs=outputs)
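
To see what the channel parsing in __init__ produces, here is a quick standalone check with a hypothetical header:

header = ['Heart Rate', 'Glascow coma scale->Total',
          'mask->Heart Rate', 'Glascow coma scale->Motor']
channel_names = set()
for ch in header:
    if ch.find("mask->") != -1:
        continue  # mask columns are skipped
    pos = ch.find("->")
    channel_names.add(ch[:pos] if pos != -1 else ch)
print(sorted(channel_names))  # ['Glascow coma scale', 'Heart Rate']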
Example #4
from keras.layers.wrappers import Bidirectional
from attention_decoder import AttentionDecoder
import processing

TOKEN = "553617004:AAGFq_FMPlojaJcdn4dzrWgUmfnUU3gOyTs"
URL = "https://api.telegram.org/bot{}/".format(TOKEN)

chats = []

#define model:
MAX_LENGTH = 150
NR_WORDPIECE = 512
LATENT_DIM = 256

model = Sequential()
model.add(Masking(mask_value=0., input_shape=(MAX_LENGTH, NR_WORDPIECE)))
model.add(Bidirectional(LSTM(LATENT_DIM, return_sequences=True)))
model.add(AttentionDecoder(LATENT_DIM * 2, NR_WORDPIECE))
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['acc'])

# Check for weights, and if not found return error
if os.path.isfile('../res/model.h5'):
    model.load_weights('../res/model.h5')
    print("Modelweights loaded")
else:
    raise FileNotFoundError("Could not find model weights!!!")


def get_url(url):
Example #5
    def buildKerasModel(self, use_sourcelang=False, use_image=True):
        '''
        Define the exact structure of your model here. We create an image
        description generation model by merging the VGG image features with
        a word embedding model, with an LSTM over the sequences.
        '''
        logger.info('Building Keras model...')

        text_input = Input(shape=(self.max_t, self.vocab_size), name='text')
        text_mask = Masking(mask_value=0., name='text_mask')(text_input)

        # Word embeddings
        wemb = TimeDistributed(Dense(output_dim=self.embed_size,
                                     input_dim=self.vocab_size,
                                     W_regularizer=l2(self.l2reg)),
                               name="w_embed")(text_mask)
        drop_wemb = Dropout(self.dropin, name="wemb_drop")(wemb)

        # Embed -> Hidden
        emb_to_hidden = TimeDistributed(Dense(output_dim=self.hidden_size,
                                              input_dim=self.vocab_size,
                                              W_regularizer=l2(self.l2reg)),
                                        name='wemb_to_hidden')(drop_wemb)

        if use_image:
            # Image 'embedding'
            logger.info('Using image features: %s', use_image)
            img_input = Input(shape=(self.max_t, 4096), name='img')
            img_emb = TimeDistributed(Dense(output_dim=self.hidden_size,
                                            input_dim=4096,
                                            W_regularizer=l2(self.l2reg)),
                                      name='img_emb')(img_input)
            img_drop = Dropout(self.dropin, name='img_embed_drop')(img_emb)

        if use_sourcelang:
            logger.info('Using source features: %s', use_sourcelang)
            logger.info('Size of source feature vectors: %d', self.hsn_size)
            src_input = Input(shape=(self.max_t, self.hsn_size), name='src')
            src_relu = Activation('relu', name='src_relu')(src_input)
            src_embed = TimeDistributed(Dense(output_dim=self.hidden_size,
                                              input_dim=self.hsn_size,
                                              W_regularizer=l2(self.l2reg)),
                                        name="src_embed")(src_relu)
            src_drop = Dropout(self.dropin, name="src_drop")(src_embed)

        # Input nodes for the recurrent layer
        rnn_input_dim = self.hidden_size
        if use_image and use_sourcelang:
            recurrent_inputs = [emb_to_hidden, img_drop, src_drop]
            recurrent_inputs_names = ['emb_to_hidden', 'img_drop', 'src_drop']
            inputs = [text_input, img_input, src_input]
        elif use_image:
            recurrent_inputs = [emb_to_hidden, img_drop]
            recurrent_inputs_names = ['emb_to_hidden', 'img_drop']
            inputs = [text_input, img_input]
        elif use_sourcelang:
            recurrent_inputs = [emb_to_hidden, src_drop]
            recurrent_inputs_names = ['emb_to_hidden', 'src_drop']
            inputs = [text_input, src_input]
        merged_input = Merge(mode='sum')(recurrent_inputs)

        # Recurrent layer
        if self.gru:
            logger.info("Building a GRU with recurrent inputs %s",
                        recurrent_inputs_names)
            rnn = GRU(output_dim=self.hidden_size,
                      input_dim=rnn_input_dim,
                      return_sequences=True,
                      W_regularizer=l2(self.l2reg),
                      U_regularizer=l2(self.l2reg),
                      name='rnn')(merged_input)

        else:
            logger.info("Building an LSTM with recurrent inputs %s",
                        recurrent_inputs_names)
            rnn = LSTM(output_dim=self.hidden_size,
                       input_dim=rnn_input_dim,
                       return_sequences=True,
                       W_regularizer=l2(self.l2reg),
                       U_regularizer=l2(self.l2reg),
                       name='rnn')(merged_input)

        output = TimeDistributed(Dense(output_dim=self.vocab_size,
                                       input_dim=self.hidden_size,
                                       W_regularizer=l2(self.l2reg),
                                       activation='softmax'),
                                 name='output')(rnn)

        model = Model(input=inputs, output=output)

        if self.optimiser == 'adam':
            # allow user-defined hyper-parameters for ADAM because it is
            # our preferred optimiser
            optimiser = Adam(lr=self.lr,
                             beta_1=self.beta1,
                             beta_2=self.beta2,
                             epsilon=self.epsilon,
                             clipnorm=self.clipnorm)
            model.compile(optimiser, {'output': 'categorical_crossentropy'})
        else:
            model.compile(self.optimiser,
                          {'output': 'categorical_crossentropy'})

        if self.weights is not None:
            logger.info("... with weights defined in %s", self.weights)
            # Initialise the weights of the model
            shutil.copyfile("%s/weights.hdf5" % self.weights,
                            "%s/weights.hdf5.bak" % self.weights)
            model.load_weights("%s/weights.hdf5" % self.weights)

        #plot(model, to_file="model.png")

        return model
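
Note that this example uses the Keras 1 API (Merge, output_dim, W_regularizer). Under Keras 2 the deprecated Merge(mode='sum') call would be written with the functional add layer, roughly:

from keras.layers import add

# Keras 2 equivalent of Merge(mode='sum')(recurrent_inputs)
merged_input = add(recurrent_inputs)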
Example #6
    def LSTM(self, features, labels, speaker_id, model_type):
        """
        This block will make LSTM cells and produce output
        """
        logo = LeaveOneGroupOut()
        # code for LSTM model
        LSTM_predict_probability = {}  # saves the softmax output
        LSTM_test_GT = {}  # saves the test set GTs
        LSTM__modelpred = {}  # saves the final outputs
        LSTM_con = {}  # saves the confusion matrix per speaker
        LSTM_UWR = {}  # saves the unweighted recall per speaker
        speaker = 0
        for train, test in logo.split(features, labels, speaker_id):
            label_train = np_utils.to_categorical(labels[train])
            # Set callback functions to early stop training
            callbacks = [EarlyStopping(monitor='val_loss', patience=10)]
            model = Sequential()
            model.add(
                Masking(mask_value=0.,
                        input_shape=(features.shape[1], features.shape[2])))
            if model_type == 'bidirectional':
                model.add(Bidirectional(LSTM(128, return_sequences=True)))
                model.add(Dropout(0.5))
                model.add(Bidirectional(LSTM(128)))
            elif model_type == 'unidirectional':
                model.add(LSTM(128, return_sequences=True))
                model.add(Dropout(0.5))
                model.add(LSTM(128))
            else:
                print('error in model type')
                break
            model.add(Dropout(0.5))
            model.add(Dense(256, activation='relu'))
            model.add(Dropout(0.5))
            model.add(Dense(4, activation='softmax'))
            adam = keras.optimizers.Adam(lr=0.0001,
                                         beta_1=0.9,
                                         beta_2=0.999,
                                         epsilon=None,
                                         decay=0.0,
                                         amsgrad=False)
            model.compile(loss='categorical_crossentropy',
                          optimizer=adam,
                          metrics=['accuracy'])
            model.fit(features[train],
                      label_train,
                      epochs=50,
                      validation_split=0.20,
                      callbacks=callbacks,
                      batch_size=128,
                      verbose=1)
            X_pred = model.predict(features[test, :])
            LSTM_predict_probability[speaker] = X_pred
            LSTM_test_GT[speaker] = labels[test]
            Y_pred = np.argmax(X_pred, axis=1)
            LSTM_con[speaker] = confusion_matrix(labels[test], Y_pred)
            LSTM_UWR[speaker] = recall_score(labels[test],
                                             Y_pred,
                                             average='macro')
            LSTM__modelpred[speaker] = Y_pred
            speaker += 1
        return LSTM_test_GT, LSTM__modelpred, LSTM_predict_probability
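
The loop above relies on scikit-learn's LeaveOneGroupOut to hold each speaker out in turn; a tiny standalone illustration of the splits it yields:

import numpy as np
from sklearn.model_selection import LeaveOneGroupOut

X = np.arange(12).reshape(6, 2)
y = np.array([0, 1, 0, 1, 0, 1])
groups = np.array([1, 1, 2, 2, 3, 3])  # one group id per speaker
for train, test in LeaveOneGroupOut().split(X, y, groups):
    print(train, test)  # each speaker's rows appear once as the test set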
Example #7
    def _build_network(self,
                       vocab_size,
                       maxlen,
                       emb_weights=[],
                       hidden_units=256,
                       trainable=False):
        print('Build model...')

        model = Sequential()

        model.add(Masking(mask_value=0, input_shape=(maxlen, )))

        if (len(emb_weights) == 0):
            model.add(
                Embedding(vocab_size,
                          20,
                          input_length=maxlen,
                          embeddings_initializer='he_normal',
                          trainable=trainable,
                          mask_zero=True))
        else:
            model.add(
                Embedding(vocab_size,
                          emb_weights.shape[1],
                          input_length=maxlen,
                          weights=[emb_weights],
                          trainable=trainable))

        model.add(Reshape((model.output_shape[1], model.output_shape[2], 1)))

        model.add(
            Convolution2D(int(hidden_units / 8), (5, 1),
                          kernel_initializer='he_normal',
                          padding='valid',
                          activation='relu'))
        model.add(MaxPooling2D((2, 1)))
        model.add(Dropout(0.5))

        model.add(
            Convolution2D(int(hidden_units / 4), (3, 1),
                          kernel_initializer='he_normal',
                          padding='valid',
                          activation='relu'))
        model.add(MaxPooling2D((2, 1)))
        model.add(Dropout(0.5))

        # Flatten the conv feature maps so the final Dense(2) output
        # matches the 2-way labels
        model.add(Flatten())

        model.add(
            Dense(int(hidden_units / 2),
                  kernel_initializer='he_normal',
                  activation='relu'))
        model.add(Dropout(0.5))

        model.add(Dense(2, activation='softmax'))

        adam = Adam(lr=0.001)
        model.compile(loss='categorical_crossentropy',
                      optimizer=adam,
                      metrics=['accuracy'])
        print('No of parameter:', model.count_params())

        print(model.summary())
        return model
Example #8
def make_model(input_dim, embed_dim, char_hidden_dim, word_hidden_dim,
               output_dim, max_chars, max_len):
    """Creates a 2 level BiLSTM based model

    Arguments:
        input_dim {int} -- Size of the input vocabulary + 1 (adjusted for 0), i.e. the number of unique characters
        embed_dim {int} -- Size of character embeddings
        char_hidden_dim {int} -- Size of char-level LSTM hidden state
        word_hidden_dim {int} -- Size of word-level LSTM hidden state
        output_dim {int} -- Number of output classes (4: other, en, es, padding)
        max_chars {int} -- Max chars in a token
        max_len {int} -- Max tokens in a tweet

    Returns:
        keras.models.Model -- Instance of a Keras Model
    """
    dropout_prob = 0.
    r1, r2 = None, None
    # r1=regularizers.l2(0.001)
    # r2=regularizers.l2(0.001)

    inputs = Input(shape=(max_chars * max_len, ))
    word_mask = Input(shape=(max_len, 2 * char_hidden_dim))
    '''
        Add character embeddings. Keep mask_zero = True.
        This is needed because we have added padding at char level and want to ignore this padding in further processing.
    '''
    embeddings = Embedding(input_dim,
                           embed_dim,
                           embeddings_initializer='uniform',
                           mask_zero=True)
    embed = embeddings(inputs)
    '''
        Reshape to a rank 3 tensor of (batch_size, max_chars, embed_dim)
        This tensor will hold the representation for each char in each word in each tweet (plus padding as required)
    '''
    reshape_layer_1 = Lambda(
        lambda x: K.reshape(x, shape=[-1, max_chars, embed_dim]))
    reshaped = reshape_layer_1(embed)

    # Apply char-level LSTM
    char_lstm = Bidirectional(LSTM(units=char_hidden_dim,
                                   kernel_regularizer=r1),
                              merge_mode='concat')
    char_hidden = char_lstm(reshaped)
    '''
        Reshape to a rank 3 tensor of (batch_size, max_len, 2 * char_hidden_dim).
        This tensor will essentially capture the representation for each word in each tweet (plus padding as required)
    '''
    reshape_layer_2 = Lambda(
        lambda x: K.reshape(x, shape=[-1, max_len, 2 * char_hidden_dim]))
    reshaped_2 = reshape_layer_2(char_hidden)
    '''
        Apply the mask at word level.
        This is needed because we have added padding at word level and don't want to compute the loss or metrics for this padding.
    '''
    mask_multiply = Multiply()
    masked = mask_multiply([reshaped_2, word_mask])
    mask_layer = Masking(mask_value=0.)
    masked = mask_layer(masked)

    # Apply the word-level LSTM
    word_lstm = Bidirectional(LSTM(units=word_hidden_dim,
                                   kernel_regularizer=r2,
                                   return_sequences=True),
                              merge_mode='concat')
    word_hidden = word_lstm(masked)
    dropout = Dropout(dropout_prob)
    dropped = dropout(word_hidden)

    # Project the word-level hidden representation to output space
    dense = Dense(output_dim, activation='softmax')
    output_probs = dense(dropped)

    # create model
    model = Model(inputs=[inputs, word_mask], outputs=output_probs)
    # print model's layerwise summary
    print(model.summary())
    return model
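
A sketch of how the word_mask input could be prepared per tweet (a hypothetical helper, not part of the original model):

import numpy as np

def build_word_mask(n_tokens, max_len, char_hidden_dim):
    # Rows of ones for real tokens, rows of zeros for padded positions,
    # so Multiply() zeroes the padding before the Masking layer drops it.
    mask = np.zeros((max_len, 2 * char_hidden_dim), dtype='float32')
    mask[:n_tokens, :] = 1.0
    return mask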
Example #9
def trainRNN(X_train, X_test, y_train, y_test, w_train, w_test, sequence,
             collection, unit_type, n_units, combinedDim, epochs, batchSize,
             dropout, optimizer, activation, initializer, learningRate=0.01,
             decay=0.0, momentum=0.0, nesterov=False, mergeModels=False,
             multiclass=False):
  print('Performing a Deep Recurrent Neural Net!')

  if isinstance(sequence, list):
    for seq in sequence:
      print('Prepare channel for {} collection...'.format(seq['name']))
      SHAPE = seq['X_train'].shape[1:]
      seq['input'] = Input(SHAPE)
      seq['channel'] = Masking(mask_value=-999, name=seq['name']+'_masking')(seq['input'])
      if unit_type.lower() == 'lstm':
        seq['channel'] = LSTM(n_units, name=seq['name']+'_lstm')(seq['channel'])
      if unit_type.lower() == 'gru':
        seq['channel'] = GRU(n_units, name=seq['name']+'_gru')(seq['channel'])
      seq['channel'] = Dropout(dropout, name=seq['name']+'_dropout')(seq['channel'])

  if mergeModels:
    print('Going to merge sequence model with common NN!')
    print('Standardize training set...')
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    model_inputs = Input(shape=(X_train.shape[1], ))
    layer = Dense(n_units, activation=activation, kernel_initializer=initializer)(model_inputs)
    layer = BatchNormalization()(layer)
    layer = Dropout(dropout)(layer)
    
  if mergeModels:
    combined = concatenate([c['channel'] for c in sequence]+[layer])
  else:
    if len(sequence)>1:
      combined = concatenate([c['channel'] for c in sequence])
    else:
      combined = sequence[0]['channel']
    #for layer in combinedDim:
    #  combined = Dense(layer, activation = activation)(combined)
    #  combined = Dropout(dropout)(combined)
  if multiclass:
    combined_outputs = Dense(len(np.bincount(y_train)), activation='softmax')(combined)
    loss = 'categorical_crossentropy'
  else:
    combined_outputs = Dense(1, activation='sigmoid')(combined)
    loss = 'binary_crossentropy'
  
  if mergeModels:
    combined_rnn = Model(inputs=[seq['input'] for seq in sequence]+[model_inputs], outputs=combined_outputs)
  else:
    if len(sequence)>1:
      combined_rnn = Model(inputs=[seq['input'] for seq in sequence], outputs=combined_outputs)
    else:
      combined_rnn = Model(inputs=sequence[0]['input'], outputs=combined_outputs)

  combined_rnn.summary()
  combined_rnn.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])
  print('Training...')
  class_weight = compute_class_weight('balanced', np.unique(y_train), y_train)
  try:
    if mergeModels:
      history = combined_rnn.fit([seq['X_train'] for seq in sequence]+[X_train], y_train,
                class_weight=class_weight, sample_weight=w_train, epochs=epochs, batch_size=batchSize,
                callbacks = [EarlyStopping(verbose=True, patience=10, monitor='loss')])
                #ModelCheckpoint('./models/combinedrnn_tutorial-progress', monitor='val_loss', verbose=True, save_best_only=True)
    else:
      history = combined_rnn.fit([seq['X_train'] for seq in sequence], y_train,
                class_weight=class_weight, sample_weight=w_train, epochs=epochs, batch_size=batchSize,
                callbacks = [EarlyStopping(verbose=True, patience=10, monitor='acc')])
  except KeyboardInterrupt:
      print('Training ended early.')

  print('Testing...')
  if mergeModels:
    score = combined_rnn.evaluate([seq['X_test'] for seq in sequence]+[X_test], y_test, batch_size=batchSize)
    y_predicted = combined_rnn.predict([seq['X_test'] for seq in sequence]+[X_test], batch_size=batchSize)
  else:
    if len(sequence)>1:
      score = combined_rnn.evaluate([seq['X_test'] for seq in sequence], y_test)
      y_predicted = combined_rnn.predict([seq['X_test'] for seq in sequence], batch_size=batchSize)
    else:
      score = combined_rnn.evaluate(sequence[0]['X_test'], y_test)
      y_predicted = combined_rnn.predict(sequence[0]['X_test'], batch_size=batchSize)
  #print("\n%s: %.2f%%" % (combined_rnn.metrics_names[0], score[0]*100))
  #print("\n%s: %.2f%%" % (combined_rnn.metrics_names[1], score[1]*100))
  

  print "RNN finished!"
  return combined_rnn, history, y_predicted
Example #10
maxtracks = [5, 10, 20, 50]
nepochs = [1, 5, 10, 20]

for mt in maxtracks:
    scores = []
    aucs = []
    epochs = []
    for ne in nepochs:
        filename = output_dir + 'model_ntrk%(mt)d_nepochs%(ne)d' % {'mt': mt, 'ne': ne}
        print(filename)
        if not os.path.isfile(filename):
            print('not a file')
            continue
        epochs.append(ne)
        model = Sequential()
        model.add(Masking(mask_value=0.0, input_shape=(max_len, len(vars))))
        model.add(LSTM(output_dim=16, activation='sigmoid', inner_activation='hard_sigmoid'))
        model.add(Dropout(0.2))
        model.add(Dense(1))
        model.add(Activation('sigmoid'))
        model.compile(loss='binary_crossentropy',
                      optimizer='rmsprop',
                      metrics=['accuracy'])
        model.load_weights(filename)
        print('Weights loaded')
        score = model.evaluate(X, y, batch_size=16)
        scores.append(score[1])
        print('score {}'.format(score))
        proba = model.predict_proba(X, batch_size=16)
        fpr, tpr, thres = roc_curve(y, proba)
        area = auc(fpr, tpr)
Example #11
def test_merge():
    from keras.layers import Input, merge, Merge, Masking
    from keras.models import Model

    # test modes: 'sum', 'mul', 'concat', 'ave', 'cos', 'dot'.
    input_shapes = [(3, 2), (3, 2)]
    inputs = [np.random.random(shape) for shape in input_shapes]

    # test functional API
    for mode in ['sum', 'mul', 'concat', 'ave', 'max']:
        print(mode)
        input_a = Input(shape=input_shapes[0][1:])
        input_b = Input(shape=input_shapes[1][1:])
        merged = merge([input_a, input_b], mode=mode)
        model = Model([input_a, input_b], merged)
        model.compile('rmsprop', 'mse')

        expected_output_shape = model.get_output_shape_for(input_shapes)
        actual_output_shape = model.predict(inputs).shape
        assert expected_output_shape == actual_output_shape

        config = model.get_config()
        model = Model.from_config(config)
        model.compile('rmsprop', 'mse')

        # test Merge (#2460)
        merged = Merge(mode=mode)([input_a, input_b])
        model = Model([input_a, input_b], merged)
        model.compile('rmsprop', 'mse')

        expected_output_shape = model.get_output_shape_for(input_shapes)
        actual_output_shape = model.predict(inputs).shape
        assert expected_output_shape == actual_output_shape

    # test lambda with output_shape lambda
    input_a = Input(shape=input_shapes[0][1:])
    input_b = Input(shape=input_shapes[1][1:])
    merged = merge([input_a, input_b],
                   mode=lambda tup: K.concatenate([tup[0], tup[1]]),
                   output_shape=lambda tup: tup[0][:-1] +
                   (tup[0][-1] + tup[1][-1], ))
    model = Model([input_a, input_b], merged)
    expected_output_shape = model.get_output_shape_for(input_shapes)
    actual_output_shape = model.predict(inputs).shape
    assert expected_output_shape == actual_output_shape

    config = model.get_config()
    model = Model.from_config(config)
    model.compile('rmsprop', 'mse')

    # test function with output_shape function
    def fn_mode(tup):
        x, y = tup
        return K.concatenate([x, y], axis=1)

    def fn_output_shape(tup):
        s1, s2 = tup
        return (s1[0], s1[1] + s2[1]) + s1[2:]

    input_a = Input(shape=input_shapes[0][1:])
    input_b = Input(shape=input_shapes[1][1:])
    merged = merge([input_a, input_b],
                   mode=fn_mode,
                   output_shape=fn_output_shape)
    model = Model([input_a, input_b], merged)
    expected_output_shape = model.get_output_shape_for(input_shapes)
    actual_output_shape = model.predict(inputs).shape
    assert expected_output_shape == actual_output_shape

    config = model.get_config()
    model = Model.from_config(config)
    model.compile('rmsprop', 'mse')

    # test function with output_mask function
    # time dimension is required for masking
    input_shapes = [(4, 3, 2), (4, 3, 2)]
    inputs = [np.random.random(shape) for shape in input_shapes]

    def fn_output_mask(tup):
        x_mask, y_mask = tup
        return K.concatenate([x_mask, y_mask])

    input_a = Input(shape=input_shapes[0][1:])
    input_b = Input(shape=input_shapes[1][1:])
    a = Masking()(input_a)
    b = Masking()(input_b)
    merged = merge([a, b],
                   mode=fn_mode,
                   output_shape=fn_output_shape,
                   output_mask=fn_output_mask)
    model = Model([input_a, input_b], merged)
    expected_output_shape = model.get_output_shape_for(input_shapes)
    actual_output_shape = model.predict(inputs).shape
    assert expected_output_shape == actual_output_shape

    config = model.get_config()
    model = Model.from_config(config)
    model.compile('rmsprop', 'mse')

    mask_inputs = (np.zeros(input_shapes[0][:-1]),
                   np.ones(input_shapes[1][:-1]))
    expected_mask_output = np.concatenate(mask_inputs, axis=-1)
    mask_input_placeholders = [
        K.placeholder(shape=input_shape[:-1]) for input_shape in input_shapes
    ]
    mask_output = model.layers[-1]._output_mask(mask_input_placeholders)
    assert np.all(
        K.function(mask_input_placeholders, [mask_output])(mask_inputs)[0] ==
        expected_mask_output)

    # test lambda with output_mask lambda
    input_a = Input(shape=input_shapes[0][1:])
    input_b = Input(shape=input_shapes[1][1:])
    a = Masking()(input_a)
    b = Masking()(input_b)
    merged = merge([a, b],
                   mode=lambda tup: K.concatenate([tup[0], tup[1]], axis=1),
                   output_shape=lambda tup:
                   (tup[0][0], tup[0][1] + tup[1][1]) + tup[0][2:],
                   output_mask=lambda tup: K.concatenate([tup[0], tup[1]]))
    model = Model([input_a, input_b], merged)
    expected_output_shape = model.get_output_shape_for(input_shapes)
    actual_output_shape = model.predict(inputs).shape
    assert expected_output_shape == actual_output_shape

    config = model.get_config()
    model = Model.from_config(config)
    model.compile('rmsprop', 'mse')

    mask_output = model.layers[-1]._output_mask(mask_input_placeholders)
    assert np.all(
        K.function(mask_input_placeholders, [mask_output])(mask_inputs)[0] ==
        expected_mask_output)

    # test with arguments
    input_shapes = [(3, 2), (3, 2)]
    inputs = [np.random.random(shape) for shape in input_shapes]

    def fn_mode(tup, a, b):
        x, y = tup
        return x * a + y * b

    input_a = Input(shape=input_shapes[0][1:])
    input_b = Input(shape=input_shapes[1][1:])
    merged = merge([input_a, input_b],
                   mode=fn_mode,
                   output_shape=lambda s: s[0],
                   arguments={
                       'a': 0.7,
                       'b': 0.3
                   })
    model = Model([input_a, input_b], merged)
    output = model.predict(inputs)

    config = model.get_config()
    model = Model.from_config(config)

    assert np.all(model.predict(inputs) == output)
Example #12
def deeplearning():
    # Parameter settings
    BATCH_SIZE = 100  # sentences per training batch
    TIME_STEPS = 30  # word vectors per sentence
    INPUT_SIZE = 200  # length of each word vector
    OUTPUT_SIZE = 4  # width of the labels
    LR = 0.001

    # One-hot encode the sample labels
    y_train = np_utils.to_categorical(xunlian_label, num_classes=OUTPUT_SIZE)
    y_test = np_utils.to_categorical(ceshi_label, num_classes=OUTPUT_SIZE)
    y_validation = np_utils.to_categorical(yanzheng_label,
                                           num_classes=OUTPUT_SIZE)

    # Pad the word-vector sequences to a uniform length
    x_train = pad_sequences(xunlian,
                            maxlen=TIME_STEPS,
                            padding='post',
                            dtype='float')
    x_test = pad_sequences(ceshi,
                           maxlen=TIME_STEPS,
                           padding='post',
                           dtype='float')
    x_validation = pad_sequences(yanzheng,
                                 maxlen=TIME_STEPS,
                                 padding='post',
                                 dtype='float')

    # Build the model
    # I tried RNN, LSTM and GRU; GRU worked relatively best
    model = Sequential()
    model.add(Masking(
        mask_value=0,
        input_shape=(TIME_STEPS,
                     INPUT_SIZE)))  # mask_value strips the zero word vectors, giving the GRU variable-length input
    model.add(Bidirectional(GRU(64)))  # common sizes here are 32, 64, 128, 256; I found 32 worked best
    model.add(Dropout(0.5))  # a setting on the GRU gates, to prevent overfitting
    model.add(Dense(OUTPUT_SIZE))
    model.add(Activation('softmax'))

    adam = Adam(LR)
    model.compile(optimizer=adam,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # Train the model
    result = model.fit(x_train,
                       y_train,
                       batch_size=BATCH_SIZE,
                       nb_epoch=5,
                       verbose=1,
                       validation_data=(x_test, y_test))

    # Evaluate
    score, acc = model.evaluate(x_validation,
                                y_validation,
                                batch_size=BATCH_SIZE,
                                verbose=1)
    print('Accuracy under four-way classification: {}'.format(acc))

    # Given the low four-way accuracy above, let's look at the binary split
    validation_label = model.predict_classes(x_validation,
                                             batch_size=BATCH_SIZE)
    num = 0
    for i in range(len(x_validation)):
        if validation_label[i] <= 1 and yanzheng_label[i] <= 1:
            num += 1
        if validation_label[i] >= 2 and yanzheng_label[i] >= 2:
            num += 1
    print('Accuracy under binary classification: {}'.format(
        num / len(x_validation)))  # four-way isn't very accurate, but the positive/negative market-sentiment call is decent

    model.save('./GRU/gru.h5')
Example #13
word2vec.save('word2vec_words_final.model')
word2vec.init_sims(replace=True)  # normalize in advance so the word vectors are unaffected by scale

print(u'Running the first training pass......')
'''
Train the model with the latest version of Keras, with GPU acceleration (mine is a GTX 960).
The Bidirectional wrapper currently requires the GitHub version of Keras.
'''
from keras.layers import Dense, LSTM, Lambda, TimeDistributed, Input, Masking, Bidirectional
from keras.models import Model
from keras.utils import np_utils
from keras import regularizers
# from keras.regularizers import activity_regularizer  # an L1 activity penalty (activity_l1) makes the output sparser

sequence = Input(shape=(maxlen, word_size))
mask = Masking(mask_value=0.)(sequence)
blstm = Bidirectional(LSTM(64, return_sequences=True), merge_mode='sum')(mask)
blstm = Bidirectional(LSTM(32, return_sequences=True), merge_mode='sum')(blstm)
output = TimeDistributed(
    Dense(5, activation='softmax',
          activity_regularizer=regularizers.l1(0.01)))(blstm)
model = Model(input=sequence, output=output)
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
'''
gen_matrix builds the training samples from the tokenized word lists;
gen_target converts the output sequences into one-hot targets;
sequences longer than maxlen are truncated, shorter ones are zero-padded
'''
gen_matrix = lambda z: np.vstack(
Example #14
    def create_model_hierarchy(cls,
                               bottom_item_list,
                               emb_wgts_bottom_items_dict,
                               layer_nums=3,
                               rnn_state_size=[],
                               bottom_emb_item_len=3,
                               flag_embedding_trainable=1,
                               seq_len=39,
                               batch_size=20,
                               mode_attention=1,
                               drop_out_r=0.,
                               att_layer_cnt=2,
                               bhDwellAtt=0,
                               rnn_type="WGRU",
                               RNN_norm="GRU",
                               flagCid3RNNNorm=False):
        c_mask_value = 0.
        att_zero_value = -2 ** 31  # very negative fill for masked attention logits

        def slice(x):
            return x[:, -1, :]

        flag_concate_sku_cid = True
        RNN = rnn_type
        MODE_BHDWELLATT = True if bhDwellAtt == 1 else False

        ATT_NET_LAYER_CNT = att_layer_cnt
        bottom_item_len = len(bottom_item_list)

        input = [None] * bottom_item_len
        word_num = [None] * bottom_item_len
        emb_len = [None] * bottom_item_len
        embedding_bottom_item = [None] * bottom_item_len
        embed = [None] * bottom_item_len

        layer_nums_max = 3

        rnn_embed = [None] * layer_nums_max
        rnn = [None] * layer_nums_max
        rnn_output = [None] * layer_nums_max

        flag_embedding_trainable = True if flag_embedding_trainable == 1 else False

        ##Embedding layer
        # Embedding sku, bh, cid3, gap, dwell: 0, 1, 2, 3, 4

        for i in range(bottom_item_len):
            bottom_item = bottom_item_list[i]
            ###input
            input[i] = Input(batch_shape=(
                batch_size,
                seq_len,
            ),
                             dtype='int32')

            ###Embedding
            # load embedding weights
            # emb_wgts[i] = np.loadtxt(init_wgts_file_emb[i])
            word_num[i], emb_len[i] = emb_wgts_bottom_items_dict[
                bottom_item].shape
            print(word_num[i], emb_len[i])
            # get embedding

            cur_flag_embedding_trainable = flag_embedding_trainable
            if (i == 0):
                cur_flag_embedding_trainable = False
            embedding_bottom_item[i] = Embedding(
                word_num[i],
                emb_len[i],
                input_length=seq_len,
                trainable=cur_flag_embedding_trainable)
            embed[i] = embedding_bottom_item[i](input[i])  # drop_out=0.2
            embedding_bottom_item[i].set_weights(
                [emb_wgts_bottom_items_dict[bottom_item]])

        # cal mask
        mask_sku = np.zeros((batch_size, seq_len))
        mask_cid3 = np.zeros((batch_size, seq_len))

        for j in range(batch_size):
            sku = input[0][j, :]
            cid3 = input[2][j, :]

            for k in range(seq_len - 1):
                if (sku[k] == 0 or sku[k] == sku[k + 1]):
                    mask_sku[j][k] = 1
                if (sku[k] == 0 or cid3[k] == cid3[k + 1]):
                    mask_cid3[j][k] = 1

        # f mask
        def f_mask_sku(x):
            x_new = x
            for j in range(batch_size):
                for k in range(seq_len):
                    if (mask_sku[j][k] == 1):
                        x_new = T.set_subtensor(x_new[j, k, :], c_mask_value)
            return x_new

        def f_mask_cid3(x):
            x_new = x
            for j in range(batch_size):
                for k in range(seq_len):
                    if (mask_cid3[j][k] == 1):
                        x_new = T.set_subtensor(x_new[j, k, :], c_mask_value)
            return x_new

        def f_mask_att_sku(x):
            x_new = x
            for j in range(batch_size):
                for k in range(seq_len):
                    if (mask_sku[j][k] == 1):
                        x_new = T.set_subtensor(x_new[j, k], att_zero_value)
            return x_new

        def f_mask_att_cid3(x):
            x_new = x
            for j in range(batch_size):
                for k in range(seq_len):
                    if (mask_cid3[j][k] == 1):
                        x_new = T.set_subtensor(x_new[j, k], att_zero_value)
            return x_new

        def K_dot(arr):
            axes = [1, 1]
            x, y = arr[0], arr[1]
            return K.batch_dot(x, y, axes=axes)

        def K_squeeze(x):
            return K.squeeze(x, axis=-1)

        Lambda_sequeeze = Lambda(lambda x: K_squeeze(x))
        ##RNN layer
        if (RNN == "BLSTM"):
            rnn[0] = BLSTM(rnn_state_size[0],
                           interval_dim=emb_len[3],
                           weight_dim=emb_len[1],
                           stateful=False,
                           return_sequences=True,
                           dropout=drop_out_r,
                           name="rnn_out_micro")
            rnn[1] = BLSTM(rnn_state_size[1],
                           interval_dim=emb_len[3],
                           weight_dim=emb_len[4],
                           stateful=False,
                           return_sequences=True,
                           dropout=drop_out_r,
                           name="rnn_out_sku")
            if (not flagCid3RNNNorm):
                rnn[2] = BLSTM(rnn_state_size[2],
                               interval_dim=emb_len[3],
                               weight_dim=0,
                               stateful=False,
                               return_sequences=True,
                               dropout=drop_out_r,
                               name="rnn_out_cid3")

        elif (RNN == "BLSTM2"):
            rnn[0] = BLSTM2(rnn_state_size[0],
                            interval_dim=emb_len[3],
                            weight_dim=emb_len[1],
                            stateful=False,
                            return_sequences=True,
                            dropout=drop_out_r,
                            name="rnn_out_micro")
            rnn[1] = BLSTM2(rnn_state_size[1],
                            interval_dim=emb_len[3],
                            weight_dim=emb_len[4],
                            stateful=False,
                            return_sequences=True,
                            dropout=drop_out_r,
                            name="rnn_out_sku")
            if (not flagCid3RNNNorm):
                rnn[2] = BLSTM2(rnn_state_size[2],
                                interval_dim=emb_len[3],
                                weight_dim=0,
                                stateful=False,
                                return_sequences=True,
                                dropout=drop_out_r,
                                name="rnn_out_cid3")

        elif (RNN == "TimeLSTM"):
            rnn[0] = BLSTM(rnn_state_size[0],
                           interval_dim=emb_len[3],
                           weight_dim=0,
                           stateful=False,
                           return_sequences=True,
                           dropout=drop_out_r,
                           name="rnn_out_micro")
            rnn[1] = BLSTM(rnn_state_size[1],
                           interval_dim=emb_len[3],
                           weight_dim=0,
                           stateful=False,
                           return_sequences=True,
                           dropout=drop_out_r,
                           name="rnn_out_sku")
            if (not flagCid3RNNNorm):
                rnn[2] = BLSTM(rnn_state_size[2],
                               interval_dim=emb_len[3],
                               weight_dim=0,
                               stateful=False,
                               return_sequences=True,
                               dropout=drop_out_r,
                               name="rnn_out_cid3")
        elif (RNN == "WGRU"):
            rnn[0] = WGRU(rnn_state_size[0],
                          weight_dim=emb_len[1],
                          stateful=False,
                          return_sequences=True,
                          dropout=drop_out_r,
                          name="rnn_out_micro")
            rnn[1] = WGRU(rnn_state_size[1],
                          weight_dim=emb_len[3],
                          stateful=False,
                          return_sequences=True,
                          dropout=drop_out_r,
                          name="rnn_out_sku")
            if (not flagCid3RNNNorm):
                rnn[2] = WGRU(rnn_state_size[2],
                              weight_dim=emb_len[3],
                              stateful=False,
                              return_sequences=True,
                              dropout=drop_out_r,
                              name="rnn_out_cid3")
        elif (RNN == "LSTM" or RNN == "GRU"):
            RNN = LSTM if RNN == "LSTM" else GRU
            rnn[0] = RNN(rnn_state_size[0],
                         stateful=False,
                         return_sequences=True,
                         dropout=drop_out_r,
                         name="rnn_out_micro")
            rnn[1] = RNN(rnn_state_size[1],
                         stateful=False,
                         return_sequences=True,
                         dropout=drop_out_r,
                         name="rnn_out_sku")
        else:
            print "%s is not valid RNN!" % RNN

        if (RNN_norm == "LSTM"):
            rnn_cid3 = LSTM
        else:
            rnn_cid3 = GRU
        if (flagCid3RNNNorm):
            rnn[2] = rnn_cid3(rnn_state_size[2],
                              stateful=False,
                              return_sequences=True,
                              dropout=drop_out_r,
                              name="rnn_out_cid3")

        #rnn embed 0
        if (bottom_emb_item_len == 5):
            rnn_embed[0] = Concatenate(axis=-1)(
                [embed[0], embed[1], embed[2], embed[3], embed[4]])
        elif (bottom_emb_item_len == 4):
            rnn_embed[0] = Concatenate(axis=-1)(
                [embed[0], embed[1], embed[2], embed[3]])
        elif (bottom_emb_item_len == 3):
            rnn_embed[0] = Concatenate(axis=-1)([embed[0], embed[1], embed[2]])
        elif (bottom_emb_item_len == 1):
            rnn_embed[0] = embed[0]
        elif (bottom_emb_item_len == 2):
            rnn_embed[0] = Concatenate(axis=-1)([embed[0], embed[2]])
        else:
            rnn_embed[0] = Concatenate(axis=-1)(
                [embed[0], embed[1], embed[2], embed[3]])

        #add interval, wei
        if (RNN == "WGRU"):
            rnn_embed[0] = Concatenate(axis=-1)([rnn_embed[0], embed[1]])

        if (RNN == "BLSTM" or RNN == "BLSTM2"):
            rnn_embed[0] = Concatenate(axis=-1)(
                [rnn_embed[0], embed[3], embed[1]])

        if (RNN == "TimeLSTM"):
            rnn_embed[0] = Concatenate(axis=-1)([rnn_embed[0], embed[3]])

        #rnn micro
        rnn_output[0] = rnn[0](rnn_embed[0])

        # rnn sku
        if (flag_concate_sku_cid):
            rnn_embed[1] = Concatenate(axis=-1)([embed[0], rnn_output[0]])
        else:
            rnn_embed[1] = rnn_output[0]

        # mask sku
        #rnn embed 1
        # rnn_embed[1] = Lambda(f_mask_sku, output_shape=(seq_len, rnn_state_size[1]))(rnn_embed[1])

        if (RNN == "WGRU"):
            rnn_embed[1] = Concatenate(axis=-1)([rnn_embed[1], embed[4]])

        if (RNN == "BLSTM" or RNN == "BLSTM2"):
            rnn_embed[1] = Concatenate(axis=-1)(
                [rnn_embed[1], embed[3], embed[4]])

        if (RNN == "TimeLSTM"):
            rnn_embed[1] = Concatenate(axis=-1)([rnn_embed[1], embed[3]])

        rnn_embed[1] = Lambda(f_mask_sku)(rnn_embed[1])
        rnn_embed[1] = Masking(mask_value=c_mask_value)(rnn_embed[1])

        rnn_output[1] = rnn[1](rnn_embed[1])

        # rnn cid3
        if (flag_concate_sku_cid):
            rnn_embed[2] = Concatenate()([embed[2], rnn_output[1]])
        else:
            rnn_embed[2] = rnn_output[1]

        if (not flagCid3RNNNorm):
            rnn_embed[2] = Concatenate(axis=-1)([rnn_embed[2], embed[3]])

        # mask cid3
        # rnn_embed[2] = Lambda(f_mask_cid3, output_shape=(seq_len, rnn_state_size[2]))(rnn_embed[2])
        rnn_embed[2] = Lambda(f_mask_cid3)(rnn_embed[2])
        rnn_embed[2] = Masking(mask_value=c_mask_value)(rnn_embed[2])
        rnn_output[2] = rnn[2](rnn_embed[2])

        # rnn final output
        rnn_out_final = rnn_output[layer_nums - 1]

        rnn_out_micro = rnn_output[0]
        rnn_out_sku = rnn_output[1]
        rnn_out_cid3 = rnn_output[2]

        # predict sku, cid3
        if (mode_attention == 0):
            # micro
            att_out_micro = Lambda(
                slice, output_shape=(rnn_state_size[0], ))(rnn_out_micro)
            # trans to sku emb len
            out_micro_sku_emb = Dense(emb_len[0],
                                      activation="tanh")(att_out_micro)
            out_micro = out_micro_sku_emb

            # sku
            att_out_sku = Lambda(
                slice, output_shape=(rnn_state_size[1], ))(rnn_out_sku)
            # trans to sku emb len
            out_sku_emb = Dense(emb_len[0], activation="tanh")(att_out_sku)
            out_sku = out_sku_emb

            # cid3
            att_out_cid3 = Lambda(
                slice, output_shape=(rnn_state_size[2], ))(rnn_out_cid3)
            out_cid3_emb = Dense(emb_len[2], activation="tanh")(att_out_cid3)
            out_cid3 = out_cid3_emb
            # out_cid3 = Dense(word_num[2], activation="softmax")(out_cid3_emb)

        if (mode_attention == 2):
            # atten micro
            m_h = rnn_out_micro
            m_h_last = Lambda(slice,
                              output_shape=(rnn_state_size[0], ),
                              name="rnn_out_micro_last")(m_h)
            m_h_r = RepeatVector(seq_len)(m_h_last)
            if (MODE_BHDWELLATT):
                m_h_c = Concatenate(axis=-1)([m_h, m_h_r, embed[1]])
            else:
                m_h_c = Concatenate(axis=-1)([m_h, m_h_r])
            if (ATT_NET_LAYER_CNT == 2):
                m_h_a_1 = TimeDistributed(
                    Dense(ATT_NET_HIDSIZE, activation='tanh'))(m_h_c)
                m_h_a = TimeDistributed(Dense(1, activation='tanh'))(m_h_a_1)
            else:
                m_h_a = TimeDistributed(Dense(1, activation='tanh'))(m_h_c)
            m_h_a = Lambda(lambda x: x, output_shape=lambda s: s)(m_h_a)
            m_att = Flatten()(m_h_a)

            m_att_micro = Softmax(name="att_micro")(m_att)
            m_att_out = Lambda(K_dot,
                               output_shape=(rnn_state_size[0], ),
                               name="out_micro_pre")([m_h, m_att_micro])

            # trans to sku emb len
            out_micro = Dense(emb_len[0], activation="tanh")(m_att_out)

            # attenion sku
            s_h = rnn_out_sku
            s_h_last = Lambda(slice,
                              output_shape=(rnn_state_size[1], ),
                              name="rnn_out_sku_last")(s_h)
            s_h_r = RepeatVector(seq_len)(s_h_last)
            if (MODE_BHDWELLATT):
                s_h_c = Concatenate(axis=-1)([s_h, s_h_r, embed[4]])
            else:
                s_h_c = Concatenate(axis=-1)([s_h, s_h_r])
            if (ATT_NET_LAYER_CNT == 2):
                s_h_a_1 = TimeDistributed(
                    Dense(ATT_NET_HIDSIZE, activation='tanh'))(s_h_c)
                s_h_a = TimeDistributed(Dense(1, activation='tanh'))(s_h_a_1)
            else:
                s_h_a = TimeDistributed(Dense(1, activation='tanh'))(s_h_c)
            s_h_a = Lambda(lambda x: x, output_shape=lambda s: s)(s_h_a)
            s_att = Flatten()(s_h_a)
            s_att = Lambda(f_mask_att_sku)(s_att)
            s_att_sku = Softmax(axis=-1, name="att_sku")(s_att)
            s_att_out = Lambda(K_dot,
                               output_shape=(rnn_state_size[1], ),
                               name="out_sku_pre")([s_h, s_att_sku])

            # attention cid3
            c_h = rnn_out_cid3
            c_h_last = Lambda(slice,
                              output_shape=(rnn_state_size[2], ),
                              name="rnn_out_cid3_last")(c_h)
            c_h_r = RepeatVector(seq_len)(c_h_last)
            c_h_c = Concatenate(axis=-1)([c_h, c_h_r])
            if (ATT_NET_LAYER_CNT == 2):
                c_h_a_1 = TimeDistributed(
                    Dense(ATT_NET_HIDSIZE, activation='tanh'))(c_h_c)
                c_h_a = TimeDistributed(Dense(1, activation='tanh'))(c_h_a_1)
            else:
                c_h_a = TimeDistributed(Dense(1, activation='tanh'))(c_h_c)
            c_h_a = Lambda(lambda x: x, output_shape=lambda s: s)(c_h_a)
            c_att = Flatten()(c_h_a)
            c_att = Lambda(f_mask_att_cid3)(c_att)
            c_att_cid3 = Softmax(axis=-1, name="att_cid3")(c_att)
            c_att_out = Lambda(K_dot,
                               output_shape=(rnn_state_size[2], ),
                               name="out_cid3_pre")([c_h, c_att_cid3])

            out_cid3 = Dense(emb_len[2], activation="tanh")(c_att_out)
            out_sku = Dense(emb_len[0], activation="tanh")(s_att_out)

        # model
        model = Model(
            inputs=[input[0], input[1], input[2], input[3], input[4]],
            outputs=[out_micro, out_sku, out_cid3])

        # return embedding, rnn, ret_with_target, input, out
        return model
Example #15
    for j, char in enumerate(x_sentence):
        x_encoder[i, j, char_indices[char]] = 1  # one-hot encode the encoder input
    for j, char in enumerate(t_sentence):
        x_decoder[i, j, char_indices[char]] = 1  # one-hot encode the decoder input
        if j > 0:  # the target is the input shifted back one time step
            t_decoder[i, j - 1, char_indices[char]] = 1

#print(x_encoder.shape)

# Build the model
batch_size = 32
epochs = 3000
n_mid = 256  # number of neurons in the hidden layer

encoder_input = Input(shape=(None, n_char))
encoder_mask = Masking(mask_value=0)  # ignore input vectors whose elements are all zero
encoder_masked = encoder_mask(encoder_input)
encoder_lstm = GRU(n_mid,
                   dropout=0.2,
                   recurrent_dropout=0.2,
                   return_state=True)  # set dropout to randomly disable neurons
encoder_output, encoder_state_h = encoder_lstm(encoder_masked)

decoder_input = Input(shape=(None, n_char))
decoder_mask = Masking(mask_value=0)  # ignore timesteps whose elements are all zero
decoder_masked = decoder_mask(decoder_input)
decoder_lstm = GRU(n_mid,
                   dropout=0.2,
                   recurrent_dropout=0.2,
                   return_sequences=True,
                   return_state=True)  # dropout as above
Example #16
def bgru(x_train,
         x_val,
         x_test,
         y_train,
         y_val,
         y_test,
         out_dir,
         name='bgru_model',
         hidden_units=10,
         layers=1,
         max_epochs=1000,
         batch_size=32,
         patience=20,
         dropout=0.0,
         recurrent_dropout=0.0):
    """
    Bidirectional GRU model for protein secondary structure prediction.
    """
    num_samples = x_train.shape[0]
    max_seq_len = x_train.shape[1]
    num_features = x_train.shape[2]
    num_classes = y_train.shape[2]

    # Build Keras model
    model = Sequential()
    model.add(Masking(mask_value=0, input_shape=(max_seq_len, num_features)))
    model.add(
        Bidirectional(
            GRU(hidden_units,
                return_sequences=True,
                dropout=dropout,
                recurrent_dropout=recurrent_dropout)))
    if layers > 1:
        for _ in range(layers - 1):
            model.add(
                Bidirectional(
                    GRU(hidden_units,
                        return_sequences=True,
                        dropout=dropout,
                        recurrent_dropout=recurrent_dropout)))
    model.add(TimeDistributed(Dense(num_classes)))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    print(model.summary())

    # Train model. Use early-stopping on validation data to determine when to stop training.
    model_path = os.path.join(out_dir, name + '.h5')
    checkpointer = ModelCheckpoint(model_path, save_best_only=True)
    model.fit(x_train,
              y_train,
              epochs=max_epochs,
              batch_size=batch_size,
              verbose=1,
              validation_data=(x_val, y_val),
              callbacks=[EarlyStopping(patience=patience), checkpointer])

    # The best model is not necessarily the final instance because patience != 0
    model = load_model(model_path)
    y_train_pred = model.predict(x_train)
    print('Train accuracy: {:.2f}%'.format(
        calculate_accuracy(y_train, y_train_pred) * 100.0))
    # Test set accuracy
    y_test_pred = []
    for i in range(3):
        y_test_pred.append(model.predict(x_test[i]))
        print('Test accuracy: {:.2f}%'.format(
            calculate_accuracy(y_test[i], y_test_pred[i]) * 100.0))

    return model
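
# Usage sketch (shapes assumed for illustration; not from the source): bgru
# expects zero-padded inputs of shape (samples, max_seq_len, num_features) and
# one-hot targets of shape (samples, max_seq_len, num_classes), so that
# Masking(mask_value=0) skips the all-zero padded timesteps.
import numpy as np
x_demo = np.zeros((4, 50, 21))           # 4 sequences, padded to 50 steps
x_demo[0, :30] = np.random.rand(30, 21)  # first sequence has 30 real steps
y_demo = np.zeros((4, 50, 3))            # 3 classes, one-hot per timestep
y_demo[:, :, 0] = 1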
Example #17
#LOAD DATA

train_data_padded = np.load("/storage/hpc_lkpiel/data/fbank_train_data_padded.npy", encoding="bytes")
val_data_padded = np.load("/storage/hpc_lkpiel/data/fbank_val_data_padded.npy", encoding="bytes")
test_data_padded = np.load("/storage/hpc_lkpiel/data/fbank_test_data_padded.npy", encoding="bytes")
print ("DATA LOADED")

################################################################################################

reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.7,
                              patience=2, min_lr=0.0001, verbose=1)


model_14 = Sequential([
    Masking(mask_value=0., input_shape=(1107, 20)),
    Bidirectional(LSTM(64, return_sequences=True)),
    Bidirectional(LSTM(64, return_sequences=True)),
    AttentionWithContext(),
    Dense(3, activation='softmax')
])

print("model_14 BUILT")

model_14.compile(loss='categorical_crossentropy',
                 optimizer='adam',
                 metrics=['accuracy'])
print("model_14 COMPILED")


checkpoint = ModelCheckpoint(filepath='/models/model_14.hdf5', monitor='val_loss', save_best_only=True)

history = model_14.fit(x=train_data_padded,
Example #18
from keras.optimizers import Adam

max_features = 20000
maxlen = 10  # cut texts after this number of words (among top max_features most common words)
a = [[[1, 1], [2, 2], [3, 3], [4, 4]], [[1, 1], [2, 2]]]
label = [[0, 1], [1, 0]]

print('Pad sequences (samples x time)')
x_train = sequence.pad_sequences(a, maxlen=maxlen)
print(x_train)

print('x_train shape:', x_train.shape)

print('Build model...')
frame_input = Input(shape=(10, 2))
mask_frame_input = Masking(mask_value=0.)(frame_input)
frame_l1 = Bidirectional(
    LSTM(16,
         return_sequences=True,
         recurrent_dropout=0.25,
         name='LSTM_audio_1'))(mask_frame_input)
frame_l2 = Bidirectional(LSTM(16, recurrent_dropout=0.25,
                              name='LSTM_audio_2'))(frame_l1)
dropout_word = Dropout(0.5)(frame_l2)

audio_prediction = Dense(2, activation='softmax')(dropout_word)
audio_model = Model(inputs=frame_input, outputs=audio_prediction)
inter_audio = Model(inputs=frame_input, outputs=frame_l1)

adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
audio_model.compile(loss='binary_crossentropy',
Example #19
# # no embedding
# model = Sequential()
# #model.add(InputLayer(input_shape=(max_sequence_len,len(idx),)))
# #model.add(Embedding(len(idx),16))
# model.add(Masking(input_shape=(max_sequence_len,len(idx))))
# #model.add(Dropout(0.5))
# model.add(Bidirectional(LSTM(12, input_shape=(max_sequence_len,len(idx)),recurrent_dropout=0.0,dropout=0.0,return_sequences=True)))
# model.add(Dropout(0.5))
# model.add(TimeDistributed(Dense(len(idx_label),activation='softmax')))
# model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])

# embedded tokens
tokens_input = Input(shape=(max_sequence_len,))
features_input = Input(shape=(max_sequence_len, len(idx)))
tokens_masked = Masking(0)(tokens_input)
features_masked = Masking(0)(features_input)
tokens_embed = Embedding(int((2/3)*len(idx_t)), 10, input_length=max_sequence_len)(tokens_masked)
tokens_embed = Dropout(0.5)(tokens_embed)
features_merged = Concatenate(axis=-1)([features_masked, tokens_embed])

# features_input = Input(shape=(max_sequence_len, len(idx)))
# chars_input = Input(shape=(max_sequence_len, max_carray_len))
# chars_masked = Masking(0) (chars_input)
# features_masked = Masking(0) (features_input)
# chars_embed = Bidirectional(LSTM(5, return_sequences=True)) (chars_masked)
# # chars_embed = Dropout(0.5) (chars_embed)
# features_merged = Concatenate(axis=-1) ([features_masked, chars_embed])

h = Bidirectional(LSTM(12, return_sequences=True))(features_merged)
# h = Dropout(0.5) (h)
Example #20
    def model_architecture(self, input_shape, output_shape):
        """Build a Keras model and return a compiled model."""
        from keras.models import Sequential
        from keras.layers import (
            Masking, LSTM, Dense, TimeDistributed, Activation)

        # Build Model
        model = Sequential()

        # the shape of the y vector of the labels,
        # determines which output from rnn will be used
        # to calculate the loss
        if len(output_shape) == 1:
            # y is (num examples, num features) so
            # only the last output from the rnn is used to
            # calculate the loss
            model.add(Masking(mask_value=-1, input_shape=input_shape))
            model.add(LSTM(self.rnn_size))
            model.add(Dense(input_dim=self.rnn_size, units=output_shape[-1]))
        elif len(output_shape) == 2:
            # y is (num examples, max_dialogue_len, num features) so
            # all the outputs from the rnn are used to
            # calculate the loss, therefore a sequence is returned and
            # time distributed layer is used

            # the first value in input_shape is max dialogue_len,
            # it is set to None, to allow dynamic_rnn creation
            # during prediction
            model.add(
                Masking(mask_value=-1, input_shape=(None, input_shape[1])))
            model.add(LSTM(self.rnn_size, return_sequences=True))
            model.add(TimeDistributed(Dense(units=output_shape[-1])))
        else:
            raise ValueError("Cannot construct the model because"
                             "length of output_shape = {} "
                             "should be 1 or 2."
                             "".format(len(output_shape)))

        model.add(Activation('softmax'))
        #model.add(Activation('relu'))

        # model.compile(loss='categorical_crossentropy',
        #               optimizer='adam',
        #               metrics=['accuracy'])
        model.compile(loss='categorical_crossentropy',
                      optimizer='SGD',
                      metrics=['accuracy'])

        logger.debug(model.summary())
        return model


# class nuRobotPolicy(KerasPolicy):
#     def model_architecture(self, num_features, num_actions, max_history_len):
#         """Build a Keras model and return a compiled model."""
#         from keras.layers import LSTM, Activation, Masking, Dense
#         from keras.models import Sequential

#         n_hidden = 32  # size of hidden layer in LSTM
#         # Build Model
#         batch_shape = (None, max_history_len, num_features)

#         model = Sequential()
#         model.add(Masking(-1, batch_input_shape=batch_shape))
#         model.add(LSTM(n_hidden, batch_input_shape=batch_shape))
#         model.add(Dense(input_dim=n_hidden, output_dim=num_actions))
#         model.add(Activation('softmax'))

#         model.compile(loss='categorical_crossentropy',
#                       optimizer='adam',
#                       metrics=['accuracy'])

#         logger.debug(model.summary())
#         return model
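
# Illustration (a sketch, not part of the original classes): the output_shape
# branching in model_architecture above is keyed on the rank of y without its
# batch axis.
import numpy as np
y_single = np.zeros((32, 6))   # output_shape == (6,)    -> last-output LSTM
y_seq = np.zeros((32, 10, 6))  # output_shape == (10, 6) -> TimeDistributed
assert len(y_single.shape[1:]) == 1
assert len(y_seq.shape[1:]) == 2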
Example #21
trainY = Y[:int((1-valSplit)*len(audioMfccChunksPadded))]
valY = Y[int((1-valSplit)*len(audioMfccChunksPadded)):len(audioMfccChunksPadded)]


##############################################################################
# MODEL
##############################################################################

depth = 2

# Input

myInput = Input(shape=(maxInputLen, inputDim,))

# Masking
LSTMinput = Masking(mask_value=0.)(myInput)

# If depth > 1
if depth > 1:
    # First layer
    encoded = LSTM(hiddenDim, activation=LSTMactiv,
                   return_sequences=True)(LSTMinput)
    for d in range(depth - 2):
        encoded = LSTM(hiddenDim, activation=LSTMactiv,
                       return_sequences=True)(encoded)
    # Last layer
    encoded = LSTM(hiddenDim, activation=LSTMactiv)(encoded)
# If depth = 1
else:
    encoded = LSTM(hiddenDim, activation=LSTMactiv)(LSTMinput)
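
# Side note (a self-contained numpy sketch of what Masking(mask_value=0.)
# computes): a timestep is kept iff any of its features differs from the mask
# value, so fully zero-padded steps are skipped by the LSTM layers above.
import numpy as np
mask_demo = np.zeros((1, 4, 3))
mask_demo[0, :2] = 1.0                 # two real timesteps, two padded ones
keep = (mask_demo != 0.).any(axis=-1)  # -> [[ True  True False False]]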
Example #22
	def get_text_model(self):

		# Modality specific hyperparameters
		self.epochs = 100
		self.batch_size = 50

		# Modality specific parameters
		self.embedding_dim = self.data.W.shape[1]

		# For text model
		self.vocabulary_size = self.data.W.shape[0]
		self.filter_sizes = [3,4,5]
		self.num_filters = 512


		print("Creating Model...")

		sentence_length = self.train_x.shape[2]

		# Initializing sentence representation layers
		embedding = Embedding(input_dim=self.vocabulary_size, output_dim=self.embedding_dim, weights=[self.data.W], input_length=sentence_length, trainable=False)
		conv_0 = Conv2D(self.num_filters, kernel_size=(self.filter_sizes[0], self.embedding_dim), padding='valid', kernel_initializer='normal', activation='relu')
		conv_1 = Conv2D(self.num_filters, kernel_size=(self.filter_sizes[1], self.embedding_dim), padding='valid', kernel_initializer='normal', activation='relu')
		conv_2 = Conv2D(self.num_filters, kernel_size=(self.filter_sizes[2], self.embedding_dim), padding='valid', kernel_initializer='normal', activation='relu')
		maxpool_0 = MaxPool2D(pool_size=(sentence_length - self.filter_sizes[0] + 1, 1), strides=(1,1), padding='valid')
		maxpool_1 = MaxPool2D(pool_size=(sentence_length - self.filter_sizes[1] + 1, 1), strides=(1,1), padding='valid')
		maxpool_2 = MaxPool2D(pool_size=(sentence_length - self.filter_sizes[2] + 1, 1), strides=(1,1), padding='valid')
		dense_func = Dense(100, activation='tanh', name="dense")
		dense_final = Dense(units=self.classes, activation='softmax')
		reshape_func = Reshape((sentence_length, self.embedding_dim, 1))

		def slicer(x, index):
			return x[:,K.constant(index, dtype='int32'),:]

		def slicer_output_shape(input_shape):
		    shape = list(input_shape)
		    assert len(shape) == 3  # batch, seq_len, sent_len
		    new_shape = (shape[0], shape[2])
		    return new_shape

		def reshaper(x):
			return K.expand_dims(x, axis=3)

		def flattener(x):
			x = K.reshape(x, [-1, x.shape[1]*x.shape[3]])
			return x

		def flattener_output_shape(input_shape):
		    shape = list(input_shape)
		    new_shape = (shape[0], 3*shape[3])
		    return new_shape

		inputs = Input(shape=(self.sequence_length, sentence_length), dtype='int32')
		cnn_output = []
		for ind in range(self.sequence_length):
			
			local_input = Lambda(slicer, output_shape=slicer_output_shape, arguments={"index":ind})(inputs) # Batch, word_indices
			
			#cnn-sent
			emb_output = embedding(local_input)
			reshape = Lambda(reshaper)(emb_output)
			concatenated_tensor = Concatenate(axis=1)([maxpool_0(conv_0(reshape)), maxpool_1(conv_1(reshape)), maxpool_2(conv_2(reshape))])
			flatten = Lambda(flattener, output_shape=flattener_output_shape,)(concatenated_tensor)
			dense_output = dense_func(flatten)
			dropout = Dropout(0.5)(dense_output)
			cnn_output.append(dropout)

		def stack(x):
			return K.stack(x, axis=1)
		cnn_outputs = Lambda(stack)(cnn_output)

		masked = Masking(mask_value=0)(cnn_outputs)
		lstm = Bidirectional(LSTM(300, activation='relu', return_sequences=True, dropout=0.3))(masked)
		lstm = Bidirectional(LSTM(300, activation='relu', return_sequences=True, dropout=0.3), name="utter")(lstm)
		output = TimeDistributed(Dense(self.classes, activation='softmax'))(lstm)

		model = Model(inputs, output)
		return model
Example #23
    def buildKerasModel(self, use_sourcelang=False, use_image=True):
        '''
        Define the exact structure of your model here. We create an image
        description generation model by merging the VGG image features with
        a word embedding model, with an LSTM over the sequences.

        The order in which these appear below (text, image) is _IMMUTABLE_.
        (Needs to match up with input to model.fit.)
        '''
        logger.info('Building Keras model...')
        logger.info('Using image features: %s', use_image)
        logger.info('Using source language features: %s', use_sourcelang)

        model = Graph()
        model.add_input('text', input_shape=(self.max_t, self.vocab_size))
        model.add_node(Masking(mask_value=0.), input='text', name='text_mask')

        # Word embeddings
        model.add_node(TimeDistributedDense(output_dim=self.embed_size,
                                            input_dim=self.vocab_size,
                                            W_regularizer=l2(self.l2reg)),
                       name="w_embed",
                       input='text_mask')
        model.add_node(Dropout(self.dropin),
                       name="w_embed_drop",
                       input="w_embed")

        # Embed -> Hidden
        model.add_node(TimeDistributedDense(output_dim=self.hidden_size,
                                            input_dim=self.embed_size,
                                            W_regularizer=l2(self.l2reg)),
                       name='embed_to_hidden',
                       input='w_embed_drop')
        recurrent_inputs = 'embed_to_hidden'

        # Source language input
        if use_sourcelang:
            model.add_input('source', input_shape=(self.max_t, self.hsn_size))
            model.add_node(Masking(mask_value=0.),
                           input='source',
                           name='source_mask')

            model.add_node(TimeDistributedDense(output_dim=self.hidden_size,
                                                input_dim=self.hsn_size,
                                                W_regularizer=l2(self.l2reg)),
                           name="s_embed",
                           input="source_mask")
            model.add_node(Dropout(self.dropin),
                           name="s_embed_drop",
                           input="s_embed")
            recurrent_inputs = ['embed_to_hidden', 's_embed_drop']

        # Recurrent layer
        if self.gru:
            model.add_node(GRU(output_dim=self.hidden_size,
                               input_dim=self.hidden_size,
                               return_sequences=True),
                           name='rnn',
                           input=recurrent_inputs)

        else:
            model.add_node(LSTM(output_dim=self.hidden_size,
                                input_dim=self.hidden_size,
                                return_sequences=True),
                           name='rnn',
                           input=recurrent_inputs)

        # Image 'embedding'
        model.add_input('img', input_shape=(self.max_t, 4096))
        model.add_node(Masking(mask_value=0.), input='img', name='img_mask')

        model.add_node(TimeDistributedDense(output_dim=self.hidden_size,
                                            input_dim=4096,
                                            W_regularizer=l2(self.l2reg)),
                       name='i_embed',
                       input='img_mask')
        model.add_node(Dropout(self.dropin),
                       name='i_embed_drop',
                       input='i_embed')

        # Multimodal layer outside the recurrent layer
        model.add_node(TimeDistributedDense(output_dim=self.hidden_size,
                                            input_dim=self.hidden_size,
                                            W_regularizer=l2(self.l2reg)),
                       name='m_layer',
                       inputs=['rnn', 'i_embed_drop', 'embed_to_hidden'],
                       merge_mode='sum')

        model.add_node(TimeDistributedDense(output_dim=self.vocab_size,
                                            input_dim=self.hidden_size,
                                            W_regularizer=l2(self.l2reg),
                                            activation='softmax'),
                       name='output',
                       input='m_layer',
                       create_output=True)

        if self.optimiser == 'adam':
            # allow user-defined hyper-parameters for ADAM because it is
            # our preferred optimiser
            optimiser = Adam(lr=self.lr,
                             beta_1=self.beta1,
                             beta_2=self.beta2,
                             epsilon=self.epsilon,
                             clipnorm=self.clipnorm)
            model.compile(optimiser, {'output': 'categorical_crossentropy'})
        else:
            model.compile(self.optimiser,
                          {'output': 'categorical_crossentropy'})

        if self.weights is not None:
            logger.info("... with weights defined in %s", self.weights)
            # Initialise the weights of the model
            shutil.copyfile("%s/weights.hdf5" % self.weights,
                            "%s/weights.hdf5.bak" % self.weights)
            model.load_weights("%s/weights.hdf5" % self.weights)

        #plot(model, to_file="model.png")

        return model
Example #24
def my_timedistributed_cnn_model(input_shape,
                                 conv_f_1,
                                 conv_f_2,
                                 conv_f_3,
                                 cnn_dense_fc_1,
                                 masking=False):

    model = Sequential()

    if masking:
        model.add(Masking(mask_value=0.0, input_shape=input_shape))
        model.add(
            TimeDistributed(
                Conv2D(filters=conv_f_1,
                       kernel_size=(3, 3),
                       padding='same',
                       kernel_regularizer=l2(1.e-4))))
    else:
        model.add(
            TimeDistributed(Conv2D(filters=conv_f_1,
                                   kernel_size=(3, 3),
                                   padding='same',
                                   kernel_regularizer=l2(1.e-4)),
                            input_shape=input_shape))

    model.add(TimeDistributed(BatchNormalization()))
    model.add(TimeDistributed(Activation('relu')))
    model.add(
        TimeDistributed(
            MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='same')))

    model.add(
        TimeDistributed(
            Conv2D(filters=conv_f_2,
                   kernel_size=(3, 3),
                   padding='same',
                   activation='relu',
                   kernel_regularizer=l2(1.e-4))))
    model.add(TimeDistributed(BatchNormalization()))
    model.add(TimeDistributed(Activation('relu')))
    model.add(
        TimeDistributed(
            MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='same')))

    model.add(
        TimeDistributed(
            Conv2D(filters=conv_f_3,
                   kernel_size=(3, 3),
                   padding='same',
                   activation='relu',
                   kernel_regularizer=l2(1.e-4))))
    model.add(TimeDistributed(BatchNormalization()))
    model.add(TimeDistributed(Activation('relu')))
    model.add(
        TimeDistributed(
            MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='same')))

    model.add(TimeDistributed(Flatten()))
    model.add(
        TimeDistributed(Dense(cnn_dense_fc_1, kernel_regularizer=l2(1.e-4))))

    return model
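
# Usage sketch (input shape assumed for illustration): input_shape is
# (timesteps, height, width, channels); each frame passes through the same
# shared CNN, yielding one cnn_dense_fc_1-dimensional vector per timestep.
demo_model = my_timedistributed_cnn_model(input_shape=(10, 48, 48, 1),
                                          conv_f_1=8, conv_f_2=16, conv_f_3=32,
                                          cnn_dense_fc_1=64)
print(demo_model.output_shape)  # expected: (None, 10, 64)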
Example #25
    def __init__(self, dim, batch_norm, dropout, rec_dropout, task,
                 target_repl=False, deep_supervision=False, num_classes=1,
                 depth=1, input_dim=76, **kwargs):

        print("==> not used params in network class:", kwargs.keys())

        self.dim = dim
        self.batch_norm = batch_norm
        self.dropout = dropout
        self.rec_dropout = rec_dropout
        self.depth = depth

        if task in ['decomp', 'ihm', 'ph']:
            final_activation = 'sigmoid'
        elif task in ['los']:
            if num_classes == 1:
                final_activation = 'relu'
            else:
                final_activation = 'softmax'
        else:
            raise ValueError("Wrong value for task")

        # Input layers and masking
        X = Input(shape=(None, input_dim), name='X')

        print("X-> " , X)

        inputs = [X]
        print("input->", inputs)
        mX = Masking()(X)

        if deep_supervision:
            M = Input(shape=(None,), name='M')
            inputs.append(M)

        # Configurations
        is_bidirectional = False
        if deep_supervision:
            is_bidirectional = False

        # Main part of the network
        '''
        for i in range(depth - 1):
            num_units = dim
            if is_bidirectional:
                num_units = num_units // 2

            lstm = LSTM(units=num_units,
                        activation='tanh',
                        return_sequences=True,
                        recurrent_dropout=rec_dropout,
                        dropout=dropout)

            if is_bidirectional:
                mX = Bidirectional(lstm)(mX)
            else:
                mX = lstm(mX)
        '''

        # Output module of the network
        return_sequences = (target_repl or deep_supervision)
        '''
        L = LSTM(units=dim,
                 activation='tanh',
                 return_sequences=return_sequences,
                 dropout=dropout,
                 recurrent_dropout=rec_dropout)(mX)
        '''

        L = SeqSelfAttention(
                attention_width=48,
                attention_type=SeqSelfAttention.ATTENTION_TYPE_MUL,
                attention_activation='sigmoid',
                kernel_regularizer=keras.regularizers.l2(1e-6),
                use_attention_bias=False,
                name='Attention',
            )(mX)

        
        L = GRU(units=dim,
                activation='tanh',
                return_sequences=return_sequences,
                dropout=dropout,
                recurrent_dropout=rec_dropout)(L)
        

        if dropout > 0:
            L = Dropout(dropout)(L)

        if target_repl:
            y = TimeDistributed(Dense(num_classes, activation=final_activation),
                                name='seq')(L)
            y_last = LastTimestep(name='single')(y)
            outputs = [y_last, y]
        elif deep_supervision:
            y = TimeDistributed(Dense(num_classes, activation=final_activation))(L)
            y = ExtendMask()([y, M])  # this way we extend mask of y to M
            outputs = [y]
        else:
            y = Dense(num_classes, activation=final_activation)(L)
            outputs = [y]

        super(Network, self).__init__(inputs=inputs, outputs=outputs)
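
# Usage sketch (hedged: SeqSelfAttention comes from the keras-self-attention
# package, and LastTimestep / ExtendMask from the surrounding project, none of
# which are shown in this snippet):
# net = Network(dim=16, batch_norm=False, dropout=0.3, rec_dropout=0.3,
#               task='ihm')
# net.compile(optimizer='adam', loss='binary_crossentropy')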
Example #26
print('train shape:', train_samples)
print('dev shape:', dev_samples)
print()

word_embedding = pd.read_csv('../preprocessing/senna/embeddings.txt', delimiter=' ', header=None)
word_embedding = word_embedding.values
word_embedding = np.concatenate([np.zeros((1,emb_length)),word_embedding, np.random.uniform(-1,1,(1,emb_length))])

embed_index_input = Input(shape=(step_length,))
embedding = Embedding(emb_vocab+2, emb_length, weights=[word_embedding], mask_zero=True, input_length=step_length)(embed_index_input)

pos_input = Input(shape=(step_length, pos_length))
chunk_input = Input(shape=(step_length, chunk_length))

senna_pos_chunk_merge = merge([embedding, pos_input, chunk_input], mode='concat')
input_mask = Masking(mask_value=0)(senna_pos_chunk_merge)
dp_1 = Dropout(0.5)(input_mask)
hidden_1 = Bidirectional(LSTM(64, return_sequences=True))(dp_1)
hidden_2 = Bidirectional(LSTM(32, return_sequences=True))(hidden_1)
dp_2 = Dropout(0.5)(hidden_2)
output = TimeDistributed(Dense(output_length, activation='softmax'))(dp_2)
model = Model(input=[embed_index_input,pos_input,chunk_input], output=output)

model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

print(model.summary())


number_of_train_batches = int(math.ceil(float(train_samples)/batch_size))
Example #27
    def __init__(self,
                 dim,
                 batch_norm,
                 dropout,
                 rec_dropout,
                 task,
                 target_repl=False,
                 deep_supervision=False,
                 num_classes=1,
                 depth=1,
                 input_dim=376,
                 **kwargs):

        print("==> not used params in network class:", kwargs.keys())

        self.output_dim = dim
        self.batch_norm = batch_norm
        self.dropout = dropout
        self.rec_dropout = rec_dropout
        self.depth = depth

        if task in ['decomp', 'ihm', 'ph']:
            final_activation = 'sigmoid'
        elif task in ['los']:
            if num_classes == 1:
                final_activation = 'relu'
            else:
                final_activation = 'softmax'
        else:
            return ValueError("Wrong value for task")

        # Input layers and masking
        X = Input(shape=(None, input_dim), name='X')
        inputs = [X]
        mX = Masking()(X)

        if deep_supervision:
            M = Input(shape=(None, ), name='M')
            inputs.append(M)

        # Configurations
        is_bidirectional = True
        if deep_supervision:
            is_bidirectional = False

        # Main part of the network
        for i in range(depth - 1):
            #num_units = 48

            num_units = dim
            if is_bidirectional:
                num_units = num_units // 2

            lstm = LSTM(num_units,
                        activation='tanh',
                        return_sequences=True,
                        dropout_U=rec_dropout,
                        dropout_W=dropout)

            if is_bidirectional:
                mX = Bidirectional(lstm)(mX)
            else:
                mX = lstm(mX)

        # Output module of the network
        return_sequences = (target_repl or deep_supervision)
        '''
        L = LSTM(units=dim,
                 activation='tanh',
                 return_sequences=return_sequences,
                 dropout=dropout,
                 recurrent_dropout=rec_dropout)(mX)
        '''
        L = LSTM(dim,
                 activation='tanh',
                 return_sequences=return_sequences,
                 dropout_W=dropout,
                 dropout_U=rec_dropout)(mX)

        if dropout > 0:
            L = Dropout(dropout)(L)

        y = Dense(num_classes, activation=final_activation)(L)
        outputs = [y]

        super(Network, self).__init__(inputs, outputs)
Example #28
def get_model(args):
    # Dataset config
    assert args.dataset.lower() == 'avletters'
    config = data_constants['avletters']
    inputCNNshape = config['lstm_inputCNNshape']
    inputMLPshape = config['lstm_inputMLPshape']
    nb_classes = config['nb_classes']

    # Build the CNN - pre-cross-connections
    inputCNN = Input(shape=inputCNNshape)
    inputNorm = TimeDistributed(Flatten())(inputCNN)
    inputNorm = Masking(mask_value=0.)(inputNorm)
    inputNorm = TimeDistributed(Reshape((80, 60, 1)))(inputNorm)
    inputNorm = BatchNormalization(axis=1)(inputNorm)

    conv = TimeDistributed(Convolution2D(8,
                                         3,
                                         3,
                                         border_mode='same',
                                         activation='relu'),
                           name='conv11')(inputNorm)
    pool = TimeDistributed(MaxPooling2D((2, 2), strides=(2, 2)),
                           name='maxpool1')(conv)

    # Build the MLP - pre-cross-connections
    inputMLP = Input(shape=inputMLPshape)
    inputMasked = Masking(mask_value=0., input_shape=inputMLPshape)(inputMLP)

    fcMLP = TimeDistributed(Dense(32, activation='relu'),
                            name='fc1')(inputMasked)

    # Add the 1st round of cross-connections - CNN to MLP
    x21 = TimeDistributed(Convolution2D(8, 1, 1, border_mode='same'))(pool)
    x21 = TimeDistributed(PReLU())(x21)
    x21 = TimeDistributed(Flatten())(x21)
    x21 = TimeDistributed(Dense(32))(x21)
    x21 = TimeDistributed(PReLU())(x21)

    # Add 1st shortcut (residual connection) from CNN input to MLP
    short1_2dto1d = TimeDistributed(MaxPooling2D((4, 4),
                                                 strides=(4, 4)))(inputNorm)
    short1_2dto1d = TimeDistributed(Flatten())(short1_2dto1d)
    short1_2dto1d = TimeDistributed(Dense(32))(short1_2dto1d)
    short1_2dto1d = TimeDistributed(PReLU())(short1_2dto1d)

    # Cross-connections - MLP to CNN
    x12 = TimeDistributed(Dense(25 * 15))(fcMLP)
    x12 = TimeDistributed(PReLU())(x12)
    x12 = TimeDistributed(Reshape((25, 15, 1)))(x12)
    x12 = TimeDistributed(Conv2DTranspose(8, (16, 16), padding='valid'))(x12)
    x12 = TimeDistributed(PReLU())(x12)

    # 1st shortcut (residual connection) from MLP input to CNN
    short1_1dto2d = TimeDistributed(Dense(25 * 15))(inputMasked)
    short1_1dto2d = TimeDistributed(PReLU())(short1_1dto2d)
    short1_1dto2d = TimeDistributed(Reshape((25, 15, 1)))(short1_1dto2d)
    short1_1dto2d = TimeDistributed(
        Conv2DTranspose(8, (16, 16), padding='valid'))(short1_1dto2d)
    short1_1dto2d = TimeDistributed(PReLU())(short1_1dto2d)

    # CNN - post-cross-connections 1
    pool = add([pool, short1_1dto2d])
    merged = concatenate([pool, x12])

    conv = TimeDistributed(Convolution2D(16,
                                         3,
                                         3,
                                         border_mode='same',
                                         activation='relu'),
                           name='conv21')(merged)
    pool = TimeDistributed(MaxPooling2D((2, 2), strides=(2, 2)),
                           name='maxpool2')(conv)

    # MLP - post-cross-connections 1
    fcMLP = add([fcMLP, short1_2dto1d])
    fcMLP = concatenate([fcMLP, x21])

    fcMLP = TimeDistributed(Dense(32, activation='relu'), name='fc2')(fcMLP)

    # Add the 2nd round of cross-connections - CNN to MLP
    x21 = TimeDistributed(Convolution2D(16, 1, 1, border_mode='same'))(pool)
    x21 = TimeDistributed(PReLU())(x21)
    x21 = TimeDistributed(Flatten())(x21)
    x21 = TimeDistributed(Dense(64))(x21)
    x21 = TimeDistributed(PReLU())(x21)

    # Add 2nd shortcut (residual connection) from CNN input to MLP
    short2_2dto1d = TimeDistributed(MaxPooling2D((8, 8),
                                                 strides=(8, 4)))(inputNorm)
    short2_2dto1d = TimeDistributed(Flatten())(short2_2dto1d)
    short2_2dto1d = TimeDistributed(Dense(32))(short2_2dto1d)
    short2_2dto1d = TimeDistributed(PReLU())(short2_2dto1d)

    # Cross-connections - MLP to CNN
    x12 = TimeDistributed(Dense(13 * 8))(fcMLP)
    x12 = TimeDistributed(PReLU())(x12)
    x12 = TimeDistributed(Reshape((13, 8, 1)))(x12)
    x12 = TimeDistributed(Conv2DTranspose(16, (8, 8), padding='valid'))(x12)
    x12 = TimeDistributed(PReLU())(x12)

    # 2nd shortcut (residual connection) from MLP input to CNN
    short2_1dto2d = TimeDistributed(Dense(13 * 8))(inputMasked)
    short2_1dto2d = TimeDistributed(PReLU())(short2_1dto2d)
    short2_1dto2d = TimeDistributed(Reshape((13, 8, 1)))(short2_1dto2d)
    short2_1dto2d = TimeDistributed(
        Conv2DTranspose(16, (8, 8), padding='valid'))(short2_1dto2d)
    short2_1dto2d = TimeDistributed(PReLU())(short2_1dto2d)

    # CNN - post-cross-connections 2
    pool = add([pool, short2_1dto2d])
    merged = concatenate([pool, x12])

    reshape = TimeDistributed(Flatten(), name='flatten1')(merged)
    fcCNN = TimeDistributed(Dense(64, activation='relu'),
                            name='fcCNN')(reshape)

    # Merge the models
    fcMLP = add([fcMLP, short2_2dto1d])
    merged = concatenate([fcCNN, fcMLP, x21])
    merged = BatchNormalization(axis=1, name='mergebn')(merged)
    merged = Dropout(0.5, name='mergedrop')(merged)

    lstm = LSTM(64)(merged)
    out = Dense(nb_classes, activation='softmax')(lstm)

    # Return the model object
    model = Model(input=[inputCNN, inputMLP], output=out)
    return model
Example #29
predictfrom_ind, predictfrom_resp = predictfrom[:, :, :-1], predictfrom[:, :, -1]

print("Shape of the array used for prediction:", predictfrom_ind.shape)

# The shape IS THE SAME as seasonML1!

#%% Design and train the LSTM model:
# Design LSTM neural network

# Define the network using the Sequential Keras API
model = Sequential()

# Tell the network that -1 represents missing values
model.add(
    Masking(mask_value=-1,
            input_shape=(train_ind.shape[1], train_ind.shape[2])))

# LSTM with 9 units (not tuned; one per statistical category)
model.add(LSTM(9))

# Dense layer maps the LSTM output to the response length (one value per timestep)
model.add(Dense(train_ind.shape[1]))

# Define a loss function and the Adam optimization algorithm
model.compile(loss='mean_squared_error', optimizer='adam')

# train network
history = model.fit(train_ind,
                    train_resp,
                    epochs=40,
                    batch_size=25,
Example #30
def negative_samples(input_length, input_dim, output_length, output_dim,
                     hidden_dim, ns_amount, learning_rate, drop_rate):
    q_encoder_input = Input(shape=(input_length, input_dim))
    r_decoder_input = Input(shape=(output_length, output_dim))
    weight_data_r = Input(shape=(1, ))
    weight_data_w = Input(shape=(1, ns_amount))
    if ns_amount == 0:
        weight_data_w_list = []
    else:
        weight_data_w_list = Lambda(lambda x: tf.split(
            x, num_or_size_splits=ns_amount, axis=2))(weight_data_w)
    fixed_r_decoder_input = adding_weight(
        output_length, output_dim)([r_decoder_input, weight_data_r])
    w_decoder_input = Input(shape=(output_length, output_dim, ns_amount))
    if ns_amount == 0:
        w_decoder_input_list = []
    else:
        w_decoder_input_list = Lambda(lambda x: tf.split(
            x, num_or_size_splits=ns_amount, axis=3))(w_decoder_input)
    if ns_amount == 1:
        # print("===w_decoder_input_list:", w_decoder_input_list.shape)
        w_decoder_input_list = [w_decoder_input_list]
        weight_data_w_list = [weight_data_w_list]
    fixed_w_decoder_input = []
    for i in range(ns_amount):
        w_decoder_input_list[i] = Reshape(
            (output_length, output_dim))(w_decoder_input_list[i])
        weight_data_w_list[i] = Reshape((1, ))(weight_data_w_list[i])

        w_decoder_weighted = adding_weight(output_length, output_dim)(
            [w_decoder_input_list[i], weight_data_w_list[i]])
        w_decoder_weighted_masked = Masking(
            mask_value=0.,
            input_shape=(output_length, output_dim))(w_decoder_weighted)
        fixed_w_decoder_input.append(w_decoder_weighted_masked)

    q_encoder_input_masked = Masking(mask_value=0.,
                                     input_shape=(input_length,
                                                  input_dim))(q_encoder_input)
    fixed_r_decoder_input_masked = Masking(
        mask_value=0.,
        input_shape=(output_length, output_dim))(fixed_r_decoder_input)

    encoder = Bidirectional(GRU(hidden_dim),
                            merge_mode="ave",
                            name="bidirectional1")
    q_encoder_output = encoder(q_encoder_input_masked)
    q_encoder_output = Dropout(rate=drop_rate,
                               name="dropout1")(q_encoder_output)

    decoder = Bidirectional(GRU(hidden_dim),
                            merge_mode="ave",
                            name="bidirectional2")
    r_decoder_output = decoder(fixed_r_decoder_input_masked)
    r_decoder_output = Dropout(rate=drop_rate,
                               name="dropout2")(r_decoder_output)

    # doc_output = MaxPooling1D(pool_size=20, stride=5, padding='same')(q_encoder_input)
    # doc_output = Flatten()(q_encoder_input)
    # que_output = MaxPooling1D(pool_size=20, stride=5, padding='same')(fixed_r_decoder_input)
    # que_output = Flatten()(fixed_r_decoder_input)

    # output_vec = Concatenate(axis=1, name="dropout_con")([q_encoder_output, r_decoder_output])
    # output_hid = Dense(hidden_dim, name="output_hid", activation="relu")(output_vec)
    # similarity = Dense(1, name="similarity", activation="softmax")(output_hid)

    # Difference between kernel, bias, and activity regulizers in Keras
    # https://stats.stackexchange.com/questions/383310/difference-between-kernel-bias-and-activity-regulizers-in-keras
    # output = Dense(128, kernel_regularizer=keras.regularizers.l2(0.0001))(output_vec) # activation="relu",
    # output = Dense(64, name="output_hid", kernel_regularizer=keras.regularizers.l2(0.0001))(output) # activation="relu",
    # similarity = Dense(1, name="similarity", activation="softmax")(output)

    w_decoder_output_list = []
    for i in range(ns_amount):
        w_decoder_output = decoder(fixed_w_decoder_input[i])
        w_decoder_output = Dropout(rate=drop_rate)(w_decoder_output)
        w_decoder_output_list.append(w_decoder_output)
    # similarities = [ similarity ]
    similarities = [
        Dot(axes=1, normalize=True)([q_encoder_output, r_decoder_output])
    ]
    for i in range(ns_amount):
        similarities.append(
            Dot(axes=1,
                normalize=True)([q_encoder_output, w_decoder_output_list[i]]))
    loss_data = Lambda(lambda x: loss_c(x))(similarities)
    model = Model([
        q_encoder_input, r_decoder_input, w_decoder_input, weight_data_r,
        weight_data_w
    ], similarities[0])
    ada = adam(lr=learning_rate)
    model.compile(optimizer=ada, loss=lambda y_true, y_pred: loss_data)
    return model