print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)

# In[39]:

#embedding_dims = 16
top_classes = y_train.shape[1]

# create the model
model = Sequential()
model.add(
    Embedding(len(tokenizer.word_index) + len(token_indice) + 1,
              embedding_dims,
              input_length=maxlen))
model.add(Conv1D(filters=128, kernel_size=6, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=2))
model.add(Conv1D(filters=64, kernel_size=3, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dense(top_classes, activation='softmax'))
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
print(model.summary())

# In[40]:

history = model.fit(x_train, y_train, validation_data=(x_test, y_test),
print('Null word embeddings: %d' % np.sum(np.sum(embedding_matrix, axis=1) == 0))

print('Build model...')
model = Sequential()
model.add(
    Embedding(embedding_matrix.shape[0],
              embedding_matrix.shape[1],
              weights=[embedding_matrix],
              input_length=maxlen,
              trainable=False))
model.add(Dropout(0.25))
model.add(
    Conv1D(filters, kernel_size, padding='valid', activation='relu', strides=1))
model.add(MaxPooling1D(pool_size=pool_size))
model.add(LSTM(lstm_output_size))
model.add(Dense(1))
model.add(Activation('sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

print('Train...')
history = model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs,
def resnet_model(input_shape):
    input1 = Input(shape=(300, 2))
    input2 = Input(shape=(3, 1))
    DropoutRate = 0.3

    ###ksize=3
    #C1_1
    flow1 = layoutConv1D(strides=1, kSize=ksize1, nFilters=8)(input1)  #None*300*8
    flow1 = BatchNormalization()(flow1)  # normalise the activations to stabilise training
    flow1 = Activation(activation='relu')(flow1)  # activation layer

    ###ksize=5
    #C1_2
    flow2 = layoutConv1D(strides=1, kSize=ksize2, nFilters=8)(input1)  #None*300*8
    flow2 = BatchNormalization()(flow2)
    flow2 = Activation(activation='relu')(flow2)

    ###ksize=7
    #C1_3
    flow3 = layoutConv1D(strides=1, kSize=ksize3, nFilters=8)(input1)  #None*300*8
    flow3 = BatchNormalization()(flow3)
    flow3 = Activation(activation='relu')(flow3)

    flow = keras.layers.concatenate([flow1, flow2])
    flow = keras.layers.concatenate([flow, flow3])  #None*300*24
    shortcut = flow

    #C2
    flow = layoutConv1D(strides=1, nFilters=8, kSize=ksize1)(flow)  #None*300*8
    flow = BatchNormalization()(flow)
    flow = Activation(activation='relu')(flow)
    flow = Dropout(DropoutRate)(flow)

    #C3
    flow = layoutConv1D(strides=2, nFilters=16, kSize=ksize1)(flow)  #None*150*16
    shortcut = layoutIdentity1D(nFilters=int(flow.get_shape()[-1]))(shortcut)  #None*300*16
    # downsampling layer S1: pool_size is the downsampling factor (filter size);
    # strides defaults to pool_size when left as None, and padding is 'same'
    shortcut = MaxPooling1D(pool_size=2, padding='same')(shortcut)  #None*150*16
    flow = keras.layers.concatenate([flow, shortcut])

    for i in range(2, 7):
        flow, shortcut = layoutResidualBlock(i, flow, shortcut, ksize1)
        pass

    flow = BatchNormalization()(flow)
    flow = Activation(activation='relu')(flow)
    flow = Dropout(DropoutRate)(flow)
    flow = Flatten()(flow)
    flow = Dense(128, activation='relu')(flow)
    flow = Dropout(DropoutRate)(flow)

    # concatenate the multi-scale features with the RR-interval features
    flow = keras.layers.concatenate([flow, Flatten()(input2)])
    flow = BatchNormalization()(flow)
    predictions = Dense(4, activation='softmax')(flow)

    adam_lr = 0.005
    model = Model(inputs=[input1, input2], outputs=predictions)
    model.compile(optimizer=adam(lr=adam_lr),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model
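# A minimal usage sketch (not from the original source). It assumes the helper builders
# layoutConv1D/layoutIdentity1D/layoutResidualBlock and the kernel sizes ksize1/ksize2/ksize3
# are defined elsewhere in this module, and feeds random arrays shaped like the two inputs
# (300x2 beat segments plus 3 RR-interval features per sample).
import numpy as np

model = resnet_model(input_shape=(300, 2))
x_wave = np.random.rand(32, 300, 2)  # hypothetical batch of beat segments
x_rr = np.random.rand(32, 3, 1)      # hypothetical RR-interval features
y = keras.utils.to_categorical(np.random.randint(0, 4, size=32), num_classes=4)
model.fit([x_wave, x_rr], y, epochs=1, batch_size=16)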
def DeepConv1DOptLearnerStaticArchitecture(param_trainable, init_wrapper, smpl_params, input_info, faces, emb_size=1000, input_type="3D_POINTS"): """ Optimised learner network architecture """ # An embedding layer is required to optimise the parameters optlearner_input = Input(shape=(1,), name="embedding_index") # Initialise the embedding layers emb_layers = init_emb_layers(optlearner_input, emb_size, param_trainable, init_wrapper) optlearner_params = Concatenate(name="parameter_embedding")(emb_layers) optlearner_params = Reshape(target_shape=(85,), name="learned_params")(optlearner_params) print("optlearner parameters shape: " +str(optlearner_params.shape)) #exit(1) # Ground truth parameters and point cloud are inputs to the model as well gt_params = Input(shape=(85,), name="gt_params") gt_pc = Input(shape=(6890, 3), name="gt_pc") print("gt parameters shape: " + str(gt_params.shape)) print("gt point cloud shape: " + str(gt_pc.shape)) # Compute the true offset (i.e. difference) between the ground truth and learned parameters pi = K.constant(np.pi) delta_d = Lambda(lambda x: x[0] - x[1], name="delta_d")([gt_params, optlearner_params]) #delta_d = Lambda(lambda x: x[0] - x[1], name="delta_d_no_mod")([gt_params, optlearner_params]) #delta_d = Lambda(lambda x: K.tf.math.floormod(x - pi, 2*pi) - pi, name="delta_d")(delta_d) # custom modulo 2pi of delta_d #delta_d = custom_mod(delta_d, pi, name="delta_d") # custom modulo 2pi of delta_d print("delta_d shape: " + str(delta_d.shape)) #exit(1) # Calculate the (batched) MSE between the learned parameters and the ground truth parameters false_loss_delta_d = Lambda(lambda x: K.mean(K.square(x), axis=1))(delta_d) print("delta_d loss shape: " + str(false_loss_delta_d.shape)) #exit(1) false_loss_delta_d = Reshape(target_shape=(1,), name="delta_d_mse")(false_loss_delta_d) print("delta_d loss shape: " + str(false_loss_delta_d.shape)) # Load SMPL model and get necessary parameters optlearner_pc = get_pc(optlearner_params, smpl_params, input_info, faces) # UNCOMMENT print("optlearner_pc shape: " + str(optlearner_pc.shape)) #exit(1) #optlearner_pc = Dense(6890*3)(delta_d) #optlearner_pc = Reshape((6890, 3))(optlearner_pc) # Get the (batched) Euclidean loss between the learned and ground truth point clouds pc_euclidean_diff = Lambda(lambda x: x[0] - x[1])([gt_pc, optlearner_pc]) pc_euclidean_dist = Lambda(lambda x: K.sum(K.square(x),axis=-1))(pc_euclidean_diff) print('pc euclidean dist '+str(pc_euclidean_dist.shape)) #exit(1) false_loss_pc = Lambda(lambda x: K.mean(x, axis=1))(pc_euclidean_dist) false_loss_pc = Reshape(target_shape=(1,), name="pc_mean_euc_dist")(false_loss_pc) print("point cloud loss shape: " + str(false_loss_pc.shape)) #exit(1) # Gather sets of points and compute their cross product to get mesh normals # In order of: right hand, right wrist, right forearm, right bicep end, right bicep, right shoulder, top of cranium, left shoulder, left bicep, left bicep end, left forearm, left wrist, left hand, # chest, belly/belly button, back of neck, upper back, central back, lower back/tailbone, # left foot, left over-ankle, left shin, left over-knee, left quadricep, left hip, right, hip, right, quadricep, right over-knee, right shin, right, over-ankle, right foot vertex_list = [5674, 5705, 5039, 5151, 4977, 4198, 411, 606, 1506, 1682, 1571, 2244, 2212, 3074, 3500, 460, 2878, 3014, 3021, 3365, 4606, 4588, 4671, 6877, 1799, 5262, 3479, 1187, 1102, 1120, 6740] #face_array = np.array([11396, 8620, 7866, 5431, 6460, 1732, 4507]) pc_euclidean_diff_NOGRAD = 
Lambda(lambda x: K.stop_gradient(x))(pc_euclidean_diff) # This is added to avoid influencing embedding layer parameters by a "bad" gradient network vertex_diff_NOGRAD = Lambda(lambda x: K.tf.gather(x, np.array(vertex_list).astype(np.int32), axis=-2))(pc_euclidean_diff_NOGRAD) print("vertex_diff_NOGRAD shape: " + str(vertex_diff_NOGRAD.shape)) vertex_diff_NOGRAD = Flatten()(vertex_diff_NOGRAD) #exit(1) face_array = np.array([[face for face in faces if vertex in face][0] for vertex in vertex_list]) # only take a single face for each vertex print("face_array shape: " + str(face_array.shape)) gt_normals = get_mesh_normals(gt_pc, face_array, layer_name="gt_cross_product") print("gt_normals shape: " + str(gt_normals.shape)) opt_normals = get_mesh_normals(optlearner_pc, face_array, layer_name="opt_cross_product") print("opt_normals shape: " + str(opt_normals.shape)) #exit(1) # Learn the offset in parameters from the difference between the ground truth and learned mesh normals diff_normals = Lambda(lambda x: K.tf.cross(x[0], x[1]), name="diff_cross_product")([gt_normals, opt_normals]) diff_normals_NOGRAD = Lambda(lambda x: K.stop_gradient(x))(diff_normals) # This is added to avoid influencing embedding layer parameters by a "bad" gradient network diff_angles = Lambda(lambda x: K.tf.subtract(x[0], x[1]), name="diff_angle")([gt_normals, opt_normals]) diff_angles_NOGRAD = Lambda(lambda x: K.stop_gradient(x))(diff_angles) diff_angles_norm_NOGRAD = Lambda(lambda x: K.tf.norm(x, axis=-1), name="diff_angle_norm")(diff_angles_NOGRAD) dist_angles = Lambda(lambda x: K.mean(K.square(x), axis=-1), name="diff_angle_mse")(diff_angles) dist_angles_NOGRAD = Lambda(lambda x: K.stop_gradient(x))(dist_angles) print("diff_angles shape: " + str(diff_angles.shape)) print("dist_angles shape: " + str(dist_angles.shape)) #pc_euclidean_diff_NOGRAD = Lambda(lambda x: K.stop_gradient(x))(pc_euclidean_diff) # This is added to avoid influencing embedding layer parameters by a "bad" gradient network #print("diff_normals_NOGRAD shape: " + str(diff_normals_NOGRAD.shape)) diff_normals_NOGRAD = Flatten()(diff_normals_NOGRAD) diff_angles_NOGRAD = Flatten()(diff_angles_NOGRAD) mesh_diff_NOGRAD = Concatenate()([diff_normals_NOGRAD, dist_angles_NOGRAD]) if input_type == "3D_POINTS": #optlearner_architecture = Dense(2**9, activation="relu")(vertex_diff_NOGRAD) optlearner_architecture = Dense(2**7, activation="relu")(vertex_diff_NOGRAD) if input_type == "MESH_NORMALS": #optlearner_architecture = Dense(2**11, activation="relu")(diff_angles_norm_NOGRAD) #optlearner_architecture = Dense(2**11, activation="relu")(diff_angles_NOGRAD) #optlearner_architecture = Dense(2**9, activation="relu")(mesh_diff_NOGRAD) optlearner_architecture = Dense(2**7, activation="relu")(mesh_diff_NOGRAD) #optlearner_architecture = BatchNormalization()(optlearner_architecture) #optlearner_architecture = Dropout(0.5)(optlearner_architecture) print('optlearner_architecture shape: '+str(optlearner_architecture.shape)) optlearner_architecture = Reshape((optlearner_architecture.shape[1].value, 1))(optlearner_architecture) print('optlearner_architecture shape: '+str(optlearner_architecture.shape)) optlearner_architecture = Conv1D(64, 5, activation="relu")(optlearner_architecture) optlearner_architecture = BatchNormalization()(optlearner_architecture) optlearner_architecture = MaxPooling1D(3)(optlearner_architecture) optlearner_architecture = Conv1D(128, 5, activation="relu")(optlearner_architecture) optlearner_architecture = BatchNormalization()(optlearner_architecture) 
optlearner_architecture = MaxPooling1D(2)(optlearner_architecture) optlearner_architecture = Conv1D(256, 3, activation="relu")(optlearner_architecture) optlearner_architecture = BatchNormalization()(optlearner_architecture) optlearner_architecture = MaxPooling1D(2)(optlearner_architecture) optlearner_architecture = Conv1D(256, 3, activation="relu")(optlearner_architecture) optlearner_architecture = BatchNormalization()(optlearner_architecture) #optlearner_architecture = MaxPooling1D(2)(optlearner_architecture) optlearner_architecture = AveragePooling1D(2)(optlearner_architecture) print('optlearner_architecture shape: '+str(optlearner_architecture.shape)) optlearner_architecture = Flatten()(optlearner_architecture) print('optlearner_architecture shape: '+str(optlearner_architecture.shape)) #optlearner_architecture = Dropout(0.5)(optlearner_architecture) optlearner_architecture = Dense(2**7, activation="relu")(optlearner_architecture) print('optlearner_architecture shape: '+str(optlearner_architecture.shape)) #delta_d_hat = Dense(85, activation=pos_scaled_tanh, name="delta_d_hat")(optlearner_architecture) delta_d_hat = Dense(85, activation="linear", name="delta_d_hat")(optlearner_architecture) #delta_d_hat = Dense(85, activation=centred_linear, name="delta_d_hat")(optlearner_architecture) print('delta_d_hat shape: '+str(delta_d_hat.shape)) #exit(1) # Calculate the (batched) MSE between the learned and ground truth offset in the parameters delta_d_NOGRAD = Lambda(lambda x: K.stop_gradient(x))(delta_d) false_loss_delta_d_hat = Lambda(lambda x: K.mean(K.square(x[0] - x[1]), axis=1))([delta_d_NOGRAD, delta_d_hat]) #false_loss_delta_d_hat = Lambda(lambda x: K.sum(K.square(x[0] - x[1]), axis=1))([delta_d_NOGRAD, delta_d_hat]) #false_loss_delta_d_hat = Lambda(lambda x: mape(x[0], x[1]))([delta_d_NOGRAD, delta_d_hat]) false_loss_delta_d_hat = Reshape(target_shape=(1,), name="delta_d_hat_mse")(false_loss_delta_d_hat) print("delta_d_hat loss shape: " + str(false_loss_delta_d_hat.shape)) #false_sin_loss_delta_d_hat = get_sin_metric(delta_d_NOGRAD, delta_d_hat) false_sin_loss_delta_d_hat = get_sin_metric(delta_d_NOGRAD, delta_d_hat, average=False) false_sin_loss_delta_d_hat = Lambda(lambda x: x, name="delta_d_hat_sin_output")(false_sin_loss_delta_d_hat) print("delta_d_hat sin loss shape: " + str(false_sin_loss_delta_d_hat.shape)) # Prevent model from using the delta_d_hat gradient in final loss delta_d_hat_NOGRAD = Lambda(lambda x: K.stop_gradient(x), name='optlearner_output_NOGRAD')(delta_d_hat) # False loss designed to pass the learned offset as a gradient to the embedding layer false_loss_smpl = Multiply(name="smpl_diff")([optlearner_params, delta_d_hat_NOGRAD]) print("smpl loss shape: " + str(false_loss_smpl.shape)) #return [optlearner_input, gt_params, gt_pc], [optlearner_params, false_loss_delta_d, optlearner_pc, false_loss_pc, false_loss_delta_d_hat, false_sin_loss_delta_d_hat, false_loss_smpl, delta_d, delta_d_hat, delta_d_hat_NOGRAD] return [optlearner_input, gt_params, gt_pc], [optlearner_params, false_loss_delta_d, optlearner_pc, false_loss_pc, false_loss_delta_d_hat, false_sin_loss_delta_d_hat, false_loss_smpl, delta_d, delta_d_hat, dist_angles]
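# Hedged usage sketch (not part of the original code): the architecture function returns
# lists of input and output tensors, so a trainable Keras Model would be assembled roughly
# as below; param_trainable, init_wrapper, smpl_params, input_info and faces are assumed to
# come from the surrounding SMPL setup code.
inputs, outputs = DeepConv1DOptLearnerStaticArchitecture(param_trainable, init_wrapper,
                                                         smpl_params, input_info, faces,
                                                         emb_size=1000,
                                                         input_type="MESH_NORMALS")
optlearner_model = Model(inputs=inputs, outputs=outputs)
optlearner_model.summary()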
def createBaseNetworkLarge(inputDim, inputLength):
    baseNetwork = Sequential()
    baseNetwork.add(
        Embedding(input_dim=inputDim, output_dim=inputDim, input_length=inputLength))
    baseNetwork.add(
        Conv1D(1024, 7, strides=1, padding='valid', activation='relu',
               kernel_initializer=RandomNormal(mean=0.0, stddev=0.02),
               bias_initializer=RandomNormal(mean=0.0, stddev=0.02)))
    baseNetwork.add(MaxPooling1D(pool_size=3, strides=3))
    baseNetwork.add(
        Conv1D(1024, 7, strides=1, padding='valid', activation='relu',
               kernel_initializer=RandomNormal(mean=0.0, stddev=0.02),
               bias_initializer=RandomNormal(mean=0.0, stddev=0.02)))
    baseNetwork.add(MaxPooling1D(pool_size=3, strides=3))
    baseNetwork.add(
        Conv1D(1024, 3, strides=1, padding='valid', activation='relu',
               kernel_initializer=RandomNormal(mean=0.0, stddev=0.02),
               bias_initializer=RandomNormal(mean=0.0, stddev=0.02)))
    baseNetwork.add(
        Conv1D(1024, 3, strides=1, padding='valid', activation='relu',
               kernel_initializer=RandomNormal(mean=0.0, stddev=0.02),
               bias_initializer=RandomNormal(mean=0.0, stddev=0.02)))
    baseNetwork.add(
        Conv1D(1024, 3, strides=1, padding='valid', activation='relu',
               kernel_initializer=RandomNormal(mean=0.0, stddev=0.02),
               bias_initializer=RandomNormal(mean=0.0, stddev=0.02)))
    baseNetwork.add(
        Conv1D(1024, 3, strides=1, padding='valid', activation='relu',
               kernel_initializer=RandomNormal(mean=0.0, stddev=0.02),
               bias_initializer=RandomNormal(mean=0.0, stddev=0.02)))
    baseNetwork.add(MaxPooling1D(pool_size=3, strides=3))
    baseNetwork.add(Flatten())
    baseNetwork.add(Dense(2048, activation='relu'))
    baseNetwork.add(Dropout(0.5))
    baseNetwork.add(Dense(2048, activation='relu'))
    baseNetwork.add(Dropout(0.5))
    return baseNetwork
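# Illustrative call (placeholder values, not from the source): for a character-level model
# the input dimension would be the alphabet size and the input length the maximum document
# length in characters.
alphabet_size = 70          # hypothetical vocabulary size
max_sequence_length = 1014  # hypothetical input length
base_network = createBaseNetworkLarge(alphabet_size, max_sequence_length)
base_network.summary()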
           strides=1,
           activation='relu',
           input_shape=(14, 64)))
model.add(
    Conv1D(256,
           kernel_size=3,
           strides=1,
           activation='relu',
           input_shape=(14, 128)))
model.add(
    Conv1D(512,
           kernel_size=3,
           strides=1,
           activation='relu',
           input_shape=(14, 256)))
model.add(MaxPooling1D(pool_size=3, strides=1))
model.add(Flatten())
model.add(Dropout(0.5, input_shape=(7168, )))
model.add(Dense(500, activation='relu'))
model.add(Dropout(0.5, input_shape=(500, )))
model.add(Dense(100, activation='relu'))
model.add(Dense(2, activation='softmax'))
adamx = keras.optimizers.Adamax(lr=0.001,
                                beta_1=0.9,
                                beta_2=0.999,
                                epsilon=None,
                                decay=0.0)
model.compile(loss='sparse_categorical_crossentropy',
              optimizer=adamx,
              metrics=['accuracy'])
for word, i in word_index.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        # words not found in embedding index will be all-zeros.
        embedding_matrix[i] = embedding_vector

embedding_layer = Embedding(len(word_index) + 1,
                            EMBEDDING_DIM,
                            weights=[embedding_matrix],
                            input_length=MAX_SEQUENCE_LENGTH,
                            trainable=True)

sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32')
embedded_sequences = embedding_layer(sequence_input)
l_cov1 = Conv1D(128, 5, activation='relu')(embedded_sequences)
l_pool1 = MaxPooling1D(5)(l_cov1)
l_cov2 = Conv1D(128, 5, activation='relu')(l_pool1)
l_pool2 = MaxPooling1D(5)(l_cov2)
l_cov3 = Conv1D(128, 5, activation='relu')(l_pool2)
l_pool3 = MaxPooling1D(35)(l_cov3)  # global max pooling
l_flat = Flatten()(l_pool3)
l_dense = Dense(128, activation='relu')(l_flat)
preds = Dense(2, activation='softmax')(l_dense)

model = Model(sequence_input, preds)
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['acc'])

print("model fitting - simplified convolutional neural network")
model.summary()
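# A hedged training sketch (not in the original excerpt): x_train/x_val would be index
# sequences padded to MAX_SEQUENCE_LENGTH and y_train/y_val the matching one-hot labels;
# the epoch and batch-size values are placeholders.
history = model.fit(x_train, y_train,
                    validation_data=(x_val, y_val),
                    epochs=10, batch_size=128)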
'-----------------------------------------------------------------------------------------------------------------------'
print(Oxxx1)
O1 = add([Oxxx1, Oxxx2])
O1 = Reshape((-1, 256))(O1)

x22 = Conv1D(filters=512, kernel_size=3, strides=1, activation='relu')(O1)
x22 = BatchNormalization()(x22)
shorcut = Conv1D(filters=512, kernel_size=3, strides=1, activation='relu', padding='same')(x22)
shorcut = BatchNormalization()(shorcut)
x = add([x22, shorcut])
x2 = MaxPooling1D(2)(x)

x22 = Conv1D(filters=512, kernel_size=3, strides=1, activation='relu')(x2)
x22 = BatchNormalization()(x22)
shorcut = Conv1D(filters=512, kernel_size=3, strides=1, activation='relu', padding='same')(x22)
shorcut = BatchNormalization()(shorcut)
x = add([x22, shorcut])
x2 = MaxPooling1D(2)(x)

O1 = Bidirectional(LSTM(16, return_sequences=True))(x2)
xxxx2 = concatenate([xxx1, xxx2])
LSTM_layer1 = 16 # 64 # 32 # 256 # dropout_n = 0.2 # n_epochs = 30 # 168 n_batch_size = 256 # 64 # LSTMinputDim = (2) numericTS_set = Input(shape=(len(X_train[0]), 2), name='numericTS_set') output_merge = numericTS_set # output_merge = Conv1D(filters=22, kernel_size=6, strides=3, activation='relu')(output_merge) output_merge = MaxPooling1D(pool_size=2)(output_merge) # output_merge = LSTM(return_sequences=False, units=LSTM_layer1, dropout=0, recurrent_dropout=0.2)(output_merge) # output_merge = Dropout(dropout_n)(output_merge) # output_merge = Flatten()(output_merge) # output_merge = Dense(8)(output_merge) output_merge = Dropout(dropout_n)(output_merge) # main_output = Dense(1, activation='sigmoid')(output_merge) # model = Model(inputs=numericTS_set, outputs=main_output)
from keras.layers.core import Activation
from keras.layers.core import Flatten
from keras.layers.core import Dense
from keras.preprocessing import sequence
from keras.optimizers import *
from keras.regularizers import l2

# In[ ]:

print('Training model.')
model = Sequential()
model.add(embedding_layer)
model.add(Convolution1D(100, 5, border_mode="same", input_shape=(65, 300)))
model.add(Activation("tanh"))
model.add(MaxPooling1D(pool_length=5))
model.add(Convolution1D(50, 3, border_mode="same"))
model.add(Activation("tanh"))
model.add(MaxPooling1D(pool_length=2))
model.add(Flatten())
model.add(Dense(500))
model.add(Activation("tanh"))
# softmax classifier
model.add(Dense(69, W_regularizer=l2(0.01)))
model.add(Activation("softmax"))

# # train a 1D convnet with global maxpooling
# sequence_input = Input(shape=(MAX_SEQ_LENGTH,), dtype='int32')
# embedded_sequences = embedding_layer(sequence_input)
# x = Conv1D(100, 5, activation='tanh')(embedded_sequences)
# x = MaxPooling1D(5)(x)
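# The excerpt stops before compilation; a minimal compile sketch (the optimizer choice is
# an assumption, not from the source), matching the 69-way softmax output and one-hot labels:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())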
def build_cnn_dueling_prior(self, nb_ego_states, nb_states_per_vehicle, nb_vehicles, nb_actions, nb_conv_layers, nb_conv_filters, nb_hidden_fc_layers, nb_hidden_neurons, duel, activation='relu', window_length=1, dueling_type='avg', prior_scale_factor=1.): nb_inputs = nb_ego_states + nb_states_per_vehicle * nb_vehicles net_input = Input(shape=(window_length, nb_inputs), name='input') flat_input = Flatten(data_format='channels_first')(net_input) input_ego = Lambda(lambda state: state[:, :nb_ego_states * window_length])(flat_input) input_others = Lambda(lambda state: state[:, nb_ego_states * window_length:])(flat_input) input_others_reshaped = Reshape((nb_vehicles * nb_states_per_vehicle * window_length, 1,), input_shape=(nb_vehicles * nb_states_per_vehicle * window_length,))(input_others) ego_net_prior = Dense(nb_conv_filters, activation=activation, kernel_initializer='glorot_normal', trainable=False, name='ego_prior_0')(input_ego) for i in range(nb_conv_layers - 1): ego_net_prior = Dense(nb_conv_filters, activation=activation, kernel_initializer='glorot_normal', trainable=False, name='ego_prior_' + str(i + 1))(ego_net_prior) prior_conv_net = Conv1D(nb_conv_filters, nb_states_per_vehicle * window_length, strides=nb_states_per_vehicle * window_length, activation=activation, kernel_initializer='glorot_normal', trainable=False)(input_others_reshaped) for _ in range(nb_conv_layers - 1): prior_conv_net = Conv1D(nb_conv_filters, 1, strides=1, activation=activation, kernel_initializer='glorot_normal', trainable=False)(prior_conv_net) prior_pool = MaxPooling1D(pool_size=nb_vehicles)(prior_conv_net) prior_conv_net_out = Reshape((nb_conv_filters,), input_shape=(1, nb_conv_filters,), name='prior_convnet_out')(prior_pool) prior_merged = concatenate([ego_net_prior, prior_conv_net_out]) prior_joint_net = Dense(nb_hidden_neurons, activation=activation, kernel_initializer='glorot_normal', trainable=False)(prior_merged) for _ in range(nb_hidden_fc_layers-1): prior_joint_net = Dense(nb_hidden_neurons, activation=activation, kernel_initializer='glorot_normal', trainable=False)(prior_joint_net) if duel: prior_out_wo_dueling = Dense(nb_actions+1, activation='linear', name='prior_out_wo_dueling', trainable=False)(prior_joint_net) if dueling_type == 'avg': prior_out = Lambda(lambda a: K.expand_dims(a[:, 0], -1) + a[:, 1:] - K.mean(a[:, 1:], axis=-1, keepdims=True), output_shape=(nb_actions,), name='prior_out')(prior_out_wo_dueling) elif dueling_type == 'max': prior_out = Lambda(lambda a: K.expand_dims(a[:, 0], -1) + a[:, 1:] - K.max(a[:, 1:], axis=-1, keepdims=True), output_shape=(nb_actions,), name='prior_out')(prior_out_wo_dueling) elif dueling_type == 'naive': prior_out = Lambda(lambda a: K.expand_dims(a[:, 0], -1) + a[:, 1:], output_shape=(nb_actions,), name='prior_out')(prior_out_wo_dueling) else: assert False, "dueling_type must be one of {'avg','max','naive'}" else: prior_out = Dense(nb_actions, activation='linear', name='prior_out', trainable=False)(prior_joint_net) prior_scale = Lambda(lambda x: x * prior_scale_factor, name='prior_scale')(prior_out) ego_net_trainable = Dense(nb_conv_filters, activation=activation, kernel_initializer='glorot_normal', name='ego_trainable_0')(input_ego) for i in range(nb_conv_layers - 1): ego_net_trainable = Dense(nb_conv_filters, activation=activation, kernel_initializer='glorot_normal', name='ego_trainable_' + str(i + 1))(ego_net_trainable) trainable_conv_net = Conv1D(nb_conv_filters, nb_states_per_vehicle * window_length, strides=nb_states_per_vehicle * window_length, 
activation=activation, kernel_initializer='glorot_normal', trainable=True)(input_others_reshaped) for _ in range(nb_conv_layers - 1): trainable_conv_net = Conv1D(nb_conv_filters, 1, strides=1, activation=activation, kernel_initializer='glorot_normal', trainable=True)(trainable_conv_net) trainable_pool = MaxPooling1D(pool_size=nb_vehicles)(trainable_conv_net) trainable_conv_net_out = Reshape((nb_conv_filters,), input_shape=(1, nb_conv_filters,), name='trainable_convnet_out')(trainable_pool) trainable_merged = concatenate([ego_net_trainable, trainable_conv_net_out]) trainable_joint_net = Dense(nb_hidden_neurons, activation=activation, kernel_initializer='glorot_normal', trainable=True)(trainable_merged) for _ in range(nb_hidden_fc_layers-1): trainable_joint_net = Dense(nb_hidden_neurons, activation=activation, kernel_initializer='glorot_normal', trainable=True)(trainable_joint_net) if duel: trainable_out_wo_dueling = Dense(nb_actions + 1, activation='linear', name='trainable_out_wo_dueling', trainable=True)(trainable_joint_net) if dueling_type == 'avg': trainable_out = Lambda(lambda a: K.expand_dims(a[:, 0], -1) + a[:, 1:] - K.mean(a[:, 1:], axis=-1, keepdims=True), output_shape=(nb_actions,), name='trainable_out')(trainable_out_wo_dueling) elif dueling_type == 'max': trainable_out = Lambda(lambda a: K.expand_dims(a[:, 0], -1) + a[:, 1:] - K.max(a[:, 1:], axis=-1, keepdims=True), output_shape=(nb_actions,), name='trainable_out')(trainable_out_wo_dueling) elif dueling_type == 'naive': trainable_out = Lambda(lambda a: K.expand_dims(a[:, 0], -1) + a[:, 1:], output_shape=(nb_actions,), name='trainable_out')(trainable_out_wo_dueling) else: assert False, "dueling_type must be one of {'avg','max','naive'}" else: trainable_out = Dense(nb_actions, activation='linear', name='trainable_out', trainable=True)(trainable_joint_net) add_output = add([trainable_out, prior_scale], name='final_output') self.model = Model(inputs=net_input, outputs=add_output)
# word
input = Input(shape=(max_tweet_word_count, ))
x1 = Embedding(vocab_size,
               model_mat[0].shape[1],
               input_length=max_tweet_word_count,
               weights=model_mat,
               trainable=False)(input)
# x1 = Flatten()(x1)
# x1 = Model(inputs=input, outputs=x1)
# x3 = Flatten()(x3)
# x3 = Model(inputs=input, outputs=x3)

z = Conv1D(100, 5, activation='relu')(x1)
z = Conv1D(100, 5, activation='relu')(z)
z = MaxPooling1D()(z)
z = Conv1D(160, 5, activation='relu')(z)
z = Conv1D(160, 5, activation='relu')(z)
z = GlobalMaxPooling1D()(z)
z = Dropout(0.5)(z)
# f = Flatten()(z)

# conv
# pooling
# flatten
# dense
# (possibly a residual conv)

# z = Dense(10, activation="relu")(z)
z = Dense(1, activation="sigmoid")(z)  # ca 20kk total params
# model.add(GlobalAveragePooling1D())
# model.add(Dropout(0.3))
# model.add(Dense(128, activation='sigmoid'))
# model.add(Dense(64, activation='sigmoid'))
# model.add(Dense(32, activation='sigmoid'))
# model.add(Dense(2, activation='softmax'))

model = Sequential()
model.add(
    Conv1D(128, 3, activation='sigmoid', input_shape=(n_packets, n_features)))
model.add(BatchNormalization(axis=-1))
#model.add(MaxPooling1D(3))
#model.add(Conv1D(256, 8, activation='sigmoid'))
model.add(MaxPooling1D(3))
model.add(Conv1D(64, 3, activation='relu'))
model.add(BatchNormalization(axis=-1))
model.add(GlobalAveragePooling1D())
model.add(Dropout(0.5))
model.add(Dense(128, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(2, activation='sigmoid'))
print(model.summary())

#model.compile(loss = "categorical_crossentropy", optimizer ='adam', metrics=['acc' ,precision_m, recall_m])
model.compile(loss="binary_crossentropy",
              optimizer='adam',
              metrics=['acc', precision_m, recall_m])
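# precision_m and recall_m are referenced in the compile call above but are not defined in
# this excerpt; a common Keras-backend implementation is sketched below (an assumption about
# the author's helpers, and it would need to appear before model.compile is called):
from keras import backend as K

def recall_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    return true_positives / (possible_positives + K.epsilon())

def precision_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    return true_positives / (predicted_positives + K.epsilon())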
target_num = [category_to_num[t] for t in target]
target_one_hot = np_utils.to_categorical(target_num)
# print(np.unique(target_one_hot))
# exit()

X_train, X_test, y_train, y_test = train_test_split(data,
                                                    target_one_hot,
                                                    test_size=0.33,
                                                    random_state=42)

model = Sequential()
model.add(Embedding(1000, 128, input_length=300))
model.add(Conv1D(32, (5), activation='relu'))
model.add(Dropout(.4))
model.add(MaxPooling1D())
model.add(Conv1D(32, (5), activation='relu'))
model.add(Dropout(.4))
model.add(MaxPooling1D())
model.add(Conv1D(32, (5), activation='relu'))
model.add(Dropout(.4))
model.add(MaxPooling1D())
model.add(Dense(300, activation='relu'))
model.add(Dropout(.5))
model.add(Dense(30, activation='relu'))
model.add(Dropout(.5))
model.add(Flatten())
model.add(Dense(2, activation='sigmoid'))
model.summary()
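# Compilation and training are not part of the excerpt; a minimal sketch (the optimizer,
# epoch count and batch size are assumptions), pairing the one-hot targets created above
# with categorical crossentropy:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=5, batch_size=64)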
# classWeight = compute_class_weight('balanced', np.unique(Y), Y)
# classWeight = dict(enumerate(classWeight))
sample_weight = compute_sample_weight('balanced', Y_train)

# build and fit the network
classifier = Sequential()
# classifier.add(Conv1D(filters=32, kernel_size=(3), data_format='channels_first', input_shape=(8, X_train.shape[2]), activation='relu', kernel_regularizer=l2(0.0005)))
X_train, X_test = np.expand_dims(X_train, axis=1), np.expand_dims(X_test, axis=1)
classifier.add(Conv1D(filters=32, kernel_size=(3), data_format='channels_first',
                      input_shape=(1, X_train.shape[2]), activation='relu',
                      kernel_regularizer=l2(0.0005)))
classifier.add(BatchNormalization())
classifier.add(MaxPooling1D(pool_size=2, data_format='channels_first'))
classifier.add(Conv1D(filters=32, kernel_size=(3), data_format='channels_first', activation='relu'))
classifier.add(BatchNormalization())
classifier.add(MaxPooling1D(pool_size=2, data_format='channels_first'))
classifier.add(Conv1D(filters=32, kernel_size=(3), data_format='channels_first', activation='relu'))
classifier.add(BatchNormalization())
classifier.add(MaxPooling1D(pool_size=2, data_format='channels_first'))
classifier.add(Flatten())
classifier.add((Dense(units=64, activation='relu')))
classifier.add(Dropout(rate=0.2))
classifier.add((Dense(units=32, activation='relu')))
x_train = np.expand_dims(x_train, axis=1)
x_test = np.expand_dims(x_test, axis=2)
x_test = np.expand_dims(x_test, axis=1)
#y_train = np.expand_dims(y_train,axis=2)
seq_lenth = x_train.shape[0]
seq_width = x_train.shape[1]
print(x_train.shape)
print(x_test.shape)

##### build up cnn model
model = Sequential()
model.add(TimeDistributed(Conv1D(64, 60, activation='relu', padding='same'),
                          batch_input_shape=(None, None, 29, 1)))
model.add(TimeDistributed(MaxPooling1D(2)))
model.add(TimeDistributed(Conv1D(64, 60, activation='relu', padding='same')))
model.add(TimeDistributed(MaxPooling1D(2)))
model.add(TimeDistributed(Dropout(0.5)))
model.add(TimeDistributed(Flatten()))
model.add(TimeDistributed(Dense(256, activation='relu')))
model.add(LSTM(50, return_sequences=True))
model.add(LSTM(50))
model.add(Dense(256, activation='relu'))
model.add(Dense(256, activation='relu'))
model.add(Dense(2, activation='softmax'))


def matthews_correlation(y_true, y_pred):
    y_pred_pos = k.round(k.clip(y_pred, 0, 1))
def create_model(X_vocab_len, X_max_len, y_vocab_len, y_max_len, n_phonetic_features, y1, n1, y2, n2, y3, n3, y4, n4, y5, n5, y6, n6, hidden_size, num_layers): def smart_merge(vectors, **kwargs): return vectors[0] if len(vectors) == 1 else merge(vectors, **kwargs) current_word = Input(shape=(X_max_len, ), dtype='float32', name='input1') # for encoder (shared) root_word = Input(shape=(X_max_len, ), dtype='float32', name='input2') decoder_input = Input(shape=(X_max_len, ), dtype='float32', name='input3') # for decoder -- attention right_word1 = Input(shape=(X_max_len, ), dtype='float32', name='input4') right_word2 = Input(shape=(X_max_len, ), dtype='float32', name='input5') right_word3 = Input(shape=(X_max_len, ), dtype='float32', name='input6') right_word4 = Input(shape=(X_max_len, ), dtype='float32', name='input7') left_word1 = Input(shape=(X_max_len, ), dtype='float32', name='input8') left_word2 = Input(shape=(X_max_len, ), dtype='float32', name='input9') left_word3 = Input(shape=(X_max_len, ), dtype='float32', name='input10') left_word4 = Input(shape=(X_max_len, ), dtype='float32', name='input11') phonetic_input = Input(shape=(n_phonetic_features, ), dtype='float32', name='input12') emb_layer1 = Embedding(X_vocab_len, EMBEDDING_DIM, input_length=X_max_len, mask_zero=False, name='Embedding') list_of_inputs = [ current_word, root_word, right_word1, right_word2, right_word3, right_word4, left_word1, left_word2, left_word3, left_word4 ] current_word_embedding, root_word_embedding, right_word_embedding1, right_word_embedding2,right_word_embedding3, right_word_embedding4, \ left_word_embedding1, left_word_embedding2, left_word_embedding3, left_word_embedding4 = [emb_layer1(i) for i in list_of_inputs] print("Typeeeee:: ", type(current_word_embedding)) current_word_embedding = smart_merge( [current_word_embedding, root_word_embedding]) # concatenate root word with current input list_of_embeddings1 = [current_word_embedding, right_word_embedding1, right_word_embedding2,right_word_embedding3, right_word_embedding4, \ left_word_embedding1, left_word_embedding2, left_word_embedding3, left_word_embedding4] # list_of_embeddings = [smart_merge([i,root_word_embedding]) for i in list_of_embeddings] # concatenate root word with each of inputs list_of_embeddings = [ Dropout(0.50, name='drop1_' + str(j))(i) for i, j in zip(list_of_embeddings1, range(len(list_of_embeddings1))) ] list_of_embeddings = [ GaussianNoise(0.05, name='noise1_' + str(j))(i) for i, j in zip(list_of_embeddings, range(len(list_of_embeddings))) ] conv4_curr, conv4_right1, conv4_right2, conv4_right3, conv4_right4, conv4_left1, conv4_left2, conv4_left3, conv4_left4 =\ [Conv1D(filters=no_filters, kernel_size=4, padding='valid',activation='relu', strides=1, name='conv4_'+str(j))(i) for i,j in zip(list_of_embeddings, range(len(list_of_embeddings)))] conv4s = [ conv4_curr, conv4_right1, conv4_right2, conv4_right3, conv4_right4, conv4_left1, conv4_left2, conv4_left3, conv4_left4 ] maxPool4 = [ MaxPooling1D(name='max4_' + str(j))(i) for i, j in zip(conv4s, range(len(conv4s))) ] avgPool4 = [ AveragePooling1D(name='avg4_' + str(j))(i) for i, j in zip(conv4s, range(len(conv4s))) ] pool4_curr, pool4_right1, pool4_right2, pool4_right3, pool4_right4, pool4_left1, pool4_left2, pool4_left3, pool4_left4 = \ [merge([i,j], name='merge_conv4_'+str(k)) for i,j,k in zip(maxPool4, avgPool4, range(len(maxPool4)))] conv5_curr, conv5_right1, conv5_right2, conv5_right3, conv5_right4, conv5_left1, conv5_left2, conv5_left3, conv5_left4 = \ 
[Conv1D(filters=no_filters, kernel_size=5, padding='valid', activation='relu', strides=1, name='conv5_'+str(j))(i) for i,j in zip(list_of_embeddings, range(len(list_of_embeddings)))] conv5s = [ conv5_curr, conv5_right1, conv5_right2, conv5_right3, conv5_right4, conv5_left1, conv5_left2, conv5_left3, conv5_left4 ] maxPool5 = [ MaxPooling1D(name='max5_' + str(j))(i) for i, j in zip(conv5s, range(len(conv5s))) ] avgPool5 = [ AveragePooling1D(name='avg5_' + str(j))(i) for i, j in zip(conv5s, range(len(conv5s))) ] pool5_curr, pool5_right1, pool5_right2, pool5_right3, pool5_right4, pool5_left1, pool5_left2, pool5_left3, pool5_left4 = \ [merge([i,j], name='merge_conv5_'+str(k)) for i,j,k in zip(maxPool5, avgPool5, range(len(maxPool5)))] maxPools = [pool4_curr, pool4_right1, pool4_right2, pool4_right3, pool4_right4, \ pool4_left1, pool4_left2, pool4_left3, pool4_left4, \ pool5_curr, pool5_right1, pool5_right2, pool5_right3, pool5_right4, \ pool5_left1, pool5_left2, pool5_left3, pool5_left4] concat = merge(maxPools, mode='concat', name='main_merge') # curr_vector_total = smart_merge([pool4_curr, pool5_curr], mode='concat') x = Dropout(0.15, name='drop_single1')(concat) x = Bidirectional(RNN(rnn_output_size, name='rnn_for_features'))(x) total_features = [x, phonetic_input] concat2 = merge(total_features, mode='concat', name='phonetic_merging') x = Dense(HIDDEN_DIM, activation='relu', kernel_initializer='he_normal', kernel_constraint=maxnorm(3), bias_constraint=maxnorm(3), name='dense1')(concat2) x = Dropout(0.15, name='drop_single2')(x) x = Dense(HIDDEN_DIM, kernel_initializer='he_normal', activation='tanh', kernel_constraint=maxnorm(3), bias_constraint=maxnorm(3), name='dense2')(x) x = Dropout(0.15, name='drop_single3')(x) out1 = Dense(n1, kernel_initializer='he_normal', activation='softmax', name='output1')(x) out2 = Dense(n2, kernel_initializer='he_normal', activation='softmax', name='output2')(x) out3 = Dense(n3, kernel_initializer='he_normal', activation='softmax', name='output3')(x) out4 = Dense(n4, kernel_initializer='he_normal', activation='softmax', name='output4')(x) out5 = Dense(n5, kernel_initializer='he_normal', activation='softmax', name='output5')(x) out6 = Dense(n6, kernel_initializer='he_normal', activation='softmax', name='output6')(x) # Luong et al. 
2015 attention model emb_layer = Embedding(X_vocab_len, EMBEDDING_DIM, input_length=X_max_len, mask_zero=True, name='Embedding_for_seq2seq') current_word_embedding = emb_layer(current_word) current_word_embedding = GaussianNoise( 0.05, name='noise_seq2seq')(current_word_embedding) encoder, state = RNN(rnn_output_size, return_sequences=True, unroll=True, return_state=True, name='encoder')(current_word_embedding) encoder_last = encoder[:, -1, :] decoder = emb_layer(decoder_input) decoder = GRU(rnn_output_size, return_sequences=True, unroll=True, name='decoder')(decoder, initial_state=[state]) attention = dot([decoder, encoder], axes=[2, 2], name='dot') attention = Activation('softmax', name='attention')(attention) context = dot([attention, encoder], axes=[2, 1], name='dot2') decoder_combined_context = concatenate([context, decoder], name='concatenate') outputs = TimeDistributed( Dense(64, activation='tanh', name='TimeDistributed1'))(decoder_combined_context) outputs = TimeDistributed( Dense(X_vocab_len, activation='softmax', name='TimeDistributed2'))(outputs) all_inputs = [current_word, root_word, decoder_input, right_word1, right_word2, right_word3, right_word4, left_word1, \ left_word2, left_word3, left_word4, phonetic_input] all_outputs = [outputs, out1, out2, out3, out4, out5, out6] model = Model(input=all_inputs, output=all_outputs) opt = Adam() model.compile(optimizer=Adadelta(epsilon=1e-06), loss='categorical_crossentropy', metrics=['accuracy'], loss_weights=[1., 1., 1., 1., 1., 1., 1.]) return model
def ml(tweet_column = 'tweets', labels_column = 'fanboy', languages = ['en'],\ cleaning_words = ['RT','rt','http','https','www','WWW','al','twitter','co','com','html','unsupportedbrowser',], embed_dimension = 300, test_size = 0.2, num_epochs = 1, dataset_name = 'portion.csv' ): org_data = pd.read_csv(dataset_name) #Identifies languages and adds a column as language to the dataset class read_languages: def __init__(self, dataset): self.dataset = dataset def read_all(self): langs = [] undetected = [] dataset = self.dataset print(' determining the language ') bar = progressbar.ProgressBar(maxval=len(dataset), \ widgets=[progressbar.Bar('=', '[', ']'), ' ', progressbar.Percentage()]) bar.start(); for i in range (len(dataset)): try : pred = langdetect(str(dataset[tweet_column][i])) langs.append(pred) except : undetected.append('could not detect language') langs.append(None) bar.update(i+1) bar.finish() dataset['language'] = langs self.dataset = dataset return(dataset) def known_languages(self,langs): dataset = self.dataset dataset = dataset[dataset['language'].notnull()] merged1 = pd.DataFrame() for i in langs: merged1 = merged1.append(dataset[dataset['language']==i]) return(merged1) # calling the language determiner rd_lang = read_languages(org_data) rd_lang.read_all() output_language = rd_lang.known_languages(languages) #Balancing the dataset merged = output_language non_fan = merged.loc[merged[labels_column] == 0].reset_index(drop=True) fan = merged.loc[merged[labels_column] == 1].reset_index(drop=True) max_len = min(len(fan),len(non_fan)) merged = pd.concat([fan[:max_len], non_fan[:max_len]], axis=0) #A little pre-processing and removing the stopwords tweets = merged actual_tweets = tweets[tweet_column].copy() lmtzr = WordNetLemmatizer() # print('-------Lemmazation--------') tweets[tweet_column] = tweets[tweet_column].apply(lambda x: ' '.join([lmtzr.lemmatize(word,'v') for word in x.split() ])) ## Iterate over the data to preprocess by removing stopwords lines_without_stopwords=[] for line in tweets[tweet_column].values: line = line.lower() line_by_words = re.findall(r'(?:\w+)', line, flags = re.UNICODE) # remove punctuation ans split new_line=[] additional = cleaning_words for word in line_by_words: if word not in additional: if (len(word)>2 and word not in stop): new_line.append(word) if(len(word)==2 and word[0].isnumeric()==False and word[1].isnumeric()==False and word not in stop): new_line.append(word) lines_without_stopwords.append(new_line) texts = lines_without_stopwords tweets[tweet_column] = texts #split the data to train and test train_set, test_set, actual_tweets_train, actual_tweets_test = train_test_split(tweets, actual_tweets, test_size=test_size, shuffle=True) train_set = train_set.reset_index(drop=True) test_set = test_set.reset_index(drop=True) actual_tweets_train = actual_tweets_train.reset_index(drop=True) actual_tweets_test = actual_tweets_test.reset_index(drop=True) embeddings_index = {} f = open('glove/glove.6B.%dd.txt' % embed_dimension) for line in f: values = line.split(' ') word = values[0] ## The first entry is the word coefs = np.asarray(values[1:], dtype='float32') ## These are the vecotrs representing the embedding for the word embeddings_index[word] = coefs f.close() print("Glove data loaded") # USING KERAS TO WORK WITH embeddings # For test test set # encoder = LabelEncoder() # encoder.fit(output_language['fanboy']) # encoded_Y = encoder.transform(output_language['fanboy']) encoded_Y = test_set[labels_column] texts = test_set[tweet_column] MAX_NUM_WORDS = 
100000 MAX_SEQUENCE_LENGTH = embed_dimension tokenizer = Tokenizer(num_words=MAX_NUM_WORDS, filters='!"#$%&()*+,-./:;<=>?@[\]^_`{"}~\t\n',lower=False, split=" ") tokenizer.fit_on_texts(texts) sequences = tokenizer.texts_to_sequences(texts) #sequences = tokenizer.texts_to_matrix(texts, mode='tfidf') word_index = tokenizer.word_index print('Found %s unique tokens.' % len(word_index)) data_test = pad_sequences(sequences, maxlen=MAX_SEQUENCE_LENGTH) labels_test = to_categorical(np.asarray(encoded_Y)) #For train train set ## Code adapted from (https://github.com/keras-team/keras/blob/master/examples/pretrained_word_embeddings.py) # Vectorize the text samples # encoder = LabelEncoder() # encoder.fit(output_language['fanboy']) # encoded_Y = encoder.transform(output_language['fanboy']) encoded_Y = train_set[labels_column] texts = train_set[tweet_column] MAX_NUM_WORDS = 100000 MAX_SEQUENCE_LENGTH = embed_dimension tokenizer = Tokenizer(num_words=MAX_NUM_WORDS, filters='!"#$%&()*+,-./:;<=>?@[\]^_`{"}~\t\n',lower=False, split=" ") tokenizer.fit_on_texts(texts) sequences = tokenizer.texts_to_sequences(texts) #sequences = tokenizer.texts_to_matrix(texts, mode='tfidf') word_index = tokenizer.word_index print('Found %s unique tokens.' % len(word_index)) data = pad_sequences(sequences, maxlen=MAX_SEQUENCE_LENGTH) labels = to_categorical(np.asarray(encoded_Y)) #Split into train and validation X_train, X_valid, y_train, y_valid = train_test_split(data, labels, test_size=0.2, shuffle=True) ## More code adapted from the keras reference (https://github.com/keras-team/keras/blob/master/examples/pretrained_word_embeddings.py) # prepare embedding matrix ## EMBEDDING_DIM = ## seems to need to match the embeddings_index dimension EMBEDDING_DIM = embeddings_index.get('a').shape[0] num_words = min(MAX_NUM_WORDS, len(word_index)) + 1 found_words = 0 not_found = 0 embedding_matrix = np.zeros((num_words, EMBEDDING_DIM)) for word, i in word_index.items(): if i > MAX_NUM_WORDS: continue embedding_vector = embeddings_index.get(word) ## This references the loaded embeddings dictionary if embedding_vector is not None: # words not found in embedding index will be all-zeros. 
embedding_matrix[i] = embedding_vector found_words +=1 else : not_found+1 # load pre-trained word embeddings into an Embedding layer # note that we set trainable = False so as to keep the embeddings fixed embedding_layer = Embedding(num_words, EMBEDDING_DIM, embeddings_initializer=Constant(embedding_matrix), input_length=MAX_SEQUENCE_LENGTH, trainable=False) ## Code from: https://medium.com/@sabber/classifying-yelp-review-comments-using-cnn-lstm-and-pre-trained-glove-word-embeddings-part-3-53fcea9a17fa ## To create and visualize a model model = Sequential() model.add(Embedding(num_words, embed_dimension, input_length=embed_dimension, weights= [embedding_matrix], trainable=False)) model.add(Dropout(rate = 0.2)) model.add(Conv1D(128, 2, activation='relu')) model.add(MaxPooling1D(pool_size=4)) model.add(LSTM(embed_dimension)) model.add(Dense(2, activation='softmax')) model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy']) # Finally training the model history = model.fit(X_train, y_train, validation_data=(X_valid, y_valid), epochs=num_epochs) #getting the accuracy score score = model.evaluate(data_test, labels_test) test_score = score[1] print('accuracy is: ',test_score) #Get predictions and creating a new dataset of predicted labels including tweets and exporting it to csv ynew = model.predict_classes(data_test) df = pd.DataFrame({'tweets':test_set[tweet_column],'actual_tweet':actual_tweets_test,'predicted':ynew,'label':test_set[labels_column]}) # test_dataset.to_csv('predicted_test_set.csv',index=False) lst = list() for i in range(len(actual_tweets_test)): a = {} a["tweets"] = actual_tweets_test[i] a["fanboy"] = int(ynew[i]) lst.append(a) with open('files/predicted_tweets.json', 'w', encoding='utf-8') as f: json.dump(lst, f, ensure_ascii=False, indent=4) print("created 'predicted_tweets.json' in 'files'") #create json file # df = pd.read_csv('predicted_test_set.csv') tweets = test_set[tweet_column] wordsarray = [] for i in range(len(tweets)): wordsarray += tweets[i] c=Counter(wordsarray) sorted_d = sorted(c.items(), key=lambda x: x[1], reverse=True) lst = list() for i in range(len(sorted_d)): a = {} a["text"] = sorted_d[i][0] a["size"] = sorted_d[i][1] lst.append(a) with open('files/words.json', 'w', encoding='utf-8') as f: json.dump(lst, f, ensure_ascii=False, indent=4) print("created 'words.json' in 'files'") #getting uniq words uniq_words = list(set(wordsarray)) #making a csv containing each word with its label words=[] relateds = [] fanboys = [] fanboy_precentage = [] word_labels = [] for i in range(len(uniq_words)): related_count=0 fanboy_count=0 word = uniq_words[i] for j in range(len(df)): if word in df[tweet_column][j]: if (df['predicted'][j] ==1): related_count +=1 if (df['predicted'][j] ==0): fanboy_count +=1 words.append(word) relateds.append(related_count) fanboys.append(fanboy_count) fanboy_precentage.append(int((fanboy_count/(fanboy_count+related_count))*100)) if (fanboy_precentage[i]>50): word_labels.append(1) else: word_labels.append(0) words_df = pd.DataFrame({'word':words,'related_count':relateds, 'fanboy_count':fanboys, 'fanboy_precentage':fanboy_precentage, 'label':word_labels}) words_df.to_csv('files/word_predictions.csv',index=False) print("created 'word_predictions.csv' in 'files'") return(test_score)
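# Illustrative call of the pipeline above (an assumption, not from the source): it expects
# the GloVe vectors under glove/ and a files/ output directory to exist, and it returns the
# held-out test accuracy.
test_accuracy = ml(num_epochs=3, test_size=0.2, dataset_name='portion.csv')
print('held-out accuracy:', test_accuracy)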
def CNN_model(X_training, X_test, y_training, y_test, n_epochs=100, batch_size=256,
              model_name='model', history_file='model_accuracies.csv',
              conf_matrix=False, accuracy_report=False):
    while os.path.isfile(model_name + ".h5"):
        model_name = model_name + str(1)

    csv_logger = CSVLogger('model_training.log')
    plot_losses = my_callbacks.PlotLosses()
    metrics = my_callbacks.Metrics()
    f1_accuracy = my_callbacks.F1Metric()
    earlystop = EarlyStopping(monitor='val_acc', patience=10, mode='auto')
    adam = Adam(lr=0.00001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)

    model = Sequential()
    model.add(
        Conv1D(32, 9, input_shape=(X_training.shape[1], 1),
               kernel_initializer=he_normal(seed=12), activation='relu',
               W_regularizer=l1_l2(0.01)))
    model.add(BatchNormalization())
    model.add(MaxPooling1D(1))
    model.add(
        Conv1D(32, 3, activation='relu', W_regularizer=l1_l2(0.01), padding='same'))
    model.add(MaxPooling1D(3, padding='same'))
    model.add(BatchNormalization())
    model.add(
        Conv1D(9, 3, activation='relu', W_regularizer=l1_l2(0.01), padding='same'))
    model.add(MaxPooling1D(3, padding='same'))
    model.add(BatchNormalization())
    model.add(
        Conv1D(9, 3, activation='relu', W_regularizer=l1_l2(0.01), padding='same'))
    model.add(MaxPooling1D(3, padding='same'))
    model.add(BatchNormalization())
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(256, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))
    model.add(Dense(17, activation='softmax', input_shape=(1, )))
    model.compile(optimizer=adam, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    print('starts fitting model ...')
    start = time.time()
    model.fit(X_training, y_training, batch_size=batch_size, epochs=n_epochs,
              validation_data=(X_test, y_test), callbacks=[metrics, csv_logger])
    end = time.time()
    delta = end - start
    print('fitting time: ', delta)

    print('starts predicting model ...')
    start_prediction = time.time()
    model.predict(X_test)
    end_prediction = time.time()
    delta_prediction = end_prediction - start_prediction
    print('prediction time: ', delta_prediction)
    y_pred = model.predict_classes(X_test)

    model.save_weights(model_name + ".h5")
    print('weights saved to disk')
    model_json = model.to_json()
    with open(model_name + '.json', 'w') as json_file:
        json_file.write(model_json)
    print('model saved to disk')

    with open(history_file, 'a', newline='') as history:
        writer = csv.writer(history, delimiter=';')
        writer.writerow([
            model_name,
            accuracy_score(y_test, y_pred),
            cohen_kappa_score(y_test, y_pred),
            f1_score(y_test, y_pred, average='weighted'),
            delta, delta_prediction
        ])

    if conf_matrix:
        cm_filename = model_name + '_cm.csv'
        cm = pd.DataFrame(confusion_matrix(y_test, y_pred))
        cm.to_csv(cm_filename)
    if accuracy_report:
        raport_filename = model_name + '_report.csv'
        report = classification_report(y_test, y_pred)
        with open(raport_filename, 'w') as acc_report:
            acc_report.write(report)
    return y_pred
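# Hedged usage sketch (not from the source): the network expects inputs shaped
# (samples, features, 1) and integer class labels (sparse_categorical_crossentropy), so the
# reshape below is an assumption about how the feature matrices would be prepared.
X_training = X_training.reshape((X_training.shape[0], X_training.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))
y_pred = CNN_model(X_training, X_test, y_training, y_test,
                   n_epochs=100, batch_size=256,
                   model_name='cnn_baseline', conf_matrix=True)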
def get_model(self):
    #from keras_self_attention import SeqSelfAttention
    from keras.layers import Dense
    from keras import backend
    from keras.layers import TimeDistributed, Flatten, Conv1D, MaxPooling1D, GlobalMaxPooling1D
    from keras.callbacks import ModelCheckpoint

    # Model variables
    backend.clear_session()
    n_hidden = 50
    gradient_clipping_norm = 1.25
    batch_size = 128
    n_epoch = 100

    def exponent_neg_manhattan_distance(left, right):
        ''' Helper function for the similarity estimate of the LSTMs outputs'''
        return K.exp(-K.sum(K.abs(left - right), axis=1, keepdims=True))

    # The visible layer
    left_input = Input(shape=(self.max_seq_length, ), dtype='int32')
    right_input = Input(shape=(self.max_seq_length, ), dtype='int32')

    embedding_layer = Embedding(86002, self.embedding_dim, input_length=self.max_seq_length)

    # Embedded version of the inputs
    #encoded_left = embedding_layer(left_input)
    #encoded_right = embedding_layer(right_input)

    # Since this is a siamese network, both sides share the same LSTM
    # shared_lstm = LSTM(n_hidden,return_sequences=True)
    # left_output = shared_lstm(encoded_left)
    # right_output = shared_lstm(encoded_right)

    encoded_left = embedding_layer(left_input)
    encoded_right = embedding_layer(right_input)

    ## conv12
    conv = Conv1D(filters=1500, kernel_size=4, padding='valid', activation='sigmoid', strides=1)
    encoded_left = conv(encoded_left)
    encoded_right = conv(encoded_right)

    pooling = MaxPooling1D(pool_size=4)
    encoded_left = pooling(encoded_left)
    encoded_right = pooling(encoded_right)

    conv2 = Conv1D(filters=3000, kernel_size=4, padding='valid', activation='sigmoid', strides=1)
    encoded_left = conv2(encoded_left)
    encoded_right = conv2(encoded_right)

    pooling2 = GlobalMaxPooling1D()
    encoded_left = pooling2(encoded_left)
    encoded_right = pooling2(encoded_right)

    dense = Dense(256)
    left_output = dense(encoded_left)
    right_output = dense(encoded_right)

    # Calculates the distance as defined by the MaLSTM model
    malstm_distance = Lambda(
        function=lambda x: exponent_neg_manhattan_distance(x[0], x[1]),
        output_shape=lambda x: (x[0][0], 1))([left_output, right_output])

    # Pack it all up into a model
    malstm = Model([left_input, right_input], [malstm_distance])
    return malstm
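# get_model() returns an uncompiled siamese model; a hedged training sketch (names below are
# placeholders), following the usual MaLSTM recipe of Adadelta with gradient clipping, which
# mirrors the gradient_clipping_norm value set inside get_model():
from keras.optimizers import Adadelta

malstm = model_builder.get_model()   # model_builder: hypothetical instance of the class above
optimizer = Adadelta(clipnorm=1.25)
malstm.compile(loss='mean_squared_error', optimizer=optimizer, metrics=['accuracy'])
malstm.fit([X_left_train, X_right_train], y_similarity,
           batch_size=128, epochs=10, validation_split=0.1)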
def train_and_save_keras_tokenizer_and_nn_model(x_train, y_train, x_test, use_cnn=True, dropout_variation=2): """Train and save Keras tokenizer and Keras NN model. Default parameters were selected to produce what is likely a reasonably optimal model that is less prone to overfitting. Keyword arguments: x_train: Should be a series that's already been pre-preocessed: html->text, lowercase, remove punct./#s It is used: 1) Along with x_test to fit the tokenizer (converts reviews into sequences of integers corresponding to word-frequency rankings). 2) Along with y_train to fit the classification neural net. y_train: Should be a series of sentiment values. It is used along with x_train to fit the classification neural net. x_test: Should be a series that's already been pre-preocessed: html->text, lowercase, remove punct./#s It is also used along with x_train to fit the tokenizer (converts reviews into sequences of integers corresponding to word-frequency rankings). use_cnn: Used to indicate whether or not to include CNN layer (with the corresponding maxpool layer) in the trained neural net. Generally found to produce worse results during tests. Default: False. When set to False: At 3 epochs, dropout_variation=0 and 20% of training data going to validation, training acc was .9001 and validation acc was .8710. With dropout_variation=2: 0.8596/0.8720 (Theano). Setting to True: At 3 epochs, dropout_variation=0 produced training acc of .9043 and validation acc of .8550. With dropout_variation=2, 0.8937/0.8792 (Theano). Test AUC: 0.952308125317 (TensorFlow), 0.953867095363 (Theano). At 10 epochs, dropout_variation=0 produced training acc of .9843 and validation acc of .8625. (overfitting). With dropout_variation=2, 0.9298/0.8658 (Theano). Test AUC 0.954923156362 in TensorFlow, 0.947220311445 in Theano. dropout_variation: Indicates how to use dropout layers, if any, in the trained neural net as follows: Unless specified otherwise, training and validation accuracy scores are reported in the form training acc/validation acc, where number of epochs is 3, dropout_variation is 0, 20% of training data is going to validation, and use_cnn is False. Under those conditions, running time on a modern local machine took 14 to 22 minutes. 0 = No dropouts. Generally found to produce the best results. .9001/.8710. When use_cnn is True: .9043/.8550. Test AUC: 0.946452427092 (TF) / 0.938564451483 (Theano) When use_cnn is True at 10 epochs: .9843/.8625 (overfitting). When use_cnn is False at 10 epochs: .9782/.8585 (overfitting) 1 = One p=0.5 dropout after GRU layer. .9081/.8505. Based on suggestions from: http://www.icfhr2014.org/wp-content/uploads/2015/02/ICFHR2014-Bluche.pdf 2 = p=0.2 dropout between layers: on input to Embedding layer, after Embedding layer, and after GRU layer. Note that one may want to experiment with adding dropout after the optional CNN layer and/or its related MaxPool layer. No CNN: 0.8596/0.8720 (Theano). With 10 epochs: 0.9301/0.8802 Test AUC: 0.948833044597 (TF) / 0.949987991306 (Theano) **With CNN: 3 epochs: 0.8937/0.8792 (Theano), Test AUC: 0.952308125317 (TF), 0.953867095363 (Theano). 10 epochs: 0.9298/0.8658 (Theano). Test AUC: 0.954923156362 (TF), 0.947220311445 (Theano) Based on suggestions from link below. 3 = Applies p=0.2 dropout to input to embeddings and p=0.2 dropout_W/U to input gates and recurrent connections respectively in GRU layer. 0.8246/0.8465 (theano) With 10 epochs: 0.8929/0.8353 Based on suggestions from link below. Default: 2. 
Approach largely based on: http://machinelearningmastery.com/sequence-classification-lstm-recurrent-neural-networks-python-keras/ """ assert dropout_variation in (0, 1, 2, 3), "dropout_variation is not 0, 1, 2, or 3" np.random.seed(SEED) # Note that we assume we have train/test reviews preprocessed: html->text, lowercase, punct/#s removed # Note that in https://github.com/IndicoDataSolutions/Passage/blob/master/examples/sentiment.py they only # extract text from html, lowercase and strip (no punctuation/#s removal) in case one wants to experiment # with different pre-processing variations. # Tokenization: Assign each word in the reviews an ID corresponding to its frequency rank # Note only top 5000 most frequent words are included num_most_freq_words_to_include = 5000 tokenizer = KTokenizer(nb_words=num_most_freq_words_to_include) # Need to convert unicode strings into ascii to avoid tokenization errors # Note that we use both training and test data to fit the tokenizer, since we're not making use of the # test target values, and could theoretically apply this approach at least if the sentiment prediction process # is done in batches offline. train_reviews_list = [s.encode('ascii') for s in x_train.tolist()] test_reviews_list = [s.encode('ascii') for s in x_test.tolist()] all_reviews_list = train_reviews_list + test_reviews_list tokenizer.fit_on_texts(all_reviews_list) # Tokenize reviews where the result is a [review1 tokenized into list of word-freq-ranks, review2 tokenized into..] train_reviews_tokenized = tokenizer.texts_to_sequences(train_reviews_list) # Commented out since we won't evaluate at the end of this function # test_reviews_tokenized = tokenizer.texts_to_sequences(test_reviews_list) # Truncate and pad input sequences, so that we only cover up to the first 500 tokens per review # This ensures all reviews have a representation of the same size, which is needed for the Keras NN to process them. x_train = sequence.pad_sequences(train_reviews_tokenized, maxlen=MAX_REVIEW_LENGTH_FOR_KERAS_RNN) # Commented out since we won't evaluate at the end of this function # x_test = sequence.pad_sequences(test_reviews_tokenized, maxlen=MAX_REVIEW_LENGTH_FOR_KERAS_RNN) # Create the neural net model, which roughly consists of the following: # Embedding layer: Ensures each review is represented as a 32-entry vector whose values typically correspond to # semantic relationship with other words appearing in the reviews. # CNN + MaxPool layer: Helps turn the representation corresponding to a sequence of words into a higher-level # representation corresponding to a sequence of multiple adjacent words. # Let's call this the conceptual sequence representation. # GRU (RNN) layer: Helps turn the conceptual sequence representation into one corresponding to the sequential # relationship of elements in that representation. # Let's call this the conceptual sequence relationship representation. # Dense layer: Fully connected layer, which with the help of the sigmoid function, can turn the # conceptual sequence relationship representation into a binary classification probability. # Depending on dropout_variation, dropout may be used in different parts of the neural net to help reduce # overfitting. 
    # Indicate it's a sequential type of model - a linear stack of layers
    model = Sequential()

    # Decide on the dropout to apply (if any) to the input of the Embedding layer
    initial_dropout = 0.0  # Default KEmbedding dropout value (no dropout)
    if dropout_variation == 2 or dropout_variation == 3:
        initial_dropout = 0.2

    # Create a 32-entry word embedding - ie: each word will be mapped to a 32-entry word embedding vector.
    # Words beyond the most frequent num_most_freq_words_to_include (5000) or beyond the first
    # MAX_REVIEW_LENGTH_FOR_KERAS_RNN (500) in a review are discarded.
    embedding_vector_length = 32
    # Note we provide KEmbedding with the size of the vocab (num_most_freq_words_to_include),
    # the size of the embedding vector (embedding_vector_length),
    # the length of each input sequence (MAX_REVIEW_LENGTH_FOR_KERAS_RNN), and the dropout to apply to the input.
    # Outputs a 3D tensor of shape (# of samples, sequence/review length, embedding vector length).
    model.add(
        KEmbedding(num_most_freq_words_to_include,
                   embedding_vector_length,
                   input_length=MAX_REVIEW_LENGTH_FOR_KERAS_RNN,
                   dropout=initial_dropout))
    if dropout_variation == 2:
        model.add(Dropout(0.2))

    # Incorporate CNN and corresponding MaxPool layer
    if use_cnn:
        model.add(
            Convolution1D(nb_filter=32,
                          filter_length=3,
                          border_mode='same',
                          activation='relu'))
        model.add(MaxPooling1D(pool_length=2))  # Cuts representation size in half

    # Add a GRU layer of 100 units.
    # Set the dropout applied to input units of the input gates (dropout_W) and of the recurrent connections
    # (dropout_U). The default value (when dropout_variation is 0) is 0.0.
    dropout_W = 0.0
    dropout_U = 0.0
    if dropout_variation == 3:
        dropout_W = 0.2
        dropout_U = 0.2
    model.add(GRU(100, dropout_W=dropout_W, dropout_U=dropout_U))

    # Add potential dropout: this is based on the recommendation of p=0.5 placed after each GRU/LSTM layer:
    # http://www.icfhr2014.org/wp-content/uploads/2015/02/ICFHR2014-Bluche.pdf
    if dropout_variation == 1:
        model.add(Dropout(0.5))
    elif dropout_variation == 2:
        model.add(Dropout(0.2))

    # Add a layer to get the final probability prediction
    model.add(KDense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    print(model.summary())

    # Run 3 epochs. More is said to overfit - eg training acc increasing while validation acc stagnates or declines
    # after the 3rd epoch.
    # Note that while tuning, the validation_split parameter was set to 0.2 to use the last 20% of training data
    # to report a validation score. It seems that if that parameter is set, only 80% of x/y_train is used to train.
    model.fit(x_train, y_train, nb_epoch=3, batch_size=64)  # , validation_split=0.2)

    # Save model
    model.save(KERAS_NN_MODEL)
    # When Theano is used as the backend, an exception may occur when attempting to load a model; it can be resolved
    # by deleting "optimizer_weights" in the model H5 file - see https://github.com/fchollet/keras/issues/4044
    if backend.backend() == "theano":
        with h5py.File(KERAS_NN_MODEL, "r+") as f:
            del f["optimizer_weights"]

    _ = joblib.dump(tokenizer, KERAS_TOKENIZER, compress=9)
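
# Hedged companion sketch (not part of the original source): one way the artifacts saved above could be loaded
# back to score new reviews. It assumes the same pre-processing (html->text, lowercase, punct./#s removed) was
# applied to `reviews`, and that the KERAS_NN_MODEL / KERAS_TOKENIZER / MAX_REVIEW_LENGTH_FOR_KERAS_RNN constants
# are in scope, as they are for the training function.
def predict_sentiment_with_saved_keras_model(reviews):
    """Sketch: return positive-sentiment probabilities for an iterable of pre-processed review strings."""
    from keras.models import load_model
    from keras.preprocessing import sequence
    import joblib  # assumes the same joblib module used by the training function above

    tokenizer = joblib.load(KERAS_TOKENIZER)
    model = load_model(KERAS_NN_MODEL)
    # Apply the same tokenization and padding used at training time
    tokenized = tokenizer.texts_to_sequences([s.encode('ascii') for s in reviews])
    padded = sequence.pad_sequences(tokenized, maxlen=MAX_REVIEW_LENGTH_FOR_KERAS_RNN)
    # Sigmoid output of the final Dense layer: probability that each review is positive
    return model.predict(padded)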
# z = Attention_word_weight(8, 64)([embeddings, embeddings, embeddings])
# z = Attention_weight(8, 128)([z, z, z])
# z = embeddings
z = Attention_word(8, 32)([embeddings, embeddings, embeddings])
z = Attention(8, 32)([z, z, z])

conv_blocks = []
for sz in filter_sizes:
    conv = Convolution1D(filters=num_filters,
                         kernel_size=sz,
                         padding="valid",
                         activation="relu",
                         kernel_regularizer=regularizers.l2(0.01),
                         strides=1)(z)
    # conv = Capsule(2, 32, 3, True)(conv)
    conv = MaxPooling1D(pool_size=2)(conv)
    conv = Flatten()(conv)
    conv_blocks.append(conv)
z = Concatenate()(conv_blocks) if len(conv_blocks) > 1 else conv_blocks[0]

z = Dropout(dropout_prob[1])(z)
z = Dense(hidden_dims, activation="relu")(z)
model_output = Dense(1, activation="sigmoid")(z)

model = Model(model_input, model_output)
adam = keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0,
def cnn(x_train, x_test, y_train, y_test, batch_size, epochs, filters, kernel_size, stride=1):
    '''
    THESE 4 SAMPLE/TARGET PARAMETERS ARE ALREADY NORMALIZED (MIN-MAX NORMALIZATION)
    :param x_train: samples used in training
    :param x_test: samples used in testing
    :param y_train: targets used in training
    :param y_test: targets used in testing
    :param batch_size: integer that represents the batch size
    :param epochs: integer that represents the number of epochs
    :param filters: integer --> dimensionality of the output space (number of output filters in the convolution)
    :param kernel_size: integer or tuple with a single integer (integer,) --> length of the convolution window
    :param stride: default=1, integer that represents the stride length of the convolution
    :return: score of the model: accuracy
    '''
    try:
        # I NEED TO RESHAPE DATA TO (number of samples, time steps, features) --> for this example, time_step is 1,
        # and the input arrives in the format (samples, features)
        # input shape is channels-last --> (time steps, features); if time step is 1, then (None, features)
        # --> https://keras.io/layers/convolutional/
        x_train = x_train.reshape(x_train.shape[0], x_train.shape[1], 1)  # FEATURES ARE TREATED AS TIME STEPS
        x_test = x_test.reshape(x_test.shape[0], x_test.shape[1], 1)

        # I NEED TO CONVERT TARGETS INTO BINARY CLASS VECTORS (ONE-HOT), TO PUT THE TARGETS INTO THE SAME RANGE
        # AS THE OUTPUT OF ACTIVATION FUNCTIONS LIKE SOFTMAX OR SIGMOID
        y_train = keras.utils.to_categorical(y_train, 3)
        y_test = keras.utils.to_categorical(y_test, 3)

        # EXPLANATION OF THE DIFFERENCE BETWEEN 'SAME' AND 'VALID' PADDING:
        # https://stackoverflow.com/questions/37674306/what-is-the-difference-between-same-and-valid-padding-in-tf-nn-max-pool-of-t

        # MODEL CREATION
        input_shape = (x_train.shape[1], 1)
        model = Sequential()
        model.add(
            Conv1D(filters=filters,
                   kernel_size=kernel_size,
                   input_shape=input_shape,
                   padding='valid'))  # FIRST CNN LAYER
        model.add(Activation('relu'))
        model.add(BatchNormalization())
        model.add(MaxPooling1D(strides=1, padding='same'))  # I KEEP THE DEFAULT pool_size OF 2
        model.add(Flatten())
        model.add(Dense(3))  # FULLY CONNECTED LAYER --> OUTPUT LAYER WITH 3 OUTPUTS
        model.add(Activation('softmax'))
        model.summary()  # PRINT SUMMARY OF MODEL

        # COMPILE MODEL
        model.compile(
            optimizer='Adam',
            loss='categorical_crossentropy',
            metrics=['accuracy']
        )  # CATEGORICAL CROSS-ENTROPY BECAUSE IT IS BETTER SUITED TO MULTI-CLASS PROBLEMS

        # FIT MODEL
        historyOfTraining = model.fit(
            x=x_train,
            y=y_train,
            batch_size=batch_size,
            epochs=epochs,
        )

        predict = model.predict(x=x_test, batch_size=batch_size)
        print(predict)
        print(y_test)
        predict = (predict == predict.max(axis=1)[:, None]).astype(int)
        print(predict)

        numberRights = 0
        for i in range(len(y_test)):
            indexMaxValue = numpy.argmax(predict[i], axis=0)
            if indexMaxValue == numpy.argmax(y_test[i], axis=0):  # COMPARE INDEX OF PREDICTED CLASS WITH REAL CLASS
                numberRights = numberRights + 1
        hitRate = numberRights / len(y_test)  # FRACTION OF CORRECT PREDICTIONS
        return hitRate
    except:
        raise
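
# Hedged usage sketch (not part of the original snippet): the dataset names X (2-D feature array) and y
# (integer class labels 0..2) are illustrative, as are the batch size, epoch count and filter settings.
# The scaling step mirrors the docstring's requirement that inputs arrive min-max normalized.
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

X_scaled = MinMaxScaler().fit_transform(X)
x_tr, x_te, y_tr, y_te = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
hit_rate = cnn(x_tr, x_te, y_tr, y_te, batch_size=32, epochs=10, filters=16, kernel_size=3)
print('Hit rate on the test set:', hit_rate)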
for j in range(0, depth):
    X_test_tmp[i, :, j] = X_test[i, :]
X_test = X_test_tmp

### Set up CNN model ###
inp = Input(shape=(width, depth))  # depth goes last in TensorFlow back-end (first in Theano)

# Conv [32] -> Conv [32] -> Pool (with dropout on the pooling layer)
conv_1 = Convolution1D(conv_depth_1, (kernel_size), padding='same', activation='relu')(inp)
conv_2 = Convolution1D(conv_depth_1, (kernel_size), padding='same', activation='relu')(conv_1)
pool_1 = MaxPooling1D(pool_size=(pool_size))(conv_2)
drop_1 = Dropout(drop_prob_1)(pool_1)

# Conv [64] -> Conv [64] -> Pool (with dropout on the pooling layer)
conv_3 = Convolution1D(conv_depth_2, (kernel_size), padding='same', activation='relu')(drop_1)
conv_4 = Convolution1D(conv_depth_2, (kernel_size), padding='same', activation='relu')(conv_3)
pool_2 = MaxPooling1D(pool_size=(pool_size))(conv_4)
drop_2 = Dropout(drop_prob_1)(pool_2)

# Now flatten to 1D, apply FC -> ReLU (with dropout) -> softmax
flat = Flatten()(drop_2)
hidden = Dense(hidden_size_2, activation='relu')(flat)
drop_3 = Dropout(drop_prob_2)(hidden)
out = Dense(num_classes, activation='softmax')(drop_3)
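
# Hedged completion (not in the original excerpt): tie the functional graph above into a Model, train it and
# report test accuracy. X_train, Y_train, Y_test and num_epochs are assumed names for illustration; only
# inp, out, X_test and batch_size-style hyperparameters appear in the excerpt itself.
model = Model(inputs=inp, outputs=out)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train, Y_train, batch_size=32, epochs=num_epochs, validation_split=0.1)
loss, acc = model.evaluate(X_test, Y_test, verbose=0)
print('Test accuracy:', acc)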
model4.add(Dropout(0.2))
model4.add(BatchNormalization())
models.append(model4)
model_inputs.append(x2)
model_inputs_test.append(x2_test)

# model5
if opts.siamese == 0:
    if opts.attention == 1:
        model5_ip = Input(shape=(40, ))
        x5 = Embedding(len(word_index) + 1, 300, input_length=40, dropout=0.2)(model5_ip)
        if opts.cnn == 1:
            x5 = Conv1D(64, 5, padding='valid', activation='relu', strides=1)(x5)
            x5 = MaxPooling1D(pool_size=4)(x5)
        if opts.bilstm == 1:
            if opts.regularize == 1:
                x5 = Bidirectional(
                    LSTM(300, dropout_W=0.2, dropout_U=0.2, return_sequences=True,
                         W_regularizer=l2(0.01)))(x5)
            else:
                x5 = Bidirectional(
                    LSTM(300, dropout_W=0.2, dropout_U=0.2, return_sequences=True))(x5)
        else:
def build_point_net(input_shape=(2048, 3), output_shape=10, refined_points=25,
                    mode="segmentation", method="original"):
    assert mode in ["classification", "segmentation"]
    assert method in ["original", "refined"]

    features = Input(input_shape, name="input_features")
    if (method == "refined"):
        # Named "input_indexes" so the two Input layers do not share the name "input_features"
        indexes = Input((input_shape[0], refined_points), dtype="int32", name="input_indexes")
        print(indexes)

    def multiply(input_tensors):
        dot = K.batch_dot(input_tensors[0], input_tensors[1])
        return dot

    transform3 = build_T_net((input_shape[0], input_shape[1]), name="T_net_3")(features)
    transformed3 = Lambda(multiply, name="transformed3")([features, transform3])

    def gather(input_tensors):
        gathered = K.gather(input_tensors[0], input_tensors[1])
        print(gathered)
        return gathered

    if (method == "refined"):
        # Named "gathered3" so this Lambda does not clash with the "transformed3" layer above
        around = Lambda(gather, name="gathered3")([transformed3, indexes])
        print(around)

    conv10 = Conv1D(filters=64, kernel_size=(1), padding='valid', strides=(1),
                    activation="relu", name="conv10")(transformed3)
    conv11 = Conv1D(filters=64, kernel_size=(1), padding='valid', strides=(1),
                    activation="relu", name="conv11")(conv10)

    transform64 = build_T_net((input_shape[0], 64), name="T_net_64")(conv11)
    transformed64 = Lambda(multiply, name="transformed64")([conv11, transform64])

    conv20 = Conv1D(filters=64, kernel_size=(1), padding='valid', strides=(1),
                    activation="relu", name="conv20")(transformed64)
    conv21 = Conv1D(filters=128, kernel_size=(1), padding='valid', strides=(1),
                    activation="relu", name="conv21")(conv20)
    conv22 = Conv1D(filters=1024, kernel_size=(1), padding='valid', strides=(1),
                    activation="relu", name="conv22")(conv21)

    global_features = MaxPooling1D(pool_size=input_shape[0], strides=None, padding="valid")(conv22)
    global_features = Flatten()(global_features)

    if (mode == "classification"):
        dense0 = Dense(512, activation="relu")(global_features)
        dense1 = Dense(256, activation="relu")(dense0)
        dense2 = Dense(output_shape, activation="softmax")(dense1)
        model = Model(inputs=features, outputs=dense2)
        return model
    elif (mode == "segmentation"):
        input_segmentation = Concatenate()([
            transformed3, conv21, transformed64,
            RepeatVector(input_shape[0])(global_features)
        ])
        conv30 = Conv1D(filters=512, kernel_size=(1), padding='valid', strides=(1),
                        activation="relu", name="conv30")(input_segmentation)
        conv31 = Conv1D(filters=256, kernel_size=(1), padding='valid', strides=(1),
                        activation="relu", name="conv31")(conv30)
        conv32 = Conv1D(filters=128, kernel_size=(1), padding='valid', strides=(1),
                        activation="relu", name="conv32")(conv31)
        conv33 = Conv1D(filters=128, kernel_size=(1), padding='valid', strides=(1),
                        activation="relu", name="conv33")(conv32)
        conv34 = Conv1D(filters=output_shape, kernel_size=(1), padding='valid', strides=(1),
                        activation="softmax", name="conv34")(conv33)
        model = Model(inputs=features, outputs=conv34)
        return model
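
# Hedged usage sketch (not in the original source): build_T_net is defined elsewhere in this code base, so this
# only shows how build_point_net itself might be instantiated and compiled for 10-class point-cloud classification.
point_net = build_point_net(input_shape=(2048, 3), output_shape=10, mode="classification")
point_net.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
point_net.summary()
# In mode="segmentation" the output is per point, with shape (batch, 2048, output_shape), so labels would need
# to be one-hot encoded per point before calling fit.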
                  dilation_rate=2)(embedding_output)
cnn5 = Conv1D(filters=100,
              kernel_size=5,
              strides=1,
              padding="same",
              activation="tanh",
              dilation_rate=2)(embedding_output)

cnn_output = concatenate([cnn2, cnn3, cnn4], axis=2)

# cnn_output1 = Conv1D(filters=300, kernel_size=4, strides=1, padding="same", activation="relu")(cnn_output)
#
# cnn_output = keras.layers.add([cnn_output, cnn_output1])

cnn_output = MaxPooling1D(pool_size=FIXED_SIZE, strides=FIXED_SIZE, padding="valid")(cnn_output)
# cnn_output = BatchNormalization()(cnn_output)
cnn_output = Lambda(lambda x: tf.squeeze(x, axis=1))(cnn_output)

# cnn1 = piecewise_maxpool_layer(filter_num=128, fixed_size=FIXED_SIZE)([cnn1, e1_pos, e2_pos])
# cnn2 = piecewise_maxpool_layer(filter_num=128, fixed_size=FIXED_SIZE)([cnn2, e1_pos, e2_pos])
# cnn3 = piecewise_maxpool_layer(filter_num=128, fixed_size=FIXED_SIZE)([cnn3, e1_pos, e2_pos])
# cnn4 = piecewise_maxpool_layer(filter_num=128, fixed_size=FIXED_SIZE)([cnn4, e1_pos, e2_pos])
#
# cnn1 = MaxPooling1D(pool_size=FIXED_SIZE, strides=1, padding="same")(cnn1)
# cnn2 = MaxPooling1D(pool_size=FIXED_SIZE, strides=1, padding="same")(cnn2)
# cnn3 = MaxPooling1D(pool_size=FIXED_SIZE, strides=1, padding="same")(cnn3)
# cnn4 = MaxPooling1D(pool_size=FIXED_SIZE, strides=1, padding="same")(cnn4)
embedded_sequences = embedding_layer(sequence_input)
embedded_sequences = BatchNormalization(epsilon=1e-08, mode=0, axis=1, momentum=0.9, weights=None)(embedded_sequences)

x = Conv1D(64, num_filter, activation='relu')(embedded_sequences)
x = Conv1D(64, num_filter, activation='relu')(x)
x = Conv1D(64, num_filter, activation='relu')(x)
x = Conv1D(64, num_filter, activation='relu')(x)
x = Conv1D(64, num_filter, activation='relu')(x)
x = Conv1D(64, num_filter, activation='relu')(x)
x = Conv1D(64, num_filter, activation='relu')(x)
x = Conv1D(64, num_filter, activation='relu')(x)
# x = BatchNormalization(epsilon=1e-08, mode=0, axis=1, momentum=0.9, weights=None)(x)
x = MaxPooling1D(2)(x)
# print('Conv1 output shape:', x.shape)
# print('Maxpooling1 output shape:', x.shape)

x = Conv1D(128, num_filter, activation='relu')(x)
x = Conv1D(128, num_filter, activation='relu')(x)
x = Conv1D(128, num_filter, activation='relu')(x)
x = Conv1D(128, num_filter, activation='relu')(x)
x = Conv1D(128, num_filter, activation='relu')(x)
x = Conv1D(128, num_filter, activation='relu')(x)
x = Conv1D(128, num_filter, activation='relu')(x)
x = Conv1D(128, num_filter, activation='relu')(x)
# x = BatchNormalization(epsilon=1e-08, mode=0, axis=1, momentum=0.9, weights=None)(x)
x = MaxPooling1D(2)(x)
# print('Conv2 output shape:', x.shape)
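
# Hedged refactoring sketch (not in the original source): the eight identical Conv1D calls per block above can be
# generated by a small helper, which makes the block depth and width easier to vary. conv_block is a hypothetical
# name; it reproduces the filters=64/128, kernel_size=num_filter pattern used above.
def conv_block(x, n_filters, n_layers, kernel_size, pool_size=2):
    # Stack n_layers identical Conv1D layers, then downsample with max pooling
    for _ in range(n_layers):
        x = Conv1D(n_filters, kernel_size, activation='relu')(x)
    return MaxPooling1D(pool_size)(x)

# Roughly equivalent to the two blocks above:
# x = conv_block(embedded_sequences, 64, 8, num_filter)
# x = conv_block(x, 128, 8, num_filter)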
# load pre-trained word embeddings into an Embedding layer
# note that we set trainable = False so as to keep the embeddings fixed
embedding_layer = Embedding(num_words,
                            EMBEDDING_DIM,
                            weights=[embedding_matrix],
                            input_length=MAX_SEQUENCE_LENGTH,
                            trainable=False)

print('Training model.')

# train a 1D convnet with global maxpooling
sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32')
embedded_sequences = embedding_layer(sequence_input)
x = Conv1D(128, 5, activation='relu')(embedded_sequences)
x = MaxPooling1D(5)(x)
x = Conv1D(128, 5, activation='relu')(x)
x = MaxPooling1D(5)(x)
x = Conv1D(128, 5, activation='relu')(x)
x = GlobalMaxPooling1D()(x)
x = Dense(128, activation='relu')(x)
preds = Dense(dir_inx, activation='softmax')(x)

model = Model(sequence_input, preds)
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['acc'])

model.fit(x_train, y_train,
          batch_size=128,
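
# Hedged sketch (not in the original excerpt) of how an embedding_matrix like the one used above is typically
# assembled: row i holds the pre-trained vector (e.g. GloVe) for the word with tokenizer index i, and words
# without a pre-trained vector stay all-zeros. word_index, embeddings_index and MAX_NUM_WORDS are assumed names.
import numpy as np

num_words = min(MAX_NUM_WORDS, len(word_index) + 1)
embedding_matrix = np.zeros((num_words, EMBEDDING_DIM))
for word, i in word_index.items():
    if i >= num_words:
        continue
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector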
def get_model(emb_matrix):
    ## Headline ##
    headline_input = Input(shape=(max_length, ))
    emb = Embedding(vocab_size + 1,
                    emb_size,
                    input_length=max_length,
                    weights=[emb_matrix],
                    trainable=True)(headline_input)
    emb = SpatialDropout1D(.2)(emb)
    conv = Conv1D(filters=64, kernel_size=5, padding='same', activation='selu')(emb)
    conv = MaxPooling1D(pool_size=3)(conv)
    text_rnn = LSTM(200, dropout=0.3, recurrent_dropout=0.3, return_sequences=False)(conv)
    text_rnn = Activation('selu')(text_rnn)
    text_rnn = BatchNormalization()(text_rnn)
    # text_rnn = LSTM(300, dropout=0.3, recurrent_dropout=0.3)(text_rnn)
    # text_rnn = Activation('relu')(text_rnn)
    # text_rnn = BatchNormalization()(text_rnn)

    ## Source ##
    meta_input = Input(shape=(len(all_sources) + 7, ))

    ## Combined ##
    merged = concatenate([text_rnn, meta_input])

    final_dense = Dense(100)(merged)
    final_dense = Activation('selu')(final_dense)
    final_dense = BatchNormalization()(final_dense)
    final_dense = Dropout(0.5)(final_dense)

    # Chain the second dense block from the first so both blocks contribute to the prediction
    final_dense = Dense(100)(final_dense)
    final_dense = Activation('selu')(final_dense)
    final_dense = BatchNormalization()(final_dense)
    final_dense = Dropout(0.5)(final_dense)

    if model_type == 'regression':
        pred_dense = Dense(1)(final_dense)
        out = pred_dense
        model = Model(inputs=[headline_input, meta_input], outputs=out)
        model.compile(optimizer=RMSprop(lr=0.001), loss='mse', metrics=[correct_sign_acc])
    else:
        pred_dense = Dense(2)(final_dense)
        out = Activation('softmax')(pred_dense)
        model = Model(inputs=[headline_input, meta_input], outputs=out)
        model.compile(optimizer=RMSprop(lr=0.001),
                      loss='categorical_crossentropy',
                      metrics=['acc'])

    return model
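
# Hedged usage sketch (not in the original source): emb_matrix is assumed to be a (vocab_size + 1, emb_size)
# array of pre-trained vectors, and headline_seqs / meta_features / targets are illustrative names for the
# padded headline sequences, the source/meta feature matrix and the labels expected by the two inputs above.
model = get_model(emb_matrix)
model.summary()
model.fit([headline_seqs, meta_features], targets, epochs=5, batch_size=64, validation_split=0.1)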