# Assumed common imports for the TFLearn snippets below (the original
# examples omit them); each snippet uses only a subset.
import tensorflow as tf
import tflearn
from tflearn.data_utils import to_categorical, pad_sequences
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.conv import conv_1d, global_max_pool
from tflearn.layers.merge_ops import merge
from tflearn.layers.estimator import regression

def do_cnn_doc2vec(trainX, testX, trainY, testY):
    global max_features
    print("CNN and doc2vec")
    #trainX = pad_sequences(trainX, maxlen=max_features, value=0.)
    #testX = pad_sequences(testX, maxlen=max_features, value=0.)
    # Converting labels to binary vectors
    trainY = to_categorical(trainY, nb_classes=2)
    testY = to_categorical(testY, nb_classes=2)
    # Building convolutional network
    network = input_data(shape=[None, max_features], name='input')
    network = tflearn.embedding(network, input_dim=1000000, output_dim=128,
                                validate_indices=False)
    branch1 = conv_1d(network, 128, 3, padding='valid', activation='relu', regularizer="L2")
    branch2 = conv_1d(network, 128, 4, padding='valid', activation='relu', regularizer="L2")
    branch3 = conv_1d(network, 128, 5, padding='valid', activation='relu', regularizer="L2")
    network = merge([branch1, branch2, branch3], mode='concat', axis=1)
    network = tf.expand_dims(network, 2)
    network = global_max_pool(network)
    network = dropout(network, 0.8)
    network = fully_connected(network, 2, activation='softmax')
    network = regression(network, optimizer='adam', learning_rate=0.001,
                         loss='categorical_crossentropy', name='target')
    # Training
    model = tflearn.DNN(network, tensorboard_verbose=0)
    model.fit(trainX, trainY, n_epoch=5, shuffle=True,
              validation_set=(testX, testY), show_metric=True,
              batch_size=100, run_id="review")
def transform_embedded_sequences(self, embedded_sequences):
    drop_1, drop_2 = self.dropout_rates
    net = dropout(embedded_sequences, drop_1)
    conv_blocks = []
    for sz in self.filter_sizes:
        conv = conv_1d(net, nb_filter=self.num_filters, filter_size=sz,
                       padding="valid", activation="relu", regularizer="L2")
        conv_blocks.append(conv)
    net = merge(conv_blocks, mode='concat', axis=1) if len(conv_blocks) > 1 else conv_blocks[0]
    net = tf.expand_dims(net, 2)
    net = global_max_pool(net)
    net = dropout(net, drop_2)
    model_output = fully_connected(net, self.class_count, activation="softmax")
    return model_output
def get_cnn_model(max_len, volcab_size):
    # Building convolutional network
    network = tflearn.input_data(shape=[None, max_len], name='input')
    network = tflearn.embedding(network, input_dim=volcab_size, output_dim=64)
    branch1 = conv_1d(network, 128, 3, padding='valid', activation='relu', regularizer="L2")
    branch2 = conv_1d(network, 128, 4, padding='valid', activation='relu', regularizer="L2")
    branch3 = conv_1d(network, 128, 5, padding='valid', activation='relu', regularizer="L2")
    network = merge([branch1, branch2, branch3], mode='concat', axis=1)
    network = tf.expand_dims(network, 2)
    network = global_max_pool(network)
    network = dropout(network, 0.5)
    network = fully_connected(network, 3, activation='softmax')
    network = regression(network, optimizer='adam', learning_rate=0.001,
                         loss='categorical_crossentropy', name='target')
    model = tflearn.DNN(network, tensorboard_verbose=0)
    return model
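# Hypothetical usage sketch for get_cnn_model (not part of the original
# example). The data names and sizes are assumptions: train_docs is a list of
# integer-encoded token sequences, train_labels holds class ids 0-2.
max_len, vocab_size = 100, 10000  # assumed hyperparameters
trainX = pad_sequences(train_docs, maxlen=max_len, value=0.)
trainY = to_categorical(train_labels, nb_classes=3)
model = get_cnn_model(max_len, vocab_size)
model.fit(trainX, trainY, n_epoch=5, shuffle=True, show_metric=True, batch_size=32)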
def do_cnn_word2vec(trainX, testX, trainY, testY):
    global max_features
    print("CNN and word2vec")
    #trainX = pad_sequences(trainX, maxlen=max_document_length, value=-1.)
    #testX = pad_sequences(testX, maxlen=max_document_length, value=0.)
    # Converting labels to binary vectors
    trainY = to_categorical(trainY, nb_classes=2)
    testY = to_categorical(testY, nb_classes=2)
    # Building convolutional network
    network = input_data(shape=[None, max_features], name='input')
    network = tflearn.embedding(network, input_dim=1000000, output_dim=128)
    branch1 = conv_1d(network, 128, 3, padding='valid', activation='relu', regularizer="L2")
    branch2 = conv_1d(network, 128, 4, padding='valid', activation='relu', regularizer="L2")
    branch3 = conv_1d(network, 128, 5, padding='valid', activation='relu', regularizer="L2")
    network = merge([branch1, branch2, branch3], mode='concat', axis=1)
    network = tf.expand_dims(network, 2)
    network = global_max_pool(network)
    network = dropout(network, 0.8)
    network = fully_connected(network, 2, activation='softmax')
    network = regression(network, optimizer='adam', learning_rate=0.001,
                         loss='categorical_crossentropy', name='target')
    # Training
    model = tflearn.DNN(network, tensorboard_verbose=0)
    model.fit(trainX, trainY, n_epoch=5, shuffle=True,
              validation_set=(testX, testY), show_metric=True,
              batch_size=2, run_id="spam")
def build_network(optimizer):
    net = input_data(shape=[None, length], name='input')
    net = tflearn.embedding(net, input_dim=caes_ngram_data.dims, output_dim=128)
    branch1 = conv_1d(net, 128, 3, padding='valid', activation='relu', regularizer="L2")
    branch2 = conv_1d(net, 128, 4, padding='valid', activation='relu', regularizer="L2")
    branch3 = conv_1d(net, 128, 5, padding='valid', activation='relu', regularizer="L2")
    net = merge([branch1, branch2, branch3], mode='concat', axis=1)
    net = tf.expand_dims(net, 2)
    net = global_max_pool(net)
    net = dropout(net, 0.33)
    net = fully_connected(net, 6, activation='softmax')
    net = regression(net, optimizer=optimizer, learning_rate=0.001,
                     loss='categorical_crossentropy', name='target')
    return net
def conv_model(network):
    branch1 = conv_1d(network, 200, 3, padding='valid', activation='relu', regularizer="L2")
    branch2 = conv_1d(network, 200, 4, padding='valid', activation='relu', regularizer="L2")
    branch3 = conv_1d(network, 200, 5, padding='valid', activation='relu', regularizer="L2")
    network = merge([branch1, branch2, branch3], mode='concat', axis=1)
    network = tf.expand_dims(network, 2)
    network = global_max_pool(network)
    network = dropout(network, 0.5)
    return network
def do_cnn(trainX, trainY, testX, testY):
    global n_words
    # Data preprocessing
    # Sequence padding
    trainX = pad_sequences(trainX, maxlen=MAX_DOCUMENT_LENGTH, value=0.)
    testX = pad_sequences(testX, maxlen=MAX_DOCUMENT_LENGTH, value=0.)
    # Converting labels to binary vectors
    trainY = to_categorical(trainY, nb_classes=2)
    testY = to_categorical(testY, nb_classes=2)
    # Building convolutional network
    network = input_data(shape=[None, MAX_DOCUMENT_LENGTH], name='input')
    network = tflearn.embedding(network, input_dim=n_words + 1, output_dim=128)
    branch1 = conv_1d(network, 128, 3, padding='valid', activation='relu', regularizer="L2")
    branch2 = conv_1d(network, 128, 4, padding='valid', activation='relu', regularizer="L2")
    branch3 = conv_1d(network, 128, 5, padding='valid', activation='relu', regularizer="L2")
    network = merge([branch1, branch2, branch3], mode='concat', axis=1)
    network = tf.expand_dims(network, 2)
    network = global_max_pool(network)
    network = dropout(network, 0.5)
    network = fully_connected(network, 2, activation='softmax')
    network = regression(network, optimizer='adam', learning_rate=0.001,
                         loss='categorical_crossentropy', name='target')
    # Training
    model = tflearn.DNN(network, tensorboard_verbose=0)
    model.fit(trainX, trainY, n_epoch=20, shuffle=True,
              validation_set=(testX, testY), show_metric=True, batch_size=32)
def cnn(self):
    network = input_data(shape=[None, self.max_document_length], name='input')
    network = tflearn.embedding(network, input_dim=1000000, output_dim=128)
    branch1 = conv_1d(network, 128, 3, padding='valid', activation='relu', regularizer="L2")
    branch2 = conv_1d(network, 128, 4, padding='valid', activation='relu', regularizer="L2")
    branch3 = conv_1d(network, 128, 5, padding='valid', activation='relu', regularizer="L2")
    network = merge([branch1, branch2, branch3], mode='concat', axis=1)
    network = tf.expand_dims(network, 2)
    network = global_max_pool(network)
    network = dropout(network, 0.8)
    network = fully_connected(network, 2, activation='softmax')
    network = regression(network, optimizer='adam', learning_rate=0.001,
                         loss='categorical_crossentropy', name='target')
    model = tflearn.DNN(network, tensorboard_verbose=0)
    return model
def cnn_3_filters(trainX, trainY, valX, valY, testX, input_weights):
    '''
    A CNN with three convolutional layers as in Kim Yoon
    (Convolutional Neural Networks for Sentence Classification)
    '''
    # Building convolutional network
    network = input_data(shape=[None, MAX_LENGHT], name='input')
    network = tflearn.embedding(network, input_dim=input_weights.shape[0],
                                output_dim=input_weights.shape[1],
                                trainable=True, name="EmbeddingLayer")
    branch1 = conv_1d(network, 128, 3, padding='valid', activation='relu', regularizer="L2")
    branch2 = conv_1d(network, 128, 4, padding='valid', activation='relu', regularizer="L2")
    branch3 = conv_1d(network, 128, 5, padding='valid', activation='relu', regularizer="L2")
    network = merge([branch1, branch2, branch3], mode='concat', axis=1)
    network = tf.expand_dims(network, 2)
    network = global_max_pool(network)
    network = dropout(network, 0.5)
    network = fully_connected(network, 12, activation='softmax')
    network = regression(network, optimizer='adam', learning_rate=0.001,
                         loss='categorical_crossentropy', name='target')
    # Training
    model = tflearn.DNN(network, tensorboard_verbose=1)
    # Load the pretrained embedding weights into the embedding layer
    embeddingWeights = tflearn.get_layer_variables_by_name("EmbeddingLayer")[0]
    model.set_weights(embeddingWeights, input_weights)
    print("Start training CNN...")
    model.fit(trainX, trainY, n_epoch=NB_EPOCHS,
              validation_set=(valX, valY), shuffle=True,
              show_metric=True, batch_size=32)
    y_result = model.predict(testX)
    return y_result
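# Hypothetical sketch (not from the original source): one way to build the
# `input_weights` matrix that cnn_3_filters expects, here from a trained
# gensim word2vec model. `word_index` (token -> row index) and the file path
# are assumptions for illustration.
import numpy as np
from gensim.models import Word2Vec

def build_embedding_weights(w2v_path, word_index, dim=300):
    w2v = Word2Vec.load(w2v_path)
    weights = np.zeros((len(word_index) + 1, dim))  # row 0 reserved for padding
    for word, idx in word_index.items():
        if word in w2v.wv:
            weights[idx] = w2v.wv[word]  # copy the pretrained vector
    return weights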
def do_cnn(x, y):
    print("start CNN......")
    global max_document_length
    print("CNN and tf")
    trainX, testX, trainY, testY = train_test_split(x, y, test_size=0.4, random_state=0)
    y_test = testY
    # Pad and convert the train/test data; sequences shorter than the
    # maximum length are padded with 0
    trainX = pad_sequences(trainX, maxlen=max_document_length, value=0.)
    testX = pad_sequences(testX, maxlen=max_document_length, value=0.)
    # Converting labels to binary vectors
    # Binary classification problem, so one-hot encode the labels
    trainY = to_categorical(trainY, nb_classes=2)
    testY = to_categorical(testY, nb_classes=2)
    # Building convolutional network
    network = input_data(shape=[None, max_document_length], name='input')
    network = tflearn.embedding(network, input_dim=1000000, output_dim=128)
    # Three 1-D convolutions with 128 filters of sizes 3, 4 and 5
    branch1 = conv_1d(network, 128, 3, padding='valid', activation='relu', regularizer="L2")
    branch2 = conv_1d(network, 128, 4, padding='valid', activation='relu', regularizer="L2")
    branch3 = conv_1d(network, 128, 5, padding='valid', activation='relu', regularizer="L2")
    network = merge([branch1, branch2, branch3], mode='concat', axis=1)
    network = tf.expand_dims(network, 2)
    network = global_max_pool(network)
    network = dropout(network, 0.8)
    network = fully_connected(network, 2, activation='softmax')
    network = regression(network, optimizer='adam', learning_rate=0.001,
                         loss='categorical_crossentropy', name='target')
    # Instantiate the CNN and train it for 5 epochs
    model = tflearn.DNN(network, tensorboard_verbose=0)
    if not os.path.exists(pkl_file):
        model.fit(trainX, trainY, n_epoch=5, shuffle=True, validation_set=0.1,
                  show_metric=True, batch_size=100, run_id="webshell")
        model.save(pkl_file)
    else:
        model.load(pkl_file)
    y_predict_list = model.predict(testX)
    y_predict = []
    for i in y_predict_list:
        print(i[0])
        if i[0] > 0.5:
            y_predict.append(0)
        else:
            y_predict.append(1)
    print('y_predict_list:')
    print(y_predict_list)
    print('y_predict:')
    print(y_predict)
    # print(y_test)
    do_metrics(y_test, y_predict)
def hybrid_header(x, reuse=False):
    # size = 3
    # inputs_shape = x.get_shape().as_list()
    # with tf.variable_scope('1d-cnn'):
    #     split_array = []
    #     for t in range(S_LEN - 1):
    #         tmp_split = tflearn.conv_1d(
    #             x[:, t:t + 1, :], FEATURE_NUM, size, activation='relu')
    #         tmp_split_flat = tflearn.flatten(tmp_split)
    #         tmp_split_flat = tflearn.layers.normalization.batch_normalization(tmp_split_flat)
    #         split_array.append(tmp_split_flat)
    #     merge_net = tflearn.merge(split_array, 'concat')
    #     _count = merge_net.get_shape().as_list()[1]
    #     out_cnn = tf.reshape(
    #         merge_net, [-1, inputs_shape[1], _count / inputs_shape[1]])
    # with tf.variable_scope('gru'):
    #     net = tflearn.gru(out_cnn, FEATURE_NUM, return_seq=True)
    #     out_gru = tflearn.gru(net, FEATURE_NUM)
    #     out_gru = tf.expand_dims(out_gru, 1)
    # conv_1d_net = tflearn.conv_1d(out_gru, FEATURE_NUM, size, activation='relu')
    # conv_1d_net_flattern = tflearn.flatten(conv_1d_net)
    with tf.name_scope('1d-cnn'):
        network_array = []
        for p in range(S_INFO - 1):
            branch_array = []
            for i in range(2, 4):
                sp_branch = tflearn.conv_1d(x[:, :, p:p + 1], FEATURE_NUM, i,
                                            padding='valid', activation='relu',
                                            regularizer="L2")
                branch_array.append(sp_branch)
            branch = tflearn.merge(branch_array, mode='concat', axis=1)
            branch = tf.expand_dims(branch, 2)
            branch = global_max_pool(branch)
            # branch = tflearn.dropout(branch, 0.5)
            network_array.append(branch)
        out_cnn = tflearn.merge(network_array, 'concat')
    # with tf.name_scope('gru'):
    #     # net = tflearn.gru(x, FEATURE_NUM, return_seq=True)
    #     net = tflearn.gru(x, FEATURE_NUM)
    #     out_gru = tflearn.fully_connected(
    #         net, FEATURE_NUM, activation='relu')
    #     out_gru = tflearn.dropout(out_gru, 0.5)
    # merge_net = tflearn.merge([out_cnn, out_gru], 'concat')
    return out_cnn
def do_cnn(x, y):
    global max_document_length
    print("CNN and tf")
    trainX, testX, trainY, testY = train_test_split(x, y, test_size=0.4, random_state=0)
    y_test = testY
    trainX = pad_sequences(trainX, maxlen=max_document_length, value=0.)
    testX = pad_sequences(testX, maxlen=max_document_length, value=0.)
    # Converting labels to binary vectors
    trainY = to_categorical(trainY, nb_classes=2)
    testY = to_categorical(testY, nb_classes=2)
    # Building convolutional network
    network = input_data(shape=[None, max_document_length], name='input')
    network = tflearn.embedding(network, input_dim=1000000, output_dim=128)
    branch1 = conv_1d(network, 128, 3, padding='valid', activation='relu', regularizer="L2")
    branch2 = conv_1d(network, 128, 4, padding='valid', activation='relu', regularizer="L2")
    branch3 = conv_1d(network, 128, 5, padding='valid', activation='relu', regularizer="L2")
    network = merge([branch1, branch2, branch3], mode='concat', axis=1)
    network = tf.expand_dims(network, 2)
    network = global_max_pool(network)
    network = dropout(network, 0.8)
    network = fully_connected(network, 2, activation='softmax')
    network = regression(network, optimizer='adam', learning_rate=0.001,
                         loss='categorical_crossentropy', name='target')
    model = tflearn.DNN(network, tensorboard_verbose=0)
    # if not os.path.exists(pkl_file):
    # Training
    model.fit(trainX, trainY, n_epoch=5, shuffle=True, validation_set=0.1,
              show_metric=True, batch_size=100, run_id="webshell")
    #     model.save(pkl_file)
    # else:
    #     model.load(pkl_file)
    y_predict_list = model.predict(testX)
    # y_predict = list(model.predict(testX, as_iterable=True))
    y_predict = []
    for i in y_predict_list:
        print(i[0])
        if i[0] > 0.5:
            y_predict.append(0)
        else:
            y_predict.append(1)
    print('y_predict_list:')
    print(y_predict_list)
    print('y_predict:')
    print(y_predict)
    # print(y_test)
    do_metrics(y_test, y_predict)
def create_cnn_architecture_two_layers(
        self, model_name, outputDim=300, number_of_filters=60,
        filterSize=[3, 4], padding='same',
        activation_function_convLayer='relu', regularizer='L2',
        dropouts=0.5, activation_function_fc='softmax', optimizer='adam',
        learning_rate=0.001, loss_function='categorical_crossentropy'):
    if len(filterSize) == 0:
        filterSize = [3, 4]
    """ Define input shape and create word embedding """
    self.cnn_model = input_data(shape=[None, self.max_words], name='input')
    self.cnn_model = tflearn.embedding(
        self.cnn_model,
        input_dim=len(self.vocabProcessor.vocabulary_),
        output_dim=outputDim)
    """ Add two convolutional layers: set the number of filters and the
        filter sizes, then merge the branches together """
    conv1 = conv_1d(self.cnn_model, nb_filter=number_of_filters,
                    filter_size=filterSize[0], padding=padding,
                    activation=activation_function_convLayer,
                    regularizer=regularizer)
    conv2 = conv_1d(self.cnn_model, nb_filter=number_of_filters,
                    filter_size=filterSize[1], padding=padding,
                    activation=activation_function_convLayer,
                    regularizer=regularizer)
    # conv3 = conv_1d(cnn_model, nb_filter=128, filter_size=5, padding='same',
    #                 activation='relu', regularizer='L2')
    self.cnn_model = merge([conv1, conv2], mode='concat', axis=1)
    """ Expand one dimension to fit the max_pooling layer """
    self.cnn_model = tf.expand_dims(self.cnn_model, 1)
    self.cnn_model = global_max_pool(self.cnn_model)
    """ Instantiate dropout layer and specify dropout parameter """
    self.cnn_model = dropout(self.cnn_model, dropouts)
    """ Instantiate fully connected layer and regression layer """
    self.cnn_model = fully_connected(self.cnn_model, self.number_of_classes,
                                     activation=activation_function_fc)
    self.cnn_model = regression(self.cnn_model, optimizer=optimizer,
                                learning_rate=learning_rate,
                                loss=loss_function,
                                name='models/' + model_name)
def resnext_model(X, Y, testX, testY, n_epoch):
    n_input = len(X[0])
    n_classes = len(Y[0])
    X = np.reshape(X, (-1, 1, n_input, 1))
    testX = np.reshape(testX, (-1, 1, n_input, 1))
    net = tflearn.input_data(shape=[None, 1, n_input, 1])
    net = tflearn.conv_2d(net, 16, 3, regularizer='L2', weight_decay=0.0001, activation='selu')
    net = tflearn.resnext_block(net, n, 16, 32, activation='selu')
    net = tflearn.resnext_block(net, 1, 32, 32, downsample=True, activation='selu')
    net = tflearn.resnext_block(net, n - 1, 32, 32, activation='selu')
    net = tflearn.resnext_block(net, 1, 64, 32, downsample=True, activation='selu')
    net = tflearn.resnext_block(net, n - 1, 64, 32, activation='selu')
    net = tflearn.batch_normalization(net)
    net = tflearn.activation(net, 'selu')
    # net_p = tflearn.global_avg_pool(net, name='net_p')
    net_p = global_max_pool(net, name='net_p')
    # Regression
    net = tflearn.fully_connected(net_p, 2048, activation='selu')
    net = tflearn.dropout(net, 0.5)
    net = tflearn.fully_connected(net, 256, activation='selu')
    net = tflearn.dropout(net, 0.7)
    net = tflearn.fully_connected(net, n_classes, activation='softmax')
    # opt = tflearn.Momentum(0.1, lr_decay=0.1, decay_step=32000, staircase=True)
    net = tflearn.regression(net, optimizer='adam', loss='categorical_crossentropy')
    # Training
    model = tflearn.DNN(net, checkpoint_path='../model/model_resnext_grid_adam',
                        max_checkpoints=10, tensorboard_verbose=0, clip_gradients=0.)
    # model.load('../model/model_resnext_grid_adam-70000')
    model.fit(X, Y, n_epoch=n_epoch, validation_set=(testX[:], testY),
              snapshot_epoch=False, snapshot_step=50000, show_metric=True,
              batch_size=128, shuffle=True, run_id='resnext')
    prob = model.predict(testX)
    # pdb.set_trace()
    # Get the hidden-layer value after global max pooling
    m2 = tflearn.DNN(net_p, session=model.session)
    feature_train = list()
    for i in range(4):
        feature_train_temp = m2.predict(X[i * 4000:(i + 1) * 4000])
        feature_train += list(feature_train_temp)
    feature_train_temp = m2.predict(X[16000:])
    feature_train = np.array(feature_train + list(feature_train_temp))
    feature_test = m2.predict(testX)
    # pdb.set_trace()
    return prob, feature_train, feature_test
def buildNet(self):
    # necessary to allow the model to be loaded after it has been trained
    tf.reset_default_graph()
    # build input layer to accept sequences of 72 tokens
    network = input_data(shape=[None, 72], name='input')
    # embedding layer
    network = tflearn.embedding(network, input_dim=len(self.vp.vocabulary_) + 2,
                                output_dim=128)
    # create three convolutional layers
    branch1 = conv_1d(network, 128, 3, padding='valid', activation='relu', regularizer="L2")
    branch2 = conv_1d(network, 128, 4, padding='valid', activation='relu', regularizer="L2")
    branch3 = conv_1d(network, 128, 5, padding='valid', activation='relu', regularizer="L2")
    # merge all incoming tensors into a single tensor
    network = merge([branch1, branch2, branch3], mode='concat', axis=1)
    # expand the dimensions of the network to fit the pooling layer
    network = tf.expand_dims(network, 2)
    # perform a reduction operation over the input tensor
    network = global_max_pool(network)
    # prevent overfitting by including dropout
    network = dropout(network, 0.8)
    # output layer
    network = fully_connected(network, 8, activation='softmax')
    network = regression(network, optimizer='adam', learning_rate=0.0001,
                         loss='categorical_crossentropy', name='target')
    return network
def do_cnn_1d(trainX, testX, trainY, testY):
    # Converting labels to binary vectors
    trainY = to_categorical(trainY, nb_classes=4)
    testY = to_categorical(testY, nb_classes=4)
    # Building convolutional network
    network = input_data(shape=[None, 1000], name='input')
    network = tflearn.embedding(network, input_dim=1000000, output_dim=128,
                                validate_indices=False)
    branch1 = conv_1d(network, 128, 3, padding='valid', activation='relu', regularizer="L2")
    branch2 = conv_1d(network, 128, 4, padding='valid', activation='relu', regularizer="L2")
    branch3 = conv_1d(network, 128, 5, padding='valid', activation='relu', regularizer="L2")
    network = merge([branch1, branch2, branch3], mode='concat', axis=1)
    network = tf.expand_dims(network, 2)
    network = global_max_pool(network)
    network = dropout(network, 0.8)
    network = fully_connected(network, 4, activation='softmax')
    network = regression(network, optimizer='adam', learning_rate=0.001,
                         loss='categorical_crossentropy', name='target')
    # Training
    model = tflearn.DNN(network, tensorboard_verbose=0)
    model.fit(trainX, trainY, n_epoch=5, shuffle=True,
              validation_set=(testX, testY), show_metric=True,
              batch_size=100, run_id="malware")
def model_2(train_x, train_y, test_x, test_y, embedding_size):
    # train_x = pad_sequences(train_x, maxlen=100, value=0.)
    # test_x = pad_sequences(test_x, maxlen=100, value=0.)
    out_dim = embedding_size  # embedding size
    num_cat = len(train_y[0])
    network = input_data(shape=[None, len(train_x[0])], name='input')
    # NOTE: input_dim is meant to be the vocabulary size; here it is set to the
    # sequence length, which only works if every word id stays below that value.
    network = tflearn.embedding(network, input_dim=len(train_x[0]), output_dim=out_dim)
    branch1 = conv_1d(network, out_dim, 3, padding='same', activation='relu', regularizer="L2")
    branch2 = conv_1d(network, out_dim, 4, padding='same', activation='relu', regularizer="L2")
    branch3 = conv_1d(network, out_dim, 5, padding='same', activation='relu', regularizer="L2")
    network = merge([branch1, branch2, branch3], mode='concat', axis=1)
    network = tf.expand_dims(network, 2)
    network = global_max_pool(network)
    network = dropout(network, 0.5)
    network = fully_connected(network, num_cat, activation='softmax')
    network = regression(network, optimizer='adam', learning_rate=0.001,
                         loss='categorical_crossentropy', name='target')
    # Training
    model = tflearn.DNN(network, tensorboard_verbose=0)
    model.fit(train_x, train_y, n_epoch=5, shuffle=True,
              validation_set=(test_x, test_y), show_metric=True, batch_size=32)
    return model
def nlp_cnn(trainX, trainY, testX, testY):
    # pad the sequences
    trainX = pad_sequences(trainX, maxlen=100, value=0.)
    testX = pad_sequences(testX, maxlen=100, value=0.)
    # one-hot encoding
    trainY = to_categorical(trainY, nb_classes=2)
    testY = to_categorical(testY, nb_classes=2)
    # build an embedding
    network = input_data(shape=[None, 100], name='input')
    network = tflearn.embedding(network, input_dim=10000, output_dim=128)
    # build a convnet
    branch1 = conv_1d(network, 128, 3, padding='valid', activation='relu', regularizer='L2')
    branch2 = conv_1d(network, 128, 4, padding='valid', activation='relu', regularizer='L2')
    branch3 = conv_1d(network, 128, 5, padding='valid', activation='relu', regularizer='L2')
    network = merge([branch1, branch2, branch3], mode='concat', axis=1)
    network = tf.expand_dims(network, 2)
    network = global_max_pool(network)
    network = dropout(network, 0.5)
    network = fully_connected(network, 2, activation='softmax')
    network = regression(network, optimizer='adam', learning_rate=0.001,
                         loss='categorical_crossentropy', name='target')
    # training
    model = tflearn.DNN(network, tensorboard_verbose=0)
    model.fit(trainX, trainY, n_epoch=5, shuffle=True,
              validation_set=(testX, testY), show_metric=True, batch_size=32)
def do_cnn_wordbad_tfidf(trainX, testX, trainY, testY):
    trainX = pad_sequences(trainX, value=0.)
    testX = pad_sequences(testX, value=0.)
    trainY = to_categorical(trainY, nb_classes=2)
    testY = to_categorical(testY, nb_classes=2)
    # NOTE: input_data normally needs a shape (e.g. shape=[None, maxlen])
    # to build its placeholder.
    network = input_data(name='input')
    network = tflearn.embedding(network, input_dim=1000000, output_dim=128)
    branch1 = conv_1d(network, 128, 3, padding='valid', activation='relu', regularizer="L2")
    branch2 = conv_1d(network, 128, 4, padding='valid', activation='relu', regularizer="L2")
    branch3 = conv_1d(network, 128, 5, padding='valid', activation='relu', regularizer="L2")
    network = merge([branch1, branch2, branch3], mode='concat', axis=1)
    network = tf.expand_dims(network, 2)
    network = global_max_pool(network)
    network = dropout(network, 0.8)
    network = fully_connected(network, 2, activation='softmax')
    network = regression(network, optimizer='adam', learning_rate=0.001,
                         loss='categorical_crossentropy', name='target')
    model = tflearn.DNN(network, tensorboard_verbose=0)
    model.fit(trainX, trainY, n_epoch=5, shuffle=True,
              validation_set=(testX, testY), show_metric=True,
              batch_size=100, run_id="url")
def cnn(inp_dim, vocab_size, embed_size, num_classes, learn_rate):
    tf.reset_default_graph()
    network = input_data(shape=[None, inp_dim], name='input')
    network = tflearn.embedding(network, input_dim=vocab_size,
                                output_dim=embed_size, name="EmbeddingLayer")
    network = dropout(network, 0.25)
    branch1 = conv_1d(network, embed_size, 3, padding='valid', activation='relu',
                      regularizer="L2", name="layer_1")
    branch2 = conv_1d(network, embed_size, 4, padding='valid', activation='relu',
                      regularizer="L2", name="layer_2")
    branch3 = conv_1d(network, embed_size, 5, padding='valid', activation='relu',
                      regularizer="L2", name="layer_3")
    network = merge([branch1, branch2, branch3], mode='concat', axis=1)
    network = tf.expand_dims(network, 2)
    network = global_max_pool(network)
    network = dropout(network, 0.50)
    network = fully_connected(network, num_classes, activation='softmax', name="fc")
    network = regression(network, optimizer='adam', learning_rate=learn_rate,
                         loss='categorical_crossentropy', name='target')
    model = tflearn.DNN(network, tensorboard_verbose=0)
    return model
def CNN(max_length, n_words, n_classes, n_units):
    '''
    define CNN model
    '''
    net = tflearn.input_data(shape=[None, max_length], name='input')
    net = tflearn.embedding(net, input_dim=n_words, output_dim=n_units)
    branch1 = conv_1d(net, n_units, 3, padding='valid', activation='relu', regularizer="L2")
    branch2 = conv_1d(net, n_units, 4, padding='valid', activation='relu', regularizer="L2")
    branch3 = conv_1d(net, n_units, 5, padding='valid', activation='relu', regularizer="L2")
    net = tflearn.merge([branch1, branch2, branch3], mode='concat', axis=1)
    net = tf.expand_dims(net, 2)
    net = global_max_pool(net)
    net = tflearn.dropout(net, 0.5)
    net = tflearn.fully_connected(net, n_classes, activation='softmax')
    net = tflearn.regression(net, optimizer='adam', learning_rate=0.001,
                             loss='categorical_crossentropy')
    return net
def NCAM_Module(in_dim):
    """ Channel attention module """
    # Ref from SAGAN
    chanel_in = in_dim
    # gamma = tf.Variable(tf.zeros([1]), name='gamma')
    m_batchsize, height, width, C = combined_static_and_dynamic_shape(chanel_in)
    globel_avg = global_avg_pool(chanel_in)
    channel_avg_weights = tf.reshape(globel_avg, [1, C, -1])
    globel_max = global_max_pool(chanel_in)
    channel_max_weights = tf.reshape(globel_max, [1, -1, C])
    energy = tf.matmul(channel_avg_weights, channel_max_weights)  # matrix multiplication
    attention = tf.nn.softmax(energy, axis=-1)  # apply the softmax non-linearity
    proj_value_CAM = tf.reshape(chanel_in, [m_batchsize, C, -1])
    out = tf.matmul(attention, proj_value_CAM)
    out = tf.reshape(out, [m_batchsize, height, width, C])  # reshape back to the input shape
    # out = gamma * out
    out = PAM_Module(out)
    out = out + chanel_in
    return out
def do_cnn(trainX, testX, trainY, testY):
    global max_features
    y_test = testY
    #trainX = pad_sequences(trainX, maxlen=max_features, value=0.)
    #testX = pad_sequences(testX, maxlen=max_features, value=0.)
    # Converting labels to binary vectors
    trainY = to_categorical(trainY, nb_classes=2)
    testY = to_categorical(testY, nb_classes=2)
    # Building convolutional network
    network = input_data(shape=[None, max_features], name='input')
    network = tflearn.embedding(network, input_dim=1000, output_dim=128,
                                validate_indices=False)
    branch1 = conv_1d(network, 128, 2, padding='valid', activation='relu', regularizer="L2")
    branch2 = conv_1d(network, 128, 3, padding='valid', activation='relu', regularizer="L2")
    branch3 = conv_1d(network, 128, 4, padding='valid', activation='relu', regularizer="L2")
    network = merge([branch1, branch2, branch3], mode='concat', axis=1)
    network = tf.expand_dims(network, 2)
    network = global_max_pool(network)
    network = dropout(network, 1)  # keep_prob=1, so dropout is effectively disabled
    network = fully_connected(network, 2, activation='softmax')
    network = regression(network, optimizer='adam', learning_rate=0.001,
                         loss='categorical_crossentropy', name='target')
    # Training
    model = tflearn.DNN(network, tensorboard_verbose=0)
    model.fit(trainX, trainY, n_epoch=10, shuffle=True, validation_set=0,
              show_metric=True, batch_size=10, run_id="uba")
    y_predict_list = model.predict(testX)
    y_predict = []
    for i in y_predict_list:
        if i[0] > 0.5:
            y_predict.append(0)
        else:
            y_predict.append(1)
    print(classification_report(y_test, y_predict))
    print(metrics.confusion_matrix(y_test, y_predict))
def build_conv(max_len, lr=0.001, d_out=0.8):
    '''
    Simple Convolutional network
    '''
    tf.reset_default_graph()
    network = input_data(shape=[None, max_len], name='input')
    network = tflearn.embedding(network, input_dim=10000, output_dim=128)
    branch1 = conv_1d(network, 128, 3, padding='valid', activation='relu', regularizer="L2")
    branch2 = conv_1d(network, 128, 4, padding='valid', activation='relu', regularizer="L2")
    branch3 = conv_1d(network, 128, 5, padding='valid', activation='relu', regularizer="L2")
    network = merge([branch1, branch2, branch3], mode='concat', axis=1)
    network = tf.expand_dims(network, 2)
    network = global_max_pool(network)
    if d_out:
        network = dropout(network, d_out)
    network = fully_connected(network, 2, activation='softmax')
    network = regression(network, optimizer='adam', learning_rate=lr,
                         loss='categorical_crossentropy', name='target')
    model = tflearn.DNN(network, tensorboard_verbose=0)
    return model
def do_cnn_word2vec(x, y):
    global max_document_length
    print("CNN")
    with open('metrics.txt', 'a') as f:
        f.write("CNN: \n")
    trainX, testX, trainY, testY = train_test_split(x, y, test_size=0.4, random_state=0)
    y_test = testY
    # Converting labels to binary vectors
    trainY = to_categorical(trainY, nb_classes=2)
    testY = to_categorical(testY, nb_classes=2)
    # Building convolutional network
    network = input_data(shape=[None, max_document_length, max_features], name='input')
    # No embedding layer needed: the features are already dense vectors
    # network = tflearn.embedding(network, input_dim=100000, output_dim=max_features)
    branch1 = conv_1d(network, 200, 3, padding='valid', activation='relu', regularizer="L2")
    branch2 = conv_1d(network, 200, 4, padding='valid', activation='relu', regularizer="L2")
    branch3 = conv_1d(network, 200, 5, padding='valid', activation='relu', regularizer="L2")
    network = merge([branch1, branch2, branch3], mode='concat', axis=1)
    network = tf.expand_dims(network, 2)
    network = global_max_pool(network)
    network = dropout(network, 0.8)
    network = fully_connected(network, 2, activation='softmax')
    network = regression(network, optimizer='adam', learning_rate=0.001,
                         loss='categorical_crossentropy', name='target')
    model = tflearn.DNN(network, tensorboard_verbose=0)
    model.fit(trainX, trainY, n_epoch=5, shuffle=True, validation_set=0.1,
              show_metric=True, batch_size=100, run_id="webshell")
    y_predict_list = model.predict(testX)
    y_predict = []
    for i in y_predict_list:
        if i[0] > 0.5:
            y_predict.append(0)
        else:
            y_predict.append(1)
    do_metrics(y_test, y_predict)
                  padding='valid', activation='relu',
                  regularizer="L2")  # [batch_size, new steps2, nb_filters]
branch3 = conv_1d(network, 128, 5, padding='valid', activation='relu',
                  regularizer="L2")  # [batch_size, new steps3, nb_filters]
# merge a list of `Tensor` into a single one
# ===> [batch_size, new steps1 + new steps2 + new steps3, nb_filters]
network = merge([branch1, branch2, branch3], mode='concat', axis=1)
# Inserts a dimension of 1 into the tensor's shape:
# [batch_size, new steps1 + new steps2 + new steps3, 1, nb_filters]
network = tf.expand_dims(network, 2)
network = global_max_pool(network)  # [batch_size, pooled dim]
network = dropout(network, 0.5)  # [batch_size, pooled dim]
# matmul([batch_size, pooled_dim], [pooled_dim, 2]) ----> [batch_size, 2]
network = fully_connected(network, 2, activation='softmax')
network = regression(network, optimizer='adam', learning_rate=0.001,
                     loss='categorical_crossentropy', name='target')
# Training
model = tflearn.DNN(network, tensorboard_verbose=0)
model.fit(trainX, trainY, n_epoch=5, shuffle=True,
###################### MODEL: 1.conv-2.conv-3.conv-4.max_pool-5.dropout-6.FC ######################
# input_data(shape=None, placeholder=None, dtype=tf.float32, data_preprocessing=None,
#            data_augmentation=None, name="InputData")
# `input_data` is used as a data entry (placeholder) of a network.
# This placeholder will be fed with data when training.
network = input_data(shape=[None, 100], name='input')  # [None, 100]
# Embedding layer for a sequence of ids. network: incoming 2-D Tensor.
# input_dim: vocabulary size, output_dim: embedding size.
network = tflearn.embedding(network, input_dim=vocab_size, output_dim=256)  # TODO 128; [None, 100, 256]
# conv_1d(incoming, nb_filter, filter_size)
branch1 = conv_1d(network, 256, 1, padding='valid', activation='relu', regularizer="L2")  # 128
branch2 = conv_1d(network, 256, 2, padding='valid', activation='relu', regularizer="L2")  # 128
# [batch_size, new steps1, nb_filters]. padding "VALID" only ever drops the right-most columns
branch3 = conv_1d(network, 256, 3, padding='valid', activation='relu', regularizer="L2")  # 128
branch4 = conv_1d(network, 256, 4, padding='valid', activation='relu', regularizer="L2")  # 128 [batch_size, new steps2, nb_filters]
branch5 = conv_1d(network, 256, 5, padding='valid', activation='relu', regularizer="L2")  # 128 [batch_size, new steps3, nb_filters]
branch6 = conv_1d(network, 256, 6, padding='valid', activation='relu', regularizer="L2")  # 128 #ADD
branch7 = conv_1d(network, 256, 7, padding='valid', activation='relu', regularizer="L2")  # 128 #ADD
branch8 = conv_1d(network, 256, 7, padding='valid', activation='relu', regularizer="L2")  # 128 #ADD
branch9 = conv_1d(network, 256, 8, padding='valid', activation='relu', regularizer="L2")  # 128 #ADD
branch10 = conv_1d(network, 256, 9, padding='valid', activation='relu', regularizer="L2")  # 128 #ADD
# merge a list of `Tensor` into a single one ===> [batch_size, sum of new steps, nb_filters]
network = merge([branch1, branch2, branch3, branch4, branch5, branch6,
                 branch7, branch8, branch9, branch10], mode='concat', axis=1)
# Inserts a dimension of 1 into the tensor's shape: [batch_size, sum of new steps, 1, nb_filters]
network = tf.expand_dims(network, 2)
# input: 4-D tensor [batch_size, height, width, in_channels]; output: 2-D tensor [batch_size, pooled dim]
network = global_max_pool(network)
network = dropout(network, 0.5)  # [batch_size, pooled dim]
# matmul([batch_size, pooled_dim], [pooled_dim, number_classes]) ----> [batch_size, number_classes]
network = fully_connected(network, number_classes, activation='softmax')
# top5 = tflearn.metrics.Top_k(k=5)
network = regression(network, optimizer='adam', learning_rate=0.001,
                     loss='categorical_crossentropy', name='target')  # , metric=top5
###################################################################################################
# 4. Training
model = tflearn.DNN(network, tensorboard_verbose=0)
model.fit(trainX, trainY, n_epoch=10, shuffle=True,
          validation_set=(testX, testY), show_metric=True, batch_size=256)  # 32
model.save('model_zhihu_cnn12345')
print("going to make a prediction...")
model.predict(testX[0:1000])
print("ended...")
regularizer="L2") branch2 = conv_1d(network, 128, 4, padding='valid', activation='relu', regularizer="L2") branch3 = conv_1d(network, 128, 5, padding='valid', activation='relu', regularizer="L2") network = merge([branch1, branch2, branch3], mode='concat', axis=1) network = tf.expand_dims(network, 2) network = global_max_pool(network) network = dropout(network, 0.5) network = fully_connected(network, 2, activation='softmax') network = regression(network, optimizer='adam', learning_rate=0.001, loss='categorical_crossentropy', name='target') # Training model = tflearn.DNN(network, tensorboard_verbose=0) model.fit(trainX, trainY, n_epoch=10, validation_set=(testX, testY), shuffle=True,
regularizer="L2") #128 [batch_size, new steps3, nb_filters] #ADD branch10 = conv_1d( net, 400, 9, padding='valid', activation='relu', regularizer="L2") #128 [batch_size, new steps3, nb_filters] #ADD net = merge( [ branch1, branch2, branch3, branch4, branch5, branch6, branch7, branch8, branch9, branch10 ], mode='concat', axis=1 ) # merge a list of `Tensor` into a single one.===>[batch_size, new steps1+new step2+new step3, nb_filters] print('shape: ', net.shape) net = tf.expand_dims(net, 1) print('shape: ', net.shape) net = global_max_pool(net) net = dropout(net, 0.7) net = tflearn.fully_connected(net, 200, activation='relu') net = dropout(net, 0.7) #net = tflearn.lstm(net, 300, dropout=0.8) net = tflearn.fully_connected(net, 1, activation='sigmoid') net = tflearn.regression(net, optimizer='adam', learning_rate=0.001, loss='mean_square', metric='R2') # Training model = tflearn.DNN(net, tensorboard_verbose=0, tensorboard_dir='tflearn_logs/') embeddingWeights = tflearn.get_layer_variables_by_name('EmbeddingLayer')[0]
# else:
#     print("training data exists in cache. going to use it.")

# 3. Building convolutional network
###################### MODEL: 1.conv-2.conv-3.conv-4.max_pool-5.dropout-6.FC ######################
# input_data(shape=None, placeholder=None, dtype=tf.float32, data_preprocessing=None,
#            data_augmentation=None, name="InputData")
# `input_data` is used as a data entry (placeholder) of a network.
# This placeholder will be fed with data when training.
network = input_data(shape=[None, 100], name='input')  # [None, 100]
# Embedding layer for a sequence of ids. network: incoming 2-D Tensor.
# input_dim: vocabulary size, output_dim: embedding size.
network = tflearn.embedding(network, input_dim=vocab_size, output_dim=128)  # TODO [None, 100, 128]
# conv_1d(incoming, nb_filter, filter_size)
branch1 = conv_1d(network, 128, 1, padding='valid', activation='relu', regularizer="L2")
branch2 = conv_1d(network, 128, 2, padding='valid', activation='relu', regularizer="L2")
# [batch_size, new steps1, nb_filters]. padding "VALID" only ever drops the right-most columns
branch3 = conv_1d(network, 128, 3, padding='valid', activation='relu', regularizer="L2")
branch4 = conv_1d(network, 128, 4, padding='valid', activation='relu', regularizer="L2")  # [batch_size, new steps2, nb_filters]
branch5 = conv_1d(network, 128, 5, padding='valid', activation='relu', regularizer="L2")  # [batch_size, new steps3, nb_filters]
# merge a list of `Tensor` into a single one ===> [batch_size, sum of new steps, nb_filters]
network = merge([branch1, branch2, branch3, branch4, branch5], mode='concat', axis=1)
# Inserts a dimension of 1 into the tensor's shape
network = tf.expand_dims(network, 2)
network = global_max_pool(network)  # [batch_size, pooled dim]
network = dropout(network, 0.5)  # [batch_size, pooled dim]
# matmul([batch_size, pooled_dim], [pooled_dim, number_classes]) ----> [batch_size, number_classes]
network = fully_connected(network, number_classes, activation='softmax')
top5 = tflearn.metrics.Top_k(k=5)
network = regression(network, optimizer='adam', learning_rate=0.001,
                     loss='categorical_crossentropy', name='target')  # metric=top5
###################################################################################################
# 4. Training
model = tflearn.DNN(network, tensorboard_verbose=0)
# model.fit(trainX, trainY, n_epoch=10, shuffle=True,
#           validation_set=(testX, testY), show_metric=True, batch_size=256)  # 32
# model.save('model_zhihu_cnn12345')
model.load('model_zhihu_cnn12345')
print("going to make a prediction...")
predict_result = model.predict(testX[0:1000])
print("predict_result:", predict_result)
print("ended...")
def Global_Max_Pool(x, stride=1):
    return global_max_pool(x, name="global_max_pooling")
branch2 = conv_1d(network, 128, 4, padding='valid', activation='relu',
                  regularizer="L2")  # [none, 97, 128]
branch3 = conv_1d(network, 128, 5, padding='valid', activation='relu',
                  regularizer="L2")  # [none, 96, 128]
network = merge([branch1, branch2, branch3], mode='concat', axis=1)  # [none, 291, 128]
network = tf.expand_dims(network, 2)  # [none, 291, 1, 128]
network = global_max_pool(network)  # [none, 128]
network = dropout(network, 0.5)
network = fully_connected(network, n_class, activation='softmax')  # [none, 2]
network = regression(network, optimizer='adam', learning_rate=0.001,
                     loss='categorical_crossentropy', name='target')
# Training
model = tflearn.DNN(network, tensorboard_verbose=0)
model.fit(trainX, trainY, n_epoch=5, shuffle=True,
          validation_set=(testX, testY), show_metric=True,
def do_dctfcnn(x1, x2, y):
    global max_document_length
    global max_features
    print("CNN")
    with open('metrics.txt', 'a') as f:
        f.write("dcCNN: \n")
    # Split into train and test sets
    trainX1, testX1, trainX2, testX2, trainY, testY = train_test_split(
        x1, x2, y, test_size=0.4, random_state=0)
    y_test = testY
    # Converting labels to binary vectors
    trainY = to_categorical(trainY, nb_classes=2)
    testY = to_categorical(testY, nb_classes=2)
    # Building convolutional network
    net1 = input_data(shape=[None, max_document_length, max_features], name='input1')
    net2 = input_data(shape=[None, max_document_length, max_features], name='input2')
    branch11 = conv_1d(net1, 200, 3, padding='valid', activation='relu', regularizer="L2")
    branch12 = conv_1d(net1, 200, 4, padding='valid', activation='relu', regularizer="L2")
    branch13 = conv_1d(net1, 200, 5, padding='valid', activation='relu', regularizer="L2")
    net1 = merge([branch11, branch12, branch13], mode='concat', axis=1)
    net1 = tf.expand_dims(net1, 2)
    net1 = global_max_pool(net1)
    net1 = dropout(net1, 0.8)
    branch21 = conv_1d(net2, 200, 3, padding='valid', activation='relu', regularizer="L2")
    branch22 = conv_1d(net2, 200, 4, padding='valid', activation='relu', regularizer="L2")
    branch23 = conv_1d(net2, 200, 5, padding='valid', activation='relu', regularizer="L2")
    net2 = merge([branch21, branch22, branch23], mode='concat', axis=1)
    net2 = tf.expand_dims(net2, 2)
    net2 = global_max_pool(net2)
    net2 = dropout(net2, 0.8)
    network = merge([net1, net2], mode='concat', axis=1)
    network = fully_connected(network, 2, activation='softmax')
    network = regression(network, optimizer='adam', learning_rate=0.001,
                         loss='categorical_crossentropy', name='target')
    model = tflearn.DNN(network, tensorboard_verbose=0)
    # Training
    model.fit([trainX1, trainX2], trainY, n_epoch=5, shuffle=True,
              validation_set=([testX1, testX2], testY), show_metric=True,
              batch_size=100, run_id="webshell")
    y_predict_list = model.predict([testX1, testX2])
    y_predict = []
    for i in y_predict_list:
        if i[0] > 0.5:
            y_predict.append(0)
        else:
            y_predict.append(1)
    do_metrics(y_test, y_predict)
def Global_Max_Pool(x, name=None):
    return global_max_pool(x, name=name + "_global_max_pooling")
# IMDB Dataset loading
train, test, _ = imdb.load_data(path='imdb.pkl', n_words=10000, valid_portion=0.1)
trainX, trainY = train
testX, testY = test

# Data preprocessing
# Sequence padding
trainX = pad_sequences(trainX, maxlen=100, value=0.)
testX = pad_sequences(testX, maxlen=100, value=0.)
# Converting labels to binary vectors
trainY = to_categorical(trainY)
testY = to_categorical(testY)

# Building convolutional network
network = input_data(shape=[None, 100], name='input')
network = tflearn.embedding(network, input_dim=10000, output_dim=128)
branch1 = conv_1d(network, 128, 3, padding='valid', activation='relu', regularizer="L2")
branch2 = conv_1d(network, 128, 4, padding='valid', activation='relu', regularizer="L2")
branch3 = conv_1d(network, 128, 5, padding='valid', activation='relu', regularizer="L2")
network = merge([branch1, branch2, branch3], mode='concat', axis=1)
network = tf.expand_dims(network, 2)
network = global_max_pool(network)
network = dropout(network, 0.5)
network = fully_connected(network, 2, activation='softmax')
network = regression(network, optimizer='adam', learning_rate=0.001,
                     loss='categorical_crossentropy', name='target')

# Training
model = tflearn.DNN(network, tensorboard_verbose=0)
model.fit(trainX, trainY, n_epoch=5, shuffle=True,
          validation_set=(testX, testY), show_metric=True, batch_size=32)
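# Hypothetical follow-up (not part of the original example): evaluating the
# trained IMDB model and inspecting a few predictions. tflearn.DNN exposes
# evaluate() and predict() for this.
score = model.evaluate(testX, testY, batch_size=32)
print("Test accuracy: %.4f" % score[0])
probs = model.predict(testX[:5])  # class probabilities, shape [5, 2]
print(probs)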