def create_model(sequence_length, input_dimension, hidden_dimensions, class_number, tau):
    feature_input = layers.Input(shape=(sequence_length, input_dimension,))
    x = feature_input
    # multi-layer LSTM stack
    for k, dim in enumerate(hidden_dimensions):
        x = layers.CuDNNLSTM(dim,
                             kernel_regularizer=regularizers.l2(l=0.0004),
                             return_sequences=True,
                             name='lstm_{}'.format(k))(x)
    # lstm_state: batch_size x seq_length x dimension
    lstm_state = x
    # y: batch_size x seq_length x class_number
    y = layers.TimeDistributed(layers.Dense(class_number, activation='softmax'))(lstm_state)
    model = Model(inputs=feature_input, outputs=y)
    return model
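A minimal usage sketch for create_model, assuming TF 1.x-style tf.keras imports (layers, regularizers, Model) and an available GPU, since CuDNNLSTM only runs on GPU; the hyperparameter values and dummy data are illustrative, and tau is passed through unused, as in the signature above.

# Illustrative sketch; imports are assumptions about how the snippet's names are provided.
import numpy as np
from tensorflow.keras import layers, regularizers
from tensorflow.keras.models import Model

model = create_model(sequence_length=50, input_dimension=40,
                     hidden_dimensions=[64, 32], class_number=3, tau=1)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Dummy batch: 8 sequences of length 50 with 40 features, one-hot labels per timestep.
x = np.random.rand(8, 50, 40).astype('float32')
y = np.eye(3)[np.random.randint(0, 3, size=(8, 50))]
model.fit(x, y, epochs=1, batch_size=8)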
def __init__(self, embedding, hidden_size, batch_size):
    super(Encoder, self).__init__()
    self.embedding = embedding
    self.batch_size = batch_size
    self.hidden_size = hidden_size
    self.bilstm = layers.Bidirectional(
        layers.CuDNNLSTM(self.hidden_size,
                         return_sequences=True,
                         return_state=True),
        merge_mode='concat')
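A hedged instantiation sketch for the Encoder fragment above: only __init__ is shown, so it is assumed here that Encoder subclasses tf.keras.Model and that the caller supplies the embedding layer; the vocabulary size, dimensions, and symbolic input are illustrative.

from tensorflow.keras import layers   # assumed import

embedding = layers.Embedding(input_dim=10000, output_dim=256)          # illustrative sizes
encoder = Encoder(embedding=embedding, hidden_size=128, batch_size=32)

# Because the wrapped CuDNNLSTM uses return_state=True, the Bidirectional layer
# returns the sequence output plus forward/backward hidden and cell states.
token_ids = layers.Input(shape=(None,), dtype='int32')
outputs, fw_h, fw_c, bw_h, bw_c = encoder.bilstm(encoder.embedding(token_ids))
# outputs: (batch, timesteps, 2 * hidden_size) because merge_mode='concat'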
def multi_input_model():
    text_vocabulary_size = 10000
    question_vocabulary_size = 10000
    answer_vocabulary_size = 500

    text_input = Input(shape=(None,), dtype='int32', name='text')
    embedded_text = layers.Embedding(text_vocabulary_size, 64)(text_input)
    encoded_text = layers.CuDNNLSTM(32)(embedded_text)

    question_input = Input(shape=(None,), dtype='int32', name='question')
    embedded_question = layers.Embedding(question_vocabulary_size, 32)(question_input)
    encoded_question = layers.CuDNNLSTM(16)(embedded_question)

    # Note: the two encoded inputs are merged here.
    # With axis=i the concatenation runs along the i-th dimension; axis=-1 means the last axis.
    concatenated = layers.concatenate([encoded_text, encoded_question], axis=-1)
    answer = layers.Dense(answer_vocabulary_size, activation='softmax')(concatenated)

    model = Model([text_input, question_input], answer)
    '''
    model.compile(optimizer='rmsprop',
                  loss='categorical_crossentropy',
                  metrics=['acc'])
    num_samples = 1000
    max_length = 100
    text = np.random.randint(1, text_vocabulary_size, size=(num_samples, max_length))
    question = np.random.randint(1, question_vocabulary_size, size=(num_samples, max_length))
    answers = np.random.randint(answer_vocabulary_size, size=(num_samples))
    answers = to_categorical(answers, answer_vocabulary_size)
    model.fit([text, question], answers, epochs=10, batch_size=128)
    model.fit({'text': text, 'question': question}, answers, epochs=10, batch_size=128)
    '''
    return model
def shared_weight_model():
    lstm = layers.CuDNNLSTM(32)

    left_input = Input(shape=(None, 128))
    left_output = lstm(left_input)

    right_input = Input(shape=(None, 128))
    right_output = lstm(right_input)

    # This also uses concatenate, but both branches share a single LSTM layer,
    # whereas the multi-input model above used two separate LSTMs.
    merged = layers.concatenate([left_output, right_output], axis=-1)
    predictions = layers.Dense(1, activation='sigmoid')(merged)

    model = Model([left_input, right_input], predictions)
    # model.fit([left_data, right_data], targets)
    return model
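The commented-out fit call above hints at how the two branches are fed; below is an illustrative sketch with random data, where the sample count, sequence length, optimizer, and loss are assumptions rather than taken from the source.

# Illustrative data for shared_weight_model; shapes follow the Input(shape=(None, 128)) branches.
import numpy as np

model = shared_weight_model()
model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])

num_samples, seq_len = 256, 20                      # made-up sizes
left_data = np.random.rand(num_samples, seq_len, 128).astype('float32')
right_data = np.random.rand(num_samples, seq_len, 128).astype('float32')
targets = np.random.randint(0, 2, size=(num_samples, 1))

model.fit([left_data, right_data], targets, epochs=1, batch_size=64)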
def __init__(self, use_cudnn_lstm=True, plot_model_architecture=False):
    n_hidden = 50
    input_dim = 300
    # unit_forget_bias: Boolean. If True, add 1 to the bias of the forget gate at
    # initialization. Setting it to True also forces bias_initializer="zeros".
    # This is recommended in Jozefowicz et al.
    # he_normal: Gaussian initialization scaled by fan_in (He et al., 2014)
    if use_cudnn_lstm:
        # Use CuDNNLSTM instead of LSTM because it is faster.
        lstm = layers.CuDNNLSTM(n_hidden, unit_forget_bias=True,
                                kernel_initializer='he_normal',
                                kernel_regularizer='l2', name='lstm_layer')
    else:
        lstm = layers.LSTM(n_hidden, unit_forget_bias=True,
                           kernel_initializer='he_normal',
                           kernel_regularizer='l2', name='lstm_layer')

    # Left branch of the model: inputs are variable-length sequences of vectors of size input_dim.
    left_input = Input(shape=(None, input_dim), name='input_1')
    # left_masked_input = layers.Masking(mask_value=0)(left_input)
    left_output = lstm(left_input)

    # Right branch of the model: calling an existing layer instance reuses its weights.
    right_input = Input(shape=(None, input_dim), name='input_2')
    # right_masked_input = layers.Masking(mask_value=0)(right_input)
    right_output = lstm(right_input)

    # Builds the classifier on top
    l1_norm = lambda x: 1 - K.abs(x[0] - x[1])
    merged = layers.Lambda(function=l1_norm,
                           output_shape=lambda x: x[0],
                           name='L1_distance')([left_output, right_output])
    predictions = layers.Dense(1, activation='tanh', name='Similarity_layer')(merged)  # sigmoid

    # When such a model is trained, the weights of the shared LSTM layer are updated
    # based on both inputs.
    self.model = Model([left_input, right_input], predictions)
    self.__compile()
    print(self.model.summary())

    if plot_model_architecture:
        from tensorflow.python.keras.utils import plot_model
        plot_model(self.model, to_file='siamese_architecture.png')
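A tiny check of the L1_distance merge used above: the Lambda computes 1 - |left - right| elementwise, so identical encodings map to 1 and distant ones fall toward (or below) 0. The array values are made up for illustration.

# Worked example of the merge function on dummy encodings.
import numpy as np

left = np.array([[0.2, 0.9, -0.4]])
right = np.array([[0.1, 0.9, 0.1]])
print(1 - np.abs(left - right))   # [[0.9 1.  0.5]] -> fed to the Dense similarity layer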
def main(thread_num):
    # tf.test.is_gpu_available() causes both GPU devices to be allocated,
    # so select GPU device 0/1 via the device_type flag instead.
    # if tf.test.is_gpu_available():
    if device_type == 'gpu':
        os.environ["CUDA_VISIBLE_DEVICES"] = thread_num

    task = pd.read_csv("task.csv")
    #print(task)
    if os.path.isfile("output.csv"):
        output_csv = pd.read_csv("output.csv")
    else:
        output_csv = task
        output_csv = output_csv.drop(columns=['data_set'])
        #output_csv['train_acc'] = 0.0
        output_csv['final_train_loss'] = 100.0
        #output_csv['valid_acc'] = 0.0
        output_csv['final_valid_loss'] = 100.0
        output_csv['best_trade_acc'] = 0.0
        output_csv['best_trade_acc_epoch'] = 0
        output_csv['best_trade_f1'] = 0.0
        output_csv['best_trade_f1_epoch'] = 0
        output_csv['best_trade_precision'] = 0.0
        output_csv['best_trade_precision_epoch'] = 0
        output_csv['best_trade_recall'] = 0.0
        output_csv['best_trade_recall_epoch'] = 0
        # output_csv['best_trade_loss'] = 100.0
        # output_csv['best_trade_loss_epoch'] = 0
        output_csv['completed'] = 0

    for index, row in task.iterrows():
        #if tf.test.is_gpu_available():
        if device_type == 'gpu':
            # split tasks between the two worker processes
            if index % 2 != int(thread_num):
                continue
        completed = output_csv['completed'][index]
        if completed == 1:
            continue

        data_set = int(task['data_set'][index])
        load_dir = os.path.join(os.getcwd(), 'data_set/' + str(data_set))
        if not os.path.isdir(load_dir):
            continue

        task_id = int(task['task_id'][index])
        input_size = int(task['input'][index])
        pred_k = int(task['k'][index])
        feature_num = int(task['feature_num'][index])
        label_threshold = float(task['label_threshold'][index])
        lstm_units = int(task['lstm_units'][index])
        lr = float(task['learning_rate'][index])
        epsilon = float(task['epsilon'][index])
        regularizer = float(task['regularizer'][index])

        train_x = np.load(os.path.join(load_dir, 'train_x.npy'))
        train_y = np.load(os.path.join(load_dir, 'train_y_onehot.npy'))
        valid_x = np.load(os.path.join(load_dir, 'valid_x.npy'))
        valid_y = np.load(os.path.join(load_dir, 'valid_y_onehot.npy'))
        trade_y = np.load(os.path.join(load_dir, 'trading_valid_y_onehot.npy'))

        print('Running experiment {}'.format(task_id))
        # clear previously built models from the session
        clear_session()

        model_dir = os.path.join(os.getcwd(), 'load_model')
        if os.path.isdir(model_dir):
            model_dir = os.path.join(model_dir, str(task_id) + '/model/model_epoch_500.h5')
            # the saved model is an .h5 file, so check with isfile rather than isdir
            if not os.path.isfile(model_dir):
                continue
            model = load_model(model_dir)
        else:
            #input_tensor = Input(shape=(30,4,1))
            input_tensor = Input(shape=(input_size, 4, 1))
            layer_x = layers.Conv2D(
                16, (1, 4),
                kernel_regularizer=regularizers.l1(l=regularizer))(input_tensor)
            layer_x = layers.BatchNormalization()(layer_x)
            layer_x = layers.LeakyReLU(alpha=0.01)(layer_x)
            layer_x = layers.Conv2D(
                16, (4, 1), padding='same',
                kernel_regularizer=regularizers.l1(l=regularizer))(layer_x)
            layer_x = layers.BatchNormalization()(layer_x)
            layer_x = layers.LeakyReLU(alpha=0.01)(layer_x)
            layer_x = layers.Conv2D(
                16, (4, 1), padding='same',
                kernel_regularizer=regularizers.l1(l=regularizer))(layer_x)
            layer_x = layers.BatchNormalization()(layer_x)
            layer_x = layers.LeakyReLU(alpha=0.01)(layer_x)

            # dual input for ohlc+volume
            if feature_num == 5:
                train_x_ohlc = train_x[:, :, :4, :]
                train_x_volume = train_x[:, :, -1:, :]
                train_x = [train_x_ohlc, train_x_volume]
                valid_x_ohlc = valid_x[:, :, :4, :]
                valid_x_volume = valid_x[:, :, -1:, :]
                valid_x = [valid_x_ohlc, valid_x_volume]

                input_tensor2 = Input(shape=(input_size, 1, 1))
                layer_x2 = layers.Conv2D(
                    16, (1, 1),
                    kernel_regularizer=regularizers.l1(l=regularizer))(input_tensor2)
                layer_x2 = layers.BatchNormalization()(layer_x2)
                layer_x2 = layers.LeakyReLU(alpha=0.01)(layer_x2)
                layer_x2 = layers.Conv2D(
                    16, (4, 1), padding='same',
                    kernel_regularizer=regularizers.l1(l=regularizer))(layer_x2)
                layer_x2 = layers.BatchNormalization()(layer_x2)
                layer_x2 = layers.LeakyReLU(alpha=0.01)(layer_x2)
                layer_x2 = layers.Conv2D(
                    16, (4, 1), padding='same',
                    kernel_regularizer=regularizers.l1(l=regularizer))(layer_x2)
                layer_x2 = layers.BatchNormalization()(layer_x2)
                layer_x2 = layers.LeakyReLU(alpha=0.01)(layer_x2)

                layer_x = layers.concatenate([layer_x, layer_x2], axis=-1)

            # Inception Module
            tower_1 = layers.Conv2D(
                32, (1, 1), padding='same',
                kernel_regularizer=regularizers.l1(l=regularizer))(layer_x)
            tower_1 = layers.BatchNormalization()(tower_1)
            tower_1 = layers.LeakyReLU(alpha=0.01)(tower_1)
            tower_1 = layers.Conv2D(
                32, (3, 1), padding='same',
                kernel_regularizer=regularizers.l1(l=regularizer))(tower_1)
            tower_1 = layers.BatchNormalization()(tower_1)
            tower_1 = layers.LeakyReLU(alpha=0.01)(tower_1)

            tower_2 = layers.Conv2D(
                32, (1, 1), padding='same',
                kernel_regularizer=regularizers.l1(l=regularizer))(layer_x)
            tower_2 = layers.BatchNormalization()(tower_2)
            tower_2 = layers.LeakyReLU(alpha=0.01)(tower_2)
            tower_2 = layers.Conv2D(
                32, (5, 1), padding='same',
                kernel_regularizer=regularizers.l1(l=regularizer))(tower_2)
            tower_2 = layers.BatchNormalization()(tower_2)
            tower_2 = layers.LeakyReLU(alpha=0.01)(tower_2)

            tower_3 = layers.MaxPooling2D((3, 1), padding='same', strides=(1, 1))(layer_x)
            tower_3 = layers.Conv2D(
                32, (1, 1), padding='same',
                kernel_regularizer=regularizers.l1(l=regularizer))(tower_3)
            tower_3 = layers.BatchNormalization()(tower_3)
            tower_3 = layers.LeakyReLU(alpha=0.01)(tower_3)

            layer_x = layers.concatenate(
                [tower_1, tower_2, tower_3],
                axis=-1)  # concatenate features of tower_1, tower_2, tower_3
            layer_x = layers.Reshape((input_size, 96))(layer_x)
            #layer_x = layers.Reshape((input_size,feature_num))(input_tensor)

            # 64 LSTM units
            #layer_x = layers.LSTM(64)(layer_x)
            # if using GPU
            if device_type == 'gpu':
                print('using GPU')
                layer_x = layers.CuDNNLSTM(
                    lstm_units,
                    kernel_regularizer=regularizers.l1(l=regularizer))(layer_x)
            # if using CPU
            elif device_type == 'cpu':
                print('using CPU')
                layer_x = layers.LSTM(
                    lstm_units,
                    kernel_regularizer=regularizers.l1(l=regularizer))(layer_x)
            else:
                sys.exit("wrong device type")

            # The last output layer uses a softmax activation function
            output = layers.Dense(3, activation='softmax')(layer_x)

            if feature_num == 4:
                model = Model(input_tensor, output)
            elif feature_num == 5:
                model = Model([input_tensor, input_tensor2], output)

            opt = Adam(lr=lr, epsilon=epsilon)
            model.compile(loss='categorical_crossentropy',
                          optimizer=opt,
                          metrics=['accuracy'])
            #model.summary()

        save_dir = os.path.join(os.getcwd(), 'result/' + str(task_id))
        if not os.path.isdir(save_dir):
            os.makedirs(save_dir)

        final_train_loss, \
        final_valid_loss, \
        best_trade_acc, \
        best_trade_acc_epoch, \
        best_trade_f1, \
        best_trade_f1_epoch, \
        best_trade_precision, \
        best_trade_precision_epoch, \
        best_trade_recall, \
        best_trade_recall_epoch \
            = train_model(model,
                          save_dir,
                          task_id,
                          train_x,
                          train_y,
                          valid_x,
                          valid_y,
                          trade_y,
                          batch_size=512,
                          epochs=3)

        with open(os.path.join(save_dir, 'readme.txt'), 'w') as f:
            f.write("""'task id = {}\n input size = {}\n prediction k = {}\n feature = {}\n label threshold = {}\n lstm units = {}\n learning rate = {}\n epsilon = {}\n regularizer = {}\n data set = {}'""".format(
                task_id,
                input_size,
                pred_k,
                feature_num,
                label_threshold,
                lstm_units,
                lr,
                epsilon,
                regularizer,
                data_set))

        #output_csv['train_acc'][index] = train_acc
        # write results back with .loc to avoid pandas chained-assignment pitfalls
        output_csv.loc[index, 'final_train_loss'] = final_train_loss
        #output_csv['valid_acc'][index] = valid_acc
        output_csv.loc[index, 'final_valid_loss'] = final_valid_loss
        output_csv.loc[index, 'best_trade_acc'] = best_trade_acc
        output_csv.loc[index, 'best_trade_acc_epoch'] = best_trade_acc_epoch
        output_csv.loc[index, 'best_trade_f1'] = best_trade_f1
        output_csv.loc[index, 'best_trade_f1_epoch'] = best_trade_f1_epoch
        output_csv.loc[index, 'best_trade_precision'] = best_trade_precision
        output_csv.loc[index, 'best_trade_precision_epoch'] = best_trade_precision_epoch
        output_csv.loc[index, 'best_trade_recall'] = best_trade_recall
        output_csv.loc[index, 'best_trade_recall_epoch'] = best_trade_recall_epoch
        #output_csv['best_trade_loss'] = best_trade_loss
        #output_csv['best_trade_loss_epoch'] = best_trade_loss_epoch
        output_csv.loc[index, 'completed'] = 1

        # checkpoint the results after every task so interrupted runs can resume
        output_csv.to_csv('output.csv')
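One plausible entry point for main(): since thread_num is written into CUDA_VISIBLE_DEVICES and used to split task indices modulo 2, the script is presumably started once per GPU with '0' or '1'. The argument handling below is an assumption, not taken from the source.

# Hypothetical launcher sketch: run once per GPU, e.g.
#   python run_tasks.py 0    and    python run_tasks.py 1
if __name__ == '__main__':
    import sys
    # thread_num stays a string because it is assigned to CUDA_VISIBLE_DEVICES directly
    main(sys.argv[1] if len(sys.argv) > 1 else '0')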
layer_x = layers.concatenate(
    [tower_1, tower_2, tower_3],
    axis=-1)  # concatenate features of tower_1, tower_2, tower_3
layer_x = layers.Reshape((input_size, 96))(layer_x)
#layer_x = layers.Reshape((input_size,feature_num))(input_tensor)

# 64 LSTM units
#layer_x = layers.LSTM(64)(layer_x)
# if using GPU
if tf.test.is_gpu_available():
    print('using GPU')
    layer_x = layers.CuDNNLSTM(
        lstm_units,
        kernel_regularizer=regularizers.l1(l=regularizer),
        kernel_initializer="zeros",
        recurrent_initializer="zeros",
        bias_initializer="zeros")(layer_x)
# if using CPU
else:
    print('using CPU')
    layer_x = layers.LSTM(
        lstm_units,
        kernel_regularizer=regularizers.l1(l=regularizer),
        kernel_initializer="zeros",
        recurrent_initializer="zeros",
        bias_initializer="zeros")(layer_x)

# The last output layer uses a softmax activation function
output = layers.Dense(3, activation='softmax',
tower_3 = layers.LeakyReLU(alpha=0.01)(tower_3)

layer_x = layers.concatenate(
    [tower_1, tower_2, tower_3],
    axis=-1)  # concatenate features of tower_1, tower_2, tower_3
layer_x = layers.Reshape((input_size, 96))(layer_x)
#layer_x = layers.Reshape((input_size,feature_num))(input_tensor)

# 64 LSTM units
#layer_x = layers.LSTM(64)(layer_x)
# if using GPU
if tf.test.is_gpu_available():
    print('using GPU')
    layer_x = layers.CuDNNLSTM(
        lstm_units,
        kernel_regularizer=regularizers.l1(l=regularizer))(layer_x)
# if using CPU
else:
    print('using CPU')
    layer_x = layers.LSTM(
        lstm_units,
        kernel_regularizer=regularizers.l1(l=regularizer))(layer_x)

# The last output layer uses a softmax activation function
output = layers.Dense(3, activation='softmax')(layer_x)

if feature_num == 4:
    model = Model(input_tensor, output)
elif feature_num == 5:
def deeplob_model():
    input_tensor = Input(shape=(100, 20, 1))

    # convolutional filter is (1,2) with stride of (1,2)
    layer_x = layers.Conv2D(16, (1, 2), strides=(1, 2))(input_tensor)
    layer_x = layers.BatchNormalization()(layer_x)
    layer_x = layers.LeakyReLU(alpha=0.01)(layer_x)
    layer_x = layers.Conv2D(16, (4, 1), padding='same')(layer_x)
    layer_x = layers.BatchNormalization()(layer_x)
    layer_x = layers.LeakyReLU(alpha=0.01)(layer_x)
    layer_x = layers.Conv2D(16, (4, 1), padding='same')(layer_x)
    layer_x = layers.BatchNormalization()(layer_x)
    layer_x = layers.LeakyReLU(alpha=0.01)(layer_x)

    layer_x = layers.Conv2D(16, (1, 2), strides=(1, 2))(layer_x)
    layer_x = layers.BatchNormalization()(layer_x)
    layer_x = layers.LeakyReLU(alpha=0.01)(layer_x)
    layer_x = layers.Conv2D(16, (4, 1), padding='same')(layer_x)
    layer_x = layers.BatchNormalization()(layer_x)
    layer_x = layers.LeakyReLU(alpha=0.01)(layer_x)
    layer_x = layers.Conv2D(16, (4, 1), padding='same')(layer_x)
    layer_x = layers.BatchNormalization()(layer_x)
    layer_x = layers.LeakyReLU(alpha=0.01)(layer_x)

    layer_x = layers.Conv2D(16, (1, 5))(layer_x)
    layer_x = layers.BatchNormalization()(layer_x)
    layer_x = layers.LeakyReLU(alpha=0.01)(layer_x)
    layer_x = layers.Conv2D(16, (4, 1), padding='same')(layer_x)
    layer_x = layers.BatchNormalization()(layer_x)
    layer_x = layers.LeakyReLU(alpha=0.01)(layer_x)
    layer_x = layers.Conv2D(16, (4, 1), padding='same')(layer_x)
    layer_x = layers.BatchNormalization()(layer_x)
    layer_x = layers.LeakyReLU(alpha=0.01)(layer_x)

    # Inception Module: each tower is Conv -> BatchNorm -> LeakyReLU
    tower_1 = layers.Conv2D(32, (1, 1), padding='same')(layer_x)
    tower_1 = layers.BatchNormalization()(tower_1)
    tower_1 = layers.LeakyReLU(alpha=0.01)(tower_1)
    tower_1 = layers.Conv2D(32, (3, 1), padding='same')(tower_1)
    tower_1 = layers.BatchNormalization()(tower_1)
    tower_1 = layers.LeakyReLU(alpha=0.01)(tower_1)

    tower_2 = layers.Conv2D(32, (1, 1), padding='same')(layer_x)
    tower_2 = layers.BatchNormalization()(tower_2)
    tower_2 = layers.LeakyReLU(alpha=0.01)(tower_2)
    tower_2 = layers.Conv2D(32, (5, 1), padding='same')(tower_2)
    tower_2 = layers.BatchNormalization()(tower_2)
    tower_2 = layers.LeakyReLU(alpha=0.01)(tower_2)

    tower_3 = layers.MaxPooling2D((3, 1), padding='same', strides=(1, 1))(layer_x)
    tower_3 = layers.Conv2D(32, (1, 1), padding='same')(tower_3)
    tower_3 = layers.BatchNormalization()(tower_3)
    tower_3 = layers.LeakyReLU(alpha=0.01)(tower_3)

    layer_x = layers.concatenate(
        [tower_1, tower_2, tower_3],
        axis=-1)  # concatenate features of tower_1, tower_2, tower_3
    layer_x = layers.Reshape((100, 96))(layer_x)

    # 64 LSTM units
    # CPU version
    #layer_x = layers.LSTM(64)(layer_x)
    # GPU version, cannot run on CPU
    layer_x = layers.CuDNNLSTM(64)(layer_x)

    # The last output layer uses a softmax activation function
    output = layers.Dense(3, activation='softmax')(layer_x)

    model = Model(input_tensor, output)
    # learning rate and epsilon are the same as in the DeepLOB paper
    opt = Adam(lr=0.01, epsilon=1)
    model.compile(loss='categorical_crossentropy',
                  optimizer=opt,
                  metrics=['accuracy'])
    return model
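A short smoke test for deeplob_model, with random arrays standing in for the 100x20 limit-order-book snapshots; the shapes follow Input(shape=(100, 20, 1)) and the 3-class softmax head, while the batch size, epoch count, and to_categorical helper are assumptions for illustration (the imports used inside deeplob_model are assumed to be in scope).

# Illustrative smoke test; requires a GPU because of CuDNNLSTM, and uses random data.
import numpy as np
from tensorflow.keras.utils import to_categorical

model = deeplob_model()
x = np.random.rand(32, 100, 20, 1).astype('float32')          # 32 dummy LOB windows
y = to_categorical(np.random.randint(0, 3, size=(32,)), 3)    # down / flat / up labels
model.fit(x, y, epochs=1, batch_size=16)
print(model.predict(x[:2]).shape)                              # (2, 3) class probabilities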