def test_sum_2D(self):
    data = np.random.randn(2, 3)
    parameter = Parameter(shape=(2, 3), init_weight=data)
    out = autograd.sum(parameter, axis=0)
    model = Model(input=parameter, output=out)
    result = model.forward(data)
    np.testing.assert_almost_equal(result, data.sum(axis=0), decimal=5)
def getKerasModel():
    input = Input(shape=(3,))
    user_select = Select(1, 0)(input)
    item_select = Select(1, 1)(input)
    other_feature = Narrow(1, 2, num_features - 2)(input)
    u2D = Flatten()(user_select)
    item2D = Flatten()(item_select)
    userEmbedding = Embedding(u_limit + 1, u_output)(u2D)
    itemEmbedding = Embedding(m_limit + 1, m_output)(item2D)
    u_flatten = Flatten()(userEmbedding)
    m_flatten = Flatten()(itemEmbedding)
    latent = merge(inputs=[u_flatten, m_flatten, other_feature], mode="concat")
    numEmbeddingOutput = u_output + m_output + num_features - 2
    linear1 = Dense(numEmbeddingOutput // 2)(latent)
    x1 = Dropout(0.5)(linear1)
    linear2 = Dense(2)(x1)
    x2 = Dropout(0.5)(linear2)
    output = Dense(2)(x2)
    model = Model(input, output)
    model.summary()
    return model
def test_slice_2D(self):
    data = np.random.randn(2, 3)
    parameter = Parameter(shape=(2, 3), init_weight=data)
    out = parameter.slice(0, 0, 2)
    model = Model(input=parameter, output=out)
    result = model.forward(data)
    np.testing.assert_almost_equal(result, data[0:2], decimal=5)
def compare_binary_op(self, kk_func, z_layer, shape):
    x = klayers.Input(shape=shape[0][1:])
    y = klayers.Input(shape=shape[1][1:])
    batch = shape[0][0]
    kkresult = kk_func(x, y)
    x_value = np.random.uniform(0, 1, shape[0])
    y_value = np.random.uniform(0, 1, shape[1])
    k_grad_y_pred = KK.get_session().run(KK.gradients(kkresult, [x, y]),
                                         feed_dict={x: x_value, y: y_value})
    k_output = KK.get_session().run(kkresult,
                                    feed_dict={x: x_value, y: y_value})
    inputs = [Input(s) for s in remove_batch(shape)]
    model = Model(inputs, z_layer(inputs))
    z_output = model.forward([x_value, y_value])
    grad_output = np.array(z_output)
    grad_output.fill(1.0)
    # The model has two inputs, so backward needs both input values.
    z_grad_y_pred = model.backward([x_value, y_value], grad_output)
    self.assert_allclose(z_output, k_output)
    [self.assert_allclose(z, k)
     for (z, k) in zip(z_grad_y_pred, k_grad_y_pred)]
def test_reshape(self):
    a = np.random.random((2, 2, 3, 4))
    i1 = ZLayer.Input(shape=(2, 3, 4))
    s = ZLayer.Reshape((-1, 2, 12))(i1)
    m = ZModel(i1, s)
    # predict should not generate exception
    y = m.predict(a, distributed=False)
def test_load(self):
    input = ZLayer.Input(shape=(5, ))
    output = ZLayer.Dense(10)(input)
    zmodel = ZModel(input, output, name="graph1")
    tmp_path = create_tmp_path()
    zmodel.saveModel(tmp_path, None, True)
    model_reloaded = Net.load(tmp_path)
    input_data = np.random.random([3, 5])
    self.compare_output_and_grad_input(zmodel, model_reloaded, input_data)
def test_unsqueeze_1D(self):
    data = np.random.randn(4, )
    parameter = Parameter(shape=(4, ), init_weight=data)
    out = autograd.expand_dims(parameter, axis=0)
    model = Model(input=parameter, output=out)
    result = model.forward(data)
    np.testing.assert_almost_equal(result, np.expand_dims(data, axis=0), decimal=5)
def test_ExpandDim(self):
    inputdata = np.array([2, 1, 6])
    input = Parameter(shape=(3, ), init_weight=inputdata)
    expand = ExpandDim(dim=0)(input)
    model = Model(input, expand)
    assert model.get_output_shape() == (1, 3)
    desired = inputdata.reshape(1, 3)
    outputdata = model.forward(inputdata)
    np.testing.assert_almost_equal(outputdata, desired, decimal=4)
def test_save_load_Model(self):
    input = ZLayer.Input(shape=(5, ))
    output = ZLayer.Dense(10)(input)
    zmodel = ZModel(input, output, name="graph1")
    tmp_path = create_tmp_path()
    zmodel.saveModel(tmp_path, None, True)
    model_reloaded = Net.load(tmp_path)
    input_data = np.random.random([10, 5])
    y = np.random.random([10, 10])
    model_reloaded.compile(optimizer="adam", loss="mse")
    model_reloaded.fit(x=input_data, y=y, batch_size=8, nb_epoch=2)
def __init__(self, n_block, n_head, intermediate_size, hidden_drop, attn_drop,
             initializer_range, output_all_block, embedding_layer, input_shape,
             bigdl_type="float"):
    self.hidden_drop = hidden_drop
    self.attn_drop = attn_drop
    self.n_head = n_head
    self.intermediate_size = intermediate_size
    self.output_all_block = output_all_block
    self.bigdl_type = bigdl_type
    self.seq_len = input_shape[0][0]
    self.initializer_range = initializer_range
    self.bidirectional = True
    self.n_block = n_block

    word_input = Input(shape=input_shape[0])
    token_type_input = Input(shape=input_shape[1])
    position_input = Input(shape=input_shape[2])
    attention_mask = Input(shape=input_shape[3])

    e = embedding_layer([word_input, token_type_input, position_input])
    self.hidden_size = e.get_output_shape()[-1]
    extended_attention_mask = (-attention_mask + 1.0) * -10000.0

    next_input = e
    model_output = [None] * n_block
    model_output[0] = self.block(next_input, self.hidden_size, extended_attention_mask)
    for _ in range(n_block - 1):
        output = self.block(model_output[_], self.hidden_size, extended_attention_mask)
        model_output[_ + 1] = output

    pooler_output = self.pooler(model_output[-1], self.hidden_size)

    if output_all_block:
        model_output.append(pooler_output)
        model = Model([word_input, token_type_input, position_input, attention_mask],
                      model_output)
    else:
        model = Model([word_input, token_type_input, position_input, attention_mask],
                      [model_output[-1], pooler_output])
    self.value = model.value
def build_model(self):
    # Remark: Sharing weights for the embedding is not supported.
    # Thus the model takes a concatenated input and slices it to split query and doc.
    input = Input(name='input', shape=(self.text1_length + self.text2_length, ))
    embedding = Embedding(self.vocab_size, self.embed_size,
                          weights=self.embed_weights,
                          trainable=self.train_embed)(input)
    query_embed = embedding.slice(1, 0, self.text1_length)
    doc_embed = embedding.slice(1, self.text1_length, self.text2_length)
    mm = A.batch_dot(query_embed, doc_embed, axes=[2, 2])  # Translation Matrix.
    KM = []
    for i in range(self.kernel_num):
        mu = 1. / (self.kernel_num - 1) + (2. * i) / (self.kernel_num - 1) - 1.0
        sigma = self.sigma
        if mu > 1.0:  # Exact match.
            sigma = self.exact_sigma
            mu = 1.0
        mm_exp = A.exp(-0.5 * (mm - mu) * (mm - mu) / sigma / sigma)
        mm_doc_sum = A.sum(mm_exp, 2)
        mm_log = A.log(mm_doc_sum + 1.0)
        # Remark: Keep the reduced dimension for the last sum and squeeze after stack.
        # Otherwise, when batch=1, the output would become a Scalar that is not
        # compatible with stack.
        mm_sum = A.sum(mm_log, 1, keepDims=True)
        KM.append(mm_sum)
    Phi = Squeeze(2)(A.stack(KM, 1))
    output = Dense(1, init="uniform", activation="sigmoid")(Phi)
    model = Model(input=input, output=output)
    return model
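# A minimal NumPy sketch (not part of the model above) of the kernel-pooling schedule
# that the build_model loop implements: kernel_num RBF kernels with means evenly spaced
# in [-1, 1], the last kernel pinned to mu = 1.0 for exact matches. The values of
# kernel_num, sigma and exact_sigma here are hypothetical example settings.
import numpy as np

kernel_num, sigma, exact_sigma = 11, 0.1, 0.001
sim = np.random.uniform(-1, 1, (5, 8))  # toy query-by-doc similarity matrix

phi = []
for i in range(kernel_num):
    mu = 1. / (kernel_num - 1) + (2. * i) / (kernel_num - 1) - 1.0
    s = exact_sigma if mu > 1.0 else sigma
    mu = min(mu, 1.0)
    k = np.exp(-0.5 * (sim - mu) ** 2 / s ** 2)    # RBF kernel over similarities
    phi.append(np.log(k.sum(axis=1) + 1.0).sum())  # soft-TF per query term, then log-sum
print(len(phi))  # one pooled feature per kernel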
def test_expose_node(self):
    image_shape = [3, 16, 16]
    input_shape = [2] + image_shape
    input = Input(shape=input_shape, name="input1")

    def l1(x):
        x1 = x.index_select(1, 0)  # input is [B, 2, 3, 16, 16]
        x2 = x.index_select(1, 0)
        return abs(x1 - x2)

    output = Lambda(function=l1)(input)
    model = Model(input, output)

    mock_data = np.random.uniform(0, 1, [10] + input_shape)
    out_data = model.forward(mock_data)
    assert out_data.shape == (10, 3, 16, 16)
def __init__(self, n_block, resid_drop, attn_drop, n_head, mask_attention,
             embedding_layer, input_shape, bigdl_type="float"):
    self.resid_drop = resid_drop
    self.attn_drop = attn_drop
    self.n_head = n_head
    self.mask_attention = mask_attention
    self.seq_len = input_shape[0]
    self.bigdl_type = bigdl_type

    if mask_attention:
        mask_value = np.tril(np.ones((self.seq_len, self.seq_len), dtype=bigdl_type))
        self.mask_value = auto.Constant(
            data=mask_value.reshape((1, 1, self.seq_len, self.seq_len)))

    input = Input(shape=list(input_shape))
    embedding = embedding_layer(input)
    hidden_size = embedding.get_output_shape()[-1]

    next_input = embedding
    for _ in range(n_block):
        output = self.block(next_input, hidden_size)
        next_input = output

    model = Model(input, next_input)
    self.value = model.value
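# A small standalone sketch of the causal attention mask built above with np.tril:
# for a hypothetical seq_len of 4, position i may only attend to positions <= i.
import numpy as np

seq_len = 4
mask = np.tril(np.ones((seq_len, seq_len), dtype="float32"))
print(mask)
# [[1. 0. 0. 0.]
#  [1. 1. 0. 0.]
#  [1. 1. 1. 0.]
#  [1. 1. 1. 1.]]
# The reshape to (1, 1, seq_len, seq_len) lets it broadcast over batch and head dims.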
def __init__(self, n_block, hidden_drop, attn_drop, n_head, initializer_range,
             bidirectional, output_all_block, embedding_layer, input_shape,
             intermediate_size=0, bigdl_type="float"):
    self.hidden_drop = hidden_drop
    self.attn_drop = attn_drop
    self.n_head = n_head
    self.initializer_range = initializer_range
    self.output_all_block = output_all_block
    self.bidirectional = bidirectional
    self.intermediate_size = intermediate_size
    self.seq_len = input_shape[0][0]
    self.bigdl_type = bigdl_type

    if not bidirectional:
        mask_value = np.tril(
            np.ones((self.seq_len, self.seq_len), dtype=bigdl_type))
        self.mask_value = auto.Constant(
            data=mask_value.reshape((1, 1, self.seq_len, self.seq_len)))

    (extended_attention_mask, embedding_inputs, inputs) = self.build_input(input_shape)
    embedding = embedding_layer(embedding_inputs)
    hidden_size = embedding.get_output_shape()[-1]

    next_input = embedding
    output = [None] * n_block
    output[0] = self.block(next_input, hidden_size, extended_attention_mask)
    for index in range(n_block - 1):
        o = self.block(output[index], hidden_size, extended_attention_mask)
        output[index + 1] = o

    pooler_output = self.pooler(output[-1], hidden_size)
    # list.append returns None, so append first and then build the Model.
    if output_all_block:
        output.append(pooler_output)
        model = Model(inputs, output)
    else:
        model = Model(inputs, [output[-1], pooler_output])
    self.value = model.value
def build_model(self):
    encoder_input = Input(name="encoder_input", shape=self.input_shape)
    decoder_input = Input(name="decoder_input", shape=self.output_shape)
    encoder_output = self.encoder(encoder_input)
    encoder_final_states = SelectTable(1)(encoder_output)
    decoder_init_states = \
        self.bridge(encoder_final_states) if self.bridge else encoder_final_states
    decoder_output = self.decoder([decoder_input, decoder_init_states])
    output = self.generator(decoder_output) if self.generator else decoder_output
    return Model([encoder_input, decoder_input], output)
def compare_binary_op(self, kk_func, z_layer, shape, rtol=1e-5, atol=1e-5):
    x = klayers.Input(shape=shape[0][1:])
    y = klayers.Input(shape=shape[1][1:])
    batch = shape[0][0]
    kkresult = kk_func(x, y)
    x_value = np.random.uniform(0, 1, shape[0])
    y_value = np.random.uniform(0, 1, shape[1])
    k_grads = KK.get_session().run(KK.gradients(kkresult, [x, y]),
                                   feed_dict={x: x_value, y: y_value})
    k_output = KK.get_session().run(kkresult,
                                    feed_dict={x: x_value, y: y_value})
    inputs = [Input(s) for s in remove_batch(shape)]
    model = Model(inputs, z_layer(inputs))
    z_output = model.forward([x_value, y_value])
    grad_output = np.array(z_output)
    grad_output.fill(1.0)
    z_grads = model.backward([x_value, y_value], grad_output)
    # Check if the model can be forward/backward multiple times or not
    z_output2 = model.forward([x_value, y_value])
    z_grads2 = model.backward([x_value, y_value], grad_output)
    self.assert_allclose(z_output, z_output2, rtol, atol)
    [self.assert_allclose(z, k, rtol, atol)
     for (z, k) in zip(z_grads, z_grads2)]
    self.assert_allclose(z_output, k_output, rtol, atol)
    [self.assert_allclose(z, k, rtol, atol)
     for (z, k) in zip(z_grads, k_grads)]
def to_model(self):
    from zoo.pipeline.api.keras.models import Model
    return Model.from_jvalue(callBigDlFunc(self.bigdl_type, "kerasNetToModel",
                                           self.value))
token_shape = (max_len, )
position_shape = (max_len, )
token_input = Input(shape=token_shape)
position_input = Input(shape=position_shape)
O_seq = TransformerLayer.init(vocab=max_features, hidden_size=128, n_head=8,
                              seq_len=max_len)([token_input, position_input])
# Select the first output of the Transformer. The second is the pooled output.
O_seq = SelectTable(0)(O_seq)
O_seq = GlobalAveragePooling1D()(O_seq)
O_seq = Dropout(0.2)(O_seq)
outputs = Dense(2, activation='softmax')(O_seq)
model = Model([token_input, position_input], outputs)
model.summary()

batch_size = 128
print('Train...')
est = Estimator.from_bigdl(model=model,
                           loss=SparseCategoricalCrossEntropy(),
                           optimizer=Adam(),
                           metrics=[Accuracy()])
est.fit(data=train_dataset, batch_size=batch_size, epochs=1)
print("Train finished.")

print('Evaluating...')
result = est.evaluate(val_dataset)
print(result)
def init(cls, vocab=40990, hidden_size=768, n_block=12, n_head=12, seq_len=512,
         intermediate_size=3072, hidden_drop=0.1, attn_drop=0.1,
         initializer_range=0.02, output_all_block=True, bigdl_type="float"):
    """
    vocab: vocabulary size of training data, default is 40990
    hidden_size: size of the encoder layers, default is 768
    n_block: block number, default is 12
    n_head: head number, default is 12
    seq_len: max sequence length of training data, default is 512
    intermediate_size: size of the "intermediate" (i.e. feed-forward) layer, default is 3072
    hidden_drop: drop probability of fully connected layers, default is 0.1
    attn_drop: drop probability of attention, default is 0.1
    initializer_range: weight initialization range, default is 0.02
    output_all_block: whether to output all blocks' output, default is True
    """
    word_input = Input(shape=(seq_len, ))
    token_type_input = Input(shape=(seq_len, ))
    position_input = Input(shape=(seq_len, ))
    word_embedding = Embedding(vocab, hidden_size, input_length=seq_len,
                               weights=np.random.normal(0.0, initializer_range,
                                                        (vocab, hidden_size)))(word_input)
    position_embedding = Embedding(seq_len, hidden_size, input_length=seq_len,
                                   weights=np.random.normal(0.0, initializer_range,
                                                            (seq_len, hidden_size)))(position_input)
    token_type_embedding = Embedding(2, hidden_size, input_length=seq_len,
                                     weights=np.random.normal(0.0, initializer_range,
                                                              (2, hidden_size)))(token_type_input)
    embedding = word_embedding + position_embedding + token_type_embedding

    w = auto.Parameter(shape=(1, hidden_size),
                       init_weight=np.ones((1, hidden_size), dtype=bigdl_type))
    b = auto.Parameter(shape=(1, hidden_size),
                       init_weight=np.zeros((1, hidden_size), dtype=bigdl_type))
    after_norm = layer_norm(embedding, w, b, 1e-12)
    h = Dropout(hidden_drop)(after_norm)

    embedding_layer = Model([word_input, token_type_input, position_input], h)
    shape = ((seq_len, ), (seq_len, ), (seq_len, ), (1, 1, seq_len))

    return BERT(n_block, n_head, intermediate_size, hidden_drop, attn_drop,
                initializer_range, output_all_block, embedding_layer,
                input_shape=shape)
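# A hedged usage sketch (an assumption, mirroring the TransformerLayer examples in this
# section, not a confirmed API contract) of wiring BERT.init into a classifier: the layer
# takes word ids, token type ids, positions and an attention mask, matching the input_shape
# tuple built above. max_len and n_class are hypothetical example values.
max_len, n_class = 128, 2
word_input = Input(shape=(max_len, ))
token_type_input = Input(shape=(max_len, ))
position_input = Input(shape=(max_len, ))
mask_input = Input(shape=(1, 1, max_len))
bert = BERT.init(vocab=40990, hidden_size=768, n_block=12, n_head=12,
                 seq_len=max_len, output_all_block=False)
output = bert([word_input, token_type_input, position_input, mask_input])
# With output_all_block=False the output table is [last block output, pooled output],
# so index 1 is assumed to select the pooled output.
pooled = SelectTable(1)(output)
pred = Dense(n_class, activation="softmax")(pooled)
model = Model([word_input, token_type_input, position_input, mask_input], pred)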
convolve_net.add(Dense(output_dim=FC_LINEAR_DIMENSION,  # Size: 32 -> 64.
                       activation="sigmoid"))

# BigDL does not support parameter sharing, so this workaround is used instead.
both_feature = TimeDistributed(layer=convolve_net, input_shape=input_shape)(both_input)

encode_left = both_feature.index_select(1, 0)
encode_right = both_feature.index_select(1, 1)

distance = autograd.abs(encode_left - encode_right)
predict = Dense(output_dim=NUM_CLASS_LABEL, activation="sigmoid")(distance)

siamese_net = Model(input=both_input, output=predict)
siamese_net.compile(optimizer="adam", loss='sparse_categorical_crossentropy',
                    metrics=["accuracy"])

# Construct the distributed dataset object.
data_set = TFDataset.from_rdd(train_rdd,
                              shapes=[input_shape, [1]],
                              batch_size=args.batch_size,
                              val_rdd=test_rdd)

optimizer = TFOptimizer.from_keras(siamese_net, data_set)
app_name = "Siamese Network"
optimizer.set_train_summary(TrainSummary("tmp", app_name))
optimizer.set_val_summary(ValidationSummary("tmp", app_name))
                 W_regularizer=L2Regularizer(args.penalty_rate)))
convolve_net.add(Dropout(args.dropout_rate))

# BigDL does not support parameter sharing, so this workaround is used instead.
both_feature = TimeDistributed(layer=convolve_net, input_shape=input_shape)(both_input)

encode_left = both_feature.index_select(1, 0)
encode_right = both_feature.index_select(1, 1)

distance = autograd.abs(encode_left - encode_right)
predict = Dense(output_dim=NUM_CLASS_LABEL, activation="sigmoid",
                W_regularizer=L2Regularizer(args.penalty_rate))(distance)

siamese_net = Model(input=both_input, output=predict)

# Declare the optimizer, then train and test the model.
optimizer = Optimizer(model=siamese_net,
                      training_rdd=train_rdd,
                      optim_method=Adam(args.learning_rate),
                      criterion=CrossEntropyCriterion(),
                      end_trigger=MaxEpoch(args.num_epoch),
                      batch_size=args.batch_size)
optimizer.set_validation(batch_size=args.batch_size,
                         val_rdd=test_rdd,
                         trigger=EveryEpoch(),
                         val_method=[Top1Accuracy()])

# Set up training logs, which can be viewed with TensorBoard.
app_name = "logs"
xmb[:, :, 0] = x_train

xmb_val = np.zeros((len(x_test), max_len, 2), dtype=np.int32)
# Position information that is added to the input embeddings in the TransformerModel
xmb_val[:, :, 1] = np.arange(max_len)
xmb_val[:, :, 0] = x_test

S_inputs = Input(shape=(max_len, 2))
O_seq = TransformerLayer.init_with_default_embedding(vocab=max_features,
                                                     hidden_size=128, n_head=8,
                                                     seq_len=max_len)(S_inputs)
O_seq = GlobalAveragePooling1D()(O_seq)
O_seq = Dropout(0.2)(O_seq)
outputs = Dense(2, activation='softmax')(O_seq)

model = Model(S_inputs, outputs)
model.summary()
model.compile(optimizer=Adam(),
              loss="sparse_categorical_crossentropy",
              metrics=['accuracy'])

batch_size = 160
print('Train...')
model.fit(xmb, y_train, batch_size=batch_size, nb_epoch=1)
print("Train finished.")

print('Evaluating...')
score = model.evaluate(xmb_val, y_test, batch_size=160)[0]
print(score)
def bigdl_estimator():
    from zoo.orca.learn.bigdl.estimator import Estimator
    from tensorflow.python.keras.datasets import imdb
    from tensorflow.python.keras.preprocessing import sequence
    from zoo.pipeline.api.keras.models import Model
    from zoo.pipeline.api.keras.objectives import SparseCategoricalCrossEntropy
    from zoo.orca.data import XShards
    from zoo.orca.learn.metrics import Accuracy
    import numpy as np

    # conf = {"spark.executor.extraJavaOptions": "-Xss512m",
    #         "spark.driver.extraJavaOptions": "-Xss512m"}
    # init_orca_context(cluster_mode="local", cores=8, memory="16g")
    init_orca_context(cluster_mode="local", cores=4, memory="16g")

    max_features = 200
    max_len = 20

    print("running bigdl estimator")

    (x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
    x_train = x_train[:1000]
    y_train = y_train[:1000]
    x_test = x_test[-1000:]
    y_test = y_test[-1000:]
    print(len(x_train), 'train sequences')
    print(len(x_test), 'test sequences')

    print('Pad sequences (samples x time)')
    x_train = sequence.pad_sequences(x_train, maxlen=max_len)
    x_test = sequence.pad_sequences(x_test, maxlen=max_len)
    print('x_train shape:', x_train.shape)
    print('x_test shape:', x_test.shape)

    train_pos = np.zeros((len(x_train), max_len), dtype=np.int32)
    val_pos = np.zeros((len(x_test), max_len), dtype=np.int32)
    for i in range(0, len(x_train)):
        train_pos[i, :] = np.arange(max_len)
        val_pos[i, :] = np.arange(max_len)

    train_dataset = XShards.partition({"x": (x_train, train_pos), "y": np.array(y_train)})
    val_dataset = XShards.partition({"x": (x_test, val_pos), "y": np.array(y_test)})

    token_shape = (max_len,)
    position_shape = (max_len,)
    token_input = Input(shape=token_shape)
    position_input = Input(shape=position_shape)
    O_seq = TransformerLayer.init(vocab=max_features, hidden_size=128, n_head=8,
                                  seq_len=max_len)([token_input, position_input])
    # Select the first output of the Transformer. The second is the pooled output.
    O_seq = SelectTable(0)(O_seq)
    O_seq = GlobalAveragePooling1D()(O_seq)
    O_seq = Dropout(0.2)(O_seq)
    outputs = Dense(2, activation='softmax')(O_seq)
    model = Model([token_input, position_input], outputs)
    model.summary()

    batch_size = 64
    print("Train started")
    est = Estimator.from_bigdl(model=model, loss=SparseCategoricalCrossEntropy(),
                               optimizer=Adam(), metrics=[Accuracy()])
    est.set_constant_gradient_clipping(0.1, 0.2)
    est.fit(data=train_dataset, batch_size=batch_size, epochs=1)
    result = est.evaluate(val_dataset)
    print(result)
    est.clear_gradient_clipping()
    est.set_l2_norm_gradient_clipping(0.5)
    est.fit(data=train_dataset, batch_size=batch_size, epochs=1)
    print("Train finished")

    print("Evaluating started")
    result = est.evaluate(val_dataset)
    print(result)
    print("Evaluating finished")

    est.save('work/saved_model')
    # est.load('work/saved_model')
    print("load and save API finished")

    est.get_train_summary(tag='Loss')
    est.get_validation_summary(tag='Top1Accuracy')
    print("get summary API finished")

    stop_orca_context()
spark = SparkSession.builder \
    .config(conf=conf) \
    .getOrCreate()

# Init BigDL Engine
init_engine()

parkingInput2 = Input(shape=(inputs,))
print(parkingInput2.shape)
denseLayer2 = Dense(output_dim=inputs, activation="relu")
hidden2 = denseLayer2(parkingInput2)
lastLayer2 = Dense(output_dim=outputs, activation="relu")(hidden2)
zooModel = Model(input=parkingInput2, output=lastLayer2, name="functionalModel2")
# model2 = Model(inputs=[parkingInput2], outputs=[lastLayer2])

log_dir = "../resources/board/model_log"
app_name = "zooKeras"
zooModel.set_tensorboard(log_dir=log_dir, app_name=app_name)
zooModel.compile(optimizer='adam', loss='mean_squared_error')
zooModel.fit(x=x.to_numpy(), y=y.to_numpy(), nb_epoch=2, distributed=False)
zooModel.summary()

weights2 = zooModel.get_weights()
layers = zooModel.layers
def test_deprecated_save(self):
    with pytest.raises(Exception) as e_info:
        input = ZLayer.Input(shape=(5, ))
        output = ZLayer.Dense(10)(input)
        zmodel = ZModel(input, output, name="graph1")
        zmodel.save(create_tmp_path())
token_shape = (max_len, )
position_shape = (max_len, )
token_input = Input(shape=token_shape)
position_input = Input(shape=position_shape)
O_seq = TransformerLayer.init(vocab=max_features, hidden_size=128, n_head=8,
                              seq_len=max_len)([token_input, position_input])
# Select the first output of the Transformer. The second is the pooled output.
O_seq = SelectTable(0)(O_seq)
O_seq = GlobalAveragePooling1D()(O_seq)
O_seq = Dropout(0.2)(O_seq)
outputs = Dense(2, activation='softmax')(O_seq)
model = Model([token_input, position_input], outputs)
model.summary()
model.compile(optimizer=Adam(),
              loss="sparse_categorical_crossentropy",
              metrics=['accuracy'])

batch_size = 160
print('Train...')
model.fit(train_rdd, batch_size=batch_size, nb_epoch=1)
print("Train finished.")

print('Evaluating...')
score = model.evaluate(val_rdd, batch_size=160)[0]
print(score)