def prepare_sentence_data(datapath, vocab_path, embedding_path=None, embedding='glove', embedd_dim=100,
                          prompt_id=1, vocab_size=0, tokenize_text=True, to_lower=True, sort_by_len=False):
    assert len(datapath) == 1, "data path should only include the train path"
    (train_x, train_y, train_prompts), vocab, overal_maxlen, overal_maxnum = reader.get_data(
        datapath, vocab_path, prompt_id, vocab_size, tokenize_text=True, to_lower=True, sort_by_len=False)

    X_train, y_train, mask_train = utils.padding_sentence_sequences(
        train_x, train_y, overal_maxnum, overal_maxlen, post_padding=True)

    train_mean = y_train.mean(axis=0)
    train_std = y_train.std(axis=0)

    # Convert scores to boundary of [0 1] for training and evaluation (loss calculation)
    #######################################
    # Changed here: prompt_id -> train_prompts
    #######################################
    if prompt_id == -1:
        Y_train = reader.get_model_friendly_scores(y_train, train_prompts)
        scaled_train_mean = Y_train.mean()
    else:
        Y_train = reader.get_model_friendly_scores(y_train, prompt_id)
        scaled_train_mean = reader.get_model_friendly_scores(train_mean, prompt_id)
    # print Y_train.shape

    logger.info('Statistics:')
    logger.info('  train X shape: ' + str(X_train.shape))
    logger.info('  train Y shape: ' + str(Y_train.shape))
    logger.info('  train_y mean: %s, stdev: %s, train_y mean after scaling: %s' %
                (str(train_mean), str(train_std), str(scaled_train_mean)))

    if embedding_path:
        embedd_dict, embedd_dim, _ = utils.load_word_embedding_dict(
            embedding, embedding_path, vocab, logger, embedd_dim)
        embedd_matrix = utils.build_embedd_table(vocab, embedd_dict, embedd_dim, logger, caseless=True)
    else:
        embedd_matrix = None

    return (X_train, Y_train, mask_train), vocab, len(vocab), embedd_matrix, \
        overal_maxlen, overal_maxnum, scaled_train_mean
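
# A minimal, self-contained sketch of the score scaling that
# `reader.get_model_friendly_scores` is assumed here to perform: raw essay
# scores are mapped into [0, 1] using a per-prompt (min, max) range, so the
# model can train against a bounded target. The function name and the
# `score_ranges` values below are illustrative, not the real implementation.
import numpy as np

def scale_scores_to_unit_interval(scores, prompt_ids, score_ranges):
    """Map raw scores to [0, 1] using the (low, high) range of each prompt."""
    scores = np.asarray(scores, dtype='float32')
    prompt_ids = np.asarray(prompt_ids)
    low = np.array([score_ranges[p][0] for p in prompt_ids], dtype='float32')
    high = np.array([score_ranges[p][1] for p in prompt_ids], dtype='float32')
    return (scores - low) / (high - low)

# Example usage with hypothetical ranges:
# scale_scores_to_unit_interval([8, 3], [1, 2], {1: (2, 12), 2: (1, 6)})
# -> array([0.6, 0.4], dtype=float32)
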
def createModel(input_data, in_vocabulary_size, sequence_length, slots, slot_size, intent_size,
                layer_size=128, isTraining=True, embed_dim=64):
    cell_fw = tf.contrib.rnn.BasicLSTMCell(layer_size)
    cell_bw = tf.contrib.rnn.BasicLSTMCell(layer_size)
    if isTraining:
        cell_fw = tf.contrib.rnn.DropoutWrapper(cell_fw, input_keep_prob=0.5, output_keep_prob=0.5)
        cell_bw = tf.contrib.rnn.DropoutWrapper(cell_bw, input_keep_prob=0.5, output_keep_prob=0.5)

    if arg.use_bert:
        # we already have the embeddings in this case
        inputs = input_data
    else:
        if arg.embedding_path:
            # Initialize the embedding table from pre-trained vectors
            embeddings_dict = load_embedding(arg.embedding_path)
            word_alphabet = create_full_vocabulary()
            embeddings_weight = build_embedd_table(word_alphabet, embeddings_dict, embedd_dim=embed_dim, caseless=True)
            embedding = tf.get_variable(
                name="embedding",
                shape=embeddings_weight.shape,
                initializer=tf.constant_initializer(embeddings_weight),
                trainable=True)
        else:
            embedding = tf.get_variable('embedding', [in_vocabulary_size, embed_dim])
        print("embedding shape", embedding.shape)
        inputs = tf.nn.embedding_lookup(embedding, input_data)

    # BiLSTM encoder over the (embedded) input sequence
    state_outputs, final_state = tf.nn.bidirectional_dynamic_rnn(
        cell_fw, cell_bw, inputs, sequence_length=sequence_length, dtype=tf.float32)
    final_state = tf.concat([final_state[0][0], final_state[0][1],
                             final_state[1][0], final_state[1][1]], 1)
    state_outputs = tf.concat([state_outputs[0], state_outputs[1]], 2)
    state_shape = state_outputs.get_shape()

    with tf.variable_scope('attention'):
        slot_inputs = state_outputs
        if not remove_slot_attn:
            # Slot attention: additive attention of every hidden state over all others
            with tf.variable_scope('slot_attn'):
                attn_size = state_shape[2].value
                origin_shape = tf.shape(state_outputs)
                hidden = tf.expand_dims(state_outputs, 1)
                hidden_conv = tf.expand_dims(state_outputs, 2)
                k = tf.get_variable("AttnW", [1, 1, attn_size, attn_size])
                hidden_features = tf.nn.conv2d(hidden_conv, k, [1, 1, 1, 1], "SAME")
                hidden_features = tf.reshape(hidden_features, origin_shape)
                hidden_features = tf.expand_dims(hidden_features, 1)
                v = tf.get_variable("AttnV", [attn_size])

                slot_inputs_shape = tf.shape(slot_inputs)
                slot_inputs = tf.reshape(slot_inputs, [-1, attn_size])
                y = core_rnn_cell._linear(slot_inputs, attn_size, True)
                y = tf.reshape(y, slot_inputs_shape)
                y = tf.expand_dims(y, 2)
                s = tf.reduce_sum(v * tf.tanh(hidden_features + y), [3])
                a = tf.nn.softmax(s)
                a = tf.expand_dims(a, -1)
                slot_d = tf.reduce_sum(a * hidden, [2])
                slot_reinforce_state = tf.expand_dims(slot_d, 2)
        else:
            attn_size = state_shape[2].value
            slot_d = slot_inputs
            slot_reinforce_state = tf.expand_dims(slot_inputs, 2)
            slot_inputs = tf.reshape(slot_inputs, [-1, attn_size])

        intent_input = final_state
        # Intent attention: score every hidden state against the final BiLSTM state
        with tf.variable_scope('intent_attn'):
            attn_size = state_shape[2].value
            hidden = tf.expand_dims(state_outputs, 2)
            k = tf.get_variable("AttnW", [1, 1, attn_size, attn_size])
            hidden_features = tf.nn.conv2d(hidden, k, [1, 1, 1, 1], "SAME")
            v = tf.get_variable("AttnV", [attn_size])

            y = core_rnn_cell._linear(intent_input, attn_size, True)
            y = tf.reshape(y, [-1, 1, 1, attn_size])
            s = tf.reduce_sum(v * tf.tanh(hidden_features + y), [2, 3])
            a = tf.nn.softmax(s)
            a = tf.expand_dims(a, -1)
            a = tf.expand_dims(a, -1)
            d = tf.reduce_sum(a * hidden, [1, 2])

            r_intent = d
            intent_context_states = d

        if arg.priority_order == 'intent_first':
            # Iterative interaction: update the intent subnet first, then the slot subnet
            for n in range(arg.iteration_num):
                with tf.variable_scope('intent_subnet' + str(n - 1)):
                    attn_size = state_shape[2].value
                    hidden = tf.expand_dims(state_outputs, 2)
                    k1 = tf.get_variable("W1", [1, 1, attn_size, attn_size])
                    k2 = tf.get_variable('W2', [1, 1, attn_size, attn_size])
                    slot_reinforce_features = tf.nn.conv2d(slot_reinforce_state, k1, [1, 1, 1, 1], "SAME")
                    hidden_features = tf.nn.conv2d(hidden, k2, [1, 1, 1, 1], "SAME")
                    v1 = tf.get_variable("AttnV", [attn_size])
                    bias = tf.get_variable("Bias", [attn_size])
                    s = tf.reduce_sum(v1 * tf.tanh(hidden_features + slot_reinforce_features + bias), [2, 3])
                    a = tf.nn.softmax(s)
                    a = tf.expand_dims(a, -1)
                    a = tf.expand_dims(a, -1)
                    r = tf.reduce_sum(a * slot_reinforce_state, [1, 2])

                    r_intent = r + intent_context_states
                    intent_output = tf.concat([r_intent, intent_input], 1)

                with tf.variable_scope('slot_subnet' + str(n - 1)):
                    intent_gate = core_rnn_cell._linear(r_intent, attn_size, True)
                    intent_gate = tf.reshape(intent_gate, [-1, 1, intent_gate.get_shape()[1].value])
                    v1 = tf.get_variable("gateV", [attn_size])
                    relation_factor = v1 * tf.tanh(slot_d + intent_gate)
                    relation_factor = tf.reduce_sum(relation_factor, [2])
                    relation_factor = tf.expand_dims(relation_factor, -1)
                    slot_reinforce_state1 = slot_d * relation_factor
                    slot_reinforce_state = tf.expand_dims(slot_reinforce_state1, 2)
                    slot_reinforce_vector = tf.reshape(slot_reinforce_state1, [-1, attn_size])
                    slot_output = tf.concat([slot_reinforce_vector, slot_inputs], 1)
        else:
            # Iterative interaction: update the slot subnet first, then the intent subnet
            for n in range(arg.iteration_num):
                with tf.variable_scope('slot_subnet' + str(n - 1)):
                    intent_gate = core_rnn_cell._linear(r_intent, attn_size, True)
                    intent_gate = tf.reshape(intent_gate, [-1, 1, intent_gate.get_shape()[1].value])
                    v1 = tf.get_variable("gateV", [attn_size])
                    relation_factor = v1 * tf.tanh(slot_d + intent_gate)
                    relation_factor = tf.reduce_sum(relation_factor, [2])
                    relation_factor = tf.expand_dims(relation_factor, -1)
                    slot_reinforce_state = slot_d * relation_factor
                    slot_reinforce_vector = tf.reshape(slot_reinforce_state, [-1, attn_size])
                    slot_output = tf.concat([slot_reinforce_vector, slot_inputs], 1)

                with tf.variable_scope('intent_subnet' + str(n - 1)):
                    attn_size = state_shape[2].value
                    hidden = tf.expand_dims(state_outputs, 2)
                    slot_reinforce_output = tf.expand_dims(slot_reinforce_state, 2)
                    k1 = tf.get_variable("W1", [1, 1, attn_size, attn_size])
                    k2 = tf.get_variable('W2', [1, 1, attn_size, attn_size])
                    slot_features = tf.nn.conv2d(slot_reinforce_output, k1, [1, 1, 1, 1], "SAME")
                    hidden_features = tf.nn.conv2d(hidden, k2, [1, 1, 1, 1], "SAME")
                    v1 = tf.get_variable("AttnV", [attn_size])
                    bias = tf.get_variable("Bias", [attn_size])
                    s = tf.reduce_sum(v1 * tf.tanh(hidden_features + slot_features + bias), [2, 3])
                    a = tf.nn.softmax(s)
                    a = tf.expand_dims(a, -1)
                    a = tf.expand_dims(a, -1)
                    r = tf.reduce_sum(a * slot_reinforce_output, [1, 2])

                    r_intent = r + intent_context_states
                    intent_output = tf.concat([r_intent, intent_input], 1)

    # Project to intent and slot label spaces
    with tf.variable_scope('intent_proj'):
        intent = core_rnn_cell._linear(intent_output, intent_size, True)

    with tf.variable_scope('slot_proj'):
        slot = core_rnn_cell._linear(slot_output, slot_size, True)
        if arg.use_crf:
            nstep = tf.shape(state_outputs)[1]
            slot = tf.reshape(slot, [-1, nstep, slot_size])

    outputs = [slot, intent]
    return outputs
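
# A minimal NumPy sketch of the additive (Bahdanau-style) attention that the
# `slot_attn` / `intent_attn` blocks above implement via 1x1 conv2d and
# `core_rnn_cell._linear`: score each hidden state with v . tanh(W1*h_t + W2*q),
# softmax over time, then take the weighted sum of hidden states. The weight
# matrices below are random placeholders, not the trained TF variables.
import numpy as np

def additive_attention(hidden_states, query, W1, W2, v):
    """hidden_states: [T, D]; query: [D]; returns ([D] context, [T] weights)."""
    scores = np.tanh(hidden_states @ W1 + query @ W2) @ v   # [T] alignment scores
    weights = np.exp(scores - scores.max())
    weights /= weights.sum()                                 # softmax over time
    context = weights @ hidden_states                        # [D] attended context
    return context, weights

# Example with toy shapes (T=5 timesteps, D=4 hidden units):
# rng = np.random.default_rng(0)
# h, q = rng.normal(size=(5, 4)), rng.normal(size=4)
# W1, W2, v = rng.normal(size=(4, 4)), rng.normal(size=(4, 4)), rng.normal(size=4)
# context, weights = additive_attention(h, q, W1, W2, v)
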
def prepare_sentence_data(datapaths, embedding_path=None, embedding='word2vec', embedd_dim=100, prompt_id=1,
                          vocab_size=0, tokenize_text=True, to_lower=True, sort_by_len=False,
                          vocab_path=None, score_index=6):
    assert len(datapaths) == 4, "data paths should include train, dev, test and description path"
    (train_x, train_y, train_prompts, train_ids), (dev_x, dev_y, dev_prompts, dev_ids), \
        (test_x, test_y, test_prompts, test_ids), vocab, overal_maxlen, overal_maxnum = \
        reader.get_data(datapaths, prompt_id, vocab_size, tokenize_text=True, to_lower=True,
                        sort_by_len=False, vocab_path=None, score_index=6)

    train_d, max_sentnum = reader.read_description(datapaths[3], vocab, len(train_x), tokenize_text=True, to_lower=True)
    dev_d, max_sentnum = reader.read_description(datapaths[3], vocab, len(dev_x), tokenize_text=True, to_lower=True)
    test_d, max_sentnum = reader.read_description(datapaths[3], vocab, len(test_x), tokenize_text=True, to_lower=True)

    X_train, y_train, mask_train = utils.padding_sentence_sequences(
        train_x, train_y, overal_maxnum, overal_maxlen, post_padding=True)
    X_dev, y_dev, mask_dev = utils.padding_sentence_sequences(
        dev_x, dev_y, overal_maxnum, overal_maxlen, post_padding=True)
    X_test, y_test, mask_test = utils.padding_sentence_sequences(
        test_x, test_y, overal_maxnum, overal_maxlen, post_padding=True)

    D_train, mask_d_train = utils.padding_des_sequences(train_d, max_sentnum, overal_maxlen, post_padding=True)
    D_dev, mask_d_dev = utils.padding_des_sequences(dev_d, max_sentnum, overal_maxlen, post_padding=True)
    D_test, mask_d_test = utils.padding_des_sequences(test_d, max_sentnum, overal_maxlen, post_padding=True)

    if prompt_id:
        train_pmt = np.array(train_prompts, dtype='int32')
        dev_pmt = np.array(dev_prompts, dtype='int32')
        test_pmt = np.array(test_prompts, dtype='int32')

    train_mean = y_train.mean(axis=0)
    train_std = y_train.std(axis=0)
    dev_mean = y_dev.mean(axis=0)
    dev_std = y_dev.std(axis=0)
    test_mean = y_test.mean(axis=0)
    test_std = y_test.std(axis=0)

    # We need the dev and test sets in the original scale for evaluation
    # dev_y_org = y_dev.astype(reader.get_ref_dtype())
    # test_y_org = y_test.astype(reader.get_ref_dtype())

    # Convert scores to boundary of [0 1] for training and evaluation (loss calculation)
    Y_train = reader.get_model_friendly_scores(y_train, prompt_id)
    Y_dev = reader.get_model_friendly_scores(y_dev, prompt_id)
    Y_test = reader.get_model_friendly_scores(y_test, prompt_id)
    scaled_train_mean = reader.get_model_friendly_scores(train_mean, prompt_id)
    # print Y_train.shape

    logger.info('Statistics:')
    logger.info('  train X shape: ' + str(X_train.shape))
    logger.info('  dev X shape:   ' + str(X_dev.shape))
    logger.info('  test X shape:  ' + str(X_test.shape))
    logger.info('  train Y shape: ' + str(Y_train.shape))
    logger.info('  dev Y shape:   ' + str(Y_dev.shape))
    logger.info('  test Y shape:  ' + str(Y_test.shape))
    logger.info('  train_y mean: %s, stdev: %s, train_y mean after scaling: %s' %
                (str(train_mean), str(train_std), str(scaled_train_mean)))

    if embedding_path:
        embedd_dict, embedd_dim, _ = utils.load_word_embedding_dict(
            embedding, embedding_path, vocab, logger, embedd_dim)
        embedd_matrix = utils.build_embedd_table(vocab, embedd_dict, embedd_dim, logger, caseless=True)
    else:
        embedd_matrix = None

    return (X_train, Y_train, D_train, mask_train, train_ids), \
        (X_dev, Y_dev, D_dev, mask_dev, dev_ids), \
        (X_test, Y_test, D_test, mask_test, test_ids), \
        vocab, len(vocab), embedd_matrix, overal_maxlen, overal_maxnum, max_sentnum, scaled_train_mean
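
# A minimal sketch of what `utils.padding_sentence_sequences` is assumed above
# to produce: every essay padded to the same number of sentences, every
# sentence padded to the same length, with int32 word-id tensors and a 0/1
# mask over real tokens. Function and variable names here are illustrative only.
import numpy as np

def pad_hierarchical_sequences(essays, labels, max_sentnum, max_sentlen):
    """essays: list of essays, each a list of sentences (lists of word ids)."""
    X = np.zeros((len(essays), max_sentnum, max_sentlen), dtype='int32')
    mask = np.zeros_like(X, dtype='float32')
    for i, essay in enumerate(essays):
        for j, sent in enumerate(essay[:max_sentnum]):
            length = min(len(sent), max_sentlen)
            X[i, j, :length] = sent[:length]       # post-padding: real ids first, zeros after
            mask[i, j, :length] = 1.0
    y = np.array(labels, dtype='float32')
    return X, y, mask

# Example:
# X, y, mask = pad_hierarchical_sequences([[[4, 7], [2]]], [8.0], max_sentnum=3, max_sentlen=4)
# X.shape == (1, 3, 4); mask[0, 0] is [1, 1, 0, 0]
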