def prepare_sentence_data(datapath, vocab_path, embedding_path=None, embedding='glove', embedd_dim=100, prompt_id=1, vocab_size=0, tokenize_text=True, to_lower=True, sort_by_len=False):

    assert len(datapath) == 1, "data path should only contain the training set path"
    (train_x, train_y, train_prompts), vocab, overal_maxlen, overal_maxnum = reader.get_data(
        datapath, vocab_path, prompt_id, vocab_size,
        tokenize_text=tokenize_text, to_lower=to_lower, sort_by_len=sort_by_len)

    X_train, y_train, mask_train = utils.padding_sentence_sequences(train_x, train_y, overal_maxnum, overal_maxlen, post_padding=True)

    train_mean = y_train.mean(axis=0)
    train_std = y_train.std(axis=0)
    
    # Convert scores to the [0, 1] range for training and evaluation (loss calculation)
    # Note: changed to pass train_prompts instead of prompt_id in the cross-prompt branch below
    if prompt_id == -1:
        Y_train = reader.get_model_friendly_scores(y_train, train_prompts)
        scaled_train_mean = Y_train.mean()
    else:
        Y_train = reader.get_model_friendly_scores(y_train, prompt_id)
        scaled_train_mean = reader.get_model_friendly_scores(train_mean, prompt_id)
    # print Y_train.shape

    logger.info('Statistics:')

    logger.info('  train X shape: ' + str(X_train.shape))
    logger.info('  train Y shape: ' + str(Y_train.shape))
    logger.info('  train_y mean: %s, stdev: %s, train_y mean after scaling: %s' %
                (str(train_mean), str(train_std), str(scaled_train_mean)))

    if embedding_path:
        embedd_dict, embedd_dim, _ = utils.load_word_embedding_dict(embedding, embedding_path, vocab, logger, embedd_dim)
        embedd_matrix = utils.build_embedd_table(vocab, embedd_dict, embedd_dim, logger, caseless=True)
    else:
        embedd_matrix = None

    return (X_train, Y_train, mask_train), vocab, len(vocab), embedd_matrix, overal_maxlen, overal_maxnum, scaled_train_mean
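
# Usage sketch (hypothetical file paths; a minimal example of calling the function above):
# (X_train, Y_train, mask_train), vocab, vocab_size, embedd_matrix, \
#     overal_maxlen, overal_maxnum, scaled_train_mean = prepare_sentence_data(
#         ['data/train.tsv'], 'data/vocab.pkl',
#         embedding_path='embeddings/glove.6B.100d.txt', embedding='glove',
#         embedd_dim=100, prompt_id=1)
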
def createModel(input_data,
                in_vocabulary_size,
                sequence_length,
                slots,
                slot_size,
                intent_size,
                layer_size=128,
                isTraining=True,
                embed_dim=64):
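    # Bi-directional LSTM encoder followed by slot attention and intent attention,
    # then iterative slot/intent subnets whose order is set by arg.priority_order.
    # Relies on module-level globals: arg (parsed flags) and remove_slot_attn.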
    cell_fw = tf.contrib.rnn.BasicLSTMCell(layer_size)
    cell_bw = tf.contrib.rnn.BasicLSTMCell(layer_size)

    if isTraining:
        cell_fw = tf.contrib.rnn.DropoutWrapper(cell_fw,
                                                input_keep_prob=0.5,
                                                output_keep_prob=0.5)
        cell_bw = tf.contrib.rnn.DropoutWrapper(cell_bw,
                                                input_keep_prob=0.5,
                                                output_keep_prob=0.5)

    if arg.use_bert:
        # we already have the embeddings in this case
        inputs = input_data
    else:
        if arg.embedding_path:
            embeddings_dict = load_embedding(arg.embedding_path)
            word_alphabet = create_full_vocabulary()
            embeddings_weight = build_embedd_table(word_alphabet,
                                                   embeddings_dict,
                                                   embedd_dim=embed_dim,
                                                   caseless=True)
            embedding = tf.get_variable(
                name="embedding",
                shape=embeddings_weight.shape,
                initializer=tf.constant_initializer(embeddings_weight),
                trainable=True)
        else:
            embedding = tf.get_variable('embedding',
                                        [in_vocabulary_size, embed_dim])
        print("embedding shape", embedding.shape)
        inputs = tf.nn.embedding_lookup(embedding, input_data)

    state_outputs, final_state = tf.nn.bidirectional_dynamic_rnn(
        cell_fw,
        cell_bw,
        inputs,
        sequence_length=sequence_length,
        dtype=tf.float32)
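    # Concatenate the forward/backward cell and hidden states (summary used for the
    # intent branch) and the per-timestep outputs (encoder states of size 2 * layer_size).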
    final_state = tf.concat([
        final_state[0][0], final_state[0][1], final_state[1][0],
        final_state[1][1]
    ], 1)
    state_outputs = tf.concat([state_outputs[0], state_outputs[1]], 2)
    state_shape = state_outputs.get_shape()

    with tf.variable_scope('attention'):
        slot_inputs = state_outputs
        if not remove_slot_attn:
            with tf.variable_scope('slot_attn'):
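                # Additive (Bahdanau-style) self-attention over the encoder states:
                # a 1x1 conv projects every state, each position is scored with
                # v * tanh(proj + linear(state)), and the softmax weights yield a
                # per-token slot context vector slot_d.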
                attn_size = state_shape[2].value
                origin_shape = tf.shape(state_outputs)
                hidden = tf.expand_dims(state_outputs, 1)
                hidden_conv = tf.expand_dims(state_outputs, 2)
                k = tf.get_variable("AttnW", [1, 1, attn_size, attn_size])
                hidden_features = tf.nn.conv2d(hidden_conv, k, [1, 1, 1, 1],
                                               "SAME")
                hidden_features = tf.reshape(hidden_features, origin_shape)
                hidden_features = tf.expand_dims(hidden_features, 1)
                v = tf.get_variable("AttnV", [attn_size])
                slot_inputs_shape = tf.shape(slot_inputs)
                slot_inputs = tf.reshape(slot_inputs, [-1, attn_size])
                y = core_rnn_cell._linear(slot_inputs, attn_size, True)
                y = tf.reshape(y, slot_inputs_shape)
                y = tf.expand_dims(y, 2)
                s = tf.reduce_sum(v * tf.tanh(hidden_features + y), [3])
                a = tf.nn.softmax(s)
                a = tf.expand_dims(a, -1)
                slot_d = tf.reduce_sum(a * hidden, [2])
                slot_reinforce_state = tf.expand_dims(slot_d, 2)
        else:
            attn_size = state_shape[2].value
            slot_d = slot_inputs
            slot_reinforce_state = tf.expand_dims(slot_inputs, 2)
            slot_inputs = tf.reshape(slot_inputs, [-1, attn_size])

        intent_input = final_state
        with tf.variable_scope('intent_attn'):
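            # Attention over the encoder states conditioned on the final BiLSTM state;
            # the weighted sum d becomes the intent context vector.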
            attn_size = state_shape[2].value
            hidden = tf.expand_dims(state_outputs, 2)
            k = tf.get_variable("AttnW", [1, 1, attn_size, attn_size])
            hidden_features = tf.nn.conv2d(hidden, k, [1, 1, 1, 1], "SAME")
            v = tf.get_variable("AttnV", [attn_size])

            y = core_rnn_cell._linear(intent_input, attn_size, True)
            y = tf.reshape(y, [-1, 1, 1, attn_size])
            s = tf.reduce_sum(v * tf.tanh(hidden_features + y), [2, 3])
            a = tf.nn.softmax(s)
            a = tf.expand_dims(a, -1)
            a = tf.expand_dims(a, -1)
            d = tf.reduce_sum(a * hidden, [1, 2])
            r_intent = d
            intent_context_states = d

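        # Iterative slot/intent subnets: each iteration re-estimates the intent context
        # from the slot-reinforced states and gates the slot context with the intent
        # (or in the reverse order, depending on arg.priority_order).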
        if arg.priority_order == 'intent_first':
            for n in range(arg.iteration_num):
                with tf.variable_scope('intent_subnet' + str(n - 1)):
                    attn_size = state_shape[2].value
                    hidden = tf.expand_dims(state_outputs, 2)
                    k1 = tf.get_variable("W1", [1, 1, attn_size, attn_size])
                    k2 = tf.get_variable('W2', [1, 1, attn_size, attn_size])
                    slot_reinforce_features = tf.nn.conv2d(
                        slot_reinforce_state, k1, [1, 1, 1, 1], "SAME")
                    hidden_features = tf.nn.conv2d(hidden, k2, [1, 1, 1, 1],
                                                   "SAME")
                    v1 = tf.get_variable("AttnV", [attn_size])
                    bias = tf.get_variable("Bias", [attn_size])
                    s = tf.reduce_sum(
                        v1 * tf.tanh(hidden_features +
                                     slot_reinforce_features + bias), [2, 3])
                    a = tf.nn.softmax(s)
                    a = tf.expand_dims(a, -1)
                    a = tf.expand_dims(a, -1)
                    r = tf.reduce_sum(a * slot_reinforce_state, [1, 2])

                    r_intent = r + intent_context_states

                    intent_output = tf.concat([r_intent, intent_input], 1)

                with tf.variable_scope('slot_subnet' + str(n - 1)):
                    intent_gate = core_rnn_cell._linear(
                        r_intent, attn_size, True)
                    intent_gate = tf.reshape(
                        intent_gate,
                        [-1, 1, intent_gate.get_shape()[1].value])
                    v1 = tf.get_variable("gateV", [attn_size])
                    relation_factor = v1 * tf.tanh(slot_d + intent_gate)
                    relation_factor = tf.reduce_sum(relation_factor, [2])
                    relation_factor = tf.expand_dims(relation_factor, -1)
                    slot_reinforce_state1 = slot_d * relation_factor
                    slot_reinforce_state = tf.expand_dims(
                        slot_reinforce_state1, 2)
                    slot_reinforce_vector = tf.reshape(slot_reinforce_state1,
                                                       [-1, attn_size])
                    slot_output = tf.concat(
                        [slot_reinforce_vector, slot_inputs], 1)

        else:
            for n in range(arg.iteration_num):
                with tf.variable_scope('slot_subnet' + str(n - 1)):
                    intent_gate = core_rnn_cell._linear(
                        r_intent, attn_size, True)
                    intent_gate = tf.reshape(
                        intent_gate,
                        [-1, 1, intent_gate.get_shape()[1].value])
                    v1 = tf.get_variable("gateV", [attn_size])
                    relation_factor = v1 * tf.tanh(slot_d + intent_gate)
                    relation_factor = tf.reduce_sum(relation_factor, [2])
                    relation_factor = tf.expand_dims(relation_factor, -1)
                    slot_reinforce_state = slot_d * relation_factor
                    slot_reinforce_vector = tf.reshape(slot_reinforce_state,
                                                       [-1, attn_size])
                    slot_output = tf.concat(
                        [slot_reinforce_vector, slot_inputs], 1)

                with tf.variable_scope('intent_subnet' + str(n - 1)):
                    attn_size = state_shape[2].value
                    hidden = tf.expand_dims(state_outputs, 2)
                    slot_reinforce_output = tf.expand_dims(
                        slot_reinforce_state, 2)
                    k1 = tf.get_variable("W1", [1, 1, attn_size, attn_size])
                    k2 = tf.get_variable('W2', [1, 1, attn_size, attn_size])
                    slot_features = tf.nn.conv2d(slot_reinforce_output, k1,
                                                 [1, 1, 1, 1], "SAME")
                    hidden_features = tf.nn.conv2d(hidden, k2, [1, 1, 1, 1],
                                                   "SAME")
                    v1 = tf.get_variable("AttnV", [attn_size])
                    bias = tf.get_variable("Bias", [attn_size])
                    s = tf.reduce_sum(
                        v1 * tf.tanh(hidden_features + slot_features + bias),
                        [2, 3])
                    a = tf.nn.softmax(s)
                    a = tf.expand_dims(a, -1)
                    a = tf.expand_dims(a, -1)
                    r = tf.reduce_sum(a * slot_reinforce_output, [1, 2])

                    r_intent = r + intent_context_states

                    intent_output = tf.concat([r_intent, intent_input], 1)

    with tf.variable_scope('intent_proj'):
        intent = core_rnn_cell._linear(intent_output, intent_size, True)
    with tf.variable_scope('slot_proj'):
        slot = core_rnn_cell._linear(slot_output, slot_size, True)
        if arg.use_crf:
            nstep = tf.shape(state_outputs)[1]
            slot = tf.reshape(slot, [-1, nstep, slot_size])
    outputs = [slot, intent]
    return outputs
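
# Usage sketch (hypothetical placeholders and vocabulary sizes; TF1-style graph construction):
# input_data = tf.placeholder(tf.int32, [None, None], name='inputs')
# sequence_length = tf.placeholder(tf.int32, [None], name='sequence_length')
# slots = tf.placeholder(tf.int32, [None, None], name='slots')
# slot_logits, intent_logits = createModel(input_data, in_vocabulary_size=722,
#                                          sequence_length=sequence_length, slots=slots,
#                                          slot_size=120, intent_size=21, layer_size=64)
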
Example #3
def prepare_sentence_data(datapaths, embedding_path=None, embedding='word2vec', embedd_dim=100, prompt_id=1, vocab_size=0, tokenize_text=True, \
                         to_lower=True, sort_by_len=False, vocab_path=None, score_index=6):

    assert len(
        datapaths) == 4, "data paths should include train, dev, test and description path"
    (train_x, train_y, train_prompts, train_ids), (dev_x, dev_y, dev_prompts, dev_ids), (test_x, test_y, test_prompts, test_ids), vocab, overal_maxlen, overal_maxnum = \
        reader.get_data(datapaths, prompt_id, vocab_size, tokenize_text=tokenize_text, to_lower=to_lower,
                        sort_by_len=sort_by_len, vocab_path=vocab_path, score_index=score_index)

    train_d, max_sentnum = reader.read_description(datapaths[3],
                                                   vocab,
                                                   len(train_x),
                                                   tokenize_text=tokenize_text,
                                                   to_lower=to_lower)
    dev_d, max_sentnum = reader.read_description(datapaths[3],
                                                 vocab,
                                                 len(dev_x),
                                                 tokenize_text=tokenize_text,
                                                 to_lower=to_lower)
    test_d, max_sentnum = reader.read_description(datapaths[3],
                                                  vocab,
                                                  len(test_x),
                                                  tokenize_text=tokenize_text,
                                                  to_lower=to_lower)

    X_train, y_train, mask_train = utils.padding_sentence_sequences(
        train_x, train_y, overal_maxnum, overal_maxlen, post_padding=True)
    X_dev, y_dev, mask_dev = utils.padding_sentence_sequences(
        dev_x, dev_y, overal_maxnum, overal_maxlen, post_padding=True)
    X_test, y_test, mask_test = utils.padding_sentence_sequences(
        test_x, test_y, overal_maxnum, overal_maxlen, post_padding=True)

    D_train, mask_d_train = utils.padding_des_sequences(train_d,
                                                        max_sentnum,
                                                        overal_maxlen,
                                                        post_padding=True)
    D_dev, mask_d_dev = utils.padding_des_sequences(dev_d,
                                                    max_sentnum,
                                                    overal_maxlen,
                                                    post_padding=True)
    D_test, mask_d_test = utils.padding_des_sequences(test_d,
                                                      max_sentnum,
                                                      overal_maxlen,
                                                      post_padding=True)

    if prompt_id:
        train_pmt = np.array(train_prompts, dtype='int32')
        dev_pmt = np.array(dev_prompts, dtype='int32')
        test_pmt = np.array(test_prompts, dtype='int32')

    train_mean = y_train.mean(axis=0)
    train_std = y_train.std(axis=0)
    dev_mean = y_dev.mean(axis=0)
    dev_std = y_dev.std(axis=0)
    test_mean = y_test.mean(axis=0)
    test_std = y_test.std(axis=0)

    # We need the dev and test sets in the original scale for evaluation
    # dev_y_org = y_dev.astype(reader.get_ref_dtype())
    # test_y_org = y_test.astype(reader.get_ref_dtype())

    # Convert scores to the [0, 1] range for training and evaluation (loss calculation)
    Y_train = reader.get_model_friendly_scores(y_train, prompt_id)
    Y_dev = reader.get_model_friendly_scores(y_dev, prompt_id)
    Y_test = reader.get_model_friendly_scores(y_test, prompt_id)
    scaled_train_mean = reader.get_model_friendly_scores(train_mean, prompt_id)
    # print Y_train.shape

    logger.info('Statistics:')

    logger.info('  train X shape: ' + str(X_train.shape))
    logger.info('  dev X shape:   ' + str(X_dev.shape))
    logger.info('  test X shape:  ' + str(X_test.shape))

    logger.info('  train Y shape: ' + str(Y_train.shape))
    logger.info('  dev Y shape:   ' + str(Y_dev.shape))
    logger.info('  test Y shape:  ' + str(Y_test.shape))

    logger.info(
        '  train_y mean: %s, stdev: %s, train_y mean after scaling: %s' %
        (str(train_mean), str(train_std), str(scaled_train_mean)))

    if embedding_path:
        embedd_dict, embedd_dim, _ = utils.load_word_embedding_dict(
            embedding, embedding_path, vocab, logger, embedd_dim)
        embedd_matrix = utils.build_embedd_table(vocab,
                                                 embedd_dict,
                                                 embedd_dim,
                                                 logger,
                                                 caseless=True)
    else:
        embedd_matrix = None

    return (X_train, Y_train, D_train, mask_train, train_ids), (X_dev, Y_dev, D_dev, mask_dev, dev_ids), (X_test, Y_test, D_test, mask_test, test_ids), \
            vocab, len(vocab), embedd_matrix, overal_maxlen, overal_maxnum, max_sentnum, scaled_train_mean
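
# Usage sketch (hypothetical paths; the fourth path is the prompt-description file read by
# reader.read_description above):
# datapaths = ['data/train.tsv', 'data/dev.tsv', 'data/test.tsv', 'data/descriptions.txt']
# train_data, dev_data, test_data, vocab, vocab_size, embedd_matrix, \
#     overal_maxlen, overal_maxnum, max_sentnum, scaled_train_mean = prepare_sentence_data(
#         datapaths, embedding_path='embeddings/word2vec.100d.bin', embedding='word2vec',
#         embedd_dim=100, prompt_id=1)
# X_train, Y_train, D_train, mask_train, train_ids = train_data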