Пример #1
0
def load_testing_data():
    """Load the "testing" dataset and build padded inputs and labels.

    The dataset is processed one sentence at a time: for every sentence
    number returned by
    ``DBHelperMethod.get_list_of_sentence_numbers_by("testing")`` the rows
    whose column 3 equals that number (compared as a string) are handed to
    ``pad_data`` together with their columns 0 and 1 and the label table.

    Returns:
        A 10-tuple ``(padded_input, padded_output, vocabulary,
        vocabulary_inv, testing_words, input_testing_letters,
        op_testing_letters, sent_num, letters_loc, undiac_word)``. The last
        six entries are columns 4, 0, 5, 3, 6 and 7 of the raw dataset, in
        that order.
    """
    dp.establish_db_connection()

    dataset = DBHelperMethod.load_dataset_by_type("testing")
    sentence_ids = DBHelperMethod.get_list_of_sentence_numbers_by("testing")
    label_table = dp.get_label_table()

    inputs_per_sentence = []
    labels_per_sentence = []
    for sentence_id in sentence_ids:
        # Column 3 is matched against the sentence number as a string.
        row_index = numpy.where(dataset[:, 3] == str(sentence_id))
        sentence_rows = dataset[row_index]

        sentence_input, sentence_labels = pad_data(
            sentence_rows, sentence_rows[:, [0, 1]], label_table)
        inputs_per_sentence.append(sentence_input)
        labels_per_sentence.append(sentence_labels)

    padded_input, vocabulary, vocabulary_inv = convert_input_to_vocab(
        inputs_per_sentence)
    # Flatten the per-sentence label groups into one array.
    padded_output = numpy.array(list(chain(*labels_per_sentence)))

    # testing_words, input_testing_letters, op_testing_letters, sent_num,
    # letters_loc, undiac_word -- one raw dataset column each.
    raw_columns = [
        np.take(dataset, column, axis=1) for column in (4, 0, 5, 3, 6, 7)
    ]

    return (padded_input, padded_output, vocabulary,
            vocabulary_inv) + tuple(raw_columns)
Пример #2
0
def get_testing_data():
    """Return network inputs and labels built from the "testing" dataset.

    Connects to the database, loads the rows of type "testing", and passes
    columns 0 and 6 to ``dp.load_nn_input_dataset_string`` and columns 0 and
    1 to ``dp.load_nn_labels_dataset_string``.

    Returns:
        A pair ``(x, y)`` holding the results of those two helpers.
    """
    DBHelperMethod.connect_to_db()
    testing_rows = DBHelperMethod.load_dataset_by_type("testing")

    input_columns = testing_rows[:, [0, 6]]
    label_columns = testing_rows[:, [0, 1]]

    return (dp.load_nn_input_dataset_string(input_columns),
            dp.load_nn_labels_dataset_string(label_columns))
Пример #3
0
def load_testing_data():
    """Load the "testing" dataset and pad its input via ``dp.pad_sentences1``.

    Inputs come from columns 0 and 6 (space-only variant of the input
    loader), labels from columns 0 and 1, and sequence lengths from
    column 3. Padding uses the module-level ``req_char_index`` and
    ``window_size``.

    Returns:
        ``(sentences_padded, y, vocabulary, vocabulary_inv)`` where the
        labels ``y`` are returned exactly as produced by
        ``dp.load_nn_labels_dataset_string``.
    """
    dp.establish_db_connection()
    dataset = DBHelperMethod.load_dataset_by_type("testing")

    input_chars = dp.load_nn_input_dataset_string_space_only(
        dataset[:, [0, 6]])
    labels = dp.load_nn_labels_dataset_string(dataset[:, [0, 1]])

    # Only the lengths are needed here; the first element is discarded.
    _sentence_numbers, sentence_lengths = dp.load_nn_seq_lengths(
        dataset[:, [3]])
    padded, vocabulary, vocabulary_inv = dp.pad_sentences1(
        input_chars, sentence_lengths, req_char_index, window_size)

    return padded, labels, vocabulary, vocabulary_inv
Пример #4
0
def load_testing_data():
    """Load the "testing" dataset and pad both inputs and labels per sentence.

    Inputs (columns 0 and 6) are padded with ``dp.extract_sent_and_pad`` and
    labels (columns 0 and 1) with ``dp.extract_sent_and_pad_output``; both
    receive the sequence lengths derived from column 3 and the module-level
    ``window_size``.

    Returns:
        ``(sentences_padded, padded_output, vocabulary, vocabulary_inv)``.
    """
    dp.establish_db_connection()
    testing_rows = DBHelperMethod.load_dataset_by_type("testing")

    input_chars = dp.load_nn_input_dataset_string_space_only(
        testing_rows[:, [0, 6]])
    labels = dp.load_nn_labels_dataset_string(testing_rows[:, [0, 1]])

    # The first element of the result is not used by this loader.
    _sentence_numbers, sentence_lengths = dp.load_nn_seq_lengths(
        testing_rows[:, [3]])

    padded_input, vocabulary, vocabulary_inv = dp.extract_sent_and_pad(
        input_chars, sentence_lengths, window_size)
    padded_labels = dp.extract_sent_and_pad_output(
        labels, sentence_lengths, window_size)

    return padded_input, padded_labels, vocabulary, vocabulary_inv
Пример #5
0
def load_training_data():
    """Load the "training" dataset as one-to-one input data plus labels.

    Inputs come from columns 0 and 6 (space-only variant of the input
    loader), labels from columns 0 and 1, and sequence lengths from
    column 3. ``dp.build_one_to_one_input_data`` builds the inputs using the
    module-level ``req_char_index`` and ``window_size``.

    Returns:
        ``(inputs, labels, vocabulary, vocabulary_inv)`` where ``inputs`` and
        ``labels`` have been converted with ``numpy.array``.
    """
    dp.establish_db_connection()
    dataset = DBHelperMethod.load_dataset_by_type("training")

    input_chars = dp.load_nn_input_dataset_string_space_only(
        dataset[:, [0, 6]])
    labels = dp.load_nn_labels_dataset_string(dataset[:, [0, 1]])

    # Only the lengths are consumed below.
    _sentence_numbers, sentence_lengths = dp.load_nn_seq_lengths(
        dataset[:, [3]])
    built_input, vocabulary, vocabulary_inv = dp.build_one_to_one_input_data(
        input_chars, sentence_lengths, req_char_index, window_size)

    input_array = numpy.array(built_input)
    label_array = numpy.array(labels)
    return input_array, label_array, vocabulary, vocabulary_inv
Пример #6
0
def load_testing_data():
    """Load the "testing" dataset as windows of characters and labels.

    Inputs (columns 0 and 6) and labels (columns 0 and 1) are each cut into
    windows with ``sqp.create_window_of_chars`` using the module-level
    ``window_size``. Input windows are additionally encoded with the
    vocabulary from ``dp.build_vocab``. The last window of both sequences is
    dropped before conversion to arrays.

    Returns:
        ``(input_sentences, output_labels, vocab, vocab_inv)``.
    """
    dp.establish_db_connection()
    testing_rows = DBHelperMethod.load_dataset_by_type("testing")

    input_chars = dp.load_nn_input_dataset_string_space_only(
        testing_rows[:, [0, 6]])
    labels = dp.load_nn_labels_dataset_string(testing_rows[:, [0, 1]])
    vocab, vocab_inv = dp.build_vocab(input_chars)

    # Neither result is used here; the call is kept as in the original flow.
    _sentence_numbers, _sentence_lengths = dp.load_nn_seq_lengths(
        testing_rows[:, [3]])

    input_windows = sqp.create_window_of_chars(list(input_chars), window_size)
    encoded_windows = dp.build_input_data_without_flattening(
        input_windows, vocab)
    # Everything except the final window is kept.
    input_array = numpy.array(encoded_windows[:-1])

    label_windows = sqp.create_window_of_chars(list(labels), window_size)
    label_array = numpy.array(label_windows[:-1])

    return input_array, label_array, vocab, vocab_inv
Пример #7
0
def load_testing_data():
    """Load the "testing" dataset, pad its input and expose raw columns.

    Inputs come from columns 0 and 6 (space-only variant of the input
    loader), labels from columns 0 and 1, and sequence lengths from
    column 3. ``dp.pad_sentences1`` pads the inputs using the module-level
    ``req_char_index`` and ``window_size``.

    Returns:
        A 10-tuple ``(sentences_padded, y, vocabulary, vocabulary_inv,
        testing_words, input_testing_letters, op_testing_letters, sent_num,
        letters_loc, undiac_word)``. The last six entries are columns 4, 0,
        5, 3, 6 and 7 of the raw dataset, in that order.
    """
    dp.establish_db_connection()
    dataset = DBHelperMethod.load_dataset_by_type("testing")

    input_chars = dp.load_nn_input_dataset_string_space_only(
        dataset[:, [0, 6]])
    labels = dp.load_nn_labels_dataset_string(dataset[:, [0, 1]])

    # The sentence numbers returned here are not used; the returned
    # ``sent_num`` is column 3 of the raw dataset (see below).
    _sentence_numbers, sentence_lengths = dp.load_nn_seq_lengths(
        dataset[:, [3]])
    padded, vocabulary, vocabulary_inv = dp.pad_sentences1(
        input_chars, sentence_lengths, req_char_index, window_size)

    def column(index):
        # Pull a single column out of the raw dataset.
        return np.take(dataset, index, axis=1)

    testing_words = column(4)
    input_testing_letters = column(0)
    op_testing_letters = column(5)
    sent_num = column(3)
    letters_loc = column(6)
    undiac_word = column(7)

    return (padded, labels, vocabulary, vocabulary_inv, testing_words,
            input_testing_letters, op_testing_letters, sent_num, letters_loc,
            undiac_word)
Пример #8
0
def load_testing_data():
    """Load the "testing" dataset and build padded inputs and labels.

    For every sentence number returned by
    ``DBHelperMethod.get_list_of_sentence_numbers_by("testing")`` the rows
    whose column 3 equals that number (compared as a string) are passed to
    ``pad_data`` together with their columns 0 and 1 and the label table.
    The per-sentence inputs are then converted with
    ``convert_input_to_vocab`` and the per-sentence labels are chained into
    a single array.

    Returns:
        ``(padded_input, padded_output, vocabulary, vocabulary_inv)``.
    """
    dp.establish_db_connection()

    testing_rows = DBHelperMethod.load_dataset_by_type("testing")
    sentence_ids = DBHelperMethod.get_list_of_sentence_numbers_by("testing")
    label_table = dp.get_label_table()

    sentence_inputs = []
    sentence_labels = []
    for sentence_id in sentence_ids:
        wanted = str(sentence_id)
        matching = numpy.where(testing_rows[:, 3] == wanted)
        rows = testing_rows[matching]

        padded_x, padded_y = pad_data(rows, rows[:, [0, 1]], label_table)
        sentence_inputs.append(padded_x)
        sentence_labels.append(padded_y)

    padded_input, vocabulary, vocabulary_inv = convert_input_to_vocab(
        sentence_inputs)
    # Flatten the per-sentence label groups into one array.
    flat_labels = list(chain(*sentence_labels))
    padded_output = numpy.array(flat_labels)

    return padded_input, padded_output, vocabulary, vocabulary_inv