def test_read_run_two(self):
     """``run`` given the word ``bear`` must return the two-occurrence message."""
     expected = '\nWord bear occurs 2 times.\n'
     # The leading '' is presumably an argv[0]-style placeholder -- TODO confirm.
     argv = ['', 'bear']
     self.assertEqual(run(argv, db_name, table_name), expected)
 def test_read_run_none(self):
     """``run`` given the word ``9999`` must return the zero-occurrence message."""
     expected = '\nWord 9999 occurs 0 times.\n'
     # The leading '' is presumably an argv[0]-style placeholder -- TODO confirm.
     argv = ['', '9999']
     self.assertEqual(run(argv, db_name, table_name), expected)
 def test_read_run(self):
     """``run`` given the word ``person`` must return the three-occurrence message."""
     expected = '\nWord person occurs 3 times.\n'
     # The leading '' is presumably an argv[0]-style placeholder -- TODO confirm.
     argv = ['', 'person']
     self.assertEqual(run(argv, db_name, table_name), expected)
示例#4
0
def main(clean_folder: str, pretrained_embedding_location: str,
         max_length: int, pad_token: str, unk_token: str, ready_folder: str):
    """Drive the read -> adjust -> filter -> write pipeline over three splits.

    Args:
        clean_folder: Forwarded to ``read.run`` as its first argument.
        pretrained_embedding_location: Forwarded to ``read.run`` as its
            second argument.
        max_length: Forwarded to ``adjust_sentence`` and ``adjust_relation``.
        pad_token: Forwarded to ``add_pad_unk`` and ``adjust_sentence``.
        unk_token: Forwarded to ``add_pad_unk`` and ``adjust_sentence``.
        ready_folder: Forwarded to ``write.run`` as its last argument.

    Returns:
        None. The results are handed to ``write.run``.
    """
    # Load everything in one call; the result unpacks into seven items.
    loaded = read.run(clean_folder, pretrained_embedding_location)
    (train_sentences, train_relations,
     valid_sentences, valid_relations,
     test_sentences, test_relations,
     embeddings) = loaded

    # Sentence side. NOTE(review): add_pad_unk presumably mutates
    # `embeddings` in place (its return value is unused) -- confirm.
    add_pad_unk(embeddings, pad_token, unk_token)

    def _fit(split_sentences):
        # Same adjust_sentence call for every split; only the data differs.
        return adjust_sentence(split_sentences, max_length, pad_token,
                               embeddings, unk_token)

    train_sentences_, train_case_seqs = _fit(train_sentences)
    valid_sentences_, valid_case_seqs = _fit(valid_sentences)
    test_sentences_, test_case_seqs = _fit(test_sentences)

    # Build the word lookup/embedding from the combined adjusted sentences.
    all_sentences = train_sentences_ + valid_sentences_ + test_sentences_
    word_lookup, word_embedding = filter_unused_word(embeddings,
                                                     all_sentences)

    # Relation side.
    train_relations_ = adjust_relation(train_relations, max_length)
    valid_relations_ = adjust_relation(valid_relations, max_length)
    test_relations_ = adjust_relation(test_relations, max_length)

    # Hand every prepared artefact to the writer.
    write.run(
        train_sentences_, valid_sentences_, test_sentences_,
        word_lookup, word_embedding,
        train_case_seqs, valid_case_seqs, test_case_seqs,
        train_relations_, valid_relations_, test_relations_,
        ready_folder,
    )
 def test_read_run_none(self):
     """``run`` given the word ``9999`` must return the no-results message."""
     expected = '\nThere are no results to show.\n'
     # The leading '' is presumably an argv[0]-style placeholder -- TODO confirm.
     argv = ['', '9999']
     self.assertEqual(run(argv, db_name, table_name), expected)
 def test_read_run(self):
     """``run`` given the word ``likes`` must return the expected four-line table."""
     # Disable diff truncation so a mismatch in the padded table is fully shown.
     self.maxDiff = None
     expected = "WORD             ASSOC            DISTANCE         COUNT            \nlikes            to               0                1                \nlikes            fish             1                1                \nlikes            twice            2                1                \n"
     # The leading '' is presumably an argv[0]-style placeholder -- TODO confirm.
     argv = ['', 'likes']
     actual = run(argv, db_name, table_name)
     self.assertEqual(actual, expected)
 def test_read_all(self):
     """``run`` given only the ``''`` argument must return the full expected table."""
     # Disable diff truncation so a mismatch in the padded table is fully shown.
     self.maxDiff = None
     expected = (
         "WORD             ASSOC            DISTANCE         COUNT            \n"
         "bear             likes            0                1                \n"
         "fish             twice            0                1                \n"
         "likes            to               0                1                \n"
         "to               fish             0                1                \n"
         "bear             to               1                1                \n"
         "likes            fish             1                1                \n"
         "to               twice            1                1                \n"
         "bear             fish             2                1                \n"
         "likes            twice            2                1                \n"
     )
     # Only the '' element is passed, i.e. no word to look up.
     argv = ['']
     actual = run(argv, db_name, table_name)
     self.assertEqual(actual, expected)
示例#8
0
# Script body: import the `read` module and call its `run()` with no
# arguments. This is top-level code, so it executes whenever the file is run
# or imported.
import read

read.run()
 def test_read_all(self):
     """``run`` given only the ``''`` argument must return the expected word/count listing."""
     expected = "\nWord         Count\nperson           3                \nbear             2                \nfish             1                \n"
     # Only the '' element is passed, i.e. no word to look up.
     argv = ['']
     self.assertEqual(run(argv, db_name, table_name), expected)