Пример #1
0
    def test_insert_static_data_4(self):
        """
        Check the per-token columns added for "coquery_query_token".

        With that feature selected, the frame returned by
        insert_static_data() is expected to contain one
        "coquery_query_token_N" column for each of the three query items,
        and every row of column N is expected to hold the N-th item.
        """
        query_string = "item1 item2 item3"
        query = TokenQuery(query_string, self.session)
        query._query_id = 999

        # The feature selection is stored on the shared options module.
        # Register a cleanup that puts the previous state back, so that the
        # override below cannot leak into tests that run after this one.
        cfg = options.cfg
        if hasattr(cfg, "selected_features"):
            self.addCleanup(setattr, cfg, "selected_features",
                            cfg.selected_features)
        else:
            self.addCleanup(delattr, cfg, "selected_features")
        cfg.selected_features = ["coquery_query_token"]

        df = query.insert_static_data(self.df)

        # Both sides are sorted, so the column order is not compared.
        self.assertListEqual(
            sorted(df.columns.tolist()),
            sorted([
                "coq_word_label_1", "coq_word_label_2", "coq_word_label_3",
                "coquery_invisible_corpus_id", "coquery_dummy",
                "coquery_invisible_query_id", "coquery_query_token_1",
                "coquery_query_token_2", "coquery_query_token_3"
            ]))

        # Column N must repeat the N-th query item in every row.
        for number, item in enumerate(query_string.split(), 1):
            column = getattr(df, "coquery_query_token_{}".format(number))
            self.assertListEqual(column.tolist(), [item] * len(df))
Пример #2
0
    def test_insert_static_data_2(self):
        """
        Check the column layout returned for a fully quantified query.

        The frame returned by insert_static_data() must list exactly the
        columns below, in this order.
        """
        expected_columns = [
            "coq_word_label_1",
            "coq_word_label_2",
            "coq_word_label_3",
            "coquery_invisible_corpus_id",
            "coquery_dummy",
            "coquery_invisible_query_id",
        ]

        token_query = TokenQuery("item1{0,3} item2{1,3} item3{2,3}",
                                 self.session)
        token_query._query_id = 999

        result = token_query.insert_static_data(self.df)
        self.assertListEqual(result.columns.tolist(), expected_columns)
Пример #3
0
    def test_insert_static_data_2(self):
        """
        insert_static_data() on a quantified three-item query.

        Only the column names of the returned frame are inspected; they are
        compared as an ordered list.
        """
        quantified = "item1{0,3} item2{1,3} item3{2,3}"
        token_query = TokenQuery(quantified, self.session)
        token_query._query_id = 999

        frame = token_query.insert_static_data(self.df)
        actual_columns = frame.columns.tolist()

        self.assertListEqual(actual_columns, [
            "coq_word_label_1", "coq_word_label_2", "coq_word_label_3",
            "coquery_invisible_corpus_id",
            "coquery_dummy",
            "coquery_invisible_query_id",
        ])
Пример #4
0
    def test_quantified_required_columns(self):
        """
        Check the required columns for both expansions of "to{0,1}".

        The query string is expected to yield two entries in query_list.
        For the first entry, slot 2 is expected to be selected as NULL for
        both requested features; for the second entry, slot 2 is expected
        to be read from the tables like slots 1 and 3. The requested
        features are the word label and the "word_data" feature prefixed
        with the hash of self.link.
        """
        ext_feature = "{}.word_data".format(self.link.get_hash())
        s = "happy to{0,1} [n*]"

        query = TokenQuery(s, self.Session)
        # assertEqual reports the actual length when the check fails, which
        # assertTrue(len(...) == 2) does not.
        self.assertEqual(len(query.query_list), 2)

        # NOTE(review): the joins are not inspected by this test. The call
        # is retained in case it primes state on the resource -- confirm,
        # and drop it if it is free of side effects.
        self.resource.get_corpus_joins(query.query_list[0])
        # 1     2    3
        # happy {to} [n*]

        # A fresh feature list is passed to each call, as in the original.
        columns = self.resource.get_required_columns(
            query.query_list[0], ["word_label", ext_feature])
        self.assertListEqual(
            columns,
            ["COQ_WORD_1.Word AS coq_word_label_1",
             "NULL AS coq_word_label_2",
             "COQ_WORD_3.Word AS coq_word_label_3",
             "EXTCORP_LEXICON_1.ExtData AS db_extcorp_coq_word_data_1",
             "NULL AS db_extcorp_coq_word_data_2",
             "EXTCORP_LEXICON_3.ExtData AS db_extcorp_coq_word_data_3",
             "COQ_CORPUS_1.ID AS coquery_invisible_corpus_id",
             "COQ_CORPUS_1.FileId AS coquery_invisible_origin_id"])

        columns = self.resource.get_required_columns(
            query.query_list[1], ["word_label", ext_feature])
        self.assertListEqual(
            columns,
            ["COQ_WORD_1.Word AS coq_word_label_1",
             "COQ_WORD_2.Word AS coq_word_label_2",
             "COQ_WORD_3.Word AS coq_word_label_3",
             "EXTCORP_LEXICON_1.ExtData AS db_extcorp_coq_word_data_1",
             "EXTCORP_LEXICON_2.ExtData AS db_extcorp_coq_word_data_2",
             "EXTCORP_LEXICON_3.ExtData AS db_extcorp_coq_word_data_3",
             "COQ_CORPUS_1.ID AS coquery_invisible_corpus_id",
             "COQ_CORPUS_1.FileId AS coquery_invisible_origin_id"])
Пример #5
0
 def test_get_required_columns_4(self):
     """
     Required columns for the query "*" when "lemma_label" is requested.
     """
     expected_columns = [
         "COQ_LEMMA_1.Lemma AS coq_lemma_label_1",
         "COQ_CORPUS_1.ID AS coquery_invisible_corpus_id",
         "COQ_CORPUS_1.FileId AS coquery_invisible_origin_id",
     ]

     token_query = TokenQuery("*", self.Session)
     first_item = token_query.query_list[0]
     columns = self.resource.get_required_columns(first_item,
                                                  ["lemma_label"])

     self.assertListEqual(columns, expected_columns)
Пример #6
0
 def test_get_required_columns_1(self):
     """
     Required columns for the query "*" when "word_label" is requested.
     """
     token_query = TokenQuery("*", self.Session)
     requested = ["word_label"]

     columns = self.resource.get_required_columns(
         token_query.query_list[0], requested)

     self.assertListEqual(
         columns,
         [
             "COQ_WORD_1.Word AS coq_word_label_1",
             "COQ_CORPUS_1.ID AS coquery_invisible_corpus_id",
             "COQ_CORPUS_1.FileId AS coquery_invisible_origin_id",
         ])
Пример #7
0
 def test_where_conditions_2(self):
     """
     Condition list for a query item that contains an apostrophe.

     The expected condition has the apostrophe doubled inside the SQL
     string literal.
     """
     token_query = TokenQuery("*'ll", self.Session)
     first_item = token_query.query_list[0]

     joins = self.resource.get_corpus_joins(first_item)
     conditions = self.resource.get_condition_list(
         first_item, joins, ["word_label"])

     expected_conditions = ["(COQ_WORD_1.Word LIKE '%''ll')"]
     self.assertListEqual(conditions, expected_conditions)
Пример #8
0
    def test_insert_static_data_3(self):
        """
        Check the column added for "coquery_query_string".

        With "word_label" and "coquery_query_string" selected, the frame
        returned by insert_static_data() is expected to contain a
        "coquery_query_string" column whose every row holds the complete
        query string.
        """
        query_string = "item1 item2 item3"
        query = TokenQuery(query_string, self.session)
        query._query_id = 999

        # The feature selection is stored on the shared options module.
        # Register a cleanup that puts the previous state back, so that the
        # override below cannot leak into tests that run after this one.
        cfg = options.cfg
        if hasattr(cfg, "selected_features"):
            self.addCleanup(setattr, cfg, "selected_features",
                            cfg.selected_features)
        else:
            self.addCleanup(delattr, cfg, "selected_features")
        cfg.selected_features = ["word_label", "coquery_query_string"]

        df = query.insert_static_data(self.df)

        # Both sides are sorted, so the column order is not compared.
        self.assertListEqual(
            sorted(df.columns.tolist()),
            sorted(["coq_word_label_1", "coq_word_label_2", "coq_word_label_3",
                    "coquery_invisible_corpus_id",
                    "coquery_dummy", "coquery_invisible_query_id",
                    "coquery_query_string"]))

        self.assertListEqual(
            df.coquery_query_string.tolist(),
            [query_string] * len(df))
Пример #9
0
 def test_linked_required_columns(self):
     """
     Required columns when only a feature reached through self.link is
     requested for the query "*".
     """
     token_query = TokenQuery("*", self.Session)
     link_hash = self.link.get_hash()
     linked_feature = "{}.word_data".format(link_hash)

     columns = self.resource.get_required_columns(
         token_query.query_list[0], [linked_feature])

     expected_columns = [
         "EXTCORP_LEXICON_1.ExtData AS db_extcorp_coq_word_data_1",
         "COQ_CORPUS_1.ID AS coquery_invisible_corpus_id",
         "COQ_CORPUS_1.FileId AS coquery_invisible_origin_id",
     ]
     self.assertListEqual(columns, expected_columns)
Пример #10
0
 def test_get_required_columns_NULL_1(self):
     """
     Required columns for a query whose first item is "_NULL".

     Regression test for issue #256.
     """
     expected_columns = [
         "NULL AS coq_word_label_1",
         "COQ_WORD_2.Word AS coq_word_label_2",
         "COQ_CORPUS_2.ID AS coquery_invisible_corpus_id",
         "COQ_CORPUS_2.FileId AS coquery_invisible_origin_id",
     ]

     token_query = TokenQuery("_NULL *", self.Session)
     columns = self.resource.get_required_columns(
         token_query.query_list[0], ["word_label"])

     self.assertListEqual(columns, expected_columns)
Пример #11
0
 def test_corpus_joins_optimized_order_1(self):
     """
     Three query items, join order optimized by query item complexity.
     """
     # Show the complete diff if the join lists differ.
     self.maxDiff = None

     expected_joins = [
         "FROM       Corpus AS COQ_CORPUS_2",
         "INNER JOIN Corpus AS COQ_CORPUS_3 ON COQ_CORPUS_3.ID = COQ_CORPUS_2.ID + 1",
         "INNER JOIN Corpus AS COQ_CORPUS_1 ON COQ_CORPUS_1.ID = COQ_CORPUS_2.ID - 1",
     ]

     token_query = TokenQuery("* *ier [n*]", self.Session)
     joins = self.resource.get_corpus_joins(token_query.query_list[0])

     self.assertListEqual(joins, expected_joins)
Пример #12
0
 def test_get_required_columns_3(self):
     """
     Required columns for a two-item query with three requested features.

     The expected list has the word label and POS columns for both items
     first, followed by a single source label column and the two
     invisible corpus columns.
     """
     requested = ["source_label", "word_label", "word_pos"]
     expected_columns = [
         "COQ_WORD_1.Word AS coq_word_label_1",
         "COQ_WORD_2.Word AS coq_word_label_2",
         "COQ_WORD_1.POS AS coq_word_pos_1",
         "COQ_WORD_2.POS AS coq_word_pos_2",
         "COQ_SOURCE_1.Title AS coq_source_label_1",
         "COQ_CORPUS_1.ID AS coquery_invisible_corpus_id",
         "COQ_CORPUS_1.FileId AS coquery_invisible_origin_id",
     ]

     token_query = TokenQuery("* *", self.Session)
     columns = self.resource.get_required_columns(
         token_query.query_list[0], requested)

     self.assertListEqual(columns, expected_columns)
Пример #13
0
 def test_get_token_numbering_2(self):
     """
     Token numbering for a query of three quantified items.

     Positions 0 to 8 are expected to be labelled "1.1" through "3.3".
     """
     token_query = TokenQuery("item1{0,3} item2{1,3} item3{2,3}",
                              self.session)
     expected_labels = [
         "1.1", "1.2", "1.3",
         "2.1", "2.2", "2.3",
         "3.1", "3.2", "3.3",
     ]

     for position, label in enumerate(expected_labels):
         self.assertEqual(token_query.get_token_numbering(position), label)
Пример #14
0
 def test_where_conditions_quantified(self):
     """
     Condition list for the first entry of a heavily quantified query.

     Only slots 1, 3 and 8 are expected to contribute a condition.
     """
     # 1    2 3     4      5    6    7      8     9
     # more * than {NONE} {NONE NONE NONE} {[nn*] NONE}
     query_text = "more * than [dt]{0,1} [jj]{0,3} [nn*]{1,2}"
     token_query = TokenQuery(query_text, self.Session)
     first_entry = token_query.query_list[0]

     joins = self.resource.get_corpus_joins(first_entry)
     conditions = self.resource.get_condition_list(
         first_entry, joins, ["word_label"])

     expected_conditions = [
         "(COQ_WORD_1.Word = 'more')",
         "(COQ_WORD_3.Word = 'than')",
         "(COQ_WORD_8.POS LIKE 'nn%')",
     ]
     self.assertListEqual(conditions, expected_conditions)
Пример #15
0
 def test_query_string_apostrophe(self):
     """
     Full query string for an item that contains an apostrophe.

     Both strings are passed through self.simple() before being compared.
     """
     expected_sql = """
         SELECT COQ_WORD_1.Word AS coq_word_label_1,
                COQ_CORPUS_1.ID AS coquery_invisible_corpus_id,
                COQ_CORPUS_1.FileId AS coquery_invisible_origin_id
         FROM Corpus AS COQ_CORPUS_1
         INNER JOIN Lexicon AS COQ_WORD_1
                 ON COQ_WORD_1.WordId = COQ_CORPUS_1.WordId
         WHERE (COQ_WORD_1.Word LIKE '%''ll')"""

     token_query = TokenQuery("*'ll", self.Session)
     actual_sql = self.resource.get_query_string(token_query.query_list[0],
                                                 ["word_label"])

     self.assertEqual(self.simple(actual_sql), self.simple(expected_sql))
Пример #16
0
    def test_quantified_query_string_1(self):
        """
        Check the corpus joins for both expansions of "b*{1,2}".

        The query is expected to yield two entries in query_list. The
        expected join list of the first entry has no COQ_CORPUS_3 alias and
        places COQ_CORPUS_4 at offset + 1; that of the second entry contains
        all four aliases, with COQ_CORPUS_4 at offset + 2. In both lists
        the offsets are relative to COQ_CORPUS_2.
        """
        query = TokenQuery("* b*{1,2} *", self.Session)
        # assertEqual reports the actual length when the check fails, which
        # assertTrue(len(...) == 2) does not.
        self.assertEqual(len(query.query_list), 2)

        joins = self.resource.get_corpus_joins(query.query_list[0])
        self.assertListEqual(
            joins,
            ["FROM       Corpus AS COQ_CORPUS_2",
             "INNER JOIN Corpus AS COQ_CORPUS_1 ON COQ_CORPUS_1.ID = COQ_CORPUS_2.ID - 1",
             "INNER JOIN Corpus AS COQ_CORPUS_4 ON COQ_CORPUS_4.ID = COQ_CORPUS_2.ID + 1"])

        joins = self.resource.get_corpus_joins(query.query_list[1])
        self.assertListEqual(
            joins,
            ["FROM       Corpus AS COQ_CORPUS_2",
             "INNER JOIN Corpus AS COQ_CORPUS_3 ON COQ_CORPUS_3.ID = COQ_CORPUS_2.ID + 1",
             "INNER JOIN Corpus AS COQ_CORPUS_1 ON COQ_CORPUS_1.ID = COQ_CORPUS_2.ID - 1",
             "INNER JOIN Corpus AS COQ_CORPUS_4 ON COQ_CORPUS_4.ID = COQ_CORPUS_2.ID + 2"])
Пример #17
0
 def test_get_token_numbering_2(self):
     """
     get_token_numbering() for three quantified query items.

     Each (position, label) pair below is checked in turn.
     """
     token_query = TokenQuery("item1{0,3} item2{1,3} item3{2,3}",
                              self.session)
     expectations = (
         (0, "1.1"), (1, "1.2"), (2, "1.3"),
         (3, "2.1"), (4, "2.2"), (5, "2.3"),
         (6, "3.1"), (7, "3.2"), (8, "3.3"),
     )

     for position, label in expectations:
         numbering = token_query.get_token_numbering(position)
         self.assertEqual(numbering, label)
Пример #18
0
    def test_query_string_ortho_or_with_pos(self):
        """
        Full query string for two word alternatives plus a POS restriction.

        The expected WHERE clause combines the two word patterns with OR
        and adds the POS pattern with AND. Both strings are passed through
        self.simple() before being compared.
        """
        expected_sql = """
            SELECT COQ_WORD_1.Word AS coq_word_label_1,
                   COQ_CORPUS_1.ID AS coquery_invisible_corpus_id,
                   COQ_CORPUS_1.FileId AS coquery_invisible_origin_id
            FROM Corpus AS COQ_CORPUS_1
            INNER JOIN Lexicon AS COQ_WORD_1
                    ON COQ_WORD_1.WordId = COQ_CORPUS_1.WordId
            WHERE (COQ_WORD_1.Word LIKE 'a%' OR
                   COQ_WORD_1.Word LIKE 'b%') AND
                  (COQ_WORD_1.POS LIKE 'n%')"""

        token_query = TokenQuery("a*|b*.[n*]", self.Session)
        actual_sql = self.resource.get_query_string(
            token_query.query_list[0], ["word_label"])

        self.assertEqual(self.simple(actual_sql), self.simple(expected_sql))
Пример #19
0
    def test_query_string_NULL_1(self):
        """
        Full query string for a query whose first item is "_NULL".

        Regression test for issue #256. Both strings are passed through
        self.simple() before being compared.
        """
        expected_sql = """
            SELECT NULL AS coq_word_label_1,
                   COQ_WORD_2.Word AS coq_word_label_2,
                   COQ_SOURCE_2.Title AS coq_source_label_1,
                   COQ_CORPUS_2.ID AS coquery_invisible_corpus_id,
                   COQ_CORPUS_2.FileId AS coquery_invisible_origin_id

            FROM Corpus AS COQ_CORPUS_2

            INNER JOIN Files AS COQ_SOURCE_2
                    ON COQ_SOURCE_2.FileId = COQ_CORPUS_2.FileId

            INNER JOIN Lexicon AS COQ_WORD_2
                    ON COQ_WORD_2.WordId = COQ_CORPUS_2.WordId"""

        token_query = TokenQuery("_NULL *", self.Session)
        requested = ["word_label", "source_label"]
        actual_sql = self.resource.get_query_string(
            token_query.query_list[0], requested)

        self.assertEqual(self.simple(actual_sql), self.simple(expected_sql))
Пример #20
0
    def test_get_required_columns_quantified(self):
        """
        Check the required columns for the first entry of a quantified query.

        The query is expected to yield 16 entries in query_list. For the
        first entry, the word label is expected to be selected as NULL for
        slots 4-7 and 9, and to be read from the tables for slots 1-3 and 8.
        """
        s = "more * than [dt]{0,1} [jj]{0,3} [nn*]{1,2}"
        query = TokenQuery(s, self.Session)

        # assertEqual reports the actual length when the check fails, which
        # assertTrue(len(...) == 16) does not.
        self.assertEqual(len(query.query_list), 16)

        # NOTE(review): the joins are not inspected by this test. The call
        # is retained in case it primes state on the resource -- confirm,
        # and drop it if it is free of side effects.
        self.resource.get_corpus_joins(query.query_list[0])
        # 1    2 3     4      5    6    7      8     9
        # more * than {NONE} {NONE NONE NONE} {[nn*] NONE}

        columns = self.resource.get_required_columns(
            query.query_list[0], ["word_label"])
        self.assertListEqual(
            columns,
            ["COQ_WORD_1.Word AS coq_word_label_1",
             "COQ_WORD_2.Word AS coq_word_label_2",
             "COQ_WORD_3.Word AS coq_word_label_3",
             "NULL AS coq_word_label_4",
             "NULL AS coq_word_label_5",
             "NULL AS coq_word_label_6",
             "NULL AS coq_word_label_7",
             "COQ_WORD_8.Word AS coq_word_label_8",
             "NULL AS coq_word_label_9",
             "COQ_CORPUS_1.ID AS coquery_invisible_corpus_id",
             "COQ_CORPUS_1.FileId AS coquery_invisible_origin_id"])
Пример #21
0
    def test_query_string_two_items(self):
        """
        Full query string for a query made of two prefix patterns.

        Both strings are passed through self.simple() before being compared.
        """
        expected_sql = """
            SELECT COQ_WORD_1.Word AS coq_word_label_1,
                   COQ_WORD_2.Word AS coq_word_label_2,
                   COQ_CORPUS_1.ID AS coquery_invisible_corpus_id,
                   COQ_CORPUS_1.FileId AS coquery_invisible_origin_id

            FROM Corpus AS COQ_CORPUS_1
            INNER JOIN Corpus AS COQ_CORPUS_2
                    ON COQ_CORPUS_2.ID = COQ_CORPUS_1.ID + 1

            INNER JOIN Lexicon AS COQ_WORD_1
                    ON COQ_WORD_1.WordId = COQ_CORPUS_1.WordId
            INNER JOIN Lexicon AS COQ_WORD_2
                    ON COQ_WORD_2.WordId = COQ_CORPUS_2.WordId

            WHERE (COQ_WORD_1.Word LIKE 'a%') AND
                  (COQ_WORD_2.Word LIKE 'b%')"""

        token_query = TokenQuery("a* b*", self.Session)
        actual_sql = self.resource.get_query_string(
            token_query.query_list[0], ["word_label"])

        self.assertEqual(self.simple(actual_sql), self.simple(expected_sql))
Пример #22
0
 def test_max_tokens_2(self):
     """get_max_tokens() is expected to report 9 for this quantified query."""
     quantified = "item1{0,3} item2{1,3} item3{2,3}"
     token_query = TokenQuery(quantified, self.session)
     max_tokens = token_query.get_max_tokens()
     self.assertEqual(max_tokens, 9)
Пример #23
0
 def test_corpus_joins_one_item(self):
     """A one-item query is expected to need only the FROM clause."""
     token_query = TokenQuery("*", self.Session)
     only_item = token_query.query_list[0]
     joins = self.resource.get_corpus_joins(only_item)

     expected_joins = ["FROM       Corpus AS COQ_CORPUS_1"]
     self.assertListEqual(joins, expected_joins)
Пример #24
0
 def test_max_tokens_2(self):
     """Maximum token count of a query with three quantified items."""
     expected_maximum = 9
     token_query = TokenQuery("item1{0,3} item2{1,3} item3{2,3}",
                              self.session)
     self.assertEqual(token_query.get_max_tokens(), expected_maximum)
Пример #25
0
 def test_get_token_numbering_1(self):
     """
     Token numbering for three unquantified items.

     Positions 0, 1 and 2 are expected to be labelled "1", "2" and "3".
     """
     token_query = TokenQuery("item1 item2 item3", self.session)

     for position, label in enumerate(["1", "2", "3"]):
         self.assertEqual(token_query.get_token_numbering(position), label)
Пример #26
0
 def test_lemmatized_corpus_joins_1(self):
     """Corpus joins for the single query item "#abc.[n*]"."""
     query_text = "#abc.[n*]"
     token_query = TokenQuery(query_text, self.Session)

     joins = self.resource.get_corpus_joins(token_query.query_list[0])

     expected_joins = ["FROM       Corpus AS COQ_CORPUS_1"]
     self.assertListEqual(joins, expected_joins)
Пример #27
0
 def test_max_tokens_1(self):
     """get_max_tokens() is expected to report 3 for three plain items."""
     plain_items = "item1 item2 item3"
     token_query = TokenQuery(plain_items, self.session)
     max_tokens = token_query.get_max_tokens()
     self.assertEqual(max_tokens, 3)
Пример #28
0
 def test_max_tokens_1(self):
     """Maximum token count of a query with three unquantified items."""
     expected_maximum = 3
     token_query = TokenQuery("item1 item2 item3", self.session)
     self.assertEqual(token_query.get_max_tokens(), expected_maximum)
Пример #29
0
 def test_get_token_numbering_1(self):
     """
     get_token_numbering() for a query without quantifiers.

     Each (position, label) pair below is checked in turn.
     """
     token_query = TokenQuery("item1 item2 item3", self.session)
     expectations = ((0, "1"), (1, "2"), (2, "3"))

     for position, label in expectations:
         numbering = token_query.get_token_numbering(position)
         self.assertEqual(numbering, label)
Пример #30
0
 def test_corpus_joins_three_items(self):
     """
     Corpus joins for three identical items.

     The expected joins attach aliases 2 and 3 at offsets + 1 and + 2 from
     COQ_CORPUS_1.
     """
     expected_joins = [
         "FROM       Corpus AS COQ_CORPUS_1",
         "INNER JOIN Corpus AS COQ_CORPUS_2 ON COQ_CORPUS_2.ID = COQ_CORPUS_1.ID + 1",
         "INNER JOIN Corpus AS COQ_CORPUS_3 ON COQ_CORPUS_3.ID = COQ_CORPUS_1.ID + 2",
     ]

     token_query = TokenQuery("* * *", self.Session)
     joins = self.resource.get_corpus_joins(token_query.query_list[0])

     self.assertListEqual(joins, expected_joins)