Exemplo n.º 1
0
    def setUp(self):
        """Create the TfIdf object under test and load per-test expectations.

        The running test is identified by its short description (for
        unittest, the first line of the test method's docstring).  After the
        parent ``setUp`` has run, a fresh ``TfIdf`` is given the fixture's
        ``r_D_text_set`` and its model is built.  Depending on the test, the
        matching expected values are then copied from ``res`` onto the
        test case.
        """
        description = self.shortDescription()
        super(TfIdfTest, self).setUp()

        # Preparation shared by every test in the class.
        # NOTE(review): self.test_data is presumably created by the parent
        # setUp -- confirm against the base class.
        tf_idf = TfIdf()
        self.mock_obj = tf_idf
        tf_idf._text_set = self.test_data.r_D_text_set
        self.token_list = res.token_list
        tf_idf.build_model()

        if description == "Test routine build_model() in TfIdf":
            print("setting up for testing  build_model()")
            return

        if description == "Test routine build_df_model() in TfIdf":
            print("setting up for testing  build_df_model()")
            self.document_frequency = res.document_frequency
            return

        if description == "Test routine build_tf_model() in TfIdf":
            print("setting up for testing  build_tf_model()")
            self.term_frequency = res.term_frequency
            return

        if description == "Test routine build_tf_idf_model()/compute() in TfIdf":
            print("setting up for testing  compute()/build_tf_idf_model()")
            # The feature is attached to the test data before the expected
            # weights are loaded.
            self.test_data.attach_feature("tf_idf")
            self.tf_idf_weight = res.tf_idf_weight
Exemplo n.º 2
0
    def init_attribute(self, attribute_name):
        """Instantiate the attribute class that belongs to a feature name.

        Parameter
        ---------
        attribute_name : string
            Name of the feature whose attribute instance should be
            created.

        Returns
        -------
        A new instance of the attribute class selected by
        ``attribute_name``.

        Raises
        ------
        FeatureNotExistException
            If ``attribute_name`` matches none of the known feature names.
        """
        # Attributes that are constructed with the bag-of-words model.
        if attribute_name == "bag_of_words":
            return BagOfWords(self.bow_model)
        if attribute_name == "tf_idf":
            return TfIdf(self.bow_model)
        if attribute_name == "bag_of_pos":
            return BagOfPos(self.bow_model)

        # Attributes that are constructed without arguments.
        if attribute_name == "readability":
            return Readability()
        if attribute_name == "variety":
            return Variety()
        if attribute_name == "perfect_tense":
            return PerfectTense()
        if attribute_name == "nested_sentence":
            return NestedSentence()
        if attribute_name == "passive":
            return Passive()
        if attribute_name == "adjective":
            return Adjective()
        if attribute_name == "sentence_start":
            return SentenceStart()
        if attribute_name == "modal_verb":
            return ModalVerb()

        # No known feature matched the requested name.
        raise FeatureNotExistException(attribute_name)
Exemplo n.º 3
0
    def setUp(self):
        """Prepare a built TfIdf instance plus the expectations of one test.

        The short description of the running test (for unittest, the first
        line of the test method's docstring) selects a banner to print and,
        for some tests, a step that loads expected values from ``res``.
        Tests with any other description only get the common preparation.
        """
        current_test = self.shortDescription()
        super(TfIdfTest, self).setUp()

        # Common preparation: a fresh TfIdf fed with the fixture text set.
        self.mock_obj = TfIdf()
        self.mock_obj._text_set = self.test_data.r_D_text_set
        self.token_list = res.token_list
        self.mock_obj.build_model()

        def expect_document_frequency():
            self.document_frequency = res.document_frequency

        def expect_term_frequency():
            self.term_frequency = res.term_frequency

        def expect_tf_idf_weight():
            # The feature is attached before the expected weights are stored.
            self.test_data.attach_feature("tf_idf")
            self.tf_idf_weight = res.tf_idf_weight

        # short description -> (banner to print, optional loading step)
        preparations = {
            "Test routine build_model() in TfIdf": (
                "setting up for testing  build_model()",
                None,
            ),
            "Test routine build_df_model() in TfIdf": (
                "setting up for testing  build_df_model()",
                expect_document_frequency,
            ),
            "Test routine build_tf_model() in TfIdf": (
                "setting up for testing  build_tf_model()",
                expect_term_frequency,
            ),
            "Test routine build_tf_idf_model()/compute() in TfIdf": (
                "setting up for testing  compute()/build_tf_idf_model()",
                expect_tf_idf_weight,
            ),
        }

        selected = preparations.get(current_test)
        if selected is None:
            return

        banner, load_expected = selected
        print(banner)
        if load_expected is not None:
            load_expected()
Exemplo n.º 4
0
class TfIdfTest(TestBodyAttribute):
    """Unit tests for the model-building and weighting routines of TfIdf.

    ``setUp`` and ``tearDown`` identify the running test through
    ``shortDescription()``, i.e. the first docstring line of the test
    method, so the test docstrings below act as lookup keys and must stay
    unchanged.
    """

    @classmethod
    def setUpClass(cls):
        """Print the banner that opens the output of this test class."""
        print("#################### Begin Testing TfIdf Class ####################" + "\n")

    @classmethod
    def tearDownClass(cls):
        """Print the banner that closes the output of this test class."""
        print("\n" + "###################### End Testing TfIdf Class ######################")

    def setUp(self):
        """Create the TfIdf object under test and load per-test expectations."""
        description = self.shortDescription()
        super(TfIdfTest, self).setUp()

        # Preparation shared by every test in the class.
        tf_idf = TfIdf()
        self.mock_obj = tf_idf
        tf_idf._text_set = self.test_data.r_D_text_set
        self.token_list = res.token_list
        tf_idf.build_model()

        if description == "Test routine build_model() in TfIdf":
            print("setting up for testing  build_model()")
            return

        if description == "Test routine build_df_model() in TfIdf":
            print("setting up for testing  build_df_model()")
            self.document_frequency = res.document_frequency
            return

        if description == "Test routine build_tf_model() in TfIdf":
            print("setting up for testing  build_tf_model()")
            self.term_frequency = res.term_frequency
            return

        if description == "Test routine build_tf_idf_model()/compute() in TfIdf":
            print("setting up for testing  compute()/build_tf_idf_model()")
            # The feature is attached before the expected weights are loaded.
            self.test_data.attach_feature("tf_idf")
            self.tf_idf_weight = res.tf_idf_weight

    def tearDown(self):
        """Release the fixtures and the expectations of the finished test."""
        description = self.shortDescription()

        # References held for every test, cleared in a fixed order.
        for fixture in ("korpus_file", "anno_file", "test_korpus",
                        "test_data", "token_list", "mock_obj"):
            setattr(self, fixture, None)

        separator = "--------------------------------------------------------------"

        if description == "Test routine build_model() in TfIdf":
            print("cleaning up for testing  build_model()")
            print(separator)
            return

        if description == "Test routine build_df_model() in TfIdf":
            print("cleaning up for testing  build_df_model()")
            print(separator)
            self.document_frequency = None
            return

        if description == "Test routine build_tf_model() in TfIdf":
            print("cleaning up for testing  build_tf_model()")
            print(separator)
            self.term_frequency = None
            return

        if description == "Test routine build_tf_idf_model()/compute() in TfIdf":
            print("cleaning up for testing  compute()/build_tf_idf_model()")
            print(separator)
            self.tf_idf_weight = None

    def test__tf_idf__build_model(self):
        """ Test routine build_model() in TfIdf """
        # The built model must contain exactly the expected tokens as keys.
        expected_tokens = sorted(self.token_list)
        model_tokens = sorted(self.mock_obj.model.keys())
        self.assertListEqual(expected_tokens, model_tokens)

    def test__tf_idf__build_df_model(self):
        """ Test routine build_df_model() in TfIdf """
        # Compare the document frequencies in key-sorted order.
        sorted_items = sorted(self.mock_obj.build_df_model().items())
        observed = collections.OrderedDict(sorted_items).values()
        self.assertListEqual(observed, self.document_frequency)

    def test__tf_idf__build_tf_model(self):
        """ Test routine build_tf_model() in TfIdf """
        # Each text's term frequencies are compared in key-sorted order.
        for text in self.mock_obj.text_set:
            frequencies = self.mock_obj.build_tf_model(text.tokenlist)
            ordered = collections.OrderedDict(sorted(frequencies.items()))
            self.assertListEqual(ordered.values(), self.term_frequency[text.id])

    def test__tf_idf__compute(self):
        """ Test routine build_tf_idf_model()/compute() in TfIdf """
        # Both texts of every pair must carry the expected tf-idf weights.
        for textpair in self.test_data.real_data.values():
            first = textpair.text1
            self.assertListEqual(first.features["tf_idf"], self.tf_idf_weight[first.id])
            second = textpair.text2
            self.assertListEqual(second.features["tf_idf"], self.tf_idf_weight[second.id])
Exemplo n.º 5
0
class TfIdfTest(TestBodyAttribute):
    """Test case covering build_model, build_df_model, build_tf_model and
    the tf-idf feature computation of TfIdf.

    The fixtures look up the running test by ``shortDescription()`` (the
    first docstring line of the test method); the docstrings of the test
    methods are therefore part of the fixture logic and must not be edited.
    """

    @classmethod
    def setUpClass(cls):
        """Announce the start of the TfIdf tests on standard output."""
        print("#################### Begin Testing TfIdf Class ####################" + "\n")

    @classmethod
    def tearDownClass(cls):
        """Announce the end of the TfIdf tests on standard output."""
        print("\n" + "###################### End Testing TfIdf Class ######################")

    def setUp(self):
        """Prepare a built TfIdf instance plus the expectations of one test."""
        running = self.shortDescription()
        super(TfIdfTest, self).setUp()

        # Common preparation: a fresh TfIdf fed with the fixture text set.
        self.mock_obj = TfIdf()
        self.mock_obj._text_set = self.test_data.r_D_text_set
        self.token_list = res.token_list
        self.mock_obj.build_model()

        def expect_document_frequency():
            self.document_frequency = res.document_frequency

        def expect_term_frequency():
            self.term_frequency = res.term_frequency

        def expect_tf_idf_weight():
            # The feature is attached before the expected weights are stored.
            self.test_data.attach_feature("tf_idf")
            self.tf_idf_weight = res.tf_idf_weight

        # short description -> (banner to print, optional loading step)
        preparations = {
            "Test routine build_model() in TfIdf": (
                "setting up for testing  build_model()",
                None,
            ),
            "Test routine build_df_model() in TfIdf": (
                "setting up for testing  build_df_model()",
                expect_document_frequency,
            ),
            "Test routine build_tf_model() in TfIdf": (
                "setting up for testing  build_tf_model()",
                expect_term_frequency,
            ),
            "Test routine build_tf_idf_model()/compute() in TfIdf": (
                "setting up for testing  compute()/build_tf_idf_model()",
                expect_tf_idf_weight,
            ),
        }

        selected = preparations.get(running)
        if selected is None:
            return

        banner, load_expected = selected
        print(banner)
        if load_expected is not None:
            load_expected()

    def tearDown(self):
        """Clear the shared fixtures and the expectations of the finished test."""
        running = self.shortDescription()

        # Chained assignment clears the targets left to right.
        self.korpus_file = self.anno_file = self.test_korpus = None
        self.test_data = self.token_list = self.mock_obj = None

        # short description -> (message to print, expectation attribute to clear)
        cleanups = {
            "Test routine build_model() in TfIdf": (
                "cleaning up for testing  build_model()",
                None,
            ),
            "Test routine build_df_model() in TfIdf": (
                "cleaning up for testing  build_df_model()",
                "document_frequency",
            ),
            "Test routine build_tf_model() in TfIdf": (
                "cleaning up for testing  build_tf_model()",
                "term_frequency",
            ),
            "Test routine build_tf_idf_model()/compute() in TfIdf": (
                "cleaning up for testing  compute()/build_tf_idf_model()",
                "tf_idf_weight",
            ),
        }

        selected = cleanups.get(running)
        if selected is None:
            return

        message, expectation = selected
        print(message)
        print("--------------------------------------------------------------")
        if expectation is not None:
            setattr(self, expectation, None)

    def test__tf_idf__build_model(self):
        """ Test routine build_model() in TfIdf """
        # The keys of the built model are compared with the expected tokens,
        # both in sorted order.
        wanted = sorted(self.token_list)
        built = sorted(self.mock_obj.model.keys())
        self.assertListEqual(wanted, built)

    def test__tf_idf__build_df_model(self):
        """ Test routine build_df_model() in TfIdf """
        # Values are taken in key-sorted order before the comparison.
        document_frequencies = self.mock_obj.build_df_model()
        by_key = collections.OrderedDict(sorted(document_frequencies.items()))
        self.assertListEqual(by_key.values(), self.document_frequency)

    def test__tf_idf__build_tf_model(self):
        """ Test routine build_tf_model() in TfIdf """
        # One comparison per text, with values in key-sorted order.
        for text in self.mock_obj.text_set:
            term_counts = self.mock_obj.build_tf_model(text.tokenlist)
            by_key = collections.OrderedDict(sorted(term_counts.items()))
            self.assertListEqual(by_key.values(), self.term_frequency[text.id])

    def test__tf_idf__compute(self):
        """ Test routine build_tf_idf_model()/compute() in TfIdf """
        # The tf-idf feature of both texts in each pair is checked.
        for textpair in self.test_data.real_data.values():
            left = textpair.text1
            self.assertListEqual(left.features["tf_idf"],
                                 self.tf_idf_weight[left.id])
            right = textpair.text2
            self.assertListEqual(right.features["tf_idf"],
                                 self.tf_idf_weight[right.id])