示例#1
0
    def similarity_over_all(self):
        """Calculate similarity between this bug (summary, description,
        stacktrace) and every candidate report in the basic collection.

        Candidates are the reports that have the same product as this
        report, a creation timestamp greater than this report's creation
        timestamp minus the search time span, and an id that is not listed
        in the excluded report ids.  The dummy bug id is skipped as well.

        Returns:
            dict, {bug_id -> [score, summary_score, description_score, stacktrace_score]}
            (each value is whatever ``self.similarity_with`` returns).
        """

        from ir_log import IRLog
        from ir_config import IRConfig
        from ir_mongodb_helper import IRCollection
        from ir_text import IRText
        from ir_tfidf import IRTFIDF

        logger = IRLog.get_instance()
        config = IRConfig.get_instance()
        # Two 365-day years expressed in seconds; presumably the creation
        # timestamp is stored in seconds too -- TODO confirm.
        search_time_span = 2 * 3600 * 24 * 365

        bug_id_name = config.get('bug_id_name')
        create_ts_name = config.get('bug_create_ts_name')
        product_name = config.get('bug_product_name')

        basic_collection = IRCollection('bug_db_name',
                                        'bug_basic_collection_name', 'r')

        reports2scan = basic_collection.find({
            product_name: self.get_product(),
            create_ts_name: {
                '$gt': self.get_create_ts() - search_time_span
            },
            bug_id_name: {
                '$nin': self.__exclude_report_ids
            }
        })
        logger.println('Comparing with %d reports.' % reports2scan.count())

        # Parenthesised form prints the same text on Python 2 and is also
        # valid Python 3 syntax.
        print(self.__summary_text)
        print(self.__description_text)

        # Whether this report has a stacktrace does not change while
        # scanning, so decide once if candidates need theirs loaded.
        own_stacktrace = self.get_stacktrace()
        need_stacktrace = own_stacktrace is not None and \
                len(own_stacktrace) > 0

        result = {}
        for report in reports2scan:
            bug_id = report[bug_id_name]
            if bug_id == self.get_dummy_bug_id():
                continue
            # because we don't want to load stacktrace in case of
            #    self.__stacktrace being empty, we create and fill the info
            #    of the report manually
            # NOTE(review): writing other_report.__xxx relies on this method
            #    living in the same class as IRReport so that private-name
            #    mangling resolves to the same attributes -- confirm.
            other_report = IRReport("", "")
            other_report.__summary_tfidf, other_report.__description_tfidf = \
                    IRTFIDF.get_tfidf_of_bug(bug_id)
            if need_stacktrace:
                other_report.__stacktrace = IRText.get_stacktrace_of_bug(
                    bug_id)
            # similarity_with is always handed a list, never None
            if other_report.__stacktrace is None:
                other_report.__stacktrace = []
            result[bug_id] = self.similarity_with(other_report)

        return result
示例#2
0
    def similarity_over_all(self):
        """Calculate similarity between this bug (summary, description,
        stacktrace) and every candidate report in the basic collection.

        Candidates are the reports that have the same product as this
        report, a creation timestamp greater than this report's creation
        timestamp minus the search time span, and an id that is not listed
        in the excluded report ids.  The dummy bug id is skipped as well.

        Returns:
            dict, {bug_id -> [score, summary_score, description_score, stacktrace_score]}
            (each value is whatever ``self.similarity_with`` returns).
        """

        from ir_log import IRLog
        from ir_config import IRConfig
        from ir_mongodb_helper import IRCollection
        from ir_text import IRText
        from ir_tfidf import IRTFIDF

        logger = IRLog.get_instance()
        config = IRConfig.get_instance()
        # Two 365-day years expressed in seconds; presumably the creation
        # timestamp is stored in seconds too -- TODO confirm.
        search_time_span = 2 * 3600 * 24 * 365

        bug_id_name = config.get('bug_id_name')
        create_ts_name = config.get('bug_create_ts_name')
        product_name = config.get('bug_product_name')

        basic_collection = IRCollection(
            'bug_db_name', 'bug_basic_collection_name', 'r')

        reports2scan = basic_collection.find({
            product_name : self.get_product(),
            create_ts_name : {'$gt' : self.get_create_ts() - search_time_span},
            bug_id_name : {'$nin' : self.__exclude_report_ids} })
        logger.println('Comparing with %d reports.' % reports2scan.count())

        # Parenthesised form prints the same text on Python 2 and is also
        # valid Python 3 syntax.
        print(self.__summary_text)
        print(self.__description_text)

        # Whether this report has a stacktrace does not change while
        # scanning, so decide once if candidates need theirs loaded.
        own_stacktrace = self.get_stacktrace()
        need_stacktrace = own_stacktrace is not None and \
                len(own_stacktrace) > 0

        result = {}
        for report in reports2scan:
            bug_id = report[bug_id_name]
            if bug_id == self.get_dummy_bug_id():
                continue
            # because we don't want to load stacktrace in case of
            #    self.__stacktrace being empty, we create and fill the info
            #    of the report manually
            # NOTE(review): writing other_report.__xxx relies on this method
            #    living in the same class as IRReport so that private-name
            #    mangling resolves to the same attributes -- confirm.
            other_report = IRReport("", "")
            other_report.__summary_tfidf, other_report.__description_tfidf = \
                    IRTFIDF.get_tfidf_of_bug(bug_id)
            if need_stacktrace:
                other_report.__stacktrace = IRText.get_stacktrace_of_bug(bug_id)
            # similarity_with is always handed a list, never None
            if other_report.__stacktrace is None:
                other_report.__stacktrace = []
            result[bug_id] = self.similarity_with(other_report)

        return result
示例#3
0
    def test_show_dict_compare(self):
        """Run IRTFIDF.show_dict_compare on the tf-idf of bugs 100000 and 100200.

        Loads the test configuration, fetches the summary and description
        tf-idf of both bugs and compares them pairwise (bug against the other
        bug, and bug against itself).  show_dict_compare is called both with
        and without its third argument.  The test makes no assertions; it
        only checks that the calls complete.
        """
        from ir_log import IRLog
        from ir_config import IRConfig
        from ir_tfidf import IRTFIDF

        log = IRLog.get_instance()
        log.start_log()
        try:
            config = IRConfig.get_instance()
            config.load('../data/test/bug_test.cfg')
            summary_name = config.get('bug_summary_name')
            description_name = config.get('bug_description_name')

            summary_a, description_a = IRTFIDF.get_tfidf_of_bug(100000)
            summary_b, description_b = IRTFIDF.get_tfidf_of_bug(100200)
            log.println('Summary 100000 vs 100200')
            IRTFIDF.show_dict_compare(summary_a, summary_b, summary_name)
            log.println('Description 100000 vs 100200')
            IRTFIDF.show_dict_compare(description_a, description_b)
            log.println('Summary 100000 vs 100000')
            IRTFIDF.show_dict_compare(summary_a, summary_a)
            log.println('Description 100000 vs 100000')
            IRTFIDF.show_dict_compare(description_a, description_a,
                                      description_name)
        finally:
            # Always pair start_log() with stop_log(), even if a call above
            # raises, so the log is not left running after the test.
            log.stop_log()
示例#4
0
    def test_get_tfidf_of_bug(self):
        """Fetch the tf-idf of bug 100000 and write both parts to the log.

        The test configuration is loaded first.  No assertions are made;
        the test only checks that the lookup and logging complete.
        """
        from ir_log import IRLog
        from ir_config import IRConfig
        from ir_tfidf import IRTFIDF

        log = IRLog.get_instance()
        log.start_log()
        IRConfig.get_instance().load('../data/test/bug_test.cfg')

        summary_tfidf, description_tfidf = IRTFIDF.get_tfidf_of_bug(100000)
        summary_line = 'Summary tfidf: %s' % str(summary_tfidf)
        description_line = 'Description tfidf: %s' % str(description_tfidf)
        log.println(summary_line)
        log.println(description_line)

        log.stop_log()
示例#5
0
 def get_summary_and_description_tfidf(self):
     """Return ``[summary_tfidf, description_tfidf]`` for this report.

     With a bug id, the pair is looked up through
     ``IRTFIDF.get_tfidf_of_bug``; it is stored on the instance and reused
     on later calls only when caching is allowed.  Without a bug id, the
     instance values are returned, after first calling
     ``__update_summary_and_description_tfidf_from_termcount`` if either of
     them is still None.
     """
     if self.__bug_id is not None:
         # Serve the stored pair when caching is on and both halves exist.
         if self.__allow_cache:
             if not (self.__summary_tfidf is None or
                     self.__description_tfidf is None):
                 return [self.__summary_tfidf, self.__description_tfidf]

         from ir_tfidf import IRTFIDF
         fetched = IRTFIDF.get_tfidf_of_bug(self.__bug_id)
         summary_tfidf, description_tfidf = fetched
         if self.__allow_cache:
             self.__summary_tfidf = summary_tfidf
             self.__description_tfidf = description_tfidf
         return [summary_tfidf, description_tfidf]

     # No bug id: the values live on the instance itself.
     incomplete = self.__summary_tfidf is None or \
             self.__description_tfidf is None
     if incomplete:
         self.__update_summary_and_description_tfidf_from_termcount()
     return [self.__summary_tfidf, self.__description_tfidf]
示例#6
0
 def get_summary_and_description_tfidf(self):
     """Return ``[summary_tfidf, description_tfidf]`` for this report.

     With a bug id, the pair is looked up through
     ``IRTFIDF.get_tfidf_of_bug``; it is stored on the instance and reused
     on later calls only when caching is allowed.  Without a bug id, the
     instance values are returned, after first calling
     ``__update_summary_and_description_tfidf_from_termcount`` if either of
     them is still None.
     """
     if self.__bug_id is not None:
         # Serve the stored pair when caching is on and both halves exist.
         if self.__allow_cache:
             if not (self.__summary_tfidf is None or
                     self.__description_tfidf is None):
                 return [self.__summary_tfidf, self.__description_tfidf]

         from ir_tfidf import IRTFIDF
         fetched = IRTFIDF.get_tfidf_of_bug(self.__bug_id)
         summary_tfidf, description_tfidf = fetched
         if self.__allow_cache:
             self.__summary_tfidf = summary_tfidf
             self.__description_tfidf = description_tfidf
         return [summary_tfidf, description_tfidf]

     # No bug id: the values live on the instance itself.
     incomplete = self.__summary_tfidf is None or \
             self.__description_tfidf is None
     if incomplete:
         self.__update_summary_and_description_tfidf_from_termcount()
     return [self.__summary_tfidf, self.__description_tfidf]